docs: Improve API doc groups #1309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged 3 commits on Jul 18, 2025
Changes from 1 commit
docs: update API doc groups
Closes: #1293
vdusek committed Jul 16, 2025
commit fcfcf627a28145702efc145b105092e16e412d5b
2 changes: 1 addition & 1 deletion src/crawlee/_autoscaling/autoscaled_pool.py
@@ -35,7 +35,7 @@ def __init__(self) -> None:
         self.result: asyncio.Future = asyncio.Future()


-@docs_group('Classes')
+@docs_group('Autoscaling')
 class AutoscaledPool:
     """Manages a pool of asynchronous resource-intensive tasks that are executed in parallel.
2 changes: 1 addition & 1 deletion src/crawlee/_autoscaling/snapshotter.py
@@ -28,7 +28,7 @@
 T = TypeVar('T')


-@docs_group('Classes')
+@docs_group('Autoscaling')
 class Snapshotter:
     """Monitors and logs system resource usage at predefined intervals for performance optimization.
2 changes: 1 addition & 1 deletion src/crawlee/_autoscaling/system_status.py
@@ -17,7 +17,7 @@
 logger = getLogger(__name__)


-@docs_group('Classes')
+@docs_group('Autoscaling')
 class SystemStatus:
     """Provides a simple interface for evaluating system resource usage from snapshots collected by `Snapshotter`.
2 changes: 1 addition & 1 deletion src/crawlee/_request.py
@@ -138,7 +138,7 @@ class RequestOptions(TypedDict):
     no_retry: NotRequired[bool]


-@docs_group('Data structures')
+@docs_group('Storage data')
 class Request(BaseModel):
     """Represents a request in the Crawlee framework, containing the necessary information for crawling operations.
2 changes: 1 addition & 1 deletion src/crawlee/_service_locator.py
@@ -12,7 +12,7 @@
 from crawlee.storages._storage_instance_manager import StorageInstanceManager


-@docs_group('Classes')
+@docs_group('Configuration')
 class ServiceLocator:
     """Service locator for managing the services used by Crawlee.
8 changes: 4 additions & 4 deletions src/crawlee/_types.py
@@ -65,7 +65,7 @@ def _normalize_headers(headers: Mapping[str, str]) -> dict[str, str]:
     return dict(sorted_headers)


-@docs_group('Data structures')
+@docs_group('Others')
 class HttpHeaders(RootModel, Mapping[str, str]):
     """A dictionary-like object representing HTTP headers."""

@@ -103,7 +103,7 @@ def __len__(self) -> int:
         return len(self.root)


-@docs_group('Data structures')
+@docs_group('Configuration')
 class ConcurrencySettings:
     """Concurrency settings for AutoscaledPool."""

@@ -507,7 +507,7 @@ def __call__(
     """


-@docs_group('Data structures')
+@docs_group('Others')
 @dataclasses.dataclass
 class PageSnapshot:
     """Snapshot of a crawled page."""

@@ -547,7 +547,7 @@ def __call__(


 @dataclass(frozen=True)
-@docs_group('Data structures')
+@docs_group('Crawling contexts')
 class BasicCrawlingContext:
     """Basic crawling context.
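
Since `ConcurrencySettings` now sits in the Configuration group alongside the autoscaling classes it parametrizes, a short usage sketch may help. The parameter and class names below follow the public crawlee API; the concrete values and the handler body are purely illustrative:

import asyncio

from crawlee import ConcurrencySettings
from crawlee.crawlers import HttpCrawler, HttpCrawlingContext


async def main() -> None:
    # Bounds within which the autoscaled pool may scale task concurrency.
    settings = ConcurrencySettings(min_concurrency=2, desired_concurrency=5, max_concurrency=20)
    crawler = HttpCrawler(concurrency_settings=settings)

    @crawler.router.default_handler
    async def handler(context: HttpCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

    await crawler.run(['https://example.com'])


if __name__ == '__main__':
    asyncio.run(main())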
21 changes: 20 additions & 1 deletion src/crawlee/_utils/docs.py
@@ -3,7 +3,26 @@
 from collections.abc import Callable
 from typing import Any, Literal, TypeVar

-GroupName = Literal['Classes', 'Abstract classes', 'Data structures', 'Event payloads', 'Errors', 'Functions']
+GroupName = Literal[
+    'Autoscaling',
+    'Browser management',
+    'Configuration',
+    'Crawlers',
+    'Crawling contexts',
+    'Errors',
+    'Event data',
+    'Event managers',
+    'Functions',
+    'HTTP clients',
+    'HTTP parsers',
+    'Others',
+    'Request loaders',
+    'Session management',
+    'Statistics',
+    'Storage clients',
+    'Storage data',
+    'Storages',
+]

 T = TypeVar('T', bound=Callable[..., Any])

Review thread on the 'Others' entry:

Collaborator: Could we use a singular (Other) here and also move it to the bottom of the class list?

Collaborator (author): Updated to "Other". However, the sequence defined in GroupName doesn't affect the order in the rendered page.

Collaborator: I know, but I don't know where to change this to actually change the order 😁 Could you please give it a try?

Collaborator (author): Group sorting is implemented in the docusaurus-plugin-typedoc-api. @barjin will do the changes, and once that's done, we can bump the package version here.
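
For orientation, the `docs_group` decorator that consumes these group names is defined in this same file. A minimal sketch, assuming (as the review thread implies) that it is a pure marker read by the docs tooling with no runtime effect, might look like this:

from collections.abc import Callable
from typing import Any, Literal, TypeVar

# Abridged; the full list of groups appears in the diff above.
GroupName = Literal['Autoscaling', 'Crawlers', 'Others']

T = TypeVar('T', bound=Callable[..., Any])


def docs_group(group_name: GroupName) -> Callable[[T], T]:
    """Tag a class or function with a documentation group.

    Assumed behavior: the group name is consumed only by static docs
    tooling, so the decorated object is returned unchanged at runtime.
    """

    def wrapper(func: T) -> T:
        return func

    return wrapper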
3 changes: 3 additions & 0 deletions src/crawlee/browsers/_browser_controller.py
@@ -5,6 +5,8 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any

+from crawlee._utils.docs import docs_group
+
 if TYPE_CHECKING:
     from collections.abc import Mapping
     from datetime import datetime, timedelta
@@ -15,6 +17,7 @@
     from crawlee.proxy_configuration import ProxyInfo


+@docs_group('Browser management')
 class BrowserController(ABC):
     """An abstract base class for managing browser instance and their pages."""
3 changes: 3 additions & 0 deletions src/crawlee/browsers/_browser_plugin.py
@@ -5,6 +5,8 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any

+from crawlee._utils.docs import docs_group
+
 if TYPE_CHECKING:
     from collections.abc import Mapping
     from types import TracebackType
@@ -13,6 +15,7 @@
     from crawlee.browsers._types import BrowserType


+@docs_group('Browser management')
 class BrowserPlugin(ABC):
     """An abstract base class for browser plugins.
2 changes: 1 addition & 1 deletion src/crawlee/browsers/_browser_pool.py
@@ -30,7 +30,7 @@
 logger = getLogger(__name__)


-@docs_group('Classes')
+@docs_group('Browser management')
 class BrowserPool:
     """Manage a pool of browsers and pages, handling their lifecycle and resource allocation.
3 changes: 3 additions & 0 deletions src/crawlee/browsers/_playwright_browser.py
@@ -10,12 +10,15 @@
 from playwright.async_api import Browser
 from typing_extensions import override

+from crawlee._utils.docs import docs_group
+
 if TYPE_CHECKING:
     from playwright.async_api import BrowserContext, BrowserType, CDPSession, Page

 logger = getLogger(__name__)


+@docs_group('Browser management')
 class PlaywrightPersistentBrowser(Browser):
     """A wrapper for Playwright's `Browser` that operates with a persistent context.
2 changes: 1 addition & 1 deletion src/crawlee/browsers/_playwright_browser_controller.py
@@ -27,7 +27,7 @@
 logger = getLogger(__name__)


-@docs_group('Classes')
+@docs_group('Browser management')
 class PlaywrightBrowserController(BrowserController):
     """Controller for managing Playwright browser instances and their pages.
2 changes: 1 addition & 1 deletion src/crawlee/browsers/_playwright_browser_plugin.py
@@ -28,7 +28,7 @@
 logger = getLogger(__name__)


-@docs_group('Classes')
+@docs_group('Browser management')
 class PlaywrightBrowserPlugin(BrowserPlugin):
     """A plugin for managing Playwright automation library.
2 changes: 1 addition & 1 deletion src/crawlee/configuration.py
@@ -16,7 +16,7 @@
 __all__ = ['Configuration']


-@docs_group('Data structures')
+@docs_group('Configuration')
 class Configuration(BaseSettings):
     """Configuration settings for the Crawlee project.
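
As `Configuration` and `ServiceLocator` now share the Configuration group, a brief sketch of how they interact may be useful. Method names follow the crawlee public API as I understand it, and the `purge_on_start` field is used here only as an illustrative setting:

from crawlee import service_locator
from crawlee.configuration import Configuration

# Register a custom configuration globally before any crawler is created.
config = Configuration(purge_on_start=False)
service_locator.set_configuration(config)

# Any component resolving the configuration now receives the same instance.
assert service_locator.get_configuration() is config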
@@ -32,7 +32,7 @@
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)


-@docs_group('Abstract classes')
+@docs_group('Crawlers')
 class AbstractHttpCrawler(
     Generic[TCrawlingContext, TParseResult, TSelectResult], BasicCrawler[TCrawlingContext, StatisticsState], ABC
 ):
6 changes: 3 additions & 3 deletions src/crawlee/crawlers/_abstract_http/_abstract_http_parser.py
@@ -15,13 +15,13 @@
     from crawlee.http_clients import HttpResponse


-@docs_group('Abstract classes')
+@docs_group('HTTP parsers')
 class AbstractHttpParser(Generic[TParseResult, TSelectResult], ABC):
-    """Parser used for parsing http response and inspecting parsed result to find links or detect blocking."""
+    """Parser used for parsing HTTP response and inspecting parsed result to find links or detect blocking."""

     @abstractmethod
     async def parse(self, response: HttpResponse) -> TParseResult:
-        """Parse http response.
+        """Parse HTTP response.

         Args:
             response: HTTP response to be parsed.
4 changes: 2 additions & 2 deletions src/crawlee/crawlers/_abstract_http/_http_crawling_context.py
@@ -14,7 +14,7 @@


 @dataclass(frozen=True)
-@docs_group('Data structures')
+@docs_group('Crawling contexts')
 class HttpCrawlingContext(BasicCrawlingContext, HttpCrawlingResult):
     """The crawling context used by the `AbstractHttpCrawler`."""

@@ -30,7 +30,7 @@ async def get_snapshot(self) -> PageSnapshot:


 @dataclass(frozen=True)
-@docs_group('Data structures')
+@docs_group('Crawling contexts')
 class ParsedHttpCrawlingContext(Generic[TParseResult], HttpCrawlingContext):
     """The crawling context used by `AbstractHttpCrawler`.
@@ -83,7 +83,7 @@ async def __aexit__(
         self._active = False


-@docs_group('Classes')
+@docs_group('Crawlers')
 class AdaptivePlaywrightCrawler(
     Generic[TStaticCrawlingContext, TStaticParseResult, TStaticSelectResult],
     BasicCrawler[AdaptivePlaywrightCrawlingContext, AdaptivePlaywrightCrawlerStatisticState],
@@ -8,7 +8,7 @@
 from crawlee.statistics import StatisticsState


-@docs_group('Data structures')
+@docs_group('Statistics')
 class AdaptivePlaywrightCrawlerStatisticState(StatisticsState):
     """Statistic data about a crawler run with additional information related to adaptive crawling."""
@@ -29,7 +29,7 @@ class AdaptiveContextError(RuntimeError):


 @dataclass(frozen=True)
-@docs_group('Data structures')
+@docs_group('Crawling contexts')
 class AdaptivePlaywrightCrawlingContext(
     Generic[TStaticParseResult, TStaticSelectResult], ParsedHttpCrawlingContext[TStaticParseResult]
 ):
@@ -200,7 +200,7 @@ async def from_playwright_crawling_context(


 @dataclass(frozen=True)
-@docs_group('Data structures')
+@docs_group('Crawling contexts')
 class AdaptivePlaywrightPreNavCrawlingContext(BasicCrawlingContext):
     """A wrapper around BasicCrawlingContext or AdaptivePlaywrightCrawlingContext.
@@ -18,7 +18,7 @@
 FeatureVector = tuple[float, float]


-@docs_group('Data structures')
+@docs_group('Others')
 @dataclass(frozen=True)
 class RenderingTypePrediction:
     """Rendering type recommendation with detection probability recommendation."""
@@ -32,7 +32,7 @@ class RenderingTypePrediction:
     One represents no confidence in `rendering_type` recommendation."""


-@docs_group('Classes')
+@docs_group('Others')
 class RenderingTypePredictor(ABC):
     """Stores rendering type for previously crawled URLs and predicts the rendering type for unvisited urls."""

@@ -54,7 +54,7 @@ def store_result(self, request: Request, rendering_type: RenderingType) -> None:
         """


-@docs_group('Classes')
+@docs_group('Others')
 class DefaultRenderingTypePredictor(RenderingTypePredictor):
     """Stores rendering type for previously crawled URLs and predicts the rendering type for unvisited urls.
3 changes: 1 addition & 2 deletions src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -218,7 +218,6 @@ class _BasicCrawlerOptionsGeneric(Generic[TCrawlingContext, TStatisticsState], T
     """A custom `Statistics` instance, allowing the use of non-default configuration."""


-@docs_group('Data structures')
 class BasicCrawlerOptions(
     Generic[TCrawlingContext, TStatisticsState],
     _BasicCrawlerOptions,
@@ -230,7 +229,7 @@ class BasicCrawlerOptions(
     """


-@docs_group('Classes')
+@docs_group('Crawlers')
 class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
     """A basic web crawler providing a framework for crawling websites.
2 changes: 1 addition & 1 deletion src/crawlee/crawlers/_basic/_context_pipeline.py
@@ -53,7 +53,7 @@ async def cleanup(self, final_consumer_exception: Exception | None) -> None:
         raise RuntimeError('The middleware yielded more than once')


-@docs_group('Classes')
+@docs_group('Others')
 class ContextPipeline(Generic[TCrawlingContext]):
     """Encapsulates the logic of gradually enhancing the crawling context with additional information and utilities.
@@ -18,7 +18,7 @@
     from crawlee.crawlers._abstract_http import ParsedHttpCrawlingContext


-@docs_group('Classes')
+@docs_group('Crawlers')
 class BeautifulSoupCrawler(AbstractHttpCrawler[BeautifulSoupCrawlingContext, BeautifulSoup, Tag]):
     """A web crawler for performing HTTP requests and parsing HTML/XML content.
@@ -10,7 +10,7 @@


 @dataclass(frozen=True)
-@docs_group('Data structures')
+@docs_group('Crawling contexts')
 class BeautifulSoupCrawlingContext(ParsedHttpCrawlingContext[BeautifulSoup]):
     """The crawling context used by the `BeautifulSoupCrawler`.
2 changes: 2 additions & 0 deletions src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py
@@ -5,6 +5,7 @@
 from bs4 import BeautifulSoup, Tag
 from typing_extensions import override

+from crawlee._utils.docs import docs_group
 from crawlee.crawlers._abstract_http import AbstractHttpParser

 if TYPE_CHECKING:
@@ -13,6 +14,7 @@
     from crawlee.http_clients import HttpResponse


+@docs_group('HTTP parsers')
 class BeautifulSoupParser(AbstractHttpParser[BeautifulSoup, Tag]):
     """Parser for parsing HTTP response using `BeautifulSoup`."""
2 changes: 1 addition & 1 deletion src/crawlee/crawlers/_http/_http_crawler.py
@@ -13,7 +13,7 @@
     from crawlee.crawlers import BasicCrawlerOptions


-@docs_group('Classes')
+@docs_group('Crawlers')
 class HttpCrawler(AbstractHttpCrawler[ParsedHttpCrawlingContext[bytes], bytes, bytes]):
     """Specific version of generic `AbstractHttpCrawler`.
7 changes: 5 additions & 2 deletions src/crawlee/crawlers/_http/_http_parser.py
@@ -4,6 +4,7 @@

 from typing_extensions import override

+from crawlee._utils.docs import docs_group
 from crawlee.crawlers._abstract_http import AbstractHttpParser
 from crawlee.crawlers._types import BlockedInfo

@@ -13,10 +14,12 @@
     from crawlee.http_clients import HttpResponse


+@docs_group('HTTP parsers')
 class NoParser(AbstractHttpParser[bytes, bytes]):
-    """Dummy parser for backwards compatibility.
+    """A no-op parser that returns raw response content without any processing.

-    To enable using `HttpCrawler` without need for additional specific parser.
+    This is useful when you only need the raw response data and don't require HTML
+    parsing, link extraction, or content selection functionality.
     """

     @override
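
To illustrate where `NoParser` ends up in practice: `HttpCrawler` uses it internally, so request handlers deal with the raw HTTP response rather than a parsed document. A hypothetical minimal run (the handler body and URL are illustrative) could look like this:

import asyncio

from crawlee.crawlers import HttpCrawler, HttpCrawlingContext


async def main() -> None:
    # HttpCrawler wires NoParser in internally, so no parser is configured here.
    crawler = HttpCrawler()

    @crawler.router.default_handler
    async def handler(context: HttpCrawlingContext) -> None:
        # With NoParser, the response body is available only as raw bytes.
        raw: bytes = await context.http_response.read()
        context.log.info(f'Fetched {len(raw)} bytes from {context.request.url}')

    await crawler.run(['https://example.com'])


if __name__ == '__main__':
    asyncio.run(main())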