From 307df23756f7737e5958ea44eee63b63adee94fe Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 21 Apr 2026 16:57:39 +0200 Subject: [PATCH 1/7] WIP Working tests and implementation. TODO: -Check KVS and RQ special cases -Figure out model mocking in some elegant way --- src/apify_client/_iterable_list_page.py | 269 ++++++++ .../_resource_clients/actor_collection.py | 35 +- .../actor_env_var_collection.py | 32 +- .../actor_version_collection.py | 30 +- .../_resource_clients/build_collection.py | 35 +- src/apify_client/_resource_clients/dataset.py | 300 ++++---- .../_resource_clients/dataset_collection.py | 39 +- .../_resource_clients/key_value_store.py | 220 +++--- .../key_value_store_collection.py | 38 +- .../_resource_clients/request_queue.py | 121 +++- .../request_queue_collection.py | 39 +- .../_resource_clients/run_collection.py | 61 +- .../_resource_clients/schedule_collection.py | 35 +- .../_resource_clients/store_collection.py | 69 +- .../_resource_clients/task_collection.py | 35 +- .../_resource_clients/webhook_collection.py | 36 +- .../webhook_dispatch_collection.py | 38 +- tests/unit/test_client_pagination.py | 643 ++++++++++++++++++ 18 files changed, 1663 insertions(+), 412 deletions(-) create mode 100644 src/apify_client/_iterable_list_page.py create mode 100644 tests/unit/test_client_pagination.py diff --git a/src/apify_client/_iterable_list_page.py b/src/apify_client/_iterable_list_page.py new file mode 100644 index 00000000..ee65e572 --- /dev/null +++ b/src/apify_client/_iterable_list_page.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable, Generator, Iterable, Iterator +from typing import Any, Generic, TypeVar + +from apify_client._docs import docs_group + +T = TypeVar('T') + + +def _min_for_limit_param(a: int | None, b: int | None) -> int | None: + """Return minimum of two limit parameters, treating `None` or `0` as infinity. 
+ + The Apify API treats `0` as no limit for the `limit` parameter, so `0` here means infinity. + Returns `None` when both inputs represent infinity. + """ + if a == 0: + a = None + if b == 0: + b = None + if a is None: + return b + if b is None: + return a + return min(a, b) + + +@docs_group('Other') +class IterableListPage(Iterable[T], Generic[T]): + """A page of results that can also be iterated to yield items across subsequent pages. + + Accessing fields such as `items`, `count`, or `total` returns the metadata of the first page, + preserving the behavior of the previous non-iterable return value. Iterating with `for item in ...` + yields individual items and performs additional API calls as needed to fetch further pages. + """ + + items: list[T] + """List of items on this page.""" + + count: int + """Number of items on this page.""" + + offset: int + """The starting offset of this page.""" + + limit: int + """The maximum number of items per page requested from the API.""" + + total: int + """Total number of items matching the query, as reported by the first page.""" + + desc: bool + """Whether the items are sorted in descending order.""" + + def __init__(self, first_page: Any, iterator: Iterator[T]) -> None: + """Initialize a page wrapper from a Pydantic paginated model and an iterator over all items.""" + self.items = first_page.items + count = getattr(first_page, 'count', None) + self.count = count if count is not None else len(first_page.items) + self.offset = getattr(first_page, 'offset', 0) or 0 + self.limit = getattr(first_page, 'limit', 0) or 0 + self.total = getattr(first_page, 'total', None) or len(first_page.items) + self.desc = getattr(first_page, 'desc', False) or False + self._first_page = first_page + self._iterator = iterator + + def __iter__(self) -> Iterator[T]: + """Return an iterator over all items across pages, fetching additional pages as needed.""" + return self._iterator + + +@docs_group('Other') +class 
IterableListPageAsync(AsyncIterable[T], Generic[T]): + """An awaitable result that can also be asynchronously iterated to yield items across pages. + + Awaiting the instance (`await client.list(...)`) performs a single API call and returns a + populated `IterableListPage`. Iterating (`async for item in client.list(...)`) yields individual + items and performs additional API calls as needed to fetch further pages. + + A single instance supports either awaiting or iterating — not both. + """ + + def __init__( + self, + make_awaitable: Callable[[], Awaitable[IterableListPage[T]]], + async_iterator: AsyncIterator[T], + ) -> None: + """Initialize with a factory that creates the awaitable on demand and the async iterator over items.""" + self._make_awaitable = make_awaitable + self._async_iterator = async_iterator + + def __aiter__(self) -> AsyncIterator[T]: + """Return an asynchronous iterator over all items across pages.""" + return self._async_iterator + + def __await__(self) -> Generator[Any, Any, IterableListPage[T]]: + """Return an awaitable that resolves to an `IterableListPage` containing the first page.""" + return self._make_awaitable().__await__() + + +def build_iterable_list_page( + callback: Callable[..., Any], + **kwargs: Any, +) -> IterableListPage[Any]: + """Build an `IterableListPage` from a paginated sync callback. + + The callback is invoked once immediately to fetch the first page, and again lazily during + iteration to fetch further pages. The `total` field from the first page is not trusted for + stopping iteration because it may change between calls; iteration stops when a page has + no items or when the user-requested `limit` has been reached. + + Recognized kwargs: + chunk_size: Maximum number of items requested per API call during iteration. Pass `0` + or `None` to let the API decide (effectively infinity). + limit: User-requested total item limit. Stops iteration once this many items are yielded. + offset: Starting offset for the first page. 
+ **other: Passed through to the callback unchanged. + """ + chunk_size = kwargs.pop('chunk_size', 0) or 0 + offset = kwargs.get('offset') or 0 + limit = kwargs.get('limit') or 0 + + first_page = callback(**{**kwargs, 'limit': _min_for_limit_param(kwargs.get('limit'), chunk_size)}) + + def iterator() -> Iterator[Any]: + current_page = first_page + yield from current_page.items + + fetched_items = len(current_page.items) + while current_page.items and (not limit or (limit > fetched_items)): + new_kwargs = { + **kwargs, + 'offset': offset + fetched_items, + 'limit': chunk_size if not limit else _min_for_limit_param(limit - fetched_items, chunk_size), + } + current_page = callback(**new_kwargs) + yield from current_page.items + fetched_items += len(current_page.items) + + return IterableListPage(first_page, iterator()) + + +def build_iterable_list_page_async( + callback: Callable[..., Awaitable[Any]], + **kwargs: Any, +) -> IterableListPageAsync[Any]: + """Build an `IterableListPageAsync` from a paginated async callback. + + Mirrors `build_iterable_list_page` but for async callbacks. The returned object is both + awaitable (resolves to the first page wrapped in `IterableListPage`) and asynchronously + iterable (yields items across pages). 
+ """ + chunk_size = kwargs.pop('chunk_size', 0) or 0 + offset = kwargs.get('offset') or 0 + limit = kwargs.get('limit') or 0 + + async def fetch_first_page() -> Any: + return await callback(**{**kwargs, 'limit': _min_for_limit_param(kwargs.get('limit'), chunk_size)}) + + async def async_iterator() -> AsyncIterator[Any]: + current_page = await fetch_first_page() + for item in current_page.items: + yield item + + fetched_items = len(current_page.items) + while current_page.items and (not limit or (limit > fetched_items)): + new_kwargs = { + **kwargs, + 'offset': offset + fetched_items, + 'limit': chunk_size if not limit else _min_for_limit_param(limit - fetched_items, chunk_size), + } + current_page = await callback(**new_kwargs) + for item in current_page.items: + yield item + fetched_items += len(current_page.items) + + async def wrap_first_page() -> IterableListPage[Any]: + first_page = await fetch_first_page() + return IterableListPage(first_page, iter(first_page.items)) + + return IterableListPageAsync(wrap_first_page, async_iterator()) + + +def build_cursor_iterable_list_page( + callback: Callable[..., Any], + *, + cursor_param: str, + next_cursor_fn: Callable[[Any], Any], + initial_cursor: Any = None, + limit: int | None = None, + chunk_size: int | None = None, + **kwargs: Any, +) -> IterableListPage[Any]: + """Build an `IterableListPage` for endpoints that paginate with a cursor instead of an offset. + + The callback is invoked with `{cursor_param: cursor, 'limit': effective_limit, **kwargs}` for each + page, starting from `initial_cursor`. After each page, `next_cursor_fn(page)` is consulted to + obtain the next cursor; returning `None` ends iteration. The iteration also stops when a page is + empty or when the caller-requested `limit` has been reached. 
+ """ + effective_chunk = chunk_size or 0 + user_limit = limit or 0 + + first_limit = _min_for_limit_param(limit, effective_chunk) + first_page = callback(**{**kwargs, cursor_param: initial_cursor, 'limit': first_limit}) + + def iterator() -> Iterator[Any]: + current_page = first_page + yield from current_page.items + + fetched = len(current_page.items) + next_cursor = next_cursor_fn(current_page) + + while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched): + remaining = (user_limit - fetched) if user_limit else 0 + next_limit = effective_chunk if not user_limit else _min_for_limit_param(remaining, effective_chunk) + current_page = callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit}) + yield from current_page.items + fetched += len(current_page.items) + next_cursor = next_cursor_fn(current_page) + + return IterableListPage(first_page, iterator()) + + +def build_cursor_iterable_list_page_async( + callback: Callable[..., Awaitable[Any]], + *, + cursor_param: str, + next_cursor_fn: Callable[[Any], Any], + initial_cursor: Any = None, + limit: int | None = None, + chunk_size: int | None = None, + **kwargs: Any, +) -> IterableListPageAsync[Any]: + """Build an `IterableListPageAsync` for endpoints that paginate with a cursor instead of an offset. + + Mirrors `build_cursor_iterable_list_page` but for async callbacks. The returned object is both + awaitable (resolves to the first page wrapped in `IterableListPage`) and asynchronously iterable + (yields items across pages using the supplied cursor strategy). 
+ """ + effective_chunk = chunk_size or 0 + user_limit = limit or 0 + first_limit = _min_for_limit_param(limit, effective_chunk) + + async def fetch_first_page() -> Any: + return await callback(**{**kwargs, cursor_param: initial_cursor, 'limit': first_limit}) + + async def async_iterator() -> AsyncIterator[Any]: + current_page = await fetch_first_page() + for item in current_page.items: + yield item + + fetched = len(current_page.items) + next_cursor = next_cursor_fn(current_page) + + while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched): + remaining = (user_limit - fetched) if user_limit else 0 + next_limit = effective_chunk if not user_limit else _min_for_limit_param(remaining, effective_chunk) + current_page = await callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit}) + for item in current_page.items: + yield item + fetched += len(current_page.items) + next_cursor = next_cursor_fn(current_page) + + async def wrap_first_page() -> IterableListPage[Any]: + first_page = await fetch_first_page() + return IterableListPage(first_page, iter(first_page.items)) + + return IterableListPageAsync(wrap_first_page, async_iterator()) diff --git a/src/apify_client/_resource_clients/actor_collection.py b/src/apify_client/_resource_clients/actor_collection.py index 21f8b19b..8a29caf6 100644 --- a/src/apify_client/_resource_clients/actor_collection.py +++ b/src/apify_client/_resource_clients/actor_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any, Literal from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( Actor, ActorResponse, @@ -19,6 +25,7 @@ if TYPE_CHECKING: from datetime import timedelta + from apify_client._models import ActorShort from apify_client._types import Timeout _SORT_BY_TO_API: dict[str, str] = { @@ -55,9 
+62,12 @@ def list( desc: bool | None = None, sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at', timeout: Timeout = 'medium', - ) -> ListOfActors: + ) -> IterableListPage[ActorShort]: """List the Actors the user has created or used. + The returned page also supports iteration: `for item in client.list(...)` yields individual Actors + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors Args: @@ -72,8 +82,12 @@ def list( The list of available Actors matching the specified filters. """ api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None - result = self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by) - return ListOfActorsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfActors: + result = self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs) + return ListOfActorsResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def create( self, @@ -192,7 +206,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, my: bool | None = None, @@ -201,9 +215,12 @@ async def list( desc: bool | None = None, sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at', timeout: Timeout = 'medium', - ) -> ListOfActors: + ) -> IterableListPageAsync[ActorShort]: """List the Actors the user has created or used. + The returned page also supports iteration: `for item in client.list(...)` yields individual Actors + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors Args: @@ -218,8 +235,12 @@ async def list( The list of available Actors matching the specified filters. 
""" api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None - result = await self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by) - return ListOfActorsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfActors: + result = await self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs) + return ListOfActorsResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def create( self, diff --git a/src/apify_client/_resource_clients/actor_env_var_collection.py b/src/apify_client/_resource_clients/actor_env_var_collection.py index 788745b4..b8690409 100644 --- a/src/apify_client/_resource_clients/actor_env_var_collection.py +++ b/src/apify_client/_resource_clients/actor_env_var_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import EnvVar, EnvVarResponse, ListOfEnvVars, ListOfEnvVarsResponse from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync @@ -29,9 +35,12 @@ def __init__( **kwargs, ) - def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: + def list(self, *, timeout: Timeout = 'short') -> IterableListPage[EnvVar]: """List the available Actor environment variables. + The returned page also supports iteration: `for item in client.list()` yields individual environment + variables. + https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables Args: @@ -40,8 +49,12 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: Returns: The list of available Actor environment variables. 
""" - result = self._list(timeout=timeout) - return ListOfEnvVarsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfEnvVars: + result = self._list(timeout=timeout, **kwargs) + return ListOfEnvVarsResponse.model_validate(result).data + + return build_iterable_list_page(_callback) def create( self, @@ -90,9 +103,12 @@ def __init__( **kwargs, ) - async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: + def list(self, *, timeout: Timeout = 'short') -> IterableListPageAsync[EnvVar]: """List the available Actor environment variables. + The returned page also supports iteration: `for item in client.list()` yields individual environment + variables. + https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables Args: @@ -101,8 +117,12 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: Returns: The list of available Actor environment variables. """ - result = await self._list(timeout=timeout) - return ListOfEnvVarsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfEnvVars: + result = await self._list(timeout=timeout, **kwargs) + return ListOfEnvVarsResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback) async def create( self, diff --git a/src/apify_client/_resource_clients/actor_version_collection.py b/src/apify_client/_resource_clients/actor_version_collection.py index a6239f26..32a1df5d 100644 --- a/src/apify_client/_resource_clients/actor_version_collection.py +++ b/src/apify_client/_resource_clients/actor_version_collection.py @@ -5,6 +5,12 @@ from pydantic import TypeAdapter from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( CreateOrUpdateVersionRequest, EnvVarRequest, @@ -44,9 +50,11 @@ def __init__( 
**kwargs, ) - def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: + def list(self, *, timeout: Timeout = 'short') -> IterableListPage[Version]: """List the available Actor versions. + The returned page also supports iteration: `for item in client.list()` yields individual versions. + https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions Args: @@ -55,8 +63,12 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: Returns: The list of available Actor versions. """ - result = self._list(timeout=timeout) - return ListOfVersionsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfVersions: + result = self._list(timeout=timeout, **kwargs) + return ListOfVersionsResponse.model_validate(result).data + + return build_iterable_list_page(_callback) def create( self, @@ -131,9 +143,11 @@ def __init__( **kwargs, ) - async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: + def list(self, *, timeout: Timeout = 'short') -> IterableListPageAsync[Version]: """List the available Actor versions. + The returned page also supports iteration: `for item in client.list()` yields individual versions. + https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions Args: @@ -142,8 +156,12 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: Returns: The list of available Actor versions. 
""" - result = await self._list(timeout=timeout) - return ListOfVersionsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfVersions: + result = await self._list(timeout=timeout, **kwargs) + return ListOfVersionsResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback) async def create( self, diff --git a/src/apify_client/_resource_clients/build_collection.py b/src/apify_client/_resource_clients/build_collection.py index a55ee6c2..d81cc377 100644 --- a/src/apify_client/_resource_clients/build_collection.py +++ b/src/apify_client/_resource_clients/build_collection.py @@ -3,10 +3,17 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ListOfBuilds, ListOfBuildsResponse from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models import BuildShort from apify_client._types import Timeout @@ -36,12 +43,15 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfBuilds: + ) -> IterableListPage[BuildShort]: """List all Actor builds. List all Actor builds, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `for item in client.list(...)` yields individual builds + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/build-collection/get-list-of-builds https://docs.apify.com/api/v2#/reference/actor-builds/build-collection/get-user-builds-list @@ -54,8 +64,12 @@ def list( Returns: The retrieved Actor builds. 
""" - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfBuildsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfBuilds: + result = self._list(timeout=timeout, **kwargs) + return ListOfBuildsResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) @docs_group('Resource clients') @@ -77,19 +91,22 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfBuilds: + ) -> IterableListPageAsync[BuildShort]: """List all Actor builds. List all Actor builds, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `for item in client.list(...)` yields individual builds + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/build-collection/get-list-of-builds https://docs.apify.com/api/v2#/reference/actor-builds/build-collection/get-user-builds-list @@ -102,5 +119,9 @@ async def list( Returns: The retrieved Actor builds. 
""" - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfBuildsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfBuilds: + result = await self._list(timeout=timeout, **kwargs) + return ListOfBuildsResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py index f4e0e204..065c6c2b 100644 --- a/src/apify_client/_resource_clients/dataset.py +++ b/src/apify_client/_resource_clients/dataset.py @@ -2,11 +2,18 @@ import warnings from contextlib import asynccontextmanager, contextmanager -from dataclasses import dataclass from typing import TYPE_CHECKING, Any from urllib.parse import urlencode, urlparse, urlunparse +from pydantic import BaseModel + from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import Dataset, DatasetResponse, DatasetStatistics, DatasetStatisticsResponse from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync from apify_client._utils import ( @@ -27,8 +34,7 @@ @docs_group('Other') -@dataclass -class DatasetItemsPage: +class DatasetItemsPage(BaseModel): """A page of dataset items returned by the `list_items` method. Dataset items are arbitrary JSON objects stored in the dataset, so they cannot be @@ -143,10 +149,14 @@ def list_items( flatten: list[str] | None = None, view: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'long', - ) -> DatasetItemsPage: + ) -> IterableListPage[DatasetItemsPage]: """List the items of the dataset. 
+ The returned page also supports iteration: `for item in client.list_items(...)` yields individual + items and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -176,46 +186,56 @@ def list_items( flatten: A list of fields that should be flattened. view: Name of the dataset view to be used. signature: Signature used to access the items. + chunk_size: Maximum number of items requested per API call when iterating. Only relevant when + iterating across pages. timeout: Timeout for the API HTTP request. Returns: A page of the list of dataset items according to the specified filters. """ - request_params = self._build_params( - offset=offset, - limit=limit, - desc=desc, - clean=clean, - fields=fields, - omit=omit, - unwind=unwind, - skipEmpty=skip_empty, - skipHidden=skip_hidden, - flatten=flatten, - view=view, - signature=signature, - ) - response = self._http_client.call( - url=self._build_url('items'), - method='GET', - params=request_params, - timeout=timeout, - ) + def _fetch_page( + *, + offset: int | None = None, + limit: int | None = None, + ) -> DatasetItemsPage: + request_params = self._build_params( + offset=offset, + limit=limit, + desc=desc, + clean=clean, + fields=fields, + omit=omit, + unwind=unwind, + skipEmpty=skip_empty, + skipHidden=skip_hidden, + flatten=flatten, + view=view, + signature=signature, + ) - # When using signature, API returns items as list directly - items = response_to_list(response) - - return DatasetItemsPage( - items=items, - total=int(response.headers['x-apify-pagination-total']), - offset=int(response.headers['x-apify-pagination-offset']), - # x-apify-pagination-count returns invalid values when hidden/empty items are skipped - count=len(items), - # API returns 999999999999 when no limit is used - limit=int(response.headers['x-apify-pagination-limit']), - desc=response.headers['x-apify-pagination-desc'].lower() == 'true', - ) + response = 
self._http_client.call( + url=self._build_url('items'), + method='GET', + params=request_params, + timeout=timeout, + ) + + # When using signature, API returns items as list directly + items = response_to_list(response) + + return DatasetItemsPage( + items=items, + total=int(response.headers['x-apify-pagination-total']), + offset=int(response.headers['x-apify-pagination-offset']), + # x-apify-pagination-count returns invalid values when hidden/empty items are skipped + count=len(items), + # API returns 999999999999 when no limit is used + limit=int(response.headers['x-apify-pagination-limit']), + desc=response.headers['x-apify-pagination-desc'].lower() == 'true', + ) + + return build_iterable_list_page(_fetch_page, offset=offset, limit=limit, chunk_size=chunk_size) def iterate_items( self, @@ -231,9 +251,11 @@ def iterate_items( skip_hidden: bool | None = None, signature: str | None = None, timeout: Timeout = 'long', - ) -> Iterator[dict]: + ) -> Iterator[DatasetItemsPage]: """Iterate over the items in the dataset. + Deprecated: iterate the return value of `DatasetClient.list_items()` instead. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -266,42 +288,26 @@ def iterate_items( Yields: An item from the dataset. """ - cache_size = 1000 - - should_finish = False - read_items = 0 - - # We can't rely on DatasetItemsPage.total because that is updated with a delay, - # so if you try to read the dataset items right after a run finishes, you could miss some. - # Instead, we just read and read until we reach the limit, or until there are no more items to read. 
- while not should_finish: - effective_limit = cache_size - if limit is not None: - if read_items == limit: - break - effective_limit = min(cache_size, limit - read_items) - - current_items_page = self.list_items( - offset=offset + read_items, - limit=effective_limit, - clean=clean, - desc=desc, - fields=fields, - omit=omit, - unwind=unwind, - skip_empty=skip_empty, - skip_hidden=skip_hidden, - signature=signature, - timeout=timeout, - ) - - yield from current_items_page.items - - current_page_item_count = len(current_items_page.items) - read_items += current_page_item_count - - if current_page_item_count < cache_size: - should_finish = True + warnings.warn( + '`DatasetClient.iterate_items()` is deprecated, iterate the return value of ' + '`DatasetClient.list_items()` instead.', + DeprecationWarning, + stacklevel=2, + ) + yield from self.list_items( + offset=offset, + limit=limit, + clean=clean, + desc=desc, + fields=fields, + omit=omit, + unwind=unwind, + skip_empty=skip_empty, + skip_hidden=skip_hidden, + signature=signature, + chunk_size=1000, + timeout=timeout, + ) def download_items( self, @@ -805,7 +811,7 @@ async def delete(self, *, timeout: Timeout = 'short') -> None: """ await self._delete(timeout=timeout) - async def list_items( + def list_items( self, *, offset: int | None = None, @@ -820,10 +826,14 @@ async def list_items( flatten: list[str] | None = None, view: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'long', - ) -> DatasetItemsPage: + ) -> IterableListPageAsync[DatasetItemsPage]: """List the items of the dataset. + The returned page also supports iteration: `for item in client.list_items(...)` yields individual + items and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -853,46 +863,56 @@ async def list_items( flatten: A list of fields that should be flattened. view: Name of the dataset view to be used. 
signature: Signature used to access the items. + chunk_size: Maximum number of items requested per API call when iterating. Only relevant when + iterating across pages. timeout: Timeout for the API HTTP request. Returns: A page of the list of dataset items according to the specified filters. """ - request_params = self._build_params( - offset=offset, - limit=limit, - desc=desc, - clean=clean, - fields=fields, - omit=omit, - unwind=unwind, - skipEmpty=skip_empty, - skipHidden=skip_hidden, - flatten=flatten, - view=view, - signature=signature, - ) - response = await self._http_client.call( - url=self._build_url('items'), - method='GET', - params=request_params, - timeout=timeout, - ) + async def _fetch_page( + *, + offset: int | None = None, + limit: int | None = None, + ) -> DatasetItemsPage: + request_params = self._build_params( + offset=offset, + limit=limit, + desc=desc, + clean=clean, + fields=fields, + omit=omit, + unwind=unwind, + skipEmpty=skip_empty, + skipHidden=skip_hidden, + flatten=flatten, + view=view, + signature=signature, + ) - # When using signature, API returns items as list directly - items = response_to_list(response) - - return DatasetItemsPage( - items=items, - total=int(response.headers['x-apify-pagination-total']), - offset=int(response.headers['x-apify-pagination-offset']), - # x-apify-pagination-count returns invalid values when hidden/empty items are skipped - count=len(items), - # API returns 999999999999 when no limit is used - limit=int(response.headers['x-apify-pagination-limit']), - desc=response.headers['x-apify-pagination-desc'].lower() == 'true', - ) + response = await self._http_client.call( + url=self._build_url('items'), + method='GET', + params=request_params, + timeout=timeout, + ) + + # When using signature, API returns items as list directly + items = response_to_list(response) + + return DatasetItemsPage( + items=items, + total=int(response.headers['x-apify-pagination-total']), + 
offset=int(response.headers['x-apify-pagination-offset']), + # x-apify-pagination-count returns invalid values when hidden/empty items are skipped + count=len(items), + # API returns 999999999999 when no limit is used + limit=int(response.headers['x-apify-pagination-limit']), + desc=response.headers['x-apify-pagination-desc'].lower() == 'true', + ) + + return build_iterable_list_page_async(_fetch_page, offset=offset, limit=limit, chunk_size=chunk_size) async def iterate_items( self, @@ -908,9 +928,11 @@ async def iterate_items( skip_hidden: bool | None = None, signature: str | None = None, timeout: Timeout = 'long', - ) -> AsyncIterator[dict]: + ) -> AsyncIterator[DatasetItemsPage]: """Iterate over the items in the dataset. + Deprecated: iterate the return value of `DatasetClientAsync.list_items()` instead. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -943,43 +965,27 @@ async def iterate_items( Yields: An item from the dataset. """ - cache_size = 1000 - - should_finish = False - read_items = 0 - - # We can't rely on DatasetItemsPage.total because that is updated with a delay, - # so if you try to read the dataset items right after a run finishes, you could miss some. - # Instead, we just read and read until we reach the limit, or until there are no more items to read. 
- while not should_finish: - effective_limit = cache_size - if limit is not None: - if read_items == limit: - break - effective_limit = min(cache_size, limit - read_items) - - current_items_page = await self.list_items( - offset=offset + read_items, - limit=effective_limit, - clean=clean, - desc=desc, - fields=fields, - omit=omit, - unwind=unwind, - skip_empty=skip_empty, - skip_hidden=skip_hidden, - signature=signature, - timeout=timeout, - ) - - for item in current_items_page.items: - yield item - - current_page_item_count = len(current_items_page.items) - read_items += current_page_item_count - - if current_page_item_count < cache_size: - should_finish = True + warnings.warn( + '`DatasetClientAsync.iterate_items()` is deprecated, iterate the return value of ' + '`DatasetClientAsync.list_items()` instead.', + DeprecationWarning, + stacklevel=2, + ) + async for item in self.list_items( + offset=offset, + limit=limit, + clean=clean, + desc=desc, + fields=fields, + omit=omit, + unwind=unwind, + skip_empty=skip_empty, + skip_hidden=skip_hidden, + signature=signature, + chunk_size=1000, + timeout=timeout, + ): + yield item async def get_items_as_bytes( self, diff --git a/src/apify_client/_resource_clients/dataset_collection.py b/src/apify_client/_resource_clients/dataset_collection.py index 698fa31e..66af9545 100644 --- a/src/apify_client/_resource_clients/dataset_collection.py +++ b/src/apify_client/_resource_clients/dataset_collection.py @@ -3,10 +3,17 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import Dataset, DatasetResponse, ListOfDatasets, ListOfDatasetsResponse, StorageOwnership from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models import DatasetListItem 
from apify_client._types import Timeout @@ -38,9 +45,12 @@ def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfDatasets: + ) -> IterableListPage[DatasetListItem]: """List the available datasets. + The returned page also supports iteration: `for item in client.list(...)` yields individual datasets + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets Args: @@ -55,10 +65,12 @@ def list( Returns: The list of available datasets matching the specified filters. """ - result = self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership - ) - return ListOfDatasetsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfDatasets: + result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + return ListOfDatasetsResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def get_or_create( self, @@ -102,7 +114,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, unnamed: bool | None = None, @@ -111,9 +123,12 @@ async def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfDatasets: + ) -> IterableListPageAsync[DatasetListItem]: """List the available datasets. + The returned page also supports iteration: `async for item in client.list(...)` yields individual datasets + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets Args: @@ -128,10 +143,12 @@ async def list( Returns: The list of available datasets matching the specified filters.
""" - result = await self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership - ) - return ListOfDatasetsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfDatasets: + result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + return ListOfDatasetsResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def get_or_create( self, diff --git a/src/apify_client/_resource_clients/key_value_store.py b/src/apify_client/_resource_clients/key_value_store.py index 144af733..febd9395 100644 --- a/src/apify_client/_resource_clients/key_value_store.py +++ b/src/apify_client/_resource_clients/key_value_store.py @@ -1,12 +1,19 @@ from __future__ import annotations import re +import warnings from contextlib import asynccontextmanager, contextmanager from http import HTTPStatus from typing import TYPE_CHECKING, Any from urllib.parse import urlencode, urlparse, urlunparse from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_cursor_iterable_list_page, + build_cursor_iterable_list_page_async, +) from apify_client._models import ( KeyValueStore, KeyValueStoreKey, @@ -33,6 +40,13 @@ from apify_client._types import Timeout +def _kvs_next_cursor(page: ListOfKeys) -> str | None: + """Return the next cursor for KVS key pagination, or `None` when there are no more pages.""" + if not page.is_truncated: + return None + return page.next_exclusive_start_key + + def _parse_get_record_response(response: HttpResponse) -> Any: """Parse an HTTP response based on its content type. 
@@ -144,10 +158,14 @@ def list_keys( collection: str | None = None, prefix: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeys: + ) -> IterableListPage[KeyValueStoreKey]: """List the keys in the key-value store. + The returned page also supports iteration: `for key in client.list_keys(...)` yields individual + keys and transparently fetches further pages using cursor-based pagination. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: @@ -156,80 +174,85 @@ def list_keys( collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Only relevant + when iterating across pages. timeout: Timeout for the API HTTP request. Returns: The list of keys in the key-value store matching the given arguments. 
""" - request_params = self._build_params( - limit=limit, - exclusiveStartKey=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, - ) - response = self._http_client.call( - url=self._build_url('keys'), - method='GET', - params=request_params, - timeout=timeout, + def _callback(*, limit: int | None = None, exclusive_start_key: str | None = None) -> ListOfKeys: + request_params = self._build_params( + limit=limit, + exclusiveStartKey=exclusive_start_key, + collection=collection, + prefix=prefix, + signature=signature, + ) + response = self._http_client.call( + url=self._build_url('keys'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + return ListOfKeysResponse.model_validate(result).data + + return build_cursor_iterable_list_page( + _callback, + cursor_param='exclusive_start_key', + next_cursor_fn=_kvs_next_cursor, + initial_cursor=exclusive_start_key, + limit=limit, + chunk_size=chunk_size, ) - result = response_to_dict(response) - return ListOfKeysResponse.model_validate(result).data - def iterate_keys( self, *, limit: int | None = None, + exclusive_start_key: str | None = None, collection: str | None = None, prefix: str | None = None, signature: str | None = None, - timeout: Timeout = 'long', + chunk_size: int | None = 1000, + timeout: Timeout = 'medium', ) -> Iterator[KeyValueStoreKey]: """Iterate over the keys in the key-value store. + Deprecated: iterate the return value of `KeyValueStoreClient.list_keys()` instead. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: - limit: Maximum number of keys to return. By default there is no limit. + limit: Number of keys to be returned. Maximum value is 1000. + exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. 
signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Only relevant + when iterating across pages. timeout: Timeout for the API HTTP request. Yields: A key from the key-value store. """ - cache_size = 1000 - read_keys = 0 - exclusive_start_key: str | None = None - - while True: - effective_limit = cache_size - if limit is not None: - if read_keys == limit: - break - effective_limit = min(cache_size, limit - read_keys) - - current_keys_page = self.list_keys( - limit=effective_limit, - exclusive_start_key=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, - timeout=timeout, - ) - - yield from current_keys_page.items - - read_keys += len(current_keys_page.items) - - if not current_keys_page.is_truncated: - break - - exclusive_start_key = current_keys_page.next_exclusive_start_key + warnings.warn( + '`KeyValueStoreClient.iterate_keys()` is deprecated, iterate the return value of ' + '`KeyValueStoreClient.list_keys()` instead.', + DeprecationWarning, + stacklevel=2, + ) + yield from self.list_keys( + limit=limit, + exclusive_start_key=exclusive_start_key, + collection=collection, + prefix=prefix, + signature=signature, + chunk_size=chunk_size, + timeout=timeout, + ) def get_record(self, key: str, signature: str | None = None, *, timeout: Timeout = 'long') -> dict | None: """Retrieve the given record from the key-value store. @@ -566,7 +589,7 @@ async def delete(self, *, timeout: Timeout = 'short') -> None: """ await self._delete(timeout=timeout) - async def list_keys( + def list_keys( self, *, limit: int | None = None, @@ -574,10 +597,14 @@ async def list_keys( collection: str | None = None, prefix: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeys: + ) -> IterableListPageAsync[KeyValueStoreKey]: """List the keys in the key-value store. 
+ The returned page also supports iteration: `async for key in client.list_keys(...)` yields individual + keys and transparently fetches further pages using cursor-based pagination. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: @@ -586,81 +613,86 @@ async def list_keys( collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Only relevant + when iterating across pages. timeout: Timeout for the API HTTP request. Returns: The list of keys in the key-value store matching the given arguments. """ - request_params = self._build_params( - limit=limit, - exclusiveStartKey=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, - ) - response = await self._http_client.call( - url=self._build_url('keys'), - method='GET', - params=request_params, - timeout=timeout, + async def _callback(*, limit: int | None = None, exclusive_start_key: str | None = None) -> ListOfKeys: + request_params = self._build_params( + limit=limit, + exclusiveStartKey=exclusive_start_key, + collection=collection, + prefix=prefix, + signature=signature, + ) + response = await self._http_client.call( + url=self._build_url('keys'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + return ListOfKeysResponse.model_validate(result).data + + return build_cursor_iterable_list_page_async( + _callback, + cursor_param='exclusive_start_key', + next_cursor_fn=_kvs_next_cursor, + initial_cursor=exclusive_start_key, + limit=limit, + chunk_size=chunk_size, ) - result = response_to_dict(response) - return ListOfKeysResponse.model_validate(result).data - async def iterate_keys( self, *, limit: int | None = None, + exclusive_start_key: str | None = None, collection: str | None = None, prefix: str | None =
None, signature: str | None = None, - timeout: Timeout = 'long', + chunk_size: int | None = 1000, + timeout: Timeout = 'medium', ) -> AsyncIterator[KeyValueStoreKey]: """Iterate over the keys in the key-value store. + Deprecated: iterate the return value of `KeyValueStoreClientAsync.list_keys()` instead. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: - limit: Maximum number of keys to return. By default there is no limit. + limit: Number of keys to be returned. Maximum value is 1000. + exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Only relevant + when iterating across pages. timeout: Timeout for the API HTTP request. Yields: A key from the key-value store. """ - cache_size = 1000 - read_keys = 0 - exclusive_start_key: str | None = None - - while True: - effective_limit = cache_size - if limit is not None: - if read_keys == limit: - break - effective_limit = min(cache_size, limit - read_keys) - - current_keys_page = await self.list_keys( - limit=effective_limit, - exclusive_start_key=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, - timeout=timeout, - ) - - for key in current_keys_page.items: - yield key - - read_keys += len(current_keys_page.items) - - if not current_keys_page.is_truncated: - break - - exclusive_start_key = current_keys_page.next_exclusive_start_key + warnings.warn( + '`KeyValueStoreClientAsync.iterate_keys()` is deprecated, iterate the return value of ' + '`KeyValueStoreClientAsync.list_keys()` instead.', + DeprecationWarning, + stacklevel=2, + ) + async for key in self.list_keys( + limit=limit, + exclusive_start_key=exclusive_start_key, + collection=collection, + prefix=prefix, + 
signature=signature, + chunk_size=chunk_size, + timeout=timeout, + ): + yield key async def get_record(self, key: str, signature: str | None = None, *, timeout: Timeout = 'long') -> dict | None: """Retrieve the given record from the key-value store. diff --git a/src/apify_client/_resource_clients/key_value_store_collection.py b/src/apify_client/_resource_clients/key_value_store_collection.py index 0b792360..6391a67b 100644 --- a/src/apify_client/_resource_clients/key_value_store_collection.py +++ b/src/apify_client/_resource_clients/key_value_store_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( KeyValueStore, KeyValueStoreResponse, @@ -44,9 +50,12 @@ def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeyValueStores: + ) -> IterableListPage[KeyValueStore]: """List the available key-value stores. + The returned page also supports iteration: `for item in client.list(...)` yields individual + key-value stores and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/key-value-stores/store-collection/get-list-of-key-value-stores Args: @@ -61,10 +70,12 @@ def list( Returns: The list of available key-value stores matching the specified filters.
""" - result = self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership - ) - return ListOfKeyValueStoresResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfKeyValueStores: + result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + return ListOfKeyValueStoresResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def get_or_create( self, @@ -108,7 +119,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, unnamed: bool | None = None, @@ -117,9 +128,12 @@ async def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeyValueStores: + ) -> IterableListPageAsync[KeyValueStore]: """List the available key-value stores. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + key-value stores and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/key-value-stores/store-collection/get-list-of-key-value-stores Args: @@ -134,10 +148,12 @@ async def list( Returns: The list of available key-value stores matching the specified filters.
""" - result = await self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership - ) - return ListOfKeyValueStoresResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfKeyValueStores: + result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + return ListOfKeyValueStoresResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def get_or_create( self, diff --git a/src/apify_client/_resource_clients/request_queue.py b/src/apify_client/_resource_clients/request_queue.py index 784792fd..079e1971 100644 --- a/src/apify_client/_resource_clients/request_queue.py +++ b/src/apify_client/_resource_clients/request_queue.py @@ -10,6 +10,12 @@ from more_itertools import constrained_batches from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_cursor_iterable_list_page, + build_cursor_iterable_list_page_async, +) from apify_client._models import ( AddedRequest, AddRequestResponse, @@ -50,6 +56,11 @@ _SAFETY_BUFFER_PERCENT = 0.01 / 100 # 0.01% +def _rq_next_cursor(page: ListOfRequests) -> str | None: + """Return the opaque `next_cursor` from the page, or `None` when there are no more pages.""" + return page.next_cursor + + @docs_group('Resource clients') class RequestQueueClient(ResourceClient): """Sub-client for managing a specific request queue. @@ -496,17 +507,27 @@ def list_requests( *, limit: int | None = None, filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002 - timeout: Timeout = 'medium', cursor: str | None = None, exclusive_start_id: str | None = None, - ) -> ListOfRequests: + chunk_size: int | None = None, + timeout: Timeout = 'medium', + ) -> IterableListPage[Request]: """List requests in the queue. 
+ The returned page also supports iteration: `for request in client.list_requests(...)` yields + individual requests and transparently fetches further pages using the opaque `cursor` + returned by the API. + https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests Args: limit: How many requests to retrieve. filter: List of request states to use as a filter. Multiple values mean union of the given filters. + cursor: A token returned in a previous API response, to continue listing the next page of requests. + exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result. + Only applied to the first page fetched; subsequent pages during iteration use `cursor`. + chunk_size: Maximum number of requests requested per API call when iterating. Only + relevant when iterating across pages. timeout: Timeout for the API HTTP request. cursor: A token returned in previous API response, to continue listing next page of requests exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result. @@ -521,24 +542,34 @@ def list_requests( stacklevel=2, ) - request_params = self._build_params( - limit=limit, - filter=','.join(filter) if filter else None, - clientKey=self.client_key, - exclusiveStartId=exclusive_start_id, - cursor=cursor, - ) + def _callback(*, limit: int | None = None, cursor: str | None = None) -> ListOfRequests: + # `exclusive_start_id` is honored only on the first page (when no cursor has been + # produced by the server yet); subsequent pages rely on the opaque `cursor`. 
+ request_params = self._build_params( + limit=limit, + filter=','.join(filter) if filter else None, + clientKey=self.client_key, + exclusiveStartId=exclusive_start_id if cursor is None else None, + cursor=cursor, + ) + response = self._http_client.call( + url=self._build_url('requests'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + return ListOfRequestsResponse.model_validate(result).data - response = self._http_client.call( - url=self._build_url('requests'), - method='GET', - params=request_params, - timeout=timeout, + return build_cursor_iterable_list_page( + _callback, + cursor_param='cursor', + next_cursor_fn=_rq_next_cursor, + initial_cursor=cursor, + limit=limit, + chunk_size=chunk_size, ) - result = response_to_dict(response) - return ListOfRequestsResponse.model_validate(result).data - def unlock_requests(self: RequestQueueClient, *, timeout: Timeout = 'long') -> UnlockRequestsResult: """Unlock all requests in the queue, which were locked by the same clientKey or from the same Actor run. @@ -1049,22 +1080,32 @@ async def batch_delete_requests( result = response_to_dict(response) return BatchDeleteResponse.model_validate(result).data - async def list_requests( + def list_requests( self, *, limit: int | None = None, filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002 - timeout: Timeout = 'medium', cursor: str | None = None, exclusive_start_id: str | None = None, - ) -> ListOfRequests: + chunk_size: int | None = None, + timeout: Timeout = 'medium', + ) -> IterableListPageAsync[Request]: """List requests in the queue. + The returned page also supports iteration: `async for request in client.list_requests(...)` yields + individual requests and transparently fetches further pages using the opaque `cursor` + returned by the API. + https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests Args: limit: How many requests to retrieve.
filter: List of request states to use as a filter. Multiple values mean union of the given filters. + cursor: A token returned in a previous API response, to continue listing the next page of requests. + exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result. + Only applied to the first page fetched; subsequent pages during iteration use `cursor`. + chunk_size: Maximum number of requests requested per API call when iterating. Only + relevant when iterating across pages. timeout: Timeout for the API HTTP request. cursor: A token returned in previous API response, to continue listing next page of requests exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result. @@ -1079,24 +1120,34 @@ async def list_requests( stacklevel=2, ) - request_params = self._build_params( - limit=limit, - filter=','.join(filter) if filter else None, - clientKey=self.client_key, - exclusiveStartId=exclusive_start_id, - cursor=cursor, - ) + async def _callback(*, limit: int | None = None, cursor: str | None = None) -> ListOfRequests: + # `exclusive_start_id` is honored only on the first page (when no cursor has been + # produced by the server yet); subsequent pages rely on the opaque `cursor`. 
+ request_params = self._build_params( + limit=limit, + filter=','.join(filter) if filter else None, + clientKey=self.client_key, + exclusiveStartId=exclusive_start_id if cursor is None else None, + cursor=cursor, + ) + response = await self._http_client.call( + url=self._build_url('requests'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + return ListOfRequestsResponse.model_validate(result).data - response = await self._http_client.call( - url=self._build_url('requests'), - method='GET', - params=request_params, - timeout=timeout, + return build_cursor_iterable_list_page_async( + _callback, + cursor_param='cursor', + next_cursor_fn=_rq_next_cursor, + initial_cursor=cursor, + limit=limit, + chunk_size=chunk_size, ) - result = response_to_dict(response) - return ListOfRequestsResponse.model_validate(result).data - async def unlock_requests( self: RequestQueueClientAsync, *, diff --git a/src/apify_client/_resource_clients/request_queue_collection.py b/src/apify_client/_resource_clients/request_queue_collection.py index c328303a..30d4117f 100644 --- a/src/apify_client/_resource_clients/request_queue_collection.py +++ b/src/apify_client/_resource_clients/request_queue_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( ListOfRequestQueues, ListOfRequestQueuesResponse, @@ -13,6 +19,7 @@ from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models import RequestQueueShort from apify_client._types import Timeout @@ -44,9 +51,12 @@ def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfRequestQueues: + ) -> 
IterableListPage[RequestQueueShort]: """List the available request queues. + The returned page also supports iteration: `for item in client.list(...)` yields individual + request queues and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/request-queues/queue-collection/get-list-of-request-queues Args: @@ -61,10 +71,12 @@ def list( Returns: The list of available request queues matching the specified filters. """ - result = self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership - ) - return ListOfRequestQueuesResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfRequestQueues: + result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + return ListOfRequestQueuesResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def get_or_create( self, @@ -106,7 +118,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, unnamed: bool | None = None, @@ -115,9 +127,12 @@ async def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfRequestQueues: + ) -> IterableListPageAsync[RequestQueueShort]: """List the available request queues. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + request queues and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/request-queues/queue-collection/get-list-of-request-queues Args: @@ -132,10 +147,12 @@ async def list( Returns: The list of available request queues matching the specified filters.
""" - result = await self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership - ) - return ListOfRequestQueuesResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfRequestQueues: + result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + return ListOfRequestQueuesResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def get_or_create( self, diff --git a/src/apify_client/_resource_clients/run_collection.py b/src/apify_client/_resource_clients/run_collection.py index be68121e..3132dc5e 100644 --- a/src/apify_client/_resource_clients/run_collection.py +++ b/src/apify_client/_resource_clients/run_collection.py @@ -3,12 +3,19 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ListOfRuns, ListOfRunsResponse from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: from datetime import datetime + from apify_client._models import RunShort from apify_client._types import ActorJobStatus, Timeout @@ -41,12 +48,15 @@ def list( started_before: str | datetime | None = None, started_after: str | datetime | None = None, timeout: Timeout = 'medium', - ) -> ListOfRuns: + ) -> IterableListPage[RunShort]: """List all Actor runs. List all Actor runs, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `for item in client.list(...)` yields individual runs + and transparently fetches further pages from the API. 
+ https://docs.apify.com/api/v2#/reference/actors/run-collection/get-list-of-runs https://docs.apify.com/api/v2#/reference/actor-runs/run-collection/get-user-runs-list @@ -64,16 +74,17 @@ def list( """ status_param = list(status) if isinstance(status, list) else status - result = self._list( - timeout=timeout, - limit=limit, - offset=offset, - desc=desc, - status=status_param, - startedBefore=started_before, - startedAfter=started_after, - ) - return ListOfRunsResponse.model_validate(result).data + def _callback(**kwargs: Any) -> ListOfRuns: + result = self._list( + timeout=timeout, + status=status_param, + startedBefore=started_before, + startedAfter=started_after, + **kwargs, + ) + return ListOfRunsResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) @docs_group('Resource clients') @@ -95,7 +106,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, @@ -105,12 +116,15 @@ async def list( started_before: str | datetime | None = None, started_after: str | datetime | None = None, timeout: Timeout = 'medium', - ) -> ListOfRuns: + ) -> IterableListPageAsync[RunShort]: """List all Actor runs. List all Actor runs, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `async for item in client.list(...)` yields individual runs + and transparently fetches further pages from the API.
+ https://docs.apify.com/api/v2#/reference/actors/run-collection/get-list-of-runs https://docs.apify.com/api/v2#/reference/actor-runs/run-collection/get-user-runs-list @@ -128,13 +142,14 @@ async def list( """ status_param = list(status) if isinstance(status, list) else status - result = await self._list( - timeout=timeout, - limit=limit, - offset=offset, - desc=desc, - status=status_param, - startedBefore=started_before, - startedAfter=started_after, - ) - return ListOfRunsResponse.model_validate(result).data + async def _callback(**kwargs: Any) -> ListOfRuns: + result = await self._list( + timeout=timeout, + status=status_param, + startedBefore=started_before, + startedAfter=started_after, + **kwargs, + ) + return ListOfRunsResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) diff --git a/src/apify_client/_resource_clients/schedule_collection.py b/src/apify_client/_resource_clients/schedule_collection.py index 94725724..1d78a254 100644 --- a/src/apify_client/_resource_clients/schedule_collection.py +++ b/src/apify_client/_resource_clients/schedule_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( ListOfSchedules, ListOfSchedulesResponse, @@ -13,6 +19,7 @@ from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models import ScheduleShort from apify_client._types import Timeout @@ -42,9 +49,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfSchedules: + ) -> IterableListPage[ScheduleShort]: """List the available schedules. 
+ The returned page also supports iteration: `for item in client.list(...)` yields individual + schedules and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/schedules/schedules-collection/get-list-of-schedules Args: @@ -56,8 +66,12 @@ def list( Returns: The list of available schedules matching the specified filters. """ - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfSchedulesResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfSchedules: + result = self._list(timeout=timeout, **kwargs) + return ListOfSchedulesResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def create( self, @@ -128,16 +142,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfSchedules: + ) -> IterableListPageAsync[ScheduleShort]: """List the available schedules. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + schedules and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/schedules/schedules-collection/get-list-of-schedules Args: @@ -149,8 +166,12 @@ async def list( Returns: The list of available schedules matching the specified filters.
""" - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfSchedulesResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfSchedules: + result = await self._list(timeout=timeout, **kwargs) + return ListOfSchedulesResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def create( self, diff --git a/src/apify_client/_resource_clients/store_collection.py b/src/apify_client/_resource_clients/store_collection.py index ca6b0921..159d19c8 100644 --- a/src/apify_client/_resource_clients/store_collection.py +++ b/src/apify_client/_resource_clients/store_collection.py @@ -3,10 +3,17 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ListOfActorsInStoreResponse, ListOfStoreActors from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models import StoreListActor from apify_client._types import Timeout @@ -40,9 +47,12 @@ def list( username: str | None = None, pricing_model: str | None = None, timeout: Timeout = 'medium', - ) -> ListOfStoreActors: + ) -> IterableListPage[StoreListActor]: """List Actors in Apify store. + The returned page also supports iteration: `for item in client.list(...)` yields individual Actors + from the store and transparently fetches further pages from the API. + https://docs.apify.com/api/v2/#/reference/store/store-actors-collection/get-list-of-actors-in-store Args: @@ -59,17 +69,20 @@ def list( Returns: The list of available Actors matching the specified filters.
""" - result = self._list( - timeout=timeout, - limit=limit, - offset=offset, - search=search, - sortBy=sort_by, - category=category, - username=username, - pricingModel=pricing_model, - ) - return ListOfActorsInStoreResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfStoreActors: + result = self._list( + timeout=timeout, + search=search, + sortBy=sort_by, + category=category, + username=username, + pricingModel=pricing_model, + **kwargs, + ) + return ListOfActorsInStoreResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset) @docs_group('Resource clients') @@ -91,7 +104,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, @@ -102,9 +115,12 @@ async def list( username: str | None = None, pricing_model: str | None = None, timeout: Timeout = 'medium', - ) -> ListOfStoreActors: + ) -> IterableListPageAsync[StoreListActor]: """List Actors in Apify store. + The returned page also supports iteration: `async for item in client.list(...)` yields individual Actors + from the store and transparently fetches further pages from the API. + https://docs.apify.com/api/v2/#/reference/store/store-actors-collection/get-list-of-actors-in-store Args: @@ -121,14 +137,17 @@ async def list( Returns: The list of available Actors matching the specified filters.
""" - result = await self._list( - timeout=timeout, - limit=limit, - offset=offset, - search=search, - sortBy=sort_by, - category=category, - username=username, - pricingModel=pricing_model, - ) - return ListOfActorsInStoreResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfStoreActors: + result = await self._list( + timeout=timeout, + search=search, + sortBy=sort_by, + category=category, + username=username, + pricingModel=pricing_model, + **kwargs, + ) + return ListOfActorsInStoreResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset) diff --git a/src/apify_client/_resource_clients/task_collection.py b/src/apify_client/_resource_clients/task_collection.py index cca56bf1..972e70d5 100644 --- a/src/apify_client/_resource_clients/task_collection.py +++ b/src/apify_client/_resource_clients/task_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( ActorStandby, CreateTaskRequest, @@ -19,6 +25,7 @@ if TYPE_CHECKING: from datetime import timedelta + from apify_client._models import TaskShort from apify_client._types import Timeout @@ -48,9 +55,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfTasks: + ) -> IterableListPage[TaskShort]: """List the available tasks. + The returned page also supports iteration: `for item in client.list(...)` yields individual tasks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actor-tasks/task-collection/get-list-of-tasks Args: @@ -62,8 +72,12 @@ def list( Returns: The list of available tasks matching the specified filters. 
""" - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfTasksResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfTasks: + result = self._list(timeout=timeout, **kwargs) + return ListOfTasksResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def create( self, @@ -162,16 +176,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfTasks: + ) -> IterableListPageAsync[TaskShort]: """List the available tasks. + The returned page also supports iteration: `for item in client.list(...)` yields individual tasks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actor-tasks/task-collection/get-list-of-tasks Args: @@ -183,8 +200,12 @@ async def list( Returns: The list of available tasks matching the specified filters. 
""" - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfTasksResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfTasks: + result = await self._list(timeout=timeout, **kwargs) + return ListOfTasksResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def create( self, diff --git a/src/apify_client/_resource_clients/webhook_collection.py b/src/apify_client/_resource_clients/webhook_collection.py index d1c579f9..7e581f15 100644 --- a/src/apify_client/_resource_clients/webhook_collection.py +++ b/src/apify_client/_resource_clients/webhook_collection.py @@ -3,6 +3,12 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ( ListOfWebhooks, ListOfWebhooksResponse, @@ -13,7 +19,7 @@ from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: - from apify_client._models import Webhook, WebhookEventType + from apify_client._models import Webhook, WebhookEventType, WebhookShort from apify_client._types import Timeout @@ -43,9 +49,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhooks: + ) -> IterableListPage[WebhookShort]: """List the available webhooks. + The returned page also supports iteration: `for item in client.list(...)` yields individual webhooks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhooks/webhook-collection/get-list-of-webhooks Args: @@ -57,8 +66,12 @@ def list( Returns: The list of available webhooks matching the specified filters. 
""" - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfWebhooksResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfWebhooks: + result = self._list(timeout=timeout, **kwargs) + return ListOfWebhooksResponse.model_validate(result).data + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) def create( self, @@ -139,16 +152,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhooks: + ) -> IterableListPageAsync[WebhookShort]: """List the available webhooks. + The returned page also supports iteration: `for item in client.list(...)` yields individual webhooks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhooks/webhook-collection/get-list-of-webhooks Args: @@ -160,8 +176,12 @@ async def list( Returns: The list of available webhooks matching the specified filters. 
""" - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfWebhooksResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfWebhooks: + result = await self._list(timeout=timeout, **kwargs) + return ListOfWebhooksResponse.model_validate(result).data + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) async def create( self, diff --git a/src/apify_client/_resource_clients/webhook_dispatch_collection.py b/src/apify_client/_resource_clients/webhook_dispatch_collection.py index 17d17a0d..4348d489 100644 --- a/src/apify_client/_resource_clients/webhook_dispatch_collection.py +++ b/src/apify_client/_resource_clients/webhook_dispatch_collection.py @@ -3,13 +3,23 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group +from apify_client._iterable_list_page import ( + IterableListPage, + IterableListPageAsync, + build_iterable_list_page, + build_iterable_list_page_async, +) from apify_client._models import ListOfWebhookDispatches, WebhookDispatchList from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models import WebhookDispatch from apify_client._types import Timeout +_EMPTY_WEBHOOK_DISPATCHES = ListOfWebhookDispatches(total=0, offset=0, limit=1, desc=False, count=0, items=[]) + + @docs_group('Resource clients') class WebhookDispatchCollectionClient(ResourceClient): """Sub-client for the webhook dispatch collection. @@ -36,9 +46,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhookDispatches | None: + ) -> IterableListPage[WebhookDispatch]: """List all webhook dispatches of a user. + The returned page also supports iteration: `for item in client.list(...)` yields individual + webhook dispatches and transparently fetches further pages from the API. 
+ https://docs.apify.com/api/v2#/reference/webhook-dispatches/webhook-dispatches-collection/get-list-of-webhook-dispatches Args: @@ -50,8 +63,12 @@ def list( Returns: The retrieved webhook dispatches of a user. """ - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return WebhookDispatchList.model_validate(result).data + + def _callback(**kwargs: Any) -> ListOfWebhookDispatches: + result = self._list(timeout=timeout, **kwargs) + return WebhookDispatchList.model_validate(result).data or _EMPTY_WEBHOOK_DISPATCHES + + return build_iterable_list_page(_callback, limit=limit, offset=offset, desc=desc) @docs_group('Resource clients') @@ -73,16 +90,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhookDispatches | None: + ) -> IterableListPageAsync[WebhookDispatch]: """List all webhook dispatches of a user. + The returned page also supports iteration: `for item in client.list(...)` yields individual + webhook dispatches and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhook-dispatches/webhook-dispatches-collection/get-list-of-webhook-dispatches Args: @@ -94,5 +114,9 @@ async def list( Returns: The retrieved webhook dispatches of a user. 
""" - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return WebhookDispatchList.model_validate(result).data + + async def _callback(**kwargs: Any) -> ListOfWebhookDispatches: + result = await self._list(timeout=timeout, **kwargs) + return WebhookDispatchList.model_validate(result).data or _EMPTY_WEBHOOK_DISPATCHES + + return build_iterable_list_page_async(_callback, limit=limit, offset=offset, desc=desc) diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py new file mode 100644 index 00000000..ee8585bd --- /dev/null +++ b/tests/unit/test_client_pagination.py @@ -0,0 +1,643 @@ +from __future__ import annotations + +import dataclasses +from typing import TYPE_CHECKING, Any, Literal, TypeAlias +from unittest import mock +from unittest.mock import Mock + +import pytest + +from apify_client import ApifyClient, ApifyClientAsync +from apify_client import _models as _models_module +from apify_client._resource_clients import ( + ActorCollectionClient, + ActorCollectionClientAsync, + ActorEnvVarCollectionClient, + ActorEnvVarCollectionClientAsync, + ActorVersionCollectionClient, + ActorVersionCollectionClientAsync, + BuildCollectionClient, + BuildCollectionClientAsync, + DatasetClient, + DatasetClientAsync, + DatasetCollectionClient, + DatasetCollectionClientAsync, + KeyValueStoreClient, + KeyValueStoreClientAsync, + KeyValueStoreCollectionClient, + KeyValueStoreCollectionClientAsync, + RequestQueueClient, + RequestQueueClientAsync, + RequestQueueCollectionClient, + RequestQueueCollectionClientAsync, + RunCollectionClient, + RunCollectionClientAsync, + ScheduleCollectionClient, + ScheduleCollectionClientAsync, + StoreCollectionClient, + StoreCollectionClientAsync, + TaskCollectionClient, + TaskCollectionClientAsync, + WebhookCollectionClient, + WebhookCollectionClientAsync, + WebhookDispatchCollectionClient, + WebhookDispatchCollectionClientAsync, +) + +if TYPE_CHECKING: + from _pytest.mark import 
ParameterSet + + from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync + + +CollectionClient: TypeAlias = ( + ActorCollectionClient + | BuildCollectionClient + | RunCollectionClient + | ScheduleCollectionClient + | TaskCollectionClient + | WebhookCollectionClient + | WebhookDispatchCollectionClient + | DatasetCollectionClient + | KeyValueStoreCollectionClient + | RequestQueueCollectionClient + | StoreCollectionClient + | ActorEnvVarCollectionClient + | ActorVersionCollectionClient +) + +CollectionClientAsync: TypeAlias = ( + ActorCollectionClientAsync + | BuildCollectionClientAsync + | RunCollectionClientAsync + | ScheduleCollectionClientAsync + | TaskCollectionClientAsync + | WebhookCollectionClientAsync + | WebhookDispatchCollectionClientAsync + | DatasetCollectionClientAsync + | KeyValueStoreCollectionClientAsync + | RequestQueueCollectionClientAsync + | StoreCollectionClientAsync + | ActorEnvVarCollectionClientAsync + | ActorVersionCollectionClientAsync +) + +ID_PLACEHOLDER = 'some-id' + +# Response wrappers whose `model_validate` should be bypassed during pagination tests so +# synthetic `{'id': N, 'key': N}` test items are accepted without matching the real API schemas. 
+_BYPASSED_RESPONSE_CLASSES = ( + 'ListOfActorsResponse', + 'ListOfBuildsResponse', + 'ListOfRunsResponse', + 'ListOfSchedulesResponse', + 'ListOfTasksResponse', + 'ListOfWebhooksResponse', + 'WebhookDispatchList', + 'ListOfDatasetsResponse', + 'ListOfKeyValueStoresResponse', + 'ListOfRequestQueuesResponse', + 'ListOfActorsInStoreResponse', + 'ListOfEnvVarsResponse', + 'ListOfVersionsResponse', + 'ListOfKeysResponse', + 'ListOfRequestsResponse', +) + + +class _AttrDict(dict): + """A dict that also supports attribute access — enough for next_cursor_fn to call `item.id`.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError as exc: + raise AttributeError(name) from exc + + +class _FakeListModel: + """Stand-in for a paginated list model that mimics the fields the iteration logic accesses.""" + + def __init__(self, **kwargs: Any) -> None: + # Sensible defaults for the pagination fields `IterableListPage` reads. + self.total = 0 + self.count = 0 + self.offset = 0 + self.limit = 1 + self.desc = False + self.items: list[Any] = [] + self.is_truncated = False + self.next_exclusive_start_key: str | None = None + for key, value in kwargs.items(): + setattr(self, key, value) + if 'count' not in kwargs: + self.count = len(self.items) + + +@dataclasses.dataclass +class _FakeResponseWrapper: + """Stand-in for a `*Response` Pydantic model that wraps a paginated list under `.data`.""" + + data: _FakeListModel + + +@pytest.fixture(autouse=True) +def _bypass_response_validation() -> Any: + """Replace the Pydantic `model_validate` of response wrappers with a lightweight builder. + + Pagination tests use synthetic items that don't satisfy the real API schemas. Bypassing + validation lets the iteration logic run while still building a model-like object that exposes + the fields the client code consumes (`.data`, `.items`, `.total`, etc.). 
+ """ + + def _build(_cls: type, obj: dict) -> _FakeResponseWrapper: + data_dict = obj.get('data') or {} + raw_items = data_dict.get('items', []) + # Wrap dict items so cursor-based pagination can read `item.id` from the last item. + items = [_AttrDict(item) if isinstance(item, dict) else item for item in raw_items] + fields = {**data_dict, 'items': items} + return _FakeResponseWrapper(data=_FakeListModel(**fields)) + + patchers = [] + for class_name in _BYPASSED_RESPONSE_CLASSES: + cls = getattr(_models_module, class_name) + patchers.append(mock.patch.object(cls, 'model_validate', classmethod(_build))) + + for p in patchers: + p.start() + try: + yield + finally: + for p in patchers: + p.stop() + + +def create_items(start: int, end: int) -> list[dict[str, int]]: + """Create a list of test items for the given index range.""" + step = -1 if end < start else 1 + return [{'id': i, 'key': i} for i in range(start, end, step)] + + +def _mocked_api_pagination_logic(*, url: str, params: dict[str, Any] | None = None, **_: Any) -> Mock: + """Simulate a paginated Apify API response. + + The mocked platform holds 2500 items normally and an additional 100 when ``unnamed=True`` is + requested. Items are simple objects with an incrementing ``id`` and ``key`` that make it easy + to verify iteration order. + + Pages are capped at 1000 items regardless of the requested limit, mirroring the real API. 
+ """ + params = params or {} + + normal_items = 2500 + extra_items = 100 # for example unnamed resources + max_items_per_page = 1000 + + total_items = (normal_items + extra_items) if params.get('unnamed') else normal_items + + offset_raw = params.get('offset') + offset = int(offset_raw) if offset_raw not in (None, '') else 0 + limit_raw = params.get('limit') + limit = int(limit_raw) if limit_raw not in (None, '') else 0 + assert offset >= 0, 'Invalid offset sent to API' + assert limit >= 0, 'Invalid limit sent to API' + + desc = params.get('desc') in (True, 'true', 'True', 1, '1') + items = create_items(total_items, 0) if desc else create_items(0, total_items) + + lower_index = min(offset, total_items) + upper_index = min(offset + (limit or total_items), total_items) + count = min(max(upper_index - lower_index, 0), max_items_per_page) + + selected_items = items[lower_index : min(upper_index, lower_index + max_items_per_page)] + + response = Mock() + + # The dataset items endpoint returns items as a raw list + if url.endswith(f'/datasets/{ID_PLACEHOLDER}/items'): + response.content = b'' + response.json = lambda: selected_items + else: + response.content = b'' + response.json = lambda: { + 'data': { + 'total': total_items, + 'count': count, + 'offset': offset, + 'limit': limit or (count or 1), + 'desc': desc, + 'items': selected_items, + } + } + + response.headers = { + 'x-apify-pagination-total': str(total_items), + 'x-apify-pagination-offset': str(offset), + 'x-apify-pagination-limit': str(limit or count or 1), + 'x-apify-pagination-desc': str(desc).lower(), + } + return response + + +@dataclasses.dataclass +class _PaginationCase: + """A single parametrized pagination test case.""" + + id: str + inputs: dict + expected_items: list[dict[str, int]] + supported_clients: set[str] + + def __hash__(self) -> int: + return hash(self.id) + + def supports(self, client: ResourceClient | ResourceClientAsync) -> bool: + """Check whether the given client implements 
functionality tested by this test case.""" + return client.__class__.__name__.removesuffix('Async') in self.supported_clients + + +COLLECTION_CLIENTS = { + 'ActorCollectionClient', + 'BuildCollectionClient', + 'RunCollectionClient', + 'ScheduleCollectionClient', + 'TaskCollectionClient', + 'WebhookCollectionClient', + 'WebhookDispatchCollectionClient', + 'DatasetCollectionClient', + 'KeyValueStoreCollectionClient', + 'RequestQueueCollectionClient', + 'StoreCollectionClient', +} + +NO_OPTIONS_CLIENTS = { + 'ActorEnvVarCollectionClient', + 'ActorVersionCollectionClient', +} + +DATASET_CLIENTS = {'DatasetClient'} +RQ_CLIENTS = {'RequestQueueClient'} +KVS_CLIENTS = {'KeyValueStoreClient'} +STORAGE_CLIENTS = DATASET_CLIENTS | RQ_CLIENTS | KVS_CLIENTS +ALL_CLIENTS = COLLECTION_CLIENTS | NO_OPTIONS_CLIENTS | STORAGE_CLIENTS + +TEST_CASES = ( + _PaginationCase('No options', {}, create_items(0, 2500), ALL_CLIENTS), + _PaginationCase('Limit', {'limit': 1100}, create_items(0, 1100), ALL_CLIENTS - NO_OPTIONS_CLIENTS), + _PaginationCase('Out of range limit', {'limit': 3000}, create_items(0, 2500), ALL_CLIENTS - NO_OPTIONS_CLIENTS), + _PaginationCase( + 'Offset', + {'offset': 1000}, + create_items(1000, 2500), + ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS, + ), + _PaginationCase( + 'Offset and limit', + {'offset': 1000, 'limit': 1100}, + create_items(1000, 2100), + ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS, + ), + _PaginationCase( + 'Out of range offset', {'offset': 3000}, [], ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS + ), + _PaginationCase( + 'Offset, limit, descending', + {'offset': 1000, 'limit': 1100, 'desc': True}, + create_items(1500, 400), + ALL_CLIENTS - NO_OPTIONS_CLIENTS - {'StoreCollectionClient'} - KVS_CLIENTS - RQ_CLIENTS, + ), + _PaginationCase( + 'Offset, limit, descending, unnamed', + {'offset': 50, 'limit': 1100, 'desc': True, 'unnamed': True}, + create_items(2550, 1450), + {'DatasetCollectionClient', 
'KeyValueStoreCollectionClient', 'RequestQueueCollectionClient'}, + ), + _PaginationCase( + 'chunk_size', + {'chunk_size': 100, 'limit': 250}, + create_items(0, 250), + STORAGE_CLIENTS, + ), + _PaginationCase( + 'Offset, limit, descending, chunk_size', + {'offset': 50, 'limit': 1100, 'desc': True, 'chunk_size': 100}, + create_items(2450, 1350), + DATASET_CLIENTS, + ), + _PaginationCase( + 'Exclusive start key', + {'exclusive_start_key': '1000'}, + create_items(1001, 2500), + KVS_CLIENTS, + ), + _PaginationCase( + 'Exclusive start key and limit', + {'exclusive_start_key': '1000', 'limit': 500}, + create_items(1001, 1501), + KVS_CLIENTS, + ), + _PaginationCase( + 'Cursor', + {'cursor': '1000'}, + create_items(1001, 2500), + RQ_CLIENTS, + ), + _PaginationCase( + 'Cursor and limit', + {'cursor': '1000', 'limit': 500}, + create_items(1001, 1501), + RQ_CLIENTS, + ), +) + + +def _generate_test_params( + client_set: Literal['collection', 'dataset', 'kvs', 'rq'], *, async_clients: bool +) -> list[ParameterSet]: + """Build the pytest parameter set for the given client category.""" + client = ApifyClientAsync(token='') if async_clients else ApifyClient(token='') + + # Tuple rather than set because pytest-xdist requires a stable iteration order. + # https://pytest-xdist.readthedocs.io/en/stable/known-limitations.html#order-and-amount-of-test-must-be-consistent + clients: tuple[ResourceClient | ResourceClientAsync, ...] 
+ + match client_set: + case 'collection': + clients = ( + client.actors(), + client.schedules(), + client.tasks(), + client.webhooks(), + client.webhook_dispatches(), + client.store(), + client.datasets(), + client.key_value_stores(), + client.request_queues(), + client.actor(ID_PLACEHOLDER).builds(), + client.actor(ID_PLACEHOLDER).runs(), + client.actor(ID_PLACEHOLDER).versions(), + client.actor(ID_PLACEHOLDER).version('some-version').env_vars(), + ) + case 'dataset': + clients = (client.dataset(ID_PLACEHOLDER),) + case 'kvs': + clients = (client.key_value_store(ID_PLACEHOLDER),) + case 'rq': + clients = (client.request_queue(ID_PLACEHOLDER),) + case _: + raise ValueError(f'Unknown client set: {client_set}') + + return [ + pytest.param( + test_case.inputs, test_case.expected_items, sub_client, id=f'{sub_client.__class__.__name__}:{test_case.id}' + ) + for test_case in TEST_CASES + for sub_client in clients + if test_case.supports(sub_client) + ] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='collection', async_clients=False), +) +def test_client_list_iterable( + client: CollectionClient, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """Every sync collection client's `list()` return value should iterate across pages.""" + with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_pagination_logic): + returned_items = list(client.list(**inputs)) + + if inputs == {}: + list_response = client.list(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='collection', async_clients=True), +) +async def test_client_list_iterable_async( + client: CollectionClientAsync, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """Every async collection client's `list()` return value should iterate across 
pages.""" + + async def async_side_effect(**kwargs: Any) -> Mock: + return _mocked_api_pagination_logic(**kwargs) + + with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): + returned_items = [item async for item in client.list(**inputs)] + + if inputs == {}: + list_response = await client.list(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='dataset', async_clients=False), +) +def test_dataset_items_list_iterable( + client: DatasetClient, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The sync dataset client's `list_items()` return value should iterate across pages.""" + with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_pagination_logic): + returned_items = list(client.list_items(**inputs)) + + if inputs == {}: + list_response = client.list_items(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + # Until the deprecated `iterate_items` method is removed, it should behave the same + inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} + assert returned_items == list(client.iterate_items(**inputs_without_chunk_size)) + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='dataset', async_clients=True), +) +async def test_dataset_items_list_iterable_async( + client: DatasetClientAsync, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The async dataset client's `list_items()` return value should iterate across pages.""" + + async def async_side_effect(**kwargs: Any) -> Mock: + return _mocked_api_pagination_logic(**kwargs) + + with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): + returned_items = [item async for item in 
client.list_items(**inputs)] + + if inputs == {}: + list_response = await client.list_items(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + # Until the deprecated `iterate_items` method is removed, it should behave the same + inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} + assert returned_items == [item async for item in client.iterate_items(**inputs_without_chunk_size)] + + +def _mocked_api_cursor_pagination_logic(*, url: str, params: dict[str, Any] | None = None, **_: Any) -> Mock: + """Simulate the KVS keys and RQ requests endpoints, which paginate with a cursor. + + Holds 2500 synthetic items with incrementing integer `id` equal to their position. Each page is + capped at 1000 items. The mock honors ``exclusive_start_key`` for KVS and ``exclusive_start_id`` + for RQ — both are treated as the integer id of the previous page's last item; the next page + starts at that id + 1. + """ + params = params or {} + + total_items = 2500 + max_items_per_page = 1000 + + limit_raw = params.get('limit') + limit = int(limit_raw) if limit_raw not in (None, '') else 0 + assert limit >= 0, 'Invalid limit sent to API' + + # KVS uses `exclusiveStartKey`; RQ accepts either the deprecated `exclusiveStartId` (initial + # call only) or the new opaque `cursor` (subsequent calls use this). Both cursor values encode + # the last-seen item id as a string. 
+ cursor_raw = params.get('exclusiveStartKey') or params.get('exclusiveStartId') or params.get('cursor') + + start = int(cursor_raw) + 1 if cursor_raw not in (None, '') else 0 + end = total_items + if limit: + end = min(start + limit, total_items) + page_end = min(end, start + max_items_per_page) + selected_items = [{'id': i, 'key': i} for i in range(start, page_end)] + + response = Mock() + if url.endswith('/keys'): + next_exclusive_start_key = str(selected_items[-1]['id']) if selected_items else None + is_truncated = page_end < total_items and bool(selected_items) + response.json = lambda: { + 'data': { + 'items': selected_items, + 'count': len(selected_items), + 'limit': limit or (len(selected_items) or 1), + 'is_truncated': is_truncated, + 'next_exclusive_start_key': next_exclusive_start_key if is_truncated else None, + } + } + elif url.endswith('/requests'): + has_more = page_end < total_items and bool(selected_items) + next_cursor = str(selected_items[-1]['id']) if has_more else None + response.json = lambda: { + 'data': { + 'items': selected_items, + 'count': len(selected_items), + 'limit': limit or (len(selected_items) or 1), + 'next_cursor': next_cursor, + } + } + else: + raise ValueError(f'Unexpected URL in pagination test: {url}') + + response.content = b'' + return response + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='kvs', async_clients=False), +) +def test_kvs_list_keys_iterable( + client: KeyValueStoreClient, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The sync KVS client's `list_keys()` return value should iterate across cursor-paginated pages.""" + with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_cursor_pagination_logic): + returned_items = [dict(item) for item in client.list_keys(**inputs)] + + assert returned_items == expected_items + + # Until the deprecated `iterate_keys` method is removed, it should behave the same + assert 
returned_items == [dict(item) for item in client.iterate_keys(**inputs)] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='kvs', async_clients=True), +) +async def test_kvs_list_keys_iterable_async( + client: KeyValueStoreClientAsync, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The async KVS client's `list_keys()` return value should iterate across cursor-paginated pages.""" + + async def async_side_effect(**kwargs: Any) -> Mock: + return _mocked_api_cursor_pagination_logic(**kwargs) + + with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): + returned_items = [dict(item) async for item in client.list_keys(**inputs)] + + assert returned_items == expected_items + + # Until the deprecated `iterate_keys` method is removed, it should behave the same + assert returned_items == [dict(item) async for item in client.iterate_keys(**inputs)] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='rq', async_clients=False), +) +def test_rq_list_requests_iterable( + client: RequestQueueClient, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The sync RQ client's `list_requests()` return value should iterate across cursor-paginated pages.""" + with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_cursor_pagination_logic): + returned_items = [dict(item) for item in client.list_requests(**inputs)] + assert returned_items == expected_items + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client'), + _generate_test_params(client_set='rq', async_clients=True), +) +async def test_rq_list_requests_iterable_async( + client: RequestQueueClientAsync, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The async RQ client's `list_requests()` return value should iterate across cursor-paginated pages.""" + + async def 
async_side_effect(**kwargs: Any) -> Mock: + return _mocked_api_cursor_pagination_logic(**kwargs) + + with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): + returned_items = [dict(item) async for item in client.list_requests(**inputs)] + assert returned_items == expected_items + + +def test_rq_list_requests_rejects_cursor_and_exclusive_start_id() -> None: + """Passing both `cursor` and `exclusive_start_id` is mutually exclusive and must error.""" + client = ApifyClient(token='').request_queue(ID_PLACEHOLDER) + with pytest.raises(ValueError, match='Cannot use both'): + client.list_requests(cursor='a', exclusive_start_id='b') + + +async def test_rq_list_requests_rejects_cursor_and_exclusive_start_id_async() -> None: + """Async variant of the mutual-exclusion check.""" + client = ApifyClientAsync(token='').request_queue(ID_PLACEHOLDER) + with pytest.raises(ValueError, match='Cannot use both'): + client.list_requests(cursor='a', exclusive_start_id='b') From 9c4ee64e239f070716f0c5aae5cadd08204a2da5 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 22 Apr 2026 15:51:39 +0200 Subject: [PATCH 2/7] Type unsafe --- src/apify_client/_iterable_list_page.py | 40 ++++++++++++++----- src/apify_client/_resource_clients/dataset.py | 10 +++-- tests/unit/test_client_pagination.py | 18 ++++++++- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/src/apify_client/_iterable_list_page.py b/src/apify_client/_iterable_list_page.py index ee65e572..09875136 100644 --- a/src/apify_client/_iterable_list_page.py +++ b/src/apify_client/_iterable_list_page.py @@ -1,6 +1,7 @@ from __future__ import annotations -from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable, Generator, Iterable, Iterator +import asyncio +from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable, Generator, Iterable, Iterator, Coroutine from typing import Any, Generic, TypeVar from apify_client._docs import docs_group @@ -25,6 
+26,25 @@ def _min_for_limit_param(a: int | None, b: int | None) -> int | None: return min(a, b) +T = TypeVar('T') + + +class _LazyTask(Generic[T]): + """Task that is created lazily upon awaiting. + + This allows to reuse the same Task multiple times without the need to schedule the task when it is created. + """ + def __init__(self, awaitable: Awaitable[T]) -> None: + self._awaitable = awaitable + self._task: asyncio.Task[T] | None = None + + def __await__(self) -> Generator[Any, None, T]: + if self._task is None: + self._task = asyncio.create_task(self._awaitable) + return (yield from self._task.__await__()) + + + @docs_group('Other') class IterableListPage(Iterable[T], Generic[T]): """A page of results that can also be iterated to yield items across subsequent pages. @@ -109,7 +129,7 @@ def build_iterable_list_page( stopping iteration because it may change between calls; iteration stops when a page has no items or when the user-requested `limit` has been reached. - Recognized kwargs: + Iteration relevant kwargs: chunk_size: Maximum number of items requested per API call during iteration. Pass `0` or `None` to let the API decide (effectively infinity). limit: User-requested total item limit. Stops iteration once this many items are yielded. 
@@ -126,7 +146,7 @@ def iterator() -> Iterator[Any]: current_page = first_page yield from current_page.items - fetched_items = len(current_page.items) + fetched_items = current_page.count while current_page.items and (not limit or (limit > fetched_items)): new_kwargs = { **kwargs, @@ -135,7 +155,7 @@ def iterator() -> Iterator[Any]: } current_page = callback(**new_kwargs) yield from current_page.items - fetched_items += len(current_page.items) + fetched_items += current_page.count return IterableListPage(first_page, iterator()) @@ -154,15 +174,15 @@ def build_iterable_list_page_async( offset = kwargs.get('offset') or 0 limit = kwargs.get('limit') or 0 - async def fetch_first_page() -> Any: - return await callback(**{**kwargs, 'limit': _min_for_limit_param(kwargs.get('limit'), chunk_size)}) + # Can be awaited multiple times with same result, but not scheduled at this time yet, as it might be pre-emptive. + fetch_first_page = _LazyTask(callback(**{**kwargs, 'limit': _min_for_limit_param(kwargs.get('limit'), chunk_size)})) async def async_iterator() -> AsyncIterator[Any]: - current_page = await fetch_first_page() + current_page = await fetch_first_page for item in current_page.items: yield item - fetched_items = len(current_page.items) + fetched_items = current_page.count while current_page.items and (not limit or (limit > fetched_items)): new_kwargs = { **kwargs, @@ -172,10 +192,10 @@ async def async_iterator() -> AsyncIterator[Any]: current_page = await callback(**new_kwargs) for item in current_page.items: yield item - fetched_items += len(current_page.items) + fetched_items += current_page.count async def wrap_first_page() -> IterableListPage[Any]: - first_page = await fetch_first_page() + first_page = await fetch_first_page return IterableListPage(first_page, iter(first_page.items)) return IterableListPageAsync(wrap_first_page, async_iterator()) diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py index 
065c6c2b..80714353 100644 --- a/src/apify_client/_resource_clients/dataset.py +++ b/src/apify_client/_resource_clients/dataset.py @@ -228,8 +228,9 @@ def _fetch_page( items=items, total=int(response.headers['x-apify-pagination-total']), offset=int(response.headers['x-apify-pagination-offset']), - # x-apify-pagination-count returns invalid values when hidden/empty items are skipped - count=len(items), + # x-apify-pagination-count returns count of processed items, not count of returned items + # This makes difference when items were filtered using hidden/empty + count=int(response.headers['x-apify-pagination-count']), # API returns 999999999999 when no limit is used limit=int(response.headers['x-apify-pagination-limit']), desc=response.headers['x-apify-pagination-desc'].lower() == 'true', @@ -905,8 +906,9 @@ async def _fetch_page( items=items, total=int(response.headers['x-apify-pagination-total']), offset=int(response.headers['x-apify-pagination-offset']), - # x-apify-pagination-count returns invalid values when hidden/empty items are skipped - count=len(items), + # x-apify-pagination-count returns count of processed items, not count of returned items + # This makes difference when items were filtered using hidden/empty + count=int(response.headers['x-apify-pagination-count']), # API returns 999999999999 when no limit is used limit=int(response.headers['x-apify-pagination-limit']), desc=response.headers['x-apify-pagination-desc'].lower() == 'true', diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py index ee8585bd..8bd78535 100644 --- a/tests/unit/test_client_pagination.py +++ b/tests/unit/test_client_pagination.py @@ -172,9 +172,10 @@ def _build(_cls: type, obj: dict) -> _FakeResponseWrapper: p.stop() -def create_items(start: int, end: int) -> list[dict[str, int]]: +def create_items(start: int, end: int, step: int | None = None) -> list[dict[str, int]]: """Create a list of test items for the given index range.""" - step = -1 if 
end < start else 1 + if not step: + step = -1 if end < start else 1 return [{'id': i, 'key': i} for i in range(start, end, step)] @@ -211,6 +212,10 @@ def _mocked_api_pagination_logic(*, url: str, params: dict[str, Any] | None = No selected_items = items[lower_index : min(upper_index, lower_index + max_items_per_page)] + # Every second item would be filtered out when using `skip_empty=True`, `skip_hidden=True`, or `clean=True` + if params.get('skip_empty') or params.get('skip_hidden') or params.get('clean'): + selected_items = selected_items[::2] + response = Mock() # The dataset items endpoint returns items as a raw list @@ -231,6 +236,7 @@ def _mocked_api_pagination_logic(*, url: str, params: dict[str, Any] | None = No } response.headers = { + 'x-apify-pagination-count': count, 'x-apify-pagination-total': str(total_items), 'x-apify-pagination-offset': str(offset), 'x-apify-pagination-limit': str(limit or count or 1), @@ -324,6 +330,14 @@ def supports(self, client: ResourceClient | ResourceClientAsync) -> bool: create_items(2450, 1350), DATASET_CLIENTS, ), + _PaginationCase( + 'Offset, limit, descending, chunk_size, clean', + {'limit': 1500, 'chunk_size': 100, 'clean': True}, + # API behavior with `clean=True` is to apply the cleaning after pagination, so we end up with missing items + # being counted towards the limit and thus fewer total items returned. 
+ create_items(0, 1500, 2), + DATASET_CLIENTS, + ), _PaginationCase( 'Exclusive start key', {'exclusive_start_key': '1000'}, From eeb6be1a31bc16f52f2711ed1655c8388db42954 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 22 Apr 2026 16:48:25 +0200 Subject: [PATCH 3/7] Use `HasItems` and fix type errors --- src/apify_client/_iterable_list_page.py | 68 +++++++++++-------- src/apify_client/_resource_clients/dataset.py | 8 +-- .../_resource_clients/key_value_store.py | 9 --- .../_resource_clients/request_queue.py | 7 -- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/src/apify_client/_iterable_list_page.py b/src/apify_client/_iterable_list_page.py index 09875136..6e39acba 100644 --- a/src/apify_client/_iterable_list_page.py +++ b/src/apify_client/_iterable_list_page.py @@ -1,14 +1,18 @@ from __future__ import annotations import asyncio -from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable, Generator, Iterable, Iterator, Coroutine -from typing import Any, Generic, TypeVar +from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable, Coroutine, Generator, Iterable, Iterator +from typing import Any, Generic, Protocol, TypeVar from apify_client._docs import docs_group T = TypeVar('T') +class HasItems(Protocol[T]): + items: list[T] + + def _min_for_limit_param(a: int | None, b: int | None) -> int | None: """Return minimum of two limit parameters, treating `None` or `0` as infinity. @@ -26,15 +30,13 @@ def _min_for_limit_param(a: int | None, b: int | None) -> int | None: return min(a, b) -T = TypeVar('T') - - class _LazyTask(Generic[T]): """Task that is created lazily upon awaiting. This allows to reuse the same Task multiple times without the need to schedule the task when it is created. 
""" - def __init__(self, awaitable: Awaitable[T]) -> None: + + def __init__(self, awaitable: Coroutine[Any, Any, T]) -> None: self._awaitable = awaitable self._task: asyncio.Task[T] | None = None @@ -44,7 +46,6 @@ def __await__(self) -> Generator[Any, None, T]: return (yield from self._task.__await__()) - @docs_group('Other') class IterableListPage(Iterable[T], Generic[T]): """A page of results that can also be iterated to yield items across subsequent pages. @@ -72,7 +73,7 @@ class IterableListPage(Iterable[T], Generic[T]): desc: bool """Whether the items are sorted in descending order.""" - def __init__(self, first_page: Any, iterator: Iterator[T]) -> None: + def __init__(self, first_page: HasItems[T], iterator: Iterator[T]) -> None: """Initialize a page wrapper from a Pydantic paginated model and an iterator over all items.""" self.items = first_page.items count = getattr(first_page, 'count', None) @@ -119,15 +120,24 @@ def __await__(self) -> Generator[Any, Any, IterableListPage[T]]: def build_iterable_list_page( - callback: Callable[..., Any], + callback: Callable[..., HasItems[T]], **kwargs: Any, -) -> IterableListPage[Any]: +) -> IterableListPage[T]: """Build an `IterableListPage` from a paginated sync callback. The callback is invoked once immediately to fetch the first page, and again lazily during - iteration to fetch further pages. The `total` field from the first page is not trusted for - stopping iteration because it may change between calls; iteration stops when a page has - no items or when the user-requested `limit` has been reached. + iteration to fetch further pages. + + There are several optional kwargs that control the pagination, but not all are accepted on each paginated endpoint. + Some endpoints do not return all paginated metadata, so the implementation should be resilient to missing fields, + but it can use them if available. 
+ + The `total` field from the first page is not trusted for stopping iteration because it may change between calls; + iteration stops when a page has no items or when the user-requested `limit` has been reached. + + The `count` field does not count objects returned, but object scanned by the API. For example when using filters, + returned items can be smaller than `count`. Therefore, `count` should be used for correct offset calculation if + available. Iteration relevant kwargs: chunk_size: Maximum number of items requested per API call during iteration. Pass `0` @@ -142,11 +152,11 @@ def build_iterable_list_page( first_page = callback(**{**kwargs, 'limit': _min_for_limit_param(kwargs.get('limit'), chunk_size)}) - def iterator() -> Iterator[Any]: + def iterator() -> Iterator[T]: current_page = first_page yield from current_page.items - fetched_items = current_page.count + fetched_items = getattr(current_page, 'count', len(current_page.items)) while current_page.items and (not limit or (limit > fetched_items)): new_kwargs = { **kwargs, @@ -155,15 +165,15 @@ def iterator() -> Iterator[Any]: } current_page = callback(**new_kwargs) yield from current_page.items - fetched_items += current_page.count + fetched_items += getattr(current_page, 'count', len(current_page.items)) return IterableListPage(first_page, iterator()) def build_iterable_list_page_async( - callback: Callable[..., Awaitable[Any]], + callback: Callable[..., Coroutine[Any, Any, HasItems[T]]], **kwargs: Any, -) -> IterableListPageAsync[Any]: +) -> IterableListPageAsync[T]: """Build an `IterableListPageAsync` from a paginated async callback. Mirrors `build_iterable_list_page` but for async callbacks. 
The returned object is both @@ -182,7 +192,7 @@ async def async_iterator() -> AsyncIterator[Any]: for item in current_page.items: yield item - fetched_items = current_page.count + fetched_items = getattr(current_page, 'count', len(current_page.items)) while current_page.items and (not limit or (limit > fetched_items)): new_kwargs = { **kwargs, @@ -192,7 +202,7 @@ async def async_iterator() -> AsyncIterator[Any]: current_page = await callback(**new_kwargs) for item in current_page.items: yield item - fetched_items += current_page.count + fetched_items += getattr(current_page, 'count', len(current_page.items)) async def wrap_first_page() -> IterableListPage[Any]: first_page = await fetch_first_page @@ -202,15 +212,14 @@ async def wrap_first_page() -> IterableListPage[Any]: def build_cursor_iterable_list_page( - callback: Callable[..., Any], + callback: Callable[..., HasItems[T]], *, cursor_param: str, - next_cursor_fn: Callable[[Any], Any], initial_cursor: Any = None, limit: int | None = None, chunk_size: int | None = None, **kwargs: Any, -) -> IterableListPage[Any]: +) -> IterableListPage[T]: """Build an `IterableListPage` for endpoints that paginate with a cursor instead of an offset. 
The callback is invoked with `{cursor_param: cursor, 'limit': effective_limit, **kwargs}` for each @@ -229,7 +238,7 @@ def iterator() -> Iterator[Any]: yield from current_page.items fetched = len(current_page.items) - next_cursor = next_cursor_fn(current_page) + next_cursor = getattr(current_page, f'next_{cursor_param}') while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched): remaining = (user_limit - fetched) if user_limit else 0 @@ -237,21 +246,20 @@ def iterator() -> Iterator[Any]: current_page = callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit}) yield from current_page.items fetched += len(current_page.items) - next_cursor = next_cursor_fn(current_page) + next_cursor = getattr(current_page, f'next_{cursor_param}') return IterableListPage(first_page, iterator()) def build_cursor_iterable_list_page_async( - callback: Callable[..., Awaitable[Any]], + callback: Callable[..., Awaitable[HasItems[T]]], *, cursor_param: str, - next_cursor_fn: Callable[[Any], Any], initial_cursor: Any = None, limit: int | None = None, chunk_size: int | None = None, **kwargs: Any, -) -> IterableListPageAsync[Any]: +) -> IterableListPageAsync[T]: """Build an `IterableListPageAsync` for endpoints that paginate with a cursor instead of an offset. Mirrors `build_cursor_iterable_list_page` but for async callbacks. 
The returned object is both @@ -271,7 +279,7 @@ async def async_iterator() -> AsyncIterator[Any]: yield item fetched = len(current_page.items) - next_cursor = next_cursor_fn(current_page) + next_cursor = getattr(current_page, f'next_{cursor_param}') while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched): remaining = (user_limit - fetched) if user_limit else 0 @@ -280,7 +288,7 @@ async def async_iterator() -> AsyncIterator[Any]: for item in current_page.items: yield item fetched += len(current_page.items) - next_cursor = next_cursor_fn(current_page) + next_cursor = getattr(current_page, f'next_{cursor_param}') async def wrap_first_page() -> IterableListPage[Any]: first_page = await fetch_first_page() diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py index 80714353..9c72ae2d 100644 --- a/src/apify_client/_resource_clients/dataset.py +++ b/src/apify_client/_resource_clients/dataset.py @@ -151,7 +151,7 @@ def list_items( signature: str | None = None, chunk_size: int | None = None, timeout: Timeout = 'long', - ) -> IterableListPage[DatasetItemsPage]: + ) -> IterableListPage[dict[str, Any]]: """List the items of the dataset. The returned page also supports iteration: `for item in client.list_items(...)` yields individual @@ -252,7 +252,7 @@ def iterate_items( skip_hidden: bool | None = None, signature: str | None = None, timeout: Timeout = 'long', - ) -> Iterator[DatasetItemsPage]: + ) -> Iterator[dict[str, Any]]: """Iterate over the items in the dataset. Deprecated: iterate the return value of `DatasetClient.list_items()` instead. @@ -829,7 +829,7 @@ def list_items( signature: str | None = None, chunk_size: int | None = None, timeout: Timeout = 'long', - ) -> IterableListPageAsync[DatasetItemsPage]: + ) -> IterableListPageAsync[dict[str, Any]]: """List the items of the dataset. 
The returned page also supports iteration: `for item in client.list_items(...)` yields individual @@ -930,7 +930,7 @@ async def iterate_items( skip_hidden: bool | None = None, signature: str | None = None, timeout: Timeout = 'long', - ) -> AsyncIterator[DatasetItemsPage]: + ) -> AsyncIterator[dict[str, Any]]: """Iterate over the items in the dataset. Deprecated: iterate the return value of `DatasetClientAsync.list_items()` instead. diff --git a/src/apify_client/_resource_clients/key_value_store.py b/src/apify_client/_resource_clients/key_value_store.py index febd9395..78f511a8 100644 --- a/src/apify_client/_resource_clients/key_value_store.py +++ b/src/apify_client/_resource_clients/key_value_store.py @@ -40,13 +40,6 @@ from apify_client._types import Timeout -def _kvs_next_cursor(page: ListOfKeys) -> str | None: - """Return the next cursor for KVS key pagination, or `None` when there are no more pages.""" - if not page.is_truncated: - return None - return page.next_exclusive_start_key - - def _parse_get_record_response(response: HttpResponse) -> Any: """Parse an HTTP response based on its content type. 
@@ -202,7 +195,6 @@ def _callback(*, limit: int | None = None, exclusive_start_key: str | None = Non return build_cursor_iterable_list_page( _callback, cursor_param='exclusive_start_key', - next_cursor_fn=_kvs_next_cursor, initial_cursor=exclusive_start_key, limit=limit, chunk_size=chunk_size, @@ -641,7 +633,6 @@ async def _callback(*, limit: int | None = None, exclusive_start_key: str | None return build_cursor_iterable_list_page_async( _callback, cursor_param='exclusive_start_key', - next_cursor_fn=_kvs_next_cursor, initial_cursor=exclusive_start_key, limit=limit, chunk_size=chunk_size, diff --git a/src/apify_client/_resource_clients/request_queue.py b/src/apify_client/_resource_clients/request_queue.py index 079e1971..5e351bac 100644 --- a/src/apify_client/_resource_clients/request_queue.py +++ b/src/apify_client/_resource_clients/request_queue.py @@ -56,11 +56,6 @@ _SAFETY_BUFFER_PERCENT = 0.01 / 100 # 0.01% -def _rq_next_cursor(page: ListOfRequests) -> str | None: - """Return the opaque `next_cursor` from the page, or `None` when there are no more pages.""" - return page.next_cursor - - @docs_group('Resource clients') class RequestQueueClient(ResourceClient): """Sub-client for managing a specific request queue. 
@@ -564,7 +559,6 @@ def _callback(*, limit: int | None = None, cursor: str | None = None) -> ListOfR return build_cursor_iterable_list_page( _callback, cursor_param='cursor', - next_cursor_fn=_rq_next_cursor, initial_cursor=cursor, limit=limit, chunk_size=chunk_size, @@ -1142,7 +1136,6 @@ async def _callback(*, limit: int | None = None, cursor: str | None = None) -> L return build_cursor_iterable_list_page_async( _callback, cursor_param='cursor', - next_cursor_fn=_rq_next_cursor, initial_cursor=cursor, limit=limit, chunk_size=chunk_size, From d8bc536561bdd9f622b71f7389b81722f43a7e45 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Thu, 23 Apr 2026 15:32:49 +0200 Subject: [PATCH 4/7] Use test server in tests --- tests/unit/test_client_pagination.py | 608 ++++++++++++++------------- 1 file changed, 306 insertions(+), 302 deletions(-) diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py index 8bd78535..2898b42f 100644 --- a/tests/unit/test_client_pagination.py +++ b/tests/unit/test_client_pagination.py @@ -1,11 +1,13 @@ from __future__ import annotations import dataclasses +import json +import re from typing import TYPE_CHECKING, Any, Literal, TypeAlias -from unittest import mock -from unittest.mock import Mock import pytest +from pydantic.fields import FieldInfo +from werkzeug import Response from apify_client import ApifyClient, ApifyClientAsync from apify_client import _models as _models_module @@ -45,9 +47,12 @@ ) if TYPE_CHECKING: - from _pytest.mark import ParameterSet + from collections.abc import Callable - from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync + from _pytest.mark import ParameterSet + from pydantic import BaseModel + from pytest_httpserver import HTTPServer + from werkzeug import Request CollectionClient: TypeAlias = ( @@ -84,147 +89,141 @@ ID_PLACEHOLDER = 'some-id' -# Response wrappers whose `model_validate` should be bypassed during pagination tests so -# 
synthetic `{'id': N, 'key': N}` test items are accepted without matching the real API schemas. -_BYPASSED_RESPONSE_CLASSES = ( + +# Inner list models whose `items: list[]` is relaxed to `list[dict]`. +# Point of these tests is pagination mechanism, not internal object validation. +_RELAXED_LIST_MODELS = ( + 'ListOfActors', + 'ListOfBuilds', + 'ListOfDatasets', + 'ListOfEnvVars', + 'ListOfKeys', + 'ListOfKeyValueStores', + 'ListOfRequestQueues', + 'ListOfRequests', + 'ListOfRuns', + 'ListOfSchedules', + 'ListOfStoreActors', + 'ListOfTasks', + 'ListOfVersions', + 'ListOfWebhookDispatches', + 'ListOfWebhooks', +) + +# Outer wrappers that embed a relaxed list model via `.data`. Their compiled schema pins the +# inner's schema at construction time, so they need a forced rebuild to pick up the relaxation. +# The wrappers themselves are not mutated — their own field annotations stay as-is. +_REBUILT_RESPONSE_WRAPPERS = ( + 'ListOfActorsInStoreResponse', 'ListOfActorsResponse', 'ListOfBuildsResponse', + 'ListOfDatasetsResponse', + 'ListOfEnvVarsResponse', + 'ListOfKeyValueStoresResponse', + 'ListOfKeysResponse', + 'ListOfRequestQueuesResponse', + 'ListOfRequestsResponse', 'ListOfRunsResponse', 'ListOfSchedulesResponse', 'ListOfTasksResponse', + 'ListOfVersionsResponse', 'ListOfWebhooksResponse', 'WebhookDispatchList', - 'ListOfDatasetsResponse', - 'ListOfKeyValueStoresResponse', - 'ListOfRequestQueuesResponse', - 'ListOfActorsInStoreResponse', - 'ListOfEnvVarsResponse', - 'ListOfVersionsResponse', - 'ListOfKeysResponse', - 'ListOfRequestsResponse', ) -class _AttrDict(dict): - """A dict that also supports attribute access — enough for next_cursor_fn to call `item.id`.""" - - def __getattr__(self, name: str) -> Any: - try: - return self[name] - except KeyError as exc: - raise AttributeError(name) from exc - - -class _FakeListModel: - """Stand-in for a paginated list model that mimics the fields the iteration logic accesses.""" - - def __init__(self, **kwargs: Any) -> None: - 
# Sensible defaults for the pagination fields `IterableListPage` reads. - self.total = 0 - self.count = 0 - self.offset = 0 - self.limit = 1 - self.desc = False - self.items: list[Any] = [] - self.is_truncated = False - self.next_exclusive_start_key: str | None = None - for key, value in kwargs.items(): - setattr(self, key, value) - if 'count' not in kwargs: - self.count = len(self.items) - - -@dataclasses.dataclass -class _FakeResponseWrapper: - """Stand-in for a `*Response` Pydantic model that wraps a paginated list under `.data`.""" - - data: _FakeListModel - - @pytest.fixture(autouse=True) -def _bypass_response_validation() -> Any: - """Replace the Pydantic `model_validate` of response wrappers with a lightweight builder. +def _relax_item_validation() -> Any: + """Relax only the element type of `items` on paginated list models for the test run. - Pagination tests use synthetic items that don't satisfy the real API schemas. Bypassing - validation lets the iteration logic run while still building a model-like object that exposes - the fields the client code consumes (`.data`, `.items`, `.total`, etc.). + Pagination tests feed synthetic `{'id': N}` items that don't satisfy the real API schemas + (`ActorShort`, `BuildShort`, `Request`, `EnvVar`, …). Instead of bypassing validation + wholesale, each inner `ListOf*` model has its `items` field swapped to `list[dict]` + and rebuilt. Outer `.data` wrapping and every pagination-metadata field remain validated. """ - - def _build(_cls: type, obj: dict) -> _FakeResponseWrapper: - data_dict = obj.get('data') or {} - raw_items = data_dict.get('items', []) - # Wrap dict items so cursor-based pagination can read `item.id` from the last item. 
- items = [_AttrDict(item) if isinstance(item, dict) else item for item in raw_items] - fields = {**data_dict, 'items': items} - return _FakeResponseWrapper(data=_FakeListModel(**fields)) - - patchers = [] - for class_name in _BYPASSED_RESPONSE_CLASSES: - cls = getattr(_models_module, class_name) - patchers.append(mock.patch.object(cls, 'model_validate', classmethod(_build))) - - for p in patchers: - p.start() + relaxed_field = FieldInfo.from_annotation(list[dict]) + originals: dict[type[BaseModel], FieldInfo] = {} + wrappers = [getattr(_models_module, name) for name in _REBUILT_RESPONSE_WRAPPERS] + + for name in _RELAXED_LIST_MODELS: + cls = getattr(_models_module, name) + originals[cls] = cls.model_fields['items'] + cls.model_fields['items'] = relaxed_field + cls.model_rebuild(force=True) + for wrapper in wrappers: + wrapper.model_rebuild(force=True) try: yield finally: - for p in patchers: - p.stop() + for cls, field in originals.items(): + cls.model_fields['items'] = field + cls.model_rebuild(force=True) + for wrapper in wrappers: + wrapper.model_rebuild(force=True) def create_items(start: int, end: int, step: int | None = None) -> list[dict[str, int]]: """Create a list of test items for the given index range.""" if not step: step = -1 if end < start else 1 - return [{'id': i, 'key': i} for i in range(start, end, step)] + return [{'id': i} for i in range(start, end, step)] -def _mocked_api_pagination_logic(*, url: str, params: dict[str, Any] | None = None, **_: Any) -> Mock: - """Simulate a paginated Apify API response. +NORMAL_ITEMS = 2500 +EXTRA_ITEMS_UNNAMED = 100 +MAX_ITEMS_PER_PAGE = 1000 - The mocked platform holds 2500 items normally and an additional 100 when ``unnamed=True`` is - requested. Items are simple objects with an incrementing ``id`` and ``key`` that make it easy - to verify iteration order. - Pages are capped at 1000 items regardless of the requested limit, mirroring the real API. 
- """ - params = params or {} +def _is_true(value: str | None) -> bool: + """Match the `'true'` wire form produced by the client's bool→string serialization.""" + return value == 'true' + + +def _parse_int_param(value: str | None) -> int: + return int(value) if value not in (None, '') else 0 - normal_items = 2500 - extra_items = 100 # for example unnamed resources - max_items_per_page = 1000 - total_items = (normal_items + extra_items) if params.get('unnamed') else normal_items +def _handle_offset_pagination(request: Request) -> Response: + """Serve an offset-paginated Apify API response. - offset_raw = params.get('offset') - offset = int(offset_raw) if offset_raw not in (None, '') else 0 - limit_raw = params.get('limit') - limit = int(limit_raw) if limit_raw not in (None, '') else 0 + The simulated platform holds 2500 items normally and an additional 100 when + ``unnamed=true`` is requested. Pages are capped at 1000 items regardless of the requested + limit, mirroring the real API. The dataset items endpoint returns items as a raw list; + all other endpoints wrap them in ``{'data': {...}}``. 
+ """ + params = request.args + + total_items = (NORMAL_ITEMS + EXTRA_ITEMS_UNNAMED) if _is_true(params.get('unnamed')) else NORMAL_ITEMS + offset = _parse_int_param(params.get('offset')) + limit = _parse_int_param(params.get('limit')) assert offset >= 0, 'Invalid offset sent to API' assert limit >= 0, 'Invalid limit sent to API' - desc = params.get('desc') in (True, 'true', 'True', 1, '1') + desc = _is_true(params.get('desc')) items = create_items(total_items, 0) if desc else create_items(0, total_items) lower_index = min(offset, total_items) upper_index = min(offset + (limit or total_items), total_items) - count = min(max(upper_index - lower_index, 0), max_items_per_page) + count = min(max(upper_index - lower_index, 0), MAX_ITEMS_PER_PAGE) + selected_items = items[lower_index : min(upper_index, lower_index + MAX_ITEMS_PER_PAGE)] - selected_items = items[lower_index : min(upper_index, lower_index + max_items_per_page)] - - # Every second item would be filtered out when using `skip_empty=True`, `skip_hidden=True`, or `clean=True` - if params.get('skip_empty') or params.get('skip_hidden') or params.get('clean'): + # Every second item is filtered out when `skipEmpty=true`, `skipHidden=true`, or `clean=true`. 
+ if _is_true(params.get('skipEmpty')) or _is_true(params.get('skipHidden')) or _is_true(params.get('clean')): selected_items = selected_items[::2] - response = Mock() + headers = { + 'x-apify-pagination-count': str(count), + 'x-apify-pagination-total': str(total_items), + 'x-apify-pagination-offset': str(offset), + 'x-apify-pagination-limit': str(limit or count or 1), + 'x-apify-pagination-desc': str(desc).lower(), + 'content-type': 'application/json', + } - # The dataset items endpoint returns items as a raw list - if url.endswith(f'/datasets/{ID_PLACEHOLDER}/items'): - response.content = b'' - response.json = lambda: selected_items + if request.path.endswith(f'/datasets/{ID_PLACEHOLDER}/items'): + body: Any = selected_items else: - response.content = b'' - response.json = lambda: { + body = { 'data': { 'total': total_items, 'count': count, @@ -234,15 +233,122 @@ def _mocked_api_pagination_logic(*, url: str, params: dict[str, Any] | None = No 'items': selected_items, } } + return Response(status=200, headers=headers, response=json.dumps(body)) - response.headers = { - 'x-apify-pagination-count': count, - 'x-apify-pagination-total': str(total_items), - 'x-apify-pagination-offset': str(offset), - 'x-apify-pagination-limit': str(limit or count or 1), - 'x-apify-pagination-desc': str(desc).lower(), - } - return response + +def _handle_cursor_pagination(request: Request) -> Response: + """Serve a cursor-paginated Apify API response for KVS keys and RQ requests. + + Holds 2500 synthetic items whose integer `id` equals their position. Each page is capped + at 1000 items. KVS uses `exclusiveStartKey`; RQ accepts either the deprecated + `exclusiveStartId` on the initial call or the opaque `cursor` on subsequent calls. All + three values encode the last-seen item id as a string — the next page starts at id + 1. 
+ """ + params = request.args + limit = _parse_int_param(params.get('limit')) + assert limit >= 0, 'Invalid limit sent to API' + + cursor_raw = params.get('exclusiveStartKey') or params.get('exclusiveStartId') or params.get('cursor') + + total_items = NORMAL_ITEMS + start = int(cursor_raw) + 1 if cursor_raw not in (None, '') else 0 + end = total_items if not limit else min(start + limit, total_items) + page_end = min(end, start + MAX_ITEMS_PER_PAGE) + selected_items = [{'id': i} for i in range(start, page_end)] + + if request.path.endswith('/keys'): + is_truncated = page_end < total_items and bool(selected_items) + next_exclusive_start_key = str(selected_items[-1]['id']) if selected_items and is_truncated else None + body: dict[str, Any] = { + 'data': { + 'items': selected_items, + 'count': len(selected_items), + 'limit': limit or (len(selected_items) or 1), + 'is_truncated': is_truncated, + 'next_exclusive_start_key': next_exclusive_start_key, + } + } + else: # `/requests` + has_more = page_end < total_items and bool(selected_items) + next_cursor = str(selected_items[-1]['id']) if has_more else None + body = { + 'data': { + 'items': selected_items, + 'count': len(selected_items), + 'limit': limit or (len(selected_items) or 1), + 'next_cursor': next_cursor, + } + } + return Response(status=200, headers={'content-type': 'application/json'}, response=json.dumps(body)) + + +def _pagination_handler(request: Request) -> Response: + """Dispatch between cursor-based (KVS keys, RQ requests) and offset-based endpoints.""" + if request.path.endswith(('/keys', '/requests')): + return _handle_cursor_pagination(request) + return _handle_offset_pagination(request) + + +@pytest.fixture +def pagination_server(httpserver: HTTPServer) -> HTTPServer: + """Register a catch-all handler that mirrors the Apify paginated endpoints.""" + httpserver.expect_request(re.compile(r'.*')).respond_with_handler(_pagination_handler) + return httpserver + + +def _make_sync_client(httpserver: 
HTTPServer) -> ApifyClient: + return ApifyClient(token='test', api_url=httpserver.url_for('/')) + + +def _make_async_client(httpserver: HTTPServer) -> ApifyClientAsync: + return ApifyClientAsync(token='test', api_url=httpserver.url_for('/')) + + +# Map resource-client class name to a factory that, given an `ApifyClient`/`ApifyClientAsync`, +# returns the sub-client under test. Usable for both sync and async since every accessor is +# available symmetrically on both root clients. +_CLIENT_FACTORIES: dict[str, Callable[[Any], Any]] = { + 'ActorCollectionClient': lambda c: c.actors(), + 'ScheduleCollectionClient': lambda c: c.schedules(), + 'TaskCollectionClient': lambda c: c.tasks(), + 'WebhookCollectionClient': lambda c: c.webhooks(), + 'WebhookDispatchCollectionClient': lambda c: c.webhook_dispatches(), + 'StoreCollectionClient': lambda c: c.store(), + 'DatasetCollectionClient': lambda c: c.datasets(), + 'KeyValueStoreCollectionClient': lambda c: c.key_value_stores(), + 'RequestQueueCollectionClient': lambda c: c.request_queues(), + 'BuildCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).builds(), + 'RunCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).runs(), + 'ActorVersionCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).versions(), + 'ActorEnvVarCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).version('some-version').env_vars(), + 'DatasetClient': lambda c: c.dataset(ID_PLACEHOLDER), + 'KeyValueStoreClient': lambda c: c.key_value_store(ID_PLACEHOLDER), + 'RequestQueueClient': lambda c: c.request_queue(ID_PLACEHOLDER), +} + + +_CLIENT_SET_NAMES: dict[Literal['collection', 'dataset', 'kvs', 'rq'], tuple[str, ...]] = { + # Tuple rather than set: pytest-xdist requires a stable iteration order across workers. 
+ # https://pytest-xdist.readthedocs.io/en/stable/known-limitations.html#order-and-amount-of-test-must-be-consistent + 'collection': ( + 'ActorCollectionClient', + 'ScheduleCollectionClient', + 'TaskCollectionClient', + 'WebhookCollectionClient', + 'WebhookDispatchCollectionClient', + 'StoreCollectionClient', + 'DatasetCollectionClient', + 'KeyValueStoreCollectionClient', + 'RequestQueueCollectionClient', + 'BuildCollectionClient', + 'RunCollectionClient', + 'ActorVersionCollectionClient', + 'ActorEnvVarCollectionClient', + ), + 'dataset': ('DatasetClient',), + 'kvs': ('KeyValueStoreClient',), + 'rq': ('RequestQueueClient',), +} @dataclasses.dataclass @@ -257,10 +363,6 @@ class _PaginationCase: def __hash__(self) -> int: return hash(self.id) - def supports(self, client: ResourceClient | ResourceClientAsync) -> bool: - """Check whether the given client implements functionality tested by this test case.""" - return client.__class__.__name__.removesuffix('Async') in self.supported_clients - COLLECTION_CLIENTS = { 'ActorCollectionClient', @@ -365,282 +467,184 @@ def supports(self, client: ResourceClient | ResourceClientAsync) -> bool: ) -def _generate_test_params( - client_set: Literal['collection', 'dataset', 'kvs', 'rq'], *, async_clients: bool -) -> list[ParameterSet]: - """Build the pytest parameter set for the given client category.""" - client = ApifyClientAsync(token='') if async_clients else ApifyClient(token='') - - # Tuple rather than set because pytest-xdist requires a stable iteration order. - # https://pytest-xdist.readthedocs.io/en/stable/known-limitations.html#order-and-amount-of-test-must-be-consistent - clients: tuple[ResourceClient | ResourceClientAsync, ...] 
- - match client_set: - case 'collection': - clients = ( - client.actors(), - client.schedules(), - client.tasks(), - client.webhooks(), - client.webhook_dispatches(), - client.store(), - client.datasets(), - client.key_value_stores(), - client.request_queues(), - client.actor(ID_PLACEHOLDER).builds(), - client.actor(ID_PLACEHOLDER).runs(), - client.actor(ID_PLACEHOLDER).versions(), - client.actor(ID_PLACEHOLDER).version('some-version').env_vars(), - ) - case 'dataset': - clients = (client.dataset(ID_PLACEHOLDER),) - case 'kvs': - clients = (client.key_value_store(ID_PLACEHOLDER),) - case 'rq': - clients = (client.request_queue(ID_PLACEHOLDER),) - case _: - raise ValueError(f'Unknown client set: {client_set}') +def _generate_test_params(client_set: Literal['collection', 'dataset', 'kvs', 'rq']) -> list[ParameterSet]: + """Build the pytest parameter set for the given client category. + Each parameter carries the resource-client class name; the test body instantiates + the real client against the `httpserver` URL and looks up the factory in + `_CLIENT_FACTORIES`. 
+ """ + client_names = _CLIENT_SET_NAMES[client_set] return [ - pytest.param( - test_case.inputs, test_case.expected_items, sub_client, id=f'{sub_client.__class__.__name__}:{test_case.id}' - ) + pytest.param(test_case.inputs, test_case.expected_items, client_name, id=f'{client_name}:{test_case.id}') for test_case in TEST_CASES - for sub_client in clients - if test_case.supports(sub_client) + for client_name in client_names + if client_name in test_case.supported_clients ] @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='collection', async_clients=False), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='collection'), ) def test_client_list_iterable( - client: CollectionClient, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """Every sync collection client's `list()` return value should iterate across pages.""" - with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_pagination_logic): - returned_items = list(client.list(**inputs)) + client: CollectionClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = list(client.list(**inputs)) - if inputs == {}: - list_response = client.list(**inputs) - assert len(returned_items) == list_response.total + if inputs == {}: + list_response = client.list(**inputs) + assert len(returned_items) == list_response.total - assert returned_items == expected_items + assert returned_items == expected_items @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='collection', async_clients=True), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='collection'), ) async def test_client_list_iterable_async( - client: CollectionClientAsync, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> 
None: """Every async collection client's `list()` return value should iterate across pages.""" + client: CollectionClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [item async for item in client.list(**inputs)] - async def async_side_effect(**kwargs: Any) -> Mock: - return _mocked_api_pagination_logic(**kwargs) - - with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): - returned_items = [item async for item in client.list(**inputs)] + if inputs == {}: + list_response = await client.list(**inputs) + assert len(returned_items) == list_response.total - if inputs == {}: - list_response = await client.list(**inputs) - assert len(returned_items) == list_response.total - - assert returned_items == expected_items + assert returned_items == expected_items @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='dataset', async_clients=False), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='dataset'), ) def test_dataset_items_list_iterable( - client: DatasetClient, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """The sync dataset client's `list_items()` return value should iterate across pages.""" - with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_pagination_logic): - returned_items = list(client.list_items(**inputs)) + client: DatasetClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = list(client.list_items(**inputs)) - if inputs == {}: - list_response = client.list_items(**inputs) - assert len(returned_items) == list_response.total + if inputs == {}: + list_response = client.list_items(**inputs) + assert len(returned_items) == list_response.total - assert returned_items == expected_items + assert returned_items == expected_items - # Until the deprecated `iterate_items` 
method is removed, it should behave the same - inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} - assert returned_items == list(client.iterate_items(**inputs_without_chunk_size)) + # Until the deprecated `iterate_items` method is removed, it should behave the same + inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} + assert returned_items == list(client.iterate_items(**inputs_without_chunk_size)) @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='dataset', async_clients=True), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='dataset'), ) async def test_dataset_items_list_iterable_async( - client: DatasetClientAsync, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """The async dataset client's `list_items()` return value should iterate across pages.""" + client: DatasetClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [item async for item in client.list_items(**inputs)] - async def async_side_effect(**kwargs: Any) -> Mock: - return _mocked_api_pagination_logic(**kwargs) - - with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): - returned_items = [item async for item in client.list_items(**inputs)] - - if inputs == {}: - list_response = await client.list_items(**inputs) - assert len(returned_items) == list_response.total - - assert returned_items == expected_items + if inputs == {}: + list_response = await client.list_items(**inputs) + assert len(returned_items) == list_response.total - # Until the deprecated `iterate_items` method is removed, it should behave the same - inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} - assert returned_items == [item async for item in client.iterate_items(**inputs_without_chunk_size)] + assert 
returned_items == expected_items - -def _mocked_api_cursor_pagination_logic(*, url: str, params: dict[str, Any] | None = None, **_: Any) -> Mock: - """Simulate the KVS keys and RQ requests endpoints, which paginate with a cursor. - - Holds 2500 synthetic items with incrementing integer `id` equal to their position. Each page is - capped at 1000 items. The mock honors ``exclusive_start_key`` for KVS and ``exclusive_start_id`` - for RQ — both are treated as the integer id of the previous page's last item; the next page - starts at that id + 1. - """ - params = params or {} - - total_items = 2500 - max_items_per_page = 1000 - - limit_raw = params.get('limit') - limit = int(limit_raw) if limit_raw not in (None, '') else 0 - assert limit >= 0, 'Invalid limit sent to API' - - # KVS uses `exclusiveStartKey`; RQ accepts either the deprecated `exclusiveStartId` (initial - # call only) or the new opaque `cursor` (subsequent calls use this). Both cursor values encode - # the last-seen item id as a string. 
- cursor_raw = params.get('exclusiveStartKey') or params.get('exclusiveStartId') or params.get('cursor') - - start = int(cursor_raw) + 1 if cursor_raw not in (None, '') else 0 - end = total_items - if limit: - end = min(start + limit, total_items) - page_end = min(end, start + max_items_per_page) - selected_items = [{'id': i, 'key': i} for i in range(start, page_end)] - - response = Mock() - if url.endswith('/keys'): - next_exclusive_start_key = str(selected_items[-1]['id']) if selected_items else None - is_truncated = page_end < total_items and bool(selected_items) - response.json = lambda: { - 'data': { - 'items': selected_items, - 'count': len(selected_items), - 'limit': limit or (len(selected_items) or 1), - 'is_truncated': is_truncated, - 'next_exclusive_start_key': next_exclusive_start_key if is_truncated else None, - } - } - elif url.endswith('/requests'): - has_more = page_end < total_items and bool(selected_items) - next_cursor = str(selected_items[-1]['id']) if has_more else None - response.json = lambda: { - 'data': { - 'items': selected_items, - 'count': len(selected_items), - 'limit': limit or (len(selected_items) or 1), - 'next_cursor': next_cursor, - } - } - else: - raise ValueError(f'Unexpected URL in pagination test: {url}') - - response.content = b'' - return response + # Until the deprecated `iterate_items` method is removed, it should behave the same + inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} + assert returned_items == [item async for item in client.iterate_items(**inputs_without_chunk_size)] @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='kvs', async_clients=False), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='kvs'), ) def test_kvs_list_keys_iterable( - client: KeyValueStoreClient, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """The sync KVS 
client's `list_keys()` return value should iterate across cursor-paginated pages.""" - with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_cursor_pagination_logic): - returned_items = [dict(item) for item in client.list_keys(**inputs)] + client: KeyValueStoreClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = [dict(item) for item in client.list_keys(**inputs)] - assert returned_items == expected_items + assert returned_items == expected_items - # Until the deprecated `iterate_keys` method is removed, it should behave the same - assert returned_items == [dict(item) for item in client.iterate_keys(**inputs)] + # Until the deprecated `iterate_keys` method is removed, it should behave the same + assert returned_items == [dict(item) for item in client.iterate_keys(**inputs)] @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='kvs', async_clients=True), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='kvs'), ) async def test_kvs_list_keys_iterable_async( - client: KeyValueStoreClientAsync, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """The async KVS client's `list_keys()` return value should iterate across cursor-paginated pages.""" + client: KeyValueStoreClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [dict(item) async for item in client.list_keys(**inputs)] - async def async_side_effect(**kwargs: Any) -> Mock: - return _mocked_api_cursor_pagination_logic(**kwargs) - - with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): - returned_items = [dict(item) async for item in client.list_keys(**inputs)] + assert returned_items == expected_items - assert returned_items == expected_items - - # Until the deprecated `iterate_keys` method is removed, it should behave the 
same - assert returned_items == [dict(item) async for item in client.iterate_keys(**inputs)] + # Until the deprecated `iterate_keys` method is removed, it should behave the same + assert returned_items == [dict(item) async for item in client.iterate_keys(**inputs)] @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='rq', async_clients=False), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='rq'), ) def test_rq_list_requests_iterable( - client: RequestQueueClient, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """The sync RQ client's `list_requests()` return value should iterate across cursor-paginated pages.""" - with mock.patch.object(client._http_client, 'call', side_effect=_mocked_api_cursor_pagination_logic): - returned_items = [dict(item) for item in client.list_requests(**inputs)] - assert returned_items == expected_items + client: RequestQueueClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = [dict(item) for item in client.list_requests(**inputs)] + assert returned_items == expected_items @pytest.mark.parametrize( - ('inputs', 'expected_items', 'client'), - _generate_test_params(client_set='rq', async_clients=True), + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='rq'), ) async def test_rq_list_requests_iterable_async( - client: RequestQueueClientAsync, + pagination_server: HTTPServer, + client_name: str, inputs: dict, expected_items: list[dict[str, int]], ) -> None: """The async RQ client's `list_requests()` return value should iterate across cursor-paginated pages.""" - - async def async_side_effect(**kwargs: Any) -> Mock: - return _mocked_api_cursor_pagination_logic(**kwargs) - - with mock.patch.object(client._http_client, 'call', side_effect=async_side_effect): - returned_items = [dict(item) async for item in 
client.list_requests(**inputs)] - assert returned_items == expected_items + client: RequestQueueClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [dict(item) async for item in client.list_requests(**inputs)] + assert returned_items == expected_items def test_rq_list_requests_rejects_cursor_and_exclusive_start_id() -> None: From 82ee01faebbd25236b2b04df033698a8a4ab965c Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 24 Apr 2026 13:06:58 +0200 Subject: [PATCH 5/7] Generic `ListPage` approach --- docs/02_concepts/08_pagination.mdx | 6 +- .../code/08_iterate_items_async.py | 11 +++- .../02_concepts/code/08_iterate_items_sync.py | 11 +++- docs/02_concepts/code/08_pagination_async.py | 27 +++------ docs/02_concepts/code/08_pagination_sync.py | 27 +++------ src/apify_client/_iterable_list_page.py | 59 +++++++++++-------- tests/integration/_utils.py | 4 +- tests/integration/test_webhook.py | 10 +++- 8 files changed, 76 insertions(+), 79 deletions(-) diff --git a/docs/02_concepts/08_pagination.mdx b/docs/02_concepts/08_pagination.mdx index 9f1da230..dddeecee 100644 --- a/docs/02_concepts/08_pagination.mdx +++ b/docs/02_concepts/08_pagination.mdx @@ -13,8 +13,6 @@ import ApiLink from '@site/src/components/ApiLink'; import PaginationAsyncExample from '!!raw-loader!./code/08_pagination_async.py'; import PaginationSyncExample from '!!raw-loader!./code/08_pagination_sync.py'; -import IterateItemsAsyncExample from '!!raw-loader!./code/08_iterate_items_async.py'; -import IterateItemsSyncExample from '!!raw-loader!./code/08_iterate_items_sync.py'; Most methods named `list` or `list_something` in the Apify client return a `ListPage` object. 
This object provides a consistent interface for working with paginated data and includes the following properties: @@ -45,7 +43,7 @@ The `ListPage` interface offers several k ## Generator-based iteration -For most use cases, `iterate_items()` is the recommended way to process all items in a dataset. It handles pagination automatically using a Python generator, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself. +You can also iterate directly over the result of any `list` method. Iteration handles pagination automatically, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself. @@ -60,6 +58,4 @@ For most use cases, `iterate_items()` is the recommended way to process all item -`iterate_items()` accepts the same filtering parameters as `list_items()` (`clean`, `fields`, `omit`, `unwind`, `skip_empty`, `skip_hidden`), so you can combine automatic pagination with data filtering. - Similarly, `KeyValueStoreClient` provides an `iterate_keys()` method for iterating over all keys in a key-value store without manual pagination. diff --git a/docs/02_concepts/code/08_iterate_items_async.py b/docs/02_concepts/code/08_iterate_items_async.py index fba9b5b0..3b64ea46 100644 --- a/docs/02_concepts/code/08_iterate_items_async.py +++ b/docs/02_concepts/code/08_iterate_items_async.py @@ -7,6 +7,11 @@ async def main() -> None: apify_client = ApifyClientAsync(TOKEN) dataset_client = apify_client.dataset('dataset-id') - # Iterate through all items automatically. - async for item in dataset_client.iterate_items(): - print(item) + # Define the pagination parameters + limit = 1500 # Number of items in total + offset = 100 # Starting offset + + # Iterate through items automatically, lazily sending as many API calls + # as needed and receiving items in chunks. 
+ async for item in dataset_client.list_items(limit=limit, offset=offset): + print(item) # Process the item as needed diff --git a/docs/02_concepts/code/08_iterate_items_sync.py b/docs/02_concepts/code/08_iterate_items_sync.py index 005c899f..d7b57c22 100644 --- a/docs/02_concepts/code/08_iterate_items_sync.py +++ b/docs/02_concepts/code/08_iterate_items_sync.py @@ -7,9 +7,14 @@ def main() -> None: apify_client = ApifyClient(TOKEN) dataset_client = apify_client.dataset('dataset-id') - # Iterate through all items automatically. - for item in dataset_client.iterate_items(): - print(item) + # Define the pagination parameters + limit = 1500 # Number of items in total + offset = 100 # Starting offset + + # Iterate through items automatically, lazily sending as many API calls + # as needed and receiving items in chunks. + for item in dataset_client.list_items(limit=limit, offset=offset): + print(item) # Process the item as needed if __name__ == '__main__': diff --git a/docs/02_concepts/code/08_pagination_async.py b/docs/02_concepts/code/08_pagination_async.py index 50e9d047..23ac5fde 100644 --- a/docs/02_concepts/code/08_pagination_async.py +++ b/docs/02_concepts/code/08_pagination_async.py @@ -10,26 +10,15 @@ async def main() -> None: dataset_client = apify_client.dataset('dataset-id') # Define the pagination parameters - limit = 1000 # Number of items per page + limit = 1000 # Number of items to request from the API offset = 0 # Starting offset - all_items = [] # List to store all fetched items - while True: - # Fetch a page of items - response = await dataset_client.list_items(limit=limit, offset=offset) - items = response.items - total = response.total + # Send a single API call to fetch paginated items. 
+ # (number of items per single call can be limited by API) + paginated_items = await dataset_client.list_items(limit=limit, offset=offset) - print(f'Fetched {len(items)} items') + # Inspect pagination metadata returned by API + print(paginated_items.total) - # Add the fetched items to the complete list - all_items.extend(items) - - # Exit the loop if there are no more items to fetch - if offset + limit >= total: - break - - # Increment the offset for the next page - offset += limit - - print(f'Overall fetched {len(all_items)} items') + for item in paginated_items.items: + print(item) # Process the item as needed diff --git a/docs/02_concepts/code/08_pagination_sync.py b/docs/02_concepts/code/08_pagination_sync.py index 3beb4fbe..f144339e 100644 --- a/docs/02_concepts/code/08_pagination_sync.py +++ b/docs/02_concepts/code/08_pagination_sync.py @@ -10,26 +10,15 @@ def main() -> None: dataset_client = apify_client.dataset('dataset-id') # Define the pagination parameters - limit = 1000 # Number of items per page + limit = 1000 # Number of items to request from the API offset = 0 # Starting offset - all_items = [] # List to store all fetched items - while True: - # Fetch a page of items - response = dataset_client.list_items(limit=limit, offset=offset) - items = response.items - total = response.total + # Send a single API call to fetch paginated items. 
+ # (number of items per single call can be limited by API) + paginated_items = dataset_client.list_items(limit=limit, offset=offset) - print(f'Fetched {len(items)} items') + # Inspect pagination metadata returned by API + print(paginated_items.total) - # Add the fetched items to the complete list - all_items.extend(items) - - # Exit the loop if there are no more items to fetch - if offset + limit >= total: - break - - # Increment the offset for the next page - offset += limit - - print(f'Overall fetched {len(all_items)} items') + for item in paginated_items.items: + print(item) # Process the item as needed diff --git a/src/apify_client/_iterable_list_page.py b/src/apify_client/_iterable_list_page.py index 6e39acba..3832452f 100644 --- a/src/apify_client/_iterable_list_page.py +++ b/src/apify_client/_iterable_list_page.py @@ -47,42 +47,53 @@ def __await__(self) -> Generator[Any, None, T]: @docs_group('Other') -class IterableListPage(Iterable[T], Generic[T]): - """A page of results that can also be iterated to yield items across subsequent pages. +class ListPage(Generic[T]): + """A page of API results. - Accessing fields such as `items`, `count`, or `total` returns the metadata of the first page, - preserving the behavior of the previous non-iterable return value. Iterating with `for item in ...` - yields individual items and performs additional API calls as needed to fetch further pages. + Different endpoints may return different subsets of the available pagination metadata fields, the only field that + is common for all endpoints is items. 
""" items: list[T] """List of items on this page.""" - count: int + count: int | None """Number of items on this page.""" - offset: int + offset: int | None """The starting offset of this page.""" - limit: int + limit: int | None """The maximum number of items per page requested from the API.""" - total: int + total: int | None """Total number of items matching the query, as reported by the first page.""" - desc: bool + desc: bool | None """Whether the items are sorted in descending order.""" + def __init__(self, first_page: HasItems[T]) -> None: + """Initialize a page wrapper from a Pydantic paginated model.""" + self.items = first_page.items + self.count = getattr(first_page, 'count', None) + self.offset = getattr(first_page, 'offset', None) + self.limit = getattr(first_page, 'limit', None) + self.total = getattr(first_page, 'total', None) + self.desc = getattr(first_page, 'desc', None) + + +@docs_group('Other') +class IterableListPage(ListPage[T], Iterable[T], Generic[T]): + """A page of results that can also be iterated to yield items across subsequent pages. + + Accessing fields such as `items`, `count`, or `total` returns the metadata of the first page, + preserving the behavior of the previous non-iterable return value. Iterating with `for item in ...` + yields individual items and performs additional API calls as needed to fetch further pages. 
+ """ + def __init__(self, first_page: HasItems[T], iterator: Iterator[T]) -> None: """Initialize a page wrapper from a Pydantic paginated model and an iterator over all items.""" - self.items = first_page.items - count = getattr(first_page, 'count', None) - self.count = count if count is not None else len(first_page.items) - self.offset = getattr(first_page, 'offset', 0) or 0 - self.limit = getattr(first_page, 'limit', 0) or 0 - self.total = getattr(first_page, 'total', None) or len(first_page.items) - self.desc = getattr(first_page, 'desc', False) or False - self._first_page = first_page + super().__init__(first_page) self._iterator = iterator def __iter__(self) -> Iterator[T]: @@ -95,28 +106,26 @@ class IterableListPageAsync(AsyncIterable[T], Generic[T]): """An awaitable result that can also be asynchronously iterated to yield items across pages. Awaiting the instance (`await client.list(...)`) performs a single API call and returns a - populated `IterableListPage`. Iterating (`async for item in client.list(...)`) yields individual + populated `ListPage`. Iterating (`async for item in client.list(...)`) yields individual items and performs additional API calls as needed to fetch further pages. - - A single instance supports either awaiting or iterating — not both. 
""" def __init__( self, - make_awaitable: Callable[[], Awaitable[IterableListPage[T]]], + awaitable_first_page: Callable[[], Awaitable[ListPage[T]]], async_iterator: AsyncIterator[T], ) -> None: """Initialize with a factory that creates the awaitable on demand and the async iterator over items.""" - self._make_awaitable = make_awaitable + self._awaitable_first_page = awaitable_first_page self._async_iterator = async_iterator def __aiter__(self) -> AsyncIterator[T]: """Return an asynchronous iterator over all items across pages.""" return self._async_iterator - def __await__(self) -> Generator[Any, Any, IterableListPage[T]]: + def __await__(self) -> Generator[Any, Any, ListPage[T]]: """Return an awaitable that resolves to an `IterableListPage` containing the first page.""" - return self._make_awaitable().__await__() + return self._awaitable_first_page().__await__() def build_iterable_list_page( diff --git a/tests/integration/_utils.py b/tests/integration/_utils.py index 8a3e1f1f..399301ec 100644 --- a/tests/integration/_utils.py +++ b/tests/integration/_utils.py @@ -89,14 +89,14 @@ async def maybe_await(value: Coroutine[Any, Any, T]) -> T: ... async def maybe_await(value: T) -> T: ... -async def maybe_await(value: T | Coroutine[Any, Any, T]) -> T: +async def maybe_await(value: Any) -> Any: """Await coroutines, pass through other values. 
Enables unified test code for both sync and async clients: result = await maybe_await(client.datasets().list()) """ if hasattr(value, '__await__'): - return await value # ty: ignore[invalid-await] + return await value return value diff --git a/tests/integration/test_webhook.py b/tests/integration/test_webhook.py index a011aaa7..c77ea050 100644 --- a/tests/integration/test_webhook.py +++ b/tests/integration/test_webhook.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING +from apify_client._iterable_list_page import ListPage + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync @@ -13,11 +15,11 @@ ActorJobStatus, ListOfRuns, ListOfWebhookDispatches, - ListOfWebhooks, Run, Webhook, WebhookDispatch, WebhookEventType, + WebhookShort, ) HELLO_WORLD_ACTOR = 'apify/hello-world' @@ -49,16 +51,18 @@ async def test_list_webhooks(client: ApifyClient | ApifyClientAsync) -> None: """Test listing webhooks.""" webhooks_page = await maybe_await(client.webhooks().list(limit=10)) - assert isinstance(webhooks_page, ListOfWebhooks) + assert isinstance(webhooks_page, ListPage) assert isinstance(webhooks_page.items, list) + assert isinstance(webhooks_page.items[0], WebhookShort) async def test_list_webhooks_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing webhooks with pagination.""" webhooks_page = await maybe_await(client.webhooks().list(limit=5, offset=0)) - assert isinstance(webhooks_page, ListOfWebhooks) + assert isinstance(webhooks_page, ListPage) assert isinstance(webhooks_page.items, list) + assert isinstance(webhooks_page.items[0], WebhookShort) async def test_webhook_create_and_get(client: ApifyClient | ApifyClientAsync) -> None: From 969322f5d7eea4c199bb8e807a2ad8bde4c2fbfb Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 11:22:38 +0000 Subject: [PATCH 6/7] test(integration): assert `ListPage` + typed item on every `list()` return Align every integration test that calls a `list()` / `list_keys()` / `list_requests()` 
method with the pattern introduced on iterable-list-methods-2 (commit 82ee01f): xs_page = await maybe_await(client.xs().list(limit=10)) assert isinstance(xs_page, ListPage) assert isinstance(xs_page.items, list) assert isinstance(xs_page.items[0], XShort) Covers: actors, actor-env-vars, actor-versions, builds (user + per-actor), datasets, key-value-stores, key-value-store keys (incl. signature variant), request-queues, request-queue requests (list + batch-add + batch-delete polls), runs (multi-status + user runs + task runs), schedules, store, tasks, webhooks, webhook dispatches, log (build listing). Where a listing may legitimately be empty (user's own actors, user's own datasets/KVSs/RQs/runs/builds, new task's webhooks, webhook dispatches), the element-type assertion is guarded with `if xs_page.items:` rather than asserting `items[0]`. All `ListOf*` imports from `_models_generated` in integration tests are replaced with the item-type import (e.g. `ActorShort`, `BuildShort`, `KeyValueStoreKey`, `Request`) plus `ListPage` from `_iterable_list_page`. No source changes. 258 integration tests collect cleanly; 521 unit tests pass. 
https://claude.ai/code/session_011VSSFo89Z9LfyFqZGsJKfz --- tests/integration/test_actor.py | 31 +++++----- tests/integration/test_actor_env_var.py | 13 +++-- tests/integration/test_actor_version.py | 13 +++-- tests/integration/test_build.py | 57 +++++++++++-------- tests/integration/test_dataset.py | 22 +++++--- tests/integration/test_key_value_store.py | 45 ++++++++------- tests/integration/test_log.py | 13 +++-- tests/integration/test_request_queue.py | 66 ++++++++++++---------- tests/integration/test_run.py | 33 +++++++---- tests/integration/test_schedule.py | 14 +++-- tests/integration/test_store.py | 40 +++++++------ tests/integration/test_task.py | 30 +++++----- tests/integration/test_webhook.py | 11 ++-- tests/integration/test_webhook_dispatch.py | 22 +++++--- 14 files changed, 235 insertions(+), 175 deletions(-) diff --git a/tests/integration/test_actor.py b/tests/integration/test_actor.py index 4797c664..3305f518 100644 --- a/tests/integration/test_actor.py +++ b/tests/integration/test_actor.py @@ -5,10 +5,12 @@ from typing import TYPE_CHECKING, cast from ._utils import get_random_resource_name, maybe_await +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import ActorShort if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, Build, ListOfActors, Run + from apify_client._models_generated import Actor, Build, Run from apify_client._resource_clients import BuildClient, BuildClientAsync @@ -36,36 +38,31 @@ async def test_get_actor_by_full_name(client: ApifyClient | ApifyClientAsync) -> async def test_list_actors_my(client: ApifyClient | ApifyClientAsync) -> None: """Test listing Actors created by the user.""" - result = await maybe_await(client.actors().list(my=True, limit=10)) - actors_page = cast('ListOfActors', result) + actors_page = await maybe_await(client.actors().list(my=True, limit=10)) - assert actors_page is not None - assert 
actors_page.items is not None - # User may have 0 actors + assert isinstance(actors_page, ListPage) assert isinstance(actors_page.items, list) + # User may have 0 actors — only check element type when any were returned. + if actors_page.items: + assert isinstance(actors_page.items[0], ActorShort) async def test_list_actors_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing Actors with pagination parameters.""" - # List all actors (public + owned), should return some results - result = await maybe_await(client.actors().list(limit=5, offset=0)) - actors_page = cast('ListOfActors', result) + actors_page = await maybe_await(client.actors().list(limit=5, offset=0)) - assert actors_page is not None - assert actors_page.items is not None + assert isinstance(actors_page, ListPage) assert isinstance(actors_page.items, list) - # Should have at least some actors (public ones exist) - assert len(actors_page.items) >= 0 + assert isinstance(actors_page.items[0], ActorShort) async def test_list_actors_sorting(client: ApifyClient | ApifyClientAsync) -> None: """Test listing Actors with sorting.""" - result = await maybe_await(client.actors().list(limit=10, desc=True, sort_by='created_at')) - actors_page = cast('ListOfActors', result) + actors_page = await maybe_await(client.actors().list(limit=10, desc=True, sort_by='created_at')) - assert actors_page is not None - assert actors_page.items is not None + assert isinstance(actors_page, ListPage) assert isinstance(actors_page.items, list) + assert isinstance(actors_page.items[0], ActorShort) async def test_actor_create_update_delete(client: ApifyClient | ApifyClientAsync) -> None: diff --git a/tests/integration/test_actor_env_var.py b/tests/integration/test_actor_env_var.py index e5d9663e..2e13ecd1 100644 --- a/tests/integration/test_actor_env_var.py +++ b/tests/integration/test_actor_env_var.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage 
+from apify_client._models_generated import EnvVar + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, EnvVar, ListOfEnvVars + from apify_client._models_generated import Actor from ._utils import get_random_resource_name, maybe_await @@ -49,11 +52,11 @@ async def test_actor_env_var_list(client: ApifyClient | ApifyClientAsync) -> Non try: # List env vars - result = await maybe_await(version_client.env_vars().list()) - env_vars = cast('ListOfEnvVars', result) + env_vars = await maybe_await(version_client.env_vars().list()) - assert env_vars is not None - assert env_vars.items is not None + assert isinstance(env_vars, ListPage) + assert isinstance(env_vars.items, list) + assert isinstance(env_vars.items[0], EnvVar) assert len(env_vars.items) >= 1 # Verify env var fields diff --git a/tests/integration/test_actor_version.py b/tests/integration/test_actor_version.py index b8ff31c3..6b29a27f 100644 --- a/tests/integration/test_actor_version.py +++ b/tests/integration/test_actor_version.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import Version + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, ListOfVersions, Version + from apify_client._models_generated import Actor from ._utils import get_random_resource_name, maybe_await @@ -42,11 +45,11 @@ async def test_actor_version_list(client: ApifyClient | ApifyClientAsync) -> Non try: # List versions - result = await maybe_await(actor_client.versions().list()) - versions = cast('ListOfVersions', result) + versions = await maybe_await(actor_client.versions().list()) - assert versions is not None - assert versions.items is not None + assert isinstance(versions, ListPage) + assert isinstance(versions.items, list) + assert isinstance(versions.items[0], Version) assert 
len(versions.items) >= 1 # Verify version fields diff --git a/tests/integration/test_build.py b/tests/integration/test_build.py index ef8ac662..cea96797 100644 --- a/tests/integration/test_build.py +++ b/tests/integration/test_build.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import BuildShort + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, Build, ListOfBuilds + from apify_client._models_generated import Actor, Build from datetime import timedelta @@ -21,14 +24,12 @@ async def test_build_list_for_actor(client: ApifyClient | ApifyClientAsync) -> N """Test listing builds for a public Actor.""" # Get builds for hello-world actor actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=10)) - builds_page = cast('ListOfBuilds', result) + builds_page = await maybe_await(actor.builds().list(limit=10)) - assert builds_page is not None - assert builds_page.items is not None - assert len(builds_page.items) > 0 # hello-world should have at least one build + assert isinstance(builds_page, ListPage) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) # hello-world has at least one build - # Verify build structure first_build = builds_page.items[0] assert first_build.id is not None assert first_build.act_id is not None @@ -38,9 +39,11 @@ async def test_build_get(client: ApifyClient | ApifyClientAsync) -> None: """Test getting a specific build.""" # First list builds to get a build ID actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=1)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=1)) + + assert isinstance(builds_page, ListPage) + assert isinstance(builds_page.items, list) + assert 
isinstance(builds_page.items[0], BuildShort) build_id = builds_page.items[0].id # Get the specific build @@ -56,22 +59,24 @@ async def test_build_get(client: ApifyClient | ApifyClientAsync) -> None: async def test_user_builds_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing all user builds.""" # List user's builds (may be empty if user has no actors) - result = await maybe_await(client.builds().list(limit=10)) - builds_page = cast('ListOfBuilds', result) + builds_page = await maybe_await(client.builds().list(limit=10)) - assert builds_page is not None - assert builds_page.items is not None - # User may have 0 builds, so we just check the structure + assert isinstance(builds_page, ListPage) assert isinstance(builds_page.items, list) + # User may have 0 builds — only check element type when any were returned. + if builds_page.items: + assert isinstance(builds_page.items[0], BuildShort) async def test_build_log(client: ApifyClient | ApifyClientAsync) -> None: """Test getting build log.""" # First list builds to get a completed build ID actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=5)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=5)) + + assert isinstance(builds_page, ListPage) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) # Find a completed build (SUCCEEDED status) completed_build = None @@ -96,9 +101,11 @@ async def test_build_wait_for_finish(client: ApifyClient | ApifyClientAsync) -> """Test wait_for_finish on an already completed build.""" # First list builds to get a completed build ID actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=5)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=5)) + + assert isinstance(builds_page, 
ListPage) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) # Find a completed build (SUCCEEDED status) completed_build = None @@ -208,9 +215,11 @@ async def test_build_get_open_api_definition(client: ApifyClient | ApifyClientAs """Test getting OpenAPI definition for a build.""" # Get builds for hello-world actor actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=1)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=1)) + + assert isinstance(builds_page, ListPage) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) build_id = builds_page.items[0].id # Get the OpenAPI definition diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index 149a504c..6aa3bdc4 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -4,6 +4,9 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import DatasetListItem + if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterator from contextlib import AbstractAsyncContextManager, AbstractContextManager @@ -11,7 +14,7 @@ from impit import Response from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Dataset, ListOfDatasets + from apify_client._models_generated import Dataset from apify_client._resource_clients.dataset import DatasetItemsPage import json @@ -26,22 +29,23 @@ async def test_dataset_collection_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing datasets.""" - result = await maybe_await(client.datasets().list(limit=10)) - datasets_page = cast('ListOfDatasets', result) + datasets_page = await maybe_await(client.datasets().list(limit=10)) - assert datasets_page is not None - assert 
datasets_page.items is not None + assert isinstance(datasets_page, ListPage) assert isinstance(datasets_page.items, list) + # User may have 0 datasets — only check element type when any were returned. + if datasets_page.items: + assert isinstance(datasets_page.items[0], DatasetListItem) async def test_dataset_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing datasets with pagination.""" - result = await maybe_await(client.datasets().list(limit=5, offset=0)) - datasets_page = cast('ListOfDatasets', result) + datasets_page = await maybe_await(client.datasets().list(limit=5, offset=0)) - assert datasets_page is not None - assert datasets_page.items is not None + assert isinstance(datasets_page, ListPage) assert isinstance(datasets_page.items, list) + if datasets_page.items: + assert isinstance(datasets_page.items[0], DatasetListItem) async def test_dataset_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None: diff --git a/tests/integration/test_key_value_store.py b/tests/integration/test_key_value_store.py index 952fc0cf..d49632f4 100644 --- a/tests/integration/test_key_value_store.py +++ b/tests/integration/test_key_value_store.py @@ -4,11 +4,13 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import KeyValueStore, KeyValueStoreKey + if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterator from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import KeyValueStore, KeyValueStoreKey, ListOfKeys, ListOfKeyValueStores import json from datetime import timedelta @@ -22,22 +24,22 @@ async def test_key_value_store_collection_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing key-value stores.""" - result = await maybe_await(client.key_value_stores().list(limit=10)) - kvs_page = cast('ListOfKeyValueStores', result) + kvs_page = await 
maybe_await(client.key_value_stores().list(limit=10)) - assert kvs_page is not None - assert kvs_page.items is not None + assert isinstance(kvs_page, ListPage) assert isinstance(kvs_page.items, list) + if kvs_page.items: + assert isinstance(kvs_page.items[0], KeyValueStore) async def test_key_value_store_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing key-value stores with pagination.""" - result = await maybe_await(client.key_value_stores().list(limit=5, offset=0)) - kvs_page = cast('ListOfKeyValueStores', result) + kvs_page = await maybe_await(client.key_value_stores().list(limit=5, offset=0)) - assert kvs_page is not None - assert kvs_page.items is not None + assert isinstance(kvs_page, ListPage) assert isinstance(kvs_page.items, list) + if kvs_page.items: + assert isinstance(kvs_page.items[0], KeyValueStore) async def test_key_value_store_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None: @@ -124,11 +126,12 @@ async def test_list_keys_signature( await maybe_await(kvs.list_keys()) # Kvs content retrieved with correct signature - result = await maybe_await(kvs.list_keys(signature=test_kvs_of_another_user.signature)) - response = cast('ListOfKeys', result) - raw_items = response.items + response = await maybe_await(kvs.list_keys(signature=test_kvs_of_another_user.signature)) - assert set(test_kvs_of_another_user.expected_content) == {item.key for item in raw_items} + assert isinstance(response, ListPage) + assert isinstance(response.items, list) + assert isinstance(response.items[0], KeyValueStoreKey) + assert set(test_kvs_of_another_user.expected_content) == {item.key for item in response.items} async def test_get_record_signature( @@ -338,9 +341,11 @@ async def test_key_value_store_list_keys(client: ApifyClient | ApifyClientAsync, await maybe_sleep(1, is_async=is_async) # List keys - result = await maybe_await(store_client.list_keys()) - keys_response = cast('ListOfKeys', result) - assert 
keys_response is not None + keys_response = await maybe_await(store_client.list_keys()) + + assert isinstance(keys_response, ListPage) + assert isinstance(keys_response.items, list) + assert isinstance(keys_response.items[0], KeyValueStoreKey) assert len(keys_response.items) == 5 # Verify key names @@ -368,9 +373,11 @@ async def test_key_value_store_list_keys_with_limit(client: ApifyClient | ApifyC await maybe_sleep(1, is_async=is_async) # List with limit - result = await maybe_await(store_client.list_keys(limit=5)) - keys_response = cast('ListOfKeys', result) - assert keys_response is not None + keys_response = await maybe_await(store_client.list_keys(limit=5)) + + assert isinstance(keys_response, ListPage) + assert isinstance(keys_response.items, list) + assert isinstance(keys_response.items[0], KeyValueStoreKey) assert len(keys_response.items) == 5 finally: await maybe_await(store_client.delete()) diff --git a/tests/integration/test_log.py b/tests/integration/test_log.py index 85800682..394ba688 100644 --- a/tests/integration/test_log.py +++ b/tests/integration/test_log.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import BuildShort + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfBuilds, Run + from apify_client._models_generated import Run from ._utils import maybe_await @@ -39,9 +42,11 @@ async def test_log_get_from_build(client: ApifyClient | ApifyClientAsync) -> Non """Test retrieving log from a build.""" # Get a build from hello-world actor actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=1)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=1)) + + assert isinstance(builds_page, ListPage) + assert isinstance(builds_page.items, list) + assert 
isinstance(builds_page.items[0], BuildShort) build_id = builds_page.items[0].id # Get log from the build diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py index a08e7fb8..a6815d8d 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/test_request_queue.py @@ -4,15 +4,15 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import Request, RequestQueueShort + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync from apify_client._models_generated import ( BatchAddResult, BatchDeleteResult, - ListOfRequestQueues, - ListOfRequests, LockedRequestQueueHead, - Request, RequestLockInfo, RequestQueue, RequestQueueHead, @@ -30,22 +30,22 @@ async def test_request_queue_collection_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing request queues.""" - result = await maybe_await(client.request_queues().list(limit=10)) - rq_page = cast('ListOfRequestQueues', result) + rq_page = await maybe_await(client.request_queues().list(limit=10)) - assert rq_page is not None - assert rq_page.items is not None + assert isinstance(rq_page, ListPage) assert isinstance(rq_page.items, list) + if rq_page.items: + assert isinstance(rq_page.items[0], RequestQueueShort) async def test_request_queue_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing request queues with pagination.""" - result = await maybe_await(client.request_queues().list(limit=5, offset=0)) - rq_page = cast('ListOfRequestQueues', result) + rq_page = await maybe_await(client.request_queues().list(limit=5, offset=0)) - assert rq_page is not None - assert rq_page.items is not None + assert isinstance(rq_page, ListPage) assert isinstance(rq_page.items, list) + if rq_page.items: + assert isinstance(rq_page.items[0], RequestQueueShort) async def test_request_queue_collection_get_or_create(client: ApifyClient | 
ApifyClientAsync) -> None: @@ -256,15 +256,16 @@ async def test_request_queue_list_requests(client: ApifyClient | ApifyClientAsyn ) # Poll until all requests are available (eventual consistency) - list_response: ListOfRequests | None = None + list_response: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - list_response = cast('ListOfRequests', result) - if len(list_response.items) == 5: + list_response = await maybe_await(rq_client.list_requests()) + if list_response.items and len(list_response.items) == 5: break - assert list_response is not None + assert isinstance(list_response, ListPage) + assert isinstance(list_response.items, list) + assert isinstance(list_response.items[0], Request) assert len(list_response.items) == 5 finally: await maybe_await(rq_client.delete()) @@ -325,15 +326,16 @@ async def test_request_queue_batch_add_requests(client: ApifyClient | ApifyClien assert len(batch_response.unprocessed_requests) == 0 # Poll until all requests are available (eventual consistency) - list_response: ListOfRequests | None = None + list_response: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - list_response = cast('ListOfRequests', result) - if len(list_response.items) == 10: + list_response = await maybe_await(rq_client.list_requests()) + if list_response.items and len(list_response.items) == 10: break - assert list_response is not None + assert isinstance(list_response, ListPage) + assert isinstance(list_response.items, list) + assert isinstance(list_response.items[0], Request) assert len(list_response.items) == 10 finally: await maybe_await(rq_client.delete()) @@ -355,15 +357,16 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl ) # Poll until all requests are available (eventual consistency) - list_response: ListOfRequests | None 
= None + list_response: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - list_response = cast('ListOfRequests', result) - if len(list_response.items) == 10: + list_response = await maybe_await(rq_client.list_requests()) + if list_response.items and len(list_response.items) == 10: break - assert list_response is not None + assert isinstance(list_response, ListPage) + assert isinstance(list_response.items, list) + assert isinstance(list_response.items[0], Request) assert len(list_response.items) == 10 requests_to_delete: list[RequestDeleteInputDict] = [ {'unique_key': item.unique_key} for item in list_response.items[:5] @@ -376,15 +379,16 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl assert len(delete_response.processed_requests) == 5 # Poll until deletions are reflected (eventual consistency) - remaining: ListOfRequests | None = None + remaining: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - remaining = cast('ListOfRequests', result) - if len(remaining.items) == 5: + remaining = await maybe_await(rq_client.list_requests()) + if remaining.items and len(remaining.items) == 5: break - assert remaining is not None + assert isinstance(remaining, ListPage) + assert isinstance(remaining.items, list) + assert isinstance(remaining.items[0], Request) assert len(remaining.items) == 5 finally: await maybe_await(rq_client.delete()) diff --git a/tests/integration/test_run.py b/tests/integration/test_run.py index 3416fe85..eab7c01f 100644 --- a/tests/integration/test_run.py +++ b/tests/integration/test_run.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import RunShort + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync 
- from apify_client._models_generated import Dataset, KeyValueStore, ListOfRuns, RequestQueue, Run + from apify_client._models_generated import Dataset, KeyValueStore, RequestQueue, Run from datetime import UTC, datetime, timedelta @@ -35,14 +38,20 @@ async def test_run_collection_list_multiple_statuses(client: ApifyClient | Apify try: run_collection = client.actor(HELLO_WORLD_ACTOR).runs() - result = await maybe_await(run_collection.list(status=[ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT])) - multiple_status_runs = cast('ListOfRuns', result) + multiple_status_runs = await maybe_await( + run_collection.list(status=[ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT]) + ) + single_status_runs = await maybe_await(run_collection.list(status=ActorJobStatus.SUCCEEDED)) - result = await maybe_await(run_collection.list(status=ActorJobStatus.SUCCEEDED)) - single_status_runs = cast('ListOfRuns', result) + assert isinstance(multiple_status_runs, ListPage) + assert isinstance(multiple_status_runs.items, list) + if multiple_status_runs.items: + assert isinstance(multiple_status_runs.items[0], RunShort) - assert multiple_status_runs is not None - assert single_status_runs is not None + assert isinstance(single_status_runs, ListPage) + assert isinstance(single_status_runs.items, list) + if single_status_runs.items: + assert isinstance(single_status_runs.items[0], RunShort) assert all( run.status in [ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT] for run in multiple_status_runs.items @@ -294,13 +303,13 @@ async def test_run_log(client: ApifyClient | ApifyClientAsync) -> None: async def test_run_runs_client(client: ApifyClient | ApifyClientAsync) -> None: """Test listing runs through the run collection client.""" # List runs (should return valid data structure) - result = await maybe_await(client.runs().list(limit=10)) - runs_page = cast('ListOfRuns', result) - assert runs_page is not None - assert runs_page.items is not None + runs_page = await 
maybe_await(client.runs().list(limit=10)) + + assert isinstance(runs_page, ListPage) assert isinstance(runs_page.items, list) - # The user may have runs, verify the structure + # The user may have 0 runs — only check element type when any were returned. if runs_page.items: + assert isinstance(runs_page.items[0], RunShort) first_run = runs_page.items[0] assert first_run.id is not None assert first_run.act_id is not None diff --git a/tests/integration/test_schedule.py b/tests/integration/test_schedule.py index 2337116f..a7c26d31 100644 --- a/tests/integration/test_schedule.py +++ b/tests/integration/test_schedule.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import ScheduleShort + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfSchedules, Schedule + from apify_client._models_generated import Schedule from ._utils import get_random_resource_name, maybe_await @@ -116,10 +119,11 @@ async def test_schedule_list(client: ApifyClient | ApifyClientAsync) -> None: try: # List schedules - result = await maybe_await(client.schedules().list(limit=100)) - schedules_page = cast('ListOfSchedules', result) - assert schedules_page is not None - assert schedules_page.items is not None + schedules_page = await maybe_await(client.schedules().list(limit=100)) + + assert isinstance(schedules_page, ListPage) + assert isinstance(schedules_page.items, list) + assert isinstance(schedules_page.items[0], ScheduleShort) # Verify our schedules are in the list schedule_ids = [s.id for s in schedules_page.items] diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 69a3e8fc..967275bd 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -2,11 +2,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING 
+ +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import StoreListActor if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfStoreActors from ._utils import maybe_await @@ -14,32 +16,34 @@ async def test_store_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing public Actors in the store.""" - result = await maybe_await(client.store().list(limit=10)) - actors_list = cast('ListOfStoreActors', result) - assert actors_list is not None - assert actors_list.items is not None - assert len(actors_list.items) > 0 # Store always has actors + actors_list = await maybe_await(client.store().list(limit=10)) + + assert isinstance(actors_list, ListPage) + assert isinstance(actors_list.items, list) + assert isinstance(actors_list.items[0], StoreListActor) # Store always has actors async def test_store_list_with_search(client: ApifyClient | ApifyClientAsync) -> None: """Test listing store with search filter.""" - result = await maybe_await(client.store().list(limit=5, search='web scraper')) - store_page = cast('ListOfStoreActors', result) + store_page = await maybe_await(client.store().list(limit=5, search='web scraper')) - assert store_page is not None - assert store_page.items is not None + assert isinstance(store_page, ListPage) assert isinstance(store_page.items, list) + if store_page.items: + assert isinstance(store_page.items[0], StoreListActor) async def test_store_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test store listing pagination.""" - result1 = await maybe_await(client.store().list(limit=5, offset=0)) - result2 = await maybe_await(client.store().list(limit=5, offset=5)) - page1 = cast('ListOfStoreActors', result1) - page2 = cast('ListOfStoreActors', result2) - - assert page1 is not None - assert page2 is not None + page1 = await maybe_await(client.store().list(limit=5, offset=0)) + page2 = await 
maybe_await(client.store().list(limit=5, offset=5)) + + assert isinstance(page1, ListPage) + assert isinstance(page1.items, list) + assert isinstance(page1.items[0], StoreListActor) + assert isinstance(page2, ListPage) + assert isinstance(page2.items, list) # Verify different results (if enough actors exist) if len(page1.items) == 5 and len(page2.items) > 0: + assert isinstance(page2.items[0], StoreListActor) assert page1.items[0].id != page2.items[0].id diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py index 322185a2..f3194560 100644 --- a/tests/integration/test_task.py +++ b/tests/integration/test_task.py @@ -6,10 +6,12 @@ from typing import TYPE_CHECKING, cast from ._utils import get_random_resource_name, maybe_await +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import RunShort, TaskShort if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, ListOfRuns, ListOfTasks, ListOfWebhooks, Run, Task + from apify_client._models_generated import Actor, Run, Task # Use a simple, fast public actor for testing HELLO_WORLD_ACTOR = 'apify/hello-world' @@ -113,10 +115,11 @@ async def test_task_list(client: ApifyClient | ApifyClientAsync) -> None: try: # List tasks - result = await maybe_await(client.tasks().list(limit=100)) - tasks_page = cast('ListOfTasks', result) - assert tasks_page is not None - assert tasks_page.items is not None + tasks_page = await maybe_await(client.tasks().list(limit=100)) + + assert isinstance(tasks_page, ListPage) + assert isinstance(tasks_page.items, list) + assert isinstance(tasks_page.items[0], TaskShort) # Verify our task is in the list task_ids = [t.id for t in tasks_page.items] @@ -289,10 +292,11 @@ async def test_task_runs(client: ApifyClient | ApifyClientAsync) -> None: # List runs for this task runs_client = task_client.runs() - result = await maybe_await(runs_client.list(limit=10)) - runs_page = 
cast('ListOfRuns', result) - assert runs_page is not None - assert runs_page.items is not None + runs_page = await maybe_await(runs_client.list(limit=10)) + + assert isinstance(runs_page, ListPage) + assert isinstance(runs_page.items, list) + assert isinstance(runs_page.items[0], RunShort) assert len(runs_page.items) >= 1 # Cleanup run @@ -365,10 +369,10 @@ async def test_task_webhooks(client: ApifyClient | ApifyClientAsync) -> None: try: # Get webhooks client webhooks_client = task_client.webhooks() - result = await maybe_await(webhooks_client.list()) - webhooks_page = cast('ListOfWebhooks', result) - assert webhooks_page is not None - assert webhooks_page.items is not None + webhooks_page = await maybe_await(webhooks_client.list()) + + assert isinstance(webhooks_page, ListPage) + assert isinstance(webhooks_page.items, list) # New task should have no webhooks assert len(webhooks_page.items) == 0 diff --git a/tests/integration/test_webhook.py b/tests/integration/test_webhook.py index c77ea050..970e4c8a 100644 --- a/tests/integration/test_webhook.py +++ b/tests/integration/test_webhook.py @@ -13,9 +13,8 @@ from ._utils import maybe_await from apify_client._models_generated import ( ActorJobStatus, - ListOfRuns, - ListOfWebhookDispatches, Run, + RunShort, Webhook, WebhookDispatch, WebhookEventType, @@ -34,9 +33,11 @@ async def _get_finished_run_id(client: ApifyClient | ApifyClientAsync) -> str: """ runs_page = await maybe_await(client.actor(HELLO_WORLD_ACTOR).runs().list(limit=1, status=ActorJobStatus.SUCCEEDED)) - assert isinstance(runs_page, ListOfRuns) + assert isinstance(runs_page, ListPage) + assert isinstance(runs_page.items, list) if len(runs_page.items) > 0: + assert isinstance(runs_page.items[0], RunShort) return runs_page.items[0].id # No completed runs found - start one and wait for it to finish @@ -170,7 +171,9 @@ async def test_webhook_dispatches(client: ApifyClient | ApifyClientAsync) -> Non # List dispatches for this webhook dispatches = await 
maybe_await(webhook_client.dispatches().list()) - assert isinstance(dispatches, ListOfWebhookDispatches) + assert isinstance(dispatches, ListPage) + assert isinstance(dispatches.items, list) + assert isinstance(dispatches.items[0], WebhookDispatch) assert len(dispatches.items) > 0 finally: diff --git a/tests/integration/test_webhook_dispatch.py b/tests/integration/test_webhook_dispatch.py index 5bfc106d..00c88b1c 100644 --- a/tests/integration/test_webhook_dispatch.py +++ b/tests/integration/test_webhook_dispatch.py @@ -4,9 +4,11 @@ from typing import TYPE_CHECKING, cast +from apify_client._iterable_list_page import ListPage +from apify_client._models_generated import WebhookDispatch + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfWebhookDispatches, WebhookDispatch from ._utils import maybe_await @@ -14,24 +16,26 @@ async def test_webhook_dispatch_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing webhook dispatches.""" - result = await maybe_await(client.webhook_dispatches().list(limit=10)) - dispatches_page = cast('ListOfWebhookDispatches', result) + dispatches_page = await maybe_await(client.webhook_dispatches().list(limit=10)) - assert dispatches_page is not None - assert dispatches_page.items is not None + assert isinstance(dispatches_page, ListPage) assert isinstance(dispatches_page.items, list) - # User may have 0 dispatches, so we just verify the structure + # User may have 0 dispatches — only check element type when any were returned. 
+ if dispatches_page.items: + assert isinstance(dispatches_page.items[0], WebhookDispatch) async def test_webhook_dispatch_get(client: ApifyClient | ApifyClientAsync) -> None: """Test getting a specific webhook dispatch.""" # First list dispatches to get a dispatch ID - result = await maybe_await(client.webhook_dispatches().list(limit=1)) - dispatches_page = cast('ListOfWebhookDispatches', result) - assert dispatches_page is not None + dispatches_page = await maybe_await(client.webhook_dispatches().list(limit=1)) + + assert isinstance(dispatches_page, ListPage) + assert isinstance(dispatches_page.items, list) if dispatches_page.items: # If there are dispatches, test the get method + assert isinstance(dispatches_page.items[0], WebhookDispatch) dispatch_id = dispatches_page.items[0].id result = await maybe_await(client.webhook_dispatch(dispatch_id).get()) dispatch = cast('WebhookDispatch', result) From 78c98cd2942ac400c42b45fa7d1d6a989f0c09a7 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 24 Apr 2026 14:52:06 +0200 Subject: [PATCH 7/7] Fix tests and types --- src/apify_client/_iterable_list_page.py | 19 +++++++++---------- tests/integration/_utils.py | 4 ++-- tests/integration/test_request_queue.py | 8 ++++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/apify_client/_iterable_list_page.py b/src/apify_client/_iterable_list_page.py index 3832452f..916bf6d0 100644 --- a/src/apify_client/_iterable_list_page.py +++ b/src/apify_client/_iterable_list_page.py @@ -144,7 +144,7 @@ def build_iterable_list_page( The `total` field from the first page is not trusted for stopping iteration because it may change between calls; iteration stops when a page has no items or when the user-requested `limit` has been reached. - The `count` field does not count objects returned, but object scanned by the API. For example when using filters, + The `count` field does not count objects returned, but objects scanned by the API. 
For example when using filters, returned items can be smaller than `count`. Therefore, `count` should be used for correct offset calculation if available. @@ -213,9 +213,9 @@ async def async_iterator() -> AsyncIterator[Any]: yield item fetched_items += getattr(current_page, 'count', len(current_page.items)) - async def wrap_first_page() -> IterableListPage[Any]: + async def wrap_first_page() -> ListPage[Any]: first_page = await fetch_first_page - return IterableListPage(first_page, iter(first_page.items)) + return ListPage(first_page) return IterableListPageAsync(wrap_first_page, async_iterator()) @@ -261,7 +261,7 @@ def iterator() -> Iterator[Any]: def build_cursor_iterable_list_page_async( - callback: Callable[..., Awaitable[HasItems[T]]], + callback: Callable[..., Coroutine[Any, Any, HasItems[T]]], *, cursor_param: str, initial_cursor: Any = None, @@ -279,11 +279,11 @@ def build_cursor_iterable_list_page_async( user_limit = limit or 0 first_limit = _min_for_limit_param(limit, effective_chunk) - async def fetch_first_page() -> Any: - return await callback(**{**kwargs, cursor_param: initial_cursor, 'limit': first_limit}) + # Can be awaited multiple times with same result, but not scheduled at this time yet, as it might be pre-emptive. 
+ fetch_first_page = _LazyTask(callback(**{**kwargs, cursor_param: initial_cursor, 'limit': first_limit})) async def async_iterator() -> AsyncIterator[Any]: - current_page = await fetch_first_page() + current_page = await fetch_first_page for item in current_page.items: yield item @@ -299,8 +299,7 @@ async def async_iterator() -> AsyncIterator[Any]: fetched += len(current_page.items) next_cursor = getattr(current_page, f'next_{cursor_param}') - async def wrap_first_page() -> IterableListPage[Any]: - first_page = await fetch_first_page() - return IterableListPage(first_page, iter(first_page.items)) + async def wrap_first_page() -> ListPage[Any]: + return ListPage(await fetch_first_page) return IterableListPageAsync(wrap_first_page, async_iterator()) diff --git a/tests/integration/_utils.py b/tests/integration/_utils.py index 399301ec..8a3e1f1f 100644 --- a/tests/integration/_utils.py +++ b/tests/integration/_utils.py @@ -89,14 +89,14 @@ async def maybe_await(value: Coroutine[Any, Any, T]) -> T: ... async def maybe_await(value: T) -> T: ... -async def maybe_await(value: Any) -> Any: +async def maybe_await(value: T | Coroutine[Any, Any, T]) -> T: """Await coroutines, pass through other values. 
Enables unified test code for both sync and async clients: result = await maybe_await(client.datasets().list()) """ if hasattr(value, '__await__'): - return await value + return await value # ty: ignore[invalid-await] return value diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py index a6815d8d..f3707220 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/test_request_queue.py @@ -256,10 +256,10 @@ async def test_request_queue_list_requests(client: ApifyClient | ApifyClientAsyn ) # Poll until all requests are available (eventual consistency) - list_response: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) list_response = await maybe_await(rq_client.list_requests()) + assert isinstance(list_response, ListPage) if list_response.items and len(list_response.items) == 5: break @@ -326,10 +326,10 @@ async def test_request_queue_batch_add_requests(client: ApifyClient | ApifyClien assert len(batch_response.unprocessed_requests) == 0 # Poll until all requests are available (eventual consistency) - list_response: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) list_response = await maybe_await(rq_client.list_requests()) + assert isinstance(list_response, ListPage) if list_response.items and len(list_response.items) == 10: break @@ -357,10 +357,10 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl ) # Poll until all requests are available (eventual consistency) - list_response: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) list_response = await maybe_await(rq_client.list_requests()) + assert isinstance(list_response, ListPage) if list_response.items and len(list_response.items) == 10: break @@ -379,10 +379,10 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl assert len(delete_response.processed_requests) == 5 # 
Poll until deletions are reflected (eventual consistency) - remaining: ListPage[Request] | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) remaining = await maybe_await(rq_client.list_requests()) + assert isinstance(remaining, ListPage) if remaining.items and len(remaining.items) == 5: break