diff --git a/docs/02_concepts/08_pagination.mdx b/docs/02_concepts/08_pagination.mdx index 9f1da230..247121db 100644 --- a/docs/02_concepts/08_pagination.mdx +++ b/docs/02_concepts/08_pagination.mdx @@ -12,10 +12,10 @@ import ApiLink from '@site/src/components/ApiLink'; import PaginationAsyncExample from '!!raw-loader!./code/08_pagination_async.py'; import PaginationSyncExample from '!!raw-loader!./code/08_pagination_sync.py'; - import IterateItemsAsyncExample from '!!raw-loader!./code/08_iterate_items_async.py'; import IterateItemsSyncExample from '!!raw-loader!./code/08_iterate_items_sync.py'; + Most methods named `list` or `list_something` in the Apify client return a `ListPage` object. This object provides a consistent interface for working with paginated data and includes the following properties: - `items` - The main results you're looking for. @@ -45,7 +45,7 @@ The `ListPage` interface offers several k ## Generator-based iteration -For most use cases, `iterate_items()` is the recommended way to process all items in a dataset. It handles pagination automatically using a Python generator, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself. +You can also iterate directly over the return value of the `list` methods. Iteration handles pagination automatically, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself. @@ -60,6 +60,4 @@ For most use cases, `iterate_items()` is the recommended way to process all item -`iterate_items()` accepts the same filtering parameters as `list_items()` (`clean`, `fields`, `omit`, `unwind`, `skip_empty`, `skip_hidden`), so you can combine automatic pagination with data filtering. - -Similarly, `KeyValueStoreClient` provides an `iterate_keys()` method for iterating over all keys in a key-value store without manual pagination. 
+Similarly, you can iterate over the return value of `KeyValueStoreClient.list_keys()` to go through all keys in a key-value store without manual pagination. The older `iterate_keys()` method is deprecated. diff --git a/docs/02_concepts/code/08_iterate_items_async.py b/docs/02_concepts/code/08_iterate_items_async.py index fba9b5b0..3b64ea46 100644 --- a/docs/02_concepts/code/08_iterate_items_async.py +++ b/docs/02_concepts/code/08_iterate_items_async.py @@ -7,6 +7,11 @@ async def main() -> None: apify_client = ApifyClientAsync(TOKEN) dataset_client = apify_client.dataset('dataset-id') - # Iterate through all items automatically. - async for item in dataset_client.iterate_items(): - print(item) + # Define the pagination parameters + limit = 1500 # Number of items in total + offset = 100 # Starting offset + + # Iterate through items automatically, lazily sending as many API calls + # as needed and receiving items in chunks. + async for item in dataset_client.list_items(limit=limit, offset=offset): + print(item) # Process the item as needed diff --git a/docs/02_concepts/code/08_iterate_items_sync.py b/docs/02_concepts/code/08_iterate_items_sync.py index 005c899f..d7b57c22 100644 --- a/docs/02_concepts/code/08_iterate_items_sync.py +++ b/docs/02_concepts/code/08_iterate_items_sync.py @@ -7,9 +7,14 @@ def main() -> None: apify_client = ApifyClient(TOKEN) dataset_client = apify_client.dataset('dataset-id') - # Iterate through all items automatically. - for item in dataset_client.iterate_items(): - print(item) + # Define the pagination parameters + limit = 1500 # Number of items in total + offset = 100 # Starting offset + + # Iterate through items automatically, lazily sending as many API calls + # as needed and receiving items in chunks. 
+ for item in dataset_client.list_items(limit=limit, offset=offset): + print(item) # Process the item as needed if __name__ == '__main__': diff --git a/docs/02_concepts/code/08_pagination_async.py b/docs/02_concepts/code/08_pagination_async.py index 50e9d047..23ac5fde 100644 --- a/docs/02_concepts/code/08_pagination_async.py +++ b/docs/02_concepts/code/08_pagination_async.py @@ -10,26 +10,15 @@ async def main() -> None: dataset_client = apify_client.dataset('dataset-id') # Define the pagination parameters - limit = 1000 # Number of items per page + limit = 1000 # Number of items to request from the API offset = 0 # Starting offset - all_items = [] # List to store all fetched items - while True: - # Fetch a page of items - response = await dataset_client.list_items(limit=limit, offset=offset) - items = response.items - total = response.total + # Send single API call to fetch paginated items. + # (number of items per single call can be limited by API) + paginated_items = await dataset_client.list_items(limit=limit, offset=offset) - print(f'Fetched {len(items)} items') + # Inspect pagination metadata returned by API + print(paginated_items.total) - # Add the fetched items to the complete list - all_items.extend(items) - - # Exit the loop if there are no more items to fetch - if offset + limit >= total: - break - - # Increment the offset for the next page - offset += limit - - print(f'Overall fetched {len(all_items)} items') + for item in paginated_items.items: + print(item) # Process the item as needed diff --git a/docs/02_concepts/code/08_pagination_sync.py b/docs/02_concepts/code/08_pagination_sync.py index 3beb4fbe..f144339e 100644 --- a/docs/02_concepts/code/08_pagination_sync.py +++ b/docs/02_concepts/code/08_pagination_sync.py @@ -10,26 +10,15 @@ def main() -> None: dataset_client = apify_client.dataset('dataset-id') # Define the pagination parameters - limit = 1000 # Number of items per page + limit = 1000 # Number of items to request from the API offset = 0 # Starting 
offset - all_items = [] # List to store all fetched items - while True: - # Fetch a page of items - response = dataset_client.list_items(limit=limit, offset=offset) - items = response.items - total = response.total + # Send single API call to fetch paginated items. + # (number of items per single call can be limited by API) + paginated_items = dataset_client.list_items(limit=limit, offset=offset) - print(f'Fetched {len(items)} items') + # Inspect pagination metadata returned by API + print(paginated_items.total) - # Add the fetched items to the complete list - all_items.extend(items) - - # Exit the loop if there are no more items to fetch - if offset + limit >= total: - break - - # Increment the offset for the next page - offset += limit - - print(f'Overall fetched {len(all_items)} items') + for item in paginated_items.items: + print(item) # Process the item as needed diff --git a/scripts/_utils.py b/scripts/_utils.py index 48612c54..b7d91853 100644 --- a/scripts/_utils.py +++ b/scripts/_utils.py @@ -27,6 +27,7 @@ (re.compile(r'\bSynchronous\b'), 'Asynchronous'), (re.compile(r'Retry a function'), 'Retry an async function'), (re.compile(r'Function to retry'), 'Async function to retry'), + (re.compile(r'returned page also supports iteration: `for'), 'returned page also supports iteration: `async for'), ] """Patterns for converting sync docstrings to async docstrings.""" diff --git a/src/apify_client/_pagination.py b/src/apify_client/_pagination.py new file mode 100644 index 00000000..8cf4671f --- /dev/null +++ b/src/apify_client/_pagination.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import asyncio +from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Awaitable, Callable, Coroutine, Generator, Iterator + +T = TypeVar('T') + + +class HasItems(Protocol[T]): + items: list[T] + + +def _min_for_limit_param(a: int | None, b: int | None) -> int | None: + """Return minimum of two 
limit parameters, treating `None` or `0` as infinity. + + The Apify API treats `0` as no limit for the `limit` parameter, so `0` here means infinity. + Returns `None` when both inputs represent infinity. + """ + if a == 0: + a = None + if b == 0: + b = None + if a is None: + return b + if b is None: + return a + return min(a, b) + + +class _LazyTask(Generic[T]): + """Task that is created lazily upon awaiting. + + This allows to reuse the same Task multiple times without the need to schedule the task when it is created. + """ + + def __init__(self, awaitable: Coroutine[Any, Any, T]) -> None: + self._awaitable = awaitable + self._task: asyncio.Task[T] | None = None + + def __await__(self) -> Generator[Any, None, T]: + if self._task is None: + self._task = asyncio.create_task(self._awaitable) + return (yield from self._task.__await__()) + + +def build_get_iterator( + callback: Callable[..., HasItems[T]], + first_page: HasItems[T], + **kwargs: Any, +) -> Callable[[], Iterator[T]]: + """Build a factory for `Iterator` to yield items across paginated API calls. + + The callback is invoked to lazy fetch items from API. + + There are several optional kwargs that control the pagination, but not all are accepted on each paginated endpoint. + Some endpoints do not return all paginated metadata, so the implementation should be resilient to missing fields, + but it can use them if available. + + The `total` field from the first page is not trusted for stopping iteration because it may change between calls; + iteration stops when a page has no items or when the user-requested `limit` has been reached. + + The `count` field does not count objects returned, but objects scanned by the API. For example when using filters, + returned items can be smaller than `count`. Therefore, `count` should be used for correct offset calculation if + available. + + Iteration relevant kwargs: + chunk_size: Maximum number of items requested per API call during iteration. 
Pass `0` + or `None` to let the API decide (effectively infinity). + limit: User-requested total item limit. Stops iteration once this many items are yielded. + offset: Starting offset for the first page. + **other: Passed through to the callback unchanged. + """ + chunk_size = kwargs.pop('chunk_size', 0) or 0 + offset = kwargs.get('offset') or 0 + limit = kwargs.get('limit') or 0 + + def get_iterator() -> Iterator[T]: + current_page = first_page + yield from current_page.items + + fetched_items = getattr(current_page, 'count', len(current_page.items)) + while current_page.items and (not limit or (limit > fetched_items)): + new_kwargs = { + **kwargs, + 'offset': offset + fetched_items, + 'limit': chunk_size if not limit else _min_for_limit_param(limit - fetched_items, chunk_size), + } + current_page = callback(**new_kwargs) + yield from current_page.items + fetched_items += getattr(current_page, 'count', len(current_page.items)) + + return get_iterator + + +def build_get_iterator_async( + callback: Callable[..., Coroutine[Any, Any, HasItems[T]]], + fetch_first_page: Awaitable[HasItems[T]], + **kwargs: Any, +) -> Callable[[], AsyncIterator[T]]: + """Build a factory for `AsyncIterator` to yield items across paginated API calls. + + Mirrors `build_get_iterator` but for async callbacks. 
+ """ + chunk_size = kwargs.pop('chunk_size', 0) or 0 + offset = kwargs.get('offset') or 0 + limit = kwargs.get('limit') or 0 + + async def get_async_iterator() -> AsyncIterator[T]: + current_page = await fetch_first_page + for item in current_page.items: + yield item + + fetched_items = getattr(current_page, 'count', len(current_page.items)) + while current_page.items and (not limit or (limit > fetched_items)): + new_kwargs = { + **kwargs, + 'offset': offset + fetched_items, + 'limit': chunk_size if not limit else _min_for_limit_param(limit - fetched_items, chunk_size), + } + current_page = await callback(**new_kwargs) + for item in current_page.items: + yield item + fetched_items += getattr(current_page, 'count', len(current_page.items)) + + return get_async_iterator + + +def build_get_cursor_iterator( + callback: Callable[..., HasItems[T]], + first_page: HasItems[T], + *, + cursor_param: str, + limit: int | None = None, + chunk_size: int | None = None, + **kwargs: Any, +) -> Callable[[], Iterator[T]]: + """Build a factory for `Iterator` to yield items across paginated API calls. + + Mirrors `build_get_iterator` but with cursor based pagination. + + The caller is responsible for fetching the first page (typically by calling `callback` with + the initial cursor). After each page, `getattr(page, f'next_{cursor_param}')` is consulted + to obtain the next cursor; returning `None` ends iteration. The iteration also stops when a + page is empty or when the caller-requested `limit` has been reached. 
+ """ + effective_chunk = chunk_size or 0 + user_limit = limit or 0 + + def get_iterator() -> Iterator[T]: + current_page = first_page + yield from current_page.items + + fetched = len(current_page.items) + next_cursor = getattr(current_page, f'next_{cursor_param}') + + while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched): + remaining = (user_limit - fetched) if user_limit else 0 + next_limit = effective_chunk if not user_limit else _min_for_limit_param(remaining, effective_chunk) + current_page = callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit}) + yield from current_page.items + fetched += len(current_page.items) + next_cursor = getattr(current_page, f'next_{cursor_param}') + + return get_iterator + + +def build_get_cursor_iterator_async( + callback: Callable[..., Coroutine[Any, Any, HasItems[T]]], + fetch_first_page: Awaitable[HasItems[T]], + *, + cursor_param: str, + limit: int | None = None, + chunk_size: int | None = None, + **kwargs: Any, +) -> Callable[[], AsyncIterator[T]]: + """Build a factory for `AsyncIterator` to yield items across paginated API calls. + + Mirrors `build_get_cursor_iterator` but for async callbacks. 
+ """ + effective_chunk = chunk_size or 0 + user_limit = limit or 0 + + async def get_async_iterator() -> AsyncIterator[T]: + current_page = await fetch_first_page + for item in current_page.items: + yield item + + fetched = len(current_page.items) + next_cursor = getattr(current_page, f'next_{cursor_param}') + + while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched): + remaining = (user_limit - fetched) if user_limit else 0 + next_limit = effective_chunk if not user_limit else _min_for_limit_param(remaining, effective_chunk) + current_page = await callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit}) + for item in current_page.items: + yield item + fetched += len(current_page.items) + next_cursor = getattr(current_page, f'next_{cursor_param}') + + return get_async_iterator diff --git a/src/apify_client/_pagination_classes.py b/src/apify_client/_pagination_classes.py new file mode 100644 index 00000000..b8a489d7 --- /dev/null +++ b/src/apify_client/_pagination_classes.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Awaitable, Callable, Generator, Iterator + +from apify_client._models_generated import ( + ActorShort, + BuildShort, + DatasetListItem, + EnvVar, + KeyValueStore, + KeyValueStoreKey, + Request, + RequestQueueShort, + RunShort, + ScheduleShort, + StoreListActor, + TaskShort, + Version, + WebhookDispatch, + WebhookShort, +) + +T = TypeVar('T') + + +@dataclass +class IterableOf(Generic[T]): + _get_iterator: Callable[[], Iterator[T]] = field(repr=False, compare=False) + + def __iter__(self) -> Iterator[T]: + return self._get_iterator() + + +@dataclass +class AsyncIterableOf(Generic[T]): + _get_async_iterator: Callable[[], AsyncIterator[T]] = field(repr=False, compare=False) + + def __aiter__(self) -> AsyncIterator[T]: + return 
self._get_async_iterator() + + +@dataclass +class PageWithItems(Generic[T]): + items: list[T] + + +@dataclass +class PageWithTotal: + total: int + + +@dataclass +class PageWithLimit: + limit: int + + +@dataclass +class PageWithCount: + count: int + + +@dataclass +class PageWithOffset: + offset: int + + +@dataclass +class PageWithDesc: + desc: bool + + +@dataclass +class PageOfItems(PageWithItems[T], PageWithTotal, PageWithLimit, PageWithCount, PageWithOffset, PageWithDesc): ... + + +@dataclass +class PageOfDatasetItems(PageOfItems[dict[str, Any]]): ... + + +@dataclass +class PageOfItemsOnlyTotal(PageWithItems[T], PageWithTotal): ... + + +@dataclass +class PageOfRequests(PageWithItems[Request], PageWithLimit): + exclusive_start_id: str | None = None + cursor: str | None = None + next_cursor: str | None = None + + +@dataclass +class PageOfKeys(PageWithItems[KeyValueStoreKey], PageWithLimit, PageWithCount): + is_truncated: bool + exclusive_start_key: str | None = None + next_exclusive_start_key: str | None = None + + +@dataclass +class AwaitablePage(Generic[T]): + _awaitable_first_page: Awaitable[PageOfItems[T]] = field(repr=False, compare=False) + + def __await__(self) -> Generator[Any, Any, PageOfItems[T]]: + return self._awaitable_first_page.__await__() + + +@dataclass +class AwaitablePageOnlyTotal(Generic[T]): + _awaitable_first_page: Awaitable[PageOfItemsOnlyTotal[T]] = field(repr=False, compare=False) + + def __await__(self) -> Generator[Any, Any, PageOfItemsOnlyTotal[T]]: + return self._awaitable_first_page.__await__() + + +@dataclass +class AwaitablePageOfDatasetItems: + _awaitable_first_page: Awaitable[PageOfDatasetItems] = field(repr=False, compare=False) + + def __await__(self) -> Generator[Any, Any, PageOfDatasetItems]: + return self._awaitable_first_page.__await__() + + +@dataclass +class AwaitablePageOfRequests: + _awaitable_first_page: Awaitable[PageOfRequests] = field(repr=False, compare=False) + + def __await__(self) -> Generator[Any, Any, 
PageOfRequests]: + return self._awaitable_first_page.__await__() + + +@dataclass +class AwaitablePageOfKeys: + _awaitable_first_page: Awaitable[PageOfKeys] = field(repr=False, compare=False) + + def __await__(self) -> Generator[Any, Any, PageOfKeys]: + return self._awaitable_first_page.__await__() + + +@dataclass +class IterablePageOfActors(PageOfItems[ActorShort], IterableOf[ActorShort]): ... + + +@dataclass +class IterablePageOfActorsAsync(AwaitablePage[ActorShort], AsyncIterableOf[ActorShort]): ... + + +@dataclass +class IterablePageOfBuilds(PageOfItems[BuildShort], IterableOf[BuildShort]): ... + + +@dataclass +class IterablePageOfBuildsAsync(AwaitablePage[BuildShort], AsyncIterableOf[BuildShort]): ... + + +@dataclass +class IterablePageOfDatasets(PageOfItems[DatasetListItem], IterableOf[DatasetListItem]): ... + + +@dataclass +class IterablePageOfDatasetsAsync(AwaitablePage[DatasetListItem], AsyncIterableOf[DatasetListItem]): ... + + +@dataclass +class IterablePageOfDatasetItems(PageOfDatasetItems, IterableOf[dict[str, Any]]): ... + + +@dataclass +class IterablePageOfDatasetItemsAsync(AwaitablePageOfDatasetItems, AsyncIterableOf[dict[str, Any]]): ... + + +@dataclass +class IterablePageOfKeyValueStores(PageOfItems[KeyValueStore], IterableOf[KeyValueStore]): ... + + +@dataclass +class IterablePageOfKeyValueStoresAsync(AwaitablePage[KeyValueStore], AsyncIterableOf[KeyValueStore]): ... + + +@dataclass +class IterablePageOfRequestQueues(PageOfItems[RequestQueueShort], IterableOf[RequestQueueShort]): ... + + +@dataclass +class IterablePageOfRequestQueuesAsync(AwaitablePage[RequestQueueShort], AsyncIterableOf[RequestQueueShort]): ... + + +@dataclass +class IterablePageOfRuns(PageOfItems[RunShort], IterableOf[RunShort]): ... + + +@dataclass +class IterablePageOfRunsAsync(AwaitablePage[RunShort], AsyncIterableOf[RunShort]): ... + + +@dataclass +class IterablePageOfSchedules(PageOfItems[ScheduleShort], IterableOf[ScheduleShort]): ... 
+ + +@dataclass +class IterablePageOfSchedulesAsync(AwaitablePage[ScheduleShort], AsyncIterableOf[ScheduleShort]): ... + + +@dataclass +class IterablePageOfStoreActors(PageOfItems[StoreListActor], IterableOf[StoreListActor]): ... + + +@dataclass +class IterablePageOfStoreActorsAsync(AwaitablePage[StoreListActor], AsyncIterableOf[StoreListActor]): ... + + +@dataclass +class IterablePageOfTasks(PageOfItems[TaskShort], IterableOf[TaskShort]): ... + + +@dataclass +class IterablePageOfTasksAsync(AwaitablePage[TaskShort], AsyncIterableOf[TaskShort]): ... + + +@dataclass +class IterablePageOfWebhookDispatches(PageOfItems[WebhookDispatch], IterableOf[WebhookDispatch]): ... + + +@dataclass +class IterablePageOfWebhookDispatchesAsync(AwaitablePage[WebhookDispatch], AsyncIterableOf[WebhookDispatch]): ... + + +@dataclass +class IterablePageOfWebhooks(PageOfItems[WebhookShort], IterableOf[WebhookShort]): ... + + +@dataclass +class IterablePageOfWebhooksAsync(AwaitablePage[WebhookShort], AsyncIterableOf[WebhookShort]): ... + + +@dataclass +class IterablePageOfEnvVars(PageOfItemsOnlyTotal, IterableOf[EnvVar]): ... + + +@dataclass +class IterablePageOfEnvVarsAsync(AwaitablePageOnlyTotal[EnvVar], AsyncIterableOf[EnvVar]): ... + + +@dataclass +class IterablePageOfVersions(PageOfItemsOnlyTotal, IterableOf[Version]): ... + + +@dataclass +class IterablePageOfVersionsAsync(AwaitablePageOnlyTotal[Version], AsyncIterableOf[Version]): ... + + +@dataclass +class IterablePageOfRequests(PageOfRequests, IterableOf[Request]): ... + + +@dataclass +class IterablePageOfRequestsAsync(AwaitablePageOfRequests, AsyncIterableOf[Request]): ... + + +@dataclass +class IterablePageOfKeys(PageOfKeys, IterableOf[KeyValueStoreKey]): ... + + +@dataclass +class IterablePageOfKeysAsync(AwaitablePageOfKeys, AsyncIterableOf[KeyValueStoreKey]): ... 
diff --git a/src/apify_client/_resource_clients/actor_collection.py b/src/apify_client/_resource_clients/actor_collection.py index 63a780df..802f0799 100644 --- a/src/apify_client/_resource_clients/actor_collection.py +++ b/src/apify_client/_resource_clients/actor_collection.py @@ -10,15 +10,25 @@ CreateActorRequest, DefaultRunOptions, ExampleRunInput, - ListOfActors, ListOfActorsResponse, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfActors, + IterablePageOfActorsAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync from apify_client._utils import to_seconds if TYPE_CHECKING: from datetime import timedelta + from apify_client._models_generated import ActorShort from apify_client._types import Timeout _SORT_BY_TO_API: dict[str, str] = { @@ -55,9 +65,12 @@ def list( desc: bool | None = None, sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at', timeout: Timeout = 'medium', - ) -> ListOfActors: + ) -> IterablePageOfActors: """List the Actors the user has created or used. + The returned page also supports iteration: `for item in client.list(...)` yields individual Actors + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors Args: @@ -72,8 +85,31 @@ def list( The list of available Actors matching the specified filters. 
""" api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None - result = self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by) - return ListOfActorsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItems[ActorShort]: + result = self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs) + data = ListOfActorsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfActors( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, + ) def create( self, @@ -192,7 +228,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, my: bool | None = None, @@ -201,9 +237,12 @@ async def list( desc: bool | None = None, sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at', timeout: Timeout = 'medium', - ) -> ListOfActors: + ) -> IterablePageOfActorsAsync: """List the Actors the user has created or used. + The returned page also supports iteration: `async for item in client.list(...)` yields individual Actors + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors Args: @@ -218,8 +257,28 @@ async def list( The list of available Actors matching the specified filters. 
""" api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None - result = await self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by) - return ListOfActorsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItems[ActorShort]: + result = await self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs) + data = ListOfActorsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfActorsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) async def create( self, diff --git a/src/apify_client/_resource_clients/actor_env_var_collection.py b/src/apify_client/_resource_clients/actor_env_var_collection.py index d4eb2af5..8cff6522 100644 --- a/src/apify_client/_resource_clients/actor_env_var_collection.py +++ b/src/apify_client/_resource_clients/actor_env_var_collection.py @@ -3,7 +3,17 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group -from apify_client._models_generated import EnvVar, EnvVarResponse, ListOfEnvVars, ListOfEnvVarsResponse +from apify_client._models_generated import EnvVar, EnvVarResponse, ListOfEnvVarsResponse +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfEnvVars, + IterablePageOfEnvVarsAsync, + PageOfItemsOnlyTotal, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: @@ -29,9 +39,12 @@ def __init__( **kwargs, ) - def list(self, *, timeout: Timeout = 'short') 
-> ListOfEnvVars: + def list(self, *, timeout: Timeout = 'short') -> IterablePageOfEnvVars: """List the available Actor environment variables. + The returned page also supports iteration: `for item in client.list()` yields individual environment + variables. + https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables Args: @@ -40,8 +53,20 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: Returns: The list of available Actor environment variables. """ - result = self._list(timeout=timeout) - return ListOfEnvVarsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[EnvVar]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfEnvVarsResponse.model_validate(result).data + return PageOfItemsOnlyTotal(items=data.items, total=data.total) + + first_page = _callback() + get_iterator = build_get_iterator(_callback, first_page) + + return IterablePageOfEnvVars( + _get_iterator=get_iterator, + items=first_page.items, + total=first_page.total, + ) def create( self, @@ -90,9 +115,12 @@ def __init__( **kwargs, ) - async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: + def list(self, *, timeout: Timeout = 'short') -> IterablePageOfEnvVarsAsync: """List the available Actor environment variables. + The returned page also supports iteration: `async for item in client.list()` yields individual environment + variables. + https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables Args: @@ -101,8 +129,19 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars: Returns: The list of available Actor environment variables. 
""" - result = await self._list(timeout=timeout) - return ListOfEnvVarsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[EnvVar]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfEnvVarsResponse.model_validate(result).data + return PageOfItemsOnlyTotal(items=data.items, total=data.total) + + fetch_first_page = _LazyTask(_callback()) + get_async_iterator = build_get_iterator_async(_callback, fetch_first_page) + + return IterablePageOfEnvVarsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) async def create( self, diff --git a/src/apify_client/_resource_clients/actor_version_collection.py b/src/apify_client/_resource_clients/actor_version_collection.py index aac5e4c3..3cd7504e 100644 --- a/src/apify_client/_resource_clients/actor_version_collection.py +++ b/src/apify_client/_resource_clients/actor_version_collection.py @@ -8,7 +8,6 @@ from apify_client._models_generated import ( CreateOrUpdateVersionRequest, EnvVarRequest, - ListOfVersions, ListOfVersionsResponse, SourceCodeFile, SourceCodeFolder, @@ -16,6 +15,16 @@ VersionResponse, VersionSourceType, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfVersions, + IterablePageOfVersionsAsync, + PageOfItemsOnlyTotal, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: @@ -44,9 +53,11 @@ def __init__( **kwargs, ) - def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: + def list(self, *, timeout: Timeout = 'short') -> IterablePageOfVersions: """List the available Actor versions. + The returned page also supports iteration: `for item in client.list()` yields individual versions. 
+ https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions Args: @@ -55,8 +66,20 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: Returns: The list of available Actor versions. """ - result = self._list(timeout=timeout) - return ListOfVersionsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[Version]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfVersionsResponse.model_validate(result).data + return PageOfItemsOnlyTotal(items=data.items, total=data.total) + + first_page = _callback() + get_iterator = build_get_iterator(_callback, first_page) + + return IterablePageOfVersions( + _get_iterator=get_iterator, + items=first_page.items, + total=first_page.total, + ) def create( self, @@ -131,9 +154,11 @@ def __init__( **kwargs, ) - async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: + def list(self, *, timeout: Timeout = 'short') -> IterablePageOfVersionsAsync: """List the available Actor versions. + The returned page also supports iteration: `async for item in client.list()` yields individual versions. + https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions Args: @@ -142,8 +167,19 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions: Returns: The list of available Actor versions. 
""" - result = await self._list(timeout=timeout) - return ListOfVersionsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[Version]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfVersionsResponse.model_validate(result).data + return PageOfItemsOnlyTotal(items=data.items, total=data.total) + + fetch_first_page = _LazyTask(_callback()) + get_async_iterator = build_get_iterator_async(_callback, fetch_first_page) + + return IterablePageOfVersionsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) async def create( self, diff --git a/src/apify_client/_resource_clients/build_collection.py b/src/apify_client/_resource_clients/build_collection.py index 6ead2a67..129ba763 100644 --- a/src/apify_client/_resource_clients/build_collection.py +++ b/src/apify_client/_resource_clients/build_collection.py @@ -3,10 +3,21 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group -from apify_client._models_generated import ListOfBuilds, ListOfBuildsResponse +from apify_client._models_generated import ListOfBuildsResponse +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfBuilds, + IterablePageOfBuildsAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models_generated import BuildShort from apify_client._types import Timeout @@ -36,12 +47,15 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfBuilds: + ) -> IterablePageOfBuilds: """List all Actor builds. List all Actor builds, either of a single Actor, or all user's Actors, depending on where this client was initialized from. 
+ The returned page also supports iteration: `for item in client.list(...)` yields individual builds + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/build-collection/get-list-of-builds https://docs.apify.com/api/v2#/reference/actor-builds/build-collection/get-user-builds-list @@ -54,8 +68,31 @@ def list( Returns: The retrieved Actor builds. """ - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfBuildsResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItems[BuildShort]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfBuildsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfBuilds( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, + ) @docs_group('Resource clients') @@ -77,19 +114,22 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfBuilds: + ) -> IterablePageOfBuildsAsync: """List all Actor builds. List all Actor builds, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `async for item in client.list(...)` yields individual builds + and transparently fetches further pages from the API. 
+ https://docs.apify.com/api/v2#/reference/actors/build-collection/get-list-of-builds https://docs.apify.com/api/v2#/reference/actor-builds/build-collection/get-user-builds-list @@ -102,5 +142,25 @@ async def list( Returns: The retrieved Actor builds. """ - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfBuildsResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItems[BuildShort]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfBuildsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfBuildsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py index 850f32d5..c2d2e0f5 100644 --- a/src/apify_client/_resource_clients/dataset.py +++ b/src/apify_client/_resource_clients/dataset.py @@ -2,12 +2,24 @@ import warnings from contextlib import asynccontextmanager, contextmanager -from dataclasses import dataclass from typing import TYPE_CHECKING, Any from urllib.parse import urlencode, urlparse, urlunparse +from pydantic import BaseModel + from apify_client._docs import docs_group from apify_client._models_generated import Dataset, DatasetResponse, DatasetStatistics, DatasetStatisticsResponse +from apify_client._pagination import ( + _LazyTask, + _min_for_limit_param, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfDatasetItems, + IterablePageOfDatasetItemsAsync, + PageOfDatasetItems, +) from 
apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync from apify_client._utils import ( create_storage_content_signature, @@ -25,8 +37,7 @@ @docs_group('Other') -@dataclass -class DatasetItemsPage: +class DatasetItemsPage(BaseModel): """A page of dataset items returned by the `list_items` method. Dataset items are arbitrary JSON objects stored in the dataset, so they cannot be @@ -141,10 +152,14 @@ def list_items( flatten: list[str] | None = None, view: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'long', - ) -> DatasetItemsPage: + ) -> IterablePageOfDatasetItems: """List the items of the dataset. + The returned page also supports iteration: `for item in client.list_items(...)` yields individual + items and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -174,45 +189,64 @@ def list_items( flatten: A list of fields that should be flattened. view: Name of the dataset view to be used. signature: Signature used to access the items. + chunk_size: Maximum number of items requested per API call when iterating. Only relevant when + iterating across pages. timeout: Timeout for the API HTTP request. Returns: A page of the list of dataset items according to the specified filters. 
""" - request_params = self._build_params( - offset=offset, - limit=limit, - desc=desc, - clean=clean, - fields=fields, - omit=omit, - unwind=unwind, - skipEmpty=skip_empty, - skipHidden=skip_hidden, - flatten=flatten, - view=view, - signature=signature, - ) - response = self._http_client.call( - url=self._build_url('items'), - method='GET', - params=request_params, - timeout=timeout, - ) + def _fetch_page(**kwargs: Any) -> PageOfDatasetItems: + request_params = self._build_params( + desc=desc, + clean=clean, + fields=fields, + omit=omit, + unwind=unwind, + skipEmpty=skip_empty, + skipHidden=skip_hidden, + flatten=flatten, + view=view, + signature=signature, + **kwargs, + ) - # When using signature, API returns items as list directly - items = response_to_list(response) - - return DatasetItemsPage( - items=items, - total=int(response.headers['x-apify-pagination-total']), - offset=int(response.headers['x-apify-pagination-offset']), - # x-apify-pagination-count returns invalid values when hidden/empty items are skipped - count=len(items), - # API returns 999999999999 when no limit is used - limit=int(response.headers['x-apify-pagination-limit']), - desc=response.headers['x-apify-pagination-desc'].lower() == 'true', + response = self._http_client.call( + url=self._build_url('items'), + method='GET', + params=request_params, + timeout=timeout, + ) + + # When using signature, API returns items as list directly + items = response_to_list(response) + + # Pagination metadata (total, offset, count, limit, desc) is read from the response headers + + return PageOfDatasetItems( + items=items, + total=int(response.headers['x-apify-pagination-total']), + offset=int(response.headers['x-apify-pagination-offset']), + # x-apify-pagination-count returns count of processed items, not count of returned items + # This makes a difference when items were filtered out using skip_hidden/skip_empty + count=max(int(response.headers['x-apify-pagination-count']), len(items)), + # API returns 999999999999 when no limit is used +
limit=int(response.headers['x-apify-pagination-limit']), + desc=response.headers['x-apify-pagination-desc'].lower() == 'true', + ) + + first_page = _fetch_page(offset=offset, limit=_min_for_limit_param(limit, chunk_size)) + get_iterator = build_get_iterator(_fetch_page, first_page, offset=offset, limit=limit, chunk_size=chunk_size) + + return IterablePageOfDatasetItems( + _get_iterator=get_iterator, + items=first_page.items, + total=first_page.total, + offset=first_page.offset, + count=first_page.count, + limit=first_page.limit, + desc=first_page.desc, ) def iterate_items( @@ -229,9 +263,11 @@ def iterate_items( skip_hidden: bool | None = None, signature: str | None = None, timeout: Timeout = 'long', - ) -> Iterator[dict]: + ) -> Iterator[dict[str, Any]]: """Iterate over the items in the dataset. + Deprecated: iterate the return value of `DatasetClient.list_items()` instead. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -264,42 +300,26 @@ def iterate_items( Yields: An item from the dataset. """ - cache_size = 1000 - - should_finish = False - read_items = 0 - - # We can't rely on DatasetItemsPage.total because that is updated with a delay, - # so if you try to read the dataset items right after a run finishes, you could miss some. - # Instead, we just read and read until we reach the limit, or until there are no more items to read. 
- while not should_finish: - effective_limit = cache_size - if limit is not None: - if read_items == limit: - break - effective_limit = min(cache_size, limit - read_items) - - current_items_page = self.list_items( - offset=offset + read_items, - limit=effective_limit, - clean=clean, - desc=desc, - fields=fields, - omit=omit, - unwind=unwind, - skip_empty=skip_empty, - skip_hidden=skip_hidden, - signature=signature, - timeout=timeout, - ) - - yield from current_items_page.items - - current_page_item_count = len(current_items_page.items) - read_items += current_page_item_count - - if current_page_item_count < cache_size: - should_finish = True + warnings.warn( + '`DatasetClient.iterate_items()` is deprecated, iterate the return value of ' + '`DatasetClient.list_items()` instead.', + DeprecationWarning, + stacklevel=2, + ) + yield from self.list_items( + offset=offset, + limit=limit, + clean=clean, + desc=desc, + fields=fields, + omit=omit, + unwind=unwind, + skip_empty=skip_empty, + skip_hidden=skip_hidden, + signature=signature, + chunk_size=1000, + timeout=timeout, + ) def download_items( self, @@ -801,7 +821,7 @@ async def delete(self, *, timeout: Timeout = 'short') -> None: """ await self._delete(timeout=timeout) - async def list_items( + def list_items( self, *, offset: int | None = None, @@ -816,10 +836,14 @@ async def list_items( flatten: list[str] | None = None, view: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'long', - ) -> DatasetItemsPage: + ) -> IterablePageOfDatasetItemsAsync: """List the items of the dataset. + The returned page also supports iteration: `async for item in client.list_items(...)` yields individual + items and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -849,45 +873,64 @@ async def list_items( flatten: A list of fields that should be flattened. view: Name of the dataset view to be used. 
signature: Signature used to access the items. + chunk_size: Maximum number of items requested per API call when iterating. Only relevant when + iterating across pages. timeout: Timeout for the API HTTP request. Returns: A page of the list of dataset items according to the specified filters. """ - request_params = self._build_params( - offset=offset, - limit=limit, - desc=desc, - clean=clean, - fields=fields, - omit=omit, - unwind=unwind, - skipEmpty=skip_empty, - skipHidden=skip_hidden, - flatten=flatten, - view=view, - signature=signature, - ) - response = await self._http_client.call( - url=self._build_url('items'), - method='GET', - params=request_params, - timeout=timeout, + async def _fetch_page( + *, + offset: int | None = None, + limit: int | None = None, + ) -> PageOfDatasetItems: + request_params = self._build_params( + offset=offset, + limit=limit, + desc=desc, + clean=clean, + fields=fields, + omit=omit, + unwind=unwind, + skipEmpty=skip_empty, + skipHidden=skip_hidden, + flatten=flatten, + view=view, + signature=signature, + ) + + response = await self._http_client.call( + url=self._build_url('items'), + method='GET', + params=request_params, + timeout=timeout, + ) + + # When using signature, API returns items as list directly + items = response_to_list(response) + + return PageOfDatasetItems( + items=items, + total=int(response.headers['x-apify-pagination-total']), + offset=int(response.headers['x-apify-pagination-offset']), + # x-apify-pagination-count returns count of processed items, not count of returned items + # This makes difference when items were filtered using hidden/empty + count=max(int(response.headers['x-apify-pagination-count']), len(items)), + # API returns 999999999999 when no limit is used + limit=int(response.headers['x-apify-pagination-limit']), + desc=response.headers['x-apify-pagination-desc'].lower() == 'true', + ) + + fetch_first_page = _LazyTask(_fetch_page(offset=offset, limit=_min_for_limit_param(limit, chunk_size))) + get_async_iterator =
build_get_iterator_async( + _fetch_page, fetch_first_page, offset=offset, limit=limit, chunk_size=chunk_size ) - # When using signature, API returns items as list directly - items = response_to_list(response) - - return DatasetItemsPage( - items=items, - total=int(response.headers['x-apify-pagination-total']), - offset=int(response.headers['x-apify-pagination-offset']), - # x-apify-pagination-count returns invalid values when hidden/empty items are skipped - count=len(items), - # API returns 999999999999 when no limit is used - limit=int(response.headers['x-apify-pagination-limit']), - desc=response.headers['x-apify-pagination-desc'].lower() == 'true', + return IterablePageOfDatasetItemsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) async def iterate_items( @@ -904,9 +947,11 @@ async def iterate_items( skip_hidden: bool | None = None, signature: str | None = None, timeout: Timeout = 'long', - ) -> AsyncIterator[dict]: + ) -> AsyncIterator[dict[str, Any]]: """Iterate over the items in the dataset. + Deprecated: iterate the return value of `DatasetClientAsync.list_items()` instead. + https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items Args: @@ -939,43 +984,27 @@ async def iterate_items( Yields: An item from the dataset. """ - cache_size = 1000 - - should_finish = False - read_items = 0 - - # We can't rely on DatasetItemsPage.total because that is updated with a delay, - # so if you try to read the dataset items right after a run finishes, you could miss some. - # Instead, we just read and read until we reach the limit, or until there are no more items to read. 
- while not should_finish: - effective_limit = cache_size - if limit is not None: - if read_items == limit: - break - effective_limit = min(cache_size, limit - read_items) - - current_items_page = await self.list_items( - offset=offset + read_items, - limit=effective_limit, - clean=clean, - desc=desc, - fields=fields, - omit=omit, - unwind=unwind, - skip_empty=skip_empty, - skip_hidden=skip_hidden, - signature=signature, - timeout=timeout, - ) - - for item in current_items_page.items: - yield item - - current_page_item_count = len(current_items_page.items) - read_items += current_page_item_count - - if current_page_item_count < cache_size: - should_finish = True + warnings.warn( + '`DatasetClientAsync.iterate_items()` is deprecated, iterate the return value of ' + '`DatasetClientAsync.list_items()` instead.', + DeprecationWarning, + stacklevel=2, + ) + async for item in self.list_items( + offset=offset, + limit=limit, + clean=clean, + desc=desc, + fields=fields, + omit=omit, + unwind=unwind, + skip_empty=skip_empty, + skip_hidden=skip_hidden, + signature=signature, + chunk_size=1000, + timeout=timeout, + ): + yield item async def get_items_as_bytes( self, diff --git a/src/apify_client/_resource_clients/dataset_collection.py b/src/apify_client/_resource_clients/dataset_collection.py index 2ffb71d6..f9473e67 100644 --- a/src/apify_client/_resource_clients/dataset_collection.py +++ b/src/apify_client/_resource_clients/dataset_collection.py @@ -6,13 +6,23 @@ from apify_client._models_generated import ( Dataset, DatasetResponse, - ListOfDatasets, ListOfDatasetsResponse, StorageOwnership, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfDatasets, + IterablePageOfDatasetsAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models_generated import 
DatasetListItem from apify_client._types import Timeout @@ -44,9 +54,12 @@ def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfDatasets: + ) -> IterablePageOfDatasets: """List the available datasets. + The returned page also supports iteration: `for item in client.list(...)` yields individual datasets + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets Args: @@ -61,10 +74,31 @@ def list( Returns: The list of available datasets matching the specified filters. """ - result = self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership + + def _callback(**kwargs: Any) -> PageOfItems[DatasetListItem]: + result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + data = ListOfDatasetsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfDatasets( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, ) - return ListOfDatasetsResponse.model_validate(result).data def get_or_create( self, @@ -108,7 +142,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, unnamed: bool | None = None, @@ -117,9 +151,12 @@ async def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfDatasets: + ) -> IterablePageOfDatasetsAsync: """List the available datasets. 
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual datasets + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets Args: @@ -134,10 +171,28 @@ async def list( Returns: The list of available datasets matching the specified filters. """ - result = await self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership + + async def _callback(**kwargs: Any) -> PageOfItems[DatasetListItem]: + result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + data = ListOfDatasetsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfDatasetsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - return ListOfDatasetsResponse.model_validate(result).data async def get_or_create( self, diff --git a/src/apify_client/_resource_clients/key_value_store.py b/src/apify_client/_resource_clients/key_value_store.py index 247073c4..a9488ad7 100644 --- a/src/apify_client/_resource_clients/key_value_store.py +++ b/src/apify_client/_resource_clients/key_value_store.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +import warnings from contextlib import asynccontextmanager, contextmanager from http import HTTPStatus from typing import TYPE_CHECKING, Any @@ -9,11 +10,20 @@ from apify_client._docs import docs_group from apify_client._models_generated import ( KeyValueStore, - KeyValueStoreKey, KeyValueStoreResponse, - ListOfKeys, ListOfKeysResponse, ) +from 
apify_client._pagination import ( + _LazyTask, + _min_for_limit_param, + build_get_cursor_iterator, + build_get_cursor_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfKeys, + IterablePageOfKeysAsync, + PageOfKeys, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync from apify_client._utils import ( catch_not_found_or_throw, @@ -29,7 +39,7 @@ from datetime import timedelta from apify_client._http_clients import HttpResponse - from apify_client._models_generated import GeneralAccess + from apify_client._models_generated import GeneralAccess, KeyValueStoreKey from apify_client._types import Timeout @@ -144,92 +154,122 @@ def list_keys( collection: str | None = None, prefix: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeys: + ) -> IterablePageOfKeys: """List the keys in the key-value store. + The returned page also supports iteration: `for key in client.list_keys(...)` yields individual + keys and transparently fetches further pages using cursor-based pagination. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: - limit: Number of keys to be returned. Maximum value is 1000. + limit: Total number of keys to yield across all pages when iterating. The API caps each + individual request at 1000 keys; use `chunk_size` to control the per-request size. exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Capped at + 1000 by the API. Only relevant when iterating across pages. timeout: Timeout for the API HTTP request. Returns: The list of keys in the key-value store matching the given arguments. 
""" - request_params = self._build_params( + + def _callback(*, limit: int | None = None, exclusive_start_key: str | None = None) -> PageOfKeys: + request_params = self._build_params( + limit=limit, + exclusiveStartKey=exclusive_start_key, + collection=collection, + prefix=prefix, + signature=signature, + ) + response = self._http_client.call( + url=self._build_url('keys'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + data = ListOfKeysResponse.model_validate(result).data + return PageOfKeys( + items=data.items, + count=data.count, + limit=data.limit, + is_truncated=data.is_truncated, + exclusive_start_key=data.exclusive_start_key, + next_exclusive_start_key=data.next_exclusive_start_key, + ) + + first_limit = _min_for_limit_param(limit, chunk_size) + first_page = _callback(limit=first_limit, exclusive_start_key=exclusive_start_key) + get_iterator = build_get_cursor_iterator( + _callback, + first_page, + cursor_param='exclusive_start_key', limit=limit, - exclusiveStartKey=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, + chunk_size=chunk_size, ) - response = self._http_client.call( - url=self._build_url('keys'), - method='GET', - params=request_params, - timeout=timeout, + return IterablePageOfKeys( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + is_truncated=first_page.is_truncated, + exclusive_start_key=first_page.exclusive_start_key, + next_exclusive_start_key=first_page.next_exclusive_start_key, ) - result = response_to_dict(response) - return ListOfKeysResponse.model_validate(result).data - def iterate_keys( self, *, limit: int | None = None, + exclusive_start_key: str | None = None, collection: str | None = None, prefix: str | None = None, signature: str | None = None, + chunk_size: int | None = 1000, timeout: Timeout = 'long', ) -> Iterator[KeyValueStoreKey]: """Iterate over the keys in the key-value 
store. + Deprecated: iterate the return value of `KeyValueStoreClient.list_keys()` instead. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: - limit: Maximum number of keys to return. By default there is no limit. + limit: Total number of keys to yield across all pages. The API caps each individual + request at 1000 keys; use `chunk_size` to control the per-request size. + exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Capped at + 1000 by the API. Only relevant when iterating across pages. timeout: Timeout for the API HTTP request. Yields: A key from the key-value store. """ - cache_size = 1000 - read_keys = 0 - exclusive_start_key: str | None = None - - while True: - effective_limit = cache_size - if limit is not None: - if read_keys == limit: - break - effective_limit = min(cache_size, limit - read_keys) - - current_keys_page = self.list_keys( - limit=effective_limit, - exclusive_start_key=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, - timeout=timeout, - ) - - yield from current_keys_page.items - - read_keys += len(current_keys_page.items) - - if not current_keys_page.is_truncated: - break - - exclusive_start_key = current_keys_page.next_exclusive_start_key + warnings.warn( + '`KeyValueStoreClient.iterate_keys()` is deprecated, iterate the return value of ' + '`KeyValueStoreClient.list_keys()` instead.', + DeprecationWarning, + stacklevel=2, + ) + yield from self.list_keys( + limit=limit, + exclusive_start_key=exclusive_start_key, + collection=collection, + prefix=prefix, + signature=signature, + chunk_size=chunk_size, + timeout=timeout, + ) def get_record(self, key: str, *, signature: str | None = 
None, timeout: Timeout = 'long') -> dict | None: """Retrieve the given record from the key-value store. @@ -461,7 +501,7 @@ def create_keys_public_url( Any other options (like `limit` or `prefix`) will be included as query parameters in the URL. Args: - limit: Number of keys to be returned. Maximum value is 1000. + limit: Number of keys to be returned by the signed request. Maximum value is 1000. exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. @@ -566,7 +606,7 @@ async def delete(self, *, timeout: Timeout = 'short') -> None: """ await self._delete(timeout=timeout) - async def list_keys( + def list_keys( self, *, limit: int | None = None, @@ -574,93 +614,118 @@ async def list_keys( collection: str | None = None, prefix: str | None = None, signature: str | None = None, + chunk_size: int | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeys: + ) -> IterablePageOfKeysAsync: """List the keys in the key-value store. + The returned page also supports iteration: `async for key in client.list_keys(...)` yields individual + keys and transparently fetches further pages using cursor-based pagination. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: - limit: Number of keys to be returned. Maximum value is 1000. + limit: Total number of keys to yield across all pages when iterating. The API caps each + individual request at 1000 keys; use `chunk_size` to control the per-request size. exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Capped at + 1000 by the API. 
Only relevant when iterating across pages. timeout: Timeout for the API HTTP request. Returns: The list of keys in the key-value store matching the given arguments. """ - request_params = self._build_params( + + async def _callback(*, limit: int | None = None, exclusive_start_key: str | None = None) -> PageOfKeys: + request_params = self._build_params( + limit=limit, + exclusiveStartKey=exclusive_start_key, + collection=collection, + prefix=prefix, + signature=signature, + ) + response = await self._http_client.call( + url=self._build_url('keys'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + data = ListOfKeysResponse.model_validate(result).data + return PageOfKeys( + items=data.items, + count=data.count, + limit=data.limit, + is_truncated=data.is_truncated, + exclusive_start_key=data.exclusive_start_key, + next_exclusive_start_key=data.next_exclusive_start_key, + ) + + first_limit = _min_for_limit_param(limit, chunk_size) + fetch_first_page = _LazyTask(_callback(limit=first_limit, exclusive_start_key=exclusive_start_key)) + get_async_iterator = build_get_cursor_iterator_async( + _callback, + fetch_first_page, + cursor_param='exclusive_start_key', limit=limit, - exclusiveStartKey=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, + chunk_size=chunk_size, ) - response = await self._http_client.call( - url=self._build_url('keys'), - method='GET', - params=request_params, - timeout=timeout, + return IterablePageOfKeysAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - result = response_to_dict(response) - return ListOfKeysResponse.model_validate(result).data - async def iterate_keys( self, *, limit: int | None = None, + exclusive_start_key: str | None = None, collection: str | None = None, prefix: str | None = None, signature: str | None = None, + chunk_size: int | None = 1000, timeout: Timeout = 'long', ) -> 
AsyncIterator[KeyValueStoreKey]: """Iterate over the keys in the key-value store. + Deprecated: iterate the return value of `KeyValueStoreClientAsync.list_keys()` instead. + https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys Args: - limit: Maximum number of keys to return. By default there is no limit. + limit: Total number of keys to yield across all pages. The API caps each individual + request at 1000 keys; use `chunk_size` to control the per-request size. + exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. signature: Signature used to access the items. + chunk_size: Maximum number of keys requested per API call when iterating. Capped at + 1000 by the API. Only relevant when iterating across pages. timeout: Timeout for the API HTTP request. Yields: A key from the key-value store. """ - cache_size = 1000 - read_keys = 0 - exclusive_start_key: str | None = None - - while True: - effective_limit = cache_size - if limit is not None: - if read_keys == limit: - break - effective_limit = min(cache_size, limit - read_keys) - - current_keys_page = await self.list_keys( - limit=effective_limit, - exclusive_start_key=exclusive_start_key, - collection=collection, - prefix=prefix, - signature=signature, - timeout=timeout, - ) - - for key in current_keys_page.items: - yield key - - read_keys += len(current_keys_page.items) - - if not current_keys_page.is_truncated: - break - - exclusive_start_key = current_keys_page.next_exclusive_start_key + warnings.warn( + '`KeyValueStoreClientAsync.iterate_keys()` is deprecated, iterate the return value of ' + '`KeyValueStoreClientAsync.list_keys()` instead.', + DeprecationWarning, + stacklevel=2, + ) + async for key in self.list_keys( + limit=limit, + exclusive_start_key=exclusive_start_key, + collection=collection, + prefix=prefix, + 
signature=signature, + chunk_size=chunk_size, + timeout=timeout, + ): + yield key async def get_record(self, key: str, *, signature: str | None = None, timeout: Timeout = 'long') -> dict | None: """Retrieve the given record from the key-value store. @@ -894,7 +959,7 @@ async def create_keys_public_url( Any other options (like `limit` or `prefix`) will be included as query parameters in the URL. Args: - limit: Number of keys to be returned. Maximum value is 1000. + limit: Number of keys to be returned by the signed request. Maximum value is 1000. exclusive_start_key: All keys up to this one (including) are skipped from the result. collection: The name of the collection in store schema to list keys from. prefix: The prefix of the keys to be listed. diff --git a/src/apify_client/_resource_clients/key_value_store_collection.py b/src/apify_client/_resource_clients/key_value_store_collection.py index f221a192..8974edac 100644 --- a/src/apify_client/_resource_clients/key_value_store_collection.py +++ b/src/apify_client/_resource_clients/key_value_store_collection.py @@ -6,10 +6,19 @@ from apify_client._models_generated import ( KeyValueStore, KeyValueStoreResponse, - ListOfKeyValueStores, ListOfKeyValueStoresResponse, StorageOwnership, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfKeyValueStores, + IterablePageOfKeyValueStoresAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: @@ -44,9 +53,12 @@ def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeyValueStores: + ) -> IterablePageOfKeyValueStores: """List the available key-value stores. 
+ The returned page also supports iteration: `for item in client.list(...)` yields individual + key-value stores and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/key-value-stores/store-collection/get-list-of-key-value-stores Args: @@ -61,10 +73,31 @@ def list( Returns: The list of available key-value stores matching the specified filters. """ - result = self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership + + def _callback(**kwargs: Any) -> PageOfItems[KeyValueStore]: + result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + data = ListOfKeyValueStoresResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfKeyValueStores( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, ) - return ListOfKeyValueStoresResponse.model_validate(result).data def get_or_create( self, @@ -108,7 +141,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, unnamed: bool | None = None, @@ -117,9 +150,12 @@ async def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfKeyValueStores: + ) -> IterablePageOfKeyValueStoresAsync: """List the available key-value stores. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + key-value stores and transparently fetches further pages from the API. 
+ https://docs.apify.com/api/v2#/reference/key-value-stores/store-collection/get-list-of-key-value-stores Args: @@ -134,10 +170,28 @@ async def list( Returns: The list of available key-value stores matching the specified filters. """ - result = await self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership + + async def _callback(**kwargs: Any) -> PageOfItems[KeyValueStore]: + result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + data = ListOfKeyValueStoresResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfKeyValueStoresAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - return ListOfKeyValueStoresResponse.model_validate(result).data async def get_or_create( self, diff --git a/src/apify_client/_resource_clients/request_queue.py b/src/apify_client/_resource_clients/request_queue.py index cd00a1cd..d2c7686b 100644 --- a/src/apify_client/_resource_clients/request_queue.py +++ b/src/apify_client/_resource_clients/request_queue.py @@ -20,7 +20,6 @@ BatchDeleteResult, HeadAndLockResponse, HeadResponse, - ListOfRequests, ListOfRequestsResponse, LockedRequestQueueHead, ProlongRequestLockResponse, @@ -35,6 +34,17 @@ UnlockRequestsResponse, UnlockRequestsResult, ) +from apify_client._pagination import ( + _LazyTask, + _min_for_limit_param, + build_get_cursor_iterator, + build_get_cursor_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfRequests, + IterablePageOfRequestsAsync, + PageOfRequests, +) from apify_client._resource_clients._resource_client import 
ResourceClient, ResourceClientAsync from apify_client._utils import catch_not_found_or_throw, response_to_dict, to_seconds from apify_client.errors import ApifyApiError @@ -500,20 +510,28 @@ def list_requests( *, limit: int | None = None, filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002 - timeout: Timeout = 'medium', cursor: str | None = None, exclusive_start_id: str | None = None, - ) -> ListOfRequests: + chunk_size: int | None = None, + timeout: Timeout = 'medium', + ) -> IterablePageOfRequests: """List requests in the queue. + The returned page also supports iteration: `for request in client.list_requests(...)` yields + individual requests and transparently fetches further pages using the opaque `cursor` + returned by the API. + https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests Args: limit: How many requests to retrieve. filter: List of request states to use as a filter. Multiple values mean union of the given filters. - timeout: Timeout for the API HTTP request. - cursor: A token returned in previous API response, to continue listing next page of requests + cursor: A token returned in a previous API response, to continue listing the next page of requests. exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result. + Only applied to the first page fetched; subsequent pages during iteration use `cursor`. + chunk_size: Maximum number of requests requested per API call when iterating. Only + relevant when iterating across pages. + timeout: Timeout for the API HTTP request. 
""" if exclusive_start_id and cursor: raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.') @@ -525,24 +543,55 @@ def list_requests( stacklevel=2, ) - request_params = self._build_params( + def _callback(*, limit: int | None = None, cursor: str | None = None) -> PageOfRequests: + # `exclusive_start_id` is honored only on the first page (when no cursor has been + # produced by the server yet); subsequent pages rely on the opaque `cursor`. + request_params = self._build_params( + limit=limit, + filter=','.join(filter) if filter else None, + clientKey=self.client_key, + exclusiveStartId=exclusive_start_id if cursor is None else None, + cursor=cursor, + ) + response = self._http_client.call( + url=self._build_url('requests'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + data = ListOfRequestsResponse.model_validate(result).data + with warnings.catch_warnings(): + # `exclusive_start_id` is deprecated on the API model; reading triggers a warning. 
+ warnings.simplefilter('ignore', DeprecationWarning) + exclusive_start_id_value = data.exclusive_start_id + return PageOfRequests( + items=data.items, + limit=data.limit, + exclusive_start_id=exclusive_start_id_value, + cursor=data.cursor, + next_cursor=data.next_cursor, + ) + + first_limit = _min_for_limit_param(limit, chunk_size) + first_page = _callback(limit=first_limit, cursor=cursor) + get_iterator = build_get_cursor_iterator( + _callback, + first_page, + cursor_param='cursor', limit=limit, - filter=','.join(filter) if filter else None, - clientKey=self.client_key, - exclusiveStartId=exclusive_start_id, - cursor=cursor, + chunk_size=chunk_size, ) - response = self._http_client.call( - url=self._build_url('requests'), - method='GET', - params=request_params, - timeout=timeout, + return IterablePageOfRequests( + _get_iterator=get_iterator, + items=first_page.items, + limit=first_page.limit, + exclusive_start_id=first_page.exclusive_start_id, + cursor=first_page.cursor, + next_cursor=first_page.next_cursor, ) - result = response_to_dict(response) - return ListOfRequestsResponse.model_validate(result).data - def unlock_requests(self: RequestQueueClient, *, timeout: Timeout = 'long') -> UnlockRequestsResult: """Unlock all requests in the queue, which were locked by the same clientKey or from the same Actor run. @@ -1058,25 +1107,33 @@ async def batch_delete_requests( result = response_to_dict(response) return BatchDeleteResponse.model_validate(result).data - async def list_requests( + def list_requests( self, *, limit: int | None = None, filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002 - timeout: Timeout = 'medium', cursor: str | None = None, exclusive_start_id: str | None = None, - ) -> ListOfRequests: + chunk_size: int | None = None, + timeout: Timeout = 'medium', + ) -> IterablePageOfRequestsAsync: """List requests in the queue. 
+ The returned page also supports iteration: `async for request in client.list_requests(...)` yields + individual requests and transparently fetches further pages using the opaque `cursor` + returned by the API. + https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests Args: limit: How many requests to retrieve. filter: List of request states to use as a filter. Multiple values mean union of the given filters. - timeout: Timeout for the API HTTP request. - cursor: A token returned in previous API response, to continue listing next page of requests + cursor: A token returned in a previous API response, to continue listing the next page of requests. exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result. + Only applied to the first page fetched; subsequent pages during iteration use `cursor`. + chunk_size: Maximum number of requests requested per API call when iterating. Only + relevant when iterating across pages. + timeout: Timeout for the API HTTP request. """ if exclusive_start_id and cursor: raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.') @@ -1088,24 +1145,51 @@ async def list_requests( stacklevel=2, ) - request_params = self._build_params( + async def _callback(*, limit: int | None = None, cursor: str | None = None) -> PageOfRequests: + # `exclusive_start_id` is honored only on the first page (when no cursor has been + # produced by the server yet); subsequent pages rely on the opaque `cursor`. 
+ request_params = self._build_params( + limit=limit, + filter=','.join(filter) if filter else None, + clientKey=self.client_key, + exclusiveStartId=exclusive_start_id if cursor is None else None, + cursor=cursor, + ) + response = await self._http_client.call( + url=self._build_url('requests'), + method='GET', + params=request_params, + timeout=timeout, + ) + result = response_to_dict(response) + data = ListOfRequestsResponse.model_validate(result).data + with warnings.catch_warnings(): + # `exclusive_start_id` is deprecated on the API model; reading triggers a warning. + warnings.simplefilter('ignore', DeprecationWarning) + exclusive_start_id_value = data.exclusive_start_id + return PageOfRequests( + items=data.items, + limit=data.limit, + exclusive_start_id=exclusive_start_id_value, + cursor=data.cursor, + next_cursor=data.next_cursor, + ) + + first_limit = _min_for_limit_param(limit, chunk_size) + fetch_first_page = _LazyTask(_callback(limit=first_limit, cursor=cursor)) + get_async_iterator = build_get_cursor_iterator_async( + _callback, + fetch_first_page, + cursor_param='cursor', limit=limit, - filter=','.join(filter) if filter else None, - clientKey=self.client_key, - exclusiveStartId=exclusive_start_id, - cursor=cursor, + chunk_size=chunk_size, ) - response = await self._http_client.call( - url=self._build_url('requests'), - method='GET', - params=request_params, - timeout=timeout, + return IterablePageOfRequestsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - result = response_to_dict(response) - return ListOfRequestsResponse.model_validate(result).data - async def unlock_requests( self: RequestQueueClientAsync, *, diff --git a/src/apify_client/_resource_clients/request_queue_collection.py b/src/apify_client/_resource_clients/request_queue_collection.py index 1d06fcbc..38ef8552 100644 --- a/src/apify_client/_resource_clients/request_queue_collection.py +++ 
b/src/apify_client/_resource_clients/request_queue_collection.py @@ -4,15 +4,25 @@ from apify_client._docs import docs_group from apify_client._models_generated import ( - ListOfRequestQueues, ListOfRequestQueuesResponse, RequestQueue, RequestQueueResponse, StorageOwnership, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfRequestQueues, + IterablePageOfRequestQueuesAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models_generated import RequestQueueShort from apify_client._types import Timeout @@ -44,9 +54,12 @@ def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfRequestQueues: + ) -> IterablePageOfRequestQueues: """List the available request queues. + The returned page also supports iteration: `for item in client.list(...)` yields individual + request queues and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/request-queues/queue-collection/get-list-of-request-queues Args: @@ -61,10 +74,31 @@ def list( Returns: The list of available request queues matching the specified filters. 
""" - result = self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership + + def _callback(**kwargs: Any) -> PageOfItems[RequestQueueShort]: + result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + data = ListOfRequestQueuesResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfRequestQueues( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, ) - return ListOfRequestQueuesResponse.model_validate(result).data def get_or_create( self, @@ -106,7 +140,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, unnamed: bool | None = None, @@ -115,9 +149,12 @@ async def list( desc: bool | None = None, ownership: StorageOwnership | None = None, timeout: Timeout = 'medium', - ) -> ListOfRequestQueues: + ) -> IterablePageOfRequestQueuesAsync: """List the available request queues. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + request queues and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/request-queues/queue-collection/get-list-of-request-queues Args: @@ -132,10 +169,28 @@ async def list( Returns: The list of available request queues matching the specified filters. 
""" - result = await self._list( - timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership + + async def _callback(**kwargs: Any) -> PageOfItems[RequestQueueShort]: + result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs) + data = ListOfRequestQueuesResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfRequestQueuesAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - return ListOfRequestQueuesResponse.model_validate(result).data async def get_or_create( self, diff --git a/src/apify_client/_resource_clients/run_collection.py b/src/apify_client/_resource_clients/run_collection.py index b63b3fc9..17f51002 100644 --- a/src/apify_client/_resource_clients/run_collection.py +++ b/src/apify_client/_resource_clients/run_collection.py @@ -3,13 +3,23 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group -from apify_client._models_generated import ListOfRuns, ListOfRunsResponse +from apify_client._models_generated import ListOfRunsResponse +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfRuns, + IterablePageOfRunsAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: from datetime import datetime - from apify_client._models_generated import ActorJobStatus + from apify_client._models_generated import ActorJobStatus, RunShort from apify_client._types import Timeout @@ -42,12 +52,15 @@ 
def list( started_before: str | datetime | None = None, started_after: str | datetime | None = None, timeout: Timeout = 'medium', - ) -> ListOfRuns: + ) -> IterablePageOfRuns: """List all Actor runs. List all Actor runs, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `for item in client.list(...)` yields individual runs + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/run-collection/get-list-of-runs https://docs.apify.com/api/v2#/reference/actor-runs/run-collection/get-user-runs-list @@ -65,16 +78,36 @@ def list( """ status_param = list(status) if isinstance(status, list) else status - result = self._list( - timeout=timeout, - limit=limit, - offset=offset, - desc=desc, - status=status_param, - startedBefore=started_before, - startedAfter=started_after, + def _callback(**kwargs: Any) -> PageOfItems[RunShort]: + result = self._list( + timeout=timeout, + status=status_param, + startedBefore=started_before, + startedAfter=started_after, + **kwargs, + ) + data = ListOfRunsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfRuns( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, ) - return ListOfRunsResponse.model_validate(result).data @docs_group('Resource clients') @@ -96,7 +129,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, @@ -106,12 +139,15 @@ async def list( started_before: str | datetime | None = None, 
started_after: str | datetime | None = None, timeout: Timeout = 'medium', - ) -> ListOfRuns: + ) -> IterablePageOfRunsAsync: """List all Actor runs. List all Actor runs, either of a single Actor, or all user's Actors, depending on where this client was initialized from. + The returned page also supports iteration: `async for item in client.list(...)` yields individual runs + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actors/run-collection/get-list-of-runs https://docs.apify.com/api/v2#/reference/actor-runs/run-collection/get-user-runs-list @@ -129,13 +165,30 @@ async def list( """ status_param = list(status) if isinstance(status, list) else status - result = await self._list( - timeout=timeout, - limit=limit, - offset=offset, - desc=desc, - status=status_param, - startedBefore=started_before, - startedAfter=started_after, + async def _callback(**kwargs: Any) -> PageOfItems[RunShort]: + result = await self._list( + timeout=timeout, + status=status_param, + startedBefore=started_before, + startedAfter=started_after, + **kwargs, + ) + data = ListOfRunsResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfRunsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - return ListOfRunsResponse.model_validate(result).data diff --git a/src/apify_client/_resource_clients/schedule_collection.py b/src/apify_client/_resource_clients/schedule_collection.py index 1421d257..6b1764f0 100644 --- a/src/apify_client/_resource_clients/schedule_collection.py +++ b/src/apify_client/_resource_clients/schedule_collection.py @@ -4,15 +4,25 @@ from 
apify_client._docs import docs_group from apify_client._models_generated import ( - ListOfSchedules, ListOfSchedulesResponse, Schedule, ScheduleCreate, ScheduleResponse, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfSchedules, + IterablePageOfSchedulesAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models_generated import ScheduleShort from apify_client._types import Timeout @@ -42,9 +52,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfSchedules: + ) -> IterablePageOfSchedules: """List the available schedules. + The returned page also supports iteration: `for item in client.list(...)` yields individual + schedules and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/schedules/schedules-collection/get-list-of-schedules Args: @@ -56,8 +69,31 @@ def list( Returns: The list of available schedules matching the specified filters. 
""" - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfSchedulesResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItems[ScheduleShort]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfSchedulesResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfSchedules( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, + ) def create( self, @@ -128,16 +164,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfSchedules: + ) -> IterablePageOfSchedulesAsync: """List the available schedules. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + schedules and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/schedules/schedules-collection/get-list-of-schedules Args: @@ -149,8 +188,28 @@ async def list( Returns: The list of available schedules matching the specified filters. 
""" - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfSchedulesResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItems[ScheduleShort]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfSchedulesResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfSchedulesAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) async def create( self, diff --git a/src/apify_client/_resource_clients/store_collection.py b/src/apify_client/_resource_clients/store_collection.py index 9c80ad31..6fe021c8 100644 --- a/src/apify_client/_resource_clients/store_collection.py +++ b/src/apify_client/_resource_clients/store_collection.py @@ -3,10 +3,21 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group -from apify_client._models_generated import ListOfActorsInStoreResponse, ListOfStoreActors +from apify_client._models_generated import ListOfActorsInStoreResponse +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfStoreActors, + IterablePageOfStoreActorsAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models_generated import StoreListActor from apify_client._types import Timeout @@ -40,9 +51,12 @@ def list( username: str | None = None, pricing_model: str | None = None, timeout: Timeout = 'medium', - ) -> ListOfStoreActors: + ) -> 
IterablePageOfStoreActors: """List Actors in Apify store. + The returned page also supports iteration: `for item in client.list(...)` yields individual Actors + from the store and transparently fetches further pages from the API. + https://docs.apify.com/api/v2/#/reference/store/store-actors-collection/get-list-of-actors-in-store Args: @@ -59,17 +73,39 @@ def list( Returns: The list of available Actors matching the specified filters. """ - result = self._list( - timeout=timeout, - limit=limit, - offset=offset, - search=search, - sortBy=sort_by, - category=category, - username=username, - pricingModel=pricing_model, + + def _callback(**kwargs: Any) -> PageOfItems[StoreListActor]: + result = self._list( + timeout=timeout, + search=search, + sortBy=sort_by, + category=category, + username=username, + pricingModel=pricing_model, + **kwargs, + ) + data = ListOfActorsInStoreResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset) + + return IterablePageOfStoreActors( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, ) - return ListOfActorsInStoreResponse.model_validate(result).data @docs_group('Resource clients') @@ -91,7 +127,7 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, @@ -102,9 +138,12 @@ async def list( username: str | None = None, pricing_model: str | None = None, timeout: Timeout = 'medium', - ) -> ListOfStoreActors: + ) -> IterablePageOfStoreActorsAsync: """List Actors in Apify store. 
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual Actors + from the store and transparently fetches further pages from the API. + https://docs.apify.com/api/v2/#/reference/store/store-actors-collection/get-list-of-actors-in-store Args: @@ -121,14 +160,31 @@ async def list( Returns: The list of available Actors matching the specified filters. """ - result = await self._list( - timeout=timeout, - limit=limit, - offset=offset, - search=search, - sortBy=sort_by, - category=category, - username=username, - pricingModel=pricing_model, + + async def _callback(**kwargs: Any) -> PageOfItems[StoreListActor]: + result = await self._list( + timeout=timeout, + search=search, + sortBy=sort_by, + category=category, + username=username, + pricingModel=pricing_model, + **kwargs, + ) + data = ListOfActorsInStoreResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset)) + get_async_iterator = build_get_iterator_async(_callback, fetch_first_page, limit=limit, offset=offset) + + return IterablePageOfStoreActorsAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, ) - return ListOfActorsInStoreResponse.model_validate(result).data diff --git a/src/apify_client/_resource_clients/task_collection.py b/src/apify_client/_resource_clients/task_collection.py index 44c46c9b..9e718b15 100644 --- a/src/apify_client/_resource_clients/task_collection.py +++ b/src/apify_client/_resource_clients/task_collection.py @@ -6,19 +6,29 @@ from apify_client._models_generated import ( ActorStandby, CreateTaskRequest, - ListOfTasks, ListOfTasksResponse, Task, TaskInput, TaskOptions, TaskResponse, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from 
apify_client._pagination_classes import ( + IterablePageOfTasks, + IterablePageOfTasksAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync from apify_client._utils import to_seconds if TYPE_CHECKING: from datetime import timedelta + from apify_client._models_generated import TaskShort from apify_client._typeddicts_generated import TaskInputDict from apify_client._types import Timeout @@ -49,9 +59,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfTasks: + ) -> IterablePageOfTasks: """List the available tasks. + The returned page also supports iteration: `for item in client.list(...)` yields individual tasks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actor-tasks/task-collection/get-list-of-tasks Args: @@ -63,8 +76,31 @@ def list( Returns: The list of available tasks matching the specified filters. """ - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfTasksResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItems[TaskShort]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfTasksResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfTasks( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, + ) def create( self, @@ -163,16 +199,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | 
None = None, timeout: Timeout = 'medium', - ) -> ListOfTasks: + ) -> IterablePageOfTasksAsync: """List the available tasks. + The returned page also supports iteration: `async for item in client.list(...)` yields individual tasks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/actor-tasks/task-collection/get-list-of-tasks Args: @@ -184,8 +223,28 @@ async def list( Returns: The list of available tasks matching the specified filters. """ - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfTasksResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItems[TaskShort]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfTasksResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfTasksAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) async def create( self, diff --git a/src/apify_client/_resource_clients/webhook_collection.py b/src/apify_client/_resource_clients/webhook_collection.py index 12834ce1..c5fa353b 100644 --- a/src/apify_client/_resource_clients/webhook_collection.py +++ b/src/apify_client/_resource_clients/webhook_collection.py @@ -4,16 +4,25 @@ from apify_client._docs import docs_group from apify_client._models_generated import ( - ListOfWebhooks, ListOfWebhooksResponse, WebhookCondition, WebhookCreate, WebhookResponse, ) +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from apify_client._pagination_classes import ( + IterablePageOfWebhooks, + 
IterablePageOfWebhooksAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: - from apify_client._models_generated import Webhook, WebhookEventType + from apify_client._models_generated import Webhook, WebhookEventType, WebhookShort from apify_client._types import Timeout @@ -43,9 +52,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhooks: + ) -> IterablePageOfWebhooks: """List the available webhooks. + The returned page also supports iteration: `for item in client.list(...)` yields individual webhooks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhooks/webhook-collection/get-list-of-webhooks Args: @@ -57,8 +69,31 @@ def list( Returns: The list of available webhooks matching the specified filters. """ - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfWebhooksResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItems[WebhookShort]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfWebhooksResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfWebhooks( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, + ) def create( self, @@ -139,16 +174,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhooks: + 
) -> IterablePageOfWebhooksAsync: """List the available webhooks. + The returned page also supports iteration: `async for item in client.list(...)` yields individual webhooks + and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhooks/webhook-collection/get-list-of-webhooks Args: @@ -160,8 +198,28 @@ async def list( Returns: The list of available webhooks matching the specified filters. """ - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfWebhooksResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItems[WebhookShort]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfWebhooksResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfWebhooksAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) async def create( self, diff --git a/src/apify_client/_resource_clients/webhook_dispatch_collection.py b/src/apify_client/_resource_clients/webhook_dispatch_collection.py index b93f59b1..c263629c 100644 --- a/src/apify_client/_resource_clients/webhook_dispatch_collection.py +++ b/src/apify_client/_resource_clients/webhook_dispatch_collection.py @@ -3,10 +3,21 @@ from typing import TYPE_CHECKING, Any from apify_client._docs import docs_group -from apify_client._models_generated import ListOfWebhookDispatches, ListOfWebhookDispatchesResponse +from apify_client._models_generated import ListOfWebhookDispatchesResponse +from apify_client._pagination import ( + _LazyTask, + build_get_iterator, + build_get_iterator_async, +) +from 
apify_client._pagination_classes import ( + IterablePageOfWebhookDispatches, + IterablePageOfWebhookDispatchesAsync, + PageOfItems, +) from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync if TYPE_CHECKING: + from apify_client._models_generated import WebhookDispatch from apify_client._types import Timeout @@ -36,9 +47,12 @@ def list( offset: int | None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhookDispatches | None: + ) -> IterablePageOfWebhookDispatches: """List all webhook dispatches of a user. + The returned page also supports iteration: `for item in client.list(...)` yields individual + webhook dispatches and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhook-dispatches/webhook-dispatches-collection/get-list-of-webhook-dispatches Args: @@ -50,8 +64,31 @@ def list( Returns: The retrieved webhook dispatches of a user. """ - result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfWebhookDispatchesResponse.model_validate(result).data + + def _callback(**kwargs: Any) -> PageOfItems[WebhookDispatch]: + result = self._list(timeout=timeout, **kwargs) + data = ListOfWebhookDispatchesResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + first_page = _callback(limit=limit, offset=offset, desc=desc) + get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc) + + return IterablePageOfWebhookDispatches( + _get_iterator=get_iterator, + items=first_page.items, + count=first_page.count, + limit=first_page.limit, + total=first_page.total, + offset=first_page.offset, + desc=first_page.desc, + ) @docs_group('Resource clients') @@ -73,16 +110,19 @@ def __init__( **kwargs, ) - async def list( + def list( self, *, limit: int | None = None, offset: int | 
None = None, desc: bool | None = None, timeout: Timeout = 'medium', - ) -> ListOfWebhookDispatches | None: + ) -> IterablePageOfWebhookDispatchesAsync: """List all webhook dispatches of a user. + The returned page also supports iteration: `async for item in client.list(...)` yields individual + webhook dispatches and transparently fetches further pages from the API. + https://docs.apify.com/api/v2#/reference/webhook-dispatches/webhook-dispatches-collection/get-list-of-webhook-dispatches Args: @@ -94,5 +134,25 @@ async def list( Returns: The retrieved webhook dispatches of a user. """ - result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc) - return ListOfWebhookDispatchesResponse.model_validate(result).data + + async def _callback(**kwargs: Any) -> PageOfItems[WebhookDispatch]: + result = await self._list(timeout=timeout, **kwargs) + data = ListOfWebhookDispatchesResponse.model_validate(result).data + return PageOfItems( + items=data.items, + count=data.count, + limit=data.limit, + total=data.total, + offset=data.offset, + desc=data.desc, + ) + + fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc)) + get_async_iterator = build_get_iterator_async( + _callback, fetch_first_page, limit=limit, offset=offset, desc=desc + ) + + return IterablePageOfWebhookDispatchesAsync( + _awaitable_first_page=fetch_first_page, + _get_async_iterator=get_async_iterator, + ) diff --git a/tests/integration/test_actor.py b/tests/integration/test_actor.py index 4797c664..513bfebf 100644 --- a/tests/integration/test_actor.py +++ b/tests/integration/test_actor.py @@ -5,10 +5,12 @@ from typing import TYPE_CHECKING, cast from ._utils import get_random_resource_name, maybe_await +from apify_client._models_generated import ActorShort +from apify_client._pagination_classes import PageOfItems if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, Build, ListOfActors, Run + from 
apify_client._models_generated import Actor, Build, Run from apify_client._resource_clients import BuildClient, BuildClientAsync @@ -36,36 +38,31 @@ async def test_get_actor_by_full_name(client: ApifyClient | ApifyClientAsync) -> async def test_list_actors_my(client: ApifyClient | ApifyClientAsync) -> None: """Test listing Actors created by the user.""" - result = await maybe_await(client.actors().list(my=True, limit=10)) - actors_page = cast('ListOfActors', result) + actors_page = await maybe_await(client.actors().list(my=True, limit=10)) - assert actors_page is not None - assert actors_page.items is not None - # User may have 0 actors + assert isinstance(actors_page, PageOfItems) assert isinstance(actors_page.items, list) + # User may have 0 actors — only check element type when any were returned. + if actors_page.items: + assert isinstance(actors_page.items[0], ActorShort) async def test_list_actors_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing Actors with pagination parameters.""" - # List all actors (public + owned), should return some results - result = await maybe_await(client.actors().list(limit=5, offset=0)) - actors_page = cast('ListOfActors', result) + actors_page = await maybe_await(client.actors().list(limit=5, offset=0)) - assert actors_page is not None - assert actors_page.items is not None + assert isinstance(actors_page, PageOfItems) assert isinstance(actors_page.items, list) - # Should have at least some actors (public ones exist) - assert len(actors_page.items) >= 0 + assert isinstance(actors_page.items[0], ActorShort) async def test_list_actors_sorting(client: ApifyClient | ApifyClientAsync) -> None: """Test listing Actors with sorting.""" - result = await maybe_await(client.actors().list(limit=10, desc=True, sort_by='created_at')) - actors_page = cast('ListOfActors', result) + actors_page = await maybe_await(client.actors().list(limit=10, desc=True, sort_by='created_at')) - assert actors_page is not None - assert 
actors_page.items is not None + assert isinstance(actors_page, PageOfItems) assert isinstance(actors_page.items, list) + assert isinstance(actors_page.items[0], ActorShort) async def test_actor_create_update_delete(client: ApifyClient | ApifyClientAsync) -> None: diff --git a/tests/integration/test_actor_env_var.py b/tests/integration/test_actor_env_var.py index e5d9663e..5c2a1e9e 100644 --- a/tests/integration/test_actor_env_var.py +++ b/tests/integration/test_actor_env_var.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import EnvVar +from apify_client._pagination_classes import PageOfItemsOnlyTotal + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, EnvVar, ListOfEnvVars + from apify_client._models_generated import Actor from ._utils import get_random_resource_name, maybe_await @@ -49,11 +52,10 @@ async def test_actor_env_var_list(client: ApifyClient | ApifyClientAsync) -> Non try: # List env vars - result = await maybe_await(version_client.env_vars().list()) - env_vars = cast('ListOfEnvVars', result) - - assert env_vars is not None - assert env_vars.items is not None + env_vars = await maybe_await(version_client.env_vars().list()) + assert isinstance(env_vars, PageOfItemsOnlyTotal) + assert isinstance(env_vars.items, list) + assert isinstance(env_vars.items[0], EnvVar) assert len(env_vars.items) >= 1 # Verify env var fields diff --git a/tests/integration/test_actor_version.py b/tests/integration/test_actor_version.py index b8ff31c3..22a83025 100644 --- a/tests/integration/test_actor_version.py +++ b/tests/integration/test_actor_version.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import Version +from apify_client._pagination_classes import PageOfItemsOnlyTotal + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, 
ListOfVersions, Version + from apify_client._models_generated import Actor from ._utils import get_random_resource_name, maybe_await @@ -42,11 +45,11 @@ async def test_actor_version_list(client: ApifyClient | ApifyClientAsync) -> Non try: # List versions - result = await maybe_await(actor_client.versions().list()) - versions = cast('ListOfVersions', result) + versions = await maybe_await(actor_client.versions().list()) - assert versions is not None - assert versions.items is not None + assert isinstance(versions, PageOfItemsOnlyTotal) + assert isinstance(versions.items, list) + assert isinstance(versions.items[0], Version) assert len(versions.items) >= 1 # Verify version fields diff --git a/tests/integration/test_build.py b/tests/integration/test_build.py index ef8ac662..8193b665 100644 --- a/tests/integration/test_build.py +++ b/tests/integration/test_build.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import BuildShort +from apify_client._pagination_classes import PageOfItems + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, Build, ListOfBuilds + from apify_client._models_generated import Actor, Build from datetime import timedelta @@ -21,14 +24,12 @@ async def test_build_list_for_actor(client: ApifyClient | ApifyClientAsync) -> N """Test listing builds for a public Actor.""" # Get builds for hello-world actor actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=10)) - builds_page = cast('ListOfBuilds', result) + builds_page = await maybe_await(actor.builds().list(limit=10)) - assert builds_page is not None - assert builds_page.items is not None - assert len(builds_page.items) > 0 # hello-world should have at least one build + assert isinstance(builds_page, PageOfItems) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) # hello-world has at least one 
build - # Verify build structure first_build = builds_page.items[0] assert first_build.id is not None assert first_build.act_id is not None @@ -38,9 +39,11 @@ async def test_build_get(client: ApifyClient | ApifyClientAsync) -> None: """Test getting a specific build.""" # First list builds to get a build ID actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=1)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=1)) + + assert isinstance(builds_page, PageOfItems) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) build_id = builds_page.items[0].id # Get the specific build @@ -56,22 +59,24 @@ async def test_build_get(client: ApifyClient | ApifyClientAsync) -> None: async def test_user_builds_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing all user builds.""" # List user's builds (may be empty if user has no actors) - result = await maybe_await(client.builds().list(limit=10)) - builds_page = cast('ListOfBuilds', result) + builds_page = await maybe_await(client.builds().list(limit=10)) - assert builds_page is not None - assert builds_page.items is not None - # User may have 0 builds, so we just check the structure + assert isinstance(builds_page, PageOfItems) assert isinstance(builds_page.items, list) + # User may have 0 builds — only check element type when any were returned. 
+ if builds_page.items: + assert isinstance(builds_page.items[0], BuildShort) async def test_build_log(client: ApifyClient | ApifyClientAsync) -> None: """Test getting build log.""" # First list builds to get a completed build ID actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=5)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=5)) + + assert isinstance(builds_page, PageOfItems) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) # Find a completed build (SUCCEEDED status) completed_build = None @@ -96,9 +101,11 @@ async def test_build_wait_for_finish(client: ApifyClient | ApifyClientAsync) -> """Test wait_for_finish on an already completed build.""" # First list builds to get a completed build ID actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=5)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=5)) + + assert isinstance(builds_page, PageOfItems) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) # Find a completed build (SUCCEEDED status) completed_build = None @@ -208,9 +215,11 @@ async def test_build_get_open_api_definition(client: ApifyClient | ApifyClientAs """Test getting OpenAPI definition for a build.""" # Get builds for hello-world actor actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=1)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=1)) + + assert isinstance(builds_page, PageOfItems) + assert isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) build_id = builds_page.items[0].id # Get the OpenAPI definition diff --git 
a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index 149a504c..8415a095 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -4,6 +4,9 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import DatasetListItem +from apify_client._pagination_classes import PageOfDatasetItems, PageOfItems + if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterator from contextlib import AbstractAsyncContextManager, AbstractContextManager @@ -11,7 +14,7 @@ from impit import Response from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Dataset, ListOfDatasets + from apify_client._models_generated import Dataset from apify_client._resource_clients.dataset import DatasetItemsPage import json @@ -26,22 +29,23 @@ async def test_dataset_collection_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing datasets.""" - result = await maybe_await(client.datasets().list(limit=10)) - datasets_page = cast('ListOfDatasets', result) + datasets_page = await maybe_await(client.datasets().list(limit=10)) - assert datasets_page is not None - assert datasets_page.items is not None + assert isinstance(datasets_page, PageOfItems) assert isinstance(datasets_page.items, list) + # User may have 0 datasets — only check element type when any were returned. 
+ if datasets_page.items: + assert isinstance(datasets_page.items[0], DatasetListItem) async def test_dataset_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing datasets with pagination.""" - result = await maybe_await(client.datasets().list(limit=5, offset=0)) - datasets_page = cast('ListOfDatasets', result) + datasets_page = await maybe_await(client.datasets().list(limit=5, offset=0)) - assert datasets_page is not None - assert datasets_page.items is not None + assert isinstance(datasets_page, PageOfItems) assert isinstance(datasets_page.items, list) + if datasets_page.items: + assert isinstance(datasets_page.items[0], DatasetListItem) async def test_dataset_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None: @@ -261,8 +265,8 @@ async def test_dataset_push_and_list_items(client: ApifyClient | ApifyClientAsyn await maybe_sleep(1, is_async=is_async) # List items - result = await maybe_await(dataset_client.list_items()) - items_page = cast('DatasetItemsPage', result) + items_page = await maybe_await(dataset_client.list_items()) + assert isinstance(items_page, PageOfDatasetItems) assert items_page is not None assert len(items_page.items) == 3 assert items_page.count == 3 @@ -294,22 +298,21 @@ async def test_dataset_list_items_with_pagination(client: ApifyClient | ApifyCli await maybe_sleep(1, is_async=is_async) # List with limit - result = await maybe_await(dataset_client.list_items(limit=5)) - items_page = cast('DatasetItemsPage', result) + items_page = await maybe_await(dataset_client.list_items(limit=5)) + assert isinstance(items_page, PageOfDatasetItems) assert len(items_page.items) == 5 assert items_page.count == 5 # Note: items_page.total may be 0 immediately after push due to eventual consistency assert items_page.limit == 5 # List with offset - result = await maybe_await(dataset_client.list_items(offset=5, limit=5)) - items_page_offset = cast('DatasetItemsPage', result) + items_page_offset = await 
maybe_await(dataset_client.list_items(offset=5, limit=5)) + assert isinstance(items_page_offset, PageOfDatasetItems) assert len(items_page_offset.items) == 5 assert items_page_offset.offset == 5 # Note: items_page.total may be 0 immediately after push due to eventual consistency - # Verify different items - assert items_page.items[0]['index'] != items_page_offset.items[0]['index'] + assert items_page_offset.items[0]['index'] != items_page.items[0]['index'] finally: await maybe_await(dataset_client.delete()) @@ -334,8 +337,8 @@ async def test_dataset_list_items_with_fields(client: ApifyClient | ApifyClientA await maybe_sleep(1, is_async=is_async) # List with fields filter - result = await maybe_await(dataset_client.list_items(fields=['id', 'name'])) - items_page = cast('DatasetItemsPage', result) + items_page = await maybe_await(dataset_client.list_items(fields=['id', 'name'])) + assert isinstance(items_page, PageOfDatasetItems) assert len(items_page.items) == 2 # Verify only specified fields are returned diff --git a/tests/integration/test_key_value_store.py b/tests/integration/test_key_value_store.py index ecc9d709..f9d2bee1 100644 --- a/tests/integration/test_key_value_store.py +++ b/tests/integration/test_key_value_store.py @@ -4,11 +4,13 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import KeyValueStore, KeyValueStoreKey +from apify_client._pagination_classes import PageOfItems, PageOfKeys + if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterator from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import KeyValueStore, KeyValueStoreKey, ListOfKeys, ListOfKeyValueStores import json from datetime import timedelta @@ -22,22 +24,22 @@ async def test_key_value_store_collection_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing key-value stores.""" - result = await maybe_await(client.key_value_stores().list(limit=10)) - kvs_page = cast('ListOfKeyValueStores', 
result) + kvs_page = await maybe_await(client.key_value_stores().list(limit=10)) - assert kvs_page is not None - assert kvs_page.items is not None + assert isinstance(kvs_page, PageOfItems) assert isinstance(kvs_page.items, list) + if kvs_page.items: + assert isinstance(kvs_page.items[0], KeyValueStore) async def test_key_value_store_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing key-value stores with pagination.""" - result = await maybe_await(client.key_value_stores().list(limit=5, offset=0)) - kvs_page = cast('ListOfKeyValueStores', result) + kvs_page = await maybe_await(client.key_value_stores().list(limit=5, offset=0)) - assert kvs_page is not None - assert kvs_page.items is not None + assert isinstance(kvs_page, PageOfItems) assert isinstance(kvs_page.items, list) + if kvs_page.items: + assert isinstance(kvs_page.items[0], KeyValueStore) async def test_key_value_store_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None: @@ -124,11 +126,12 @@ async def test_list_keys_signature( await maybe_await(kvs.list_keys()) # Kvs content retrieved with correct signature - result = await maybe_await(kvs.list_keys(signature=test_kvs_of_another_user.signature)) - response = cast('ListOfKeys', result) - raw_items = response.items + response = await maybe_await(kvs.list_keys(signature=test_kvs_of_another_user.signature)) - assert set(test_kvs_of_another_user.expected_content) == {item.key for item in raw_items} + assert isinstance(response, PageOfKeys) + assert isinstance(response.items, list) + assert isinstance(response.items[0], KeyValueStoreKey) + assert set(test_kvs_of_another_user.expected_content) == {item.key for item in response.items} async def test_get_record_signature( @@ -338,9 +341,11 @@ async def test_key_value_store_list_keys(client: ApifyClient | ApifyClientAsync, await maybe_sleep(1, is_async=is_async) # List keys - result = await maybe_await(store_client.list_keys()) - keys_response = 
cast('ListOfKeys', result) - assert keys_response is not None + keys_response = await maybe_await(store_client.list_keys()) + + assert isinstance(keys_response, PageOfKeys) + assert isinstance(keys_response.items, list) + assert isinstance(keys_response.items[0], KeyValueStoreKey) assert len(keys_response.items) == 5 # Verify key names @@ -368,9 +373,11 @@ async def test_key_value_store_list_keys_with_limit(client: ApifyClient | ApifyC await maybe_sleep(1, is_async=is_async) # List with limit - result = await maybe_await(store_client.list_keys(limit=5)) - keys_response = cast('ListOfKeys', result) - assert keys_response is not None + keys_response = await maybe_await(store_client.list_keys(limit=5)) + + assert isinstance(keys_response, PageOfKeys) + assert isinstance(keys_response.items, list) + assert isinstance(keys_response.items[0], KeyValueStoreKey) assert len(keys_response.items) == 5 finally: await maybe_await(store_client.delete()) diff --git a/tests/integration/test_log.py b/tests/integration/test_log.py index 85800682..76d64488 100644 --- a/tests/integration/test_log.py +++ b/tests/integration/test_log.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import BuildShort +from apify_client._pagination_classes import PageOfItems + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfBuilds, Run + from apify_client._models_generated import Run from ._utils import maybe_await @@ -39,9 +42,11 @@ async def test_log_get_from_build(client: ApifyClient | ApifyClientAsync) -> Non """Test retrieving log from a build.""" # Get a build from hello-world actor actor = client.actor(HELLO_WORLD_ACTOR) - result = await maybe_await(actor.builds().list(limit=1)) - builds_page = cast('ListOfBuilds', result) - assert builds_page.items + builds_page = await maybe_await(actor.builds().list(limit=1)) + + assert isinstance(builds_page, PageOfItems) + assert 
isinstance(builds_page.items, list) + assert isinstance(builds_page.items[0], BuildShort) build_id = builds_page.items[0].id # Get log from the build diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py index a08e7fb8..d26e9245 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/test_request_queue.py @@ -4,15 +4,15 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import Request, RequestQueueShort +from apify_client._pagination_classes import PageOfItems, PageOfRequests + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync from apify_client._models_generated import ( BatchAddResult, BatchDeleteResult, - ListOfRequestQueues, - ListOfRequests, LockedRequestQueueHead, - Request, RequestLockInfo, RequestQueue, RequestQueueHead, @@ -30,22 +30,22 @@ async def test_request_queue_collection_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing request queues.""" - result = await maybe_await(client.request_queues().list(limit=10)) - rq_page = cast('ListOfRequestQueues', result) + rq_page = await maybe_await(client.request_queues().list(limit=10)) - assert rq_page is not None - assert rq_page.items is not None + assert isinstance(rq_page, PageOfItems) assert isinstance(rq_page.items, list) + if rq_page.items: + assert isinstance(rq_page.items[0], RequestQueueShort) async def test_request_queue_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing request queues with pagination.""" - result = await maybe_await(client.request_queues().list(limit=5, offset=0)) - rq_page = cast('ListOfRequestQueues', result) + rq_page = await maybe_await(client.request_queues().list(limit=5, offset=0)) - assert rq_page is not None - assert rq_page.items is not None + assert isinstance(rq_page, PageOfItems) assert isinstance(rq_page.items, list) + if rq_page.items: + assert isinstance(rq_page.items[0], RequestQueueShort) async 
def test_request_queue_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None: @@ -256,16 +256,17 @@ async def test_request_queue_list_requests(client: ApifyClient | ApifyClientAsyn ) # Poll until all requests are available (eventual consistency) - list_response: ListOfRequests | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - list_response = cast('ListOfRequests', result) - if len(list_response.items) == 5: + list_response = await maybe_await(rq_client.list_requests()) + assert isinstance(list_response, PageOfRequests) + if list_response.items and len(list_response.items) == 5: break - assert list_response is not None + assert isinstance(list_response, PageOfRequests) + assert isinstance(list_response.items, list) assert len(list_response.items) == 5 + assert isinstance(list_response.items[0], Request) finally: await maybe_await(rq_client.delete()) @@ -325,16 +326,17 @@ async def test_request_queue_batch_add_requests(client: ApifyClient | ApifyClien assert len(batch_response.unprocessed_requests) == 0 # Poll until all requests are available (eventual consistency) - list_response: ListOfRequests | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - list_response = cast('ListOfRequests', result) - if len(list_response.items) == 10: + list_response = await maybe_await(rq_client.list_requests()) + assert isinstance(list_response, PageOfRequests) + if list_response.items and len(list_response.items) == 10: break - assert list_response is not None + assert isinstance(list_response, PageOfRequests) + assert isinstance(list_response.items, list) assert len(list_response.items) == 10 + assert isinstance(list_response.items[0], Request) finally: await maybe_await(rq_client.delete()) @@ -355,16 +357,17 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl ) # Poll until 
all requests are available (eventual consistency) - list_response: ListOfRequests | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - list_response = cast('ListOfRequests', result) - if len(list_response.items) == 10: + list_response = await maybe_await(rq_client.list_requests()) + assert isinstance(list_response, PageOfRequests) + if list_response.items and len(list_response.items) == 10: break - assert list_response is not None + assert isinstance(list_response, PageOfRequests) + assert isinstance(list_response.items, list) assert len(list_response.items) == 10 + assert isinstance(list_response.items[0], Request) requests_to_delete: list[RequestDeleteInputDict] = [ {'unique_key': item.unique_key} for item in list_response.items[:5] ] @@ -376,16 +379,17 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl assert len(delete_response.processed_requests) == 5 # Poll until deletions are reflected (eventual consistency) - remaining: ListOfRequests | None = None for _ in range(5): await maybe_sleep(1, is_async=is_async) - result = await maybe_await(rq_client.list_requests()) - remaining = cast('ListOfRequests', result) - if len(remaining.items) == 5: + remaining = await maybe_await(rq_client.list_requests()) + assert isinstance(remaining, PageOfRequests) + if remaining.items and len(remaining.items) == 5: break - assert remaining is not None + assert isinstance(remaining, PageOfRequests) + assert isinstance(remaining.items, list) assert len(remaining.items) == 5 + assert isinstance(remaining.items[0], Request) finally: await maybe_await(rq_client.delete()) diff --git a/tests/integration/test_run.py b/tests/integration/test_run.py index 8c79e566..8e42693c 100644 --- a/tests/integration/test_run.py +++ b/tests/integration/test_run.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import RunShort +from 
apify_client._pagination_classes import PageOfItems + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Dataset, KeyValueStore, ListOfRuns, RequestQueue, Run + from apify_client._models_generated import Dataset, KeyValueStore, RequestQueue, Run from datetime import UTC, datetime, timedelta @@ -35,14 +38,20 @@ async def test_run_collection_list_multiple_statuses(client: ApifyClient | Apify try: run_collection = client.actor(HELLO_WORLD_ACTOR).runs() - result = await maybe_await(run_collection.list(status=[ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT])) - multiple_status_runs = cast('ListOfRuns', result) + multiple_status_runs = await maybe_await( + run_collection.list(status=[ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT]) + ) + single_status_runs = await maybe_await(run_collection.list(status=ActorJobStatus.SUCCEEDED)) - result = await maybe_await(run_collection.list(status=ActorJobStatus.SUCCEEDED)) - single_status_runs = cast('ListOfRuns', result) + assert isinstance(multiple_status_runs, PageOfItems) + assert isinstance(multiple_status_runs.items, list) + if multiple_status_runs.items: + assert isinstance(multiple_status_runs.items[0], RunShort) - assert multiple_status_runs is not None - assert single_status_runs is not None + assert isinstance(single_status_runs, PageOfItems) + assert isinstance(single_status_runs.items, list) + if single_status_runs.items: + assert isinstance(single_status_runs.items[0], RunShort) assert all( run.status in [ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT] for run in multiple_status_runs.items @@ -294,13 +303,13 @@ async def test_run_log(client: ApifyClient | ApifyClientAsync) -> None: async def test_run_runs_client(client: ApifyClient | ApifyClientAsync) -> None: """Test listing runs through the run collection client.""" # List runs (should return valid data structure) - result = await maybe_await(client.runs().list(limit=10)) - runs_page = 
cast('ListOfRuns', result) - assert runs_page is not None - assert runs_page.items is not None + runs_page = await maybe_await(client.runs().list(limit=10)) + + assert isinstance(runs_page, PageOfItems) assert isinstance(runs_page.items, list) - # The user may have runs, verify the structure + # The user may have 0 runs — only check element type when any were returned. if runs_page.items: + assert isinstance(runs_page.items[0], RunShort) first_run = runs_page.items[0] assert first_run.id is not None assert first_run.act_id is not None diff --git a/tests/integration/test_schedule.py b/tests/integration/test_schedule.py index 2337116f..3b365ad1 100644 --- a/tests/integration/test_schedule.py +++ b/tests/integration/test_schedule.py @@ -4,9 +4,12 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import ScheduleShort +from apify_client._pagination_classes import PageOfItems + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfSchedules, Schedule + from apify_client._models_generated import Schedule from ._utils import get_random_resource_name, maybe_await @@ -116,10 +119,11 @@ async def test_schedule_list(client: ApifyClient | ApifyClientAsync) -> None: try: # List schedules - result = await maybe_await(client.schedules().list(limit=100)) - schedules_page = cast('ListOfSchedules', result) - assert schedules_page is not None - assert schedules_page.items is not None + schedules_page = await maybe_await(client.schedules().list(limit=100)) + + assert isinstance(schedules_page, PageOfItems) + assert isinstance(schedules_page.items, list) + assert isinstance(schedules_page.items[0], ScheduleShort) # Verify our schedules are in the list schedule_ids = [s.id for s in schedules_page.items] diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 69a3e8fc..34df55b6 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ 
-2,11 +2,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING + +from apify_client._models_generated import StoreListActor +from apify_client._pagination_classes import PageOfItems if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfStoreActors from ._utils import maybe_await @@ -14,32 +16,34 @@ async def test_store_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing public Actors in the store.""" - result = await maybe_await(client.store().list(limit=10)) - actors_list = cast('ListOfStoreActors', result) - assert actors_list is not None - assert actors_list.items is not None - assert len(actors_list.items) > 0 # Store always has actors + actors_list = await maybe_await(client.store().list(limit=10)) + + assert isinstance(actors_list, PageOfItems) + assert isinstance(actors_list.items, list) + assert isinstance(actors_list.items[0], StoreListActor) # Store always has actors async def test_store_list_with_search(client: ApifyClient | ApifyClientAsync) -> None: """Test listing store with search filter.""" - result = await maybe_await(client.store().list(limit=5, search='web scraper')) - store_page = cast('ListOfStoreActors', result) + store_page = await maybe_await(client.store().list(limit=5, search='web scraper')) - assert store_page is not None - assert store_page.items is not None + assert isinstance(store_page, PageOfItems) assert isinstance(store_page.items, list) + if store_page.items: + assert isinstance(store_page.items[0], StoreListActor) async def test_store_list_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test store listing pagination.""" - result1 = await maybe_await(client.store().list(limit=5, offset=0)) - result2 = await maybe_await(client.store().list(limit=5, offset=5)) - page1 = cast('ListOfStoreActors', result1) - page2 = cast('ListOfStoreActors', result2) - - assert page1 is not 
None - assert page2 is not None + page1 = await maybe_await(client.store().list(limit=5, offset=0)) + page2 = await maybe_await(client.store().list(limit=5, offset=5)) + + assert isinstance(page1, PageOfItems) + assert isinstance(page1.items, list) + assert isinstance(page1.items[0], StoreListActor) + assert isinstance(page2, PageOfItems) + assert isinstance(page2.items, list) # Verify different results (if enough actors exist) if len(page1.items) == 5 and len(page2.items) > 0: + assert isinstance(page2.items[0], StoreListActor) assert page1.items[0].id != page2.items[0].id diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py index 322185a2..3b93e179 100644 --- a/tests/integration/test_task.py +++ b/tests/integration/test_task.py @@ -6,10 +6,12 @@ from typing import TYPE_CHECKING, cast from ._utils import get_random_resource_name, maybe_await +from apify_client._models_generated import RunShort, TaskShort +from apify_client._pagination_classes import PageOfItems if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import Actor, ListOfRuns, ListOfTasks, ListOfWebhooks, Run, Task + from apify_client._models_generated import Actor, Run, Task # Use a simple, fast public actor for testing HELLO_WORLD_ACTOR = 'apify/hello-world' @@ -113,10 +115,11 @@ async def test_task_list(client: ApifyClient | ApifyClientAsync) -> None: try: # List tasks - result = await maybe_await(client.tasks().list(limit=100)) - tasks_page = cast('ListOfTasks', result) - assert tasks_page is not None - assert tasks_page.items is not None + tasks_page = await maybe_await(client.tasks().list(limit=100)) + + assert isinstance(tasks_page, PageOfItems) + assert isinstance(tasks_page.items, list) + assert isinstance(tasks_page.items[0], TaskShort) # Verify our task is in the list task_ids = [t.id for t in tasks_page.items] @@ -289,10 +292,11 @@ async def test_task_runs(client: ApifyClient | ApifyClientAsync) -> None: # 
List runs for this task runs_client = task_client.runs() - result = await maybe_await(runs_client.list(limit=10)) - runs_page = cast('ListOfRuns', result) - assert runs_page is not None - assert runs_page.items is not None + runs_page = await maybe_await(runs_client.list(limit=10)) + + assert isinstance(runs_page, PageOfItems) + assert isinstance(runs_page.items, list) + assert isinstance(runs_page.items[0], RunShort) assert len(runs_page.items) >= 1 # Cleanup run @@ -365,10 +369,10 @@ async def test_task_webhooks(client: ApifyClient | ApifyClientAsync) -> None: try: # Get webhooks client webhooks_client = task_client.webhooks() - result = await maybe_await(webhooks_client.list()) - webhooks_page = cast('ListOfWebhooks', result) - assert webhooks_page is not None - assert webhooks_page.items is not None + webhooks_page = await maybe_await(webhooks_client.list()) + + assert isinstance(webhooks_page, PageOfItems) + assert isinstance(webhooks_page.items, list) # New task should have no webhooks assert len(webhooks_page.items) == 0 diff --git a/tests/integration/test_webhook.py b/tests/integration/test_webhook.py index a011aaa7..a40f8551 100644 --- a/tests/integration/test_webhook.py +++ b/tests/integration/test_webhook.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING +from apify_client._pagination_classes import PageOfItems + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync @@ -11,13 +13,12 @@ from ._utils import maybe_await from apify_client._models_generated import ( ActorJobStatus, - ListOfRuns, - ListOfWebhookDispatches, - ListOfWebhooks, Run, + RunShort, Webhook, WebhookDispatch, WebhookEventType, + WebhookShort, ) HELLO_WORLD_ACTOR = 'apify/hello-world' @@ -32,9 +33,11 @@ async def _get_finished_run_id(client: ApifyClient | ApifyClientAsync) -> str: """ runs_page = await maybe_await(client.actor(HELLO_WORLD_ACTOR).runs().list(limit=1, status=ActorJobStatus.SUCCEEDED)) - assert isinstance(runs_page, ListOfRuns) + assert 
isinstance(runs_page, PageOfItems) + assert isinstance(runs_page.items, list) if len(runs_page.items) > 0: + assert isinstance(runs_page.items[0], RunShort) return runs_page.items[0].id # No completed runs found - start one and wait for it to finish @@ -49,16 +52,18 @@ async def test_list_webhooks(client: ApifyClient | ApifyClientAsync) -> None: """Test listing webhooks.""" webhooks_page = await maybe_await(client.webhooks().list(limit=10)) - assert isinstance(webhooks_page, ListOfWebhooks) + assert isinstance(webhooks_page, PageOfItems) assert isinstance(webhooks_page.items, list) + assert isinstance(webhooks_page.items[0], WebhookShort) async def test_list_webhooks_pagination(client: ApifyClient | ApifyClientAsync) -> None: """Test listing webhooks with pagination.""" webhooks_page = await maybe_await(client.webhooks().list(limit=5, offset=0)) - assert isinstance(webhooks_page, ListOfWebhooks) + assert isinstance(webhooks_page, PageOfItems) assert isinstance(webhooks_page.items, list) + assert isinstance(webhooks_page.items[0], WebhookShort) async def test_webhook_create_and_get(client: ApifyClient | ApifyClientAsync) -> None: @@ -166,8 +171,10 @@ async def test_webhook_dispatches(client: ApifyClient | ApifyClientAsync) -> Non # List dispatches for this webhook dispatches = await maybe_await(webhook_client.dispatches().list()) - assert isinstance(dispatches, ListOfWebhookDispatches) + assert isinstance(dispatches, PageOfItems) + assert isinstance(dispatches.items, list) assert len(dispatches.items) > 0 + assert isinstance(dispatches.items[0], WebhookDispatch) finally: await maybe_await(webhook_client.delete()) diff --git a/tests/integration/test_webhook_dispatch.py b/tests/integration/test_webhook_dispatch.py index 5bfc106d..3f8e82c5 100644 --- a/tests/integration/test_webhook_dispatch.py +++ b/tests/integration/test_webhook_dispatch.py @@ -4,9 +4,11 @@ from typing import TYPE_CHECKING, cast +from apify_client._models_generated import WebhookDispatch +from 
apify_client._pagination_classes import PageOfItems + if TYPE_CHECKING: from apify_client import ApifyClient, ApifyClientAsync - from apify_client._models_generated import ListOfWebhookDispatches, WebhookDispatch from ._utils import maybe_await @@ -14,24 +16,26 @@ async def test_webhook_dispatch_list(client: ApifyClient | ApifyClientAsync) -> None: """Test listing webhook dispatches.""" - result = await maybe_await(client.webhook_dispatches().list(limit=10)) - dispatches_page = cast('ListOfWebhookDispatches', result) + dispatches_page = await maybe_await(client.webhook_dispatches().list(limit=10)) - assert dispatches_page is not None - assert dispatches_page.items is not None + assert isinstance(dispatches_page, PageOfItems) assert isinstance(dispatches_page.items, list) - # User may have 0 dispatches, so we just verify the structure + # User may have 0 dispatches — only check element type when any were returned. + if dispatches_page.items: + assert isinstance(dispatches_page.items[0], WebhookDispatch) async def test_webhook_dispatch_get(client: ApifyClient | ApifyClientAsync) -> None: """Test getting a specific webhook dispatch.""" # First list dispatches to get a dispatch ID - result = await maybe_await(client.webhook_dispatches().list(limit=1)) - dispatches_page = cast('ListOfWebhookDispatches', result) - assert dispatches_page is not None + dispatches_page = await maybe_await(client.webhook_dispatches().list(limit=1)) + + assert isinstance(dispatches_page, PageOfItems) + assert isinstance(dispatches_page.items, list) if dispatches_page.items: # If there are dispatches, test the get method + assert isinstance(dispatches_page.items[0], WebhookDispatch) dispatch_id = dispatches_page.items[0].id result = await maybe_await(client.webhook_dispatch(dispatch_id).get()) dispatch = cast('WebhookDispatch', result) diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py new file mode 100644 index 00000000..58306ba2 --- /dev/null +++ 
b/tests/unit/test_client_pagination.py @@ -0,0 +1,660 @@ +from __future__ import annotations + +import dataclasses +import json +import re +from typing import TYPE_CHECKING, Any, Literal, TypeAlias + +import pytest +from pydantic.fields import FieldInfo +from werkzeug import Response + +from apify_client import ApifyClient, ApifyClientAsync +from apify_client import _models_generated as _models_module +from apify_client._resource_clients import ( + ActorCollectionClient, + ActorCollectionClientAsync, + ActorEnvVarCollectionClient, + ActorEnvVarCollectionClientAsync, + ActorVersionCollectionClient, + ActorVersionCollectionClientAsync, + BuildCollectionClient, + BuildCollectionClientAsync, + DatasetClient, + DatasetClientAsync, + DatasetCollectionClient, + DatasetCollectionClientAsync, + KeyValueStoreClient, + KeyValueStoreClientAsync, + KeyValueStoreCollectionClient, + KeyValueStoreCollectionClientAsync, + RequestQueueClient, + RequestQueueClientAsync, + RequestQueueCollectionClient, + RequestQueueCollectionClientAsync, + RunCollectionClient, + RunCollectionClientAsync, + ScheduleCollectionClient, + ScheduleCollectionClientAsync, + StoreCollectionClient, + StoreCollectionClientAsync, + TaskCollectionClient, + TaskCollectionClientAsync, + WebhookCollectionClient, + WebhookCollectionClientAsync, + WebhookDispatchCollectionClient, + WebhookDispatchCollectionClientAsync, +) + +if TYPE_CHECKING: + from collections.abc import Callable + + from _pytest.mark import ParameterSet + from pydantic import BaseModel + from pytest_httpserver import HTTPServer + from werkzeug import Request + + +CollectionClient: TypeAlias = ( + ActorCollectionClient + | BuildCollectionClient + | RunCollectionClient + | ScheduleCollectionClient + | TaskCollectionClient + | WebhookCollectionClient + | WebhookDispatchCollectionClient + | DatasetCollectionClient + | KeyValueStoreCollectionClient + | RequestQueueCollectionClient + | StoreCollectionClient + | ActorEnvVarCollectionClient + | 
ActorVersionCollectionClient +) + +CollectionClientAsync: TypeAlias = ( + ActorCollectionClientAsync + | BuildCollectionClientAsync + | RunCollectionClientAsync + | ScheduleCollectionClientAsync + | TaskCollectionClientAsync + | WebhookCollectionClientAsync + | WebhookDispatchCollectionClientAsync + | DatasetCollectionClientAsync + | KeyValueStoreCollectionClientAsync + | RequestQueueCollectionClientAsync + | StoreCollectionClientAsync + | ActorEnvVarCollectionClientAsync + | ActorVersionCollectionClientAsync +) + +ID_PLACEHOLDER = 'some-id' + + +# Inner list models whose `items: list[]` is relaxed to `list[dict]`. +# Point of these tests is pagination mechanism, not internal object validation. +_RELAXED_LIST_MODELS = ( + 'ListOfActors', + 'ListOfBuilds', + 'ListOfDatasets', + 'ListOfEnvVars', + 'ListOfKeys', + 'ListOfKeyValueStores', + 'ListOfRequestQueues', + 'ListOfRequests', + 'ListOfRuns', + 'ListOfSchedules', + 'ListOfStoreActors', + 'ListOfTasks', + 'ListOfVersions', + 'ListOfWebhookDispatches', + 'ListOfWebhooks', +) + +# Outer wrappers that embed a relaxed list model via `.data`. Their compiled schema pins the +# inner's schema at construction time, so they need a forced rebuild to pick up the relaxation. +# The wrappers themselves are not mutated — their own field annotations stay as-is. +_REBUILT_RESPONSE_WRAPPERS = ( + 'ListOfActorsInStoreResponse', + 'ListOfActorsResponse', + 'ListOfBuildsResponse', + 'ListOfDatasetsResponse', + 'ListOfEnvVarsResponse', + 'ListOfKeyValueStoresResponse', + 'ListOfKeysResponse', + 'ListOfRequestQueuesResponse', + 'ListOfRequestsResponse', + 'ListOfRunsResponse', + 'ListOfSchedulesResponse', + 'ListOfTasksResponse', + 'ListOfVersionsResponse', + 'ListOfWebhooksResponse', +) + + +@pytest.fixture(autouse=True) +def _relax_item_validation() -> Any: + """Relax only the element type of `items` on paginated list models for the test run. 
+ + Pagination tests feed synthetic `{'id': N}` items that don't satisfy the real API schemas + (`ActorShort`, `BuildShort`, `Request`, `EnvVar`, …). Instead of bypassing validation + wholesale, each inner `ListOf*` model has its `items` field swapped to `list[dict]` + and rebuilt. Outer `.data` wrapping and every pagination-metadata field remain validated. + """ + relaxed_field = FieldInfo.from_annotation(list[dict]) + originals: dict[type[BaseModel], FieldInfo] = {} + wrappers = [getattr(_models_module, name) for name in _REBUILT_RESPONSE_WRAPPERS] + + for name in _RELAXED_LIST_MODELS: + cls = getattr(_models_module, name) + originals[cls] = cls.model_fields['items'] + cls.model_fields['items'] = relaxed_field + cls.model_rebuild(force=True) + for wrapper in wrappers: + wrapper.model_rebuild(force=True) + try: + yield + finally: + for cls, field in originals.items(): + cls.model_fields['items'] = field + cls.model_rebuild(force=True) + for wrapper in wrappers: + wrapper.model_rebuild(force=True) + + +def create_items(start: int, end: int, step: int | None = None) -> list[dict[str, int]]: + """Create a list of test items for the given index range.""" + if not step: + step = -1 if end < start else 1 + return [{'id': i} for i in range(start, end, step)] + + +NORMAL_ITEMS = 2500 +EXTRA_ITEMS_UNNAMED = 100 +MAX_ITEMS_PER_PAGE = 1000 + + +def _is_true(value: str | None) -> bool: + """Match the `'true'` wire form produced by the client's bool→string serialization.""" + return value == 'true' + + +def _parse_int_param(value: str | None) -> int: + return int(value) if value not in (None, '') else 0 + + +def _handle_offset_pagination(request: Request) -> Response: + """Serve an offset-paginated Apify API response. + + The simulated platform holds 2500 items normally and an additional 100 when + ``unnamed=true`` is requested. Pages are capped at 1000 items regardless of the requested + limit, mirroring the real API. 
The dataset items endpoint returns items as a raw list; + all other endpoints wrap them in ``{'data': {...}}``. + """ + params = request.args + + total_items = (NORMAL_ITEMS + EXTRA_ITEMS_UNNAMED) if _is_true(params.get('unnamed')) else NORMAL_ITEMS + offset = _parse_int_param(params.get('offset')) + limit = _parse_int_param(params.get('limit')) + assert offset >= 0, 'Invalid offset sent to API' + assert limit >= 0, 'Invalid limit sent to API' + + desc = _is_true(params.get('desc')) + items = create_items(total_items, 0) if desc else create_items(0, total_items) + + lower_index = min(offset, total_items) + upper_index = min(offset + (limit or total_items), total_items) + count = min(max(upper_index - lower_index, 0), MAX_ITEMS_PER_PAGE) + selected_items = items[lower_index : min(upper_index, lower_index + MAX_ITEMS_PER_PAGE)] + + # Every second item is filtered out when `skipEmpty=true`, `skipHidden=true`, or `clean=true`. + if _is_true(params.get('skipEmpty')) or _is_true(params.get('skipHidden')) or _is_true(params.get('clean')): + selected_items = selected_items[::2] + + headers = { + 'x-apify-pagination-count': str(count), + 'x-apify-pagination-total': str(total_items), + 'x-apify-pagination-offset': str(offset), + 'x-apify-pagination-limit': str(limit or count or 1), + 'x-apify-pagination-desc': str(desc).lower(), + 'content-type': 'application/json', + } + + if request.path.endswith(f'/datasets/{ID_PLACEHOLDER}/items'): + body: Any = selected_items + else: + body = { + 'data': { + 'total': total_items, + 'count': count, + 'offset': offset, + 'limit': limit or (count or 1), + 'desc': desc, + 'items': selected_items, + } + } + return Response(status=200, headers=headers, response=json.dumps(body)) + + +def _handle_cursor_pagination(request: Request) -> Response: + """Serve a cursor-paginated Apify API response for KVS keys and RQ requests. + + Holds 2500 synthetic items whose integer `id` equals their position. Each page is capped + at 1000 items. 
KVS uses `exclusiveStartKey`; RQ accepts either the deprecated + `exclusiveStartId` on the initial call or the opaque `cursor` on subsequent calls. All + three values encode the last-seen item id as a string — the next page starts at id + 1. + """ + params = request.args + limit = _parse_int_param(params.get('limit')) + assert limit >= 0, 'Invalid limit sent to API' + + cursor_raw = params.get('exclusiveStartKey') or params.get('exclusiveStartId') or params.get('cursor') + + total_items = NORMAL_ITEMS + start = int(cursor_raw) + 1 if cursor_raw not in (None, '') else 0 + end = total_items if not limit else min(start + limit, total_items) + page_end = min(end, start + MAX_ITEMS_PER_PAGE) + selected_items = [{'id': i} for i in range(start, page_end)] + + if request.path.endswith('/keys'): + is_truncated = page_end < total_items and bool(selected_items) + next_exclusive_start_key = str(selected_items[-1]['id']) if selected_items and is_truncated else None + body: dict[str, Any] = { + 'data': { + 'items': selected_items, + 'count': len(selected_items), + 'limit': limit or (len(selected_items) or 1), + 'is_truncated': is_truncated, + 'next_exclusive_start_key': next_exclusive_start_key, + } + } + else: # `/requests` + has_more = page_end < total_items and bool(selected_items) + next_cursor = str(selected_items[-1]['id']) if has_more else None + body = { + 'data': { + 'items': selected_items, + 'count': len(selected_items), + 'limit': limit or (len(selected_items) or 1), + 'next_cursor': next_cursor, + } + } + return Response(status=200, headers={'content-type': 'application/json'}, response=json.dumps(body)) + + +def _pagination_handler(request: Request) -> Response: + """Dispatch between cursor-based (KVS keys, RQ requests) and offset-based endpoints.""" + if request.path.endswith(('/keys', '/requests')): + return _handle_cursor_pagination(request) + return _handle_offset_pagination(request) + + +@pytest.fixture +def pagination_server(httpserver: HTTPServer) -> 
HTTPServer: + """Register a catch-all handler that mirrors the Apify paginated endpoints.""" + httpserver.expect_request(re.compile(r'.*')).respond_with_handler(_pagination_handler) + return httpserver + + +def _make_sync_client(httpserver: HTTPServer) -> ApifyClient: + return ApifyClient(token='test', api_url=httpserver.url_for('/')) + + +def _make_async_client(httpserver: HTTPServer) -> ApifyClientAsync: + return ApifyClientAsync(token='test', api_url=httpserver.url_for('/')) + + +# Map resource-client class name to a factory that, given an `ApifyClient`/`ApifyClientAsync`, +# returns the sub-client under test. Usable for both sync and async since every accessor is +# available symmetrically on both root clients. +_CLIENT_FACTORIES: dict[str, Callable[[Any], Any]] = { + 'ActorCollectionClient': lambda c: c.actors(), + 'ScheduleCollectionClient': lambda c: c.schedules(), + 'TaskCollectionClient': lambda c: c.tasks(), + 'WebhookCollectionClient': lambda c: c.webhooks(), + 'WebhookDispatchCollectionClient': lambda c: c.webhook_dispatches(), + 'StoreCollectionClient': lambda c: c.store(), + 'DatasetCollectionClient': lambda c: c.datasets(), + 'KeyValueStoreCollectionClient': lambda c: c.key_value_stores(), + 'RequestQueueCollectionClient': lambda c: c.request_queues(), + 'BuildCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).builds(), + 'RunCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).runs(), + 'ActorVersionCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).versions(), + 'ActorEnvVarCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).version('some-version').env_vars(), + 'DatasetClient': lambda c: c.dataset(ID_PLACEHOLDER), + 'KeyValueStoreClient': lambda c: c.key_value_store(ID_PLACEHOLDER), + 'RequestQueueClient': lambda c: c.request_queue(ID_PLACEHOLDER), +} + + +_CLIENT_SET_NAMES: dict[Literal['collection', 'dataset', 'kvs', 'rq'], tuple[str, ...]] = { + # Tuple rather than set: pytest-xdist requires a stable iteration order across workers. 
+ # https://pytest-xdist.readthedocs.io/en/stable/known-limitations.html#order-and-amount-of-test-must-be-consistent + 'collection': ( + 'ActorCollectionClient', + 'ScheduleCollectionClient', + 'TaskCollectionClient', + 'WebhookCollectionClient', + 'WebhookDispatchCollectionClient', + 'StoreCollectionClient', + 'DatasetCollectionClient', + 'KeyValueStoreCollectionClient', + 'RequestQueueCollectionClient', + 'BuildCollectionClient', + 'RunCollectionClient', + 'ActorVersionCollectionClient', + 'ActorEnvVarCollectionClient', + ), + 'dataset': ('DatasetClient',), + 'kvs': ('KeyValueStoreClient',), + 'rq': ('RequestQueueClient',), +} + + +@dataclasses.dataclass +class _PaginationCase: + """A single parametrized pagination test case.""" + + id: str + inputs: dict + expected_items: list[dict[str, int]] + supported_clients: set[str] + + def __hash__(self) -> int: + return hash(self.id) + + +COLLECTION_CLIENTS = { + 'ActorCollectionClient', + 'BuildCollectionClient', + 'RunCollectionClient', + 'ScheduleCollectionClient', + 'TaskCollectionClient', + 'WebhookCollectionClient', + 'WebhookDispatchCollectionClient', + 'DatasetCollectionClient', + 'KeyValueStoreCollectionClient', + 'RequestQueueCollectionClient', + 'StoreCollectionClient', +} + +NO_OPTIONS_CLIENTS = { + 'ActorEnvVarCollectionClient', + 'ActorVersionCollectionClient', +} + +DATASET_CLIENTS = {'DatasetClient'} +RQ_CLIENTS = {'RequestQueueClient'} +KVS_CLIENTS = {'KeyValueStoreClient'} +STORAGE_CLIENTS = DATASET_CLIENTS | RQ_CLIENTS | KVS_CLIENTS +ALL_CLIENTS = COLLECTION_CLIENTS | NO_OPTIONS_CLIENTS | STORAGE_CLIENTS + +TEST_CASES = ( + _PaginationCase('No options', {}, create_items(0, 2500), ALL_CLIENTS), + _PaginationCase('Limit', {'limit': 1100}, create_items(0, 1100), ALL_CLIENTS - NO_OPTIONS_CLIENTS), + _PaginationCase('Out of range limit', {'limit': 3000}, create_items(0, 2500), ALL_CLIENTS - NO_OPTIONS_CLIENTS), + _PaginationCase( + 'Offset', + {'offset': 1000}, + create_items(1000, 2500), + ALL_CLIENTS - 
NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS, + ), + _PaginationCase( + 'Offset and limit', + {'offset': 1000, 'limit': 1100}, + create_items(1000, 2100), + ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS, + ), + _PaginationCase( + 'Out of range offset', {'offset': 3000}, [], ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS + ), + _PaginationCase( + 'Offset, limit, descending', + {'offset': 1000, 'limit': 1100, 'desc': True}, + create_items(1500, 400), + ALL_CLIENTS - NO_OPTIONS_CLIENTS - {'StoreCollectionClient'} - KVS_CLIENTS - RQ_CLIENTS, + ), + _PaginationCase( + 'Offset, limit, descending, unnamed', + {'offset': 50, 'limit': 1100, 'desc': True, 'unnamed': True}, + create_items(2550, 1450), + {'DatasetCollectionClient', 'KeyValueStoreCollectionClient', 'RequestQueueCollectionClient'}, + ), + _PaginationCase( + 'chunk_size', + {'chunk_size': 100, 'limit': 250}, + create_items(0, 250), + STORAGE_CLIENTS, + ), + _PaginationCase( + 'Offset, limit, descending, chunk_size', + {'offset': 50, 'limit': 1100, 'desc': True, 'chunk_size': 100}, + create_items(2450, 1350), + DATASET_CLIENTS, + ), + _PaginationCase( + 'Offset, limit, descending, chunk_size, clean', + {'limit': 1500, 'chunk_size': 100, 'clean': True}, + # API behavior with `clean=True` is to apply the cleaning after pagination, so we end up with missing items + # being counted towards the limit and thus fewer total items returned. 
+ create_items(0, 1500, 2), + DATASET_CLIENTS, + ), + _PaginationCase( + 'Exclusive start key', + {'exclusive_start_key': '1000'}, + create_items(1001, 2500), + KVS_CLIENTS, + ), + _PaginationCase( + 'Exclusive start key and limit', + {'exclusive_start_key': '1000', 'limit': 500}, + create_items(1001, 1501), + KVS_CLIENTS, + ), + _PaginationCase( + 'Cursor', + {'cursor': '1000'}, + create_items(1001, 2500), + RQ_CLIENTS, + ), + _PaginationCase( + 'Cursor and limit', + {'cursor': '1000', 'limit': 500}, + create_items(1001, 1501), + RQ_CLIENTS, + ), +) + + +def _generate_test_params(client_set: Literal['collection', 'dataset', 'kvs', 'rq']) -> list[ParameterSet]: + """Build the pytest parameter set for the given client category. + + Each parameter carries the resource-client class name; the test body instantiates + the real client against the `httpserver` URL and looks up the factory in + `_CLIENT_FACTORIES`. + """ + client_names = _CLIENT_SET_NAMES[client_set] + return [ + pytest.param(test_case.inputs, test_case.expected_items, client_name, id=f'{client_name}:{test_case.id}') + for test_case in TEST_CASES + for client_name in client_names + if client_name in test_case.supported_clients + ] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='collection'), +) +def test_client_list_iterable( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """Every sync collection client's `list()` return value should iterate across pages.""" + client: CollectionClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = list(client.list(**inputs)) + + if inputs == {}: + list_response = client.list(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + 
_generate_test_params(client_set='collection'), +) +async def test_client_list_iterable_async( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """Every async collection client's `list()` return value should iterate across pages.""" + client: CollectionClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [item async for item in client.list(**inputs)] + + if inputs == {}: + list_response = await client.list(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='dataset'), +) +def test_dataset_items_list_iterable( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The sync dataset client's `list_items()` return value should iterate across pages.""" + client: DatasetClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = list(client.list_items(**inputs)) + + if inputs == {}: + list_response = client.list_items(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + # Until the deprecated `iterate_items` method is removed, it should behave the same + inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} + assert returned_items == list(client.iterate_items(**inputs_without_chunk_size)) + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='dataset'), +) +async def test_dataset_items_list_iterable_async( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The async dataset client's `list_items()` return value should iterate across pages.""" + client: 
DatasetClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [item async for item in client.list_items(**inputs)] + + if inputs == {}: + list_response = await client.list_items(**inputs) + assert len(returned_items) == list_response.total + + assert returned_items == expected_items + + # Until the deprecated `iterate_items` method is removed, it should behave the same + inputs_without_chunk_size = {k: v for k, v in inputs.items() if k != 'chunk_size'} + assert returned_items == [item async for item in client.iterate_items(**inputs_without_chunk_size)] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='kvs'), +) +def test_kvs_list_keys_iterable( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The sync KVS client's `list_keys()` return value should iterate across cursor-paginated pages.""" + client: KeyValueStoreClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = [dict(item) for item in client.list_keys(**inputs)] + + assert returned_items == expected_items + + # Until the deprecated `iterate_keys` method is removed, it should behave the same + assert returned_items == [dict(item) for item in client.iterate_keys(**inputs)] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='kvs'), +) +async def test_kvs_list_keys_iterable_async( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The async KVS client's `list_keys()` return value should iterate across cursor-paginated pages.""" + client: KeyValueStoreClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [dict(item) async for item in client.list_keys(**inputs)] + + assert returned_items == expected_items + 
+ # Until the deprecated `iterate_keys` method is removed, it should behave the same + assert returned_items == [dict(item) async for item in client.iterate_keys(**inputs)] + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='rq'), +) +def test_rq_list_requests_iterable( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The sync RQ client's `list_requests()` return value should iterate across cursor-paginated pages.""" + client: RequestQueueClient = _CLIENT_FACTORIES[client_name](_make_sync_client(pagination_server)) + returned_items = [dict(item) for item in client.list_requests(**inputs)] + assert returned_items == expected_items + + +@pytest.mark.parametrize( + ('inputs', 'expected_items', 'client_name'), + _generate_test_params(client_set='rq'), +) +async def test_rq_list_requests_iterable_async( + pagination_server: HTTPServer, + client_name: str, + inputs: dict, + expected_items: list[dict[str, int]], +) -> None: + """The async RQ client's `list_requests()` return value should iterate across cursor-paginated pages.""" + client: RequestQueueClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server)) + returned_items = [dict(item) async for item in client.list_requests(**inputs)] + assert returned_items == expected_items + + +def test_rq_list_requests_rejects_cursor_and_exclusive_start_id() -> None: + """Passing both `cursor` and `exclusive_start_id` is mutually exclusive and must error.""" + client = ApifyClient(token='').request_queue(ID_PLACEHOLDER) + with pytest.raises(ValueError, match='Cannot use both'): + client.list_requests(cursor='a', exclusive_start_id='b') + + +async def test_rq_list_requests_rejects_cursor_and_exclusive_start_id_async() -> None: + """Async variant of the mutual-exclusion check.""" + client = ApifyClientAsync(token='').request_queue(ID_PLACEHOLDER) + with 
pytest.raises(ValueError, match='Cannot use both'): + client.list_requests(cursor='a', exclusive_start_id='b')