diff --git a/docs/02_concepts/08_pagination.mdx b/docs/02_concepts/08_pagination.mdx
index 9f1da230..247121db 100644
--- a/docs/02_concepts/08_pagination.mdx
+++ b/docs/02_concepts/08_pagination.mdx
@@ -12,10 +12,10 @@ import ApiLink from '@site/src/components/ApiLink';
import PaginationAsyncExample from '!!raw-loader!./code/08_pagination_async.py';
import PaginationSyncExample from '!!raw-loader!./code/08_pagination_sync.py';
-
import IterateItemsAsyncExample from '!!raw-loader!./code/08_iterate_items_async.py';
import IterateItemsSyncExample from '!!raw-loader!./code/08_iterate_items_sync.py';
+
Most methods named `list` or `list_something` in the Apify client return a `ListPage` object. This object provides a consistent interface for working with paginated data and includes the following properties:
- `items` - The main results you're looking for.
@@ -45,7 +45,7 @@ The `ListPage` interface offers several k
## Generator-based iteration
-For most use cases, `iterate_items()` is the recommended way to process all items in a dataset. It handles pagination automatically using a Python generator, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself.
+You can also iterate directly over the return value of the `list` methods. The iteration handles pagination automatically, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself.
@@ -60,6 +60,4 @@ For most use cases, `iterate_items()` is the recommended way to process all item
-`iterate_items()` accepts the same filtering parameters as `list_items()` (`clean`, `fields`, `omit`, `unwind`, `skip_empty`, `skip_hidden`), so you can combine automatic pagination with data filtering.
-
-Similarly, `KeyValueStoreClient` provides an `iterate_keys()` method for iterating over all keys in a key-value store without manual pagination.
+Similarly, you can iterate over the return value of `KeyValueStoreClient.list_keys()` to go through all keys in a key-value store without manual pagination. The older `iterate_keys()` method is deprecated.
diff --git a/docs/02_concepts/code/08_iterate_items_async.py b/docs/02_concepts/code/08_iterate_items_async.py
index fba9b5b0..3b64ea46 100644
--- a/docs/02_concepts/code/08_iterate_items_async.py
+++ b/docs/02_concepts/code/08_iterate_items_async.py
@@ -7,6 +7,11 @@ async def main() -> None:
apify_client = ApifyClientAsync(TOKEN)
dataset_client = apify_client.dataset('dataset-id')
- # Iterate through all items automatically.
- async for item in dataset_client.iterate_items():
- print(item)
+ # Define the pagination parameters
+ limit = 1500 # Number of items in total
+ offset = 100 # Starting offset
+
+ # Iterate through items automatically, lazily sending as many API calls
+ # as needed and receiving items in chunks.
+ async for item in dataset_client.list_items(limit=limit, offset=offset):
+ print(item) # Process the item as needed
diff --git a/docs/02_concepts/code/08_iterate_items_sync.py b/docs/02_concepts/code/08_iterate_items_sync.py
index 005c899f..d7b57c22 100644
--- a/docs/02_concepts/code/08_iterate_items_sync.py
+++ b/docs/02_concepts/code/08_iterate_items_sync.py
@@ -7,9 +7,14 @@ def main() -> None:
apify_client = ApifyClient(TOKEN)
dataset_client = apify_client.dataset('dataset-id')
- # Iterate through all items automatically.
- for item in dataset_client.iterate_items():
- print(item)
+ # Define the pagination parameters
+ limit = 1500 # Number of items in total
+ offset = 100 # Starting offset
+
+ # Iterate through items automatically, lazily sending as many API calls
+ # as needed and receiving items in chunks.
+ for item in dataset_client.list_items(limit=limit, offset=offset):
+ print(item) # Process the item as needed
if __name__ == '__main__':
diff --git a/docs/02_concepts/code/08_pagination_async.py b/docs/02_concepts/code/08_pagination_async.py
index 50e9d047..23ac5fde 100644
--- a/docs/02_concepts/code/08_pagination_async.py
+++ b/docs/02_concepts/code/08_pagination_async.py
@@ -10,26 +10,15 @@ async def main() -> None:
dataset_client = apify_client.dataset('dataset-id')
# Define the pagination parameters
- limit = 1000 # Number of items per page
+    limit = 1000  # Number of items to request from the API
offset = 0 # Starting offset
- all_items = [] # List to store all fetched items
- while True:
- # Fetch a page of items
- response = await dataset_client.list_items(limit=limit, offset=offset)
- items = response.items
- total = response.total
+ # Send single API call to fetch paginated items.
+ # (number of items per single call can be limited by API)
+ paginated_items = await dataset_client.list_items(limit=limit, offset=offset)
- print(f'Fetched {len(items)} items')
+ # Inspect pagination metadata returned by API
+ print(paginated_items.total)
- # Add the fetched items to the complete list
- all_items.extend(items)
-
- # Exit the loop if there are no more items to fetch
- if offset + limit >= total:
- break
-
- # Increment the offset for the next page
- offset += limit
-
- print(f'Overall fetched {len(all_items)} items')
+ for item in paginated_items.items:
+ print(item) # Process the item as needed
diff --git a/docs/02_concepts/code/08_pagination_sync.py b/docs/02_concepts/code/08_pagination_sync.py
index 3beb4fbe..f144339e 100644
--- a/docs/02_concepts/code/08_pagination_sync.py
+++ b/docs/02_concepts/code/08_pagination_sync.py
@@ -10,26 +10,15 @@ def main() -> None:
dataset_client = apify_client.dataset('dataset-id')
# Define the pagination parameters
- limit = 1000 # Number of items per page
+    limit = 1000  # Number of items to request from the API
offset = 0 # Starting offset
- all_items = [] # List to store all fetched items
- while True:
- # Fetch a page of items
- response = dataset_client.list_items(limit=limit, offset=offset)
- items = response.items
- total = response.total
+ # Send single API call to fetch paginated items.
+ # (number of items per single call can be limited by API)
+ paginated_items = dataset_client.list_items(limit=limit, offset=offset)
- print(f'Fetched {len(items)} items')
+ # Inspect pagination metadata returned by API
+ print(paginated_items.total)
- # Add the fetched items to the complete list
- all_items.extend(items)
-
- # Exit the loop if there are no more items to fetch
- if offset + limit >= total:
- break
-
- # Increment the offset for the next page
- offset += limit
-
- print(f'Overall fetched {len(all_items)} items')
+ for item in paginated_items.items:
+ print(item) # Process the item as needed
diff --git a/scripts/_utils.py b/scripts/_utils.py
index 48612c54..b7d91853 100644
--- a/scripts/_utils.py
+++ b/scripts/_utils.py
@@ -27,6 +27,7 @@
(re.compile(r'\bSynchronous\b'), 'Asynchronous'),
(re.compile(r'Retry a function'), 'Retry an async function'),
(re.compile(r'Function to retry'), 'Async function to retry'),
+ (re.compile(r'returned page also supports iteration: `for'), 'returned page also supports iteration: `async for'),
]
"""Patterns for converting sync docstrings to async docstrings."""
diff --git a/src/apify_client/_pagination.py b/src/apify_client/_pagination.py
new file mode 100644
index 00000000..8cf4671f
--- /dev/null
+++ b/src/apify_client/_pagination.py
@@ -0,0 +1,203 @@
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator, Awaitable, Callable, Coroutine, Generator, Iterator
+
+T = TypeVar('T')
+
+
+class HasItems(Protocol[T]):
+    items: list[T]  # The page payload; the iterators below yield these elements one by one.
+
+
+def _min_for_limit_param(a: int | None, b: int | None) -> int | None:
+    """Pick the smaller of two `limit` values, where `None` and `0` both stand for "no limit".
+
+    The Apify API interprets `limit=0` as unlimited, so zero is folded into `None` before comparing.
+    The result is `None` only when neither argument is a finite limit.
+    """
+    # Normalize the API's "0 means unlimited" convention to `None`.
+    first = a or None
+    second = b or None
+
+    # With at most one finite limit there is nothing to compare.
+    if first is None or second is None:
+        return second if first is None else first
+
+    return min(first, second)
+
+
+class _LazyTask(Generic[T]):
+    """Task that is created lazily upon awaiting.
+
+    This allows the same task to be awaited multiple times without scheduling it at the moment of construction.
+    """
+
+    def __init__(self, awaitable: Coroutine[Any, Any, T]) -> None:
+        self._awaitable = awaitable
+        self._task: asyncio.Task[T] | None = None
+
+    def __await__(self) -> Generator[Any, None, T]:
+        if self._task is None:  # First await creates the task; later awaits reuse it.
+            self._task = asyncio.create_task(self._awaitable)
+        return (yield from self._task.__await__())
+
+
+def build_get_iterator(
+    callback: Callable[..., HasItems[T]],
+    first_page: HasItems[T],
+    **kwargs: Any,
+) -> Callable[[], Iterator[T]]:
+    """Build a factory for `Iterator` to yield items across paginated API calls.
+
+    The callback is invoked lazily to fetch further pages; items of `first_page` are yielded without a new call.
+
+    There are several optional kwargs that control the pagination, but not all are accepted on each paginated endpoint.
+    Some endpoints do not return all paginated metadata, so the implementation should be resilient to missing fields,
+    but it can use them if available.
+
+    The `total` field from the first page is not trusted for stopping iteration because it may change between calls;
+    iteration stops when a page has no items or when the user-requested `limit` has been reached.
+
+    The `count` field does not count objects returned, but objects scanned by the API. For example when using filters,
+    the number of returned items can be smaller than `count`. Therefore, `count` is used for offset calculation if
+    available.
+
+    Iteration relevant kwargs:
+        chunk_size: Maximum number of items requested per API call during iteration. Pass `0`
+            or `None` to let the API decide (effectively infinity).
+        limit: User-requested total item limit. Iteration stops once the accumulated `count` (or item count) reaches it.
+        offset: Starting offset for the first page.
+        **other: Passed through to the callback unchanged.
+    """
+    chunk_size = kwargs.pop('chunk_size', 0) or 0
+    offset = kwargs.get('offset') or 0
+    limit = kwargs.get('limit') or 0
+
+    def get_iterator() -> Iterator[T]:
+        current_page = first_page
+        yield from current_page.items
+
+        fetched_items = getattr(current_page, 'count', len(current_page.items))
+        while current_page.items and (not limit or (limit > fetched_items)):
+            new_kwargs = {
+                **kwargs,
+                'offset': offset + fetched_items,
+                'limit': chunk_size if not limit else _min_for_limit_param(limit - fetched_items, chunk_size),
+            }
+            current_page = callback(**new_kwargs)
+            yield from current_page.items
+            fetched_items += getattr(current_page, 'count', len(current_page.items))
+
+    return get_iterator
+
+
+def build_get_iterator_async(
+    callback: Callable[..., Coroutine[Any, Any, HasItems[T]]],
+    fetch_first_page: Awaitable[HasItems[T]],
+    **kwargs: Any,
+) -> Callable[[], AsyncIterator[T]]:
+    """Build a factory for `AsyncIterator` to yield items across paginated API calls.
+
+    Mirrors `build_get_iterator` but for async callbacks; `fetch_first_page` is awaited only once iteration starts.
+    """
+    chunk_size = kwargs.pop('chunk_size', 0) or 0
+    offset = kwargs.get('offset') or 0
+    limit = kwargs.get('limit') or 0
+
+    async def get_async_iterator() -> AsyncIterator[T]:
+        current_page = await fetch_first_page
+        for item in current_page.items:
+            yield item
+
+        fetched_items = getattr(current_page, 'count', len(current_page.items))
+        while current_page.items and (not limit or (limit > fetched_items)):
+            new_kwargs = {
+                **kwargs,
+                'offset': offset + fetched_items,
+                'limit': chunk_size if not limit else _min_for_limit_param(limit - fetched_items, chunk_size),
+            }
+            current_page = await callback(**new_kwargs)
+            for item in current_page.items:
+                yield item
+            fetched_items += getattr(current_page, 'count', len(current_page.items))
+
+    return get_async_iterator
+
+
+def build_get_cursor_iterator(
+    callback: Callable[..., HasItems[T]],
+    first_page: HasItems[T],
+    *,
+    cursor_param: str,
+    limit: int | None = None,
+    chunk_size: int | None = None,
+    **kwargs: Any,
+) -> Callable[[], Iterator[T]]:
+    """Build a factory for `Iterator` to yield items across paginated API calls.
+
+    Mirrors `build_get_iterator` but with cursor-based pagination.
+
+    The caller is responsible for fetching the first page (typically by calling `callback` with
+    the initial cursor). After each page, `getattr(page, f'next_{cursor_param}')` is consulted
+    to obtain the next cursor; a value of `None` ends iteration. The iteration also stops when a
+    page is empty or when the number of fetched items has reached the caller-requested `limit`.
+    """
+    effective_chunk = chunk_size or 0
+    user_limit = limit or 0
+
+    def get_iterator() -> Iterator[T]:
+        current_page = first_page
+        yield from current_page.items
+
+        fetched = len(current_page.items)
+        next_cursor = getattr(current_page, f'next_{cursor_param}')
+
+        while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched):
+            remaining = (user_limit - fetched) if user_limit else 0
+            next_limit = effective_chunk if not user_limit else _min_for_limit_param(remaining, effective_chunk)
+            current_page = callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit})
+            yield from current_page.items
+            fetched += len(current_page.items)
+            next_cursor = getattr(current_page, f'next_{cursor_param}')
+
+    return get_iterator
+
+
+def build_get_cursor_iterator_async(
+    callback: Callable[..., Coroutine[Any, Any, HasItems[T]]],
+    fetch_first_page: Awaitable[HasItems[T]],
+    *,
+    cursor_param: str,
+    limit: int | None = None,
+    chunk_size: int | None = None,
+    **kwargs: Any,
+) -> Callable[[], AsyncIterator[T]]:
+    """Build a factory for `AsyncIterator` to yield items across paginated API calls.
+
+    Mirrors `build_get_cursor_iterator` but for async callbacks; `fetch_first_page` is awaited when iteration starts.
+    """
+    effective_chunk = chunk_size or 0
+    user_limit = limit or 0
+
+    async def get_async_iterator() -> AsyncIterator[T]:
+        current_page = await fetch_first_page
+        for item in current_page.items:
+            yield item
+
+        fetched = len(current_page.items)
+        next_cursor = getattr(current_page, f'next_{cursor_param}')
+
+        while current_page.items and next_cursor is not None and (not user_limit or user_limit > fetched):
+            remaining = (user_limit - fetched) if user_limit else 0
+            next_limit = effective_chunk if not user_limit else _min_for_limit_param(remaining, effective_chunk)
+            current_page = await callback(**{**kwargs, cursor_param: next_cursor, 'limit': next_limit})
+            for item in current_page.items:
+                yield item
+            fetched += len(current_page.items)
+            next_cursor = getattr(current_page, f'next_{cursor_param}')
+
+    return get_async_iterator
diff --git a/src/apify_client/_pagination_classes.py b/src/apify_client/_pagination_classes.py
new file mode 100644
index 00000000..b8a489d7
--- /dev/null
+++ b/src/apify_client/_pagination_classes.py
@@ -0,0 +1,267 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator, Awaitable, Callable, Generator, Iterator
+
+from apify_client._models_generated import (
+ ActorShort,
+ BuildShort,
+ DatasetListItem,
+ EnvVar,
+ KeyValueStore,
+ KeyValueStoreKey,
+ Request,
+ RequestQueueShort,
+ RunShort,
+ ScheduleShort,
+ StoreListActor,
+ TaskShort,
+ Version,
+ WebhookDispatch,
+ WebhookShort,
+)
+
+T = TypeVar('T')
+
+
+@dataclass
+class IterableOf(Generic[T]):
+ _get_iterator: Callable[[], Iterator[T]] = field(repr=False, compare=False)
+
+ def __iter__(self) -> Iterator[T]:
+ return self._get_iterator()
+
+
+@dataclass
+class AsyncIterableOf(Generic[T]):
+ _get_async_iterator: Callable[[], AsyncIterator[T]] = field(repr=False, compare=False)
+
+ def __aiter__(self) -> AsyncIterator[T]:
+ return self._get_async_iterator()
+
+
+@dataclass
+class PageWithItems(Generic[T]):
+ items: list[T]
+
+
+@dataclass
+class PageWithTotal:
+ total: int
+
+
+@dataclass
+class PageWithLimit:
+ limit: int
+
+
+@dataclass
+class PageWithCount:
+ count: int
+
+
+@dataclass
+class PageWithOffset:
+ offset: int
+
+
+@dataclass
+class PageWithDesc:
+ desc: bool
+
+
+@dataclass
+class PageOfItems(PageWithItems[T], PageWithTotal, PageWithLimit, PageWithCount, PageWithOffset, PageWithDesc): ...
+
+
+@dataclass
+class PageOfDatasetItems(PageOfItems[dict[str, Any]]): ...
+
+
+@dataclass
+class PageOfItemsOnlyTotal(PageWithItems[T], PageWithTotal): ...
+
+
+@dataclass
+class PageOfRequests(PageWithItems[Request], PageWithLimit):
+ exclusive_start_id: str | None = None
+ cursor: str | None = None
+ next_cursor: str | None = None
+
+
+@dataclass
+class PageOfKeys(PageWithItems[KeyValueStoreKey], PageWithLimit, PageWithCount):
+ is_truncated: bool
+ exclusive_start_key: str | None = None
+ next_exclusive_start_key: str | None = None
+
+
+@dataclass
+class AwaitablePage(Generic[T]):
+ _awaitable_first_page: Awaitable[PageOfItems[T]] = field(repr=False, compare=False)
+
+ def __await__(self) -> Generator[Any, Any, PageOfItems[T]]:
+ return self._awaitable_first_page.__await__()
+
+
+@dataclass
+class AwaitablePageOnlyTotal(Generic[T]):
+ _awaitable_first_page: Awaitable[PageOfItemsOnlyTotal[T]] = field(repr=False, compare=False)
+
+ def __await__(self) -> Generator[Any, Any, PageOfItemsOnlyTotal[T]]:
+ return self._awaitable_first_page.__await__()
+
+
+@dataclass
+class AwaitablePageOfDatasetItems:
+ _awaitable_first_page: Awaitable[PageOfDatasetItems] = field(repr=False, compare=False)
+
+ def __await__(self) -> Generator[Any, Any, PageOfDatasetItems]:
+ return self._awaitable_first_page.__await__()
+
+
+@dataclass
+class AwaitablePageOfRequests:
+ _awaitable_first_page: Awaitable[PageOfRequests] = field(repr=False, compare=False)
+
+ def __await__(self) -> Generator[Any, Any, PageOfRequests]:
+ return self._awaitable_first_page.__await__()
+
+
+@dataclass
+class AwaitablePageOfKeys:
+ _awaitable_first_page: Awaitable[PageOfKeys] = field(repr=False, compare=False)
+
+ def __await__(self) -> Generator[Any, Any, PageOfKeys]:
+ return self._awaitable_first_page.__await__()
+
+
+@dataclass
+class IterablePageOfActors(PageOfItems[ActorShort], IterableOf[ActorShort]): ...
+
+
+@dataclass
+class IterablePageOfActorsAsync(AwaitablePage[ActorShort], AsyncIterableOf[ActorShort]): ...
+
+
+@dataclass
+class IterablePageOfBuilds(PageOfItems[BuildShort], IterableOf[BuildShort]): ...
+
+
+@dataclass
+class IterablePageOfBuildsAsync(AwaitablePage[BuildShort], AsyncIterableOf[BuildShort]): ...
+
+
+@dataclass
+class IterablePageOfDatasets(PageOfItems[DatasetListItem], IterableOf[DatasetListItem]): ...
+
+
+@dataclass
+class IterablePageOfDatasetsAsync(AwaitablePage[DatasetListItem], AsyncIterableOf[DatasetListItem]): ...
+
+
+@dataclass
+class IterablePageOfDatasetItems(PageOfDatasetItems, IterableOf[dict[str, Any]]): ...
+
+
+@dataclass
+class IterablePageOfDatasetItemsAsync(AwaitablePageOfDatasetItems, AsyncIterableOf[dict[str, Any]]): ...
+
+
+@dataclass
+class IterablePageOfKeyValueStores(PageOfItems[KeyValueStore], IterableOf[KeyValueStore]): ...
+
+
+@dataclass
+class IterablePageOfKeyValueStoresAsync(AwaitablePage[KeyValueStore], AsyncIterableOf[KeyValueStore]): ...
+
+
+@dataclass
+class IterablePageOfRequestQueues(PageOfItems[RequestQueueShort], IterableOf[RequestQueueShort]): ...
+
+
+@dataclass
+class IterablePageOfRequestQueuesAsync(AwaitablePage[RequestQueueShort], AsyncIterableOf[RequestQueueShort]): ...
+
+
+@dataclass
+class IterablePageOfRuns(PageOfItems[RunShort], IterableOf[RunShort]): ...
+
+
+@dataclass
+class IterablePageOfRunsAsync(AwaitablePage[RunShort], AsyncIterableOf[RunShort]): ...
+
+
+@dataclass
+class IterablePageOfSchedules(PageOfItems[ScheduleShort], IterableOf[ScheduleShort]): ...
+
+
+@dataclass
+class IterablePageOfSchedulesAsync(AwaitablePage[ScheduleShort], AsyncIterableOf[ScheduleShort]): ...
+
+
+@dataclass
+class IterablePageOfStoreActors(PageOfItems[StoreListActor], IterableOf[StoreListActor]): ...
+
+
+@dataclass
+class IterablePageOfStoreActorsAsync(AwaitablePage[StoreListActor], AsyncIterableOf[StoreListActor]): ...
+
+
+@dataclass
+class IterablePageOfTasks(PageOfItems[TaskShort], IterableOf[TaskShort]): ...
+
+
+@dataclass
+class IterablePageOfTasksAsync(AwaitablePage[TaskShort], AsyncIterableOf[TaskShort]): ...
+
+
+@dataclass
+class IterablePageOfWebhookDispatches(PageOfItems[WebhookDispatch], IterableOf[WebhookDispatch]): ...
+
+
+@dataclass
+class IterablePageOfWebhookDispatchesAsync(AwaitablePage[WebhookDispatch], AsyncIterableOf[WebhookDispatch]): ...
+
+
+@dataclass
+class IterablePageOfWebhooks(PageOfItems[WebhookShort], IterableOf[WebhookShort]): ...
+
+
+@dataclass
+class IterablePageOfWebhooksAsync(AwaitablePage[WebhookShort], AsyncIterableOf[WebhookShort]): ...
+
+
+@dataclass
+class IterablePageOfEnvVars(PageOfItemsOnlyTotal[EnvVar], IterableOf[EnvVar]): ...
+
+
+@dataclass
+class IterablePageOfEnvVarsAsync(AwaitablePageOnlyTotal[EnvVar], AsyncIterableOf[EnvVar]): ...
+
+
+@dataclass
+class IterablePageOfVersions(PageOfItemsOnlyTotal[Version], IterableOf[Version]): ...
+
+
+@dataclass
+class IterablePageOfVersionsAsync(AwaitablePageOnlyTotal[Version], AsyncIterableOf[Version]): ...
+
+
+@dataclass
+class IterablePageOfRequests(PageOfRequests, IterableOf[Request]): ...
+
+
+@dataclass
+class IterablePageOfRequestsAsync(AwaitablePageOfRequests, AsyncIterableOf[Request]): ...
+
+
+@dataclass
+class IterablePageOfKeys(PageOfKeys, IterableOf[KeyValueStoreKey]): ...
+
+
+@dataclass
+class IterablePageOfKeysAsync(AwaitablePageOfKeys, AsyncIterableOf[KeyValueStoreKey]): ...
diff --git a/src/apify_client/_resource_clients/actor_collection.py b/src/apify_client/_resource_clients/actor_collection.py
index 63a780df..802f0799 100644
--- a/src/apify_client/_resource_clients/actor_collection.py
+++ b/src/apify_client/_resource_clients/actor_collection.py
@@ -10,15 +10,25 @@
CreateActorRequest,
DefaultRunOptions,
ExampleRunInput,
- ListOfActors,
ListOfActorsResponse,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfActors,
+ IterablePageOfActorsAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
from apify_client._utils import to_seconds
if TYPE_CHECKING:
from datetime import timedelta
+ from apify_client._models_generated import ActorShort
from apify_client._types import Timeout
_SORT_BY_TO_API: dict[str, str] = {
@@ -55,9 +65,12 @@ def list(
desc: bool | None = None,
sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at',
timeout: Timeout = 'medium',
- ) -> ListOfActors:
+ ) -> IterablePageOfActors:
"""List the Actors the user has created or used.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual Actors
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors
Args:
@@ -72,8 +85,31 @@ def list(
The list of available Actors matching the specified filters.
"""
api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None
- result = self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by)
- return ListOfActorsResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItems[ActorShort]:
+ result = self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs)
+ data = ListOfActorsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfActors(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
+ )
def create(
self,
@@ -192,7 +228,7 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
my: bool | None = None,
@@ -201,9 +237,12 @@ async def list(
desc: bool | None = None,
sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at',
timeout: Timeout = 'medium',
- ) -> ListOfActors:
+ ) -> IterablePageOfActorsAsync:
"""List the Actors the user has created or used.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual Actors
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors
Args:
@@ -218,8 +257,28 @@ async def list(
The list of available Actors matching the specified filters.
"""
api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None
- result = await self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by)
- return ListOfActorsResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItems[ActorShort]:
+ result = await self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs)
+ data = ListOfActorsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfActorsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
async def create(
self,
diff --git a/src/apify_client/_resource_clients/actor_env_var_collection.py b/src/apify_client/_resource_clients/actor_env_var_collection.py
index d4eb2af5..8cff6522 100644
--- a/src/apify_client/_resource_clients/actor_env_var_collection.py
+++ b/src/apify_client/_resource_clients/actor_env_var_collection.py
@@ -3,7 +3,17 @@
from typing import TYPE_CHECKING, Any
from apify_client._docs import docs_group
-from apify_client._models_generated import EnvVar, EnvVarResponse, ListOfEnvVars, ListOfEnvVarsResponse
+from apify_client._models_generated import EnvVar, EnvVarResponse, ListOfEnvVarsResponse
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfEnvVars,
+ IterablePageOfEnvVarsAsync,
+ PageOfItemsOnlyTotal,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
@@ -29,9 +39,12 @@ def __init__(
**kwargs,
)
- def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
+ def list(self, *, timeout: Timeout = 'short') -> IterablePageOfEnvVars:
"""List the available Actor environment variables.
+ The returned page also supports iteration: `for item in client.list()` yields individual environment
+ variables.
+
https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables
Args:
@@ -40,8 +53,20 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
Returns:
The list of available Actor environment variables.
"""
- result = self._list(timeout=timeout)
- return ListOfEnvVarsResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[EnvVar]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfEnvVarsResponse.model_validate(result).data
+ return PageOfItemsOnlyTotal(items=data.items, total=data.total)
+
+ first_page = _callback()
+ get_iterator = build_get_iterator(_callback, first_page)
+
+ return IterablePageOfEnvVars(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ total=first_page.total,
+ )
def create(
self,
@@ -90,9 +115,12 @@ def __init__(
**kwargs,
)
- async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
+ def list(self, *, timeout: Timeout = 'short') -> IterablePageOfEnvVarsAsync:
"""List the available Actor environment variables.
+ The returned page also supports iteration: `async for item in client.list()` yields individual environment
+ variables.
+
https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables
Args:
@@ -101,8 +129,19 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
Returns:
The list of available Actor environment variables.
"""
- result = await self._list(timeout=timeout)
- return ListOfEnvVarsResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[EnvVar]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfEnvVarsResponse.model_validate(result).data
+ return PageOfItemsOnlyTotal(items=data.items, total=data.total)
+
+ fetch_first_page = _LazyTask(_callback())
+ get_async_iterator = build_get_iterator_async(_callback, fetch_first_page)
+
+ return IterablePageOfEnvVarsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
async def create(
self,
diff --git a/src/apify_client/_resource_clients/actor_version_collection.py b/src/apify_client/_resource_clients/actor_version_collection.py
index aac5e4c3..3cd7504e 100644
--- a/src/apify_client/_resource_clients/actor_version_collection.py
+++ b/src/apify_client/_resource_clients/actor_version_collection.py
@@ -8,7 +8,6 @@
from apify_client._models_generated import (
CreateOrUpdateVersionRequest,
EnvVarRequest,
- ListOfVersions,
ListOfVersionsResponse,
SourceCodeFile,
SourceCodeFolder,
@@ -16,6 +15,16 @@
VersionResponse,
VersionSourceType,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfVersions,
+ IterablePageOfVersionsAsync,
+ PageOfItemsOnlyTotal,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
@@ -44,9 +53,11 @@ def __init__(
**kwargs,
)
- def list(self, *, timeout: Timeout = 'short') -> ListOfVersions:
+ def list(self, *, timeout: Timeout = 'short') -> IterablePageOfVersions:
"""List the available Actor versions.
+ The returned page also supports iteration: `for item in client.list()` yields individual versions.
+
https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions
Args:
@@ -55,8 +66,20 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfVersions:
Returns:
The list of available Actor versions.
"""
- result = self._list(timeout=timeout)
- return ListOfVersionsResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[Version]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfVersionsResponse.model_validate(result).data
+ return PageOfItemsOnlyTotal(items=data.items, total=data.total)
+
+ first_page = _callback()
+ get_iterator = build_get_iterator(_callback, first_page)
+
+ return IterablePageOfVersions(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ total=first_page.total,
+ )
def create(
self,
@@ -131,9 +154,11 @@ def __init__(
**kwargs,
)
- async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions:
+ def list(self, *, timeout: Timeout = 'short') -> IterablePageOfVersionsAsync:
"""List the available Actor versions.
+ The returned page also supports iteration: `async for item in client.list()` yields individual versions.
+
https://docs.apify.com/api/v2#/reference/actors/version-collection/get-list-of-versions
Args:
@@ -142,8 +167,19 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfVersions:
Returns:
The list of available Actor versions.
"""
- result = await self._list(timeout=timeout)
- return ListOfVersionsResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[Version]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfVersionsResponse.model_validate(result).data
+ return PageOfItemsOnlyTotal(items=data.items, total=data.total)
+
+ fetch_first_page = _LazyTask(_callback())
+ get_async_iterator = build_get_iterator_async(_callback, fetch_first_page)
+
+ return IterablePageOfVersionsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
async def create(
self,
diff --git a/src/apify_client/_resource_clients/build_collection.py b/src/apify_client/_resource_clients/build_collection.py
index 6ead2a67..129ba763 100644
--- a/src/apify_client/_resource_clients/build_collection.py
+++ b/src/apify_client/_resource_clients/build_collection.py
@@ -3,10 +3,21 @@
from typing import TYPE_CHECKING, Any
from apify_client._docs import docs_group
-from apify_client._models_generated import ListOfBuilds, ListOfBuildsResponse
+from apify_client._models_generated import ListOfBuildsResponse
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfBuilds,
+ IterablePageOfBuildsAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
+ from apify_client._models_generated import BuildShort
from apify_client._types import Timeout
@@ -36,12 +47,15 @@ def list(
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfBuilds:
+ ) -> IterablePageOfBuilds:
"""List all Actor builds.
List all Actor builds, either of a single Actor, or all user's Actors, depending on where this client
was initialized from.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual builds
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actors/build-collection/get-list-of-builds
https://docs.apify.com/api/v2#/reference/actor-builds/build-collection/get-user-builds-list
@@ -54,8 +68,31 @@ def list(
Returns:
The retrieved Actor builds.
"""
- result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfBuildsResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItems[BuildShort]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfBuildsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfBuilds(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
+ )
@docs_group('Resource clients')
@@ -77,19 +114,22 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfBuilds:
+ ) -> IterablePageOfBuildsAsync:
"""List all Actor builds.
List all Actor builds, either of a single Actor, or all user's Actors, depending on where this client
was initialized from.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual builds
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actors/build-collection/get-list-of-builds
https://docs.apify.com/api/v2#/reference/actor-builds/build-collection/get-user-builds-list
@@ -102,5 +142,25 @@ async def list(
Returns:
The retrieved Actor builds.
"""
- result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfBuildsResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItems[BuildShort]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfBuildsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfBuildsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py
index 850f32d5..c2d2e0f5 100644
--- a/src/apify_client/_resource_clients/dataset.py
+++ b/src/apify_client/_resource_clients/dataset.py
@@ -2,12 +2,24 @@
import warnings
from contextlib import asynccontextmanager, contextmanager
-from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urlparse, urlunparse
+from pydantic import BaseModel
+
from apify_client._docs import docs_group
from apify_client._models_generated import Dataset, DatasetResponse, DatasetStatistics, DatasetStatisticsResponse
+from apify_client._pagination import (
+ _LazyTask,
+ _min_for_limit_param,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfDatasetItems,
+ IterablePageOfDatasetItemsAsync,
+ PageOfDatasetItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
from apify_client._utils import (
create_storage_content_signature,
@@ -25,8 +37,7 @@
@docs_group('Other')
-@dataclass
-class DatasetItemsPage:
+class DatasetItemsPage(BaseModel):
"""A page of dataset items returned by the `list_items` method.
Dataset items are arbitrary JSON objects stored in the dataset, so they cannot be
@@ -141,10 +152,14 @@ def list_items(
flatten: list[str] | None = None,
view: str | None = None,
signature: str | None = None,
+ chunk_size: int | None = None,
timeout: Timeout = 'long',
- ) -> DatasetItemsPage:
+ ) -> IterablePageOfDatasetItems:
"""List the items of the dataset.
+ The returned page also supports iteration: `for item in client.list_items(...)` yields individual
+ items and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items
Args:
@@ -174,45 +189,64 @@ def list_items(
flatten: A list of fields that should be flattened.
view: Name of the dataset view to be used.
signature: Signature used to access the items.
+ chunk_size: Maximum number of items requested per API call when iterating. Only relevant when
+ iterating across pages.
timeout: Timeout for the API HTTP request.
Returns:
A page of the list of dataset items according to the specified filters.
"""
- request_params = self._build_params(
- offset=offset,
- limit=limit,
- desc=desc,
- clean=clean,
- fields=fields,
- omit=omit,
- unwind=unwind,
- skipEmpty=skip_empty,
- skipHidden=skip_hidden,
- flatten=flatten,
- view=view,
- signature=signature,
- )
- response = self._http_client.call(
- url=self._build_url('items'),
- method='GET',
- params=request_params,
- timeout=timeout,
- )
+ def _fetch_page(**kwargs: Any) -> PageOfDatasetItems:
+ request_params = self._build_params(
+ desc=desc,
+ clean=clean,
+ fields=fields,
+ omit=omit,
+ unwind=unwind,
+ skipEmpty=skip_empty,
+ skipHidden=skip_hidden,
+ flatten=flatten,
+ view=view,
+ signature=signature,
+ **kwargs,
+ )
- # When using signature, API returns items as list directly
- items = response_to_list(response)
-
- return DatasetItemsPage(
- items=items,
- total=int(response.headers['x-apify-pagination-total']),
- offset=int(response.headers['x-apify-pagination-offset']),
- # x-apify-pagination-count returns invalid values when hidden/empty items are skipped
- count=len(items),
- # API returns 999999999999 when no limit is used
- limit=int(response.headers['x-apify-pagination-limit']),
- desc=response.headers['x-apify-pagination-desc'].lower() == 'true',
+ response = self._http_client.call(
+ url=self._build_url('items'),
+ method='GET',
+ params=request_params,
+ timeout=timeout,
+ )
+
+ # When using signature, API returns items as list directly
+ items = response_to_list(response)
+
+ # Pagination metadata (total, offset, count, limit, desc) is read from the response headers.
+
+ return PageOfDatasetItems(
+ items=items,
+ total=int(response.headers['x-apify-pagination-total']),
+ offset=int(response.headers['x-apify-pagination-offset']),
+ # x-apify-pagination-count reports the number of processed items, not the number of returned items.
+ # The two differ when items are filtered out via skip_hidden/skip_empty, so take the larger value.
+ count=max(int(response.headers['x-apify-pagination-count']), len(items)),
+ # API returns 999999999999 when no limit is used
+ limit=int(response.headers['x-apify-pagination-limit']),
+ desc=response.headers['x-apify-pagination-desc'].lower() == 'true',
+ )
+
+ first_page = _fetch_page(offset=offset, limit=_min_for_limit_param(limit, chunk_size))
+ get_iterator = build_get_iterator(_fetch_page, first_page, offset=offset, limit=limit, chunk_size=chunk_size)
+
+ return IterablePageOfDatasetItems(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ total=first_page.total,
+ offset=first_page.offset,
+ count=first_page.count,
+ limit=first_page.limit,
+ desc=first_page.desc,
)
def iterate_items(
@@ -229,9 +263,11 @@ def iterate_items(
skip_hidden: bool | None = None,
signature: str | None = None,
timeout: Timeout = 'long',
- ) -> Iterator[dict]:
+ ) -> Iterator[dict[str, Any]]:
"""Iterate over the items in the dataset.
+ Deprecated: iterate the return value of `DatasetClient.list_items()` instead.
+
https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items
Args:
@@ -264,42 +300,26 @@ def iterate_items(
Yields:
An item from the dataset.
"""
- cache_size = 1000
-
- should_finish = False
- read_items = 0
-
- # We can't rely on DatasetItemsPage.total because that is updated with a delay,
- # so if you try to read the dataset items right after a run finishes, you could miss some.
- # Instead, we just read and read until we reach the limit, or until there are no more items to read.
- while not should_finish:
- effective_limit = cache_size
- if limit is not None:
- if read_items == limit:
- break
- effective_limit = min(cache_size, limit - read_items)
-
- current_items_page = self.list_items(
- offset=offset + read_items,
- limit=effective_limit,
- clean=clean,
- desc=desc,
- fields=fields,
- omit=omit,
- unwind=unwind,
- skip_empty=skip_empty,
- skip_hidden=skip_hidden,
- signature=signature,
- timeout=timeout,
- )
-
- yield from current_items_page.items
-
- current_page_item_count = len(current_items_page.items)
- read_items += current_page_item_count
-
- if current_page_item_count < cache_size:
- should_finish = True
+ warnings.warn(
+ '`DatasetClient.iterate_items()` is deprecated, iterate the return value of '
+ '`DatasetClient.list_items()` instead.',
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ yield from self.list_items(
+ offset=offset,
+ limit=limit,
+ clean=clean,
+ desc=desc,
+ fields=fields,
+ omit=omit,
+ unwind=unwind,
+ skip_empty=skip_empty,
+ skip_hidden=skip_hidden,
+ signature=signature,
+ chunk_size=1000,
+ timeout=timeout,
+ )
def download_items(
self,
@@ -801,7 +821,7 @@ async def delete(self, *, timeout: Timeout = 'short') -> None:
"""
await self._delete(timeout=timeout)
- async def list_items(
+ def list_items(
self,
*,
offset: int | None = None,
@@ -816,10 +836,14 @@ async def list_items(
flatten: list[str] | None = None,
view: str | None = None,
signature: str | None = None,
+ chunk_size: int | None = None,
timeout: Timeout = 'long',
- ) -> DatasetItemsPage:
+ ) -> IterablePageOfDatasetItemsAsync:
"""List the items of the dataset.
+ The returned page also supports iteration: `async for item in client.list_items(...)` yields individual
+ items and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items
Args:
@@ -849,45 +873,64 @@ async def list_items(
flatten: A list of fields that should be flattened.
view: Name of the dataset view to be used.
signature: Signature used to access the items.
+ chunk_size: Maximum number of items requested per API call when iterating. Only relevant when
+ iterating across pages.
timeout: Timeout for the API HTTP request.
Returns:
A page of the list of dataset items according to the specified filters.
"""
- request_params = self._build_params(
- offset=offset,
- limit=limit,
- desc=desc,
- clean=clean,
- fields=fields,
- omit=omit,
- unwind=unwind,
- skipEmpty=skip_empty,
- skipHidden=skip_hidden,
- flatten=flatten,
- view=view,
- signature=signature,
- )
- response = await self._http_client.call(
- url=self._build_url('items'),
- method='GET',
- params=request_params,
- timeout=timeout,
+ async def _fetch_page(
+ *,
+ offset: int | None = None,
+ limit: int | None = None,
+ ) -> PageOfDatasetItems:
+ request_params = self._build_params(
+ offset=offset,
+ limit=limit,
+ desc=desc,
+ clean=clean,
+ fields=fields,
+ omit=omit,
+ unwind=unwind,
+ skipEmpty=skip_empty,
+ skipHidden=skip_hidden,
+ flatten=flatten,
+ view=view,
+ signature=signature,
+ )
+
+ response = await self._http_client.call(
+ url=self._build_url('items'),
+ method='GET',
+ params=request_params,
+ timeout=timeout,
+ )
+
+ # When using signature, API returns items as list directly
+ items = response_to_list(response)
+
+ return PageOfDatasetItems(
+ items=items,
+ total=int(response.headers['x-apify-pagination-total']),
+ offset=int(response.headers['x-apify-pagination-offset']),
+ # x-apify-pagination-count reports the number of processed items, not the number of returned items.
+ # The two differ when items are filtered out via skip_hidden/skip_empty, so take the larger value.
+ count=max(int(response.headers['x-apify-pagination-count']), len(items)),
+ # API returns 999999999999 when no limit is used
+ limit=int(response.headers['x-apify-pagination-limit']),
+ desc=response.headers['x-apify-pagination-desc'].lower() == 'true',
+ )
+
+ fetch_first_page = _LazyTask(_fetch_page(offset=offset, limit=_min_for_limit_param(limit, chunk_size)))
+ get_async_iterator = build_get_iterator_async(
+ _fetch_page, fetch_first_page, offset=offset, limit=limit, chunk_size=chunk_size
)
- # When using signature, API returns items as list directly
- items = response_to_list(response)
-
- return DatasetItemsPage(
- items=items,
- total=int(response.headers['x-apify-pagination-total']),
- offset=int(response.headers['x-apify-pagination-offset']),
- # x-apify-pagination-count returns invalid values when hidden/empty items are skipped
- count=len(items),
- # API returns 999999999999 when no limit is used
- limit=int(response.headers['x-apify-pagination-limit']),
- desc=response.headers['x-apify-pagination-desc'].lower() == 'true',
+ return IterablePageOfDatasetItemsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
async def iterate_items(
@@ -904,9 +947,11 @@ async def iterate_items(
skip_hidden: bool | None = None,
signature: str | None = None,
timeout: Timeout = 'long',
- ) -> AsyncIterator[dict]:
+ ) -> AsyncIterator[dict[str, Any]]:
"""Iterate over the items in the dataset.
+ Deprecated: iterate the return value of `DatasetClientAsync.list_items()` instead.
+
https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items
Args:
@@ -939,43 +984,27 @@ async def iterate_items(
Yields:
An item from the dataset.
"""
- cache_size = 1000
-
- should_finish = False
- read_items = 0
-
- # We can't rely on DatasetItemsPage.total because that is updated with a delay,
- # so if you try to read the dataset items right after a run finishes, you could miss some.
- # Instead, we just read and read until we reach the limit, or until there are no more items to read.
- while not should_finish:
- effective_limit = cache_size
- if limit is not None:
- if read_items == limit:
- break
- effective_limit = min(cache_size, limit - read_items)
-
- current_items_page = await self.list_items(
- offset=offset + read_items,
- limit=effective_limit,
- clean=clean,
- desc=desc,
- fields=fields,
- omit=omit,
- unwind=unwind,
- skip_empty=skip_empty,
- skip_hidden=skip_hidden,
- signature=signature,
- timeout=timeout,
- )
-
- for item in current_items_page.items:
- yield item
-
- current_page_item_count = len(current_items_page.items)
- read_items += current_page_item_count
-
- if current_page_item_count < cache_size:
- should_finish = True
+ warnings.warn(
+ '`DatasetClientAsync.iterate_items()` is deprecated, iterate the return value of '
+ '`DatasetClientAsync.list_items()` instead.',
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ async for item in self.list_items(
+ offset=offset,
+ limit=limit,
+ clean=clean,
+ desc=desc,
+ fields=fields,
+ omit=omit,
+ unwind=unwind,
+ skip_empty=skip_empty,
+ skip_hidden=skip_hidden,
+ signature=signature,
+ chunk_size=1000,
+ timeout=timeout,
+ ):
+ yield item
async def get_items_as_bytes(
self,
diff --git a/src/apify_client/_resource_clients/dataset_collection.py b/src/apify_client/_resource_clients/dataset_collection.py
index 2ffb71d6..f9473e67 100644
--- a/src/apify_client/_resource_clients/dataset_collection.py
+++ b/src/apify_client/_resource_clients/dataset_collection.py
@@ -6,13 +6,23 @@
from apify_client._models_generated import (
Dataset,
DatasetResponse,
- ListOfDatasets,
ListOfDatasetsResponse,
StorageOwnership,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfDatasets,
+ IterablePageOfDatasetsAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
+ from apify_client._models_generated import DatasetListItem
from apify_client._types import Timeout
@@ -44,9 +54,12 @@ def list(
desc: bool | None = None,
ownership: StorageOwnership | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfDatasets:
+ ) -> IterablePageOfDatasets:
"""List the available datasets.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual datasets
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets
Args:
@@ -61,10 +74,31 @@ def list(
Returns:
The list of available datasets matching the specified filters.
"""
- result = self._list(
- timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership
+
+ def _callback(**kwargs: Any) -> PageOfItems[DatasetListItem]:
+ result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs)
+ data = ListOfDatasetsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfDatasets(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
)
- return ListOfDatasetsResponse.model_validate(result).data
def get_or_create(
self,
@@ -108,7 +142,7 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
unnamed: bool | None = None,
@@ -117,9 +151,12 @@ async def list(
desc: bool | None = None,
ownership: StorageOwnership | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfDatasets:
+ ) -> IterablePageOfDatasetsAsync:
"""List the available datasets.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual datasets
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets
Args:
@@ -134,10 +171,28 @@ async def list(
Returns:
The list of available datasets matching the specified filters.
"""
- result = await self._list(
- timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership
+
+ async def _callback(**kwargs: Any) -> PageOfItems[DatasetListItem]:
+ result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs)
+ data = ListOfDatasetsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfDatasetsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- return ListOfDatasetsResponse.model_validate(result).data
async def get_or_create(
self,
diff --git a/src/apify_client/_resource_clients/key_value_store.py b/src/apify_client/_resource_clients/key_value_store.py
index 247073c4..a9488ad7 100644
--- a/src/apify_client/_resource_clients/key_value_store.py
+++ b/src/apify_client/_resource_clients/key_value_store.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import re
+import warnings
from contextlib import asynccontextmanager, contextmanager
from http import HTTPStatus
from typing import TYPE_CHECKING, Any
@@ -9,11 +10,20 @@
from apify_client._docs import docs_group
from apify_client._models_generated import (
KeyValueStore,
- KeyValueStoreKey,
KeyValueStoreResponse,
- ListOfKeys,
ListOfKeysResponse,
)
+from apify_client._pagination import (
+ _LazyTask,
+ _min_for_limit_param,
+ build_get_cursor_iterator,
+ build_get_cursor_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfKeys,
+ IterablePageOfKeysAsync,
+ PageOfKeys,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
from apify_client._utils import (
catch_not_found_or_throw,
@@ -29,7 +39,7 @@
from datetime import timedelta
from apify_client._http_clients import HttpResponse
- from apify_client._models_generated import GeneralAccess
+ from apify_client._models_generated import GeneralAccess, KeyValueStoreKey
from apify_client._types import Timeout
@@ -144,92 +154,122 @@ def list_keys(
collection: str | None = None,
prefix: str | None = None,
signature: str | None = None,
+ chunk_size: int | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfKeys:
+ ) -> IterablePageOfKeys:
"""List the keys in the key-value store.
+ The returned page also supports iteration: `for key in client.list_keys(...)` yields individual
+ keys and transparently fetches further pages using cursor-based pagination.
+
https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys
Args:
- limit: Number of keys to be returned. Maximum value is 1000.
+ limit: Total number of keys to yield across all pages when iterating. The API caps each
+ individual request at 1000 keys; use `chunk_size` to control the per-request size.
exclusive_start_key: All keys up to this one (including) are skipped from the result.
collection: The name of the collection in store schema to list keys from.
prefix: The prefix of the keys to be listed.
signature: Signature used to access the items.
+ chunk_size: Maximum number of keys requested per API call when iterating. Capped at
+ 1000 by the API. Only relevant when iterating across pages.
timeout: Timeout for the API HTTP request.
Returns:
The list of keys in the key-value store matching the given arguments.
"""
- request_params = self._build_params(
+
+ def _callback(*, limit: int | None = None, exclusive_start_key: str | None = None) -> PageOfKeys:
+ request_params = self._build_params(
+ limit=limit,
+ exclusiveStartKey=exclusive_start_key,
+ collection=collection,
+ prefix=prefix,
+ signature=signature,
+ )
+ response = self._http_client.call(
+ url=self._build_url('keys'),
+ method='GET',
+ params=request_params,
+ timeout=timeout,
+ )
+ result = response_to_dict(response)
+ data = ListOfKeysResponse.model_validate(result).data
+ return PageOfKeys(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ is_truncated=data.is_truncated,
+ exclusive_start_key=data.exclusive_start_key,
+ next_exclusive_start_key=data.next_exclusive_start_key,
+ )
+
+ first_limit = _min_for_limit_param(limit, chunk_size)
+ first_page = _callback(limit=first_limit, exclusive_start_key=exclusive_start_key)
+ get_iterator = build_get_cursor_iterator(
+ _callback,
+ first_page,
+ cursor_param='exclusive_start_key',
limit=limit,
- exclusiveStartKey=exclusive_start_key,
- collection=collection,
- prefix=prefix,
- signature=signature,
+ chunk_size=chunk_size,
)
- response = self._http_client.call(
- url=self._build_url('keys'),
- method='GET',
- params=request_params,
- timeout=timeout,
+ return IterablePageOfKeys(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ is_truncated=first_page.is_truncated,
+ exclusive_start_key=first_page.exclusive_start_key,
+ next_exclusive_start_key=first_page.next_exclusive_start_key,
)
- result = response_to_dict(response)
- return ListOfKeysResponse.model_validate(result).data
-
def iterate_keys(
self,
*,
limit: int | None = None,
+ exclusive_start_key: str | None = None,
collection: str | None = None,
prefix: str | None = None,
signature: str | None = None,
+ chunk_size: int | None = 1000,
timeout: Timeout = 'long',
) -> Iterator[KeyValueStoreKey]:
"""Iterate over the keys in the key-value store.
+ Deprecated: iterate the return value of `KeyValueStoreClient.list_keys()` instead.
+
https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys
Args:
- limit: Maximum number of keys to return. By default there is no limit.
+ limit: Total number of keys to yield across all pages. The API caps each individual
+ request at 1000 keys; use `chunk_size` to control the per-request size.
+ exclusive_start_key: All keys up to this one (including) are skipped from the result.
collection: The name of the collection in store schema to list keys from.
prefix: The prefix of the keys to be listed.
signature: Signature used to access the items.
+ chunk_size: Maximum number of keys requested per API call when iterating. Capped at
+ 1000 by the API. Only relevant when iterating across pages.
timeout: Timeout for the API HTTP request.
Yields:
A key from the key-value store.
"""
- cache_size = 1000
- read_keys = 0
- exclusive_start_key: str | None = None
-
- while True:
- effective_limit = cache_size
- if limit is not None:
- if read_keys == limit:
- break
- effective_limit = min(cache_size, limit - read_keys)
-
- current_keys_page = self.list_keys(
- limit=effective_limit,
- exclusive_start_key=exclusive_start_key,
- collection=collection,
- prefix=prefix,
- signature=signature,
- timeout=timeout,
- )
-
- yield from current_keys_page.items
-
- read_keys += len(current_keys_page.items)
-
- if not current_keys_page.is_truncated:
- break
-
- exclusive_start_key = current_keys_page.next_exclusive_start_key
+ warnings.warn(
+ '`KeyValueStoreClient.iterate_keys()` is deprecated, iterate the return value of '
+ '`KeyValueStoreClient.list_keys()` instead.',
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ yield from self.list_keys(
+ limit=limit,
+ exclusive_start_key=exclusive_start_key,
+ collection=collection,
+ prefix=prefix,
+ signature=signature,
+ chunk_size=chunk_size,
+ timeout=timeout,
+ )
def get_record(self, key: str, *, signature: str | None = None, timeout: Timeout = 'long') -> dict | None:
"""Retrieve the given record from the key-value store.
@@ -461,7 +501,7 @@ def create_keys_public_url(
Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.
Args:
- limit: Number of keys to be returned. Maximum value is 1000.
+ limit: Number of keys to be returned by the signed request. Maximum value is 1000.
exclusive_start_key: All keys up to this one (including) are skipped from the result.
collection: The name of the collection in store schema to list keys from.
prefix: The prefix of the keys to be listed.
@@ -566,7 +606,7 @@ async def delete(self, *, timeout: Timeout = 'short') -> None:
"""
await self._delete(timeout=timeout)
- async def list_keys(
+ def list_keys(
self,
*,
limit: int | None = None,
@@ -574,93 +614,118 @@ async def list_keys(
collection: str | None = None,
prefix: str | None = None,
signature: str | None = None,
+ chunk_size: int | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfKeys:
+ ) -> IterablePageOfKeysAsync:
"""List the keys in the key-value store.
+ The returned page also supports iteration: `async for key in client.list_keys(...)` yields individual
+ keys and transparently fetches further pages using cursor-based pagination.
+
https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys
Args:
- limit: Number of keys to be returned. Maximum value is 1000.
+ limit: Total number of keys to yield across all pages when iterating. The API caps each
+ individual request at 1000 keys; use `chunk_size` to control the per-request size.
exclusive_start_key: All keys up to this one (including) are skipped from the result.
collection: The name of the collection in store schema to list keys from.
prefix: The prefix of the keys to be listed.
signature: Signature used to access the items.
+ chunk_size: Maximum number of keys requested per API call when iterating. Capped at
+ 1000 by the API. Only relevant when iterating across pages.
timeout: Timeout for the API HTTP request.
Returns:
The list of keys in the key-value store matching the given arguments.
"""
- request_params = self._build_params(
+
+ async def _callback(*, limit: int | None = None, exclusive_start_key: str | None = None) -> PageOfKeys:
+ request_params = self._build_params(
+ limit=limit,
+ exclusiveStartKey=exclusive_start_key,
+ collection=collection,
+ prefix=prefix,
+ signature=signature,
+ )
+ response = await self._http_client.call(
+ url=self._build_url('keys'),
+ method='GET',
+ params=request_params,
+ timeout=timeout,
+ )
+ result = response_to_dict(response)
+ data = ListOfKeysResponse.model_validate(result).data
+ return PageOfKeys(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ is_truncated=data.is_truncated,
+ exclusive_start_key=data.exclusive_start_key,
+ next_exclusive_start_key=data.next_exclusive_start_key,
+ )
+
+ first_limit = _min_for_limit_param(limit, chunk_size)
+ fetch_first_page = _LazyTask(_callback(limit=first_limit, exclusive_start_key=exclusive_start_key))
+ get_async_iterator = build_get_cursor_iterator_async(
+ _callback,
+ fetch_first_page,
+ cursor_param='exclusive_start_key',
limit=limit,
- exclusiveStartKey=exclusive_start_key,
- collection=collection,
- prefix=prefix,
- signature=signature,
+ chunk_size=chunk_size,
)
- response = await self._http_client.call(
- url=self._build_url('keys'),
- method='GET',
- params=request_params,
- timeout=timeout,
+ return IterablePageOfKeysAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- result = response_to_dict(response)
- return ListOfKeysResponse.model_validate(result).data
-
async def iterate_keys(
self,
*,
limit: int | None = None,
+ exclusive_start_key: str | None = None,
collection: str | None = None,
prefix: str | None = None,
signature: str | None = None,
+ chunk_size: int | None = 1000,
timeout: Timeout = 'long',
) -> AsyncIterator[KeyValueStoreKey]:
"""Iterate over the keys in the key-value store.
+ Deprecated: iterate the return value of `KeyValueStoreClientAsync.list_keys()` instead.
+
https://docs.apify.com/api/v2#/reference/key-value-stores/key-collection/get-list-of-keys
Args:
- limit: Maximum number of keys to return. By default there is no limit.
+ limit: Maximum total number of keys to yield across all pages. The API caps each individual
+ request at 1000 keys; use `chunk_size` to control the per-request size.
+ exclusive_start_key: All keys up to this one (including) are skipped from the result.
collection: The name of the collection in store schema to list keys from.
prefix: The prefix of the keys to be listed.
signature: Signature used to access the items.
+ chunk_size: Maximum number of keys requested per API call when iterating. Capped at
+ 1000 by the API. Only relevant when iterating across pages.
timeout: Timeout for the API HTTP request.
Yields:
A key from the key-value store.
"""
- cache_size = 1000
- read_keys = 0
- exclusive_start_key: str | None = None
-
- while True:
- effective_limit = cache_size
- if limit is not None:
- if read_keys == limit:
- break
- effective_limit = min(cache_size, limit - read_keys)
-
- current_keys_page = await self.list_keys(
- limit=effective_limit,
- exclusive_start_key=exclusive_start_key,
- collection=collection,
- prefix=prefix,
- signature=signature,
- timeout=timeout,
- )
-
- for key in current_keys_page.items:
- yield key
-
- read_keys += len(current_keys_page.items)
-
- if not current_keys_page.is_truncated:
- break
-
- exclusive_start_key = current_keys_page.next_exclusive_start_key
+ warnings.warn(
+ '`KeyValueStoreClientAsync.iterate_keys()` is deprecated, iterate the return value of '
+ '`KeyValueStoreClientAsync.list_keys()` instead.',
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ async for key in self.list_keys(
+ limit=limit,
+ exclusive_start_key=exclusive_start_key,
+ collection=collection,
+ prefix=prefix,
+ signature=signature,
+ chunk_size=chunk_size,
+ timeout=timeout,
+ ):
+ yield key
async def get_record(self, key: str, *, signature: str | None = None, timeout: Timeout = 'long') -> dict | None:
"""Retrieve the given record from the key-value store.
@@ -894,7 +959,7 @@ async def create_keys_public_url(
Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.
Args:
- limit: Number of keys to be returned. Maximum value is 1000.
+ limit: Number of keys to be returned by the signed request. Maximum value is 1000.
exclusive_start_key: All keys up to this one (including) are skipped from the result.
collection: The name of the collection in store schema to list keys from.
prefix: The prefix of the keys to be listed.
diff --git a/src/apify_client/_resource_clients/key_value_store_collection.py b/src/apify_client/_resource_clients/key_value_store_collection.py
index f221a192..8974edac 100644
--- a/src/apify_client/_resource_clients/key_value_store_collection.py
+++ b/src/apify_client/_resource_clients/key_value_store_collection.py
@@ -6,10 +6,19 @@
from apify_client._models_generated import (
KeyValueStore,
KeyValueStoreResponse,
- ListOfKeyValueStores,
ListOfKeyValueStoresResponse,
StorageOwnership,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfKeyValueStores,
+ IterablePageOfKeyValueStoresAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
@@ -44,9 +53,12 @@ def list(
desc: bool | None = None,
ownership: StorageOwnership | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfKeyValueStores:
+ ) -> IterablePageOfKeyValueStores:
"""List the available key-value stores.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual
+ key-value stores and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/key-value-stores/store-collection/get-list-of-key-value-stores
Args:
@@ -61,10 +73,31 @@ def list(
Returns:
The list of available key-value stores matching the specified filters.
"""
- result = self._list(
- timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership
+
+ def _callback(**kwargs: Any) -> PageOfItems[KeyValueStore]:
+ result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs)
+ data = ListOfKeyValueStoresResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfKeyValueStores(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
)
- return ListOfKeyValueStoresResponse.model_validate(result).data
def get_or_create(
self,
@@ -108,7 +141,7 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
unnamed: bool | None = None,
@@ -117,9 +150,12 @@ async def list(
desc: bool | None = None,
ownership: StorageOwnership | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfKeyValueStores:
+ ) -> IterablePageOfKeyValueStoresAsync:
"""List the available key-value stores.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual
+ key-value stores and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/key-value-stores/store-collection/get-list-of-key-value-stores
Args:
@@ -134,10 +170,28 @@ async def list(
Returns:
The list of available key-value stores matching the specified filters.
"""
- result = await self._list(
- timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership
+
+ async def _callback(**kwargs: Any) -> PageOfItems[KeyValueStore]:
+ result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs)
+ data = ListOfKeyValueStoresResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfKeyValueStoresAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- return ListOfKeyValueStoresResponse.model_validate(result).data
async def get_or_create(
self,
diff --git a/src/apify_client/_resource_clients/request_queue.py b/src/apify_client/_resource_clients/request_queue.py
index cd00a1cd..d2c7686b 100644
--- a/src/apify_client/_resource_clients/request_queue.py
+++ b/src/apify_client/_resource_clients/request_queue.py
@@ -20,7 +20,6 @@
BatchDeleteResult,
HeadAndLockResponse,
HeadResponse,
- ListOfRequests,
ListOfRequestsResponse,
LockedRequestQueueHead,
ProlongRequestLockResponse,
@@ -35,6 +34,17 @@
UnlockRequestsResponse,
UnlockRequestsResult,
)
+from apify_client._pagination import (
+ _LazyTask,
+ _min_for_limit_param,
+ build_get_cursor_iterator,
+ build_get_cursor_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfRequests,
+ IterablePageOfRequestsAsync,
+ PageOfRequests,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
from apify_client._utils import catch_not_found_or_throw, response_to_dict, to_seconds
from apify_client.errors import ApifyApiError
@@ -500,20 +510,28 @@ def list_requests(
*,
limit: int | None = None,
filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002
- timeout: Timeout = 'medium',
cursor: str | None = None,
exclusive_start_id: str | None = None,
- ) -> ListOfRequests:
+ chunk_size: int | None = None,
+ timeout: Timeout = 'medium',
+ ) -> IterablePageOfRequests:
"""List requests in the queue.
+ The returned page also supports iteration: `for request in client.list_requests(...)` yields
+ individual requests and transparently fetches further pages using the opaque `cursor`
+ returned by the API.
+
https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests
Args:
limit: How many requests to retrieve.
filter: List of request states to use as a filter. Multiple values mean union of the given filters.
- timeout: Timeout for the API HTTP request.
- cursor: A token returned in previous API response, to continue listing next page of requests
+ cursor: A token returned in a previous API response, to continue listing the next page of requests.
exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result.
+ Only applied to the first page fetched; subsequent pages during iteration use `cursor`.
+ chunk_size: Maximum number of requests fetched per API call when iterating. Only
+ relevant when iterating across pages.
+ timeout: Timeout for the API HTTP request.
"""
if exclusive_start_id and cursor:
raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.')
@@ -525,24 +543,55 @@ def list_requests(
stacklevel=2,
)
- request_params = self._build_params(
+ def _callback(*, limit: int | None = None, cursor: str | None = None) -> PageOfRequests:
+ # `exclusive_start_id` is honored only on the first page (when no cursor has been
+ # produced by the server yet); subsequent pages rely on the opaque `cursor`.
+ request_params = self._build_params(
+ limit=limit,
+ filter=','.join(filter) if filter else None,
+ clientKey=self.client_key,
+ exclusiveStartId=exclusive_start_id if cursor is None else None,
+ cursor=cursor,
+ )
+ response = self._http_client.call(
+ url=self._build_url('requests'),
+ method='GET',
+ params=request_params,
+ timeout=timeout,
+ )
+ result = response_to_dict(response)
+ data = ListOfRequestsResponse.model_validate(result).data
+ with warnings.catch_warnings():
+ # `exclusive_start_id` is deprecated on the API model; reading triggers a warning.
+ warnings.simplefilter('ignore', DeprecationWarning)
+ exclusive_start_id_value = data.exclusive_start_id
+ return PageOfRequests(
+ items=data.items,
+ limit=data.limit,
+ exclusive_start_id=exclusive_start_id_value,
+ cursor=data.cursor,
+ next_cursor=data.next_cursor,
+ )
+
+ first_limit = _min_for_limit_param(limit, chunk_size)
+ first_page = _callback(limit=first_limit, cursor=cursor)
+ get_iterator = build_get_cursor_iterator(
+ _callback,
+ first_page,
+ cursor_param='cursor',
limit=limit,
- filter=','.join(filter) if filter else None,
- clientKey=self.client_key,
- exclusiveStartId=exclusive_start_id,
- cursor=cursor,
+ chunk_size=chunk_size,
)
- response = self._http_client.call(
- url=self._build_url('requests'),
- method='GET',
- params=request_params,
- timeout=timeout,
+ return IterablePageOfRequests(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ limit=first_page.limit,
+ exclusive_start_id=first_page.exclusive_start_id,
+ cursor=first_page.cursor,
+ next_cursor=first_page.next_cursor,
)
- result = response_to_dict(response)
- return ListOfRequestsResponse.model_validate(result).data
-
def unlock_requests(self: RequestQueueClient, *, timeout: Timeout = 'long') -> UnlockRequestsResult:
"""Unlock all requests in the queue, which were locked by the same clientKey or from the same Actor run.
@@ -1058,25 +1107,33 @@ async def batch_delete_requests(
result = response_to_dict(response)
return BatchDeleteResponse.model_validate(result).data
- async def list_requests(
+ def list_requests(
self,
*,
limit: int | None = None,
filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002
- timeout: Timeout = 'medium',
cursor: str | None = None,
exclusive_start_id: str | None = None,
- ) -> ListOfRequests:
+ chunk_size: int | None = None,
+ timeout: Timeout = 'medium',
+ ) -> IterablePageOfRequestsAsync:
"""List requests in the queue.
+ The returned page also supports iteration: `async for request in client.list_requests(...)` yields
+ individual requests and transparently fetches further pages using the opaque `cursor`
+ returned by the API.
+
https://docs.apify.com/api/v2#/reference/request-queues/request-collection/list-requests
Args:
limit: How many requests to retrieve.
filter: List of request states to use as a filter. Multiple values mean union of the given filters.
- timeout: Timeout for the API HTTP request.
- cursor: A token returned in previous API response, to continue listing next page of requests
+ cursor: A token returned in a previous API response, to continue listing the next page of requests.
exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result.
+ Only applied to the first page fetched; subsequent pages during iteration use `cursor`.
+ chunk_size: Maximum number of requests fetched per API call when iterating. Only
+ relevant when iterating across pages.
+ timeout: Timeout for the API HTTP request.
"""
if exclusive_start_id and cursor:
raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.')
@@ -1088,24 +1145,51 @@ async def list_requests(
stacklevel=2,
)
- request_params = self._build_params(
+ async def _callback(*, limit: int | None = None, cursor: str | None = None) -> PageOfRequests:
+ # `exclusive_start_id` is honored only on the first page (when no cursor has been
+ # produced by the server yet); subsequent pages rely on the opaque `cursor`.
+ request_params = self._build_params(
+ limit=limit,
+ filter=','.join(filter) if filter else None,
+ clientKey=self.client_key,
+ exclusiveStartId=exclusive_start_id if cursor is None else None,
+ cursor=cursor,
+ )
+ response = await self._http_client.call(
+ url=self._build_url('requests'),
+ method='GET',
+ params=request_params,
+ timeout=timeout,
+ )
+ result = response_to_dict(response)
+ data = ListOfRequestsResponse.model_validate(result).data
+ with warnings.catch_warnings():
+ # `exclusive_start_id` is deprecated on the API model; reading triggers a warning.
+ warnings.simplefilter('ignore', DeprecationWarning)
+ exclusive_start_id_value = data.exclusive_start_id
+ return PageOfRequests(
+ items=data.items,
+ limit=data.limit,
+ exclusive_start_id=exclusive_start_id_value,
+ cursor=data.cursor,
+ next_cursor=data.next_cursor,
+ )
+
+ first_limit = _min_for_limit_param(limit, chunk_size)
+ fetch_first_page = _LazyTask(_callback(limit=first_limit, cursor=cursor))
+ get_async_iterator = build_get_cursor_iterator_async(
+ _callback,
+ fetch_first_page,
+ cursor_param='cursor',
limit=limit,
- filter=','.join(filter) if filter else None,
- clientKey=self.client_key,
- exclusiveStartId=exclusive_start_id,
- cursor=cursor,
+ chunk_size=chunk_size,
)
- response = await self._http_client.call(
- url=self._build_url('requests'),
- method='GET',
- params=request_params,
- timeout=timeout,
+ return IterablePageOfRequestsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- result = response_to_dict(response)
- return ListOfRequestsResponse.model_validate(result).data
-
async def unlock_requests(
self: RequestQueueClientAsync,
*,
diff --git a/src/apify_client/_resource_clients/request_queue_collection.py b/src/apify_client/_resource_clients/request_queue_collection.py
index 1d06fcbc..38ef8552 100644
--- a/src/apify_client/_resource_clients/request_queue_collection.py
+++ b/src/apify_client/_resource_clients/request_queue_collection.py
@@ -4,15 +4,25 @@
from apify_client._docs import docs_group
from apify_client._models_generated import (
- ListOfRequestQueues,
ListOfRequestQueuesResponse,
RequestQueue,
RequestQueueResponse,
StorageOwnership,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfRequestQueues,
+ IterablePageOfRequestQueuesAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
+ from apify_client._models_generated import RequestQueueShort
from apify_client._types import Timeout
@@ -44,9 +54,12 @@ def list(
desc: bool | None = None,
ownership: StorageOwnership | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfRequestQueues:
+ ) -> IterablePageOfRequestQueues:
"""List the available request queues.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual
+ request queues and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/request-queues/queue-collection/get-list-of-request-queues
Args:
@@ -61,10 +74,31 @@ def list(
Returns:
The list of available request queues matching the specified filters.
"""
- result = self._list(
- timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership
+
+ def _callback(**kwargs: Any) -> PageOfItems[RequestQueueShort]:
+ result = self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs)
+ data = ListOfRequestQueuesResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfRequestQueues(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
)
- return ListOfRequestQueuesResponse.model_validate(result).data
def get_or_create(
self,
@@ -106,7 +140,7 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
unnamed: bool | None = None,
@@ -115,9 +149,12 @@ async def list(
desc: bool | None = None,
ownership: StorageOwnership | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfRequestQueues:
+ ) -> IterablePageOfRequestQueuesAsync:
"""List the available request queues.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual
+ request queues and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/request-queues/queue-collection/get-list-of-request-queues
Args:
@@ -132,10 +169,28 @@ async def list(
Returns:
The list of available request queues matching the specified filters.
"""
- result = await self._list(
- timeout=timeout, unnamed=unnamed, limit=limit, offset=offset, desc=desc, ownership=ownership
+
+ async def _callback(**kwargs: Any) -> PageOfItems[RequestQueueShort]:
+ result = await self._list(timeout=timeout, unnamed=unnamed, ownership=ownership, **kwargs)
+ data = ListOfRequestQueuesResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfRequestQueuesAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- return ListOfRequestQueuesResponse.model_validate(result).data
async def get_or_create(
self,
diff --git a/src/apify_client/_resource_clients/run_collection.py b/src/apify_client/_resource_clients/run_collection.py
index b63b3fc9..17f51002 100644
--- a/src/apify_client/_resource_clients/run_collection.py
+++ b/src/apify_client/_resource_clients/run_collection.py
@@ -3,13 +3,23 @@
from typing import TYPE_CHECKING, Any
from apify_client._docs import docs_group
-from apify_client._models_generated import ListOfRuns, ListOfRunsResponse
+from apify_client._models_generated import ListOfRunsResponse
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfRuns,
+ IterablePageOfRunsAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
from datetime import datetime
- from apify_client._models_generated import ActorJobStatus
+ from apify_client._models_generated import ActorJobStatus, RunShort
from apify_client._types import Timeout
@@ -42,12 +52,15 @@ def list(
started_before: str | datetime | None = None,
started_after: str | datetime | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfRuns:
+ ) -> IterablePageOfRuns:
"""List all Actor runs.
List all Actor runs, either of a single Actor, or all user's Actors, depending on where this client
was initialized from.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual runs
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actors/run-collection/get-list-of-runs
https://docs.apify.com/api/v2#/reference/actor-runs/run-collection/get-user-runs-list
@@ -65,16 +78,36 @@ def list(
"""
status_param = list(status) if isinstance(status, list) else status
- result = self._list(
- timeout=timeout,
- limit=limit,
- offset=offset,
- desc=desc,
- status=status_param,
- startedBefore=started_before,
- startedAfter=started_after,
+ def _callback(**kwargs: Any) -> PageOfItems[RunShort]:
+ result = self._list(
+ timeout=timeout,
+ status=status_param,
+ startedBefore=started_before,
+ startedAfter=started_after,
+ **kwargs,
+ )
+ data = ListOfRunsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfRuns(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
)
- return ListOfRunsResponse.model_validate(result).data
@docs_group('Resource clients')
@@ -96,7 +129,7 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
@@ -106,12 +139,15 @@ async def list(
started_before: str | datetime | None = None,
started_after: str | datetime | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfRuns:
+ ) -> IterablePageOfRunsAsync:
"""List all Actor runs.
List all Actor runs, either of a single Actor, or all user's Actors, depending on where this client
was initialized from.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual runs
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actors/run-collection/get-list-of-runs
https://docs.apify.com/api/v2#/reference/actor-runs/run-collection/get-user-runs-list
@@ -129,13 +165,30 @@ async def list(
"""
status_param = list(status) if isinstance(status, list) else status
- result = await self._list(
- timeout=timeout,
- limit=limit,
- offset=offset,
- desc=desc,
- status=status_param,
- startedBefore=started_before,
- startedAfter=started_after,
+ async def _callback(**kwargs: Any) -> PageOfItems[RunShort]:
+ result = await self._list(
+ timeout=timeout,
+ status=status_param,
+ startedBefore=started_before,
+ startedAfter=started_after,
+ **kwargs,
+ )
+ data = ListOfRunsResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfRunsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- return ListOfRunsResponse.model_validate(result).data
diff --git a/src/apify_client/_resource_clients/schedule_collection.py b/src/apify_client/_resource_clients/schedule_collection.py
index 1421d257..6b1764f0 100644
--- a/src/apify_client/_resource_clients/schedule_collection.py
+++ b/src/apify_client/_resource_clients/schedule_collection.py
@@ -4,15 +4,25 @@
from apify_client._docs import docs_group
from apify_client._models_generated import (
- ListOfSchedules,
ListOfSchedulesResponse,
Schedule,
ScheduleCreate,
ScheduleResponse,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfSchedules,
+ IterablePageOfSchedulesAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
+ from apify_client._models_generated import ScheduleShort
from apify_client._types import Timeout
@@ -42,9 +52,12 @@ def list(
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfSchedules:
+ ) -> IterablePageOfSchedules:
"""List the available schedules.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual
+ schedules and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/schedules/schedules-collection/get-list-of-schedules
Args:
@@ -56,8 +69,31 @@ def list(
Returns:
The list of available schedules matching the specified filters.
"""
- result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfSchedulesResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItems[ScheduleShort]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfSchedulesResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfSchedules(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
+ )
def create(
self,
@@ -128,16 +164,19 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfSchedules:
+ ) -> IterablePageOfSchedulesAsync:
"""List the available schedules.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual
+ schedules and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/schedules/schedules-collection/get-list-of-schedules
Args:
@@ -149,8 +188,28 @@ async def list(
Returns:
The list of available schedules matching the specified filters.
"""
- result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfSchedulesResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItems[ScheduleShort]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfSchedulesResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfSchedulesAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
async def create(
self,
diff --git a/src/apify_client/_resource_clients/store_collection.py b/src/apify_client/_resource_clients/store_collection.py
index 9c80ad31..6fe021c8 100644
--- a/src/apify_client/_resource_clients/store_collection.py
+++ b/src/apify_client/_resource_clients/store_collection.py
@@ -3,10 +3,21 @@
from typing import TYPE_CHECKING, Any
from apify_client._docs import docs_group
-from apify_client._models_generated import ListOfActorsInStoreResponse, ListOfStoreActors
+from apify_client._models_generated import ListOfActorsInStoreResponse
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfStoreActors,
+ IterablePageOfStoreActorsAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
+ from apify_client._models_generated import StoreListActor
from apify_client._types import Timeout
@@ -40,9 +51,12 @@ def list(
username: str | None = None,
pricing_model: str | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfStoreActors:
+ ) -> IterablePageOfStoreActors:
"""List Actors in Apify store.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual Actors
+ from the store and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2/#/reference/store/store-actors-collection/get-list-of-actors-in-store
Args:
@@ -59,17 +73,39 @@ def list(
Returns:
The list of available Actors matching the specified filters.
"""
- result = self._list(
- timeout=timeout,
- limit=limit,
- offset=offset,
- search=search,
- sortBy=sort_by,
- category=category,
- username=username,
- pricingModel=pricing_model,
+
+ def _callback(**kwargs: Any) -> PageOfItems[StoreListActor]:
+ result = self._list(
+ timeout=timeout,
+ search=search,
+ sortBy=sort_by,
+ category=category,
+ username=username,
+ pricingModel=pricing_model,
+ **kwargs,
+ )
+ data = ListOfActorsInStoreResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset)
+
+ return IterablePageOfStoreActors(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
)
- return ListOfActorsInStoreResponse.model_validate(result).data
@docs_group('Resource clients')
@@ -91,7 +127,7 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
@@ -102,9 +138,12 @@ async def list(
username: str | None = None,
pricing_model: str | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfStoreActors:
+ ) -> IterablePageOfStoreActorsAsync:
"""List Actors in Apify store.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual Actors
+ from the store and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2/#/reference/store/store-actors-collection/get-list-of-actors-in-store
Args:
@@ -121,14 +160,31 @@ async def list(
Returns:
The list of available Actors matching the specified filters.
"""
- result = await self._list(
- timeout=timeout,
- limit=limit,
- offset=offset,
- search=search,
- sortBy=sort_by,
- category=category,
- username=username,
- pricingModel=pricing_model,
+
+ async def _callback(**kwargs: Any) -> PageOfItems[StoreListActor]:
+ result = await self._list(
+ timeout=timeout,
+ search=search,
+ sortBy=sort_by,
+ category=category,
+ username=username,
+ pricingModel=pricing_model,
+ **kwargs,
+ )
+ data = ListOfActorsInStoreResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset))
+ get_async_iterator = build_get_iterator_async(_callback, fetch_first_page, limit=limit, offset=offset)
+
+ return IterablePageOfStoreActorsAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
)
- return ListOfActorsInStoreResponse.model_validate(result).data
diff --git a/src/apify_client/_resource_clients/task_collection.py b/src/apify_client/_resource_clients/task_collection.py
index 44c46c9b..9e718b15 100644
--- a/src/apify_client/_resource_clients/task_collection.py
+++ b/src/apify_client/_resource_clients/task_collection.py
@@ -6,19 +6,29 @@
from apify_client._models_generated import (
ActorStandby,
CreateTaskRequest,
- ListOfTasks,
ListOfTasksResponse,
Task,
TaskInput,
TaskOptions,
TaskResponse,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfTasks,
+ IterablePageOfTasksAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
from apify_client._utils import to_seconds
if TYPE_CHECKING:
from datetime import timedelta
+ from apify_client._models_generated import TaskShort
from apify_client._typeddicts_generated import TaskInputDict
from apify_client._types import Timeout
@@ -49,9 +59,12 @@ def list(
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfTasks:
+ ) -> IterablePageOfTasks:
"""List the available tasks.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual tasks
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actor-tasks/task-collection/get-list-of-tasks
Args:
@@ -63,8 +76,31 @@ def list(
Returns:
The list of available tasks matching the specified filters.
"""
- result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfTasksResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItems[TaskShort]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfTasksResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfTasks(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
+ )
def create(
self,
@@ -163,16 +199,19 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfTasks:
+ ) -> IterablePageOfTasksAsync:
"""List the available tasks.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual tasks
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/actor-tasks/task-collection/get-list-of-tasks
Args:
@@ -184,8 +223,28 @@ async def list(
Returns:
The list of available tasks matching the specified filters.
"""
- result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfTasksResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItems[TaskShort]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfTasksResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfTasksAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
async def create(
self,
diff --git a/src/apify_client/_resource_clients/webhook_collection.py b/src/apify_client/_resource_clients/webhook_collection.py
index 12834ce1..c5fa353b 100644
--- a/src/apify_client/_resource_clients/webhook_collection.py
+++ b/src/apify_client/_resource_clients/webhook_collection.py
@@ -4,16 +4,25 @@
from apify_client._docs import docs_group
from apify_client._models_generated import (
- ListOfWebhooks,
ListOfWebhooksResponse,
WebhookCondition,
WebhookCreate,
WebhookResponse,
)
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfWebhooks,
+ IterablePageOfWebhooksAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
- from apify_client._models_generated import Webhook, WebhookEventType
+ from apify_client._models_generated import Webhook, WebhookEventType, WebhookShort
from apify_client._types import Timeout
@@ -43,9 +52,12 @@ def list(
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfWebhooks:
+ ) -> IterablePageOfWebhooks:
"""List the available webhooks.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual webhooks
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/webhooks/webhook-collection/get-list-of-webhooks
Args:
@@ -57,8 +69,31 @@ def list(
Returns:
The list of available webhooks matching the specified filters.
"""
- result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfWebhooksResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItems[WebhookShort]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfWebhooksResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfWebhooks(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
+ )
def create(
self,
@@ -139,16 +174,19 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfWebhooks:
+ ) -> IterablePageOfWebhooksAsync:
"""List the available webhooks.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual webhooks
+ and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/webhooks/webhook-collection/get-list-of-webhooks
Args:
@@ -160,8 +198,28 @@ async def list(
Returns:
The list of available webhooks matching the specified filters.
"""
- result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfWebhooksResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItems[WebhookShort]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfWebhooksResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfWebhooksAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
async def create(
self,
diff --git a/src/apify_client/_resource_clients/webhook_dispatch_collection.py b/src/apify_client/_resource_clients/webhook_dispatch_collection.py
index b93f59b1..c263629c 100644
--- a/src/apify_client/_resource_clients/webhook_dispatch_collection.py
+++ b/src/apify_client/_resource_clients/webhook_dispatch_collection.py
@@ -3,10 +3,21 @@
from typing import TYPE_CHECKING, Any
from apify_client._docs import docs_group
-from apify_client._models_generated import ListOfWebhookDispatches, ListOfWebhookDispatchesResponse
+from apify_client._models_generated import ListOfWebhookDispatchesResponse
+from apify_client._pagination import (
+ _LazyTask,
+ build_get_iterator,
+ build_get_iterator_async,
+)
+from apify_client._pagination_classes import (
+ IterablePageOfWebhookDispatches,
+ IterablePageOfWebhookDispatchesAsync,
+ PageOfItems,
+)
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
if TYPE_CHECKING:
+ from apify_client._models_generated import WebhookDispatch
from apify_client._types import Timeout
@@ -36,9 +47,12 @@ def list(
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfWebhookDispatches | None:
+ ) -> IterablePageOfWebhookDispatches:
"""List all webhook dispatches of a user.
+ The returned page also supports iteration: `for item in client.list(...)` yields individual
+ webhook dispatches and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/webhook-dispatches/webhook-dispatches-collection/get-list-of-webhook-dispatches
Args:
@@ -50,8 +64,31 @@ def list(
Returns:
The retrieved webhook dispatches of a user.
"""
- result = self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfWebhookDispatchesResponse.model_validate(result).data
+
+ def _callback(**kwargs: Any) -> PageOfItems[WebhookDispatch]:
+ result = self._list(timeout=timeout, **kwargs)
+ data = ListOfWebhookDispatchesResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ first_page = _callback(limit=limit, offset=offset, desc=desc)
+ get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
+
+ return IterablePageOfWebhookDispatches(
+ _get_iterator=get_iterator,
+ items=first_page.items,
+ count=first_page.count,
+ limit=first_page.limit,
+ total=first_page.total,
+ offset=first_page.offset,
+ desc=first_page.desc,
+ )
@docs_group('Resource clients')
@@ -73,16 +110,19 @@ def __init__(
**kwargs,
)
- async def list(
+ def list(
self,
*,
limit: int | None = None,
offset: int | None = None,
desc: bool | None = None,
timeout: Timeout = 'medium',
- ) -> ListOfWebhookDispatches | None:
+ ) -> IterablePageOfWebhookDispatchesAsync:
"""List all webhook dispatches of a user.
+ The returned page also supports iteration: `async for item in client.list(...)` yields individual
+ webhook dispatches and transparently fetches further pages from the API.
+
https://docs.apify.com/api/v2#/reference/webhook-dispatches/webhook-dispatches-collection/get-list-of-webhook-dispatches
Args:
@@ -94,5 +134,25 @@ async def list(
Returns:
The retrieved webhook dispatches of a user.
"""
- result = await self._list(timeout=timeout, limit=limit, offset=offset, desc=desc)
- return ListOfWebhookDispatchesResponse.model_validate(result).data
+
+ async def _callback(**kwargs: Any) -> PageOfItems[WebhookDispatch]:
+ result = await self._list(timeout=timeout, **kwargs)
+ data = ListOfWebhookDispatchesResponse.model_validate(result).data
+ return PageOfItems(
+ items=data.items,
+ count=data.count,
+ limit=data.limit,
+ total=data.total,
+ offset=data.offset,
+ desc=data.desc,
+ )
+
+ fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
+ get_async_iterator = build_get_iterator_async(
+ _callback, fetch_first_page, limit=limit, offset=offset, desc=desc
+ )
+
+ return IterablePageOfWebhookDispatchesAsync(
+ _awaitable_first_page=fetch_first_page,
+ _get_async_iterator=get_async_iterator,
+ )
diff --git a/tests/integration/test_actor.py b/tests/integration/test_actor.py
index 4797c664..513bfebf 100644
--- a/tests/integration/test_actor.py
+++ b/tests/integration/test_actor.py
@@ -5,10 +5,12 @@
from typing import TYPE_CHECKING, cast
from ._utils import get_random_resource_name, maybe_await
+from apify_client._models_generated import ActorShort
+from apify_client._pagination_classes import PageOfItems
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Actor, Build, ListOfActors, Run
+ from apify_client._models_generated import Actor, Build, Run
from apify_client._resource_clients import BuildClient, BuildClientAsync
@@ -36,36 +38,31 @@ async def test_get_actor_by_full_name(client: ApifyClient | ApifyClientAsync) ->
async def test_list_actors_my(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing Actors created by the user."""
- result = await maybe_await(client.actors().list(my=True, limit=10))
- actors_page = cast('ListOfActors', result)
+ actors_page = await maybe_await(client.actors().list(my=True, limit=10))
- assert actors_page is not None
- assert actors_page.items is not None
- # User may have 0 actors
+ assert isinstance(actors_page, PageOfItems)
assert isinstance(actors_page.items, list)
+ # The user may have no Actors, so check the element type only if some items were returned.
+ if actors_page.items:
+ assert isinstance(actors_page.items[0], ActorShort)
async def test_list_actors_pagination(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing Actors with pagination parameters."""
- # List all actors (public + owned), should return some results
- result = await maybe_await(client.actors().list(limit=5, offset=0))
- actors_page = cast('ListOfActors', result)
+ actors_page = await maybe_await(client.actors().list(limit=5, offset=0))
- assert actors_page is not None
- assert actors_page.items is not None
+ assert isinstance(actors_page, PageOfItems)
assert isinstance(actors_page.items, list)
- # Should have at least some actors (public ones exist)
- assert len(actors_page.items) >= 0
+ assert isinstance(actors_page.items[0], ActorShort)
async def test_list_actors_sorting(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing Actors with sorting."""
- result = await maybe_await(client.actors().list(limit=10, desc=True, sort_by='created_at'))
- actors_page = cast('ListOfActors', result)
+ actors_page = await maybe_await(client.actors().list(limit=10, desc=True, sort_by='created_at'))
- assert actors_page is not None
- assert actors_page.items is not None
+ assert isinstance(actors_page, PageOfItems)
assert isinstance(actors_page.items, list)
+ assert isinstance(actors_page.items[0], ActorShort)
async def test_actor_create_update_delete(client: ApifyClient | ApifyClientAsync) -> None:
diff --git a/tests/integration/test_actor_env_var.py b/tests/integration/test_actor_env_var.py
index e5d9663e..5c2a1e9e 100644
--- a/tests/integration/test_actor_env_var.py
+++ b/tests/integration/test_actor_env_var.py
@@ -4,9 +4,12 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import EnvVar
+from apify_client._pagination_classes import PageOfItemsOnlyTotal
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Actor, EnvVar, ListOfEnvVars
+ from apify_client._models_generated import Actor
from ._utils import get_random_resource_name, maybe_await
@@ -49,11 +52,10 @@ async def test_actor_env_var_list(client: ApifyClient | ApifyClientAsync) -> Non
try:
# List env vars
- result = await maybe_await(version_client.env_vars().list())
- env_vars = cast('ListOfEnvVars', result)
-
- assert env_vars is not None
- assert env_vars.items is not None
+ env_vars = await maybe_await(version_client.env_vars().list())
+ assert isinstance(env_vars, PageOfItemsOnlyTotal)
+ assert isinstance(env_vars.items, list)
+ assert isinstance(env_vars.items[0], EnvVar)
assert len(env_vars.items) >= 1
# Verify env var fields
diff --git a/tests/integration/test_actor_version.py b/tests/integration/test_actor_version.py
index b8ff31c3..22a83025 100644
--- a/tests/integration/test_actor_version.py
+++ b/tests/integration/test_actor_version.py
@@ -4,9 +4,12 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import Version
+from apify_client._pagination_classes import PageOfItemsOnlyTotal
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Actor, ListOfVersions, Version
+ from apify_client._models_generated import Actor
from ._utils import get_random_resource_name, maybe_await
@@ -42,11 +45,11 @@ async def test_actor_version_list(client: ApifyClient | ApifyClientAsync) -> Non
try:
# List versions
- result = await maybe_await(actor_client.versions().list())
- versions = cast('ListOfVersions', result)
+ versions = await maybe_await(actor_client.versions().list())
- assert versions is not None
- assert versions.items is not None
+ assert isinstance(versions, PageOfItemsOnlyTotal)
+ assert isinstance(versions.items, list)
+ assert isinstance(versions.items[0], Version)
assert len(versions.items) >= 1
# Verify version fields
diff --git a/tests/integration/test_build.py b/tests/integration/test_build.py
index ef8ac662..8193b665 100644
--- a/tests/integration/test_build.py
+++ b/tests/integration/test_build.py
@@ -4,9 +4,12 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import BuildShort
+from apify_client._pagination_classes import PageOfItems
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Actor, Build, ListOfBuilds
+ from apify_client._models_generated import Actor, Build
from datetime import timedelta
@@ -21,14 +24,12 @@ async def test_build_list_for_actor(client: ApifyClient | ApifyClientAsync) -> N
"""Test listing builds for a public Actor."""
# Get builds for hello-world actor
actor = client.actor(HELLO_WORLD_ACTOR)
- result = await maybe_await(actor.builds().list(limit=10))
- builds_page = cast('ListOfBuilds', result)
+ builds_page = await maybe_await(actor.builds().list(limit=10))
- assert builds_page is not None
- assert builds_page.items is not None
- assert len(builds_page.items) > 0 # hello-world should have at least one build
+ assert isinstance(builds_page, PageOfItems)
+ assert isinstance(builds_page.items, list)
+ assert isinstance(builds_page.items[0], BuildShort) # hello-world has at least one build
- # Verify build structure
first_build = builds_page.items[0]
assert first_build.id is not None
assert first_build.act_id is not None
@@ -38,9 +39,11 @@ async def test_build_get(client: ApifyClient | ApifyClientAsync) -> None:
"""Test getting a specific build."""
# First list builds to get a build ID
actor = client.actor(HELLO_WORLD_ACTOR)
- result = await maybe_await(actor.builds().list(limit=1))
- builds_page = cast('ListOfBuilds', result)
- assert builds_page.items
+ builds_page = await maybe_await(actor.builds().list(limit=1))
+
+ assert isinstance(builds_page, PageOfItems)
+ assert isinstance(builds_page.items, list)
+ assert isinstance(builds_page.items[0], BuildShort)
build_id = builds_page.items[0].id
# Get the specific build
@@ -56,22 +59,24 @@ async def test_build_get(client: ApifyClient | ApifyClientAsync) -> None:
async def test_user_builds_list(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing all user builds."""
# List user's builds (may be empty if user has no actors)
- result = await maybe_await(client.builds().list(limit=10))
- builds_page = cast('ListOfBuilds', result)
+ builds_page = await maybe_await(client.builds().list(limit=10))
- assert builds_page is not None
- assert builds_page.items is not None
- # User may have 0 builds, so we just check the structure
+ assert isinstance(builds_page, PageOfItems)
assert isinstance(builds_page.items, list)
+ # The user may have no builds, so check the element type only if some items were returned.
+ if builds_page.items:
+ assert isinstance(builds_page.items[0], BuildShort)
async def test_build_log(client: ApifyClient | ApifyClientAsync) -> None:
"""Test getting build log."""
# First list builds to get a completed build ID
actor = client.actor(HELLO_WORLD_ACTOR)
- result = await maybe_await(actor.builds().list(limit=5))
- builds_page = cast('ListOfBuilds', result)
- assert builds_page.items
+ builds_page = await maybe_await(actor.builds().list(limit=5))
+
+ assert isinstance(builds_page, PageOfItems)
+ assert isinstance(builds_page.items, list)
+ assert isinstance(builds_page.items[0], BuildShort)
# Find a completed build (SUCCEEDED status)
completed_build = None
@@ -96,9 +101,11 @@ async def test_build_wait_for_finish(client: ApifyClient | ApifyClientAsync) ->
"""Test wait_for_finish on an already completed build."""
# First list builds to get a completed build ID
actor = client.actor(HELLO_WORLD_ACTOR)
- result = await maybe_await(actor.builds().list(limit=5))
- builds_page = cast('ListOfBuilds', result)
- assert builds_page.items
+ builds_page = await maybe_await(actor.builds().list(limit=5))
+
+ assert isinstance(builds_page, PageOfItems)
+ assert isinstance(builds_page.items, list)
+ assert isinstance(builds_page.items[0], BuildShort)
# Find a completed build (SUCCEEDED status)
completed_build = None
@@ -208,9 +215,11 @@ async def test_build_get_open_api_definition(client: ApifyClient | ApifyClientAs
"""Test getting OpenAPI definition for a build."""
# Get builds for hello-world actor
actor = client.actor(HELLO_WORLD_ACTOR)
- result = await maybe_await(actor.builds().list(limit=1))
- builds_page = cast('ListOfBuilds', result)
- assert builds_page.items
+ builds_page = await maybe_await(actor.builds().list(limit=1))
+
+ assert isinstance(builds_page, PageOfItems)
+ assert isinstance(builds_page.items, list)
+ assert isinstance(builds_page.items[0], BuildShort)
build_id = builds_page.items[0].id
# Get the OpenAPI definition
diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py
index 149a504c..8415a095 100644
--- a/tests/integration/test_dataset.py
+++ b/tests/integration/test_dataset.py
@@ -4,6 +4,9 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import DatasetListItem
+from apify_client._pagination_classes import PageOfDatasetItems, PageOfItems
+
if TYPE_CHECKING:
from collections.abc import AsyncIterator, Iterator
from contextlib import AbstractAsyncContextManager, AbstractContextManager
@@ -11,7 +14,7 @@
from impit import Response
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Dataset, ListOfDatasets
+ from apify_client._models_generated import Dataset
from apify_client._resource_clients.dataset import DatasetItemsPage
import json
@@ -26,22 +29,23 @@
async def test_dataset_collection_list(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing datasets."""
- result = await maybe_await(client.datasets().list(limit=10))
- datasets_page = cast('ListOfDatasets', result)
+ datasets_page = await maybe_await(client.datasets().list(limit=10))
- assert datasets_page is not None
- assert datasets_page.items is not None
+ assert isinstance(datasets_page, PageOfItems)
assert isinstance(datasets_page.items, list)
+ # The user may have no datasets, so check the element type only if some items were returned.
+ if datasets_page.items:
+ assert isinstance(datasets_page.items[0], DatasetListItem)
async def test_dataset_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing datasets with pagination."""
- result = await maybe_await(client.datasets().list(limit=5, offset=0))
- datasets_page = cast('ListOfDatasets', result)
+ datasets_page = await maybe_await(client.datasets().list(limit=5, offset=0))
- assert datasets_page is not None
- assert datasets_page.items is not None
+ assert isinstance(datasets_page, PageOfItems)
assert isinstance(datasets_page.items, list)
+ if datasets_page.items:
+ assert isinstance(datasets_page.items[0], DatasetListItem)
async def test_dataset_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None:
@@ -261,8 +265,8 @@ async def test_dataset_push_and_list_items(client: ApifyClient | ApifyClientAsyn
await maybe_sleep(1, is_async=is_async)
# List items
- result = await maybe_await(dataset_client.list_items())
- items_page = cast('DatasetItemsPage', result)
+ items_page = await maybe_await(dataset_client.list_items())
+ assert isinstance(items_page, PageOfDatasetItems)
assert items_page is not None
assert len(items_page.items) == 3
assert items_page.count == 3
@@ -294,22 +298,21 @@ async def test_dataset_list_items_with_pagination(client: ApifyClient | ApifyCli
await maybe_sleep(1, is_async=is_async)
# List with limit
- result = await maybe_await(dataset_client.list_items(limit=5))
- items_page = cast('DatasetItemsPage', result)
+ items_page = await maybe_await(dataset_client.list_items(limit=5))
+ assert isinstance(items_page, PageOfDatasetItems)
assert len(items_page.items) == 5
assert items_page.count == 5
# Note: items_page.total may be 0 immediately after push due to eventual consistency
assert items_page.limit == 5
# List with offset
- result = await maybe_await(dataset_client.list_items(offset=5, limit=5))
- items_page_offset = cast('DatasetItemsPage', result)
+ items_page_offset = await maybe_await(dataset_client.list_items(offset=5, limit=5))
+ assert isinstance(items_page_offset, PageOfDatasetItems)
assert len(items_page_offset.items) == 5
assert items_page_offset.offset == 5
# Note: items_page.total may be 0 immediately after push due to eventual consistency
-
# Verify different items
- assert items_page.items[0]['index'] != items_page_offset.items[0]['index']
+ assert items_page_offset.items[0]['index'] != items_page.items[0]['index']
finally:
await maybe_await(dataset_client.delete())
@@ -334,8 +337,8 @@ async def test_dataset_list_items_with_fields(client: ApifyClient | ApifyClientA
await maybe_sleep(1, is_async=is_async)
# List with fields filter
- result = await maybe_await(dataset_client.list_items(fields=['id', 'name']))
- items_page = cast('DatasetItemsPage', result)
+ items_page = await maybe_await(dataset_client.list_items(fields=['id', 'name']))
+ assert isinstance(items_page, PageOfDatasetItems)
assert len(items_page.items) == 2
# Verify only specified fields are returned
diff --git a/tests/integration/test_key_value_store.py b/tests/integration/test_key_value_store.py
index ecc9d709..f9d2bee1 100644
--- a/tests/integration/test_key_value_store.py
+++ b/tests/integration/test_key_value_store.py
@@ -4,11 +4,13 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import KeyValueStore, KeyValueStoreKey
+from apify_client._pagination_classes import PageOfItems, PageOfKeys
+
if TYPE_CHECKING:
from collections.abc import AsyncIterator, Iterator
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import KeyValueStore, KeyValueStoreKey, ListOfKeys, ListOfKeyValueStores
import json
from datetime import timedelta
@@ -22,22 +24,22 @@
async def test_key_value_store_collection_list(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing key-value stores."""
- result = await maybe_await(client.key_value_stores().list(limit=10))
- kvs_page = cast('ListOfKeyValueStores', result)
+ kvs_page = await maybe_await(client.key_value_stores().list(limit=10))
- assert kvs_page is not None
- assert kvs_page.items is not None
+ assert isinstance(kvs_page, PageOfItems)
assert isinstance(kvs_page.items, list)
+ if kvs_page.items:
+ assert isinstance(kvs_page.items[0], KeyValueStore)
async def test_key_value_store_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing key-value stores with pagination."""
- result = await maybe_await(client.key_value_stores().list(limit=5, offset=0))
- kvs_page = cast('ListOfKeyValueStores', result)
+ kvs_page = await maybe_await(client.key_value_stores().list(limit=5, offset=0))
- assert kvs_page is not None
- assert kvs_page.items is not None
+ assert isinstance(kvs_page, PageOfItems)
assert isinstance(kvs_page.items, list)
+ if kvs_page.items:
+ assert isinstance(kvs_page.items[0], KeyValueStore)
async def test_key_value_store_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None:
@@ -124,11 +126,12 @@ async def test_list_keys_signature(
await maybe_await(kvs.list_keys())
# Kvs content retrieved with correct signature
- result = await maybe_await(kvs.list_keys(signature=test_kvs_of_another_user.signature))
- response = cast('ListOfKeys', result)
- raw_items = response.items
+ response = await maybe_await(kvs.list_keys(signature=test_kvs_of_another_user.signature))
- assert set(test_kvs_of_another_user.expected_content) == {item.key for item in raw_items}
+ assert isinstance(response, PageOfKeys)
+ assert isinstance(response.items, list)
+ assert isinstance(response.items[0], KeyValueStoreKey)
+ assert set(test_kvs_of_another_user.expected_content) == {item.key for item in response.items}
async def test_get_record_signature(
@@ -338,9 +341,11 @@ async def test_key_value_store_list_keys(client: ApifyClient | ApifyClientAsync,
await maybe_sleep(1, is_async=is_async)
# List keys
- result = await maybe_await(store_client.list_keys())
- keys_response = cast('ListOfKeys', result)
- assert keys_response is not None
+ keys_response = await maybe_await(store_client.list_keys())
+
+ assert isinstance(keys_response, PageOfKeys)
+ assert isinstance(keys_response.items, list)
+ assert isinstance(keys_response.items[0], KeyValueStoreKey)
assert len(keys_response.items) == 5
# Verify key names
@@ -368,9 +373,11 @@ async def test_key_value_store_list_keys_with_limit(client: ApifyClient | ApifyC
await maybe_sleep(1, is_async=is_async)
# List with limit
- result = await maybe_await(store_client.list_keys(limit=5))
- keys_response = cast('ListOfKeys', result)
- assert keys_response is not None
+ keys_response = await maybe_await(store_client.list_keys(limit=5))
+
+ assert isinstance(keys_response, PageOfKeys)
+ assert isinstance(keys_response.items, list)
+ assert isinstance(keys_response.items[0], KeyValueStoreKey)
assert len(keys_response.items) == 5
finally:
await maybe_await(store_client.delete())
diff --git a/tests/integration/test_log.py b/tests/integration/test_log.py
index 85800682..76d64488 100644
--- a/tests/integration/test_log.py
+++ b/tests/integration/test_log.py
@@ -4,9 +4,12 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import BuildShort
+from apify_client._pagination_classes import PageOfItems
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import ListOfBuilds, Run
+ from apify_client._models_generated import Run
from ._utils import maybe_await
@@ -39,9 +42,11 @@ async def test_log_get_from_build(client: ApifyClient | ApifyClientAsync) -> Non
"""Test retrieving log from a build."""
# Get a build from hello-world actor
actor = client.actor(HELLO_WORLD_ACTOR)
- result = await maybe_await(actor.builds().list(limit=1))
- builds_page = cast('ListOfBuilds', result)
- assert builds_page.items
+ builds_page = await maybe_await(actor.builds().list(limit=1))
+
+ assert isinstance(builds_page, PageOfItems)
+ assert isinstance(builds_page.items, list)
+ assert isinstance(builds_page.items[0], BuildShort)
build_id = builds_page.items[0].id
# Get log from the build
diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py
index a08e7fb8..d26e9245 100644
--- a/tests/integration/test_request_queue.py
+++ b/tests/integration/test_request_queue.py
@@ -4,15 +4,15 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import Request, RequestQueueShort
+from apify_client._pagination_classes import PageOfItems, PageOfRequests
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
from apify_client._models_generated import (
BatchAddResult,
BatchDeleteResult,
- ListOfRequestQueues,
- ListOfRequests,
LockedRequestQueueHead,
- Request,
RequestLockInfo,
RequestQueue,
RequestQueueHead,
@@ -30,22 +30,22 @@
async def test_request_queue_collection_list(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing request queues."""
- result = await maybe_await(client.request_queues().list(limit=10))
- rq_page = cast('ListOfRequestQueues', result)
+ rq_page = await maybe_await(client.request_queues().list(limit=10))
- assert rq_page is not None
- assert rq_page.items is not None
+ assert isinstance(rq_page, PageOfItems)
assert isinstance(rq_page.items, list)
+ if rq_page.items:
+ assert isinstance(rq_page.items[0], RequestQueueShort)
async def test_request_queue_collection_list_pagination(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing request queues with pagination."""
- result = await maybe_await(client.request_queues().list(limit=5, offset=0))
- rq_page = cast('ListOfRequestQueues', result)
+ rq_page = await maybe_await(client.request_queues().list(limit=5, offset=0))
- assert rq_page is not None
- assert rq_page.items is not None
+ assert isinstance(rq_page, PageOfItems)
assert isinstance(rq_page.items, list)
+ if rq_page.items:
+ assert isinstance(rq_page.items[0], RequestQueueShort)
async def test_request_queue_collection_get_or_create(client: ApifyClient | ApifyClientAsync) -> None:
@@ -256,16 +256,17 @@ async def test_request_queue_list_requests(client: ApifyClient | ApifyClientAsyn
)
# Poll until all requests are available (eventual consistency)
- list_response: ListOfRequests | None = None
for _ in range(5):
await maybe_sleep(1, is_async=is_async)
- result = await maybe_await(rq_client.list_requests())
- list_response = cast('ListOfRequests', result)
- if len(list_response.items) == 5:
+ list_response = await maybe_await(rq_client.list_requests())
+ assert isinstance(list_response, PageOfRequests)
+ if list_response.items and len(list_response.items) == 5:
break
- assert list_response is not None
+ assert isinstance(list_response, PageOfRequests)
+ assert isinstance(list_response.items, list)
assert len(list_response.items) == 5
+ assert isinstance(list_response.items[0], Request)
finally:
await maybe_await(rq_client.delete())
@@ -325,16 +326,17 @@ async def test_request_queue_batch_add_requests(client: ApifyClient | ApifyClien
assert len(batch_response.unprocessed_requests) == 0
# Poll until all requests are available (eventual consistency)
- list_response: ListOfRequests | None = None
for _ in range(5):
await maybe_sleep(1, is_async=is_async)
- result = await maybe_await(rq_client.list_requests())
- list_response = cast('ListOfRequests', result)
- if len(list_response.items) == 10:
+ list_response = await maybe_await(rq_client.list_requests())
+ assert isinstance(list_response, PageOfRequests)
+ if list_response.items and len(list_response.items) == 10:
break
- assert list_response is not None
+ assert isinstance(list_response, PageOfRequests)
+ assert isinstance(list_response.items, list)
assert len(list_response.items) == 10
+ assert isinstance(list_response.items[0], Request)
finally:
await maybe_await(rq_client.delete())
@@ -355,16 +357,17 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl
)
# Poll until all requests are available (eventual consistency)
- list_response: ListOfRequests | None = None
for _ in range(5):
await maybe_sleep(1, is_async=is_async)
- result = await maybe_await(rq_client.list_requests())
- list_response = cast('ListOfRequests', result)
- if len(list_response.items) == 10:
+ list_response = await maybe_await(rq_client.list_requests())
+ assert isinstance(list_response, PageOfRequests)
+ if list_response.items and len(list_response.items) == 10:
break
- assert list_response is not None
+ assert isinstance(list_response, PageOfRequests)
+ assert isinstance(list_response.items, list)
assert len(list_response.items) == 10
+ assert isinstance(list_response.items[0], Request)
requests_to_delete: list[RequestDeleteInputDict] = [
{'unique_key': item.unique_key} for item in list_response.items[:5]
]
@@ -376,16 +379,17 @@ async def test_request_queue_batch_delete_requests(client: ApifyClient | ApifyCl
assert len(delete_response.processed_requests) == 5
# Poll until deletions are reflected (eventual consistency)
- remaining: ListOfRequests | None = None
for _ in range(5):
await maybe_sleep(1, is_async=is_async)
- result = await maybe_await(rq_client.list_requests())
- remaining = cast('ListOfRequests', result)
- if len(remaining.items) == 5:
+ remaining = await maybe_await(rq_client.list_requests())
+ assert isinstance(remaining, PageOfRequests)
+ if remaining.items and len(remaining.items) == 5:
break
- assert remaining is not None
+ assert isinstance(remaining, PageOfRequests)
+ assert isinstance(remaining.items, list)
assert len(remaining.items) == 5
+ assert isinstance(remaining.items[0], Request)
finally:
await maybe_await(rq_client.delete())
diff --git a/tests/integration/test_run.py b/tests/integration/test_run.py
index 8c79e566..8e42693c 100644
--- a/tests/integration/test_run.py
+++ b/tests/integration/test_run.py
@@ -4,9 +4,12 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import RunShort
+from apify_client._pagination_classes import PageOfItems
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Dataset, KeyValueStore, ListOfRuns, RequestQueue, Run
+ from apify_client._models_generated import Dataset, KeyValueStore, RequestQueue, Run
from datetime import UTC, datetime, timedelta
@@ -35,14 +38,20 @@ async def test_run_collection_list_multiple_statuses(client: ApifyClient | Apify
try:
run_collection = client.actor(HELLO_WORLD_ACTOR).runs()
- result = await maybe_await(run_collection.list(status=[ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT]))
- multiple_status_runs = cast('ListOfRuns', result)
+ multiple_status_runs = await maybe_await(
+ run_collection.list(status=[ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT])
+ )
+ single_status_runs = await maybe_await(run_collection.list(status=ActorJobStatus.SUCCEEDED))
- result = await maybe_await(run_collection.list(status=ActorJobStatus.SUCCEEDED))
- single_status_runs = cast('ListOfRuns', result)
+ assert isinstance(multiple_status_runs, PageOfItems)
+ assert isinstance(multiple_status_runs.items, list)
+ if multiple_status_runs.items:
+ assert isinstance(multiple_status_runs.items[0], RunShort)
- assert multiple_status_runs is not None
- assert single_status_runs is not None
+ assert isinstance(single_status_runs, PageOfItems)
+ assert isinstance(single_status_runs.items, list)
+ if single_status_runs.items:
+ assert isinstance(single_status_runs.items[0], RunShort)
assert all(
run.status in [ActorJobStatus.SUCCEEDED, ActorJobStatus.TIMED_OUT] for run in multiple_status_runs.items
@@ -294,13 +303,13 @@ async def test_run_log(client: ApifyClient | ApifyClientAsync) -> None:
async def test_run_runs_client(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing runs through the run collection client."""
# List runs (should return valid data structure)
- result = await maybe_await(client.runs().list(limit=10))
- runs_page = cast('ListOfRuns', result)
- assert runs_page is not None
- assert runs_page.items is not None
+ runs_page = await maybe_await(client.runs().list(limit=10))
+
+ assert isinstance(runs_page, PageOfItems)
assert isinstance(runs_page.items, list)
- # The user may have runs, verify the structure
+ # The user may have 0 runs — only check element type when any were returned.
if runs_page.items:
+ assert isinstance(runs_page.items[0], RunShort)
first_run = runs_page.items[0]
assert first_run.id is not None
assert first_run.act_id is not None
diff --git a/tests/integration/test_schedule.py b/tests/integration/test_schedule.py
index 2337116f..3b365ad1 100644
--- a/tests/integration/test_schedule.py
+++ b/tests/integration/test_schedule.py
@@ -4,9 +4,12 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import ScheduleShort
+from apify_client._pagination_classes import PageOfItems
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import ListOfSchedules, Schedule
+ from apify_client._models_generated import Schedule
from ._utils import get_random_resource_name, maybe_await
@@ -116,10 +119,11 @@ async def test_schedule_list(client: ApifyClient | ApifyClientAsync) -> None:
try:
# List schedules
- result = await maybe_await(client.schedules().list(limit=100))
- schedules_page = cast('ListOfSchedules', result)
- assert schedules_page is not None
- assert schedules_page.items is not None
+ schedules_page = await maybe_await(client.schedules().list(limit=100))
+
+ assert isinstance(schedules_page, PageOfItems)
+ assert isinstance(schedules_page.items, list)
+ assert isinstance(schedules_page.items[0], ScheduleShort)
# Verify our schedules are in the list
schedule_ids = [s.id for s in schedules_page.items]
diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py
index 69a3e8fc..34df55b6 100644
--- a/tests/integration/test_store.py
+++ b/tests/integration/test_store.py
@@ -2,11 +2,13 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING
+
+from apify_client._models_generated import StoreListActor
+from apify_client._pagination_classes import PageOfItems
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import ListOfStoreActors
from ._utils import maybe_await
@@ -14,32 +16,34 @@
async def test_store_list(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing public Actors in the store."""
- result = await maybe_await(client.store().list(limit=10))
- actors_list = cast('ListOfStoreActors', result)
- assert actors_list is not None
- assert actors_list.items is not None
- assert len(actors_list.items) > 0 # Store always has actors
+ actors_list = await maybe_await(client.store().list(limit=10))
+
+ assert isinstance(actors_list, PageOfItems)
+ assert isinstance(actors_list.items, list)
+ assert isinstance(actors_list.items[0], StoreListActor) # Store always has actors
async def test_store_list_with_search(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing store with search filter."""
- result = await maybe_await(client.store().list(limit=5, search='web scraper'))
- store_page = cast('ListOfStoreActors', result)
+ store_page = await maybe_await(client.store().list(limit=5, search='web scraper'))
- assert store_page is not None
- assert store_page.items is not None
+ assert isinstance(store_page, PageOfItems)
assert isinstance(store_page.items, list)
+ if store_page.items:
+ assert isinstance(store_page.items[0], StoreListActor)
async def test_store_list_pagination(client: ApifyClient | ApifyClientAsync) -> None:
"""Test store listing pagination."""
- result1 = await maybe_await(client.store().list(limit=5, offset=0))
- result2 = await maybe_await(client.store().list(limit=5, offset=5))
- page1 = cast('ListOfStoreActors', result1)
- page2 = cast('ListOfStoreActors', result2)
-
- assert page1 is not None
- assert page2 is not None
+ page1 = await maybe_await(client.store().list(limit=5, offset=0))
+ page2 = await maybe_await(client.store().list(limit=5, offset=5))
+
+ assert isinstance(page1, PageOfItems)
+ assert isinstance(page1.items, list)
+ assert isinstance(page1.items[0], StoreListActor)
+ assert isinstance(page2, PageOfItems)
+ assert isinstance(page2.items, list)
# Verify different results (if enough actors exist)
if len(page1.items) == 5 and len(page2.items) > 0:
+ assert isinstance(page2.items[0], StoreListActor)
assert page1.items[0].id != page2.items[0].id
diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py
index 322185a2..3b93e179 100644
--- a/tests/integration/test_task.py
+++ b/tests/integration/test_task.py
@@ -6,10 +6,12 @@
from typing import TYPE_CHECKING, cast
from ._utils import get_random_resource_name, maybe_await
+from apify_client._models_generated import RunShort, TaskShort
+from apify_client._pagination_classes import PageOfItems
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import Actor, ListOfRuns, ListOfTasks, ListOfWebhooks, Run, Task
+ from apify_client._models_generated import Actor, Run, Task
# Use a simple, fast public actor for testing
HELLO_WORLD_ACTOR = 'apify/hello-world'
@@ -113,10 +115,11 @@ async def test_task_list(client: ApifyClient | ApifyClientAsync) -> None:
try:
# List tasks
- result = await maybe_await(client.tasks().list(limit=100))
- tasks_page = cast('ListOfTasks', result)
- assert tasks_page is not None
- assert tasks_page.items is not None
+ tasks_page = await maybe_await(client.tasks().list(limit=100))
+
+ assert isinstance(tasks_page, PageOfItems)
+ assert isinstance(tasks_page.items, list)
+ assert isinstance(tasks_page.items[0], TaskShort)
# Verify our task is in the list
task_ids = [t.id for t in tasks_page.items]
@@ -289,10 +292,11 @@ async def test_task_runs(client: ApifyClient | ApifyClientAsync) -> None:
# List runs for this task
runs_client = task_client.runs()
- result = await maybe_await(runs_client.list(limit=10))
- runs_page = cast('ListOfRuns', result)
- assert runs_page is not None
- assert runs_page.items is not None
+ runs_page = await maybe_await(runs_client.list(limit=10))
+
+ assert isinstance(runs_page, PageOfItems)
+ assert isinstance(runs_page.items, list)
+ assert isinstance(runs_page.items[0], RunShort)
assert len(runs_page.items) >= 1
# Cleanup run
@@ -365,10 +369,10 @@ async def test_task_webhooks(client: ApifyClient | ApifyClientAsync) -> None:
try:
# Get webhooks client
webhooks_client = task_client.webhooks()
- result = await maybe_await(webhooks_client.list())
- webhooks_page = cast('ListOfWebhooks', result)
- assert webhooks_page is not None
- assert webhooks_page.items is not None
+ webhooks_page = await maybe_await(webhooks_client.list())
+
+ assert isinstance(webhooks_page, PageOfItems)
+ assert isinstance(webhooks_page.items, list)
# New task should have no webhooks
assert len(webhooks_page.items) == 0
diff --git a/tests/integration/test_webhook.py b/tests/integration/test_webhook.py
index a011aaa7..a40f8551 100644
--- a/tests/integration/test_webhook.py
+++ b/tests/integration/test_webhook.py
@@ -4,6 +4,8 @@
from typing import TYPE_CHECKING
+from apify_client._pagination_classes import PageOfItems
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
@@ -11,13 +13,12 @@
from ._utils import maybe_await
from apify_client._models_generated import (
ActorJobStatus,
- ListOfRuns,
- ListOfWebhookDispatches,
- ListOfWebhooks,
Run,
+ RunShort,
Webhook,
WebhookDispatch,
WebhookEventType,
+ WebhookShort,
)
HELLO_WORLD_ACTOR = 'apify/hello-world'
@@ -32,9 +33,11 @@ async def _get_finished_run_id(client: ApifyClient | ApifyClientAsync) -> str:
"""
runs_page = await maybe_await(client.actor(HELLO_WORLD_ACTOR).runs().list(limit=1, status=ActorJobStatus.SUCCEEDED))
- assert isinstance(runs_page, ListOfRuns)
+ assert isinstance(runs_page, PageOfItems)
+ assert isinstance(runs_page.items, list)
if len(runs_page.items) > 0:
+ assert isinstance(runs_page.items[0], RunShort)
return runs_page.items[0].id
# No completed runs found - start one and wait for it to finish
@@ -49,16 +52,18 @@ async def test_list_webhooks(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing webhooks."""
webhooks_page = await maybe_await(client.webhooks().list(limit=10))
- assert isinstance(webhooks_page, ListOfWebhooks)
+ assert isinstance(webhooks_page, PageOfItems)
assert isinstance(webhooks_page.items, list)
+ assert all(isinstance(webhook, WebhookShort) for webhook in webhooks_page.items)  # account may have 0 webhooks
async def test_list_webhooks_pagination(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing webhooks with pagination."""
webhooks_page = await maybe_await(client.webhooks().list(limit=5, offset=0))
- assert isinstance(webhooks_page, ListOfWebhooks)
+ assert isinstance(webhooks_page, PageOfItems)
assert isinstance(webhooks_page.items, list)
+ assert all(isinstance(webhook, WebhookShort) for webhook in webhooks_page.items)  # account may have 0 webhooks
async def test_webhook_create_and_get(client: ApifyClient | ApifyClientAsync) -> None:
@@ -166,8 +171,10 @@ async def test_webhook_dispatches(client: ApifyClient | ApifyClientAsync) -> Non
# List dispatches for this webhook
dispatches = await maybe_await(webhook_client.dispatches().list())
- assert isinstance(dispatches, ListOfWebhookDispatches)
+ assert isinstance(dispatches, PageOfItems)
+ assert isinstance(dispatches.items, list)
assert len(dispatches.items) > 0
+ assert isinstance(dispatches.items[0], WebhookDispatch)
finally:
await maybe_await(webhook_client.delete())
diff --git a/tests/integration/test_webhook_dispatch.py b/tests/integration/test_webhook_dispatch.py
index 5bfc106d..3f8e82c5 100644
--- a/tests/integration/test_webhook_dispatch.py
+++ b/tests/integration/test_webhook_dispatch.py
@@ -4,9 +4,11 @@
from typing import TYPE_CHECKING, cast
+from apify_client._models_generated import WebhookDispatch
+from apify_client._pagination_classes import PageOfItems
+
if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync
- from apify_client._models_generated import ListOfWebhookDispatches, WebhookDispatch
from ._utils import maybe_await
@@ -14,24 +16,26 @@
async def test_webhook_dispatch_list(client: ApifyClient | ApifyClientAsync) -> None:
"""Test listing webhook dispatches."""
- result = await maybe_await(client.webhook_dispatches().list(limit=10))
- dispatches_page = cast('ListOfWebhookDispatches', result)
+ dispatches_page = await maybe_await(client.webhook_dispatches().list(limit=10))
- assert dispatches_page is not None
- assert dispatches_page.items is not None
+ assert isinstance(dispatches_page, PageOfItems)
assert isinstance(dispatches_page.items, list)
- # User may have 0 dispatches, so we just verify the structure
+ # User may have 0 dispatches — only check element type when any were returned.
+ if dispatches_page.items:
+ assert isinstance(dispatches_page.items[0], WebhookDispatch)
async def test_webhook_dispatch_get(client: ApifyClient | ApifyClientAsync) -> None:
"""Test getting a specific webhook dispatch."""
# First list dispatches to get a dispatch ID
- result = await maybe_await(client.webhook_dispatches().list(limit=1))
- dispatches_page = cast('ListOfWebhookDispatches', result)
- assert dispatches_page is not None
+ dispatches_page = await maybe_await(client.webhook_dispatches().list(limit=1))
+
+ assert isinstance(dispatches_page, PageOfItems)
+ assert isinstance(dispatches_page.items, list)
if dispatches_page.items:
# If there are dispatches, test the get method
+ assert isinstance(dispatches_page.items[0], WebhookDispatch)
dispatch_id = dispatches_page.items[0].id
result = await maybe_await(client.webhook_dispatch(dispatch_id).get())
dispatch = cast('WebhookDispatch', result)
diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py
new file mode 100644
index 00000000..58306ba2
--- /dev/null
+++ b/tests/unit/test_client_pagination.py
@@ -0,0 +1,660 @@
+from __future__ import annotations
+
+import dataclasses
+import json
+import re
+from typing import TYPE_CHECKING, Any, Literal, TypeAlias
+
+import pytest
+from pydantic.fields import FieldInfo
+from werkzeug import Response
+
+from apify_client import ApifyClient, ApifyClientAsync
+from apify_client import _models_generated as _models_module
+from apify_client._resource_clients import (
+ ActorCollectionClient,
+ ActorCollectionClientAsync,
+ ActorEnvVarCollectionClient,
+ ActorEnvVarCollectionClientAsync,
+ ActorVersionCollectionClient,
+ ActorVersionCollectionClientAsync,
+ BuildCollectionClient,
+ BuildCollectionClientAsync,
+ DatasetClient,
+ DatasetClientAsync,
+ DatasetCollectionClient,
+ DatasetCollectionClientAsync,
+ KeyValueStoreClient,
+ KeyValueStoreClientAsync,
+ KeyValueStoreCollectionClient,
+ KeyValueStoreCollectionClientAsync,
+ RequestQueueClient,
+ RequestQueueClientAsync,
+ RequestQueueCollectionClient,
+ RequestQueueCollectionClientAsync,
+ RunCollectionClient,
+ RunCollectionClientAsync,
+ ScheduleCollectionClient,
+ ScheduleCollectionClientAsync,
+ StoreCollectionClient,
+ StoreCollectionClientAsync,
+ TaskCollectionClient,
+ TaskCollectionClientAsync,
+ WebhookCollectionClient,
+ WebhookCollectionClientAsync,
+ WebhookDispatchCollectionClient,
+ WebhookDispatchCollectionClientAsync,
+)
+
+if TYPE_CHECKING:
+ from collections.abc import Callable
+
+ from _pytest.mark import ParameterSet
+ from pydantic import BaseModel
+ from pytest_httpserver import HTTPServer
+ from werkzeug import Request
+
+
+# Union of the synchronous collection-client classes imported above. The single-resource storage
+# clients (`DatasetClient`, `KeyValueStoreClient`, `RequestQueueClient`) are not members.
+CollectionClient: TypeAlias = (
+    ActorCollectionClient
+    | BuildCollectionClient
+    | RunCollectionClient
+    | ScheduleCollectionClient
+    | TaskCollectionClient
+    | WebhookCollectionClient
+    | WebhookDispatchCollectionClient
+    | DatasetCollectionClient
+    | KeyValueStoreCollectionClient
+    | RequestQueueCollectionClient
+    | StoreCollectionClient
+    | ActorEnvVarCollectionClient
+    | ActorVersionCollectionClient
+)
+
+# Async counterpart of `CollectionClient`: the `*Async` variant of each member, in the same order.
+CollectionClientAsync: TypeAlias = (
+    ActorCollectionClientAsync
+    | BuildCollectionClientAsync
+    | RunCollectionClientAsync
+    | ScheduleCollectionClientAsync
+    | TaskCollectionClientAsync
+    | WebhookCollectionClientAsync
+    | WebhookDispatchCollectionClientAsync
+    | DatasetCollectionClientAsync
+    | KeyValueStoreCollectionClientAsync
+    | RequestQueueCollectionClientAsync
+    | StoreCollectionClientAsync
+    | ActorEnvVarCollectionClientAsync
+    | ActorVersionCollectionClientAsync
+)
+
+# Resource id handed to the client accessors in `_CLIENT_FACTORIES`; the offset handler looks for
+# it in the dataset-items path (`/datasets/some-id/items`).
+ID_PLACEHOLDER = 'some-id'
+
+
+# Inner list models whose `items: list[<Model>]` annotation is relaxed to `list[dict]`.
+# The point of these tests is the pagination mechanism, not validation of the individual items.
+_RELAXED_LIST_MODELS = (
+    'ListOfActors',
+    'ListOfBuilds',
+    'ListOfDatasets',
+    'ListOfEnvVars',
+    'ListOfKeys',
+    'ListOfKeyValueStores',
+    'ListOfRequestQueues',
+    'ListOfRequests',
+    'ListOfRuns',
+    'ListOfSchedules',
+    'ListOfStoreActors',
+    'ListOfTasks',
+    'ListOfVersions',
+    'ListOfWebhookDispatches',
+    'ListOfWebhooks',
+)
+
+# Outer wrappers that embed a relaxed list model via `.data`. Their compiled schema pins the
+# inner's schema at construction time, so they need a forced rebuild to pick up the relaxation.
+# The wrappers themselves are not mutated — their own field annotations stay as-is.
+# NOTE(review): `_RELAXED_LIST_MODELS` has 15 entries but only 14 wrappers are listed here; no
+# wrapper for `ListOfWebhookDispatches` appears — confirm that omission is intentional.
+_REBUILT_RESPONSE_WRAPPERS = (
+    'ListOfActorsInStoreResponse',
+    'ListOfActorsResponse',
+    'ListOfBuildsResponse',
+    'ListOfDatasetsResponse',
+    'ListOfEnvVarsResponse',
+    'ListOfKeyValueStoresResponse',
+    'ListOfKeysResponse',
+    'ListOfRequestQueuesResponse',
+    'ListOfRequestsResponse',
+    'ListOfRunsResponse',
+    'ListOfSchedulesResponse',
+    'ListOfTasksResponse',
+    'ListOfVersionsResponse',
+    'ListOfWebhooksResponse',
+)
+
+
+@pytest.fixture(autouse=True)
+def _relax_item_validation() -> Any:
+    """Relax only the element type of `items` on paginated list models for the test run.
+
+    Pagination tests feed synthetic `{'id': N}` items that don't satisfy the real API schemas
+    (`ActorShort`, `BuildShort`, `Request`, `EnvVar`, …). Instead of bypassing validation
+    wholesale, each inner `ListOf*` model has its `items` field swapped to `list[dict]`
+    and rebuilt. Outer `.data` wrapping and every pagination-metadata field remain validated.
+
+    The swap itself runs inside the `try` block, so a failure part-way through setup (for
+    example a model that cannot be looked up or rebuilt) still restores the models that were
+    already relaxed instead of leaking the relaxed schema into other tests.
+
+    Yields:
+        None. The test body runs while the relaxed models are in place.
+    """
+    relaxed_field = FieldInfo.from_annotation(list[dict])
+    originals: dict[type[BaseModel], FieldInfo] = {}
+    wrappers = [getattr(_models_module, name) for name in _REBUILT_RESPONSE_WRAPPERS]
+
+    try:
+        for name in _RELAXED_LIST_MODELS:
+            cls = getattr(_models_module, name)
+            # Record the original field before mutating so `finally` can always undo the swap.
+            originals[cls] = cls.model_fields['items']
+            cls.model_fields['items'] = relaxed_field
+            cls.model_rebuild(force=True)
+        # Wrappers are rebuilt after the inner models so they pick up the relaxed schemas.
+        for wrapper in wrappers:
+            wrapper.model_rebuild(force=True)
+        yield
+    finally:
+        for cls, field in originals.items():
+            cls.model_fields['items'] = field
+            cls.model_rebuild(force=True)
+        for wrapper in wrappers:
+            wrapper.model_rebuild(force=True)
+
+
+def create_items(start: int, end: int, step: int | None = None) -> list[dict[str, int]]:
+    """Build one `{'id': i}` dict for every `i` in `range(start, end, step)`.
+
+    When `step` is omitted (or zero) the direction is inferred: `-1` if `end < start`, else `1`.
+    """
+    stride = step or (-1 if end < start else 1)
+    return [{'id': index} for index in range(start, end, stride)]
+
+
+NORMAL_ITEMS = 2500  # Items the simulated platform holds by default.
+EXTRA_ITEMS_UNNAMED = 100  # Additional items served by the offset handler when `unnamed=true`.
+MAX_ITEMS_PER_PAGE = 1000  # Per-response cap applied by the handlers regardless of the requested limit.
+
+
+def _is_true(value: str | None) -> bool:
+    """Return whether a raw query-parameter value is exactly the string `'true'`.
+
+    Matches the `'true'` wire form produced by the client's bool→string serialization. A missing
+    parameter (`None`) and any other spelling, such as `'True'` or `'1'`, count as false.
+    """
+    return value == 'true'
+
+
+def _parse_int_param(value: str | None) -> int:
+    """Parse an integer query parameter, treating a missing or empty value as `0`."""
+    if value is None or value == '':
+        return 0
+    return int(value)
+
+
+def _handle_offset_pagination(request: Request) -> Response:
+    """Answer a request the way an offset-paginated Apify endpoint would.
+
+    The fake platform stores `NORMAL_ITEMS` items, plus `EXTRA_ITEMS_UNNAMED` more when the query
+    contains ``unnamed=true``. A single response never carries more than `MAX_ITEMS_PER_PAGE`
+    items, whatever limit was requested. The dataset items path gets the bare item list as its
+    body; every other path gets the ``{'data': {...}}`` envelope. Pagination metadata is also
+    sent in the ``x-apify-pagination-*`` headers of every response.
+    """
+    query = request.args
+
+    total = NORMAL_ITEMS
+    if _is_true(query.get('unnamed')):
+        total += EXTRA_ITEMS_UNNAMED
+
+    offset = _parse_int_param(query.get('offset'))
+    limit = _parse_int_param(query.get('limit'))
+    assert offset >= 0, 'Invalid offset sent to API'
+    assert limit >= 0, 'Invalid limit sent to API'
+
+    desc = _is_true(query.get('desc'))
+    all_items = create_items(total, 0) if desc else create_items(0, total)
+
+    # A zero or missing limit means "everything from the offset on", still subject to the page cap.
+    first = min(offset, total)
+    last = min(offset + (limit or total), total)
+    count = min(max(last - first, 0), MAX_ITEMS_PER_PAGE)
+    page = all_items[first : min(last, first + MAX_ITEMS_PER_PAGE)]
+
+    # Any of these flags drops every second item; `count` above is computed before this filtering.
+    if any(_is_true(query.get(flag)) for flag in ('skipEmpty', 'skipHidden', 'clean')):
+        page = page[::2]
+
+    # The reported limit is never zero: fall back to the page size, then to 1.
+    reported_limit = limit or count or 1
+
+    headers = {
+        'x-apify-pagination-count': str(count),
+        'x-apify-pagination-total': str(total),
+        'x-apify-pagination-offset': str(offset),
+        'x-apify-pagination-limit': str(reported_limit),
+        'x-apify-pagination-desc': str(desc).lower(),
+        'content-type': 'application/json',
+    }
+
+    if request.path.endswith(f'/datasets/{ID_PLACEHOLDER}/items'):
+        payload: Any = page
+    else:
+        payload = {
+            'data': {
+                'total': total,
+                'count': count,
+                'offset': offset,
+                'limit': reported_limit,
+                'desc': desc,
+                'items': page,
+            }
+        }
+    return Response(status=200, headers=headers, response=json.dumps(payload))
+
+
+def _handle_cursor_pagination(request: Request) -> Response:
+    """Answer a request the way a cursor-paginated Apify endpoint (KVS keys, RQ requests) would.
+
+    The fake platform stores `NORMAL_ITEMS` items whose integer `id` equals their position, and
+    a response carries at most `MAX_ITEMS_PER_PAGE` of them. The position to resume from is read
+    from the first non-empty query parameter among `exclusiveStartKey`, `exclusiveStartId` and
+    `cursor`; each holds the last-seen item id as a string, so the page starts at that id + 1.
+    Paths ending in `/keys` get the key-value-store response shape, all others the request-queue
+    shape.
+    """
+    query = request.args
+    limit = _parse_int_param(query.get('limit'))
+    assert limit >= 0, 'Invalid limit sent to API'
+
+    last_seen = query.get('exclusiveStartKey') or query.get('exclusiveStartId') or query.get('cursor')
+
+    total = NORMAL_ITEMS
+    start = int(last_seen) + 1 if last_seen else 0
+    requested_end = min(start + limit, total) if limit else total
+    page_end = min(requested_end, start + MAX_ITEMS_PER_PAGE)
+    page = [{'id': index} for index in range(start, page_end)]
+
+    # More pages exist only when this page is non-empty and stops short of the last item.
+    has_more = bool(page) and page_end < total
+    next_token = str(page[-1]['id']) if has_more else None
+    # The reported limit is never zero: fall back to the page size, then to 1.
+    reported_limit = limit or (len(page) or 1)
+
+    data: dict[str, Any] = {'items': page, 'count': len(page), 'limit': reported_limit}
+    if request.path.endswith('/keys'):
+        data['is_truncated'] = has_more
+        data['next_exclusive_start_key'] = next_token
+    else:  # `/requests`
+        data['next_cursor'] = next_token
+    payload = {'data': data}
+    return Response(status=200, headers={'content-type': 'application/json'}, response=json.dumps(payload))
+
+
+def _pagination_handler(request: Request) -> Response:
+    """Route a request to the matching mock handler.
+
+    Paths ending in `/keys` or `/requests` go to the cursor-based handler; everything else goes to
+    the offset-based handler.
+    """
+    is_cursor_endpoint = request.path.endswith(('/keys', '/requests'))
+    handler = _handle_cursor_pagination if is_cursor_endpoint else _handle_offset_pagination
+    return handler(request)
+
+
+@pytest.fixture
+def pagination_server(httpserver: HTTPServer) -> HTTPServer:
+    """Return `httpserver` after routing every request path to `_pagination_handler`."""
+    match_any_path = re.compile(r'.*')
+    httpserver.expect_request(match_any_path).respond_with_handler(_pagination_handler)
+    return httpserver
+
+
+def _make_sync_client(httpserver: HTTPServer) -> ApifyClient:
+    """Create a sync client with a dummy token whose API URL is the test server's root URL."""
+    base_url = httpserver.url_for('/')
+    return ApifyClient(token='test', api_url=base_url)
+
+
+def _make_async_client(httpserver: HTTPServer) -> ApifyClientAsync:
+    """Create an async client with a dummy token whose API URL is the test server's root URL."""
+    base_url = httpserver.url_for('/')
+    return ApifyClientAsync(token='test', api_url=base_url)
+
+
+# Map resource-client class name to a factory that, given an `ApifyClient`/`ApifyClientAsync`,
+# returns the sub-client under test. Usable for both sync and async since every accessor is
+# available symmetrically on both root clients. Keys are the sync class names only.
+# Nested clients are reached through `ID_PLACEHOLDER` (and the literal `'some-version'` for
+# env vars); the mock server answers any path, so these ids need not exist.
+_CLIENT_FACTORIES: dict[str, Callable[[Any], Any]] = {
+    'ActorCollectionClient': lambda c: c.actors(),
+    'ScheduleCollectionClient': lambda c: c.schedules(),
+    'TaskCollectionClient': lambda c: c.tasks(),
+    'WebhookCollectionClient': lambda c: c.webhooks(),
+    'WebhookDispatchCollectionClient': lambda c: c.webhook_dispatches(),
+    'StoreCollectionClient': lambda c: c.store(),
+    'DatasetCollectionClient': lambda c: c.datasets(),
+    'KeyValueStoreCollectionClient': lambda c: c.key_value_stores(),
+    'RequestQueueCollectionClient': lambda c: c.request_queues(),
+    'BuildCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).builds(),
+    'RunCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).runs(),
+    'ActorVersionCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).versions(),
+    'ActorEnvVarCollectionClient': lambda c: c.actor(ID_PLACEHOLDER).version('some-version').env_vars(),
+    'DatasetClient': lambda c: c.dataset(ID_PLACEHOLDER),
+    'KeyValueStoreClient': lambda c: c.key_value_store(ID_PLACEHOLDER),
+    'RequestQueueClient': lambda c: c.request_queue(ID_PLACEHOLDER),
+}
+
+
+ # Tuples rather than sets: pytest-xdist requires a stable iteration order across workers.
+ # https://pytest-xdist.readthedocs.io/en/stable/known-limitations.html#order-and-amount-of-test-must-be-consistent
+ _COLLECTION_CLIENT_NAMES: tuple[str, ...] = (
+     'ActorCollectionClient',
+     'ScheduleCollectionClient',
+     'TaskCollectionClient',
+     'WebhookCollectionClient',
+     'WebhookDispatchCollectionClient',
+     'StoreCollectionClient',
+     'DatasetCollectionClient',
+     'KeyValueStoreCollectionClient',
+     'RequestQueueCollectionClient',
+     'BuildCollectionClient',
+     'RunCollectionClient',
+     'ActorVersionCollectionClient',
+     'ActorEnvVarCollectionClient',
+ )
+
+ # Client class names grouped by the category of test that exercises them.
+ _CLIENT_SET_NAMES: dict[Literal['collection', 'dataset', 'kvs', 'rq'], tuple[str, ...]] = {
+     'collection': _COLLECTION_CLIENT_NAMES,
+     'dataset': ('DatasetClient',),
+     'kvs': ('KeyValueStoreClient',),
+     'rq': ('RequestQueueClient',),
+ }
+
+
+ @dataclasses.dataclass(eq=True)
+ class _PaginationCase:
+     """One pagination scenario that gets expanded into parametrized tests."""
+
+     # Human-readable label; it becomes part of the pytest id and is the only field that is hashed.
+     id: str
+     # Keyword arguments passed to the listing method under test.
+     inputs: dict
+     # Items the iteration is expected to yield, in order.
+     expected_items: list[dict[str, int]]
+     # Names of the resource-client classes this scenario applies to.
+     supported_clients: set[str]
+
+     def __hash__(self) -> int:
+         # The other fields are unhashable containers, so the hash is derived from the label alone.
+         label = self.id
+         return hash(label)
+
+
+ # Collection clients that take part in the cases with listing options.
+ COLLECTION_CLIENTS = {
+     'ActorCollectionClient',
+     'BuildCollectionClient',
+     'RunCollectionClient',
+     'ScheduleCollectionClient',
+     'TaskCollectionClient',
+     'WebhookCollectionClient',
+     'WebhookDispatchCollectionClient',
+     'DatasetCollectionClient',
+     'KeyValueStoreCollectionClient',
+     'RequestQueueCollectionClient',
+     'StoreCollectionClient',
+ }
+
+ # Collection clients that `TEST_CASES` only includes in the 'No options' case.
+ NO_OPTIONS_CLIENTS = {
+     'ActorEnvVarCollectionClient',
+     'ActorVersionCollectionClient',
+ }
+
+ # Single-resource storage clients, one set per storage type.
+ DATASET_CLIENTS = {'DatasetClient'}
+ RQ_CLIENTS = {'RequestQueueClient'}
+ KVS_CLIENTS = {'KeyValueStoreClient'}
+
+ # Unions used to express which clients a test case supports.
+ STORAGE_CLIENTS = DATASET_CLIENTS.union(RQ_CLIENTS, KVS_CLIENTS)
+ ALL_CLIENTS = COLLECTION_CLIENTS.union(NO_OPTIONS_CLIENTS, STORAGE_CLIENTS)
+
+ # Pagination scenarios shared by the parametrized tests in this module. Each case gives the keyword
+ # arguments passed to the listing method, the items expected back, and the client classes it applies to.
+ TEST_CASES = (
+     _PaginationCase('No options', {}, create_items(0, 2500), ALL_CLIENTS),
+     _PaginationCase('Limit', {'limit': 1100}, create_items(0, 1100), ALL_CLIENTS - NO_OPTIONS_CLIENTS),
+     _PaginationCase('Out of range limit', {'limit': 3000}, create_items(0, 2500), ALL_CLIENTS - NO_OPTIONS_CLIENTS),
+     # Offset-based cases leave out the KVS and RQ clients, which get their own start-key/cursor cases below.
+     _PaginationCase(
+         'Offset',
+         {'offset': 1000},
+         create_items(1000, 2500),
+         ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS,
+     ),
+     _PaginationCase(
+         'Offset and limit',
+         {'offset': 1000, 'limit': 1100},
+         create_items(1000, 2100),
+         ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS,
+     ),
+     _PaginationCase(
+         'Out of range offset', {'offset': 3000}, [], ALL_CLIENTS - NO_OPTIONS_CLIENTS - KVS_CLIENTS - RQ_CLIENTS
+     ),
+     _PaginationCase(
+         'Offset, limit, descending',
+         {'offset': 1000, 'limit': 1100, 'desc': True},
+         create_items(1500, 400),
+         ALL_CLIENTS - NO_OPTIONS_CLIENTS - {'StoreCollectionClient'} - KVS_CLIENTS - RQ_CLIENTS,
+     ),
+     _PaginationCase(
+         'Offset, limit, descending, unnamed',
+         {'offset': 50, 'limit': 1100, 'desc': True, 'unnamed': True},
+         create_items(2550, 1450),
+         {'DatasetCollectionClient', 'KeyValueStoreCollectionClient', 'RequestQueueCollectionClient'},
+     ),
+     # `chunk_size` cases are limited to the storage clients.
+     _PaginationCase(
+         'chunk_size',
+         {'chunk_size': 100, 'limit': 250},
+         create_items(0, 250),
+         STORAGE_CLIENTS,
+     ),
+     _PaginationCase(
+         'Offset, limit, descending, chunk_size',
+         {'offset': 50, 'limit': 1100, 'desc': True, 'chunk_size': 100},
+         create_items(2450, 1350),
+         DATASET_CLIENTS,
+     ),
+     _PaginationCase(
+         # The id names exactly the options in use; this case passes neither `offset` nor `desc`.
+         'Limit, chunk_size, clean',
+         {'limit': 1500, 'chunk_size': 100, 'clean': True},
+         # API behavior with `clean=True` is to apply the cleaning after pagination, so we end up with missing items
+         # being counted towards the limit and thus fewer total items returned.
+         create_items(0, 1500, 2),
+         DATASET_CLIENTS,
+     ),
+     # Key-value store keys are paginated from an exclusive start key instead of an offset.
+     _PaginationCase(
+         'Exclusive start key',
+         {'exclusive_start_key': '1000'},
+         create_items(1001, 2500),
+         KVS_CLIENTS,
+     ),
+     _PaginationCase(
+         'Exclusive start key and limit',
+         {'exclusive_start_key': '1000', 'limit': 500},
+         create_items(1001, 1501),
+         KVS_CLIENTS,
+     ),
+     # Request queue requests are paginated from a cursor instead of an offset.
+     _PaginationCase(
+         'Cursor',
+         {'cursor': '1000'},
+         create_items(1001, 2500),
+         RQ_CLIENTS,
+     ),
+     _PaginationCase(
+         'Cursor and limit',
+         {'cursor': '1000', 'limit': 500},
+         create_items(1001, 1501),
+         RQ_CLIENTS,
+     ),
+ )
+
+
+ def _generate_test_params(client_set: Literal['collection', 'dataset', 'kvs', 'rq']) -> list[ParameterSet]:
+     """Expand `TEST_CASES` into pytest parameters for one category of clients.
+
+     Every test case is paired with each client name of the category (taken from
+     `_CLIENT_SET_NAMES`) that the case lists in `supported_clients`. A parameter holds the
+     case inputs, the expected items and the client class name, and its id has the form
+     `<client name>:<case id>`. The test body resolves the class name to a real client
+     through `_CLIENT_FACTORIES`.
+     """
+     candidate_names = _CLIENT_SET_NAMES[client_set]
+     params: list[ParameterSet] = []
+     for case in TEST_CASES:
+         for name in candidate_names:
+             if name not in case.supported_clients:
+                 continue
+             params.append(pytest.param(case.inputs, case.expected_items, name, id=f'{name}:{case.id}'))
+     return params
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='collection'),
+ )
+ def test_client_list_iterable(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Iterating the result of a sync collection client's `list()` should yield the items of all pages."""
+     root_client = _make_sync_client(pagination_server)
+     client: CollectionClient = _CLIENT_FACTORIES[client_name](root_client)
+
+     collected = list(client.list(**inputs))
+
+     # Without any options, the iteration should yield as many items as the listing reports in `total`.
+     if not inputs:
+         listing = client.list(**inputs)
+         assert len(collected) == listing.total
+
+     assert collected == expected_items
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='collection'),
+ )
+ async def test_client_list_iterable_async(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Async-iterating the result of an async collection client's `list()` should yield the items of all pages."""
+     root_client = _make_async_client(pagination_server)
+     client: CollectionClientAsync = _CLIENT_FACTORIES[client_name](root_client)
+
+     collected = [entry async for entry in client.list(**inputs)]
+
+     # Without any options, the iteration should yield as many items as the awaited listing reports in `total`.
+     if not inputs:
+         listing = await client.list(**inputs)
+         assert len(collected) == listing.total
+
+     assert collected == expected_items
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='dataset'),
+ )
+ def test_dataset_items_list_iterable(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Iterating the result of the sync dataset client's `list_items()` should yield the items of all pages."""
+     root_client = _make_sync_client(pagination_server)
+     client: DatasetClient = _CLIENT_FACTORIES[client_name](root_client)
+
+     collected = list(client.list_items(**inputs))
+
+     # Without any options, the iteration should yield as many items as the listing reports in `total`.
+     if not inputs:
+         listing = client.list_items(**inputs)
+         assert len(collected) == listing.total
+
+     assert collected == expected_items
+
+     # Until the deprecated `iterate_items` method is removed, it should behave the same.
+     # `chunk_size` is not forwarded to it.
+     legacy_kwargs = dict(inputs)
+     legacy_kwargs.pop('chunk_size', None)
+     assert collected == list(client.iterate_items(**legacy_kwargs))
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='dataset'),
+ )
+ async def test_dataset_items_list_iterable_async(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Async-iterating the result of the async dataset client's `list_items()` should yield the items of all pages."""
+     root_client = _make_async_client(pagination_server)
+     client: DatasetClientAsync = _CLIENT_FACTORIES[client_name](root_client)
+
+     collected = [entry async for entry in client.list_items(**inputs)]
+
+     # Without any options, the iteration should yield as many items as the awaited listing reports in `total`.
+     if not inputs:
+         listing = await client.list_items(**inputs)
+         assert len(collected) == listing.total
+
+     assert collected == expected_items
+
+     # Until the deprecated `iterate_items` method is removed, it should behave the same.
+     # `chunk_size` is not forwarded to it.
+     legacy_kwargs = dict(inputs)
+     legacy_kwargs.pop('chunk_size', None)
+     assert collected == [entry async for entry in client.iterate_items(**legacy_kwargs)]
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='kvs'),
+ )
+ def test_kvs_list_keys_iterable(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Iterating the result of the sync KVS client's `list_keys()` should yield the keys of all cursor-paginated pages."""
+     root_client = _make_sync_client(pagination_server)
+     client: KeyValueStoreClient = _CLIENT_FACTORIES[client_name](root_client)
+
+     # Each yielded key is converted with `dict()` so it can be compared with the expected plain dicts.
+     collected = [dict(key) for key in client.list_keys(**inputs)]
+     assert collected == expected_items
+
+     # Until the deprecated `iterate_keys` method is removed, it should behave the same
+     legacy_collected = [dict(key) for key in client.iterate_keys(**inputs)]
+     assert collected == legacy_collected
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='kvs'),
+ )
+ async def test_kvs_list_keys_iterable_async(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Async-iterating the async KVS client's `list_keys()` result should yield the keys of all cursor-paginated pages."""
+     root_client = _make_async_client(pagination_server)
+     client: KeyValueStoreClientAsync = _CLIENT_FACTORIES[client_name](root_client)
+
+     # Each yielded key is converted with `dict()` so it can be compared with the expected plain dicts.
+     collected = [dict(key) async for key in client.list_keys(**inputs)]
+     assert collected == expected_items
+
+     # Until the deprecated `iterate_keys` method is removed, it should behave the same
+     legacy_collected = [dict(key) async for key in client.iterate_keys(**inputs)]
+     assert collected == legacy_collected
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='rq'),
+ )
+ def test_rq_list_requests_iterable(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Iterating the sync RQ client's `list_requests()` result should yield the requests of all cursor-paginated pages."""
+     root_client = _make_sync_client(pagination_server)
+     client: RequestQueueClient = _CLIENT_FACTORIES[client_name](root_client)
+
+     # Each yielded request is converted with `dict()` so it can be compared with the expected plain dicts.
+     collected = [dict(request) for request in client.list_requests(**inputs)]
+
+     assert collected == expected_items
+
+
+ @pytest.mark.parametrize(
+     ('inputs', 'expected_items', 'client_name'),
+     _generate_test_params(client_set='rq'),
+ )
+ async def test_rq_list_requests_iterable_async(
+     pagination_server: HTTPServer,
+     client_name: str,
+     inputs: dict,
+     expected_items: list[dict[str, int]],
+ ) -> None:
+     """Async-iterating the async RQ client's `list_requests()` result should yield requests of all cursor pages."""
+     root_client = _make_async_client(pagination_server)
+     client: RequestQueueClientAsync = _CLIENT_FACTORIES[client_name](root_client)
+
+     # Each yielded request is converted with `dict()` so it can be compared with the expected plain dicts.
+     collected = [dict(request) async for request in client.list_requests(**inputs)]
+
+     assert collected == expected_items
+
+
+ def test_rq_list_requests_rejects_cursor_and_exclusive_start_id() -> None:
+     """`list_requests()` should raise `ValueError` when given both `cursor` and `exclusive_start_id`."""
+     root_client = ApifyClient(token='')
+     queue_client = root_client.request_queue(ID_PLACEHOLDER)
+
+     with pytest.raises(ValueError, match='Cannot use both'):
+         queue_client.list_requests(cursor='a', exclusive_start_id='b')
+
+
+ async def test_rq_list_requests_rejects_cursor_and_exclusive_start_id_async() -> None:
+     """The async `list_requests()` should raise `ValueError` for `cursor` combined with `exclusive_start_id`."""
+     root_client = ApifyClientAsync(token='')
+     queue_client = root_client.request_queue(ID_PLACEHOLDER)
+
+     # The call is deliberately not awaited: the error is expected as soon as `list_requests()` is called.
+     with pytest.raises(ValueError, match='Cannot use both'):
+         queue_client.list_requests(cursor='a', exclusive_start_id='b')