Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions docs/02_concepts/08_pagination.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import ApiLink from '@site/src/components/ApiLink';
import PaginationAsyncExample from '!!raw-loader!./code/08_pagination_async.py';
import PaginationSyncExample from '!!raw-loader!./code/08_pagination_sync.py';

import IterateItemsAsyncExample from '!!raw-loader!./code/08_iterate_items_async.py';
import IterateItemsSyncExample from '!!raw-loader!./code/08_iterate_items_sync.py';

Most methods named `list` or `list_something` in the Apify client return a <ApiLink to="class/ListPage">`ListPage`</ApiLink> object. This object provides a consistent interface for working with paginated data and includes the following properties:

Expand Down Expand Up @@ -45,7 +43,7 @@ The <ApiLink to="class/ListPage">`ListPage`</ApiLink> interface offers several k

## Generator-based iteration

For most use cases, `iterate_items()` is the recommended way to process all items in a dataset. It handles pagination automatically using a Python generator, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself.
You can also use the `list` methods directly in iteration, for example in a `for` or `async for` loop. Iterating this way handles pagination automatically, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself.

<Tabs>
<TabItem value="AsyncExample" label="Async client" default>
Expand All @@ -60,6 +58,4 @@ For most use cases, `iterate_items()` is the recommended way to process all item
</TabItem>
</Tabs>

`iterate_items()` accepts the same filtering parameters as `list_items()` (`clean`, `fields`, `omit`, `unwind`, `skip_empty`, `skip_hidden`), so you can combine automatic pagination with data filtering.

Similarly, `KeyValueStoreClient` provides an `iterate_keys()` method for iterating over all keys in a key-value store without manual pagination.
11 changes: 8 additions & 3 deletions docs/02_concepts/code/08_iterate_items_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ async def main() -> None:
apify_client = ApifyClientAsync(TOKEN)
dataset_client = apify_client.dataset('dataset-id')

# Iterate through all items automatically.
async for item in dataset_client.iterate_items():
print(item)
# Define the pagination parameters
limit = 1500 # Number of items in total
offset = 100 # Starting offset

# Iterate through items automatically, lazily sending as many API calls
# as needed and receiving items in chunks.
async for item in dataset_client.list_items(limit=limit, offset=offset):
print(item) # Process the item as needed
11 changes: 8 additions & 3 deletions docs/02_concepts/code/08_iterate_items_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@ def main() -> None:
apify_client = ApifyClient(TOKEN)
dataset_client = apify_client.dataset('dataset-id')

# Iterate through all items automatically.
for item in dataset_client.iterate_items():
print(item)
# Define the pagination parameters
limit = 1500 # Number of items in total
offset = 100 # Starting offset

# Iterate through items automatically, lazily sending as many API calls
# as needed and receiving items in chunks.
for item in dataset_client.list_items(limit=limit, offset=offset):
print(item) # Process the item as needed


if __name__ == '__main__':
Expand Down
27 changes: 8 additions & 19 deletions docs/02_concepts/code/08_pagination_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,15 @@ async def main() -> None:
dataset_client = apify_client.dataset('dataset-id')

# Define the pagination parameters
limit = 1000 # Number of items per page
limit = 1000 # Number of items to request from the API
offset = 0 # Starting offset
all_items = [] # List to store all fetched items

while True:
# Fetch a page of items
response = await dataset_client.list_items(limit=limit, offset=offset)
items = response.items
total = response.total
# Send a single API call to fetch paginated items.
# (the number of items returned by a single call can be limited by the API)
paginated_items = await dataset_client.list_items(limit=limit, offset=offset)

print(f'Fetched {len(items)} items')
# Inspect pagination metadata returned by API
print(paginated_items.total)

# Add the fetched items to the complete list
all_items.extend(items)

# Exit the loop if there are no more items to fetch
if offset + limit >= total:
break

# Increment the offset for the next page
offset += limit

print(f'Overall fetched {len(all_items)} items')
for item in paginated_items.items:
print(item) # Process the item as needed
27 changes: 8 additions & 19 deletions docs/02_concepts/code/08_pagination_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,15 @@ def main() -> None:
dataset_client = apify_client.dataset('dataset-id')

# Define the pagination parameters
limit = 1000 # Number of items per page
limit = 1000 # Number of items to request from the API
offset = 0 # Starting offset
all_items = [] # List to store all fetched items

while True:
# Fetch a page of items
response = dataset_client.list_items(limit=limit, offset=offset)
items = response.items
total = response.total
# Send a single API call to fetch paginated items.
# (the number of items returned by a single call can be limited by the API)
paginated_items = dataset_client.list_items(limit=limit, offset=offset)

print(f'Fetched {len(items)} items')
# Inspect pagination metadata returned by API
print(paginated_items.total)

# Add the fetched items to the complete list
all_items.extend(items)

# Exit the loop if there are no more items to fetch
if offset + limit >= total:
break

# Increment the offset for the next page
offset += limit

print(f'Overall fetched {len(all_items)} items')
for item in paginated_items.items:
print(item) # Process the item as needed
Loading
Loading