Skip to content

Commit 8f55d46

Browse files
drobnikj and fnesveda authored
feat: docs string for memory storage clients (#31)
Co-authored-by: František Nesveda <fnesveda@users.noreply.github.com>
1 parent 174548e commit 8f55d46

8 files changed

Lines changed: 253 additions & 98 deletions

File tree

‎src/apify/memory_storage/memory_storage.py‎

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,6 @@ async def purge(self) -> None:
125125
if request_queue_folder.name == 'default' or request_queue_folder.name.startswith('__APIFY_TEMPORARY'):
126126
await self._batch_remove_files(request_queue_folder.path)
127127

128-
def teardown(self) -> None:
129-
"""TODO: docs."""
130-
# We don't need to wait for anything here since we don't have worker threads for fs operations
131-
pass
132-
133128
async def _handle_default_key_value_store(self, folder: str) -> None:
134129
"""Remove everything from the default key-value store folder except `possible_input_keys`."""
135130
folder_exists = await ospath.exists(folder)

‎src/apify/memory_storage/resource_clients/dataset.py‎

Lines changed: 82 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131

3232
class DatasetClient:
33-
"""TODO: docs."""
33+
"""Sub-client for manipulating a single dataset."""
3434

3535
_id: str
3636
_dataset_directory: str
@@ -43,7 +43,7 @@ class DatasetClient:
4343
_item_count = 0
4444

4545
def __init__(self, *, base_storage_directory: str, client: 'MemoryStorage', id: Optional[str] = None, name: Optional[str] = None) -> None:
46-
"""TODO: docs."""
46+
"""Initialize the DatasetClient."""
4747
self._id = str(uuid.uuid4()) if id is None else id
4848
self._dataset_directory = os.path.join(base_storage_directory, name or self._id)
4949
self._client = client
@@ -54,7 +54,11 @@ def __init__(self, *, base_storage_directory: str, client: 'MemoryStorage', id:
5454
self._modified_at = datetime.utcnow()
5555

5656
async def get(self) -> Optional[Dict]:
57-
"""TODO: docs."""
57+
"""Retrieve the dataset.
58+
59+
Returns:
60+
dict, optional: The retrieved dataset, or None, if it does not exist
61+
"""
5862
found = _find_or_cache_dataset_by_possible_id(client=self._client, entry_name_or_id=self._name or self._id)
5963

6064
if found:
@@ -64,7 +68,14 @@ async def get(self) -> Optional[Dict]:
6468
return None
6569

6670
async def update(self, *, name: Optional[str] = None) -> Dict:
67-
"""TODO: docs."""
71+
"""Update the dataset with specified fields.
72+
73+
Args:
74+
name (str, optional): The new name for the dataset
75+
76+
Returns:
77+
dict: The updated dataset
78+
"""
6879
# Check by id
6980
existing_dataset_by_id = _find_or_cache_dataset_by_possible_id(client=self._client, entry_name_or_id=self._name or self._id)
7081

@@ -96,7 +107,7 @@ async def update(self, *, name: Optional[str] = None) -> Dict:
96107
return existing_dataset_by_id.to_dataset_info()
97108

98109
async def delete(self) -> None:
99-
"""TODO: docs."""
110+
"""Delete the dataset."""
100111
dataset = next((dataset for dataset in self._client._datasets_handled if dataset._id == self._id), None)
101112

102113
if dataset is not None:
@@ -122,7 +133,35 @@ async def list_items(
122133
flatten: Optional[List[str]] = None, # noqa: U100
123134
view: Optional[str] = None, # noqa: U100
124135
) -> ListPage:
125-
"""TODO: docs."""
136+
"""List the items of the dataset.
137+
138+
Args:
139+
offset (int, optional): Number of items that should be skipped at the start. The default value is 0
140+
limit (int, optional): Maximum number of items to return. By default there is no limit.
141+
desc (bool, optional): By default, results are returned in the same order as they were stored.
142+
To reverse the order, set this parameter to True.
143+
clean (bool, optional): If True, returns only non-empty items and skips hidden fields (i.e. fields starting with the # character).
144+
The clean parameter is just a shortcut for skip_hidden=True and skip_empty=True parameters.
145+
Note that since some objects might be skipped from the output, the result might contain fewer items than the limit value.
146+
fields (list of str, optional): A list of fields which should be picked from the items,
147+
only these fields will remain in the resulting record objects.
148+
Note that the fields in the outputted items are sorted the same way as they are specified in the fields parameter.
149+
You can use this feature to effectively fix the output format.
150+
omit (list of str, optional): A list of fields which should be omitted from the items.
151+
unwind (str, optional): Name of a field which should be unwound.
152+
If the field is an array then every element of the array will become a separate record and merged with parent object.
153+
If the unwound field is an object then it is merged with the parent object.
154+
If the unwound field is missing or its value is neither an array nor an object and therefore cannot be merged with a parent object,
155+
then the item gets preserved as it is. Note that the unwound items ignore the desc parameter.
156+
skip_empty (bool, optional): If True, then empty items are skipped from the output.
157+
Note that if used, the results might contain fewer items than the limit value.
158+
skip_hidden (bool, optional): If True, then hidden fields are skipped from the output, i.e. fields starting with the # character.
159+
flatten (list of str, optional): A list of fields that should be flattened
160+
view (str, optional): Name of the dataset view to be used
161+
162+
Returns:
163+
ListPage: A page of the list of dataset items according to the specified filters.
164+
"""
126165
# Check by id
127166
existing_dataset_by_id = _find_or_cache_dataset_by_possible_id(client=self._client, entry_name_or_id=self._name or self._id)
128167

@@ -167,7 +206,33 @@ async def iterate_items(
167206
skip_empty: Optional[bool] = None, # noqa: U100
168207
skip_hidden: Optional[bool] = None, # noqa: U100
169208
) -> AsyncIterator[Dict]:
170-
"""TODO: docs."""
209+
"""Iterate over the items in the dataset.
210+
211+
Args:
212+
offset (int, optional): Number of items that should be skipped at the start. The default value is 0
213+
limit (int, optional): Maximum number of items to return. By default there is no limit.
214+
desc (bool, optional): By default, results are returned in the same order as they were stored.
215+
To reverse the order, set this parameter to True.
216+
clean (bool, optional): If True, returns only non-empty items and skips hidden fields (i.e. fields starting with the # character).
217+
The clean parameter is just a shortcut for skip_hidden=True and skip_empty=True parameters.
218+
Note that since some objects might be skipped from the output, the result might contain fewer items than the limit value.
219+
fields (list of str, optional): A list of fields which should be picked from the items,
220+
only these fields will remain in the resulting record objects.
221+
Note that the fields in the outputted items are sorted the same way as they are specified in the fields parameter.
222+
You can use this feature to effectively fix the output format.
223+
omit (list of str, optional): A list of fields which should be omitted from the items.
224+
unwind (str, optional): Name of a field which should be unwound.
225+
If the field is an array then every element of the array will become a separate record and merged with parent object.
226+
If the unwound field is an object then it is merged with the parent object.
227+
If the unwound field is missing or its value is neither an array nor an object and therefore cannot be merged with a parent object,
228+
then the item gets preserved as it is. Note that the unwound items ignore the desc parameter.
229+
skip_empty (bool, optional): If True, then empty items are skipped from the output.
230+
Note that if used, the results might contain fewer items than the limit value.
231+
skip_hidden (bool, optional): If True, then hidden fields are skipped from the output, i.e. fields starting with the # character.
232+
233+
Yields:
234+
dict: An item from the dataset
235+
"""
171236
cache_size = 1000
172237
first_item = offset
173238

@@ -197,53 +262,18 @@ async def iterate_items(
197262
for item in current_items_page.items:
198263
yield item
199264

200-
async def get_items_as_bytes(
201-
self,
202-
*,
203-
_item_format: str = 'json',
204-
_offset: Optional[int] = None,
205-
_limit: Optional[int] = None,
206-
_desc: Optional[bool] = None,
207-
_clean: Optional[bool] = None,
208-
_bom: Optional[bool] = None,
209-
_delimiter: Optional[str] = None,
210-
_fields: Optional[List[str]] = None,
211-
_omit: Optional[List[str]] = None,
212-
_unwind: Optional[str] = None,
213-
_skip_empty: Optional[bool] = None,
214-
_skip_header_row: Optional[bool] = None,
215-
_skip_hidden: Optional[bool] = None,
216-
_xml_root: Optional[str] = None,
217-
_xml_row: Optional[str] = None,
218-
_flatten: Optional[List[str]] = None,
219-
) -> bytes:
220-
"""TODO: docs."""
221-
raise NotImplementedError('This method is not supported in local memory storage')
265+
async def get_items_as_bytes(self, *_args: Any, **_kwargs: Any) -> bytes: # noqa: D102
266+
raise NotImplementedError('This method is not supported in local memory storage.')
222267

223-
async def stream_items(
224-
self,
225-
*,
226-
_item_format: str = 'json',
227-
_offset: Optional[int] = None,
228-
_limit: Optional[int] = None,
229-
_desc: Optional[bool] = None,
230-
_clean: Optional[bool] = None,
231-
_bom: Optional[bool] = None,
232-
_delimiter: Optional[str] = None,
233-
_fields: Optional[List[str]] = None,
234-
_omit: Optional[List[str]] = None,
235-
_unwind: Optional[str] = None,
236-
_skip_empty: Optional[bool] = None,
237-
_skip_header_row: Optional[bool] = None,
238-
_skip_hidden: Optional[bool] = None,
239-
_xml_root: Optional[str] = None,
240-
_xml_row: Optional[str] = None,
241-
) -> AsyncIterator:
242-
"""TODO: docs."""
268+
async def stream_items(self, *_args: Any, **_kwargs: Any) -> AsyncIterator: # noqa: D102
243269
raise NotImplementedError('This method is not supported in local memory storage')
244270

245271
async def push_items(self, items: JSONSerializable) -> None:
246-
"""TODO: docs."""
272+
"""Push items to the dataset.
273+
274+
Args:
275+
items: The items to push into the dataset. Either a stringified JSON, a dictionary, or a list of strings or dictionaries.
276+
"""
247277
# Check by id
248278
existing_dataset_by_id = _find_or_cache_dataset_by_possible_id(client=self._client, entry_name_or_id=self._name or self._id)
249279

@@ -273,7 +303,7 @@ async def push_items(self, items: JSONSerializable) -> None:
273303
)
274304

275305
def to_dataset_info(self) -> Dict:
276-
"""TODO: docs."""
306+
"""Retrieve the dataset info."""
277307
return {
278308
'id': self._id,
279309
'name': self._name,
@@ -284,7 +314,7 @@ def to_dataset_info(self) -> Dict:
284314
}
285315

286316
async def _update_timestamps(self, has_been_modified: bool) -> None:
287-
"""TODO: docs."""
317+
"""Update the timestamps of the dataset."""
288318
self._accessed_at = datetime.utcnow()
289319

290320
if has_been_modified:

‎src/apify/memory_storage/resource_clients/dataset_collection.py‎

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,22 @@
1111

1212

1313
class DatasetCollectionClient:
14-
"""TODO: docs."""
14+
"""Sub-client for manipulating datasets."""
1515

1616
_datasets_directory: str
1717
_client: 'MemoryStorage'
1818

1919
def __init__(self, *, base_storage_directory: str, client: 'MemoryStorage') -> None:
20-
"""TODO: docs."""
20+
"""Initialize the DatasetCollectionClient with the passed arguments."""
2121
self._datasets_directory = base_storage_directory
2222
self._client = client
2323

24-
def list(self) -> ListPage[Dict]:
25-
"""TODO: docs."""
24+
def list(self) -> ListPage:
25+
"""List the available datasets.
26+
27+
Returns:
28+
ListPage: The list of available datasets matching the specified filters.
29+
"""
2630
def map_store(store: DatasetClient) -> Dict:
2731
return store.to_dataset_info()
2832
return ListPage({
@@ -35,7 +39,15 @@ def map_store(store: DatasetClient) -> Dict:
3539
})
3640

3741
async def get_or_create(self, *, name: Optional[str] = None, _schema: Optional[Dict] = None) -> Dict:
38-
"""TODO: docs."""
42+
"""Retrieve a named dataset, or create a new one when it doesn't exist.
43+
44+
Args:
45+
name (str, optional): The name of the dataset to retrieve or create.
46+
schema (Dict, optional): The schema of the dataset
47+
48+
Returns:
49+
dict: The retrieved or newly-created dataset.
50+
"""
3951
if name:
4052
found = _find_or_cache_dataset_by_possible_id(client=self._client, entry_name_or_id=name)
4153

0 commit comments

Comments
 (0)