diff --git a/docs/user_guide/storing/doc_store/store_s3.md b/docs/user_guide/storing/doc_store/store_s3.md index c4e0878133..cd26f1a358 100644 --- a/docs/user_guide/storing/doc_store/store_s3.md +++ b/docs/user_guide/storing/doc_store/store_s3.md @@ -12,7 +12,7 @@ When you want to use your [`DocList`][docarray.DocList] in another place, you ca ## Push & pull To use the store [`DocList`][docarray.DocList] on S3, you need to pass an S3 path to the function starting with `'s3://'`. -In the following demo, we use `MinIO` as a local S3 service. You could use the following docker-compose file to start the service in a Docker container. +In the following demo, we use `MinIO` as a local S3 service. You could use the following docker compose file to start the service in a Docker container. ```yaml version: "3" @@ -26,7 +26,7 @@ services: ``` Save the above file as `docker-compose.yml` and run the following line in the same folder as the file. ```cmd -docker-compose up +docker compose up ``` ```python diff --git a/docs/user_guide/storing/index_elastic.md b/docs/user_guide/storing/index_elastic.md index f05ef0e5cb..89a104fefa 100644 --- a/docs/user_guide/storing/index_elastic.md +++ b/docs/user_guide/storing/index_elastic.md @@ -45,13 +45,17 @@ from docarray.index import ElasticDocIndex # or ElasticV7DocIndex from docarray.typing import NdArray import numpy as np + # Define the document schema. class MyDoc(BaseDoc): - title: str + title: str embedding: NdArray[128] + # Create dummy documents. -docs = DocList[MyDoc](MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10)) +docs = DocList[MyDoc]( + MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10) +) # Initialize a new ElasticDocIndex instance and add the documents to the index. doc_index = ElasticDocIndex[MyDoc](index_name='my_index') @@ -67,7 +71,7 @@ retrieved_docs = doc_index.find(query, search_field='embedding', limit=10) ## Initialize -You can use docker-compose to create a local Elasticsearch service with the following `docker-compose.yml`. +You can use docker compose to create a local Elasticsearch service with the following `docker-compose.yml`. ```yaml version: "3.3" @@ -91,7 +95,7 @@ networks: Run the following command in the folder of the above `docker-compose.yml` to start the service: ```bash -docker-compose up +docker compose up ``` ### Schema definition @@ -225,9 +229,7 @@ You can also search for multiple documents at once, in a batch, using the [`find ```python # create some query Documents - queries = DocList[SimpleDoc]( - SimpleDoc(tensor=np.random.rand(128)) for i in range(3) - ) + queries = DocList[SimpleDoc](SimpleDoc(tensor=np.random.rand(128)) for i in range(3)) # find similar documents matches, scores = doc_index.find_batched(queries, search_field='tensor', limit=5) diff --git a/docs/user_guide/storing/index_milvus.md b/docs/user_guide/storing/index_milvus.md index 4cf9c91c7d..18431902ce 100644 --- a/docs/user_guide/storing/index_milvus.md +++ b/docs/user_guide/storing/index_milvus.md @@ -27,13 +27,17 @@ from docarray.typing import NdArray from pydantic import Field import numpy as np + # Define the document schema. class MyDoc(BaseDoc): - title: str + title: str embedding: NdArray[128] = Field(is_embedding=True) + # Create dummy documents. 
-docs = DocList[MyDoc](MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10)) +docs = DocList[MyDoc]( + MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10) +) # Initialize a new MilvusDocumentIndex instance and add the documents to the index. doc_index = MilvusDocumentIndex[MyDoc](index_name='tmp_index_1') @@ -55,7 +59,7 @@ wget https://github.com/milvus-io/milvus/releases/download/v2.2.11/milvus-standa And start Milvus by running: ```shell -sudo docker-compose up -d +sudo docker compose up -d ``` Learn more on [Milvus documentation](https://milvus.io/docs/install_standalone-docker.md). @@ -142,10 +146,12 @@ Now that you have a Document Index, you can add data to it, using the [`index()` import numpy as np from docarray import DocList + class MyDoc(BaseDoc): - title: str + title: str embedding: NdArray[128] = Field(is_embedding=True) + doc_index = MilvusDocumentIndex[MyDoc](index_name='tmp_index_5') # create some random data @@ -273,7 +279,9 @@ class Book(BaseDoc): embedding: NdArray[10] = Field(is_embedding=True) -books = DocList[Book]([Book(price=i * 10, embedding=np.random.rand(10)) for i in range(10)]) +books = DocList[Book]( + [Book(price=i * 10, embedding=np.random.rand(10)) for i in range(10)] +) book_index = MilvusDocumentIndex[Book](index_name='tmp_index_6') book_index.index(books) @@ -312,8 +320,11 @@ class SimpleSchema(BaseDoc): price: int embedding: NdArray[128] = Field(is_embedding=True) + # Create dummy documents. -docs = DocList[SimpleSchema](SimpleSchema(price=i, embedding=np.random.rand(128)) for i in range(10)) +docs = DocList[SimpleSchema]( + SimpleSchema(price=i, embedding=np.random.rand(128)) for i in range(10) +) doc_index = MilvusDocumentIndex[SimpleSchema](index_name='tmp_index_7') doc_index.index(docs) @@ -407,7 +418,9 @@ You can pass any of the above as keyword arguments to the `__init__()` method or ```python class SimpleDoc(BaseDoc): - tensor: NdArray[128] = Field(is_embedding=True, index_type='IVF_FLAT', metric_type='L2') + tensor: NdArray[128] = Field( + is_embedding=True, index_type='IVF_FLAT', metric_type='L2' + ) doc_index = MilvusDocumentIndex[SimpleDoc](index_name='tmp_index_10') diff --git a/docs/user_guide/storing/index_qdrant.md b/docs/user_guide/storing/index_qdrant.md index 71770e4598..3d34b472a0 100644 --- a/docs/user_guide/storing/index_qdrant.md +++ b/docs/user_guide/storing/index_qdrant.md @@ -22,13 +22,17 @@ from docarray.index import QdrantDocumentIndex from docarray.typing import NdArray import numpy as np + # Define the document schema. class MyDoc(BaseDoc): - title: str + title: str embedding: NdArray[128] + # Create dummy documents. -docs = DocList[MyDoc](MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10)) +docs = DocList[MyDoc]( + MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10) +) # Initialize a new QdrantDocumentIndex instance and add the documents to the index. doc_index = QdrantDocumentIndex[MyDoc](host='localhost') @@ -46,7 +50,7 @@ You can initialize [QdrantDocumentIndex][docarray.index.backends.qdrant.QdrantDo **Connecting to a local Qdrant instance running as a Docker container** -You can use docker-compose to create a local Qdrant service with the following `docker-compose.yml`. +You can use docker compose to create a local Qdrant service with the following `docker-compose.yml`. 
```yaml version: '3.8' @@ -66,7 +70,7 @@ services: Run the following command in the folder of the above `docker-compose.yml` to start the service: ```bash -docker-compose up +docker compose up ``` Next, you can create a [QdrantDocumentIndex][docarray.index.backends.qdrant.QdrantDocumentIndex] instance using: @@ -89,7 +93,7 @@ doc_index = QdrantDocumentIndex[MyDoc](qdrant_config) **Connecting to Qdrant Cloud service** ```python qdrant_config = QdrantDocumentIndex.DBConfig( - "https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io", + "https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io", api_key="", ) doc_index = QdrantDocumentIndex[MyDoc](qdrant_config) @@ -317,9 +321,7 @@ book_index = QdrantDocumentIndex[Book]() book_index.index(books) # filter for books that are cheaper than 29 dollars -query = rest.Filter( - must=[rest.FieldCondition(key='price', range=rest.Range(lt=29))] - ) +query = rest.Filter(must=[rest.FieldCondition(key='price', range=rest.Range(lt=29))]) cheap_books = book_index.filter(filter_query=query) assert len(cheap_books) == 3 @@ -372,7 +374,9 @@ class SimpleDoc(BaseDoc): doc_index = QdrantDocumentIndex[SimpleDoc](host='localhost') index_docs = [ - SimpleDoc(id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'Lorem ipsum {int(i/2)}') + SimpleDoc( + id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'Lorem ipsum {int(i/2)}' + ) for i in range(10) ] doc_index.index(index_docs) @@ -380,16 +384,16 @@ doc_index.index(index_docs) find_query = np.ones(10) text_search_query = 'ipsum 1' filter_query = rest.Filter( - must=[ - rest.FieldCondition( - key='num', - range=rest.Range( - gte=1, - lt=5, - ), - ) - ] - ) + must=[ + rest.FieldCondition( + key='num', + range=rest.Range( + gte=1, + lt=5, + ), + ) + ] +) query = ( doc_index.build_query() @@ -437,6 +441,8 @@ import numpy as np from docarray import BaseDoc, DocList from docarray.typing import NdArray from docarray.index import QdrantDocumentIndex + + class MyDoc(BaseDoc): text: str embedding: NdArray[128] @@ -445,7 +451,12 @@ class MyDoc(BaseDoc): Now, we can instantiate our Index and add some data: ```python docs = DocList[MyDoc]( - [MyDoc(embedding=np.random.rand(10), text=f'I am the first version of Document {i}') for i in range(100)] + [ + MyDoc( + embedding=np.random.rand(10), text=f'I am the first version of Document {i}' + ) + for i in range(100) + ] ) index = QdrantDocumentIndex[MyDoc]() index.index(docs) diff --git a/docs/user_guide/storing/index_weaviate.md b/docs/user_guide/storing/index_weaviate.md index 029c86de37..d1d86d03f2 100644 --- a/docs/user_guide/storing/index_weaviate.md +++ b/docs/user_guide/storing/index_weaviate.md @@ -27,13 +27,17 @@ from docarray.typing import NdArray from pydantic import Field import numpy as np + # Define the document schema. class MyDoc(BaseDoc): - title: str + title: str embedding: NdArray[128] = Field(is_embedding=True) + # Create dummy documents. -docs = DocList[MyDoc](MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10)) +docs = DocList[MyDoc]( + MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10) +) # Initialize a new WeaviateDocumentIndex instance and add the documents to the index. 
doc_index = WeaviateDocumentIndex[MyDoc]() @@ -59,7 +63,7 @@ There are multiple ways to start a Weaviate instance, depending on your use case | ----- | ----- | ----- | ----- | | **Weaviate Cloud Services (WCS)** | Development and production | Limited | **Recommended for most users** | | **Embedded Weaviate** | Experimentation | Limited | Experimental (as of Apr 2023) | -| **Docker-Compose** | Development | Yes | **Recommended for development + customizability** | +| **Docker Compose** | Development | Yes | **Recommended for development + customizability** | | **Kubernetes** | Production | Yes | | ### Instantiation instructions @@ -70,7 +74,7 @@ Go to the [WCS console](https://console.weaviate.cloud) and create an instance u Weaviate instances on WCS come pre-configured, so no further configuration is required. -**Docker-Compose (self-managed)** +**Docker Compose (self-managed)** Get a configuration file (`docker-compose.yaml`). You can build it using [this interface](https://weaviate.io/developers/weaviate/installation/docker-compose), or download it directly with: @@ -84,12 +88,12 @@ Where `v` is the actual version, such as `v1.18.3`. curl -o docker-compose.yml "https://configuration.weaviate.io/v2/docker-compose/docker-compose.yml?modules=standalone&runtime=docker-compose&weaviate_version=v1.18.3" ``` -**Start up Weaviate with Docker-Compose** +**Start up Weaviate with Docker Compose** Then you can start up Weaviate by running from a shell: ```shell -docker-compose up -d +docker compose up -d ``` **Shut down Weaviate** @@ -97,7 +101,7 @@ docker-compose up -d Then you can shut down Weaviate by running from a shell: ```shell -docker-compose down +docker compose down ``` **Notes** @@ -107,7 +111,7 @@ Unless data persistence or backups are set up, shutting down the Docker instance See documentation on [Persistent volume](https://weaviate.io/developers/weaviate/installation/docker-compose#persistent-volume) and [Backups](https://weaviate.io/developers/weaviate/configuration/backups) to prevent this if persistence is desired. 
```bash -docker-compose up -d +docker compose up -d ``` **Embedded Weaviate (from the application)** @@ -192,9 +196,7 @@ dbconfig = WeaviateDocumentIndex.DBConfig( ### Create an instance Let's connect to a local Weaviate service and instantiate a `WeaviateDocumentIndex` instance: ```python -dbconfig = WeaviateDocumentIndex.DBConfig( - host="http://localhost:8080" -) +dbconfig = WeaviateDocumentIndex.DBConfig(host="http://localhost:8080") doc_index = WeaviateDocumentIndex[MyDoc](db_config=dbconfig) ``` @@ -378,10 +380,10 @@ the [`find()`][docarray.index.abstract.BaseDocIndex.find] method: embedding=np.array([1, 2]), file=np.random.rand(100), ) - + # find similar documents matches, scores = doc_index.find(query, limit=5) - + print(f"{matches=}") print(f"{matches.text=}") print(f"{scores=}") @@ -428,10 +430,10 @@ You can also search for multiple documents at once, in a batch, using the [`find ) for i in range(3) ) - + # find similar documents matches, scores = doc_index.find_batched(queries, limit=5) - + print(f"{matches=}") print(f"{matches[0].text=}") print(f"{scores=}") @@ -481,7 +483,9 @@ class Book(BaseDoc): embedding: NdArray[10] = Field(is_embedding=True) -books = DocList[Book]([Book(price=i * 10, embedding=np.random.rand(10)) for i in range(10)]) +books = DocList[Book]( + [Book(price=i * 10, embedding=np.random.rand(10)) for i in range(10)] +) book_index = WeaviateDocumentIndex[Book](index_name='tmp_index') book_index.index(books) @@ -602,7 +606,7 @@ del doc_index[ids[1:]] # del by list of ids **WCS instances come pre-configured**, and as such additional settings are not configurable outside of those chosen at creation, such as whether to enable authentication. -For other cases, such as **Docker-Compose deployment**, its settings can be modified through the configuration file, such as the `docker-compose.yaml` file. +For other cases, such as **Docker Compose deployment**, its settings can be modified through the configuration file, such as the `docker-compose.yaml` file. 
Some of the more commonly used settings include: diff --git a/tests/index/elastic/fixture.py b/tests/index/elastic/fixture.py index d81a91c893..fddce16d69 100644 --- a/tests/index/elastic/fixture.py +++ b/tests/index/elastic/fixture.py @@ -28,32 +28,32 @@ pytestmark = [pytest.mark.slow, pytest.mark.index] cur_dir = os.path.dirname(os.path.abspath(__file__)) -compose_yml_v7 = os.path.abspath(os.path.join(cur_dir, 'v7/docker-compose.yml')) -compose_yml_v8 = os.path.abspath(os.path.join(cur_dir, 'v8/docker-compose.yml')) +compose_yml_v7 = os.path.abspath(os.path.join(cur_dir, "v7/docker-compose.yml")) +compose_yml_v8 = os.path.abspath(os.path.join(cur_dir, "v8/docker-compose.yml")) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def start_storage_v7(): - os.system(f"docker-compose -f {compose_yml_v7} up -d --remove-orphans") + os.system(f"docker compose -f {compose_yml_v7} up -d --remove-orphans") _wait_for_es() yield - os.system(f"docker-compose -f {compose_yml_v7} down --remove-orphans") + os.system(f"docker compose -f {compose_yml_v7} down --remove-orphans") -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def start_storage_v8(): - os.system(f"docker-compose -f {compose_yml_v8} up -d --remove-orphans") + os.system(f"docker compose -f {compose_yml_v8} up -d --remove-orphans") _wait_for_es() yield - os.system(f"docker-compose -f {compose_yml_v8} down --remove-orphans") + os.system(f"docker compose -f {compose_yml_v8} down --remove-orphans") def _wait_for_es(): from elasticsearch import Elasticsearch - es = Elasticsearch(hosts='http://localhost:9200/') + es = Elasticsearch(hosts="http://localhost:9200/") while not es.ping(): time.sleep(0.5) @@ -79,12 +79,12 @@ class MyImageDoc(ImageDoc): embedding: NdArray = Field(dims=128) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def ten_simple_docs(): return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def ten_flat_docs(): return [ FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50)) @@ -92,12 +92,12 @@ def ten_flat_docs(): ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def ten_nested_docs(): return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def ten_deep_nested_docs(): return [ DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10)))) @@ -105,6 +105,6 @@ def ten_deep_nested_docs(): ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def tmp_index_name(): return uuid.uuid4().hex diff --git a/tests/index/qdrant/fixtures.py b/tests/index/qdrant/fixtures.py index cf599fe0cd..ccb725a774 100644 --- a/tests/index/qdrant/fixtures.py +++ b/tests/index/qdrant/fixtures.py @@ -23,19 +23,19 @@ from docarray.index import QdrantDocumentIndex cur_dir = os.path.dirname(os.path.abspath(__file__)) -qdrant_yml = os.path.abspath(os.path.join(cur_dir, 'docker-compose.yml')) +qdrant_yml = os.path.abspath(os.path.join(cur_dir, "docker-compose.yml")) -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def start_storage(): - os.system(f"docker-compose -f {qdrant_yml} up -d --remove-orphans") + os.system(f"docker compose -f {qdrant_yml} up -d --remove-orphans") time.sleep(1) yield - os.system(f"docker-compose -f {qdrant_yml} down --remove-orphans") + os.system(f"docker compose -f 
{qdrant_yml} down --remove-orphans") -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def tmp_collection_name(): return uuid.uuid4().hex @@ -43,7 +43,7 @@ def tmp_collection_name(): @pytest.fixture def qdrant() -> qdrant_client.QdrantClient: """This fixture takes care of removing the collection before each test case""" - client = qdrant_client.QdrantClient(path='/tmp/qdrant-local') + client = qdrant_client.QdrantClient(path="/tmp/qdrant-local") for collection in client.get_collections().collections: client.delete_collection(collection.name) return client diff --git a/tests/index/weaviate/fixture_weaviate.py b/tests/index/weaviate/fixture_weaviate.py index 3699673746..4358f46b5d 100644 --- a/tests/index/weaviate/fixture_weaviate.py +++ b/tests/index/weaviate/fixture_weaviate.py @@ -24,16 +24,16 @@ cur_dir = os.path.dirname(os.path.abspath(__file__)) -weaviate_yml = os.path.abspath(os.path.join(cur_dir, 'docker-compose.yml')) +weaviate_yml = os.path.abspath(os.path.join(cur_dir, "docker-compose.yml")) -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def start_storage(): - os.system(f"docker-compose -f {weaviate_yml} up -d --remove-orphans") + os.system(f"docker compose -f {weaviate_yml} up -d --remove-orphans") _wait_for_weaviate() yield - os.system(f"docker-compose -f {weaviate_yml} down --remove-orphans") + os.system(f"docker compose -f {weaviate_yml} down --remove-orphans") def _wait_for_weaviate(): diff --git a/tests/integrations/store/test_s3.py b/tests/integrations/store/test_s3.py index b3b5203c5a..62e0126ea3 100644 --- a/tests/integrations/store/test_s3.py +++ b/tests/integrations/store/test_s3.py @@ -12,7 +12,7 @@ DA_LEN: int = 2**10 TOLERANCE_RATIO = 0.5 # Percentage of difference allowed in stream vs non-stream test -BUCKET: str = 'da-pushpull' +BUCKET: str = "da-pushpull" RANDOM: str = uuid.uuid4().hex[:8] pytestmark = [pytest.mark.s3] @@ -22,16 +22,16 @@ def minio_container(): file_dir = os.path.dirname(__file__) os.system( - f"docker-compose -f {os.path.join(file_dir, 'docker-compose.yml')} up -d --remove-orphans minio" + f"docker compose -f {os.path.join(file_dir, 'docker-compose.yml')} up -d --remove-orphans minio" ) time.sleep(1) yield os.system( - f"docker-compose -f {os.path.join(file_dir, 'docker-compose.yml')} down --remove-orphans" + f"docker compose -f {os.path.join(file_dir, 'docker-compose.yml')} down --remove-orphans" ) -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def testing_bucket(minio_container): import boto3 from botocore.client import Config @@ -59,7 +59,7 @@ def testing_bucket(minio_container): Config(signature_version="s3v4"), ) # make a bucket - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") s3.create_bucket(Bucket=BUCKET) yield @@ -67,15 +67,15 @@ def testing_bucket(minio_container): s3.Bucket(BUCKET).delete() -@pytest.mark.skip(reason='Skip it!') +@pytest.mark.skip(reason="Skip it!") @pytest.mark.slow def test_pushpull_correct(capsys): - namespace_dir = f'{BUCKET}/test{RANDOM}/pushpull-correct' + namespace_dir = f"{BUCKET}/test{RANDOM}/pushpull-correct" da1 = get_test_da(DA_LEN) # Verbose - da1.push(f's3://{namespace_dir}/meow', show_progress=True) - da2 = DocList[TextDoc].pull(f's3://{namespace_dir}/meow', show_progress=True) + da1.push(f"s3://{namespace_dir}/meow", show_progress=True) + da2 = DocList[TextDoc].pull(f"s3://{namespace_dir}/meow", show_progress=True) assert len(da1) == len(da2) assert all(d1.id == d2.id for d1, d2 in 
zip(da1, da2)) assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) @@ -85,8 +85,8 @@ def test_pushpull_correct(capsys): assert len(captured.err) == 0 # Quiet - da2.push(f's3://{namespace_dir}/meow') - da1 = DocList[TextDoc].pull(f's3://{namespace_dir}/meow') + da2.push(f"s3://{namespace_dir}/meow") + da1 = DocList[TextDoc].pull(f"s3://{namespace_dir}/meow") assert len(da1) == len(da2) assert all(d1.id == d2.id for d1, d2 in zip(da1, da2)) assert all(d1.text == d2.text for d1, d2 in zip(da1, da2)) @@ -96,18 +96,18 @@ def test_pushpull_correct(capsys): assert len(captured.err) == 0 -@pytest.mark.skip(reason='Skip it!') +@pytest.mark.skip(reason="Skip it!") @pytest.mark.slow def test_pushpull_stream_correct(capsys): - namespace_dir = f'{BUCKET}/test{RANDOM}/pushpull-stream-correct' + namespace_dir = f"{BUCKET}/test{RANDOM}/pushpull-stream-correct" da1 = get_test_da(DA_LEN) # Verbosity and correctness DocList[TextDoc].push_stream( - iter(da1), f's3://{namespace_dir}/meow', show_progress=True + iter(da1), f"s3://{namespace_dir}/meow", show_progress=True ) doc_stream2 = DocList[TextDoc].pull_stream( - f's3://{namespace_dir}/meow', show_progress=True + f"s3://{namespace_dir}/meow", show_progress=True ) assert all(d1.id == d2.id for d1, d2 in zip(da1, doc_stream2)) @@ -120,10 +120,10 @@ def test_pushpull_stream_correct(capsys): # Quiet and chained doc_stream = DocList[TextDoc].pull_stream( - f's3://{namespace_dir}/meow', show_progress=False + f"s3://{namespace_dir}/meow", show_progress=False ) DocList[TextDoc].push_stream( - doc_stream, f's3://{namespace_dir}/meow2', show_progress=False + doc_stream, f"s3://{namespace_dir}/meow2", show_progress=False ) captured = capsys.readouterr() @@ -132,18 +132,18 @@ def test_pushpull_stream_correct(capsys): # for some reason this test is failing with pydantic v2 -@pytest.mark.skip(reason='Skip it!') +@pytest.mark.skip(reason="Skip it!") @pytest.mark.slow def test_pull_stream_vs_pull_full(): - namespace_dir = f'{BUCKET}/test{RANDOM}/pull-stream-vs-pull-full' + namespace_dir = f"{BUCKET}/test{RANDOM}/pull-stream-vs-pull-full" DocList[TextDoc].push_stream( gen_text_docs(DA_LEN * 1), - f's3://{namespace_dir}/meow-short', + f"s3://{namespace_dir}/meow-short", show_progress=False, ) DocList[TextDoc].push_stream( gen_text_docs(DA_LEN * 4), - f's3://{namespace_dir}/meow-long', + f"s3://{namespace_dir}/meow-long", show_progress=False, ) @@ -158,106 +158,106 @@ def get_total_full(url: str): return sum(len(d.text) for d in DocList[TextDoc].pull(url, show_progress=False)) # A warmup is needed to get accurate memory usage comparison - _ = get_total_stream(f's3://{namespace_dir}/meow-short') + _ = get_total_stream(f"s3://{namespace_dir}/meow-short") short_total_stream, (_, short_stream_peak) = get_total_stream( - f's3://{namespace_dir}/meow-short' + f"s3://{namespace_dir}/meow-short" ) long_total_stream, (_, long_stream_peak) = get_total_stream( - f's3://{namespace_dir}/meow-long' + f"s3://{namespace_dir}/meow-long" ) - _ = get_total_full(f's3://{namespace_dir}/meow-short') + _ = get_total_full(f"s3://{namespace_dir}/meow-short") short_total_full, (_, short_full_peak) = get_total_full( - f's3://{namespace_dir}/meow-short' + f"s3://{namespace_dir}/meow-short" ) long_total_full, (_, long_full_peak) = get_total_full( - f's3://{namespace_dir}/meow-long' + f"s3://{namespace_dir}/meow-long" ) assert ( short_total_stream == short_total_full - ), 'Streamed and non-streamed pull should have similar statistics' + ), "Streamed and non-streamed pull should have similar 
statistics" assert ( long_total_stream == long_total_full - ), 'Streamed and non-streamed pull should have similar statistics' + ), "Streamed and non-streamed pull should have similar statistics" assert ( abs(long_stream_peak - short_stream_peak) / short_stream_peak < TOLERANCE_RATIO - ), 'Streamed memory usage should not be dependent on the size of the data' + ), "Streamed memory usage should not be dependent on the size of the data" assert ( abs(long_full_peak - short_full_peak) / short_full_peak > TOLERANCE_RATIO - ), 'Full pull memory usage should be dependent on the size of the data' + ), "Full pull memory usage should be dependent on the size of the data" -@pytest.mark.skip(reason='Skip it!') +@pytest.mark.skip(reason="Skip it!") @pytest.mark.slow def test_list_and_delete(): - namespace_dir = f'{BUCKET}/test{RANDOM}/list-and-delete' + namespace_dir = f"{BUCKET}/test{RANDOM}/list-and-delete" da_names = S3DocStore.list(namespace_dir, show_table=False) assert len(da_names) == 0 DocList[TextDoc].push_stream( - gen_text_docs(DA_LEN), f's3://{namespace_dir}/meow', show_progress=False + gen_text_docs(DA_LEN), f"s3://{namespace_dir}/meow", show_progress=False ) - da_names = S3DocStore.list(f'{namespace_dir}', show_table=False) - assert set(da_names) == {'meow'} + da_names = S3DocStore.list(f"{namespace_dir}", show_table=False) + assert set(da_names) == {"meow"} DocList[TextDoc].push_stream( - gen_text_docs(DA_LEN), f's3://{namespace_dir}/woof', show_progress=False + gen_text_docs(DA_LEN), f"s3://{namespace_dir}/woof", show_progress=False ) - da_names = S3DocStore.list(f'{namespace_dir}', show_table=False) - assert set(da_names) == {'meow', 'woof'} + da_names = S3DocStore.list(f"{namespace_dir}", show_table=False) + assert set(da_names) == {"meow", "woof"} assert S3DocStore.delete( - f'{namespace_dir}/meow' - ), 'Deleting an existing DA should return True' + f"{namespace_dir}/meow" + ), "Deleting an existing DA should return True" da_names = S3DocStore.list(namespace_dir, show_table=False) - assert set(da_names) == {'woof'} + assert set(da_names) == {"woof"} with pytest.raises( ValueError ): # Deleting a non-existent DA without safety should raise an error - S3DocStore.delete(f'{namespace_dir}/meow', missing_ok=False) + S3DocStore.delete(f"{namespace_dir}/meow", missing_ok=False) assert not S3DocStore.delete( - f'{namespace_dir}/meow', missing_ok=True - ), 'Deleting a non-existent DA should return False' + f"{namespace_dir}/meow", missing_ok=True + ), "Deleting a non-existent DA should return False" -@pytest.mark.skip(reason='Skip it!') +@pytest.mark.skip(reason="Skip it!") @pytest.mark.slow def test_concurrent_push_pull(): # Push to DA that is being pulled should not mess up the pull - namespace_dir = f'{BUCKET}/test{RANDOM}/concurrent-push-pull' + namespace_dir = f"{BUCKET}/test{RANDOM}/concurrent-push-pull" DocList[TextDoc].push_stream( gen_text_docs(DA_LEN), - f's3://{namespace_dir}/da0', + f"s3://{namespace_dir}/da0", show_progress=False, ) global _task def _task(choice: str): - if choice == 'push': + if choice == "push": DocList[TextDoc].push_stream( gen_text_docs(DA_LEN), - f's3://{namespace_dir}/da0', + f"s3://{namespace_dir}/da0", show_progress=False, ) - elif choice == 'pull': + elif choice == "pull": pull_len = sum( - 1 for _ in DocList[TextDoc].pull_stream(f's3://{namespace_dir}/da0') + 1 for _ in DocList[TextDoc].pull_stream(f"s3://{namespace_dir}/da0") ) assert pull_len == DA_LEN else: - raise ValueError(f'Unknown choice {choice}') + raise ValueError(f"Unknown choice {choice}") 
-    with mp.get_context('fork').Pool(3) as p:
-        p.map(_task, ['pull', 'push', 'pull'])
+    with mp.get_context("fork").Pool(3) as p:
+        p.map(_task, ["pull", "push", "pull"])
 
 
-@pytest.mark.skip(reason='Not Applicable')
+@pytest.mark.skip(reason="Not Applicable")
 def test_concurrent_push():
     """
     Amazon S3 does not support object locking for concurrent writers.