From 16276939d623e4558b2de2b1419c4801b662c569 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 18 Oct 2023 13:44:19 +0200 Subject: [PATCH 1/6] ci: use python 3.9 and newer numpy version Signed-off-by: Johannes Messner --- .github/workflows/ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c43bfa7b4..f584b778a6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,7 +94,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] + python-version: [3.9] pydantic-version: ["pydantic-v2", "pydantic-v1"] test-path: [tests/integrations, tests/units, tests/documentation] steps: @@ -112,6 +112,11 @@ jobs: ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip uninstall -y torch poetry run pip install torch +# Issue https://github.com/docarray/docarray/issues/1821 occurs with a numpy version +# that is only supported in pyton 3.9 and up. +# Poetry cannot lock to that version because we support python 3.8. +# Therefore, we install this version manually to test for the error in the CI. + poetry run pip install numpy==1.26.1 sudo apt-get update sudo apt-get install --no-install-recommends ffmpeg From c8fd6483f3026548a12f4f8df580f1a59baea889 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 18 Oct 2023 13:52:28 +0200 Subject: [PATCH 2/6] ci: try to fix syntax error (?) Signed-off-by: Johannes Messner --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f584b778a6..cc5b769d59 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -112,10 +112,6 @@ jobs: ./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }} poetry run pip uninstall -y torch poetry run pip install torch -# Issue https://github.com/docarray/docarray/issues/1821 occurs with a numpy version -# that is only supported in pyton 3.9 and up. -# Poetry cannot lock to that version because we support python 3.8. -# Therefore, we install this version manually to test for the error in the CI. poetry run pip install numpy==1.26.1 sudo apt-get update sudo apt-get install --no-install-recommends ffmpeg From 0bab36ed1cdf92145eba5a5f656340c97bd10640 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 18 Oct 2023 14:04:39 +0200 Subject: [PATCH 3/6] fix: from_dataframe with current numpy Signed-off-by: Johannes Messner --- docarray/helper.py | 47 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/docarray/helper.py b/docarray/helper.py index d242b05ea9..34b0c2bfd4 100644 --- a/docarray/helper.py +++ b/docarray/helper.py @@ -15,7 +15,23 @@ Union, ) +import numpy as np + from docarray.utils._internal._typing import safe_issubclass +from docarray.utils._internal.misc import ( + is_jax_available, + is_tf_available, + is_torch_available, +) + +if is_torch_available(): + import torch + +if is_jax_available(): + import jax + +if is_tf_available(): + import tensorflow as tf if TYPE_CHECKING: from docarray import BaseDoc @@ -54,6 +70,35 @@ def _access_path_to_dict(access_path: str, value) -> Dict[str, Any]: return result +def _is_none_like(val: Any) -> bool: + """ + :param val: any value + :return: true iff `val` equals to `None`, `'None'` or `''` + """ + # Convoluted implementation, but fixes https://github.com/docarray/docarray/issues/1821 + + # tensor-like types can have unexpected (= broadcast) `==`/`in` semantics, + # so treat separately + is_np_arr = isinstance(val, np.ndarray) + if is_np_arr: + return False + + is_torch_tens = is_torch_available() and isinstance(val, torch.Tensor) + if is_torch_tens: + return False + + is_tf_tens = is_tf_available() and isinstance(val, tf.Tensor) + if is_tf_tens: + return False + + is_jax_arr = is_jax_available() and isinstance(val, jax.numpy.ndarray) + if is_jax_arr: + return False + + # "normal" case + return val in ['', 'None', None] + + def _access_path_dict_to_nested_dict(access_path2val: Dict[str, Any]) -> Dict[Any, Any]: """ Convert a dict, where the keys are access paths ("__"-separated) to a nested dictionary. @@ -76,7 +121,7 @@ def _access_path_dict_to_nested_dict(access_path2val: Dict[str, Any]) -> Dict[An for access_path, value in access_path2val.items(): field2val = _access_path_to_dict( access_path=access_path, - value=value if value not in ['', 'None'] else None, + value=None if _is_none_like(value) else value, ) _update_nested_dicts(to_update=nested_dict, update_with=field2val) return nested_dict From 3b778f0f07f7b3450f755b5d0821af95074ceccc Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 18 Oct 2023 14:30:45 +0200 Subject: [PATCH 4/6] test: change test that is broken by numpy upgrade Signed-off-by: Johannes Messner --- tests/units/typing/tensor/test_ndarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/units/typing/tensor/test_ndarray.py b/tests/units/typing/tensor/test_ndarray.py index 49d5d34d1b..93ed58b382 100644 --- a/tests/units/typing/tensor/test_ndarray.py +++ b/tests/units/typing/tensor/test_ndarray.py @@ -200,9 +200,9 @@ def test_parametrized_instance(): def test_parametrized_equality(): t1 = parse_obj_as(NdArray[128], np.zeros(128)) t2 = parse_obj_as(NdArray[128], np.zeros(128)) - t3 = parse_obj_as(NdArray[256], np.zeros(256)) + t3 = parse_obj_as(NdArray[128], np.ones(128)) assert (t1 == t2).all() - assert not t1 == t3 + assert not (t1 == t3).any() def test_parametrized_operations(): From 5e9b8092a8f8a2f7d80a6ade398675d1f6a67583 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 18 Oct 2023 15:07:04 +0200 Subject: [PATCH 5/6] fix: get args of paremtrized types Signed-off-by: Johannes Messner --- docarray/base_doc/doc.py | 10 +++++----- docarray/base_doc/mixins/io.py | 2 +- docarray/display/document_summary.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 4154f3248a..4d45f1369a 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -22,7 +22,7 @@ import typing_extensions from pydantic import BaseModel, Field from pydantic.fields import FieldInfo -from typing_inspect import is_optional_type +from typing_inspect import get_args, is_optional_type from docarray.utils._internal.pydantic import is_pydantic_v2 @@ -185,7 +185,7 @@ def _get_field_annotation(cls, field: str) -> Type: if is_optional_type( annotation ): # this is equivalent to `outer_type_` in pydantic v1 - return annotation.__args__[0] + return get_args(annotation)[0] else: return annotation else: @@ -205,12 +205,12 @@ def _get_field_inner_type(cls, field: str) -> Type: if is_optional_type( annotation ): # this is equivalent to `outer_type_` in pydantic v1 - return annotation.__args__[0] + return get_args(annotation)[0] elif annotation == Tuple: - if len(annotation.__args__) == 0: + if len(get_args(annotation)) == 0: return Any else: - annotation.__args__[0] + get_args(annotation)[0] else: return annotation else: diff --git a/docarray/base_doc/mixins/io.py b/docarray/base_doc/mixins/io.py index 958897555c..cc4a3470d7 100644 --- a/docarray/base_doc/mixins/io.py +++ b/docarray/base_doc/mixins/io.py @@ -336,7 +336,7 @@ def _get_content_from_node_proto( field_type = None if isinstance(field_type, GenericAlias): - field_type = field_type.__args__[0] + field_type = get_args(field_type)[0] return_field = arg_to_container[content_key]( cls._get_content_from_node_proto(node, field_type=field_type) diff --git a/docarray/display/document_summary.py b/docarray/display/document_summary.py index 7a3730016e..265236a8d3 100644 --- a/docarray/display/document_summary.py +++ b/docarray/display/document_summary.py @@ -1,4 +1,4 @@ -from typing import Any, List, Optional, Type, Union +from typing import Any, List, Optional, Type, Union, get_args from rich.highlighter import RegexHighlighter from rich.theme import Theme @@ -83,7 +83,7 @@ def _get_schema( if is_union_type(field_type) or is_optional_type(field_type): sub_tree = Tree(node_name, highlight=True) - for arg in field_type.__args__: + for arg in get_args(field_type): if safe_issubclass(arg, BaseDoc): sub_tree.add( DocumentSummary._get_schema( From b70a6695705e53ef551957da2c052118c8d95b76 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Fri, 20 Oct 2023 09:10:52 +0200 Subject: [PATCH 6/6] test: add test with 0 length numpy array Signed-off-by: Johannes Messner --- tests/units/array/test_array_from_to_pandas.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/units/array/test_array_from_to_pandas.py b/tests/units/array/test_array_from_to_pandas.py index d89902c2f8..440398562f 100644 --- a/tests/units/array/test_array_from_to_pandas.py +++ b/tests/units/array/test_array_from_to_pandas.py @@ -136,7 +136,8 @@ class BasisUnion(BaseDoc): @pytest.mark.parametrize('tensor_type', [NdArray, TorchTensor]) -def test_from_to_pandas_tensor_type(tensor_type): +@pytest.mark.parametrize('tensor_len', [0, 5]) +def test_from_to_pandas_tensor_type(tensor_type, tensor_len): class MyDoc(BaseDoc): embedding: tensor_type text: str @@ -145,9 +146,13 @@ class MyDoc(BaseDoc): da = DocVec[MyDoc]( [ MyDoc( - embedding=[1, 2, 3, 4, 5], text='hello', image=ImageDoc(url='aux.png') + embedding=list(range(tensor_len)), + text='hello', + image=ImageDoc(url='aux.png'), + ), + MyDoc( + embedding=list(range(tensor_len)), text='hello world', image=ImageDoc() ), - MyDoc(embedding=[5, 4, 3, 2, 1], text='hello world', image=ImageDoc()), ], tensor_type=tensor_type, ) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy