docarray - pypi Package Compare versions

Comparing version 0.40.1.dev1 to 0.41.0
+796
docarray/index/backends/mongodb_atlas.py
import collections
import logging
from dataclasses import dataclass, field
from functools import cached_property
from typing import (
Any,
Dict,
Generator,
Generic,
List,
NamedTuple,
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
)
import bson
import numpy as np
from pymongo import MongoClient
from docarray import BaseDoc, DocList
from docarray.index.abstract import BaseDocIndex, _raise_not_composable
from docarray.index.backends.helper import _collect_query_required_args
from docarray.typing import AnyTensor
from docarray.typing.tensor.abstract_tensor import AbstractTensor
from docarray.utils._internal._typing import safe_issubclass
from docarray.utils.find import _FindResult, _FindResultBatched
logger = logging.getLogger(__name__)
MAX_CANDIDATES = 10_000
OVERSAMPLING_FACTOR = 10
TSchema = TypeVar('TSchema', bound=BaseDoc)
class HybridResult(NamedTuple):
"""Adds breakdown of scores into vector and text components."""
documents: Union[DocList, List[Dict[str, Any]]]
scores: AnyTensor
score_breakdown: Dict[str, List[Any]]
class MongoDBAtlasDocumentIndex(BaseDocIndex, Generic[TSchema]):
"""DocumentIndex backed by MongoDB Atlas Vector Store.
MongoDB Atlas provides full Text, Vector, and Hybrid Search
and can store structured data, text and vector indexes
in the same Collection (Index).
Atlas provides efficient index and search on vector embeddings
using the Hierarchical Navigable Small Worlds (HNSW) algorithm.
For documentation, see the following.
* Text Search: https://www.mongodb.com/docs/atlas/atlas-search/atlas-search-overview/
* Vector Search: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/
* Hybrid Search: https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/reciprocal-rank-fusion/
"""
def __init__(self, db_config=None, **kwargs):
super().__init__(db_config=db_config, **kwargs)
logger.info(f'{self.__class__.__name__} has been initialized')
@property
def index_name(self):
"""The name of the index/collection in the database.
Note that in MongoDB Atlas, one has Collections (analogous to Tables),
which can have Search Indexes. They are distinct.
DocArray tends to consider them together.
The index_name can be set when initializing MongoDBAtlasDocumentIndex.
The easiest way is to pass index_name=<collection_name> as a kwarg.
Otherwise, a reasonable default uses the name of the Document type that it contains.
"""
if self._db_config.index_name is not None:
return self._db_config.index_name
else:
# Create a reasonable default
if not self._schema:
raise ValueError(
'A MongoDBAtlasDocumentIndex must be typed with a Document type.'
'To do so, use the syntax: MongoDBAtlasDocumentIndex[DocumentType]'
)
schema_name = self._schema.__name__.lower()
logger.debug(f"db_config.index_name was not set. Using {schema_name}")
return schema_name
@property
def _database_name(self):
return self._db_config.database_name
@cached_property
def _client(self):
return self._connect_to_mongodb_atlas(
atlas_connection_uri=self._db_config.mongo_connection_uri
)
@property
def _collection(self):
"""MongoDB Collection"""
return self._client[self._database_name][self.index_name]
@staticmethod
def _connect_to_mongodb_atlas(atlas_connection_uri: str):
"""
Establish a connection to MongoDB Atlas.
"""
client = MongoClient(
atlas_connection_uri,
# driver=DriverInfo(name="docarray", version=version("docarray"))
)
return client
def _create_indexes(self):
"""Create a new index in the MongoDB database if it doesn't already exist."""
def _check_index_exists(self, index_name: str) -> bool:
"""
Check if an index exists in the MongoDB Atlas database.
:param index_name: The name of the index.
:return: True if the index exists, False otherwise.
"""
@dataclass
class Query:
"""Dataclass describing a query."""
vector_fields: Optional[Dict[str, np.ndarray]]
filters: Optional[List[Any]]
text_searches: Optional[List[Any]]
limit: int
class QueryBuilder(BaseDocIndex.QueryBuilder):
"""Compose complex queries containing vector search (find), text_search, and filters.
Arguments to `find` are vectors of embeddings, text_search expects strings,
and filters expect dicts of MongoDB Query Language (MDB).
NOTE: When doing Hybrid Search, pay close attention to the interpretation and use of inputs,
particularly when multiple calls are made of the same method (find, text_search, filter).
* find (Vector Search): Embedding vectors will be averaged. The penalty/weight defined in DBConfig will not change.
* text_search: Individual searches are performed, each with the same penalty/weight.
* filter: Within Vector Search, performs efficient k-NN filtering with the Lucene engine
"""
def __init__(self, query: Optional[List[Tuple[str, Dict]]] = None):
super().__init__()
# list of tuples (method name, kwargs)
self._queries: List[Tuple[str, Dict]] = query or []
def build(self, limit: int = 1, *args, **kwargs) -> Any:
"""Build a `Query` that can be passed to `execute_query`."""
search_fields: Dict[str, np.ndarray] = collections.defaultdict(list)
filters: List[Any] = []
text_searches: List[Any] = []
for method, kwargs in self._queries:
if method == 'find':
search_field = kwargs['search_field']
search_fields[search_field].append(kwargs["query"])
elif method == 'filter':
filters.append(kwargs)
else:
text_searches.append(kwargs)
vector_fields = {
field: np.average(vectors, axis=0)
for field, vectors in search_fields.items()
}
return MongoDBAtlasDocumentIndex.Query(
vector_fields=vector_fields,
filters=filters,
text_searches=text_searches,
limit=limit,
)
find = _collect_query_required_args('find', {'search_field', 'query'})
filter = _collect_query_required_args('filter', {'query'})
text_search = _collect_query_required_args(
'text_search', {'search_field', 'query'}
)
find_batched = _raise_not_composable('find_batched')
filter_batched = _raise_not_composable('filter_batched')
text_search_batched = _raise_not_composable('text_search_batched')
def execute_query(
self, query: Any, *args, score_breakdown=True, **kwargs
) -> Any: # _FindResult:
"""Execute a Query on the database.
:param query: the query to execute. The output of this Document index's `QueryBuilder.build()` method.
:param args: positional arguments to pass to the query
:param score_breakdown: Will provide breakdown of scores into text and vector components for Hybrid Searches.
:param kwargs: keyword arguments to pass to the query
:return: the result of the query
"""
if not isinstance(query, MongoDBAtlasDocumentIndex.Query):
raise ValueError(
f"Expected MongoDBAtlasDocumentIndex.Query. Found {type(query)=}. "
"For native calls to MongoDBAtlasDocumentIndex, simply call filter()"
)
if len(query.vector_fields) > 1:
self._logger.warning(
f"{len(query.vector_fields)} embedding vectors have been provided to the query. They will be averaged."
)
if len(query.text_searches) > 1:
self._logger.warning(
f"{len(query.text_searches)} text searches will be performed, and each will receive a ranked score."
)
# collect filters
filters: List[Dict[str, Any]] = []
for filter_ in query.filters:
filters.append(filter_['query'])
# check if hybrid search is needed.
hybrid = len(query.vector_fields) + len(query.text_searches) > 1
if hybrid:
if len(query.vector_fields) > 1:
raise NotImplementedError(
"Hybrid Search on multiple Vector Indexes is not yet supported."
)
pipeline = self._hybrid_search(
query.vector_fields, query.text_searches, filters, query.limit
)
else:
if query.text_searches:
# it is a simple text search, perhaps with filters.
text_stage = self._text_search_stage(**query.text_searches[0])
pipeline = [
text_stage,
{"$match": {"$and": filters} if filters else {}},
{
'$project': self._project_fields(
extra_fields={"score": {'$meta': 'searchScore'}}
)
},
{"$limit": query.limit},
]
elif query.vector_fields:
# it is a simple vector search, perhaps with filters.
assert (
len(query.vector_fields) == 1
), "Query contains more than one vector_field."
field, vector_query = list(query.vector_fields.items())[0]
pipeline = [
self._vector_search_stage(
query=vector_query,
search_field=field,
limit=query.limit,
filters=filters,
),
{
'$project': self._project_fields(
extra_fields={"score": {'$meta': 'vectorSearchScore'}}
)
},
]
# it is only a filter search.
else:
pipeline = [{"$match": {"$and": filters}}]
with self._collection.aggregate(pipeline) as cursor:
results, scores = self._mongo_to_docs(cursor)
docs = self._dict_list_to_docarray(results)
if hybrid and score_breakdown and results:
score_breakdown = collections.defaultdict(list)
score_fields = [key for key in results[0] if "score" in key]
for res in results:
score_breakdown["id"].append(res["id"])
for sf in score_fields:
score_breakdown[sf].append(res[sf])
logger.debug(score_breakdown)
return HybridResult(
documents=docs, scores=scores, score_breakdown=score_breakdown
)
return _FindResult(documents=docs, scores=scores)
@dataclass
class DBConfig(BaseDocIndex.DBConfig):
mongo_connection_uri: str = 'localhost'
index_name: Optional[str] = None
database_name: Optional[str] = "default"
default_column_config: Dict[Type, Dict[str, Any]] = field(
default_factory=lambda: collections.defaultdict(
dict,
{
bson.BSONARR: {
'distance': 'COSINE',
'oversample_factor': OVERSAMPLING_FACTOR,
'max_candidates': MAX_CANDIDATES,
'indexed': False,
'index_name': None,
'penalty': 5,
},
bson.BSONSTR: {
'indexed': False,
'index_name': None,
'operator': 'phrase',
'penalty': 1,
},
},
)
)
@dataclass
class RuntimeConfig(BaseDocIndex.RuntimeConfig):
...
def python_type_to_db_type(self, python_type: Type) -> Any:
"""Map python type to database type.
Takes any python type and returns the corresponding database column type.
:param python_type: a python type.
:return: the corresponding database column type,
or None if ``python_type`` is not supported.
"""
type_map = {
int: bson.BSONNUM,
float: bson.BSONDEC,
collections.OrderedDict: bson.BSONOBJ,
str: bson.BSONSTR,
bytes: bson.BSONBIN,
dict: bson.BSONOBJ,
np.ndarray: bson.BSONARR,
AbstractTensor: bson.BSONARR,
}
for py_type, mongo_types in type_map.items():
if safe_issubclass(python_type, py_type):
return mongo_types
raise ValueError(f'Unsupported column type for {type(self)}: {python_type}')
def _doc_to_mongo(self, doc):
result = doc.copy()
for name in result:
if self._column_infos[name].db_type == bson.BSONARR:
result[name] = list(result[name])
result["_id"] = result.pop("id")
return result
def _docs_to_mongo(self, docs):
return [self._doc_to_mongo(doc) for doc in docs]
@staticmethod
def _mongo_to_doc(mongo_doc: dict) -> Tuple[dict, Any]:
result = mongo_doc.copy()
result["id"] = result.pop("_id")
score = result.get("score", None)
return result, score
@staticmethod
def _mongo_to_docs(mongo_docs: Generator[Dict, None, None]) -> Tuple[List[dict], List[Any]]:
docs = []
scores = []
for mongo_doc in mongo_docs:
doc, score = MongoDBAtlasDocumentIndex._mongo_to_doc(mongo_doc)
docs.append(doc)
scores.append(score)
return docs, scores
def _get_oversampling_factor(self, search_field: str) -> int:
return self._column_infos[search_field].config["oversample_factor"]
def _get_max_candidates(self, search_field: str) -> int:
return self._column_infos[search_field].config["max_candidates"]
def _index(self, column_to_data: Dict[str, Generator[Any, None, None]]):
"""Add and Index Documents to the datastore
The input format is aimed towards column vectors, which is not
the natural fit for MongoDB Collections, but we have chosen
not to override BaseDocIndex.index as it provides valuable validation.
This may change in the future.
:param column_to_data: is a dictionary from column name to a generator
"""
self._index_subindex(column_to_data)
docs: List[Dict[str, Any]] = []
while True:
try:
doc = {key: next(column_to_data[key]) for key in column_to_data}
mongo_doc = self._doc_to_mongo(doc)
docs.append(mongo_doc)
except StopIteration:
break
self._collection.insert_many(docs)
def num_docs(self) -> int:
"""Return the number of indexed documents"""
return self._collection.count_documents({})
@property
def _is_index_empty(self) -> bool:
"""
Check if index is empty by comparing the number of documents to zero.
:return: True if the index is empty, False otherwise.
"""
return self.num_docs() == 0
def _del_items(self, doc_ids: Sequence[str]) -> None:
"""Delete Documents from the index.
:param doc_ids: ids to delete from the Document Store
"""
mg_filter = {"_id": {"$in": doc_ids}}
self._collection.delete_many(mg_filter)
def _get_items(
self, doc_ids: Sequence[str]
) -> Union[Sequence[TSchema], Sequence[Dict[str, Any]]]:
"""Get Documents from the index, by `id`.
If no document is found, a KeyError is raised.
:param doc_ids: ids to get from the Document index
:return: Sequence of Documents, sorted corresponding to the order of `doc_ids`. Duplicate `doc_ids` can be omitted in the output.
"""
mg_filter = {"_id": {"$in": doc_ids}}
docs = self._collection.find(mg_filter)
docs, _ = self._mongo_to_docs(docs)
if not docs:
raise KeyError(f'No document with id {doc_ids} found')
return docs
def _reciprocal_rank_stage(self, search_field: str, score_field: str):
penalty = self._column_infos[search_field].config["penalty"]
projection_fields = {
key: f"$docs.{key}" for key in self._column_infos.keys() if key != "id"
}
projection_fields["_id"] = "$docs._id"
projection_fields[score_field] = 1
return [
{"$group": {"_id": None, "docs": {"$push": "$$ROOT"}}},
{"$unwind": {"path": "$docs", "includeArrayIndex": "rank"}},
{
"$addFields": {
score_field: {"$divide": [1.0, {"$add": ["$rank", penalty, 1]}]}
}
},
{'$project': projection_fields},
]
def _add_stage_to_pipeline(self, pipeline: List[Any], stage: List[Dict[str, Any]]):
if pipeline:
pipeline.append(
{"$unionWith": {"coll": self.index_name, "pipeline": stage}}
)
else:
pipeline.extend(stage)
return pipeline
def _final_stage(self, scores_fields, limit):
"""Sum individual scores, sort, and apply limit."""
doc_fields = self._column_infos.keys()
grouped_fields = {
key: {"$first": f"${key}"} for key in doc_fields if key != "_id"
}
best_score = {score: {'$max': f'${score}'} for score in scores_fields}
final_pipeline = [
{"$group": {"_id": "$_id", **grouped_fields, **best_score}},
{
"$project": {
**{doc_field: 1 for doc_field in doc_fields},
**{score: {"$ifNull": [f"${score}", 0]} for score in scores_fields},
}
},
{
"$addFields": {
"score": {"$add": [f"${score}" for score in scores_fields]},
}
},
{"$sort": {"score": -1}},
{"$limit": limit},
]
return final_pipeline
@staticmethod
def _score_field(search_field: str, search_field_counts: Dict[str, int]):
score_field = f"{search_field}_score"
count = search_field_counts[search_field]
if count > 1:
score_field += str(count)
return score_field
def _hybrid_search(
self,
vector_queries: Dict[str, Any],
text_queries: List[Dict[str, Any]],
filters: Dict[str, Any],
limit: int,
):
hybrid_pipeline = [] # combined aggregate pipeline
search_field_counts = collections.defaultdict(
int
) # stores count of calls on same search field
score_fields = [] # names given to scores of each search stage
for search_field, query in vector_queries.items():
search_field_counts[search_field] += 1
vector_stage = self._vector_search_stage(
query=query,
search_field=search_field,
limit=limit,
filters=filters,
)
score_field = self._score_field(search_field, search_field_counts)
score_fields.append(score_field)
vector_pipeline = [
vector_stage,
*self._reciprocal_rank_stage(search_field, score_field),
]
self._add_stage_to_pipeline(hybrid_pipeline, vector_pipeline)
for kwargs in text_queries:
search_field_counts[kwargs["search_field"]] += 1
text_stage = self._text_search_stage(**kwargs)
search_field = kwargs["search_field"]
score_field = self._score_field(search_field, search_field_counts)
score_fields.append(score_field)
reciprocal_rank_stage = self._reciprocal_rank_stage(
search_field, score_field
)
text_pipeline = [
text_stage,
{"$match": {"$and": filters} if filters else {}},
{"$limit": limit},
*reciprocal_rank_stage,
]
self._add_stage_to_pipeline(hybrid_pipeline, text_pipeline)
hybrid_pipeline += self._final_stage(score_fields, limit)
return hybrid_pipeline
def _vector_search_stage(
self,
query: np.ndarray,
search_field: str,
limit: int,
filters: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
search_index_name = self._get_column_db_index(search_field)
oversampling_factor = self._get_oversampling_factor(search_field)
max_candidates = self._get_max_candidates(search_field)
query = query.astype(np.float64).tolist()
stage = {
'$vectorSearch': {
'index': search_index_name,
'path': search_field,
'queryVector': query,
'numCandidates': min(limit * oversampling_factor, max_candidates),
'limit': limit,
}
}
if filters:
stage['$vectorSearch']['filter'] = {"$and": filters}
return stage
def _text_search_stage(
self,
query: str,
search_field: str,
) -> Dict[str, Any]:
operator = self._column_infos[search_field].config["operator"]
index = self._get_column_db_index(search_field)
return {
"$search": {
"index": index,
operator: {"query": query, "path": search_field},
}
}
def _doc_exists(self, doc_id: str) -> bool:
"""
Checks if a given document exists in the index.
:param doc_id: The id of a document to check.
:return: True if the document exists in the index, False otherwise.
"""
doc = self._collection.find_one({"_id": doc_id})
return bool(doc)
def _find(
self,
query: np.ndarray,
limit: int,
search_field: str = '',
) -> _FindResult:
"""Find documents in the index
:param query: query vector for KNN/ANN search. Has single axis.
:param limit: maximum number of documents to return per query
:param search_field: name of the field to search on
:return: a named tuple containing `documents` and `scores`
"""
# NOTE: in standard implementations,
# `search_field` is equal to the column name to search on
vector_search_stage = self._vector_search_stage(query, search_field, limit)
pipeline = [
vector_search_stage,
{
'$project': self._project_fields(
extra_fields={"score": {'$meta': 'vectorSearchScore'}}
)
},
]
with self._collection.aggregate(pipeline) as cursor:
documents, scores = self._mongo_to_docs(cursor)
return _FindResult(documents=documents, scores=scores)
def _find_batched(
self, queries: np.ndarray, limit: int, search_field: str = ''
) -> _FindResultBatched:
"""Find documents in the index
:param queries: query vectors for KNN/ANN search.
Has shape (batch_size, vector_dim)
:param limit: maximum number of documents to return
:param search_field: name of the field to search on
:return: a named tuple containing `documents` and `scores`
"""
docs, scores = [], []
for query in queries:
results = self._find(query=query, search_field=search_field, limit=limit)
docs.append(results.documents)
scores.append(results.scores)
return _FindResultBatched(documents=docs, scores=scores)
def _get_column_db_index(self, column_name: str) -> Optional[str]:
"""
Retrieve the index name associated with the specified column name.
Parameters:
column_name (str): The name of the column.
Returns:
Optional[str]: The index name associated with the specified column name, or None if not found.
"""
index_name = self._column_infos[column_name].config.get("index_name")
is_vector_index = safe_issubclass(
self._column_infos[column_name].docarray_type, AbstractTensor
)
is_text_index = safe_issubclass(
self._column_infos[column_name].docarray_type, str
)
if index_name is None or not isinstance(index_name, str):
if is_vector_index:
raise ValueError(
f'The column {column_name} for MongoDBAtlasDocumentIndex should be associated '
'with an Atlas Vector Index.'
)
elif is_text_index:
raise ValueError(
f'The column {column_name} for MongoDBAtlasDocumentIndex should be associated '
'with an Atlas Index.'
)
if not (is_vector_index or is_text_index):
raise ValueError(
f'The column {column_name} for MongoDBAtlasDocumentIndex cannot be associated to an index'
)
return index_name
def _project_fields(self, extra_fields: Optional[Dict[str, Any]] = None) -> dict:
"""
Create a projection dictionary to include all fields defined in the column information.
Returns:
dict: A dictionary where each field key from the column information is mapped to the value 1,
indicating that the field should be included in the projection.
"""
fields = {key: 1 for key in self._column_infos.keys() if key != "id"}
fields["_id"] = 1
if extra_fields:
fields.update(extra_fields)
return fields
def _filter(
self,
filter_query: Any,
limit: int,
) -> Union[DocList, List[Dict]]:
"""Find documents in the index based on a filter query
:param filter_query: the DB specific filter query to execute
:param limit: maximum number of documents to return
:return: a DocList containing the documents that match the filter query
"""
with self._collection.find(filter_query, limit=limit) as cursor:
return self._mongo_to_docs(cursor)[0]
def _filter_batched(
self,
filter_queries: Any,
limit: int,
) -> Union[List[DocList], List[List[Dict]]]:
"""Find documents in the index based on multiple filter queries.
Each query is considered individually, and results are returned per query.
:param filter_queries: the DB specific filter queries to execute
:param limit: maximum number of documents to return per query
:return: List of DocLists containing the documents that match the filter
queries
"""
return [self._filter(query, limit) for query in filter_queries]
def _text_search(
self,
query: str,
limit: int,
search_field: str = '',
) -> _FindResult:
"""Find documents in the index based on a text search query
:param query: The text to search for
:param limit: maximum number of documents to return
:param search_field: name of the field to search on
:return: a named tuple containing `documents` and `scores`
"""
text_stage = self._text_search_stage(query=query, search_field=search_field)
pipeline = [
text_stage,
{
'$project': self._project_fields(
extra_fields={'score': {'$meta': 'searchScore'}}
)
},
{"$limit": limit},
]
with self._collection.aggregate(pipeline) as cursor:
documents, scores = self._mongo_to_docs(cursor)
return _FindResult(documents=documents, scores=scores)
def _text_search_batched(
self,
queries: Sequence[str],
limit: int,
search_field: str = '',
) -> _FindResultBatched:
"""Find documents in the index based on a text search query
:param queries: The texts to search for
:param limit: maximum number of documents to return per query
:param search_field: name of the field to search on
:return: a named tuple containing `documents` and `scores`
"""
# NOTE: in standard implementations,
# `search_field` is equal to the column name to search on
documents, scores = [], []
for query in queries:
results = self._text_search(
query=query, search_field=search_field, limit=limit
)
documents.append(results.documents)
scores.append(results.scores)
return _FindResultBatched(documents=documents, scores=scores)
def _filter_by_parent_id(self, id: str) -> Optional[List[str]]:
"""Filter the ids of the subindex documents given id of root document.
:param id: the root document id to filter by
:return: a list of ids of the subindex documents
"""
with self._collection.find({"parent_id": id}, projection={"_id": 1}) as cursor:
return [doc["_id"] for doc in cursor]
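The `_reciprocal_rank_stage` and `_final_stage` pipelines above implement reciprocal rank fusion: each search stage contributes `1 / (rank + penalty + 1)` per document, and per-document scores are summed before sorting. A minimal pure-Python sketch of the same scoring, outside MongoDB (function and variable names here are illustrative, not part of the backend):

```python
from collections import defaultdict
from typing import Dict, List, Tuple


def reciprocal_rank_fusion(
    ranked_lists: List[List[str]], penalties: List[int], limit: int
) -> List[Tuple[str, float]]:
    """Fuse several ranked id lists into one, mirroring the
    1 / (rank + penalty + 1) scoring of the aggregation pipeline."""
    scores: Dict[str, float] = defaultdict(float)
    for ids, penalty in zip(ranked_lists, penalties):
        for rank, doc_id in enumerate(ids):
            scores[doc_id] += 1.0 / (rank + penalty + 1)
    # highest combined score first, truncated to `limit`
    return sorted(scores.items(), key=lambda kv: -kv[1])[:limit]


fused = reciprocal_rank_fusion(
    ranked_lists=[['a', 'b', 'c'], ['b', 'a']],
    penalties=[5, 1],  # the default penalties for vector (5) and text (1) columns above
    limit=2,
)
```

A lower penalty gives a stage more weight, which is why text columns default to `penalty: 1` and vector columns to `penalty: 5` in `DBConfig` above.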
+70
-1

@@ -1,2 +0,17 @@

__version__ = '0.40.1.dev1'
# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '0.40.1'

@@ -8,3 +23,57 @@ import logging

from docarray.utils._internal.misc import _get_path_from_docarray_root_level
from docarray.utils._internal.pydantic import is_pydantic_v2
def unpickle_doclist(doc_type, b):
return DocList[doc_type].from_bytes(b, protocol="protobuf")
def unpickle_docvec(doc_type, tensor_type, b):
return DocVec[doc_type].from_bytes(b, protocol="protobuf", tensor_type=tensor_type)
if is_pydantic_v2:
# Register the pickle functions
def register_serializers():
import copyreg
from functools import partial
unpickle_doc_fn = partial(BaseDoc.from_bytes, protocol="protobuf")
def pickle_doc(doc):
b = doc.to_bytes(protocol='protobuf')
return unpickle_doc_fn, (doc.__class__, b)
# Register BaseDoc serialization
copyreg.pickle(BaseDoc, pickle_doc)
# For DocList, we need to hook into __reduce__ since it's a generic
def pickle_doclist(doc_list):
b = doc_list.to_bytes(protocol='protobuf')
doc_type = doc_list.doc_type
return unpickle_doclist, (doc_type, b)
# Replace DocList.__reduce__ with a method that returns the correct format
def doclist_reduce(self):
return pickle_doclist(self)
DocList.__reduce__ = doclist_reduce
# For DocVec, we need to hook into __reduce__ since it's a generic
def pickle_docvec(doc_vec):
b = doc_vec.to_bytes(protocol='protobuf')
doc_type = doc_vec.doc_type
tensor_type = doc_vec.tensor_type
return unpickle_docvec, (doc_type, tensor_type, b)
# Replace DocList.__reduce__ with a method that returns the correct format
def docvec_reduce(self):
return pickle_docvec(self)
DocVec.__reduce__ = docvec_reduce
register_serializers()
__all__ = ['BaseDoc', 'DocList', 'DocVec']
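The pydantic-v2 branch above registers custom picklers via `copyreg` and `__reduce__` so that documents round-trip through protobuf bytes. The mechanism itself can be sketched with a toy class that has nothing to do with docarray (all names here are illustrative):

```python
import copyreg
import pickle


class Blob:
    """Toy stand-in for a document type serialized via custom bytes."""

    def __init__(self, payload: bytes):
        self.payload = payload


def unpickle_blob(b: bytes) -> "Blob":
    # module-level function: picklable by reference, like unpickle_doclist above
    return Blob(b)


def pickle_blob(blob: Blob):
    # return (callable, args), exactly the shape __reduce__ would return
    return unpickle_blob, (blob.payload,)


# equivalent to the copyreg.pickle(BaseDoc, pickle_doc) call above
copyreg.pickle(Blob, pickle_blob)

restored = pickle.loads(pickle.dumps(Blob(b'hello')))
```

The `(callable, args)` pair is stored in the pickle stream, so the reconstructing function must be importable at module level, which is why `unpickle_doclist` and `unpickle_docvec` are defined outside `register_serializers`.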

@@ -11,0 +80,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.array.any_array import AnyDocArray

@@ -2,0 +17,0 @@ from docarray.array.doc_list.doc_list import DocList

+31
-9

@@ -28,2 +28,3 @@ import sys

from docarray.utils._internal._typing import change_cls_name, safe_issubclass
from docarray.utils._internal.pydantic import is_pydantic_v2

@@ -77,5 +78,16 @@ if TYPE_CHECKING:

class _DocArrayTyped(cls): # type: ignore
doc_type: Type[BaseDocWithoutId] = cast(Type[BaseDocWithoutId], item)
if not is_pydantic_v2:
class _DocArrayTyped(cls): # type: ignore
doc_type: Type[BaseDocWithoutId] = cast(
Type[BaseDocWithoutId], item
)
else:
class _DocArrayTyped(cls, Generic[T_doc]): # type: ignore
doc_type: Type[BaseDocWithoutId] = cast(
Type[BaseDocWithoutId], item
)
for field in _DocArrayTyped.doc_type._docarray_fields().keys():

@@ -104,10 +116,20 @@

# The global scope and qualname need to refer to this class a unique name.
# Otherwise, creating another _DocArrayTyped will overwrite this one.
change_cls_name(
_DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals()
)
# # The global scope and qualname need to refer to this class a unique name.
# # Otherwise, creating another _DocArrayTyped will overwrite this one.
if not is_pydantic_v2:
change_cls_name(
_DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals()
)
cls.__typed_da__[cls][item] = _DocArrayTyped
cls.__typed_da__[cls][item] = _DocArrayTyped
else:
change_cls_name(_DocArrayTyped, f'{cls.__name__}', globals())
if sys.version_info < (3, 12):
cls.__typed_da__[cls][item] = Generic.__class_getitem__.__func__(
_DocArrayTyped, item
) # type: ignore
# this does nothing but check that item is a valid type var or str
# Keep the approach in #1147 to be compatible with lower versions of Python.
else:
cls.__typed_da__[cls][item] = GenericAlias(_DocArrayTyped, item) # type: ignore
return cls.__typed_da__[cls][item]
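The `__class_getitem__` change above caches one parametrized class per item, going through `Generic.__class_getitem__` below Python 3.12 and `types.GenericAlias` from 3.12 on. The caching pattern can be sketched independently of docarray (a simplified illustration, not the library's actual class):

```python
import sys
from types import GenericAlias
from typing import Dict, Generic, TypeVar

T = TypeVar('T')


class Container(Generic[T]):
    # one cached alias per parametrization, so Container[int] is stable
    _cache: Dict[object, object] = {}

    def __class_getitem__(cls, item):
        if item not in cls._cache:
            if sys.version_info < (3, 12):
                # typing's machinery: also validates that `item` is usable
                cls._cache[item] = super().__class_getitem__(item)
            else:
                # lighter-weight builtin alias on newer interpreters
                cls._cache[item] = GenericAlias(cls, item)
        return cls._cache[item]


alias = Container[int]
```

Caching matters for the same reason as `cls.__typed_da__` above: repeatedly evaluating `DocList[MyDoc]` must return the same class object rather than creating a fresh one each time.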

@@ -114,0 +136,0 @@

@@ -15,2 +15,3 @@ import io

overload,
Callable,
)

@@ -32,3 +33,2 @@

if is_pydantic_v2:
from pydantic import GetCoreSchemaHandler
from pydantic_core import core_schema

@@ -50,6 +50,3 @@

class DocList(
ListAdvancedIndexing[T_doc],
PushPullMixin,
IOMixinDocList,
AnyDocArray[T_doc],
ListAdvancedIndexing[T_doc], PushPullMixin, IOMixinDocList, AnyDocArray[T_doc]
):

@@ -363,6 +360,18 @@ """

def __get_pydantic_core_schema__(
cls, _source_type: Any, _handler: GetCoreSchemaHandler
cls, source: Any, handler: Callable[[Any], core_schema.CoreSchema]
) -> core_schema.CoreSchema:
return core_schema.general_plain_validator_function(
cls.validate,
instance_schema = core_schema.is_instance_schema(cls)
args = getattr(source, '__args__', None)
if args:
sequence_t_schema = handler(Sequence[args[0]])
else:
sequence_t_schema = handler(Sequence)
def validate_fn(v, info):
# input has already been validated
return cls(v, validate_input_docs=False)
non_instance_schema = core_schema.with_info_after_validator_function(
validate_fn, sequence_t_schema
)
return core_schema.union_schema([instance_schema, non_instance_schema])

@@ -259,3 +259,2 @@ import base64

"""
with file_ctx or io.BytesIO() as bf:

@@ -262,0 +261,0 @@ self._write_bytes(

@@ -201,3 +201,3 @@ from collections import ChainMap

if isinstance(field_type, type):
if isinstance(field_type, type) or safe_issubclass(field_type, AnyDocArray):
if tf_available and safe_issubclass(field_type, TensorFlowTensor):

@@ -339,3 +339,5 @@ # tf.Tensor does not allow item assignment, therefore the

raise ValueError(f'DocVec[value.doc_type] is not compatible with {cls}')
elif isinstance(value, DocList.__class_getitem__(cls.doc_type)):
elif not is_pydantic_v2 and isinstance(
value, DocList.__class_getitem__(cls.doc_type)
):
return cast(T, value.to_doc_vec())

@@ -342,0 +344,0 @@ elif isinstance(value, Sequence):

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.base_doc.any_doc import AnyDoc

@@ -2,0 +17,0 @@ from docarray.base_doc.base_node import BaseNode

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod

@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, TypeVar, Optional, Type

@@ -329,4 +329,9 @@ import os

 type_ = self._get_field_annotation(field)
-if isinstance(type_, type) and issubclass(type_, AnyDocArray):
-    doclist_exclude_fields.append(field)
+if is_pydantic_v2:
+    # Conservative when touching pydantic v1 logic
+    if safe_issubclass(type_, AnyDocArray):
+        doclist_exclude_fields.append(field)
+else:
+    if isinstance(type_, type) and safe_issubclass(type_, AnyDocArray):
+        doclist_exclude_fields.append(field)
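The `is_pydantic_v2` flag gates the two code paths in this hunk. In docarray it is derived from the installed pydantic version; the sketch below shows the usual way such a flag is computed (the exact docarray spelling may differ):

```python
import pydantic

# True when pydantic 2.x is installed; the v1 branch keeps the
# stricter isinstance(type_, type) guard shown in the hunk.
is_pydantic_v2 = pydantic.VERSION.startswith('2.')
```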

@@ -484,3 +489,2 @@ original_exclude = exclude

def _model_dump(doc):
(

@@ -487,0 +491,0 @@ exclude_,

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.base_doc.mixins.io import IOMixin

@@ -2,0 +17,0 @@ from docarray.base_doc.mixins.update import UpdateMixin

@@ -113,5 +113,3 @@ from abc import abstractmethod

-if isinstance(field_type, type) and safe_issubclass(
-    field_type, DocList
-):
+if safe_issubclass(field_type, DocList):
     nested_docarray_fields.append(field_name)

@@ -118,0 +116,0 @@ else:

@@ -0,3 +1,18 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.computation.abstract_comp_backend import AbstractComputationalBackend
__all__ = ['AbstractComputationalBackend']

@@ -0,3 +1,18 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.data.torch_dataset import MultiModalDataset
__all__ = ['MultiModalDataset']

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing_extensions import TYPE_CHECKING

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.documents.audio import AudioDoc

@@ -2,0 +17,0 @@ from docarray.documents.image import ImageDoc

@@ -0,3 +1,18 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.documents.legacy.legacy_document import LegacyDocument
__all__ = ['LegacyDocument']

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.documents.mesh.mesh_3d import Mesh3D

@@ -2,0 +17,0 @@ from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.documents.point_cloud.point_cloud_3d import PointCloud3D

@@ -2,0 +17,0 @@ from docarray.documents.point_cloud.points_and_colors import PointsAndColors

@@ -0,2 +1,17 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class UnusableObjectError(NotImplementedError):
...

@@ -16,2 +16,5 @@ import types

 from docarray.index.backends.milvus import MilvusDocumentIndex  # noqa: F401
+from docarray.index.backends.mongodb_atlas import (  # noqa: F401
+    MongoDBAtlasDocumentIndex,
+)
 from docarray.index.backends.qdrant import QdrantDocumentIndex  # noqa: F401

@@ -30,2 +33,3 @@ from docarray.index.backends.redis import RedisDocumentIndex # noqa: F401

 'MilvusDocumentIndex',
+'MongoDBAtlasDocumentIndex',
 ]

@@ -60,2 +64,5 @@

     import docarray.index.backends.redis as lib
+elif name == 'MongoDBAtlasDocumentIndex':
+    import_library('pymongo', raise_error=True)
+    import docarray.index.backends.mongodb_atlas as lib
 else:

@@ -62,0 +69,0 @@ raise ImportError(

@@ -355,8 +355,8 @@ # mypy: ignore-errors

-for type in elastic_py_types.keys():
-    if safe_issubclass(python_type, type):
+for t in elastic_py_types.keys():
+    if safe_issubclass(python_type, t):
         self._logger.info(
-            f'Mapped Python type {python_type} to database type "{elastic_py_types[type]}"'
+            f'Mapped Python type {python_type} to database type "{elastic_py_types[t]}"'
         )
-        return elastic_py_types[type]
+        return elastic_py_types[t]

@@ -363,0 +363,0 @@ err_msg = f'Unsupported column type for {type(self)}: {python_type}'
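The rename from `type` to `t` in this hunk is not cosmetic: the old loop left the builtin `type` shadowed, so the later `type(self)` in the error message would call whatever class the loop last bound. A small self-contained illustration (function name hypothetical):

```python
def broken_error_message(value):
    # simulates the old loop: `for type in elastic_py_types.keys(): ...`
    for type in (int, float):
        pass
    # intended: builtin type(value) in the message; actually calls float(value)
    return f'Unsupported column type for {type(value)}'


# With the builtin intact this message would contain "<class 'str'>";
# with the shadowed name it silently converts the value instead.
```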

@@ -103,4 +103,4 @@ import copy

 for info in self._column_infos.values():
-    for type in [list, np.ndarray, AbstractTensor]:
-        if safe_issubclass(info.docarray_type, type) and info.config.get(
+    for t in [list, np.ndarray, AbstractTensor]:
+        if safe_issubclass(info.docarray_type, t) and info.config.get(
             'is_embedding', False

@@ -107,0 +107,0 @@ ):
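Several hunks in this release replace bare `issubclass` checks with `safe_issubclass`. The motivation, sketched below with a simplified stand-in (docarray's real helper in `docarray.utils._internal._typing` handles more cases), is that `issubclass` raises `TypeError` when handed typing generics such as `List[int]`:

```python
from typing import List


def safe_issubclass(candidate, cls) -> bool:
    """Simplified stand-in: return False instead of raising on non-classes."""
    try:
        return isinstance(candidate, type) and issubclass(candidate, cls)
    except TypeError:
        return False


assert safe_issubclass(bool, int)
assert not safe_issubclass(List[int], list)  # bare issubclass would raise TypeError
```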

@@ -1,2 +0,2 @@

-from typing import Any, Dict, List, Tuple, Type, cast
+from typing import Any, Dict, List, Tuple, Type, cast, Set

@@ -23,2 +23,39 @@ from docarray import BaseDoc, DocList

def _collect_query_required_args(method_name: str, required_args: Set[str] = None):
    """
    Returns a function that ensures required keyword arguments are provided.

    :param method_name: The name of the method for which the required arguments are being checked.
    :type method_name: str
    :param required_args: A set containing the names of required keyword arguments. Defaults to None.
    :type required_args: Optional[Set[str]]
    :return: A function that checks for required keyword arguments before executing the specified method.
        Raises ValueError if positional arguments are provided.
        Raises ValueError if any required keyword argument is missing.
    :rtype: Callable
    """
    if required_args is None:
        required_args = set()

    def inner(self, *args, **kwargs):
        if args:
            raise ValueError(
                f"Positional arguments are not supported for "
                f"`{type(self)}.{method_name}`. "
                f"Use keyword arguments instead."
            )
        missing_args = required_args - set(kwargs.keys())
        if missing_args:
            raise ValueError(
                f"`{type(self)}.{method_name}` is missing required "
                f"argument(s): {', '.join(missing_args)}"
            )
        updated_query = self._queries + [(method_name, kwargs)]
        return type(self)(updated_query)

    return inner


def _execute_find_and_filter_query(

@@ -25,0 +62,0 @@ doc_index: BaseDocIndex, query: List[Tuple[str, Dict]], reverse_order: bool = False
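`_collect_query_required_args` manufactures keyword-only query-builder methods that record each call and return a fresh builder. The sketch below wires a standalone copy of the helper to a hypothetical minimal builder (the real builders live on the document-index classes) to show the immutable, chainable behavior:

```python
from typing import Any, Dict, List, Optional, Set, Tuple


def _collect_query_required_args(method_name: str, required_args: Optional[Set[str]] = None):
    if required_args is None:
        required_args = set()

    def inner(self, *args, **kwargs):
        if args:
            raise ValueError(
                f'Positional arguments are not supported for '
                f'`{type(self)}.{method_name}`. Use keyword arguments instead.'
            )
        missing_args = required_args - set(kwargs)
        if missing_args:
            raise ValueError(
                f'`{type(self)}.{method_name}` is missing required '
                f'argument(s): {", ".join(missing_args)}'
            )
        # append the step and hand back a new builder of the same type
        return type(self)(self._queries + [(method_name, kwargs)])

    return inner


class QueryBuilder:  # hypothetical stand-in for a DocumentIndex query builder
    def __init__(self, queries: Optional[List[Tuple[str, Dict[str, Any]]]] = None):
        self._queries = queries or []

    find = _collect_query_required_args('find', required_args={'query', 'search_field'})


q = QueryBuilder().find(query=[0.1, 0.2], search_field='embedding')
assert q._queries == [('find', {'query': [0.1, 0.2], 'search_field': 'embedding'})]
```

Omitting a required keyword, or passing anything positionally, raises `ValueError` before the query ever reaches the backend.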

@@ -644,3 +644,3 @@ import glob

     search_field: str = '',
-    hashed_ids: Optional[Set[str]] = None,
+    hashed_ids: Optional[Set[int]] = None,
 ) -> _FindResultBatched:

@@ -647,0 +647,0 @@ """

@@ -195,3 +195,3 @@ from collections import defaultdict

-if issubclass(python_type, ID):
+if safe_issubclass(python_type, ID):
     return DataType.VARCHAR

@@ -669,3 +669,3 @@

 fields = search_field.split('__')
-if issubclass(self._schema._get_field_annotation(fields[0]), AnyDocArray):  # type: ignore
+if safe_issubclass(self._schema._get_field_annotation(fields[0]), AnyDocArray):  # type: ignore
     return self._subindices[fields[0]].find_batched(

@@ -672,0 +672,0 @@ queries,

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod

@@ -2,0 +17,0 @@ from typing import Dict, Iterator, List, Type

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class ConcurrentPushException(Exception):

@@ -2,0 +17,0 @@ """Exception raised when a concurrent push is detected."""

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

@@ -2,0 +17,0 @@ from pathlib import Path

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import abstractmethod

@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, Any, Type, TypeVar

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.typing.bytes.audio_bytes import AudioBytes

@@ -2,0 +17,0 @@ from docarray.typing.bytes.image_bytes import ImageBytes

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import abstractmethod

@@ -50,5 +65,5 @@ from typing import TYPE_CHECKING, Any, Type, TypeVar

 ) -> 'core_schema.CoreSchema':
-    return core_schema.general_after_validator_function(
+    return core_schema.with_info_after_validator_function(
         cls.validate,
         core_schema.bytes_schema(),
     )
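Several hunks like this one track a pydantic-core API rename: the `general_*_validator_function` constructors became `with_info_*_validator_function` in pydantic-core 2.x, and the validator callable receives a `ValidationInfo` argument. A self-contained sketch with a hypothetical custom type (`LowerBytes` is illustrative, not a docarray type):

```python
from pydantic import BaseModel, GetCoreSchemaHandler
from pydantic_core import core_schema


class LowerBytes(bytes):
    """Hypothetical custom type; docarray's ID/tensor types follow the same shape."""

    @classmethod
    def _validate(cls, value, info):
        # `info` is the ValidationInfo argument the with_info_* variants pass
        return cls(value.lower())

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler: GetCoreSchemaHandler):
        # validate as bytes first, then run the custom after-validator
        return core_schema.with_info_after_validator_function(
            cls._validate,
            core_schema.bytes_schema(),
        )


class Doc(BaseModel):
    data: LowerBytes


assert Doc(data=b'ABC').data == b'abc'
```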

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union

@@ -65,3 +80,3 @@ from uuid import UUID

 ) -> core_schema.CoreSchema:
-    return core_schema.general_plain_validator_function(
+    return core_schema.with_info_plain_validator_function(
         cls.validate,

@@ -68,0 +83,0 @@ )

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable, Dict, Type, TypeVar

@@ -2,0 +17,0 @@

@@ -398,6 +398,6 @@ import abc

 ) -> core_schema.CoreSchema:
-    return core_schema.general_plain_validator_function(
+    return core_schema.with_info_plain_validator_function(
         cls.validate,
         serialization=core_schema.plain_serializer_function_ser_schema(
-            function=orjson_dumps,
+            function=lambda x: x._docarray_to_ndarray().tolist(),
             return_schema=handler.generate_schema(bytes),

@@ -404,0 +404,0 @@ when_used="json-unless-none",

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TypeVar

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC

@@ -2,0 +17,0 @@ from typing import Any, Optional, Tuple, Type

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.typing.proto_register import _register_proto

@@ -2,0 +17,0 @@ from docarray.typing.tensor.embedding.embedding_mixin import EmbeddingMixin

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.typing.proto_register import _register_proto

@@ -2,0 +17,0 @@ from docarray.typing.tensor.image.abstract_image_tensor import AbstractImageTensor

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TypeVar

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TypeVar

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, List, Tuple, Type, TypeVar, Union

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.typing.url.any_url import AnyUrl

@@ -2,0 +17,0 @@ from docarray.typing.url.audio_url import AudioUrl

@@ -59,3 +59,3 @@ import mimetypes

 ) -> core_schema.CoreSchema:
-    return core_schema.general_after_validator_function(
+    return core_schema.with_info_after_validator_function(
         cls._docarray_validate,

@@ -62,0 +62,0 @@ core_schema.str_schema(),

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

@@ -2,0 +17,0 @@ from typing import List, Optional, Tuple, TypeVar

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, TypeVar

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
TEXT_MIMETYPE = 'text'

@@ -2,0 +17,0 @@ AUDIO_MIMETYPE = 'audio'

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, TypeVar

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from docarray.typing.url.url_3d.mesh_url import Mesh3DUrl

@@ -2,0 +17,0 @@ from docarray.typing.url.url_3d.point_cloud_url import PointCloud3DUrl

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC

@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, TypeVar, Union

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, ForwardRef, Optional, Union

@@ -49,4 +64,7 @@

"""
+origin = get_origin(x)
+if origin: # If x is a generic type like DocList[SomeDoc], get its origin
+x = origin
if (
-(get_origin(x) in (list, tuple, dict, set, Union))
+(origin in (list, tuple, dict, set, Union))
or is_typevar(x)

@@ -57,2 +75,3 @@ or (type(x) == ForwardRef)

return False
-return issubclass(x, a_tuple)
+return isinstance(x, type) and issubclass(x, a_tuple)
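Taken together, this hunk makes `safe_issubclass` unwrap parametrized generics to their origin type and refuse non-class inputs instead of raising. A standalone sketch of the patched logic (simplified: the real docarray helper also handles `TypeVar` and `ForwardRef`):

```python
from typing import List, Union, get_origin

def safe_issubclass_sketch(x, a_tuple):
    origin = get_origin(x)
    if origin:  # e.g. List[int] -> list
        x = origin
    if origin in (list, tuple, dict, set, Union):
        return False
    # plain issubclass() raises TypeError when x is not a class
    return isinstance(x, type) and issubclass(x, a_tuple)
```

With this guard, `safe_issubclass_sketch(List[int], (list,))` returns `False` (parametrized containers are filtered out) and `safe_issubclass_sketch(42, (int,))` returns `False` where a bare `issubclass` would raise `TypeError`.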

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

@@ -2,0 +17,0 @@ from functools import lru_cache

@@ -5,3 +5,3 @@ import importlib

import types
-from typing import Any, Optional, Literal
+from typing import Any, Literal, Optional

@@ -54,2 +54,3 @@ import numpy as np

'pymilvus': '"docarray[milvus]"',
+"pymongo": '"docarray[mongo]"',
}

@@ -56,0 +57,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pydantic

@@ -2,0 +17,0 @@

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Dict, List, Optional, Union

@@ -2,0 +17,0 @@

@@ -57,4 +57,5 @@ from typing import Any, Dict, List, Optional, Type, Union

-fields_copy = copy.deepcopy(model.__fields__)
-annotations_copy = copy.deepcopy(model.__annotations__)
+copy_model = copy.deepcopy(model)
+fields_copy = copy_model.__fields__
+annotations_copy = copy_model.__annotations__
for field_name, field in annotations_copy.items():

@@ -69,5 +70,6 @@ if field_name not in fields_copy:

try:
-if safe_issubclass(field, DocList):
+if safe_issubclass(field, DocList) and not is_pydantic_v2:
t: Any = field.doc_type
-fields[field_name] = (List[t], field_info)
+t_aux = create_pure_python_type_model(t)
+fields[field_name] = (List[t_aux], field_info)
else:

@@ -78,3 +80,5 @@ fields[field_name] = (field, field_info)

-return create_model(model.__name__, __base__=model, __doc__=model.__doc__, **fields)
+return create_model(
+copy_model.__name__, __base__=copy_model, __doc__=copy_model.__doc__, **fields
+)
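The reason for cloning the whole model rather than just its `__fields__`: a deep copy of one attribute still leaves the model object itself shared, so passing it as `__base__` keeps pointing at the original class. A minimal illustration of the aliasing hazard with plain objects (illustrative names, not the pydantic API):

```python
import copy

class FakeModel:
    def __init__(self):
        self.__fields__ = {'id': str}

original = FakeModel()

# Copying only the attribute protects that one dict...
fields_copy = copy.deepcopy(original.__fields__)
fields_copy['id'] = int

# ...but deep-copying the object yields a fully independent clone.
clone = copy.deepcopy(original)
clone.__fields__['id'] = bytes
```

After both mutations, `original.__fields__['id']` is still `str`: neither the copied dict nor the clone shares state with the original.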

@@ -85,3 +89,2 @@

field_name: str,
-root_schema: Dict[str, Any],
cached_models: Dict[str, Any],

@@ -96,3 +99,2 @@ is_tensor: bool = False,

:param field_name: The name of the field to be created
-:param root_schema: The schema of the root object, important to get references
:param cached_models: Parameter used when this method is called recursively to reuse partial nested classes.

@@ -117,3 +119,3 @@ :param is_tensor: Boolean used to tell between tensor and list

create_base_doc_from_schema(
-root_schema['definitions'][ref_name],
+definitions[ref_name],
ref_name,

@@ -129,3 +131,2 @@ cached_models=cached_models,

field_name,
-root_schema=root_schema,
cached_models=cached_models,

@@ -149,3 +150,5 @@ is_tensor=tensor_shape is not None,

elif field_type == 'number':
-if num_recursions <= 1:
+if num_recursions == 0:
ret = float
+elif num_recursions == 1:
+# This is a hack because AnyTensor is more generic than a simple List and it comes as simple List

@@ -168,3 +171,6 @@ if is_tensor:

additional_props = field_schema['additionalProperties']
-if additional_props.get('type') == 'object':
+if (
+isinstance(additional_props, dict)
+and additional_props.get('type') == 'object'
+):
doc_type = create_base_doc_from_schema(
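Context for the added `isinstance` check: in JSON Schema, `additionalProperties` may be a boolean (`true`/`false`) rather than a sub-schema, so calling `.get()` on it unguarded raises `AttributeError`. A standalone sketch of the guard (hypothetical helper name):

```python
def is_object_additional_props(additional_props):
    # `additionalProperties` can be a bool or a sub-schema dict in JSON Schema
    return isinstance(additional_props, dict) and additional_props.get('type') == 'object'
```

`is_object_additional_props(True)` is now simply `False`, where `True.get('type')` would have crashed.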

@@ -210,3 +216,2 @@ additional_props, field_name, cached_models=cached_models

field_name=field_name,
-root_schema=root_schema,
cached_models=cached_models,

@@ -272,2 +277,20 @@ is_tensor=tensor_shape is not None,

"""
+def clean_refs(value):
+"""Recursively remove $ref keys and #/$defs values from a data structure."""
+if isinstance(value, dict):
+# Create a new dictionary without $ref keys and without values containing #/$defs
+cleaned_dict = {}
+for k, v in value.items():
+if k == '$ref':
+continue
+cleaned_dict[k] = clean_refs(v)
+return cleaned_dict
+elif isinstance(value, list):
+# Process each item in the list
+return [clean_refs(item) for item in value]
+else:
+# Return primitive values as-is
+return value
if not definitions:
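Reproduced standalone (condensed into comprehensions but behaviorally the same), the new `clean_refs` helper drops `$ref` keys at any depth; the sample fragment below is invented for illustration:

```python
def clean_refs(value):
    """Recursively remove $ref keys from a JSON-schema-like structure."""
    if isinstance(value, dict):
        return {k: clean_refs(v) for k, v in value.items() if k != '$ref'}
    if isinstance(value, list):
        return [clean_refs(item) for item in value]
    return value

fragment = {
    'title': 'Inner',
    'allOf': [{'$ref': '#/$defs/Nested', 'description': 'kept'}],
}
cleaned = clean_refs(fragment)
# cleaned == {'title': 'Inner', 'allOf': [{'description': 'kept'}]}
```

This is what lets the field loop below keep only the metadata that survives cleaning.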

@@ -286,6 +309,6 @@ definitions = (

has_id = True
# Get the field type
field_type = _get_field_annotation_from_schema(
field_schema=field_schema,
field_name=field_name,
root_schema=schema,
cached_models=cached_models,

@@ -306,6 +329,18 @@ is_tensor=False,

for k, v in field_schema.items():
+if field_name == 'id':
+# Skip default_factory for Optional fields and use None
+field_kwargs['default'] = None
if k in FieldInfo.__slots__:
field_kwargs[k] = v
else:
-field_json_schema_extra[k] = v
+if k != '$ref':
+if isinstance(v, dict):
+cleaned_v = clean_refs(v)
+if (
+cleaned_v
+):  # Only add if there's something left after cleaning
+field_json_schema_extra[k] = cleaned_v
+else:
+field_json_schema_extra[k] = v
fields[field_name] = (

@@ -312,0 +347,0 @@ field_type,

@@ -0,1 +1,16 @@

# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['reduce', 'reduce_all']

@@ -2,0 +17,0 @@

Metadata-Version: 2.1
Name: docarray
-Version: 0.40.1.dev1
+Version: 0.41.0
Summary: The data structure for multimodal data

@@ -46,2 +46,3 @@ Home-page: https://docs.docarray.org/

Provides-Extra: milvus
+Provides-Extra: mongo
Provides-Extra: pandas

@@ -72,2 +73,3 @@ Provides-Extra: proto

Requires-Dist: pymilvus (>=2.2.12,<3.0.0) ; extra == "milvus"
+Requires-Dist: pymongo (>=4.6.2) ; extra == "mongo"
Requires-Dist: qdrant-client (>=1.4.0) ; (python_version < "3.12") and (extra == "qdrant")

@@ -105,3 +107,3 @@ Requires-Dist: redis (>=4.6.0,<5.0.0) ; extra == "redis"

-DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/).
+DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE.md) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/).

@@ -112,3 +114,3 @@

- :zap: Based on **[Pydantic](https://github.com/pydantic/pydantic)**, and instantly compatible with web and microservice frameworks like **[FastAPI](https://github.com/tiangolo/fastapi/)** and **[Jina](https://github.com/jina-ai/jina/)**.
-- :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), [Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**.
+- :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**.
- :chains: Allows data transmission as JSON over **HTTP** or as **[Protobuf](https://protobuf.dev/)** over **[gRPC](https://grpc.io/)**.

@@ -441,3 +443,3 @@

-Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come!
+Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come!

@@ -513,3 +515,3 @@ The Document Index interface lets you index and retrieve Documents from multiple vector databases, all with the same user interface.

-For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come.
+For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come.

@@ -937,2 +939,3 @@ </details>

- [HNSWlib](https://github.com/nmslib/hnswlib) as a local-first ANN alternative
+- [Mongo Atlas](https://www.mongodb.com/)

@@ -968,2 +971,3 @@ An integration of [OpenSearch](https://opensearch.org/) is currently in progress.

# Define a document schema

@@ -998,2 +1002,3 @@ class MovieDoc(BaseDoc):

RedisDocumentIndex,
+MongoDBAtlasDocumentIndex,
)

@@ -1000,0 +1005,0 @@

[tool.poetry]
name = "docarray"
-version = '0.40.1.dev1'
+version = '0.41.0'
description='The data structure for multimodal data'

@@ -65,2 +65,3 @@ readme = 'README.md'

pyepsilla = {version = ">=0.2.3", optional = true}
+pymongo = {version = ">=4.6.2", optional = true}

@@ -86,2 +87,3 @@ [tool.poetry.extras]

epsilla = ["pyepsilla"]
+mongo = ["pymongo"]

@@ -168,3 +170,4 @@ # all

"elasticv8: marks test that run with ElasticSearch v8",
-"jac: need to have access to jac cloud"
+"jac: need to have access to jac cloud",
+"atlas: mark tests using MongoDB Atlas",
]

@@ -19,3 +19,3 @@ <p align="center">

-DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/).
+DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE.md) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/).

@@ -26,3 +26,3 @@

- :zap: Based on **[Pydantic](https://github.com/pydantic/pydantic)**, and instantly compatible with web and microservice frameworks like **[FastAPI](https://github.com/tiangolo/fastapi/)** and **[Jina](https://github.com/jina-ai/jina/)**.
-- :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), [Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**.
+- :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**.
- :chains: Allows data transmission as JSON over **HTTP** or as **[Protobuf](https://protobuf.dev/)** over **[gRPC](https://grpc.io/)**.

@@ -355,3 +355,3 @@

-Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come!
+Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come!

@@ -427,3 +427,3 @@ The Document Index interface lets you index and retrieve Documents from multiple vector databases, all with the same user interface.

-For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come.
+For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come.

@@ -851,2 +851,3 @@ </details>

- [HNSWlib](https://github.com/nmslib/hnswlib) as a local-first ANN alternative
+- [Mongo Atlas](https://www.mongodb.com/)

@@ -882,2 +883,3 @@ An integration of [OpenSearch](https://opensearch.org/) is currently in progress.

# Define a document schema

@@ -912,2 +914,3 @@ class MovieDoc(BaseDoc):

RedisDocumentIndex,
+MongoDBAtlasDocumentIndex,
)

@@ -914,0 +917,0 @@