docarray
Advanced tools
| import collections | ||
| import logging | ||
| from dataclasses import dataclass, field | ||
| from functools import cached_property | ||
| from typing import ( | ||
| Any, | ||
| Dict, | ||
| Generator, | ||
| Generic, | ||
| List, | ||
| NamedTuple, | ||
| Optional, | ||
| Sequence, | ||
| Tuple, | ||
| Type, | ||
| TypeVar, | ||
| Union, | ||
| ) | ||
| import bson | ||
| import numpy as np | ||
| from pymongo import MongoClient | ||
| from docarray import BaseDoc, DocList, handler | ||
| from docarray.index.abstract import BaseDocIndex, _raise_not_composable | ||
| from docarray.index.backends.helper import _collect_query_required_args | ||
| from docarray.typing import AnyTensor | ||
| from docarray.typing.tensor.abstract_tensor import AbstractTensor | ||
| from docarray.utils._internal._typing import safe_issubclass | ||
| from docarray.utils.find import _FindResult, _FindResultBatched | ||
# Module-level logger wired into docarray's shared logging handler.
logger = logging.getLogger(__name__)
logger.addHandler(handler)

# Hard upper bound on `numCandidates` passed to Atlas $vectorSearch.
MAX_CANDIDATES = 10_000
# Multiplier applied to `limit` to widen the ANN candidate pool.
OVERSAMPLING_FACTOR = 10

# Schema type parameter: any document type derived from BaseDoc.
TSchema = TypeVar('TSchema', bound=BaseDoc)
class HybridResult(NamedTuple):
    """Adds breakdown of scores into vector and text components."""

    # Matched documents (typed DocList or raw dicts).
    documents: Union[DocList, List[Dict[str, Any]]]
    # Combined score per document, aligned with `documents`.
    scores: AnyTensor
    # Per-stage score lists keyed by score-field name (plus "id").
    score_breakdown: Dict[str, List[Any]]
class MongoDBAtlasDocumentIndex(BaseDocIndex, Generic[TSchema]):
    """DocumentIndex backed by MongoDB Atlas Vector Store.

    MongoDB Atlas provides full Text, Vector, and Hybrid Search
    and can store structured data, text and vector indexes
    in the same Collection (Index).
    Atlas provides efficient index and search on vector embeddings
    using the Hierarchical Navigable Small Worlds (HNSW) algorithm.

    For documentation, see the following.
    * Text Search: https://www.mongodb.com/docs/atlas/atlas-search/atlas-search-overview/
    * Vector Search: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/
    * Hybrid Search: https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/reciprocal-rank-fusion/
    """

    def __init__(self, db_config=None, **kwargs):
        """Initialize the index.

        :param db_config: optional ``DBConfig`` with connection/index settings.
        :param kwargs: forwarded to ``BaseDocIndex`` (e.g. ``index_name=...``).
        """
        super().__init__(db_config=db_config, **kwargs)
        logger.info(f'{self.__class__.__name__} has been initialized')
| @property | ||
| def index_name(self): | ||
| """The name of the index/collection in the database. | ||
| Note that in MongoDB Atlas, one has Collections (analogous to Tables), | ||
| which can have Search Indexes. They are distinct. | ||
| DocArray tends to consider them together. | ||
| The index_name can be set when initializing MongoDBAtlasDocumentIndex. | ||
| The easiest way is to pass index_name=<collection_name> as a kwarg. | ||
| Otherwise, a rational default uses the name of the DocumentTypes that it contains. | ||
| """ | ||
| if self._db_config.index_name is not None: | ||
| return self._db_config.index_name | ||
| else: | ||
| # Create a reasonable default | ||
| if not self._schema: | ||
| raise ValueError( | ||
| 'A MongoDBAtlasDocumentIndex must be typed with a Document type.' | ||
| 'To do so, use the syntax: MongoDBAtlasDocumentIndex[DocumentType]' | ||
| ) | ||
| schema_name = self._schema.__name__.lower() | ||
| logger.debug(f"db_config.index_name was not set. Using {schema_name}") | ||
| return schema_name | ||
    @property
    def _database_name(self):
        """Name of the MongoDB database that holds the collection."""
        return self._db_config.database_name

    @cached_property
    def _client(self):
        # Cached so the MongoClient (and its connection pool) is created once.
        return self._connect_to_mongodb_atlas(
            atlas_connection_uri=self._db_config.mongo_connection_uri
        )

    @property
    def _collection(self):
        """MongoDB Collection"""
        return self._client[self._database_name][self.index_name]

    @staticmethod
    def _connect_to_mongodb_atlas(atlas_connection_uri: str):
        """
        Establish a connection to MongoDB Atlas.

        :param atlas_connection_uri: full MongoDB connection string.
        :return: a connected ``MongoClient``.
        """
        client = MongoClient(
            atlas_connection_uri,
            # driver=DriverInfo(name="docarray", version=version("docarray"))
        )
        return client
    def _create_indexes(self):
        """Create a new index in the MongoDB database if it doesn't already exist.

        NOTE(review): currently a no-op body — Atlas Search indexes appear to
        be expected to exist already (created out of band); confirm intent.
        """

    def _check_index_exists(self, index_name: str) -> bool:
        """
        Check if an index exists in the MongoDB Atlas database.

        :param index_name: The name of the index.
        :return: True if the index exists, False otherwise.

        NOTE(review): currently a no-op returning None (falsy) — confirm intent.
        """
    @dataclass
    class Query:
        """Dataclass describing a query."""

        # Per-search-field query embedding (repeated `find` calls on the
        # same field are averaged by QueryBuilder.build).
        vector_fields: Optional[Dict[str, np.ndarray]]
        # Raw kwargs of each `filter()` call (MongoDB Query Language dicts).
        filters: Optional[List[Any]]
        # Raw kwargs of each `text_search()` call.
        text_searches: Optional[List[Any]]
        # Maximum number of results to return.
        limit: int
| class QueryBuilder(BaseDocIndex.QueryBuilder): | ||
| """Compose complex queries containing vector search (find), text_search, and filters. | ||
| Arguments to `find` are vectors of embeddings, text_search expects strings, | ||
| and filters expect dicts of MongoDB Query Language (MDB). | ||
| NOTE: When doing Hybrid Search, pay close attention to the interpretation and use of inputs, | ||
| particularly when multiple calls are made of the same method (find, text_search, filter). | ||
| * find (Vector Search): Embedding vectors will be averaged. The penalty/weight defined in DBConfig will not change. | ||
| * text_search: Individual searches are performed, each with the same penalty/weight. | ||
| * filter: Within Vector Search, performs efficient k-NN filtering with the Lucene engine | ||
| """ | ||
| def __init__(self, query: Optional[List[Tuple[str, Dict]]] = None): | ||
| super().__init__() | ||
| # list of tuples (method name, kwargs) | ||
| self._queries: List[Tuple[str, Dict]] = query or [] | ||
| def build(self, limit: int = 1, *args, **kwargs) -> Any: | ||
| """Build a `Query` that can be passed to `execute_query`.""" | ||
| search_fields: Dict[str, np.ndarray] = collections.defaultdict(list) | ||
| filters: List[Any] = [] | ||
| text_searches: List[Any] = [] | ||
| for method, kwargs in self._queries: | ||
| if method == 'find': | ||
| search_field = kwargs['search_field'] | ||
| search_fields[search_field].append(kwargs["query"]) | ||
| elif method == 'filter': | ||
| filters.append(kwargs) | ||
| else: | ||
| text_searches.append(kwargs) | ||
| vector_fields = { | ||
| field: np.average(vectors, axis=0) | ||
| for field, vectors in search_fields.items() | ||
| } | ||
| return MongoDBAtlasDocumentIndex.Query( | ||
| vector_fields=vector_fields, | ||
| filters=filters, | ||
| text_searches=text_searches, | ||
| limit=limit, | ||
| ) | ||
| find = _collect_query_required_args('find', {'search_field', 'query'}) | ||
| filter = _collect_query_required_args('filter', {'query'}) | ||
| text_search = _collect_query_required_args( | ||
| 'text_search', {'search_field', 'query'} | ||
| ) | ||
| find_batched = _raise_not_composable('find_batched') | ||
| filter_batched = _raise_not_composable('filter_batched') | ||
| text_search_batched = _raise_not_composable('text_search_batched') | ||
| def execute_query( | ||
| self, query: Any, *args, score_breakdown=True, **kwargs | ||
| ) -> Any: # _FindResult: | ||
| """Execute a Query on the database. | ||
| :param query: the query to execute. The output of this Document index's `QueryBuilder.build()` method. | ||
| :param args: positional arguments to pass to the query | ||
| :param score_breakdown: Will provide breakdown of scores into text and vector components for Hybrid Searches. | ||
| :param kwargs: keyword arguments to pass to the query | ||
| :return: the result of the query | ||
| """ | ||
| if not isinstance(query, MongoDBAtlasDocumentIndex.Query): | ||
| raise ValueError( | ||
| "Expected MongoDBAtlasDocumentIndex.Query. Found {type(query)=}." | ||
| "For native calls to MongoDBAtlasDocumentIndex, simply call filter()" | ||
| ) | ||
| if len(query.vector_fields) > 1: | ||
| self._logger.warning( | ||
| f"{len(query.vector_fields)} embedding vectors have been provided to the query. They will be averaged." | ||
| ) | ||
| if len(query.text_searches) > 1: | ||
| self._logger.warning( | ||
| f"{len(query.text_searches)} text searches will be performed, and each receive a ranked score." | ||
| ) | ||
| # collect filters | ||
| filters: List[Dict[str, Any]] = [] | ||
| for filter_ in query.filters: | ||
| filters.append(filter_['query']) | ||
| # check if hybrid search is needed. | ||
| hybrid = len(query.vector_fields) + len(query.text_searches) > 1 | ||
| if hybrid: | ||
| if len(query.vector_fields) > 1: | ||
| raise NotImplementedError( | ||
| "Hybrid Search on multiple Vector Indexes has yet to be done." | ||
| ) | ||
| pipeline = self._hybrid_search( | ||
| query.vector_fields, query.text_searches, filters, query.limit | ||
| ) | ||
| else: | ||
| if query.text_searches: | ||
| # it is a simple text search, perhaps with filters. | ||
| text_stage = self._text_search_stage(**query.text_searches[0]) | ||
| pipeline = [ | ||
| text_stage, | ||
| {"$match": {"$and": filters} if filters else {}}, | ||
| { | ||
| '$project': self._project_fields( | ||
| extra_fields={"score": {'$meta': 'searchScore'}} | ||
| ) | ||
| }, | ||
| {"$limit": query.limit}, | ||
| ] | ||
| elif query.vector_fields: | ||
| # it is a simple vector search, perhaps with filters. | ||
| assert ( | ||
| len(query.vector_fields) == 1 | ||
| ), "Query contains more than one vector_field." | ||
| field, vector_query = list(query.vector_fields.items())[0] | ||
| pipeline = [ | ||
| self._vector_search_stage( | ||
| query=vector_query, | ||
| search_field=field, | ||
| limit=query.limit, | ||
| filters=filters, | ||
| ), | ||
| { | ||
| '$project': self._project_fields( | ||
| extra_fields={"score": {'$meta': 'vectorSearchScore'}} | ||
| ) | ||
| }, | ||
| ] | ||
| # it is only a filter search. | ||
| else: | ||
| pipeline = [{"$match": {"$and": filters}}] | ||
| with self._collection.aggregate(pipeline) as cursor: | ||
| results, scores = self._mongo_to_docs(cursor) | ||
| docs = self._dict_list_to_docarray(results) | ||
| if hybrid and score_breakdown and results: | ||
| score_breakdown = collections.defaultdict(list) | ||
| score_fields = [key for key in results[0] if "score" in key] | ||
| for res in results: | ||
| score_breakdown["id"].append(res["id"]) | ||
| for sf in score_fields: | ||
| score_breakdown[sf].append(res[sf]) | ||
| logger.debug(score_breakdown) | ||
| return HybridResult( | ||
| documents=docs, scores=scores, score_breakdown=score_breakdown | ||
| ) | ||
| return _FindResult(documents=docs, scores=scores) | ||
    @dataclass
    class DBConfig(BaseDocIndex.DBConfig):
        """Static configuration for connecting to MongoDB Atlas."""

        # MongoDB connection string (e.g. mongodb+srv://... for Atlas).
        mongo_connection_uri: str = 'localhost'
        # Collection name; when None, defaults to the lowercased schema name.
        index_name: Optional[str] = None
        database_name: Optional[str] = "default"
        # Per-BSON-type column defaults. `penalty` is the reciprocal-rank
        # weight used in hybrid search; `oversample_factor`/`max_candidates`
        # bound the $vectorSearch candidate pool.
        default_column_config: Dict[Type, Dict[str, Any]] = field(
            default_factory=lambda: collections.defaultdict(
                dict,
                {
                    bson.BSONARR: {
                        'distance': 'COSINE',
                        'oversample_factor': OVERSAMPLING_FACTOR,
                        'max_candidates': MAX_CANDIDATES,
                        'indexed': False,
                        'index_name': None,
                        'penalty': 5,
                    },
                    bson.BSONSTR: {
                        'indexed': False,
                        'index_name': None,
                        'operator': 'phrase',
                        'penalty': 1,
                    },
                },
            )
        )
    @dataclass
    class RuntimeConfig(BaseDocIndex.RuntimeConfig):
        # No backend-specific runtime options beyond the base class.
        ...
| def python_type_to_db_type(self, python_type: Type) -> Any: | ||
| """Map python type to database type. | ||
| Takes any python type and returns the corresponding database column type. | ||
| :param python_type: a python type. | ||
| :return: the corresponding database column type, | ||
| or None if ``python_type`` is not supported. | ||
| """ | ||
| type_map = { | ||
| int: bson.BSONNUM, | ||
| float: bson.BSONDEC, | ||
| collections.OrderedDict: bson.BSONOBJ, | ||
| str: bson.BSONSTR, | ||
| bytes: bson.BSONBIN, | ||
| dict: bson.BSONOBJ, | ||
| np.ndarray: bson.BSONARR, | ||
| AbstractTensor: bson.BSONARR, | ||
| } | ||
| for py_type, mongo_types in type_map.items(): | ||
| if safe_issubclass(python_type, py_type): | ||
| return mongo_types | ||
| raise ValueError(f'Unsupported column type for {type(self)}: {python_type}') | ||
| def _doc_to_mongo(self, doc): | ||
| result = doc.copy() | ||
| for name in result: | ||
| if self._column_infos[name].db_type == bson.BSONARR: | ||
| result[name] = list(result[name]) | ||
| result["_id"] = result.pop("id") | ||
| return result | ||
| def _docs_to_mongo(self, docs): | ||
| return [self._doc_to_mongo(doc) for doc in docs] | ||
| @staticmethod | ||
| def _mongo_to_doc(mongo_doc: dict) -> dict: | ||
| result = mongo_doc.copy() | ||
| result["id"] = result.pop("_id") | ||
| score = result.get("score", None) | ||
| return result, score | ||
| @staticmethod | ||
| def _mongo_to_docs(mongo_docs: Generator[Dict, None, None]) -> List[dict]: | ||
| docs = [] | ||
| scores = [] | ||
| for mongo_doc in mongo_docs: | ||
| doc, score = MongoDBAtlasDocumentIndex._mongo_to_doc(mongo_doc) | ||
| docs.append(doc) | ||
| scores.append(score) | ||
| return docs, scores | ||
    def _get_oversampling_factor(self, search_field: str) -> int:
        """Per-column multiplier for `numCandidates` relative to `limit`."""
        return self._column_infos[search_field].config["oversample_factor"]

    def _get_max_candidates(self, search_field: str) -> int:
        """Per-column hard cap on `numCandidates` for $vectorSearch."""
        return self._column_infos[search_field].config["max_candidates"]
| def _index(self, column_to_data: Dict[str, Generator[Any, None, None]]): | ||
| """Add and Index Documents to the datastore | ||
| The input format is aimed towards column vectors, which is not | ||
| the natural fit for MongoDB Collections, but we have chosen | ||
| not to override BaseDocIndex.index as it provides valuable validation. | ||
| This may change in the future. | ||
| :param column_to_data: is a dictionary from column name to a generator | ||
| """ | ||
| self._index_subindex(column_to_data) | ||
| docs: List[Dict[str, Any]] = [] | ||
| while True: | ||
| try: | ||
| doc = {key: next(column_to_data[key]) for key in column_to_data} | ||
| mongo_doc = self._doc_to_mongo(doc) | ||
| docs.append(mongo_doc) | ||
| except StopIteration: | ||
| break | ||
| self._collection.insert_many(docs) | ||
    def num_docs(self) -> int:
        """Return the number of indexed documents"""
        # Exact (not estimated) count over the whole collection.
        return self._collection.count_documents({})

    @property
    def _is_index_empty(self) -> bool:
        """
        Check if index is empty by comparing the number of documents to zero.

        :return: True if the index is empty, False otherwise.
        """
        return self.num_docs() == 0
    def _del_items(self, doc_ids: Sequence[str]) -> None:
        """Delete Documents from the index.

        :param doc_ids: ids to delete from the Document Store
        """
        # Documents are stored under `_id`; one `$in` delete removes them all.
        mg_filter = {"_id": {"$in": doc_ids}}
        self._collection.delete_many(mg_filter)
| def _get_items( | ||
| self, doc_ids: Sequence[str] | ||
| ) -> Union[Sequence[TSchema], Sequence[Dict[str, Any]]]: | ||
| """Get Documents from the index, by `id`. | ||
| If no document is found, a KeyError is raised. | ||
| :param doc_ids: ids to get from the Document index | ||
| :return: Sequence of Documents, sorted corresponding to the order of `doc_ids`. Duplicate `doc_ids` can be omitted in the output. | ||
| """ | ||
| mg_filter = {"_id": {"$in": doc_ids}} | ||
| docs = self._collection.find(mg_filter) | ||
| docs, _ = self._mongo_to_docs(docs) | ||
| if not docs: | ||
| raise KeyError(f'No document with id {doc_ids} found') | ||
| return docs | ||
    def _reciprocal_rank_stage(self, search_field: str, score_field: str):
        """Build pipeline stages converting result rank into an RRF score.

        All documents are pushed into a single array, unwound with their
        array index as `rank`, and scored as 1 / (rank + penalty + 1) — the
        reciprocal-rank-fusion formula — where `penalty` comes from the
        column's configuration.

        :param search_field: column whose configured penalty is used.
        :param score_field: name under which the RRF score is projected.
        :return: list of aggregation stages.
        """
        penalty = self._column_infos[search_field].config["penalty"]
        # Re-project every schema column out of the wrapped `docs` element.
        projection_fields = {
            key: f"$docs.{key}" for key in self._column_infos.keys() if key != "id"
        }
        projection_fields["_id"] = "$docs._id"
        projection_fields[score_field] = 1
        return [
            {"$group": {"_id": None, "docs": {"$push": "$$ROOT"}}},
            {"$unwind": {"path": "$docs", "includeArrayIndex": "rank"}},
            {
                "$addFields": {
                    score_field: {"$divide": [1.0, {"$add": ["$rank", penalty, 1]}]}
                }
            },
            {'$project': projection_fields},
        ]
| def _add_stage_to_pipeline(self, pipeline: List[Any], stage: Dict[str, Any]): | ||
| if pipeline: | ||
| pipeline.append( | ||
| {"$unionWith": {"coll": self.index_name, "pipeline": stage}} | ||
| ) | ||
| else: | ||
| pipeline.extend(stage) | ||
| return pipeline | ||
| def _final_stage(self, scores_fields, limit): | ||
| """Sum individual scores, sort, and apply limit.""" | ||
| doc_fields = self._column_infos.keys() | ||
| grouped_fields = { | ||
| key: {"$first": f"${key}"} for key in doc_fields if key != "_id" | ||
| } | ||
| best_score = {score: {'$max': f'${score}'} for score in scores_fields} | ||
| final_pipeline = [ | ||
| {"$group": {"_id": "$_id", **grouped_fields, **best_score}}, | ||
| { | ||
| "$project": { | ||
| **{doc_field: 1 for doc_field in doc_fields}, | ||
| **{score: {"$ifNull": [f"${score}", 0]} for score in scores_fields}, | ||
| } | ||
| }, | ||
| { | ||
| "$addFields": { | ||
| "score": {"$add": [f"${score}" for score in scores_fields]}, | ||
| } | ||
| }, | ||
| {"$sort": {"score": -1}}, | ||
| {"$limit": limit}, | ||
| ] | ||
| return final_pipeline | ||
| @staticmethod | ||
| def _score_field(search_field: str, search_field_counts: Dict[str, int]): | ||
| score_field = f"{search_field}_score" | ||
| count = search_field_counts[search_field] | ||
| if count > 1: | ||
| score_field += str(count) | ||
| return score_field | ||
    def _hybrid_search(
        self,
        vector_queries: Dict[str, Any],
        text_queries: List[Dict[str, Any]],
        filters: List[Dict[str, Any]],
        limit: int,
    ):
        """Compose one aggregation pipeline fusing vector and text searches.

        Each search runs as its own sub-pipeline (joined via `$unionWith`),
        produces a reciprocal-rank score, and `_final_stage` merges duplicate
        documents and sums the scores.

        :param vector_queries: mapping of search field -> query embedding.
        :param text_queries: kwargs of each text search (query, search_field).
        :param filters: MQL filter dicts applied to every search branch.
        :param limit: number of fused results to return.
        :return: complete aggregation pipeline (list of stages).
        """
        hybrid_pipeline = []  # combined aggregate pipeline
        search_field_counts = collections.defaultdict(
            int
        )  # stores count of calls on same search field
        score_fields = []  # names given to scores of each search stage
        for search_field, query in vector_queries.items():
            search_field_counts[search_field] += 1
            vector_stage = self._vector_search_stage(
                query=query,
                search_field=search_field,
                limit=limit,
                filters=filters,
            )
            score_field = self._score_field(search_field, search_field_counts)
            score_fields.append(score_field)
            vector_pipeline = [
                vector_stage,
                *self._reciprocal_rank_stage(search_field, score_field),
            ]
            self._add_stage_to_pipeline(hybrid_pipeline, vector_pipeline)
        for kwargs in text_queries:
            search_field_counts[kwargs["search_field"]] += 1
            text_stage = self._text_search_stage(**kwargs)
            search_field = kwargs["search_field"]
            score_field = self._score_field(search_field, search_field_counts)
            score_fields.append(score_field)
            reciprocal_rank_stage = self._reciprocal_rank_stage(
                search_field, score_field
            )
            text_pipeline = [
                text_stage,
                # Unlike $vectorSearch, $search has no pre-filter here, so
                # filters are applied after the search stage.
                {"$match": {"$and": filters} if filters else {}},
                {"$limit": limit},
                *reciprocal_rank_stage,
            ]
            self._add_stage_to_pipeline(hybrid_pipeline, text_pipeline)
        hybrid_pipeline += self._final_stage(score_fields, limit)
        return hybrid_pipeline
    def _vector_search_stage(
        self,
        query: np.ndarray,
        search_field: str,
        limit: int,
        filters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Build a `$vectorSearch` aggregation stage.

        :param query: query embedding; cast to float64 for BSON encoding.
        :param search_field: indexed vector column to search.
        :param limit: number of nearest neighbours to return.
        :param filters: optional MQL filters applied as a k-NN pre-filter.
        :return: the `$vectorSearch` stage dict.
        """
        search_index_name = self._get_column_db_index(search_field)
        oversampling_factor = self._get_oversampling_factor(search_field)
        max_candidates = self._get_max_candidates(search_field)
        query = query.astype(np.float64).tolist()
        stage = {
            '$vectorSearch': {
                'index': search_index_name,
                'path': search_field,
                'queryVector': query,
                # Oversample the ANN candidate pool, capped at max_candidates.
                'numCandidates': min(limit * oversampling_factor, max_candidates),
                'limit': limit,
            }
        }
        if filters:
            stage['$vectorSearch']['filter'] = {"$and": filters}
        return stage
    def _text_search_stage(
        self,
        query: str,
        search_field: str,
    ) -> Dict[str, Any]:
        """Build a `$search` aggregation stage for full-text search.

        :param query: text to search for.
        :param search_field: indexed text column to search.
        :return: the `$search` stage dict; the Atlas Search operator
            (e.g. ``phrase``) comes from the column's configuration.
        """
        operator = self._column_infos[search_field].config["operator"]
        index = self._get_column_db_index(search_field)
        return {
            "$search": {
                "index": index,
                operator: {"query": query, "path": search_field},
            }
        }
| def _doc_exists(self, doc_id: str) -> bool: | ||
| """ | ||
| Checks if a given document exists in the index. | ||
| :param doc_id: The id of a document to check. | ||
| :return: True if the document exists in the index, False otherwise. | ||
| """ | ||
| doc = self._collection.find_one({"_id": doc_id}) | ||
| return bool(doc) | ||
    def _find(
        self,
        query: np.ndarray,
        limit: int,
        search_field: str = '',
    ) -> _FindResult:
        """Find documents in the index

        :param query: query vector for KNN/ANN search. Has single axis.
        :param limit: maximum number of documents to return per query
        :param search_field: name of the field to search on
        :return: a named tuple containing `documents` and `scores`
        """
        # NOTE: in standard implementations,
        # `search_field` is equal to the column name to search on
        vector_search_stage = self._vector_search_stage(query, search_field, limit)
        pipeline = [
            vector_search_stage,
            # Project all schema columns plus the stage's similarity score.
            {
                '$project': self._project_fields(
                    extra_fields={"score": {'$meta': 'vectorSearchScore'}}
                )
            },
        ]
        with self._collection.aggregate(pipeline) as cursor:
            documents, scores = self._mongo_to_docs(cursor)
        return _FindResult(documents=documents, scores=scores)
| def _find_batched( | ||
| self, queries: np.ndarray, limit: int, search_field: str = '' | ||
| ) -> _FindResultBatched: | ||
| """Find documents in the index | ||
| :param queries: query vectors for KNN/ANN search. | ||
| Has shape (batch_size, vector_dim) | ||
| :param limit: maximum number of documents to return | ||
| :param search_field: name of the field to search on | ||
| :return: a named tuple containing `documents` and `scores` | ||
| """ | ||
| docs, scores = [], [] | ||
| for query in queries: | ||
| results = self._find(query=query, search_field=search_field, limit=limit) | ||
| docs.append(results.documents) | ||
| scores.append(results.scores) | ||
| return _FindResultBatched(documents=docs, scores=scores) | ||
    def _get_column_db_index(self, column_name: str) -> Optional[str]:
        """
        Retrieve the index name associated with the specified column name.

        Parameters:
            column_name (str): The name of the column.

        Returns:
            Optional[str]: The index name associated with the specified column name, or None if not found.

        Raises:
            ValueError: if the column is a vector/text column without a valid
                configured index name, or is not an indexable column at all.
        """
        index_name = self._column_infos[column_name].config.get("index_name")
        # Vector columns map to Atlas Vector Search indexes; str columns to
        # Atlas Search (text) indexes.
        is_vector_index = safe_issubclass(
            self._column_infos[column_name].docarray_type, AbstractTensor
        )
        is_text_index = safe_issubclass(
            self._column_infos[column_name].docarray_type, str
        )
        if index_name is None or not isinstance(index_name, str):
            if is_vector_index:
                raise ValueError(
                    f'The column {column_name} for MongoDBAtlasDocumentIndex should be associated '
                    'with an Atlas Vector Index.'
                )
            elif is_text_index:
                raise ValueError(
                    f'The column {column_name} for MongoDBAtlasDocumentIndex should be associated '
                    'with an Atlas Index.'
                )
        if not (is_vector_index or is_text_index):
            raise ValueError(
                f'The column {column_name} for MongoDBAtlasDocumentIndex cannot be associated to an index'
            )
        return index_name
| def _project_fields(self, extra_fields: Dict[str, Any] = None) -> dict: | ||
| """ | ||
| Create a projection dictionary to include all fields defined in the column information. | ||
| Returns: | ||
| dict: A dictionary where each field key from the column information is mapped to the value 1, | ||
| indicating that the field should be included in the projection. | ||
| """ | ||
| fields = {key: 1 for key in self._column_infos.keys() if key != "id"} | ||
| fields["_id"] = 1 | ||
| if extra_fields: | ||
| fields.update(extra_fields) | ||
| return fields | ||
    def _filter(
        self,
        filter_query: Any,
        limit: int,
    ) -> Union[DocList, List[Dict]]:
        """Find documents in the index based on a filter query

        :param filter_query: the DB specific filter query to execute
        :param limit: maximum number of documents to return
        :return: a DocList containing the documents that match the filter query
        """
        # Plain `find` (no aggregation) — no scores, so only the documents
        # element (index 0) of the (docs, scores) tuple is returned.
        with self._collection.find(filter_query, limit=limit) as cursor:
            return self._mongo_to_docs(cursor)[0]

    def _filter_batched(
        self,
        filter_queries: Any,
        limit: int,
    ) -> Union[List[DocList], List[List[Dict]]]:
        """Find documents in the index based on multiple filter queries.

        Each query is considered individually, and results are returned per query.

        :param filter_queries: the DB specific filter queries to execute
        :param limit: maximum number of documents to return per query
        :return: List of DocLists containing the documents that match the filter
            queries
        """
        return [self._filter(query, limit) for query in filter_queries]
    def _text_search(
        self,
        query: str,
        limit: int,
        search_field: str = '',
    ) -> _FindResult:
        """Find documents in the index based on a text search query

        :param query: The text to search for
        :param limit: maximum number of documents to return
        :param search_field: name of the field to search on
        :return: a named tuple containing `documents` and `scores`
        """
        text_stage = self._text_search_stage(query=query, search_field=search_field)
        pipeline = [
            text_stage,
            # Project all schema columns plus the full-text relevance score.
            {
                '$project': self._project_fields(
                    extra_fields={'score': {'$meta': 'searchScore'}}
                )
            },
            {"$limit": limit},
        ]
        with self._collection.aggregate(pipeline) as cursor:
            documents, scores = self._mongo_to_docs(cursor)
        return _FindResult(documents=documents, scores=scores)

    def _text_search_batched(
        self,
        queries: Sequence[str],
        limit: int,
        search_field: str = '',
    ) -> _FindResultBatched:
        """Find documents in the index based on a text search query

        :param queries: The texts to search for
        :param limit: maximum number of documents to return per query
        :param search_field: name of the field to search on
        :return: a named tuple containing `documents` and `scores`
        """
        # NOTE: in standard implementations,
        # `search_field` is equal to the column name to search on
        documents, scores = [], []
        for query in queries:
            results = self._text_search(
                query=query, search_field=search_field, limit=limit
            )
            documents.append(results.documents)
            scores.append(results.scores)
        return _FindResultBatched(documents=documents, scores=scores)
| def _filter_by_parent_id(self, id: str) -> Optional[List[str]]: | ||
| """Filter the ids of the subindex documents given id of root document. | ||
| :param id: the root document id to filter by | ||
| :return: a list of ids of the subindex documents | ||
| """ | ||
| with self._collection.find({"parent_id": id}, projection={"_id": 1}) as cursor: | ||
| return [doc["_id"] for doc in cursor] |
+70
-1
@@ -1,2 +0,17 @@ | ||
| __version__ = '0.40.1.dev1' | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| __version__ = '0.40.1' | ||
@@ -8,3 +23,57 @@ import logging | ||
| from docarray.utils._internal.misc import _get_path_from_docarray_root_level | ||
| from docarray.utils._internal.pydantic import is_pydantic_v2 | ||
| def unpickle_doclist(doc_type, b): | ||
| return DocList[doc_type].from_bytes(b, protocol="protobuf") | ||
| def unpickle_docvec(doc_type, tensor_type, b): | ||
| return DocVec[doc_type].from_bytes(b, protocol="protobuf", tensor_type=tensor_type) | ||
| if is_pydantic_v2: | ||
| # Register the pickle functions | ||
| def register_serializers(): | ||
| import copyreg | ||
| from functools import partial | ||
| unpickle_doc_fn = partial(BaseDoc.from_bytes, protocol="protobuf") | ||
| def pickle_doc(doc): | ||
| b = doc.to_bytes(protocol='protobuf') | ||
| return unpickle_doc_fn, (doc.__class__, b) | ||
| # Register BaseDoc serialization | ||
| copyreg.pickle(BaseDoc, pickle_doc) | ||
| # For DocList, we need to hook into __reduce__ since it's a generic | ||
| def pickle_doclist(doc_list): | ||
| b = doc_list.to_bytes(protocol='protobuf') | ||
| doc_type = doc_list.doc_type | ||
| return unpickle_doclist, (doc_type, b) | ||
| # Replace DocList.__reduce__ with a method that returns the correct format | ||
| def doclist_reduce(self): | ||
| return pickle_doclist(self) | ||
| DocList.__reduce__ = doclist_reduce | ||
| # For DocVec, we need to hook into __reduce__ since it's a generic | ||
| def pickle_docvec(doc_vec): | ||
| b = doc_vec.to_bytes(protocol='protobuf') | ||
| doc_type = doc_vec.doc_type | ||
| tensor_type = doc_vec.tensor_type | ||
| return unpickle_docvec, (doc_type, tensor_type, b) | ||
| # Replace DocList.__reduce__ with a method that returns the correct format | ||
| def docvec_reduce(self): | ||
| return pickle_docvec(self) | ||
| DocVec.__reduce__ = docvec_reduce | ||
| register_serializers() | ||
| __all__ = ['BaseDoc', 'DocList', 'DocVec'] | ||
@@ -11,0 +80,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.array.any_array import AnyDocArray | ||
@@ -2,0 +17,0 @@ from docarray.array.doc_list.doc_list import DocList |
@@ -28,2 +28,3 @@ import sys | ||
| from docarray.utils._internal._typing import change_cls_name, safe_issubclass | ||
| from docarray.utils._internal.pydantic import is_pydantic_v2 | ||
@@ -77,5 +78,16 @@ if TYPE_CHECKING: | ||
| class _DocArrayTyped(cls): # type: ignore | ||
| doc_type: Type[BaseDocWithoutId] = cast(Type[BaseDocWithoutId], item) | ||
| if not is_pydantic_v2: | ||
| class _DocArrayTyped(cls): # type: ignore | ||
| doc_type: Type[BaseDocWithoutId] = cast( | ||
| Type[BaseDocWithoutId], item | ||
| ) | ||
| else: | ||
| class _DocArrayTyped(cls, Generic[T_doc]): # type: ignore | ||
| doc_type: Type[BaseDocWithoutId] = cast( | ||
| Type[BaseDocWithoutId], item | ||
| ) | ||
| for field in _DocArrayTyped.doc_type._docarray_fields().keys(): | ||
@@ -104,10 +116,20 @@ | ||
| # The global scope and qualname need to refer to this class a unique name. | ||
| # Otherwise, creating another _DocArrayTyped will overwrite this one. | ||
| change_cls_name( | ||
| _DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals() | ||
| ) | ||
| # # The global scope and qualname need to refer to this class a unique name. | ||
| # # Otherwise, creating another _DocArrayTyped will overwrite this one. | ||
| if not is_pydantic_v2: | ||
| change_cls_name( | ||
| _DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals() | ||
| ) | ||
| cls.__typed_da__[cls][item] = _DocArrayTyped | ||
| cls.__typed_da__[cls][item] = _DocArrayTyped | ||
| else: | ||
| change_cls_name(_DocArrayTyped, f'{cls.__name__}', globals()) | ||
| if sys.version_info < (3, 12): | ||
| cls.__typed_da__[cls][item] = Generic.__class_getitem__.__func__( | ||
| _DocArrayTyped, item | ||
| ) # type: ignore | ||
| # this do nothing that checking that item is valid type var or str | ||
| # Keep the approach in #1147 to be compatible with lower versions of Python. | ||
| else: | ||
| cls.__typed_da__[cls][item] = GenericAlias(_DocArrayTyped, item) # type: ignore | ||
| return cls.__typed_da__[cls][item] | ||
@@ -114,0 +136,0 @@ |
@@ -15,2 +15,3 @@ import io | ||
| overload, | ||
| Callable, | ||
| ) | ||
@@ -32,3 +33,2 @@ | ||
| if is_pydantic_v2: | ||
| from pydantic import GetCoreSchemaHandler | ||
| from pydantic_core import core_schema | ||
@@ -50,6 +50,3 @@ | ||
| class DocList( | ||
| ListAdvancedIndexing[T_doc], | ||
| PushPullMixin, | ||
| IOMixinDocList, | ||
| AnyDocArray[T_doc], | ||
| ListAdvancedIndexing[T_doc], PushPullMixin, IOMixinDocList, AnyDocArray[T_doc] | ||
| ): | ||
@@ -363,6 +360,18 @@ """ | ||
| def __get_pydantic_core_schema__( | ||
| cls, _source_type: Any, _handler: GetCoreSchemaHandler | ||
| cls, source: Any, handler: Callable[[Any], core_schema.CoreSchema] | ||
| ) -> core_schema.CoreSchema: | ||
| return core_schema.general_plain_validator_function( | ||
| cls.validate, | ||
| instance_schema = core_schema.is_instance_schema(cls) | ||
| args = getattr(source, '__args__', None) | ||
| if args: | ||
| sequence_t_schema = handler(Sequence[args[0]]) | ||
| else: | ||
| sequence_t_schema = handler(Sequence) | ||
| def validate_fn(v, info): | ||
| # input has already been validated | ||
| return cls(v, validate_input_docs=False) | ||
| non_instance_schema = core_schema.with_info_after_validator_function( | ||
| validate_fn, sequence_t_schema | ||
| ) | ||
| return core_schema.union_schema([instance_schema, non_instance_schema]) |
@@ -259,3 +259,2 @@ import base64 | ||
| """ | ||
| with file_ctx or io.BytesIO() as bf: | ||
@@ -262,0 +261,0 @@ self._write_bytes( |
@@ -201,3 +201,3 @@ from collections import ChainMap | ||
| if isinstance(field_type, type): | ||
| if isinstance(field_type, type) or safe_issubclass(field_type, AnyDocArray): | ||
| if tf_available and safe_issubclass(field_type, TensorFlowTensor): | ||
@@ -339,3 +339,5 @@ # tf.Tensor does not allow item assignment, therefore the | ||
| raise ValueError(f'DocVec[value.doc_type] is not compatible with {cls}') | ||
| elif isinstance(value, DocList.__class_getitem__(cls.doc_type)): | ||
| elif not is_pydantic_v2 and isinstance( | ||
| value, DocList.__class_getitem__(cls.doc_type) | ||
| ): | ||
| return cast(T, value.to_doc_vec()) | ||
@@ -342,0 +344,0 @@ elif isinstance(value, Sequence): |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.base_doc.any_doc import AnyDoc | ||
@@ -2,0 +17,0 @@ from docarray.base_doc.base_node import BaseNode |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from abc import ABC, abstractmethod | ||
@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, TypeVar, Optional, Type |
@@ -329,4 +329,9 @@ import os | ||
| type_ = self._get_field_annotation(field) | ||
| if isinstance(type_, type) and issubclass(type_, AnyDocArray): | ||
| doclist_exclude_fields.append(field) | ||
| if is_pydantic_v2: | ||
| # Conservative when touching pydantic v1 logic | ||
| if safe_issubclass(type_, AnyDocArray): | ||
| doclist_exclude_fields.append(field) | ||
| else: | ||
| if isinstance(type_, type) and safe_issubclass(type_, AnyDocArray): | ||
| doclist_exclude_fields.append(field) | ||
@@ -484,3 +489,2 @@ original_exclude = exclude | ||
| def _model_dump(doc): | ||
| ( | ||
@@ -487,0 +491,0 @@ exclude_, |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TYPE_CHECKING, Any | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.base_doc.mixins.io import IOMixin | ||
@@ -2,0 +17,0 @@ from docarray.base_doc.mixins.update import UpdateMixin |
@@ -113,5 +113,3 @@ from abc import abstractmethod | ||
| if isinstance(field_type, type) and safe_issubclass( | ||
| field_type, DocList | ||
| ): | ||
| if safe_issubclass(field_type, DocList): | ||
| nested_docarray_fields.append(field_name) | ||
@@ -118,0 +116,0 @@ else: |
@@ -0,3 +1,18 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.computation.abstract_comp_backend import AbstractComputationalBackend | ||
| __all__ = ['AbstractComputationalBackend'] |
@@ -0,3 +1,18 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.data.torch_dataset import MultiModalDataset | ||
| __all__ = ['MultiModalDataset'] |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing_extensions import TYPE_CHECKING | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.documents.audio import AudioDoc | ||
@@ -2,0 +17,0 @@ from docarray.documents.image import ImageDoc |
@@ -0,3 +1,18 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.documents.legacy.legacy_document import LegacyDocument | ||
| __all__ = ['LegacyDocument'] |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from __future__ import annotations | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.documents.mesh.mesh_3d import Mesh3D | ||
@@ -2,0 +17,0 @@ from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TYPE_CHECKING, Any, Type, TypeVar, Union | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.documents.point_cloud.point_cloud_3d import PointCloud3D | ||
@@ -2,0 +17,0 @@ from docarray.documents.point_cloud.points_and_colors import PointsAndColors |
@@ -0,2 +1,17 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| class UnusableObjectError(NotImplementedError): | ||
| ... |
@@ -16,2 +16,5 @@ import types | ||
| from docarray.index.backends.milvus import MilvusDocumentIndex # noqa: F401 | ||
| from docarray.index.backends.mongodb_atlas import ( # noqa: F401 | ||
| MongoDBAtlasDocumentIndex, | ||
| ) | ||
| from docarray.index.backends.qdrant import QdrantDocumentIndex # noqa: F401 | ||
@@ -30,2 +33,3 @@ from docarray.index.backends.redis import RedisDocumentIndex # noqa: F401 | ||
| 'MilvusDocumentIndex', | ||
| 'MongoDBAtlasDocumentIndex', | ||
| ] | ||
@@ -60,2 +64,5 @@ | ||
| import docarray.index.backends.redis as lib | ||
| elif name == 'MongoDBAtlasDocumentIndex': | ||
| import_library('pymongo', raise_error=True) | ||
| import docarray.index.backends.mongodb_atlas as lib | ||
| else: | ||
@@ -62,0 +69,0 @@ raise ImportError( |
@@ -355,8 +355,8 @@ # mypy: ignore-errors | ||
| for type in elastic_py_types.keys(): | ||
| if safe_issubclass(python_type, type): | ||
| for t in elastic_py_types.keys(): | ||
| if safe_issubclass(python_type, t): | ||
| self._logger.info( | ||
| f'Mapped Python type {python_type} to database type "{elastic_py_types[type]}"' | ||
| f'Mapped Python type {python_type} to database type "{elastic_py_types[t]}"' | ||
| ) | ||
| return elastic_py_types[type] | ||
| return elastic_py_types[t] | ||
@@ -363,0 +363,0 @@ err_msg = f'Unsupported column type for {type(self)}: {python_type}' |
@@ -103,4 +103,4 @@ import copy | ||
| for info in self._column_infos.values(): | ||
| for type in [list, np.ndarray, AbstractTensor]: | ||
| if safe_issubclass(info.docarray_type, type) and info.config.get( | ||
| for t in [list, np.ndarray, AbstractTensor]: | ||
| if safe_issubclass(info.docarray_type, t) and info.config.get( | ||
| 'is_embedding', False | ||
@@ -107,0 +107,0 @@ ): |
@@ -1,2 +0,2 @@ | ||
| from typing import Any, Dict, List, Tuple, Type, cast | ||
| from typing import Any, Dict, List, Tuple, Type, cast, Set | ||
@@ -23,2 +23,39 @@ from docarray import BaseDoc, DocList | ||
| def _collect_query_required_args(method_name: str, required_args: Set[str] = None): | ||
| """ | ||
| Returns a function that ensures required keyword arguments are provided. | ||
| :param method_name: The name of the method for which the required arguments are being checked. | ||
| :type method_name: str | ||
| :param required_args: A set containing the names of required keyword arguments. Defaults to None. | ||
| :type required_args: Optional[Set[str]] | ||
| :return: A function that checks for required keyword arguments before executing the specified method. | ||
| Raises ValueError if positional arguments are provided. | ||
| Raises TypeError if any required keyword argument is missing. | ||
| :rtype: Callable | ||
| """ | ||
| if required_args is None: | ||
| required_args = set() | ||
| def inner(self, *args, **kwargs): | ||
| if args: | ||
| raise ValueError( | ||
| f"Positional arguments are not supported for " | ||
| f"`{type(self)}.{method_name}`. " | ||
| f"Use keyword arguments instead." | ||
| ) | ||
| missing_args = required_args - set(kwargs.keys()) | ||
| if missing_args: | ||
| raise ValueError( | ||
| f"`{type(self)}.{method_name}` is missing required argument(s): {', '.join(missing_args)}" | ||
| ) | ||
| updated_query = self._queries + [(method_name, kwargs)] | ||
| return type(self)(updated_query) | ||
| return inner | ||
| def _execute_find_and_filter_query( | ||
@@ -25,0 +62,0 @@ doc_index: BaseDocIndex, query: List[Tuple[str, Dict]], reverse_order: bool = False |
@@ -644,3 +644,3 @@ import glob | ||
| search_field: str = '', | ||
| hashed_ids: Optional[Set[str]] = None, | ||
| hashed_ids: Optional[Set[int]] = None, | ||
| ) -> _FindResultBatched: | ||
@@ -647,0 +647,0 @@ """ |
@@ -195,3 +195,3 @@ from collections import defaultdict | ||
| if issubclass(python_type, ID): | ||
| if safe_issubclass(python_type, ID): | ||
| return DataType.VARCHAR | ||
@@ -669,3 +669,3 @@ | ||
| fields = search_field.split('__') | ||
| if issubclass(self._schema._get_field_annotation(fields[0]), AnyDocArray): # type: ignore | ||
| if safe_issubclass(self._schema._get_field_annotation(fields[0]), AnyDocArray): # type: ignore | ||
| return self._subindices[fields[0]].find_batched( | ||
@@ -672,0 +672,0 @@ queries, |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TYPE_CHECKING | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from abc import ABC, abstractmethod | ||
@@ -2,0 +17,0 @@ from typing import Dict, Iterator, List, Type |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| class ConcurrentPushException(Exception): | ||
@@ -2,0 +17,0 @@ """Exception raised when a concurrent push is detected.""" |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import logging | ||
@@ -2,0 +17,0 @@ from pathlib import Path |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from abc import abstractmethod | ||
@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, Any, Type, TypeVar |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.typing.bytes.audio_bytes import AudioBytes | ||
@@ -2,0 +17,0 @@ from docarray.typing.bytes.image_bytes import ImageBytes |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from abc import abstractmethod | ||
@@ -50,5 +65,5 @@ from typing import TYPE_CHECKING, Any, Type, TypeVar | ||
| ) -> 'core_schema.CoreSchema': | ||
| return core_schema.general_after_validator_function( | ||
| return core_schema.with_info_after_validator_function( | ||
| cls.validate, | ||
| core_schema.bytes_schema(), | ||
| ) |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TYPE_CHECKING, Any, Type, TypeVar, Union | ||
@@ -65,3 +80,3 @@ from uuid import UUID | ||
| ) -> core_schema.CoreSchema: | ||
| return core_schema.general_plain_validator_function( | ||
| return core_schema.with_info_plain_validator_function( | ||
| cls.validate, | ||
@@ -68,0 +83,0 @@ ) |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import Callable, Dict, Type, TypeVar | ||
@@ -2,0 +17,0 @@ |
@@ -398,6 +398,6 @@ import abc | ||
| ) -> core_schema.CoreSchema: | ||
| return core_schema.general_plain_validator_function( | ||
| return core_schema.with_info_plain_validator_function( | ||
| cls.validate, | ||
| serialization=core_schema.plain_serializer_function_ser_schema( | ||
| function=orjson_dumps, | ||
| function=lambda x: x._docarray_to_ndarray().tolist(), | ||
| return_schema=handler.generate_schema(bytes), | ||
@@ -404,0 +404,0 @@ when_used="json-unless-none", |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TypeVar | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from abc import ABC | ||
@@ -2,0 +17,0 @@ from typing import Any, Optional, Tuple, Type |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.typing.proto_register import _register_proto | ||
@@ -2,0 +17,0 @@ from docarray.typing.tensor.embedding.embedding_mixin import EmbeddingMixin |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.typing.proto_register import _register_proto | ||
@@ -2,0 +17,0 @@ from docarray.typing.tensor.image.abstract_image_tensor import AbstractImageTensor |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TypeVar | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TypeVar | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TYPE_CHECKING, Any, List, Tuple, Type, TypeVar, Union | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.typing.url.any_url import AnyUrl | ||
@@ -2,0 +17,0 @@ from docarray.typing.url.audio_url import AudioUrl |
@@ -59,3 +59,3 @@ import mimetypes | ||
| ) -> core_schema.CoreSchema: | ||
| return core_schema.general_after_validator_function( | ||
| return core_schema.with_info_after_validator_function( | ||
| cls._docarray_validate, | ||
@@ -62,0 +62,0 @@ core_schema.str_schema(), |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import warnings | ||
@@ -2,0 +17,0 @@ from typing import List, Optional, Tuple, TypeVar |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import warnings | ||
@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, TypeVar |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| TEXT_MIMETYPE = 'text' | ||
@@ -2,0 +17,0 @@ AUDIO_MIMETYPE = 'audio' |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import List, Optional, TypeVar | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from docarray.typing.url.url_3d.mesh_url import Mesh3DUrl | ||
@@ -2,0 +17,0 @@ from docarray.typing.url.url_3d.point_cloud_url import PointCloud3DUrl |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from abc import ABC | ||
@@ -2,0 +17,0 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, TypeVar, Union |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import Any, ForwardRef, Optional, Union | ||
@@ -49,4 +64,7 @@ | ||
| """ | ||
| origin = get_origin(x) | ||
| if origin: # If x is a generic type like DocList[SomeDoc], get its origin | ||
| x = origin | ||
| if ( | ||
| (get_origin(x) in (list, tuple, dict, set, Union)) | ||
| (origin in (list, tuple, dict, set, Union)) | ||
| or is_typevar(x) | ||
@@ -57,2 +75,3 @@ or (type(x) == ForwardRef) | ||
| return False | ||
| return issubclass(x, a_tuple) | ||
| return isinstance(x, type) and issubclass(x, a_tuple) |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import os | ||
@@ -2,0 +17,0 @@ from functools import lru_cache |
@@ -5,3 +5,3 @@ import importlib | ||
| import types | ||
| from typing import Any, Optional, Literal | ||
| from typing import Any, Literal, Optional | ||
@@ -54,2 +54,3 @@ import numpy as np | ||
| 'pymilvus': '"docarray[milvus]"', | ||
| "pymongo": '"docarray[mongo]"', | ||
| } | ||
@@ -56,0 +57,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import Optional | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import pydantic | ||
@@ -2,0 +17,0 @@ |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| from typing import Any, Dict, List, Optional, Union | ||
@@ -2,0 +17,0 @@ |
@@ -57,4 +57,5 @@ from typing import Any, Dict, List, Optional, Type, Union | ||
| fields_copy = copy.deepcopy(model.__fields__) | ||
| annotations_copy = copy.deepcopy(model.__annotations__) | ||
| copy_model = copy.deepcopy(model) | ||
| fields_copy = copy_model.__fields__ | ||
| annotations_copy = copy_model.__annotations__ | ||
| for field_name, field in annotations_copy.items(): | ||
@@ -69,5 +70,6 @@ if field_name not in fields_copy: | ||
| try: | ||
| if safe_issubclass(field, DocList): | ||
| if safe_issubclass(field, DocList) and not is_pydantic_v2: | ||
| t: Any = field.doc_type | ||
| fields[field_name] = (List[t], field_info) | ||
| t_aux = create_pure_python_type_model(t) | ||
| fields[field_name] = (List[t_aux], field_info) | ||
| else: | ||
@@ -78,3 +80,5 @@ fields[field_name] = (field, field_info) | ||
| return create_model(model.__name__, __base__=model, __doc__=model.__doc__, **fields) | ||
| return create_model( | ||
| copy_model.__name__, __base__=copy_model, __doc__=copy_model.__doc__, **fields | ||
| ) | ||
@@ -85,3 +89,2 @@ | ||
| field_name: str, | ||
| root_schema: Dict[str, Any], | ||
| cached_models: Dict[str, Any], | ||
@@ -96,3 +99,2 @@ is_tensor: bool = False, | ||
| :param field_name: The name of the field to be created | ||
| :param root_schema: The schema of the root object, important to get references | ||
| :param cached_models: Parameter used when this method is called recursively to reuse partial nested classes. | ||
@@ -117,3 +119,3 @@ :param is_tensor: Boolean used to tell between tensor and list | ||
| create_base_doc_from_schema( | ||
| root_schema['definitions'][ref_name], | ||
| definitions[ref_name], | ||
| ref_name, | ||
@@ -129,3 +131,2 @@ cached_models=cached_models, | ||
| field_name, | ||
| root_schema=root_schema, | ||
| cached_models=cached_models, | ||
@@ -149,3 +150,5 @@ is_tensor=tensor_shape is not None, | ||
| elif field_type == 'number': | ||
| if num_recursions <= 1: | ||
| if num_recursions == 0: | ||
| ret = float | ||
| elif num_recursions == 1: | ||
| # This is a hack because AnyTensor is more generic than a simple List and it comes as simple List | ||
@@ -168,3 +171,6 @@ if is_tensor: | ||
| additional_props = field_schema['additionalProperties'] | ||
| if additional_props.get('type') == 'object': | ||
| if ( | ||
| isinstance(additional_props, dict) | ||
| and additional_props.get('type') == 'object' | ||
| ): | ||
| doc_type = create_base_doc_from_schema( | ||
@@ -210,3 +216,2 @@ additional_props, field_name, cached_models=cached_models | ||
| field_name=field_name, | ||
| root_schema=root_schema, | ||
| cached_models=cached_models, | ||
@@ -272,2 +277,20 @@ is_tensor=tensor_shape is not None, | ||
| """ | ||
| def clean_refs(value): | ||
| """Recursively remove $ref keys and #/$defs values from a data structure.""" | ||
| if isinstance(value, dict): | ||
| # Create a new dictionary without $ref keys and without values containing #/$defs | ||
| cleaned_dict = {} | ||
| for k, v in value.items(): | ||
| if k == '$ref': | ||
| continue | ||
| cleaned_dict[k] = clean_refs(v) | ||
| return cleaned_dict | ||
| elif isinstance(value, list): | ||
| # Process each item in the list | ||
| return [clean_refs(item) for item in value] | ||
| else: | ||
| # Return primitive values as-is | ||
| return value | ||
| if not definitions: | ||
@@ -286,6 +309,6 @@ definitions = ( | ||
| has_id = True | ||
| # Get the field type | ||
| field_type = _get_field_annotation_from_schema( | ||
| field_schema=field_schema, | ||
| field_name=field_name, | ||
| root_schema=schema, | ||
| cached_models=cached_models, | ||
@@ -306,6 +329,18 @@ is_tensor=False, | ||
| for k, v in field_schema.items(): | ||
| if field_name == 'id': | ||
| # Skip default_factory for Optional fields and use None | ||
| field_kwargs['default'] = None | ||
| if k in FieldInfo.__slots__: | ||
| field_kwargs[k] = v | ||
| else: | ||
| field_json_schema_extra[k] = v | ||
| if k != '$ref': | ||
| if isinstance(v, dict): | ||
| cleaned_v = clean_refs(v) | ||
| if ( | ||
| cleaned_v | ||
| ): # Only add if there's something left after cleaning | ||
| field_json_schema_extra[k] = cleaned_v | ||
| else: | ||
| field_json_schema_extra[k] = v | ||
| fields[field_name] = ( | ||
@@ -312,0 +347,0 @@ field_type, |
@@ -0,1 +1,16 @@ | ||
| # Licensed to the LF AI & Data foundation under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| __all__ = ['reduce', 'reduce_all'] | ||
@@ -2,0 +17,0 @@ |
+10
-5
| Metadata-Version: 2.1 | ||
| Name: docarray | ||
| Version: 0.40.1.dev1 | ||
| Version: 0.41.0 | ||
| Summary: The data structure for multimodal data | ||
@@ -46,2 +46,3 @@ Home-page: https://docs.docarray.org/ | ||
| Provides-Extra: milvus | ||
| Provides-Extra: mongo | ||
| Provides-Extra: pandas | ||
@@ -72,2 +73,3 @@ Provides-Extra: proto | ||
| Requires-Dist: pymilvus (>=2.2.12,<3.0.0) ; extra == "milvus" | ||
| Requires-Dist: pymongo (>=4.6.2) ; extra == "mongo" | ||
| Requires-Dist: qdrant-client (>=1.4.0) ; (python_version < "3.12") and (extra == "qdrant") | ||
@@ -105,3 +107,3 @@ Requires-Dist: redis (>=4.6.0,<5.0.0) ; extra == "redis" | ||
| DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/). | ||
| DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE.md) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/). | ||
@@ -112,3 +114,3 @@ | ||
| - :zap: Based on **[Pydantic](https://github.com/pydantic/pydantic)**, and instantly compatible with web and microservice frameworks like **[FastAPI](https://github.com/tiangolo/fastapi/)** and **[Jina](https://github.com/jina-ai/jina/)**. | ||
| - :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), [Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**. | ||
| - :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**. | ||
| - :chains: Allows data transmission as JSON over **HTTP** or as **[Protobuf](https://protobuf.dev/)** over **[gRPC](https://grpc.io/)**. | ||
@@ -441,3 +443,3 @@ | ||
| Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come! | ||
| Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come! | ||
@@ -513,3 +515,3 @@ The Document Index interface lets you index and retrieve Documents from multiple vector databases, all with the same user interface. | ||
| For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come. | ||
| For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come. | ||
@@ -937,2 +939,3 @@ </details> | ||
| - [HNSWlib](https://github.com/nmslib/hnswlib) as a local-first ANN alternative | ||
| - [Mongo Atlas](https://www.mongodb.com/) | ||
@@ -968,2 +971,3 @@ An integration of [OpenSearch](https://opensearch.org/) is currently in progress. | ||
| # Define a document schema | ||
@@ -998,2 +1002,3 @@ class MovieDoc(BaseDoc): | ||
| RedisDocumentIndex, | ||
| MongoDBAtlasDocumentIndex, | ||
| ) | ||
@@ -1000,0 +1005,0 @@ |
+5
-2
| [tool.poetry] | ||
| name = "docarray" | ||
| version = '0.40.1.dev1' | ||
| version = '0.41.0' | ||
| description='The data structure for multimodal data' | ||
@@ -65,2 +65,3 @@ readme = 'README.md' | ||
| pyepsilla = {version = ">=0.2.3", optional = true} | ||
| pymongo = {version = ">=4.6.2", optional = true} | ||
@@ -86,2 +87,3 @@ [tool.poetry.extras] | ||
| epsilla = ["pyepsilla"] | ||
| mongo = ["pymongo"] | ||
@@ -168,3 +170,4 @@ # all | ||
| "elasticv8: marks test that run with ElasticSearch v8", | ||
| "jac: need to have access to jac cloud" | ||
| "jac: need to have access to jac cloud", | ||
| "atlas: mark tests using MongoDB Atlas", | ||
| ] |
+7
-4
@@ -19,3 +19,3 @@ <p align="center"> | ||
| DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/). | ||
| DocArray is a Python library expertly crafted for the [representation](#represent), [transmission](#send), [storage](#store), and [retrieval](#retrieve) of multimodal data. Tailored for the development of multimodal AI applications, its design guarantees seamless integration with the extensive Python and machine learning ecosystems. As of January 2022, DocArray is openly distributed under the [Apache License 2.0](https://github.com/docarray/docarray/blob/main/LICENSE.md) and currently enjoys the status of a sandbox project within the [LF AI & Data Foundation](https://lfaidata.foundation/). | ||
@@ -26,3 +26,3 @@ | ||
| - :zap: Based on **[Pydantic](https://github.com/pydantic/pydantic)**, and instantly compatible with web and microservice frameworks like **[FastAPI](https://github.com/tiangolo/fastapi/)** and **[Jina](https://github.com/jina-ai/jina/)**. | ||
| - :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), [Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**. | ||
| - :package: Provides support for vector databases such as **[Weaviate](https://weaviate.io/), [Qdrant](https://qdrant.tech/), [ElasticSearch](https://www.elastic.co/de/elasticsearch/), **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**. | ||
| - :chains: Allows data transmission as JSON over **HTTP** or as **[Protobuf](https://protobuf.dev/)** over **[gRPC](https://grpc.io/)**. | ||
@@ -355,3 +355,3 @@ | ||
| Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come! | ||
| Currently, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come! | ||
@@ -427,3 +427,3 @@ The Document Index interface lets you index and retrieve Documents from multiple vector databases, all with the same user interface. | ||
| For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come. | ||
| For now, Document Indexes support **[Weaviate](https://weaviate.io/)**, **[Qdrant](https://qdrant.tech/)**, **[ElasticSearch](https://www.elastic.co/)**, **[Redis](https://redis.io/)**, **[Mongo Atlas](https://www.mongodb.com/)**, Exact Nearest Neighbour search and **[HNSWLib](https://github.com/nmslib/hnswlib)**, with more to come. | ||
@@ -851,2 +851,3 @@ </details> | ||
| - [HNSWlib](https://github.com/nmslib/hnswlib) as a local-first ANN alternative | ||
| - [Mongo Atlas](https://www.mongodb.com/) | ||
@@ -882,2 +883,3 @@ An integration of [OpenSearch](https://opensearch.org/) is currently in progress. | ||
| # Define a document schema | ||
@@ -912,2 +914,3 @@ class MovieDoc(BaseDoc): | ||
| RedisDocumentIndex, | ||
| MongoDBAtlasDocumentIndex, | ||
| ) | ||
@@ -914,0 +917,0 @@ |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1016079
8.51%153
0.66%21561
8.26%