item-matching
Advanced tools
+1
-1
| Metadata-Version: 2.4 | ||
| Name: item_matching | ||
| Version: 0.0.102 | ||
| Version: 0.0.104 | ||
| Summary: A name matching package | ||
@@ -5,0 +5,0 @@ Project-URL: Homepage, https://github.com/kevinkhang2909/item_matching |
+1
-1
@@ -7,3 +7,3 @@ [build-system] | ||
| name = "item_matching" | ||
| version = "0.0.102" | ||
| version = "0.0.104" | ||
| authors = [ | ||
@@ -10,0 +10,0 @@ { name="Kevin Khang", email="kevinkhang2909@gmail.com" }, |
@@ -15,3 +15,3 @@ from PIL import Image | ||
| from FlagEmbedding import BGEM3FlagModel | ||
| from transformers import Dinov2WithRegistersModel, AutoModel | ||
| from transformers import Dinov2WithRegistersModel, SiglipVisionModel, SiglipConfig | ||
| from .func import _create_folder | ||
@@ -74,3 +74,3 @@ | ||
| img_model = ( | ||
| AutoModel.from_pretrained( | ||
| SiglipVisionModel.from_pretrained( | ||
| pretrain_name, | ||
@@ -82,2 +82,3 @@ torch_dtype=torch.bfloat16, | ||
| ) | ||
| config = SiglipConfig.from_pretrained(pretrain_name) | ||
@@ -93,3 +94,4 @@ # pretrain_name = "facebook/dinov2-with-registers-base" | ||
| # ) | ||
| return torch.compile(img_model) | ||
| # return torch.compile(img_model) | ||
| return img_model, config | ||
@@ -99,2 +101,3 @@ | ||
| img_model, | ||
| config, | ||
| save_file_path: Path, | ||
@@ -116,14 +119,4 @@ iterable_list: list[str], | ||
| # 2) Pre‑allocate a .npy memmap for all embeddings | ||
| total = len(ds) | ||
| dim = img_model.config.hidden_size # e.g. 1024 | ||
| mmap = open_memmap( | ||
| filename=str(save_file_path), | ||
| mode="w+", | ||
| dtype="float32", | ||
| shape=(total, dim), | ||
| ) | ||
| # 3) Inference + save loop | ||
| idx = 0 | ||
| # 2) Inference + collect embeddings | ||
| all_embs = [] | ||
| with torch.inference_mode(): | ||
@@ -138,10 +131,6 @@ for batch in tqdm(loader): | ||
| emb = normed.cpu().numpy().astype("float32") # (B, dim) | ||
| bs = emb.shape[0] | ||
| mmap[idx : idx + bs] = emb # write into .npy | ||
| idx += bs | ||
| all_embs.append(emb) | ||
| mmap.flush() # ensure all data is on disk | ||
| embeddings = np.memmap( | ||
| save_file_path, dtype=np.float32, mode="r", shape=(total, dim) | ||
| ) | ||
| embeddings = np.concatenate(all_embs, axis=0) | ||
| np.save(save_file_path, embeddings) | ||
| return embeddings | ||
@@ -181,3 +170,3 @@ | ||
| self.col_embedding = f"{self.MATCH_BY}_embed" | ||
| self.img_model = get_img_model() | ||
| self.img_model, self.config = get_img_model() | ||
@@ -217,2 +206,3 @@ def load(self, data: pl.DataFrame): | ||
| img_model=self.img_model, | ||
| config=self.config, | ||
| save_file_path=array_name, | ||
@@ -219,0 +209,0 @@ iterable_list=dataset_chunk[self.col_input].to_list(), |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
3598077
-0.01%1273
-0.7%