Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo | Install | Sign in
Socket

item-matching

Package Overview
Dependencies
Maintainers
1
Versions
106
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

item-matching - npm Package Compare versions

Comparing version
0.0.102
to
0.0.104
+1
-1
PKG-INFO
Metadata-Version: 2.4
Name: item_matching
Version: 0.0.102
Version: 0.0.104
Summary: A name matching package

@@ -5,0 +5,0 @@ Project-URL: Homepage, https://github.com/kevinkhang2909/item_matching

@@ -7,3 +7,3 @@ [build-system]

name = "item_matching"
version = "0.0.102"
version = "0.0.104"
authors = [

@@ -10,0 +10,0 @@ { name="Kevin Khang", email="kevinkhang2909@gmail.com" },

@@ -15,3 +15,3 @@ from PIL import Image

from FlagEmbedding import BGEM3FlagModel
from transformers import Dinov2WithRegistersModel, AutoModel
from transformers import Dinov2WithRegistersModel, SiglipVisionModel, SiglipConfig
from .func import _create_folder

@@ -74,3 +74,3 @@

img_model = (
AutoModel.from_pretrained(
SiglipVisionModel.from_pretrained(
pretrain_name,

@@ -82,2 +82,3 @@ torch_dtype=torch.bfloat16,

)
config = SiglipConfig.from_pretrained(pretrain_name)

@@ -93,3 +94,4 @@ # pretrain_name = "facebook/dinov2-with-registers-base"

# )
return torch.compile(img_model)
# return torch.compile(img_model)
return img_model, config

@@ -99,2 +101,3 @@

img_model,
config,
save_file_path: Path,

@@ -116,14 +119,4 @@ iterable_list: list[str],

# 2) Pre‑allocate a .npy memmap for all embeddings
total = len(ds)
dim = img_model.config.hidden_size # e.g. 1024
mmap = open_memmap(
filename=str(save_file_path),
mode="w+",
dtype="float32",
shape=(total, dim),
)
# 3) Inference + save loop
idx = 0
# 2) Inference + collect embeddings
all_embs = []
with torch.inference_mode():

@@ -138,10 +131,6 @@ for batch in tqdm(loader):

emb = normed.cpu().numpy().astype("float32") # (B, dim)
bs = emb.shape[0]
mmap[idx : idx + bs] = emb # write into .npy
idx += bs
all_embs.append(emb)
mmap.flush() # ensure all data is on disk
embeddings = np.memmap(
save_file_path, dtype=np.float32, mode="r", shape=(total, dim)
)
embeddings = np.concatenate(all_embs, axis=0)
np.save(save_file_path, embeddings)
return embeddings

@@ -181,3 +170,3 @@

self.col_embedding = f"{self.MATCH_BY}_embed"
self.img_model = get_img_model()
self.img_model, self.config = get_img_model()

@@ -217,2 +206,3 @@ def load(self, data: pl.DataFrame):

img_model=self.img_model,
config=self.config,
save_file_path=array_name,

@@ -219,0 +209,0 @@ iterable_list=dataset_chunk[self.col_input].to_list(),