Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a DemoInstallSign in
Socket

item-matching

Package Overview
Dependencies
Maintainers
1
Versions
106
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

item-matching - npm Package Compare versions

Comparing version
0.0.99
to
0.0.100
+1
-1
PKG-INFO
Metadata-Version: 2.4
Name: item_matching
Version: 0.0.99
Version: 0.0.100
Summary: A name matching package

@@ -5,0 +5,0 @@ Project-URL: Homepage, https://github.com/kevinkhang2909/item_matching

@@ -7,3 +7,3 @@ [build-system]

name = "item_matching"
version = "0.0.99"
version = "0.0.100"
authors = [

@@ -10,0 +10,0 @@ { name="Kevin Khang", email="kevinkhang2909@gmail.com" },

@@ -5,3 +5,3 @@ from pathlib import Path

from autofaiss import build_index
from datasets import concatenate_datasets, load_from_disk
from datasets import Dataset
import numpy as np

@@ -80,4 +80,4 @@ from rich import print

files = sorted(self.dataset_dict[f"{i}_ds_path"].glob("*"), key=self.sort_key_ds)
lst_ds = [load_from_disk(str(f)) for f in files]
dataset[i] = concatenate_datasets(lst_ds)
df = pl.concat([pl.read_parquet(f) for f in files])
dataset[i] = Dataset.from_polars(df)

@@ -84,0 +84,0 @@ # Add index

@@ -6,3 +6,2 @@ from PIL import Image

from rich import print
from datasets import Dataset, concatenate_datasets
from numpy.lib.format import open_memmap

@@ -176,3 +175,3 @@ import torch

# Check if exists:
dataset_name = self.path_ds / f"{i}"
dataset_name = self.path_ds / f"{i}.parquet"
array_name = self.path_array / f"{i}.npy"

@@ -187,3 +186,3 @@ if dataset_name.exists():

dataset_chunk = Dataset.from_polars(data[start_idx:end_idx])
dataset_chunk = data[start_idx:end_idx]
print(

@@ -198,3 +197,3 @@ f"[DataEmbedding] Shard [{i}/{num_chunks - 1}]: start {start_idx:,.0f} end {end_idx:,.0f}"

save_file_path=array_name,
iterable_list=dataset_chunk[self.col_input]
iterable_list=dataset_chunk[self.col_input].to_list()
)

@@ -205,10 +204,10 @@ else:

save_file_path=array_name,
iterable_list=dataset_chunk[self.col_input]
iterable_list=dataset_chunk[self.col_input].to_list()
)
# Concat
dset_embed = Dataset.from_dict({self.col_embedding: embeddings})
dataset_chunk = concatenate_datasets([dataset_chunk, dset_embed], axis=1)
dset_embed = pl.DataFrame({self.col_embedding: embeddings})
dataset_chunk = pl.concat([dataset_chunk, dset_embed], how="horizontal")
# Save chunk
dataset_chunk.save_to_disk(str(dataset_name))
dataset_chunk.write_parquet(str(dataset_name))