New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in · Demo · Install
Socket

@orama/orama

Package Overview
Dependencies
Maintainers
1
Versions
92
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@orama/orama - npm Package Compare versions

Comparing version 1.0.0-beta.9 to 1.0.0-beta.10

2

dist/components/index.d.ts

@@ -36,3 +36,3 @@ import { Node as AVLNode } from '../trees/avl.js';

export declare function search(context: SearchContext, index: Index, prop: string, term: string): Promise<TokenScore[]>;
export declare function searchByWhereClause(index: Index, filters: Record<string, boolean | ComparisonOperator>): Promise<string[]>;
export declare function searchByWhereClause(context: SearchContext, index: Index, filters: Record<string, boolean | ComparisonOperator>): Promise<string[]>;
export declare function getSearchableProperties(index: Index): Promise<string[]>;

@@ -39,0 +39,0 @@ export declare function getSearchablePropertiesWithTypes(index: Index): Promise<Record<string, 'string' | 'number' | 'boolean'>>;

@@ -113,3 +113,3 @@ import { createError } from "../errors.js";

}
const tokens = await tokenizer.tokenize(value, language);
const tokens = await tokenizer.tokenize(value, language, prop);
await implementation.insertDocumentScoreParameters(index, prop, id, tokens, docsCount);

@@ -131,3 +131,3 @@ for (const token of tokens){

}
const tokens = await tokenizer.tokenize(value, language);
const tokens = await tokenizer.tokenize(value, language, prop);
await implementation.removeDocumentScoreParameters(index, prop, id, docsCount);

@@ -160,3 +160,3 @@ for (const token of tokens){

}
export async function searchByWhereClause(index, filters) {
export async function searchByWhereClause(context, index, filters) {
const filterKeys = Object.keys(filters);

@@ -175,2 +175,16 @@ const filtersMap = filterKeys.reduce((acc, key)=>({

}
if (typeof operation === 'string' || Array.isArray(operation)) {
const idx = index.indexes[param];
for (const raw of [
operation
].flat()){
const term = await context.tokenizer.tokenize(raw, context.language, param);
const filteredIDsResults = radixFind(idx, {
term: term[0],
exact: true
});
filtersMap[param].push(...Object.values(filteredIDsResults).flat());
}
continue;
}
const operationKeys = Object.keys(operation);

@@ -177,0 +191,0 @@ if (operationKeys.length > 1) {

@@ -5,8 +5,9 @@ import { Stemmer, Tokenizer, TokenizerConfig } from '../../types.js';

stemmer?: Stemmer;
stemmerSkipProperties: Set<string>;
stopWords?: string[];
allowDuplicates: boolean;
normalizationCache: Map<string, string>;
normalizeToken(this: DefaultTokenizer, token: string): string;
normalizeToken(this: DefaultTokenizer, token: string, prop: string | undefined): string;
}
export declare function createTokenizer(config?: TokenizerConfig): Promise<DefaultTokenizer>;
export {};
import { createError } from "../../errors.js";
import { replaceDiacritics } from "./diacritics.js";
import { SPLITTERS, STEMMERS, SUPPORTED_LANGUAGES } from "./languages.js";
import { SPLITTERS, SUPPORTED_LANGUAGES } from "./languages.js";
import { stemmers } from "./stemmers.js";
import { stopWords as defaultStopWords } from "./stop-words/index.js";
function normalizeToken(token) {
function normalizeToken(prop, token) {
var _this_stopWords;
const key = `${this.language}:${token}`;
const key = `${this.language}:${prop}:${token}`;
if (this.normalizationCache.has(key)) {

@@ -17,3 +18,3 @@ return this.normalizationCache.get(key);

// Apply stemming if enabled
if (this.stemmer) {
if (this.stemmer && !this.stemmerSkipProperties.has(prop)) {
token = this.stemmer(token);

@@ -34,3 +35,3 @@ }

}
function tokenize(input, language) {
function tokenize(input, language, prop) {
if (language && language !== this.language) {

@@ -45,3 +46,3 @@ throw createError('LANGUAGE_NOT_SUPPORTED', language);

const splitRule = SPLITTERS[this.language];
const tokens = input.toLowerCase().split(splitRule).map(this.normalizeToken).filter(Boolean);
const tokens = input.toLowerCase().split(splitRule).map(this.normalizeToken.bind(this, prop ?? '')).filter(Boolean);
const trimTokens = trim(tokens);

@@ -59,23 +60,12 @@ if (!this.allowDuplicates) {

}
// Handle stemming
// Handle stemming - It is disabled by default
let stemmer;
if (config.stemming !== false) {
if (config.stemmer && typeof config.stemmer !== 'function') {
throw createError('INVALID_STEMMER_FUNCTION_TYPE');
}
if (config.stemming || config.stemmer && !('stemming' in config)) {
if (config.stemmer) {
if (typeof config.stemmer !== 'function') {
throw createError('INVALID_STEMMER_FUNCTION_TYPE');
}
stemmer = config.stemmer;
} else {
// Check if we are in a TypeScript or Javascript scenario and determine the stemmers path
// Note that the initial .. is purposely left inside the import in order to be compatible
// with vite.
try {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore This fails when verifying CJS but it's actually correct
const stemmersPath = import.meta.url.endsWith('ts') ? '../../stemmers/lib' : '../stemmers';
const stemmerImport = await import(`../${stemmersPath}/${STEMMERS[config.language]}.js`);
stemmer = stemmerImport.stemmer;
} catch (e) {
throw createError('BUNDLED_ORAMA', config.language);
}
stemmer = stemmers[config.language];
}

@@ -109,2 +99,5 @@ }

stemmer,
stemmerSkipProperties: new Set(config.stemmerSkipProperties ? [
config.stemmerSkipProperties
].flat() : []),
stopWords,

@@ -116,3 +109,3 @@ allowDuplicates: Boolean(config.allowDuplicates),

tokenizer.tokenize = tokenize.bind(tokenizer);
tokenizer.normalizeToken = normalizeToken.bind(tokenizer);
tokenizer.normalizeToken = normalizeToken;
return tokenizer;

@@ -119,0 +112,0 @@ }

@@ -1,27 +0,27 @@

import { stemmer as arabic } from '../../stemmers/ar.js';
import { stemmer as armenian } from '../../stemmers/am.js';
import { stemmer as bulgarian } from '../../stemmers/bg.js';
import { stemmer as danish } from '../../stemmers/dk.js';
import { stemmer as dutch } from '../../stemmers/nl.js';
import { stemmer as english } from '../../stemmers/en.js';
import { stemmer as finnish } from '../../stemmers/fi.js';
import { stemmer as french } from '../../stemmers/fr.js';
import { stemmer as german } from '../../stemmers/de.js';
import { stemmer as greek } from '../../stemmers/gr.js';
import { stemmer as hungarian } from '../../stemmers/hu.js';
import { stemmer as indian } from '../../stemmers/in.js';
import { stemmer as indonesian } from '../../stemmers/id.js';
import { stemmer as irish } from '../../stemmers/ie.js';
import { stemmer as italian } from '../../stemmers/it.js';
import { stemmer as lithuanian } from '../../stemmers/lt.js';
import { stemmer as nepali } from '../../stemmers/np.js';
import { stemmer as norwegian } from '../../stemmers/no.js';
import { stemmer as portuguese } from '../../stemmers/pt.js';
import { stemmer as romanian } from '../../stemmers/ro.js';
import { stemmer as russian } from '../../stemmers/ru.js';
import { stemmer as serbian } from '../../stemmers/rs.js';
import { stemmer as spanish } from '../../stemmers/es.js';
import { stemmer as swedish } from '../../stemmers/se.js';
import { stemmer as turkish } from '../../stemmers/tr.js';
import { stemmer as ukrainian } from '../../stemmers/uk.js';
import { stemmer as arabic } from '@stemmers/ar.js';
import { stemmer as armenian } from '@stemmers/am.js';
import { stemmer as bulgarian } from '@stemmers/bg.js';
import { stemmer as danish } from '@stemmers/dk.js';
import { stemmer as dutch } from '@stemmers/nl.js';
import { stemmer as english } from '@stemmers/en.js';
import { stemmer as finnish } from '@stemmers/fi.js';
import { stemmer as french } from '@stemmers/fr.js';
import { stemmer as german } from '@stemmers/de.js';
import { stemmer as greek } from '@stemmers/gr.js';
import { stemmer as hungarian } from '@stemmers/hu.js';
import { stemmer as indian } from '@stemmers/in.js';
import { stemmer as indonesian } from '@stemmers/id.js';
import { stemmer as irish } from '@stemmers/ie.js';
import { stemmer as italian } from '@stemmers/it.js';
import { stemmer as lithuanian } from '@stemmers/lt.js';
import { stemmer as nepali } from '@stemmers/np.js';
import { stemmer as norwegian } from '@stemmers/no.js';
import { stemmer as portuguese } from '@stemmers/pt.js';
import { stemmer as romanian } from '@stemmers/ro.js';
import { stemmer as russian } from '@stemmers/ru.js';
import { stemmer as serbian } from '@stemmers/rs.js';
import { stemmer as spanish } from '@stemmers/es.js';
import { stemmer as swedish } from '@stemmers/se.js';
import { stemmer as turkish } from '@stemmers/tr.js';
import { stemmer as ukrainian } from '@stemmers/uk.js';
export declare const stemmers: {

@@ -28,0 +28,0 @@ arabic: typeof arabic;

declare const errors: {
NO_LANGUAGE_WITH_CUSTOM_TOKENIZER: string;
BUNDLED_ORAMA: string;
LANGUAGE_NOT_SUPPORTED: string;

@@ -5,0 +4,0 @@ INVALID_STEMMER_FUNCTION_TYPE: string;

@@ -6,3 +6,2 @@ import { SUPPORTED_LANGUAGES } from "./components/tokenizer/languages.js";

NO_LANGUAGE_WITH_CUSTOM_TOKENIZER: 'Do not pass the language option to create when using a custom tokenizer.',
BUNDLED_ORAMA: 'Cannot find the stemmer for the locale "%s". This can happen if you are using Orama within a bundler like webpack. To solve this issue please look at https://docs.oramasearch.com/text-analysis/stemming#using-stemming-with-bundlers.',
LANGUAGE_NOT_SUPPORTED: `Language "%s" is not supported.\nSupported languages are:\n - ${allLanguages}`,

@@ -9,0 +8,0 @@ INVALID_STEMMER_FUNCTION_TYPE: `config.stemmer property must be a function.`,

@@ -11,3 +11,3 @@ import { prioritizeTokenScores } from "../components/algorithms.js";

};
async function createSearchContext(index, properties, tokens, params, docsCount) {
async function createSearchContext(tokenizer, index, documentsStore, language, params, properties, tokens, docsCount) {
// If filters are enabled, we need to get the IDs of the documents that match the filters.

@@ -52,4 +52,7 @@ // const hasFilters = Object.keys(params.where ?? {}).length > 0;

return {
timeStart: await getNanosecondsTime(),
tokenizer,
index,
timeStart: await getNanosecondsTime(),
documentsStore,
language,
params,

@@ -85,3 +88,3 @@ docsCount,

// Create the search context and the results
const context = await createSearchContext(orama.index, propertiesToSearch, tokens, params, await orama.documentsStore.count(docs));
const context = await createSearchContext(orama.tokenizer, orama.index, orama.documentsStore, language, params, propertiesToSearch, tokens, await orama.documentsStore.count(docs));
const results = Array.from({

@@ -94,3 +97,3 @@ length: limit

if (hasFilters) {
whereFiltersIDs = await orama.index.searchByWhereClause(index, params.where);
whereFiltersIDs = await orama.index.searchByWhereClause(context, index, params.where);
}

@@ -97,0 +100,0 @@ // Now it's time to loop over all the indices and get the documents IDs for every single term

@@ -143,3 +143,3 @@ import { Language } from './components/tokenizer/languages.js';

*/
where?: Record<string, boolean | ComparisonOperator>;
where?: Record<string, boolean | string | string[] | ComparisonOperator>;
};

@@ -169,2 +169,6 @@ export type Result = {

timeStart: bigint;
tokenizer: Tokenizer;
index: IIndex;
documentsStore: IDocumentsStore;
language: string | undefined;
params: SearchParams;

@@ -175,3 +179,2 @@ docsCount: number;

docsIntersection: TokenMap;
index: IIndex;
};

@@ -219,3 +222,3 @@ export type ElapsedTime = {

search(context: SearchContext, index: I, prop: string, term: string): SyncOrAsyncValue<TokenScore[]>;
searchByWhereClause(index: I, filters: Record<string, boolean | ComparisonOperator>): SyncOrAsyncValue<string[]>;
searchByWhereClause(context: SearchContext, index: I, filters: Record<string, boolean | string | string[] | ComparisonOperator>): SyncOrAsyncValue<string[]>;
getSearchableProperties(index: I): SyncOrAsyncValue<string[]>;

@@ -243,2 +246,3 @@ getSearchablePropertiesWithTypes(index: I): SyncOrAsyncValue<Record<string, SearchableType>>;

stemmer?: Stemmer;
stemmerSkipProperties?: string | string[];
stopWords?: boolean | string[] | ((stopWords: string[]) => string[] | Promise<string[]>);

@@ -250,3 +254,3 @@ allowDuplicates?: boolean;

normalizationCache: Map<string, string>;
tokenize: (raw: string, language?: string) => SyncOrAsyncValue<string[]>;
tokenize: (raw: string, language?: string, prop?: string) => SyncOrAsyncValue<string[]>;
}

@@ -253,0 +257,0 @@ export interface ObjectComponents {

@@ -13,2 +13,2 @@ Copyright 2023 OramaSearch Inc

See the License for the specific language governing permissions and
limitations under the License.
limitations under the License.
{
"name": "@orama/orama",
"version": "1.0.0-beta.9",
"version": "1.0.0-beta.10",
"type": "module",

@@ -5,0 +5,0 @@ "description": "Next generation full-text search engine, written in TypeScript",

@@ -209,3 +209,3 @@ ![Orama. Search, everywhere.](https://github.com/oramasearch/orama/blob/main/misc/oramasearch.gif?raw=true)

You can specify a different language by using the `defaultLanguage` property
You can specify a different language by using the `language` property
during Orama initialization.

@@ -212,0 +212,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc