@@ -239,3 +239,5 @@ /**

		if (statsAsChars.sameLength) { // MSA FASTA single character
		if (statsAsChars.sameLength && !separator &&
		!(['[', ']'].some((c) => c in statsAsChars.freq)) // not fasta ext notation
		) { // MSA FASTA single character
		const stats = this.getStats(categoriesSample, seqMinLength, splitter);
		@@ -371,4 +373,4 @@ const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, '-', colNameLikely ? 0.20 : 0);

		return (sepRate / expSepRate > 2.2 && mLengthVarN < 0.7) \|\|
		(sepRate / expSepRate > 4) ? sep : null;
		return (sepRate / expSepRate > 2.2 && mLengthVarN < 0.8) \|\|
		(sepRate / expSepRate > 3.5) ? sep : null;
		}
		@@ -375,0 +377,0 @@

package.json

		@@ -8,3 +8,3 @@ {
		},
		"version": "2.12.9",
		"version": "2.12.10",
		"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
		@@ -55,3 +55,4 @@ "repository": {
		"umap-js": "^1.3.3",
		"wu": "latest"
		"wu": "latest",
		"@webgpu/types": "^0.1.40"
		},
		@@ -111,2 +112,3 @@ "devDependencies": {
		"meta": {
		"dartium": false,
		"menu": {
		@@ -142,2 +144,2 @@ "Bio": {
		}
		}
		}

155

src/tests/detectors-tests.ts

		@@ -24,9 +24,16 @@ import * as grok from 'datagrok-api/grok';


		/**
		 * Immutable bundle of the values checked for one positively detected column.
		 *
		 * The fields mirror, in order, the arguments that the `csvData2` test loop forwards
		 * to `_testPos` after the reader and the column name.
		 */
		class PosCol {
			/**
			 * @param units Notation value passed to `_testPos` as `units`.
			 * @param aligned Alignment value, or `null` when none is given.
			 * @param alphabet Alphabet value, or `null` when none is given.
			 * @param alphabetSize Alphabet size value.
			 * @param alphabetIsMultichar Optional multi-character alphabet flag; may be omitted.
			 * @param separator Optional separator string; may be omitted.
			 */
			constructor(
				public readonly units: string,
				public readonly aligned: string | null,
				public readonly alphabet: string | null,
				public readonly alphabetSize: number,
				public readonly alphabetIsMultichar?: boolean,
				public readonly separator?: string,
			) { }
		}

		category('detectors', () => {
		const enum csvTests {
		negEmpty = 'negEmpty',
		neg1 = 'neg1',
		neg2 = 'neg2',
		neg3 = 'neg3',
		negSmiles = 'negSmiles',
		fastaDna1 = 'csvFastaDna1',
		@@ -46,6 +53,13 @@ fastaRna1 = 'fastaRna1',
		fastaMsaPt1 = 'fastaMsaPt1',
		fastaMsaSameLength = 'fastaMsaSameLength',
		fastaExtSameLength = 'fastaExtSameLength',
		fastaMsaExtSameLength = 'fastaMsaExtSameLength',
		sepSameLength = 'sepSameLength',
		sepMsaSameLength = 'sepMsaSameLength',
		helmSameLength = 'helmSameLength',
		}

		const csvData = new class {
		[csvTests.negEmpty]: string = `id,col1
		const csvData2: { [testName: string]: { csv: string, neg?: string[], pos?: { [colName: string]: PosCol } } } = {
		'negEmpty': {
		csv: `id,col1
		1,
		@@ -55,13 +69,22 @@ 2,
		4,
		5,`;
		[csvTests.neg1]: string = `col1
		5,`,
		neg: ['col1']
		},
		'negNum1': {
		csv: `col1
		1
		2
		3`;
		[csvTests.neg2]: string = `col1
		3`,
		neg: ['col1'],
		},
		'negNum2': {
		csv: `col1
		4
		5
		6
		7`;
		[csvTests.neg3]: string = `col1
		7`,
		neg: ['col1'],
		},
		'negNum3': {
		csv: `col1
		8
		@@ -71,7 +94,85 @@ 9
		11
		12`;
		[csvTests.negSmiles]: string = `col1
		12`,
		neg: ['col1'],
		},

		'negSmiles': {
		csv: `col1
		CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
		C1CCCCC1
		CCCCCC`;
		CCCCCC`,
		neg: ['col1'],
		},

		// Same length
		'fastaMsaSameLength': {
		csv: `seq
		FWPHEYFWPHEYYV
		YNRQWYVYNRQWYV
		MKPSEYVMKPSEYV`,
		pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false, undefined)}
		},
		'fastaExtSameLength': {
		csv: `seq
		FW[Ac]PHEYFWPH
		YN[Re]VYNRQWYV
		[Me]EYVMPS[Et]`,
		pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.UN, 16, true, undefined)},
		},
		'fastaMsaExtSameLength': {
		csv: `seq
		FW[Ac]PHEY[Re]WPH
		YN[Re]VYNR[Ac]WYV
		[Me]EYVMPSFW[Me]H`,
		pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true, undefined)},
		},
		'sepSameLength': {
		csv: `seq
		Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
		Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
		Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`, pos: {
		'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-'),
		}
		},
		'sepMsaSameLength': {
		csv: `seq
		Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
		Ac(1)-A-A(2)-A-A-A-C(2)-A-A-A-A-C(1)-G-NH2
		Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`, pos: {
		'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-'),
		}
		},
		'helmSameLength': {
		csv: `seq
		PEPTIDE1{Ac(1).A.A.A.A.A.A.A.A.A.A.A.A.A.C(1).G.NH2}$$$$
		PEPTIDE1{Ab(1).Y.V.K.H.P.F.W.R.W.Y.A.A.A.C(1).G.NH2}$$$$
		PEPTIDE1{Ad(1).S.W.Y.C.K.H.P.M.W.A.A.A.A.C(1)-G-NH2}$$$$`,
		pos: {
		'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)
		}
		},
		};

		/**
		 * Returns a reader for the `csvData2` entry stored under `key`.
		 *
		 * Each invocation of the returned reader parses the entry's `csv` text into a
		 * new data frame, runs semantic type detection on it and resolves with that frame.
		 */
		const readCsv2: (key: keyof typeof csvData2) => DfReaderFunc = (key: keyof typeof csvData2) =>
			async () => {
				const table: DG.DataFrame = DG.DataFrame.fromCsv(csvData2[key].csv);
				await grok.data.detectSemanticTypes(table);
				return table;
			};

		// Register one test per `csvData2` entry, named `csvData2-<key>`.
		for (const [testName, testData] of Object.entries(csvData2)) {
			test(`csvData2-${testName}`, async () => {
				// `testName` is a plain string key of `csvData2`, which is exactly what `readCsv2`
				// accepts; no assertion to the `csvTests` enum is needed (most keys are not members of it).
				const reader = readCsv2(testName);
				// Every column name listed in `neg` goes through `_testNeg`, one at a time.
				for (const negColName of testData.neg ?? [])
					await _testNeg(reader, negColName);
				// Every `pos` entry is unpacked into the positional arguments of `_testPos`.
				for (const [posColName, posCol] of Object.entries(testData.pos ?? {})) {
					await _testPos(reader, posColName, posCol.units, posCol.aligned,
						posCol.alphabet, posCol.alphabetSize, posCol.alphabetIsMultichar, posCol.separator);
				}
			});
		}

		const csvData = new class {
		[csvTests.fastaDna1]: string = `seq
		@@ -215,3 +316,3 @@ ACGTCACGTC

		const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
		const readCsv: (key: keyof typeof csvData) => DfReaderFunc = (key: keyof typeof csvData) => {
		return async () => {
		@@ -226,8 +327,2 @@ // Always recreate test data frame from CSV for reproducible detector behavior in tests.


		test('NegativeEmpty', async () => { await _testNeg(readCsv(csvTests.negEmpty), 'col1'); });
		test('Negative1', async () => { await _testNeg(readCsv(csvTests.neg1), 'col1'); });
		test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
		test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
		test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
		test('NegativeStartEnd', async () => { await _testNegList(['START', 'END']); });
		@@ -452,4 +547,4 @@ test('NegativeStartEndIntermediate', async () => { await _testNegList(['START', 'END', 'INTERMEDIATE']); });
		export async function _testPos(
		readDf: DfReaderFunc, colName: string, units: string,
		aligned: string \| null, alphabet: string \| null, alphabetSize: number, alphabetIsMultichar: boolean,
		readDf: DfReaderFunc, colName: string, units: string, aligned: string \| null,
		alphabet: string \| null, alphabetSize: number, alphabetIsMultichar?: boolean,
		separator: string \| null = null,
		@@ -480,12 +575,2 @@ ) {

		/**
		 * Immutable bundle of the values checked for one positively detected column.
		 *
		 * `_testDf` accepts a map from column name to instances of this class.
		 */
		class PosCol {
			/**
			 * @param units Notation value for the column.
			 * @param aligned Alignment value, or `null` when none is given.
			 * @param alphabet Alphabet value, or `null` when none is given.
			 * @param alphabetSize Alphabet size value.
			 * @param alphabetIsMultichar Multi-character alphabet flag (required here).
			 * @param separator Optional separator string; may be omitted.
			 */
			constructor(
				public readonly units: string,
				public readonly aligned: string | null,
				public readonly alphabet: string | null,
				public readonly alphabetSize: number,
				public readonly alphabetIsMultichar: boolean,
				public readonly separator?: string,
			) { }
		}

		@@ -492,0 +577,0 @@ export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {

src/utils/helm-to-molfile.ts

		@@ -93,3 +93,3 @@ /* Do not change these import lines to match external modules in webpack configuration */
		async getMolV3000ViaOCL(beautifiedMols: (RDMol \| null)[], columnName: string) {
		const beautifiedMolV2000 = beautifiedMols.map((mol) => {
		const beautifiedMolV2000 = beautifiedMols.map((mol) => {
		if (mol === null)
		@@ -107,7 +107,7 @@ return '';
		molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
		const progress = i/beautifiedMolV2000.length*100;
		const progress = i / beautifiedMolV2000.length * 100;
		chiralityPb.update(progress, `${progress?.toFixed(2)}% of molecules completed`);
		}
		chiralityPb.close();
		return DG.Column.fromStrings(columnName, molv3000Arr);
		return DG.Column.fromStrings(columnName, molv3000Arr);
		}
		@@ -138,3 +138,3 @@
		return molBlock;
		}));
		}));
		}
		@@ -141,0 +141,0 @@

src/utils/poly-tool/transformation.ts

		@@ -248,10 +248,10 @@ import * as grok from 'datagrok-api/grok';
		molCol.name = df.columns.getUnusedName('molfile(' + molColumn.name + ')');
		molCol.semType = DG.SEMTYPE.MOLECULE;

		if (addHelm) {
		targetHelmCol.setTag('cell.renderer', 'helm');
		targetHelmCol.semType = DG.SEMTYPE.MACROMOLECULE;
		df.columns.add(targetHelmCol);
		}
		df.columns.add(molCol, true);

		await grok.data.detectSemanticTypes(df);
		}

tsconfig.json

		@@ -66,3 +66,3 @@ {
		"emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */

		"typeRoots": ["./node_modules/@webgpu/types", "./node_modules/@types"],
		/* Advanced Options */
		@@ -69,0 +69,0 @@ "skipLibCheck": false, /* Skip type checking of declaration files. */

dist/package-test.js

Sorry, the diff of this file is too big to display

dist/package-test.js.map

Sorry, the diff of this file is not supported yet

dist/package.js

Sorry, the diff of this file is too big to display

dist/package.js.map

Sorry, the diff of this file is not supported yet

@datagrok/bio - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes