@datagrok/bio
Advanced tools
Comparing version 2.12.9 to 2.12.10
# Bio changelog | ||
## 2.12.10 (2024-04-11) | ||
### Bug fixes | ||
* Bio: Fix detector for non-fasta seqs of the same length | ||
## 2.12.9 (2024-04-10) | ||
@@ -4,0 +10,0 @@ |
@@ -239,3 +239,5 @@ /** | ||
if (statsAsChars.sameLength) { // MSA FASTA single character | ||
if (statsAsChars.sameLength && !separator && | ||
!(['[', ']'].some((c) => c in statsAsChars.freq)) // not fasta ext notation | ||
) { // MSA FASTA single character | ||
const stats = this.getStats(categoriesSample, seqMinLength, splitter); | ||
@@ -371,4 +373,4 @@ const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, '-', colNameLikely ? 0.20 : 0); | ||
return (sepRate / expSepRate > 2.2 && mLengthVarN < 0.7) || | ||
(sepRate / expSepRate > 4) ? sep : null; | ||
return (sepRate / expSepRate > 2.2 && mLengthVarN < 0.8) || | ||
(sepRate / expSepRate > 3.5) ? sep : null; | ||
} | ||
@@ -375,0 +377,0 @@ |
@@ -8,3 +8,3 @@ { | ||
}, | ||
"version": "2.12.9", | ||
"version": "2.12.10", | ||
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.", | ||
@@ -55,3 +55,4 @@ "repository": { | ||
"umap-js": "^1.3.3", | ||
"wu": "latest" | ||
"wu": "latest", | ||
"@webgpu/types": "^0.1.40" | ||
}, | ||
@@ -111,2 +112,3 @@ "devDependencies": { | ||
"meta": { | ||
"dartium": false, | ||
"menu": { | ||
@@ -142,2 +144,2 @@ "Bio": { | ||
} | ||
} | ||
} |
@@ -24,9 +24,16 @@ import * as grok from 'datagrok-api/grok'; | ||
class PosCol { | ||
constructor( | ||
public readonly units: string, | ||
public readonly aligned: string | null, | ||
public readonly alphabet: string | null, | ||
public readonly alphabetSize: number, | ||
public readonly alphabetIsMultichar?: boolean, | ||
public readonly separator?: string, | ||
) { }; | ||
} | ||
category('detectors', () => { | ||
const enum csvTests { | ||
negEmpty = 'negEmpty', | ||
neg1 = 'neg1', | ||
neg2 = 'neg2', | ||
neg3 = 'neg3', | ||
negSmiles = 'negSmiles', | ||
fastaDna1 = 'csvFastaDna1', | ||
@@ -46,6 +53,13 @@ fastaRna1 = 'fastaRna1', | ||
fastaMsaPt1 = 'fastaMsaPt1', | ||
fastaMsaSameLength = 'fastaMsaSameLength', | ||
fastaExtSameLength = 'fastaExtSameLength', | ||
fastaMsaExtSameLength = 'fastaMsaExtSameLength', | ||
sepSameLength = 'sepSameLength', | ||
sepMsaSameLength = 'sepMsaSameLength', | ||
helmSameLength = 'helmSameLength', | ||
} | ||
const csvData = new class { | ||
[csvTests.negEmpty]: string = `id,col1 | ||
const csvData2: { [testName: string]: { csv: string, neg?: string[], pos?: { [colName: string]: PosCol } } } = { | ||
'negEmpty': { | ||
csv: `id,col1 | ||
1, | ||
@@ -55,13 +69,22 @@ 2, | ||
4, | ||
5,`; | ||
[csvTests.neg1]: string = `col1 | ||
5,`, | ||
neg: ['col1'] | ||
}, | ||
'negNum1': { | ||
csv: `col1 | ||
1 | ||
2 | ||
3`; | ||
[csvTests.neg2]: string = `col1 | ||
3`, | ||
neg: ['col1'], | ||
}, | ||
'negNum2': { | ||
csv: `col1 | ||
4 | ||
5 | ||
6 | ||
7`; | ||
[csvTests.neg3]: string = `col1 | ||
7`, | ||
neg: ['col1'], | ||
}, | ||
'negNum3': { | ||
csv: `col1 | ||
8 | ||
@@ -71,7 +94,85 @@ 9 | ||
11 | ||
12`; | ||
[csvTests.negSmiles]: string = `col1 | ||
12`, | ||
neg: ['col1'], | ||
}, | ||
'negSmiles': { | ||
csv: `col1 | ||
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3 | ||
C1CCCCC1 | ||
CCCCCC`; | ||
CCCCCC`, | ||
neg: ['col1'], | ||
}, | ||
// Same length | ||
'fastaMsaSameLength': { | ||
csv: `seq | ||
FWPHEYFWPHEYYV | ||
YNRQWYVYNRQWYV | ||
MKPSEYVMKPSEYV`, | ||
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false, undefined)} | ||
}, | ||
'fastaExtSameLength': { | ||
csv: `seq | ||
FW[Ac]PHEYFWPH | ||
YN[Re]VYNRQWYV | ||
[Me]EYVMPS[Et]`, | ||
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.UN, 16, true, undefined)}, | ||
}, | ||
'fastaMsaExtSameLength': { | ||
csv: `seq | ||
FW[Ac]PHEY[Re]WPH | ||
YN[Re]VYNR[Ac]WYV | ||
[Me]EYVMPSFW[Me]H`, | ||
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true, undefined)}, | ||
}, | ||
'sepSameLength': { | ||
csv: `seq | ||
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2 | ||
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2 | ||
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`, pos: { | ||
'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-'), | ||
} | ||
}, | ||
'sepMsaSameLength': { | ||
csv: `seq | ||
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2 | ||
Ac(1)-A-A(2)-A-A-A-C(2)-A-A-A-A-C(1)-G-NH2 | ||
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`, pos: { | ||
'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-'), | ||
} | ||
}, | ||
'helmSameLength': { | ||
csv: `seq | ||
PEPTIDE1{Ac(1).A.A.A.A.A.A.A.A.A.A.A.A.A.C(1).G.NH2}$$$$ | ||
PEPTIDE1{Ab(1).Y.V.K.H.P.F.W.R.W.Y.A.A.A.C(1).G.NH2}$$$$ | ||
PEPTIDE1{Ad(1).S.W.Y.C.K.H.P.M.W.A.A.A.A.C(1)-G-NH2}$$$$`, | ||
pos: { | ||
'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined) | ||
} | ||
}, | ||
}; | ||
const readCsv2: (key: keyof typeof csvData2) => DfReaderFunc = (key: keyof typeof csvData2) => { | ||
return async () => { | ||
const csv: string = csvData2[key].csv; | ||
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv); | ||
await grok.data.detectSemanticTypes(df); | ||
return df; | ||
}; | ||
}; | ||
for (const [testName, testData] of Object.entries(csvData2)) { | ||
test(`csvData2-${testName}`, async () => { | ||
const reader = readCsv2(testName as csvTests); | ||
for (const negColName of testData.neg ?? []) | ||
await _testNeg(reader, negColName); | ||
for (const [posColName, posCol] of Object.entries(testData.pos ?? {})) { | ||
await _testPos(reader, posColName, posCol.units, posCol.aligned, | ||
posCol.alphabet, posCol.alphabetSize, posCol.alphabetIsMultichar, posCol.separator); | ||
} | ||
}); | ||
} | ||
const csvData = new class { | ||
[csvTests.fastaDna1]: string = `seq | ||
@@ -215,3 +316,3 @@ ACGTCACGTC | ||
const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => { | ||
const readCsv: (key: keyof typeof csvData) => DfReaderFunc = (key: keyof typeof csvData) => { | ||
return async () => { | ||
@@ -226,8 +327,2 @@ // Always recreate test data frame from CSV for reproducible detector behavior in tests. | ||
test('NegativeEmpty', async () => { await _testNeg(readCsv(csvTests.negEmpty), 'col1'); }); | ||
test('Negative1', async () => { await _testNeg(readCsv(csvTests.neg1), 'col1'); }); | ||
test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); }); | ||
test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); }); | ||
test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); }); | ||
test('NegativeStartEnd', async () => { await _testNegList(['START', 'END']); }); | ||
@@ -452,4 +547,4 @@ test('NegativeStartEndIntermediate', async () => { await _testNegList(['START', 'END', 'INTERMEDIATE']); }); | ||
export async function _testPos( | ||
readDf: DfReaderFunc, colName: string, units: string, | ||
aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean, | ||
readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, | ||
alphabet: string | null, alphabetSize: number, alphabetIsMultichar?: boolean, | ||
separator: string | null = null, | ||
@@ -480,12 +575,2 @@ ) { | ||
class PosCol { | ||
constructor( | ||
public readonly units: string, | ||
public readonly aligned: string | null, | ||
public readonly alphabet: string | null, | ||
public readonly alphabetSize: number, | ||
public readonly alphabetIsMultichar: boolean, | ||
public readonly separator?: string, | ||
) { }; | ||
} | ||
@@ -492,0 +577,0 @@ export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> { |
@@ -93,3 +93,3 @@ /* Do not change these import lines to match external modules in webpack configuration */ | ||
async getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string) { | ||
const beautifiedMolV2000 = beautifiedMols.map((mol) => { | ||
const beautifiedMolV2000 = beautifiedMols.map((mol) => { | ||
if (mol === null) | ||
@@ -107,7 +107,7 @@ return ''; | ||
molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS'); | ||
const progress = i/beautifiedMolV2000.length*100; | ||
const progress = i / beautifiedMolV2000.length * 100; | ||
chiralityPb.update(progress, `${progress?.toFixed(2)}% of molecules completed`); | ||
} | ||
chiralityPb.close(); | ||
return DG.Column.fromStrings(columnName, molv3000Arr); | ||
return DG.Column.fromStrings(columnName, molv3000Arr); | ||
} | ||
@@ -138,3 +138,3 @@ | ||
return molBlock; | ||
})); | ||
})); | ||
} | ||
@@ -141,0 +141,0 @@ |
@@ -248,10 +248,10 @@ import * as grok from 'datagrok-api/grok'; | ||
molCol.name = df.columns.getUnusedName('molfile(' + molColumn.name + ')'); | ||
molCol.semType = DG.SEMTYPE.MOLECULE; | ||
if (addHelm) { | ||
targetHelmCol.setTag('cell.renderer', 'helm'); | ||
targetHelmCol.semType = DG.SEMTYPE.MACROMOLECULE; | ||
df.columns.add(targetHelmCol); | ||
} | ||
df.columns.add(molCol, true); | ||
await grok.data.detectSemanticTypes(df); | ||
} |
@@ -66,3 +66,3 @@ { | ||
"emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ | ||
"typeRoots": ["./node_modules/@webgpu/types", "./node_modules/@types"], | ||
/* Advanced Options */ | ||
@@ -69,0 +69,0 @@ "skipLibCheck": false, /* Skip type checking of declaration files. */ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
35407516
58909
20
+ Added @webgpu/types@^0.1.40