selfies
Advanced tools
+21
-10
| Metadata-Version: 2.1 | ||
| Name: selfies | ||
| Version: 2.1.0 | ||
| Version: 2.1.1 | ||
| Summary: SELFIES (SELF-referencIng Embedded Strings) is a general-purpose, sequence-based, robust representation of semantically constrained graphs. | ||
@@ -24,2 +24,3 @@ Home-page: https://github.com/aspuru-guzik-group/selfies | ||
| [Talk on YouTube about SELFIES](https://www.youtube.com/watch?v=CaIyUmfGXDk).\ | ||
| [A community paper with 31 authors on SELFIES and the future of molecular string representations](https://arxiv.org/abs/2204.00056).\ | ||
| [Blog explaining SELFIES in Japanese language](https://blacktanktop.hatenablog.com/entry/2021/08/12/115613)\ | ||
@@ -109,3 +110,16 @@ Major contributors of v1.0.n: _[Alston Lo](https://github.com/alstonlo) and [Seyone Chithrananda](https://github.com/seyonechithrananda)_\ | ||
| #### Very simple creation of random valid molecules: | ||
| A key property of SELFIES is the possibility to create valid random molecules in a very simple way -- inspired by a tweet by [Rajarshi Guha](https://twitter.com/rguha/status/1543601839983284224): | ||
| ```python | ||
| import selfies as sf | ||
| import random | ||
| alphabet=sf.get_semantic_robust_alphabet() # Gets the alphabet of robust symbols | ||
| rnd_selfies=''.join(random.sample(list(alphabet), 9)) | ||
| rnd_smiles=sf.decoder(rnd_selfies) | ||
| print(rnd_smiles) | ||
| ``` | ||
| These simple lines give crazy molecules, but all are valid. They can be used as a starting point for more advanced filtering techniques or for machine learning models. | ||
| #### Integer and one-hot encoding SELFIES: | ||
@@ -173,7 +187,4 @@ | ||
| print('Attribution:') | ||
| for smiles_token, a in attr: | ||
| for smiles_token in attr: | ||
| print(smiles_token) | ||
| if a: | ||
| for j, selfies_token in a: | ||
| print(f'\t{j}:{selfies_token}') | ||
@@ -185,7 +196,7 @@ # output | ||
| AttributionMap(index=0, token='C', attribution=[Attribution(index=0, token='[C]')]) | ||
| AttributionMap(index=4, token='N', attribution=[Attribution(index=1, token='[N]')]) | ||
| AttributionMap(index=6, token='C', attribution=[Attribution(index=2, token='[C]')]) | ||
| AttributionMap(index=9, token='P', attribution=[Attribution(index=3, token='[Branch1]'), Attribution(index=5, token='[P]')]) | ||
| AttributionMap(index=12, token='C', attribution=[Attribution(index=6, token='[C]')]) | ||
| AttributionMap(index=14, token='C', attribution=[Attribution(index=7, token='[C]')]) | ||
| AttributionMap(index=2, token='N', attribution=[Attribution(index=1, token='[N]')]) | ||
| AttributionMap(index=3, token='C', attribution=[Attribution(index=2, token='[C]')]) | ||
| AttributionMap(index=5, token='P', attribution=[Attribution(index=3, token='[Branch1]'), Attribution(index=5, token='[P]')]) | ||
| AttributionMap(index=7, token='C', attribution=[Attribution(index=6, token='[C]')]) | ||
| AttributionMap(index=8, token='C', attribution=[Attribution(index=7, token='[C]')]) | ||
| ``` | ||
@@ -192,0 +203,0 @@ |
+20
-9
@@ -16,2 +16,3 @@ # SELFIES | ||
| [Talk on YouTube about SELFIES](https://www.youtube.com/watch?v=CaIyUmfGXDk).\ | ||
| [A community paper with 31 authors on SELFIES and the future of molecular string representations](https://arxiv.org/abs/2204.00056).\ | ||
| [Blog explaining SELFIES in Japanese language](https://blacktanktop.hatenablog.com/entry/2021/08/12/115613)\ | ||
@@ -101,3 +102,16 @@ Major contributors of v1.0.n: _[Alston Lo](https://github.com/alstonlo) and [Seyone Chithrananda](https://github.com/seyonechithrananda)_\ | ||
| #### Very simple creation of random valid molecules: | ||
| A key property of SELFIES is the possibility to create valid random molecules in a very simple way -- inspired by a tweet by [Rajarshi Guha](https://twitter.com/rguha/status/1543601839983284224): | ||
| ```python | ||
| import selfies as sf | ||
| import random | ||
| alphabet=sf.get_semantic_robust_alphabet() # Gets the alphabet of robust symbols | ||
| rnd_selfies=''.join(random.sample(list(alphabet), 9)) | ||
| rnd_smiles=sf.decoder(rnd_selfies) | ||
| print(rnd_smiles) | ||
| ``` | ||
| These simple lines give crazy molecules, but all are valid. They can be used as a starting point for more advanced filtering techniques or for machine learning models. | ||
| #### Integer and one-hot encoding SELFIES: | ||
@@ -165,7 +179,4 @@ | ||
| print('Attribution:') | ||
| for smiles_token, a in attr: | ||
| for smiles_token in attr: | ||
| print(smiles_token) | ||
| if a: | ||
| for j, selfies_token in a: | ||
| print(f'\t{j}:{selfies_token}') | ||
@@ -177,7 +188,7 @@ # output | ||
| AttributionMap(index=0, token='C', attribution=[Attribution(index=0, token='[C]')]) | ||
| AttributionMap(index=4, token='N', attribution=[Attribution(index=1, token='[N]')]) | ||
| AttributionMap(index=6, token='C', attribution=[Attribution(index=2, token='[C]')]) | ||
| AttributionMap(index=9, token='P', attribution=[Attribution(index=3, token='[Branch1]'), Attribution(index=5, token='[P]')]) | ||
| AttributionMap(index=12, token='C', attribution=[Attribution(index=6, token='[C]')]) | ||
| AttributionMap(index=14, token='C', attribution=[Attribution(index=7, token='[C]')]) | ||
| AttributionMap(index=2, token='N', attribution=[Attribution(index=1, token='[N]')]) | ||
| AttributionMap(index=3, token='C', attribution=[Attribution(index=2, token='[C]')]) | ||
| AttributionMap(index=5, token='P', attribution=[Attribution(index=3, token='[Branch1]'), Attribution(index=5, token='[P]')]) | ||
| AttributionMap(index=7, token='C', attribution=[Attribution(index=6, token='[C]')]) | ||
| AttributionMap(index=8, token='C', attribution=[Attribution(index=7, token='[C]')]) | ||
| ``` | ||
@@ -184,0 +195,0 @@ |
| Metadata-Version: 2.1 | ||
| Name: selfies | ||
| Version: 2.1.0 | ||
| Version: 2.1.1 | ||
| Summary: SELFIES (SELF-referencIng Embedded Strings) is a general-purpose, sequence-based, robust representation of semantically constrained graphs. | ||
@@ -24,2 +24,3 @@ Home-page: https://github.com/aspuru-guzik-group/selfies | ||
| [Talk on YouTube about SELFIES](https://www.youtube.com/watch?v=CaIyUmfGXDk).\ | ||
| [A community paper with 31 authors on SELFIES and the future of molecular string representations](https://arxiv.org/abs/2204.00056).\ | ||
| [Blog explaining SELFIES in Japanese language](https://blacktanktop.hatenablog.com/entry/2021/08/12/115613)\ | ||
@@ -109,3 +110,16 @@ Major contributors of v1.0.n: _[Alston Lo](https://github.com/alstonlo) and [Seyone Chithrananda](https://github.com/seyonechithrananda)_\ | ||
| #### Very simple creation of random valid molecules: | ||
| A key property of SELFIES is the possibility to create valid random molecules in a very simple way -- inspired by a tweet by [Rajarshi Guha](https://twitter.com/rguha/status/1543601839983284224): | ||
| ```python | ||
| import selfies as sf | ||
| import random | ||
| alphabet=sf.get_semantic_robust_alphabet() # Gets the alphabet of robust symbols | ||
| rnd_selfies=''.join(random.sample(list(alphabet), 9)) | ||
| rnd_smiles=sf.decoder(rnd_selfies) | ||
| print(rnd_smiles) | ||
| ``` | ||
| These simple lines give crazy molecules, but all are valid. They can be used as a starting point for more advanced filtering techniques or for machine learning models. | ||
| #### Integer and one-hot encoding SELFIES: | ||
@@ -173,7 +187,4 @@ | ||
| print('Attribution:') | ||
| for smiles_token, a in attr: | ||
| for smiles_token in attr: | ||
| print(smiles_token) | ||
| if a: | ||
| for j, selfies_token in a: | ||
| print(f'\t{j}:{selfies_token}') | ||
@@ -185,7 +196,7 @@ # output | ||
| AttributionMap(index=0, token='C', attribution=[Attribution(index=0, token='[C]')]) | ||
| AttributionMap(index=4, token='N', attribution=[Attribution(index=1, token='[N]')]) | ||
| AttributionMap(index=6, token='C', attribution=[Attribution(index=2, token='[C]')]) | ||
| AttributionMap(index=9, token='P', attribution=[Attribution(index=3, token='[Branch1]'), Attribution(index=5, token='[P]')]) | ||
| AttributionMap(index=12, token='C', attribution=[Attribution(index=6, token='[C]')]) | ||
| AttributionMap(index=14, token='C', attribution=[Attribution(index=7, token='[C]')]) | ||
| AttributionMap(index=2, token='N', attribution=[Attribution(index=1, token='[N]')]) | ||
| AttributionMap(index=3, token='C', attribution=[Attribution(index=2, token='[C]')]) | ||
| AttributionMap(index=5, token='P', attribution=[Attribution(index=3, token='[Branch1]'), Attribution(index=5, token='[P]')]) | ||
| AttributionMap(index=7, token='C', attribution=[Attribution(index=6, token='[C]')]) | ||
| AttributionMap(index=8, token='C', attribution=[Attribution(index=7, token='[C]')]) | ||
| ``` | ||
@@ -192,0 +203,0 @@ |
@@ -28,3 +28,3 @@ #!/usr/bin/env python | ||
| __version__ = "2.0.0" | ||
| __version__ = "2.1.0" | ||
@@ -31,0 +31,0 @@ __all__ = [ |
+13
-4
@@ -187,2 +187,6 @@ from selfies.exceptions import EncoderError, SMILESParserError | ||
| else: | ||
| # start, end are so we can go back and | ||
| # correct offset from branch symbol in | ||
| # branch tokens | ||
| start = len(attribution_maps) | ||
| branch = _fragment_to_selfies( | ||
@@ -195,7 +199,5 @@ mol, bond, bond.dst, attribution_maps, len(derived)) | ||
| ) | ||
| end = len(attribution_maps) | ||
| derived.append(branch_symbol) | ||
| attribution_maps.append(AttributionMap( | ||
| len(derived) - 1 + attribution_index, | ||
| branch_symbol, mol.get_attribution(bond))) | ||
| for symbol in Q_as_symbols: | ||
@@ -206,2 +208,10 @@ derived.append(symbol) | ||
| symbol, mol.get_attribution(bond))) | ||
| # account for branch symbol because it is inserted after | ||
| for j in range(start, end): | ||
| attribution_maps[j].index += len(Q_as_symbols) + 1 | ||
| attribution_maps.append(AttributionMap( | ||
| len(derived) - 1 + attribution_index, | ||
| branch_symbol, mol.get_attribution(bond))) | ||
| derived.extend(branch) | ||
@@ -212,3 +222,2 @@ | ||
| break | ||
| return derived | ||
@@ -215,0 +224,0 @@ |
@@ -427,3 +427,3 @@ import enum | ||
| derived, mol, root, ring_log, attribution_maps, attribution_index) | ||
| attribution_index += len(derived) | ||
| attribution_index += _strlen(derived) | ||
| fragments.append("".join(derived)) | ||
@@ -436,2 +436,6 @@ # trim attribution map of empty tokens | ||
| def _strlen(slist: List[str]) -> int: | ||
| return len(''.join(slist)) | ||
| def _derive_smiles_from_fragment( | ||
@@ -447,3 +451,3 @@ derived, | ||
| attribution_maps.append(AttributionMap( | ||
| len(derived) - 1 + attribution_index, | ||
| _strlen(derived) - 1 + attribution_index, | ||
| token, mol.get_attribution(curr_atom))) | ||
@@ -457,3 +461,3 @@ | ||
| attribution_maps.append(AttributionMap( | ||
| len(derived) - 1 + attribution_index, | ||
| _strlen(derived) - 1 + attribution_index, | ||
| token, mol.get_attribution(bond))) | ||
@@ -473,3 +477,3 @@ ends = (min(bond.src, bond.dst), max(bond.src, bond.dst)) | ||
| attribution_maps.append(AttributionMap( | ||
| len(derived) - 1 + attribution_index, | ||
| _strlen(derived) - 1 + attribution_index, | ||
| token, mol.get_attribution(bond))) | ||
@@ -476,0 +480,0 @@ _derive_smiles_from_fragment( |
+1
-1
@@ -10,3 +10,3 @@ #!/usr/bin/env python | ||
| name="selfies", | ||
| version="2.1.0", | ||
| version="2.1.1", | ||
| author="Mario Krenn, Alston Lo, and many other contributors", | ||
@@ -13,0 +13,0 @@ author_email="mario.krenn@utoronto.ca, alan@aspuru.com", |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
124295
2.29%1821
0.55%