natasha - PyPI Package Compare versions

+54

natasha/tests/test_span.py


		import pytest

		from natasha.span import (
		Span,
		envelop_spans
		)


		# Parametrized cases, each laid out as [spans, envelopes, expected groups].
		# Every tuple is a (start, stop) pair; the test below converts them to Span.
		tests = [
		    # Spans present, no envelopes: nothing is expected.
		    [[(0, 1)], [], []],
		    # One envelope, no spans: a single empty group is expected.
		    [[], [(0, 1)], [[]]],
		    # Envelope matches only the second span.
		    [[(0, 1), (1, 2)], [(1, 2)], [[(1, 2)]]],
		    # Envelope matches only the first span.
		    [[(0, 1), (1, 2)], [(0, 1)], [[(0, 1)]]],
		    # One envelope per span: each span lands in its own group.
		    [[(0, 1), (1, 2)], [(0, 1), (1, 2)], [[(0, 1)], [(1, 2)]]],
		]


		def adapt_spans(spans):
		    """Yield a ``Span`` with ``type=None`` for each ``(start, stop)`` pair.

		    Lets the test cases be written as plain tuples instead of Span objects.
		    """
		    for start, stop in spans:
		        yield Span(start, stop, type=None)


		@pytest.mark.parametrize('test', tests)
		def test_envelope_spans(test):
		    """Check that ``envelop_spans`` returns the groups a case expects.

		    ``test`` is one ``[spans, envelopes, target]`` entry of ``tests``.
		    """
		    spans, envelopes, target = test
		    # Convert the plain (start, stop) tuples into Span objects so the
		    # prediction and the expectation are compared as the same type.
		    spans = list(adapt_spans(spans))
		    envelopes = list(adapt_spans(envelopes))
		    target = [
		        list(adapt_spans(group))
		        for group in target
		    ]
		    # envelop_spans is consumed into a list so it can be compared directly.
		    pred = list(envelop_spans(spans, envelopes))
		    assert pred == target

+7

-335

natasha.egg-info/PKG-INFO

		Metadata-Version: 2.1
		Name: natasha
		Version: 1.2.0
		Version: 1.3.0
		Summary: Named-entity recognition for russian language
		@@ -332,336 +332,8 @@ Home-page: https://github.com/natasha/natasha

		### Segmentation
		* Segmentation — <a href="https://github.com/natasha/razdel#quality-performance">Razdel evaluation section</a>
		* Embedding — <a href="https://github.com/natasha/navec#evaluation">Navec evaluation section</a>
		* Morphology — <a href="https://github.com/natasha/slovnet#morphology-1">Slovnet Morph evaluation section</a>
		* Syntax — <a href="https://github.com/natasha/slovnet#syntax-1">Slovnet Syntax evaluation section</a>
		* NER — <a href="https://github.com/natasha/slovnet#ner-1">Slovnet NER evaluation section</a>

		Natasha uses <a href="https://github.com/natasha/razdel">Razdel</a> for text segmentation.

		`errors` — number of errors aggregated over 4 datasets, see <a href="https://github.com/natasha/razdel#quality-performance">Razdel evaluation section</a> for more info.

		<!--- token --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>errors</th>
		<th>time</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>razdel.tokenize</th>
		<td>5439</td>
		<td>9.898350</td>
		</tr>
		<tr>
		<th>mystem</th>
		<td>12192</td>
		<td>17.210470</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>12288</td>
		<td>19.920618</td>
		</tr>
		<tr>
		<th>nltk.word_tokenize</th>
		<td>130119</td>
		<td>12.405366</td>
		</tr>
		</tbody>
		</table>
		<!--- token --->

		<!--- sent --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>errors</th>
		<th>time</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>razdel.sentenize</th>
		<td>32106</td>
		<td>21.989045</td>
		</tr>
		<tr>
		<th>deeppavlov/rusenttokenize</th>
		<td>41722</td>
		<td>32.535322</td>
		</tr>
		<tr>
		<th>nltk.sent_tokenize</th>
		<td>60378</td>
		<td>29.916063</td>
		</tr>
		</tbody>
		</table>
		<!--- sent --->

		### Embedding

		Natasha uses <a href="https://github.com/natasha/navec">Navec pretrained embeddings</a>.

		`precision` — Average precision over 4 datasets, see <a href="https://github.com/natasha/navec#evaluation">Navec evaluation section</a> for more info.

		<!--- emb1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>type</th>
		<th>precision</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>vocab</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>hudlit_12B_500K_300d_100q</th>
		<td>navec</td>
		<td>0.825</td>
		<td>1.0</td>
		<td>50.6</td>
		<td>95.3</td>
		<td>500K</td>
		</tr>
		<tr>
		<th>news_1B_250K_300d_100q</th>
		<td>navec</td>
		<td>0.775</td>
		<td>0.5</td>
		<td>25.4</td>
		<td>47.7</td>
		<td>250K</td>
		</tr>
		<tr>
		<th>ruscorpora_upos_cbow_300_20_2019</th>
		<td>w2v</td>
		<td>0.777</td>
		<td>12.1</td>
		<td>220.6</td>
		<td>236.1</td>
		<td>189K</td>
		</tr>
		<tr>
		<th>ruwikiruscorpora_upos_skipgram_300_2_2019</th>
		<td>w2v</td>
		<td>0.776</td>
		<td>15.7</td>
		<td>290.0</td>
		<td>309.4</td>
		<td>248K</td>
		</tr>
		<tr>
		<th>tayga_upos_skipgram_300_2_2019</th>
		<td>w2v</td>
		<td>0.795</td>
		<td>15.7</td>
		<td>290.7</td>
		<td>310.9</td>
		<td>249K</td>
		</tr>
		<tr>
		<th>tayga_none_fasttextcbow_300_10_2019</th>
		<td>fasttext</td>
		<td>0.706</td>
		<td>11.3</td>
		<td>2741.9</td>
		<td>2746.9</td>
		<td>192K</td>
		</tr>
		<tr>
		<th>araneum_none_fasttextcbow_300_5_2018</th>
		<td>fasttext</td>
		<td>0.720</td>
		<td>7.8</td>
		<td>2752.1</td>
		<td>2754.7</td>
		<td>195K</td>
		</tr>
		</tbody>
		</table>
		<!--- emb1 --->

		### Morphology

		Natasha uses <a href="https://github.com/natasha/slovnet#morphology">Slovnet morphology tagger</a>.

		`accuracy` — accuracy on news dataset, see <a href="https://github.com/natasha/slovnet#morphology-1">Slovnet evaluation section</a> for more.

		<!--- morph1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>accuracy</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, sents/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.961</td>
		<td>1.0</td>
		<td>27</td>
		<td>115</td>
		<td>532.0</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.951</td>
		<td>20.0</td>
		<td>1393</td>
		<td>8704</td>
		<td>85.0 (gpu)</td>
		</tr>
		<tr>
		<th>deeppavlov</th>
		<td>0.940</td>
		<td>4.0</td>
		<td>32</td>
		<td>10240</td>
		<td>90.0 (gpu)</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>0.919</td>
		<td>10.9</td>
		<td>89</td>
		<td>579</td>
		<td>30.6</td>
		</tr>
		<tr>
		<th>udpipe</th>
		<td>0.918</td>
		<td>6.9</td>
		<td>45</td>
		<td>242</td>
		<td>56.2</td>
		</tr>
		</tbody>
		</table>
		<!--- morph1 --->

		### Syntax

		Natasha uses <a href="https://github.com/natasha/slovnet#syntax">Slovnet syntax parser</a>.

		`uas`, `las` — accuracy on news dataset, see <a href="https://github.com/natasha/slovnet#syntax-1">Slovnet evaluation section</a> for more.

		<!--- syntax1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>uas</th>
		<th>las</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, sents/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.907</td>
		<td>0.880</td>
		<td>1.0</td>
		<td>27</td>
		<td>125</td>
		<td>450.0</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.962</td>
		<td>0.910</td>
		<td>34.0</td>
		<td>1427</td>
		<td>8704</td>
		<td>75.0 (gpu)</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>0.876</td>
		<td>0.818</td>
		<td>10.9</td>
		<td>89</td>
		<td>579</td>
		<td>31.6</td>
		</tr>
		<tr>
		<th>udpipe</th>
		<td>0.873</td>
		<td>0.823</td>
		<td>6.9</td>
		<td>45</td>
		<td>242</td>
		<td>56.2</td>
		</tr>
		</tbody>
		</table>
		<!--- syntax1 --->

		### NER

		Natasha uses <a href="https://github.com/natasha/slovnet#ner">Slovnet NER tagger</a>.

		`f1` — score aggregated over 4 datasets, see <a href="https://github.com/natasha/slovnet#ner-1">Slovnet evaluation section</a> for more.

		<!--- ner1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>PER/LOC/ORG f1</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, articles/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.97/0.91/0.85</td>
		<td>1.0</td>
		<td>27</td>
		<td>205</td>
		<td>25.3</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.98/0.92/0.86</td>
		<td>34.5</td>
		<td>2048</td>
		<td>6144</td>
		<td>13.1 (gpu)</td>
		</tr>
		<tr>
		<th>deeppavlov</th>
		<td>0.92/0.86/0.76</td>
		<td>5.9</td>
		<td>1024</td>
		<td>3072</td>
		<td>24.3 (gpu)</td>
		</tr>
		<tr>
		<th>pullenti</th>
		<td>0.92/0.82/0.64</td>
		<td>2.9</td>
		<td>16</td>
		<td>253</td>
		<td>6.0</td>
		</tr>
		</tbody>
		</table>
		<!--- ner1 --->

		## Support
		@@ -671,3 +343,3 @@
		- Issues — https://github.com/natasha/natasha/issues
		- Commercial support — http://lab.alexkuk.ru/natasha
		- Commercial support — https://lab.alexkuk.ru

		@@ -674,0 +346,0 @@ ## Development

+2

-1

natasha.egg-info/SOURCES.txt

		@@ -45,2 +45,3 @@ README.md
		natasha/tests/test_money.py
		natasha/tests/test_name.py
		natasha/tests/test_name.py
		natasha/tests/test_span.py

+1

-1

natasha/__init__.py

		@@ -20,2 +20,2 @@

		__version__ = '1.2.0'
		__version__ = '1.3.0'

+14

-32

natasha/span.py

		@@ -23,34 +23,16 @@

		def append_sentinel(items, sentinel=None):
		    """Yield every element of *items*, then *sentinel* once at the end.

		    The trailing sentinel lets a caller advance with ``next()`` and detect
		    exhaustion by value instead of catching ``StopIteration``.
		    """
		    for item in items:
		        yield item
		    # Emitted after the loop, so it marks the end of the stream.
		    yield sentinel


		# Group `spans` by the envelope they fall inside.
		# NOTE(review): this hunk appears to list the removed body and the added
		# body of the function back to back, without +/- markers and with the
		# indentation stripped — confirm the split against the hunk header.
		def envelop_spans(spans, envelopes):
		# NOTE(review): presumably the old body starts here and runs to the
		# second `yield buffer` — TODO confirm.
		# Nothing is yielded when either input is empty.
		if not spans or not envelopes:
		return

		# Both inputs become sentinel-terminated generators advanced by next().
		spans = append_sentinel(spans)
		span = next(spans)

		envelopes = append_sentinel(envelopes)
		envelope = next(envelopes)

		buffer = []
		# Stops once either current value is falsy (the sentinel defaults to None).
		while span and envelope:
		# Span starts before the current envelope: drop it.
		if span.start < envelope.start:
		span = next(spans)

		# Span ends within the current envelope: collect it.
		elif span.stop <= envelope.stop:
		buffer.append(span)
		span = next(spans)

		# Span reaches past the envelope: emit what was collected (only if
		# non-empty) and move on to the next envelope.
		else:
		if buffer:
		yield buffer
		buffer = []
		envelope = next(envelopes)

		if buffer:
		yield buffer
		# NOTE(review): presumably the new body starts here — TODO confirm.
		# `index` is a cursor into `spans` that persists across envelopes;
		# `spans` is indexed and passed to len() here.
		index = 0
		for envelope in envelopes:
		chunk = []
		while index < len(spans):
		span = spans[index]
		index += 1
		# Span starts before the current envelope: skip it.
		if span.start < envelope.start:
		continue
		# Span ends within the current envelope: collect it.
		elif span.stop <= envelope.stop:
		chunk.append(span)
		else:
		# Step back so this span is examined again for the next envelope.
		index -= 1
		break
		# NOTE(review): looks like one chunk is yielded per envelope, even when
		# empty — nesting is inferred because indentation was lost.
		yield chunk

+7

-335

PKG-INFO

		Metadata-Version: 2.1
		Name: natasha
		Version: 1.2.0
		Version: 1.3.0
		Summary: Named-entity recognition for russian language
		@@ -332,336 +332,8 @@ Home-page: https://github.com/natasha/natasha

		### Segmentation
		* Segmentation — <a href="https://github.com/natasha/razdel#quality-performance">Razdel evaluation section</a>
		* Embedding — <a href="https://github.com/natasha/navec#evaluation">Navec evaluation section</a>
		* Morphology — <a href="https://github.com/natasha/slovnet#morphology-1">Slovnet Morph evaluation section</a>
		* Syntax — <a href="https://github.com/natasha/slovnet#syntax-1">Slovnet Syntax evaluation section</a>
		* NER — <a href="https://github.com/natasha/slovnet#ner-1">Slovnet NER evaluation section</a>

		Natasha uses <a href="https://github.com/natasha/razdel">Razdel</a> for text segmentation.

		`errors` — number of errors aggregated over 4 datasets, see <a href="https://github.com/natasha/razdel#quality-performance">Razdel evaluation section</a> for more info.

		<!--- token --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>errors</th>
		<th>time</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>razdel.tokenize</th>
		<td>5439</td>
		<td>9.898350</td>
		</tr>
		<tr>
		<th>mystem</th>
		<td>12192</td>
		<td>17.210470</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>12288</td>
		<td>19.920618</td>
		</tr>
		<tr>
		<th>nltk.word_tokenize</th>
		<td>130119</td>
		<td>12.405366</td>
		</tr>
		</tbody>
		</table>
		<!--- token --->

		<!--- sent --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>errors</th>
		<th>time</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>razdel.sentenize</th>
		<td>32106</td>
		<td>21.989045</td>
		</tr>
		<tr>
		<th>deeppavlov/rusenttokenize</th>
		<td>41722</td>
		<td>32.535322</td>
		</tr>
		<tr>
		<th>nltk.sent_tokenize</th>
		<td>60378</td>
		<td>29.916063</td>
		</tr>
		</tbody>
		</table>
		<!--- sent --->

		### Embedding

		Natasha uses <a href="https://github.com/natasha/navec">Navec pretrained embeddings</a>.

		`precision` — Average precision over 4 datasets, see <a href="https://github.com/natasha/navec#evaluation">Navec evaluation section</a> for more info.

		<!--- emb1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>type</th>
		<th>precision</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>vocab</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>hudlit_12B_500K_300d_100q</th>
		<td>navec</td>
		<td>0.825</td>
		<td>1.0</td>
		<td>50.6</td>
		<td>95.3</td>
		<td>500K</td>
		</tr>
		<tr>
		<th>news_1B_250K_300d_100q</th>
		<td>navec</td>
		<td>0.775</td>
		<td>0.5</td>
		<td>25.4</td>
		<td>47.7</td>
		<td>250K</td>
		</tr>
		<tr>
		<th>ruscorpora_upos_cbow_300_20_2019</th>
		<td>w2v</td>
		<td>0.777</td>
		<td>12.1</td>
		<td>220.6</td>
		<td>236.1</td>
		<td>189K</td>
		</tr>
		<tr>
		<th>ruwikiruscorpora_upos_skipgram_300_2_2019</th>
		<td>w2v</td>
		<td>0.776</td>
		<td>15.7</td>
		<td>290.0</td>
		<td>309.4</td>
		<td>248K</td>
		</tr>
		<tr>
		<th>tayga_upos_skipgram_300_2_2019</th>
		<td>w2v</td>
		<td>0.795</td>
		<td>15.7</td>
		<td>290.7</td>
		<td>310.9</td>
		<td>249K</td>
		</tr>
		<tr>
		<th>tayga_none_fasttextcbow_300_10_2019</th>
		<td>fasttext</td>
		<td>0.706</td>
		<td>11.3</td>
		<td>2741.9</td>
		<td>2746.9</td>
		<td>192K</td>
		</tr>
		<tr>
		<th>araneum_none_fasttextcbow_300_5_2018</th>
		<td>fasttext</td>
		<td>0.720</td>
		<td>7.8</td>
		<td>2752.1</td>
		<td>2754.7</td>
		<td>195K</td>
		</tr>
		</tbody>
		</table>
		<!--- emb1 --->

		### Morphology

		Natasha uses <a href="https://github.com/natasha/slovnet#morphology">Slovnet morphology tagger</a>.

		`accuracy` — accuracy on news dataset, see <a href="https://github.com/natasha/slovnet#morphology-1">Slovnet evaluation section</a> for more.

		<!--- morph1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>accuracy</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, sents/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.961</td>
		<td>1.0</td>
		<td>27</td>
		<td>115</td>
		<td>532.0</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.951</td>
		<td>20.0</td>
		<td>1393</td>
		<td>8704</td>
		<td>85.0 (gpu)</td>
		</tr>
		<tr>
		<th>deeppavlov</th>
		<td>0.940</td>
		<td>4.0</td>
		<td>32</td>
		<td>10240</td>
		<td>90.0 (gpu)</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>0.919</td>
		<td>10.9</td>
		<td>89</td>
		<td>579</td>
		<td>30.6</td>
		</tr>
		<tr>
		<th>udpipe</th>
		<td>0.918</td>
		<td>6.9</td>
		<td>45</td>
		<td>242</td>
		<td>56.2</td>
		</tr>
		</tbody>
		</table>
		<!--- morph1 --->

		### Syntax

		Natasha uses <a href="https://github.com/natasha/slovnet#syntax">Slovnet syntax parser</a>.

		`uas`, `las` — accuracy on news dataset, see <a href="https://github.com/natasha/slovnet#syntax-1">Slovnet evaluation section</a> for more.

		<!--- syntax1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>uas</th>
		<th>las</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, sents/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.907</td>
		<td>0.880</td>
		<td>1.0</td>
		<td>27</td>
		<td>125</td>
		<td>450.0</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.962</td>
		<td>0.910</td>
		<td>34.0</td>
		<td>1427</td>
		<td>8704</td>
		<td>75.0 (gpu)</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>0.876</td>
		<td>0.818</td>
		<td>10.9</td>
		<td>89</td>
		<td>579</td>
		<td>31.6</td>
		</tr>
		<tr>
		<th>udpipe</th>
		<td>0.873</td>
		<td>0.823</td>
		<td>6.9</td>
		<td>45</td>
		<td>242</td>
		<td>56.2</td>
		</tr>
		</tbody>
		</table>
		<!--- syntax1 --->

		### NER

		Natasha uses <a href="https://github.com/natasha/slovnet#ner">Slovnet NER tagger</a>.

		`f1` — score aggregated over 4 datasets, see <a href="https://github.com/natasha/slovnet#ner-1">Slovnet evaluation section</a> for more.

		<!--- ner1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>PER/LOC/ORG f1</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, articles/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.97/0.91/0.85</td>
		<td>1.0</td>
		<td>27</td>
		<td>205</td>
		<td>25.3</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.98/0.92/0.86</td>
		<td>34.5</td>
		<td>2048</td>
		<td>6144</td>
		<td>13.1 (gpu)</td>
		</tr>
		<tr>
		<th>deeppavlov</th>
		<td>0.92/0.86/0.76</td>
		<td>5.9</td>
		<td>1024</td>
		<td>3072</td>
		<td>24.3 (gpu)</td>
		</tr>
		<tr>
		<th>pullenti</th>
		<td>0.92/0.82/0.64</td>
		<td>2.9</td>
		<td>16</td>
		<td>253</td>
		<td>6.0</td>
		</tr>
		</tbody>
		</table>
		<!--- ner1 --->

		## Support
		@@ -671,3 +343,3 @@
		- Issues — https://github.com/natasha/natasha/issues
		- Commercial support — http://lab.alexkuk.ru/natasha
		- Commercial support — https://lab.alexkuk.ru

		@@ -674,0 +346,0 @@ ## Development

+6

-334

README.md

		@@ -324,336 +324,8 @@

		### Segmentation
		* Segmentation — <a href="https://github.com/natasha/razdel#quality-performance">Razdel evaluation section</a>
		* Embedding — <a href="https://github.com/natasha/navec#evaluation">Navec evaluation section</a>
		* Morphology — <a href="https://github.com/natasha/slovnet#morphology-1">Slovnet Morph evaluation section</a>
		* Syntax — <a href="https://github.com/natasha/slovnet#syntax-1">Slovnet Syntax evaluation section</a>
		* NER — <a href="https://github.com/natasha/slovnet#ner-1">Slovnet NER evaluation section</a>

		Natasha uses <a href="https://github.com/natasha/razdel">Razdel</a> for text segmentation.

		`errors` — number of errors aggregated over 4 datasets, see <a href="https://github.com/natasha/razdel#quality-performance">Razdel evaluation section</a> for more info.

		<!--- token --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>errors</th>
		<th>time</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>razdel.tokenize</th>
		<td>5439</td>
		<td>9.898350</td>
		</tr>
		<tr>
		<th>mystem</th>
		<td>12192</td>
		<td>17.210470</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>12288</td>
		<td>19.920618</td>
		</tr>
		<tr>
		<th>nltk.word_tokenize</th>
		<td>130119</td>
		<td>12.405366</td>
		</tr>
		</tbody>
		</table>
		<!--- token --->

		<!--- sent --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>errors</th>
		<th>time</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>razdel.sentenize</th>
		<td>32106</td>
		<td>21.989045</td>
		</tr>
		<tr>
		<th>deeppavlov/rusenttokenize</th>
		<td>41722</td>
		<td>32.535322</td>
		</tr>
		<tr>
		<th>nltk.sent_tokenize</th>
		<td>60378</td>
		<td>29.916063</td>
		</tr>
		</tbody>
		</table>
		<!--- sent --->

		### Embedding

		Natasha uses <a href="https://github.com/natasha/navec">Navec pretrained embeddings</a>.

		`precision` — Average precision over 4 datasets, see <a href="https://github.com/natasha/navec#evaluation">Navec evaluation section</a> for more info.

		<!--- emb1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>type</th>
		<th>precision</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>vocab</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>hudlit_12B_500K_300d_100q</th>
		<td>navec</td>
		<td>0.825</td>
		<td>1.0</td>
		<td>50.6</td>
		<td>95.3</td>
		<td>500K</td>
		</tr>
		<tr>
		<th>news_1B_250K_300d_100q</th>
		<td>navec</td>
		<td>0.775</td>
		<td>0.5</td>
		<td>25.4</td>
		<td>47.7</td>
		<td>250K</td>
		</tr>
		<tr>
		<th>ruscorpora_upos_cbow_300_20_2019</th>
		<td>w2v</td>
		<td>0.777</td>
		<td>12.1</td>
		<td>220.6</td>
		<td>236.1</td>
		<td>189K</td>
		</tr>
		<tr>
		<th>ruwikiruscorpora_upos_skipgram_300_2_2019</th>
		<td>w2v</td>
		<td>0.776</td>
		<td>15.7</td>
		<td>290.0</td>
		<td>309.4</td>
		<td>248K</td>
		</tr>
		<tr>
		<th>tayga_upos_skipgram_300_2_2019</th>
		<td>w2v</td>
		<td>0.795</td>
		<td>15.7</td>
		<td>290.7</td>
		<td>310.9</td>
		<td>249K</td>
		</tr>
		<tr>
		<th>tayga_none_fasttextcbow_300_10_2019</th>
		<td>fasttext</td>
		<td>0.706</td>
		<td>11.3</td>
		<td>2741.9</td>
		<td>2746.9</td>
		<td>192K</td>
		</tr>
		<tr>
		<th>araneum_none_fasttextcbow_300_5_2018</th>
		<td>fasttext</td>
		<td>0.720</td>
		<td>7.8</td>
		<td>2752.1</td>
		<td>2754.7</td>
		<td>195K</td>
		</tr>
		</tbody>
		</table>
		<!--- emb1 --->

		### Morphology

		Natasha uses <a href="https://github.com/natasha/slovnet#morphology">Slovnet morphology tagger</a>.

		`accuracy` — accuracy on news dataset, see <a href="https://github.com/natasha/slovnet#morphology-1">Slovnet evaluation section</a> for more.

		<!--- morph1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>accuracy</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, sents/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.961</td>
		<td>1.0</td>
		<td>27</td>
		<td>115</td>
		<td>532.0</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.951</td>
		<td>20.0</td>
		<td>1393</td>
		<td>8704</td>
		<td>85.0 (gpu)</td>
		</tr>
		<tr>
		<th>deeppavlov</th>
		<td>0.940</td>
		<td>4.0</td>
		<td>32</td>
		<td>10240</td>
		<td>90.0 (gpu)</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>0.919</td>
		<td>10.9</td>
		<td>89</td>
		<td>579</td>
		<td>30.6</td>
		</tr>
		<tr>
		<th>udpipe</th>
		<td>0.918</td>
		<td>6.9</td>
		<td>45</td>
		<td>242</td>
		<td>56.2</td>
		</tr>
		</tbody>
		</table>
		<!--- morph1 --->

		### Syntax

		Natasha uses <a href="https://github.com/natasha/slovnet#syntax">Slovnet syntax parser</a>.

		`uas`, `las` — accuracy on news dataset, see <a href="https://github.com/natasha/slovnet#syntax-1">Slovnet evaluation section</a> for more.

		<!--- syntax1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>uas</th>
		<th>las</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, sents/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.907</td>
		<td>0.880</td>
		<td>1.0</td>
		<td>27</td>
		<td>125</td>
		<td>450.0</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.962</td>
		<td>0.910</td>
		<td>34.0</td>
		<td>1427</td>
		<td>8704</td>
		<td>75.0 (gpu)</td>
		</tr>
		<tr>
		<th>spacy</th>
		<td>0.876</td>
		<td>0.818</td>
		<td>10.9</td>
		<td>89</td>
		<td>579</td>
		<td>31.6</td>
		</tr>
		<tr>
		<th>udpipe</th>
		<td>0.873</td>
		<td>0.823</td>
		<td>6.9</td>
		<td>45</td>
		<td>242</td>
		<td>56.2</td>
		</tr>
		</tbody>
		</table>
		<!--- syntax1 --->

		### NER

		Natasha uses <a href="https://github.com/natasha/slovnet#ner">Slovnet NER tagger</a>.

		`f1` — score aggregated over 4 datasets, see <a href="https://github.com/natasha/slovnet#ner-1">Slovnet evaluation section</a> for more.

		<!--- ner1 --->
		<table border="0" class="dataframe">
		<thead>
		<tr style="text-align: right;">
		<th></th>
		<th>PER/LOC/ORG f1</th>
		<th>init, s</th>
		<th>disk, mb</th>
		<th>ram, mb</th>
		<th>speed, articles/s</th>
		</tr>
		</thead>
		<tbody>
		<tr>
		<th>slovnet</th>
		<td>0.97/0.91/0.85</td>
		<td>1.0</td>
		<td>27</td>
		<td>205</td>
		<td>25.3</td>
		</tr>
		<tr>
		<th>deeppavlov_bert</th>
		<td>0.98/0.92/0.86</td>
		<td>34.5</td>
		<td>2048</td>
		<td>6144</td>
		<td>13.1 (gpu)</td>
		</tr>
		<tr>
		<th>deeppavlov</th>
		<td>0.92/0.86/0.76</td>
		<td>5.9</td>
		<td>1024</td>
		<td>3072</td>
		<td>24.3 (gpu)</td>
		</tr>
		<tr>
		<th>pullenti</th>
		<td>0.92/0.82/0.64</td>
		<td>2.9</td>
		<td>16</td>
		<td>253</td>
		<td>6.0</td>
		</tr>
		</tbody>
		</table>
		<!--- ner1 --->

		## Support
		@@ -663,3 +335,3 @@
		- Issues — https://github.com/natasha/natasha/issues
		- Commercial support — http://lab.alexkuk.ru/natasha
		- Commercial support — https://lab.alexkuk.ru

		@@ -666,0 +338,0 @@ ## Development

+1

-1

setup.cfg

		[bumpversion]
		current_version = 1.2.0
		current_version = 1.3.0
		files = setup.py natasha/__init__.py
		@@ -4,0 +4,0 @@ commit = True

+1

-1

setup.py

		@@ -15,3 +15,3 @@
		name='natasha',
		version='1.2.0',
		version='1.3.0',

		@@ -18,0 +18,0 @@ description='Named-entity recognition for russian language',

natasha - PyPI Package Compare versions

Improved metrics

Worsened metrics