html-text
Advanced tools
+6
-0
@@ -5,2 +5,8 @@ ======= | ||
| 0.5.1 (2019-05-27) | ||
| ------------------ | ||
| Fixed whitespace handling when ``guess_punct_space`` is False: html-text was | ||
| producing unnecessary spaces after newlines. | ||
| 0.5.0 (2018-11-19) | ||
@@ -7,0 +13,0 @@ ------------------ |
| Metadata-Version: 1.1 | ||
| Name: html-text | ||
| Version: 0.5.0 | ||
| Version: 0.5.1 | ||
| Summary: Extract text from HTML | ||
@@ -146,2 +146,8 @@ Home-page: https://github.com/TeamHG-Memex/html-text | ||
| 0.5.1 (2019-05-27) | ||
| ------------------ | ||
| Fixed whitespace handling when ``guess_punct_space`` is False: html-text was | ||
| producing unnecessary spaces after newlines. | ||
| 0.5.0 (2018-11-19) | ||
@@ -148,0 +154,0 @@ ------------------ |
| # -*- coding: utf-8 -*- | ||
| __version__ = '0.5.0' | ||
| __version__ = '0.5.1' | ||
@@ -4,0 +4,0 @@ from .html_text import (etree_to_text, extract_text, selector_to_text, |
@@ -94,2 +94,4 @@ # -*- coding: utf-8 -*- | ||
| return False | ||
| if not guess_punct_space: | ||
| return True | ||
| if not _has_trailing_whitespace(prev): | ||
@@ -101,3 +103,3 @@ if _has_punct_after(text) or _has_open_bracket_before(prev): | ||
| def get_space_between(text, prev): | ||
| if not text or not guess_punct_space: | ||
| if not text: | ||
| return ' ' | ||
@@ -104,0 +106,0 @@ return ' ' if should_add_space(text, prev) else '' |
+7
-1
| Metadata-Version: 1.1 | ||
| Name: html_text | ||
| Version: 0.5.0 | ||
| Version: 0.5.1 | ||
| Summary: Extract text from HTML | ||
@@ -146,2 +146,8 @@ Home-page: https://github.com/TeamHG-Memex/html-text | ||
| 0.5.1 (2019-05-27) | ||
| ------------------ | ||
| Fixed whitespace handling when ``guess_punct_space`` is False: html-text was | ||
| producing unnecessary spaces after newlines. | ||
| 0.5.0 (2018-11-19) | ||
@@ -148,0 +154,0 @@ ------------------ |
+1
-1
| [bumpversion] | ||
| current_version = 0.5.0 | ||
| current_version = 0.5.1 | ||
| commit = True | ||
@@ -4,0 +4,0 @@ tag = True |
+1
-1
@@ -15,3 +15,3 @@ #!/usr/bin/env python | ||
| name='html_text', | ||
| version='0.5.0', | ||
| version='0.5.1', | ||
| description="Extract text from HTML", | ||
@@ -18,0 +18,0 @@ long_description=readme + '\n\n' + history, |
@@ -141,4 +141,4 @@ # -*- coding: utf-8 -*- | ||
| text = ('title\n\n text_1.\n\n text_2 text_3\n\n text_4\n text_5' | ||
| '\n\n text_6 text_7 text_8\n\n text_9\n\n ...text_10') | ||
| text = ('title\n\ntext_1.\n\ntext_2 text_3\n\ntext_4\ntext_5' | ||
| '\n\ntext_6 text_7 text_8\n\ntext_9\n\n...text_10') | ||
| assert extract_text(html, guess_punct_space=False, guess_layout=True) == text | ||
@@ -155,2 +155,10 @@ | ||
| def test_basic_newline(): | ||
| html = u'<div>a</div><div>b</div>' | ||
| assert extract_text(html, guess_punct_space=False, guess_layout=False) == 'a b' | ||
| assert extract_text(html, guess_punct_space=False, guess_layout=True) == 'a\nb' | ||
| assert extract_text(html, guess_punct_space=True, guess_layout=False) == 'a b' | ||
| assert extract_text(html, guess_punct_space=True, guess_layout=True) == 'a\nb' | ||
| def test_adjust_newline(): | ||
@@ -157,0 +165,0 @@ html = u'<div>text 1</div><p><div>text 2</div></p>' |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
201571
0.5%375
2.18%