@@ -5,2 +5,8 @@ =======

		0.5.1 (2019-05-27)
		------------------

		Fixed whitespace handling when ``guess_punct_space`` is False: html-text was
		producing unnecessary spaces after newlines.

		0.5.0 (2018-11-19)
		@@ -7,0 +13,0 @@ ------------------

+7

-1

html_text.egg-info/PKG-INFO

		Metadata-Version: 1.1
		Name: html-text
		Version: 0.5.0
		Version: 0.5.1
		Summary: Extract text from HTML
		@@ -146,2 +146,8 @@ Home-page: https://github.com/TeamHG-Memex/html-text

		0.5.1 (2019-05-27)
		------------------

		Fixed whitespace handling when ``guess_punct_space`` is False: html-text was
		producing unnecessary spaces after newlines.

		0.5.0 (2018-11-19)
		@@ -148,0 +154,0 @@ ------------------

+1

-1

html_text/__init__.py

		# -*- coding: utf-8 -*-
		__version__ = '0.5.0'
		__version__ = '0.5.1'

		@@ -4,0 +4,0 @@ from .html_text import (etree_to_text, extract_text, selector_to_text,

+3

-1

html_text/html_text.py

		@@ -94,2 +94,4 @@ # -*- coding: utf-8 -*-
		return False
		if not guess_punct_space:
		return True
		if not _has_trailing_whitespace(prev):
		@@ -101,3 +103,3 @@ if _has_punct_after(text) or _has_open_bracket_before(prev):
		def get_space_between(text, prev):
		if not text or not guess_punct_space:
		if not text:
		return ' '
		@@ -104,0 +106,0 @@ return ' ' if should_add_space(text, prev) else ''

+7

-1

PKG-INFO

		Metadata-Version: 1.1
		Name: html_text
		Version: 0.5.0
		Version: 0.5.1
		Summary: Extract text from HTML
		@@ -146,2 +146,8 @@ Home-page: https://github.com/TeamHG-Memex/html-text

		0.5.1 (2019-05-27)
		------------------

		Fixed whitespace handling when ``guess_punct_space`` is False: html-text was
		producing unnecessary spaces after newlines.

		0.5.0 (2018-11-19)
		@@ -148,0 +154,0 @@ ------------------

+1

-1

setup.cfg

+1

-1

setup.py

		@@ -15,3 +15,3 @@ #!/usr/bin/env python
		name='html_text',
		version='0.5.0',
		version='0.5.1',
		description="Extract text from HTML",
		@@ -18,0 +18,0 @@ long_description=readme + '\n\n' + history,

+10

-2

tests/test_html_text.py

		@@ -141,4 +141,4 @@ # -*- coding: utf-8 -*-

		text = ('title\n\n text_1.\n\n text_2 text_3\n\n text_4\n text_5'
		'\n\n text_6 text_7 text_8\n\n text_9\n\n ...text_10')
		text = ('title\n\ntext_1.\n\ntext_2 text_3\n\ntext_4\ntext_5'
		'\n\ntext_6 text_7 text_8\n\ntext_9\n\n...text_10')
		assert extract_text(html, guess_punct_space=False, guess_layout=True) == text
		@@ -155,2 +155,10 @@

		def test_basic_newline():
		html = u'<div>a</div><div>b</div>'
		assert extract_text(html, guess_punct_space=False, guess_layout=False) == 'a b'
		assert extract_text(html, guess_punct_space=False, guess_layout=True) == 'a\nb'
		assert extract_text(html, guess_punct_space=True, guess_layout=False) == 'a b'
		assert extract_text(html, guess_punct_space=True, guess_layout=True) == 'a\nb'


		def test_adjust_newline():
		@@ -157,0 +165,0 @@ html = u'<div>text 1</div><p><div>text 2</div></p>'

html-text - pypi Package Compare versions

Improved metrics