@@ -5,2 +5,10 @@ =======

		0.5.2 (2020-07-22)
		------------------

		* Handle lxml Cleaner exceptions (a workaround for
		https://bugs.launchpad.net/lxml/+bug/1838497 );
		* Python 3.8 support;
		* testing improvements.

		0.5.1 (2019-05-27)
		@@ -7,0 +15,0 @@ ------------------

+10

-1

html_text.egg-info/PKG-INFO

		Metadata-Version: 1.1
		Name: html-text
		Version: 0.5.1
		Version: 0.5.2
		Summary: Extract text from HTML
		@@ -146,2 +146,10 @@ Home-page: https://github.com/TeamHG-Memex/html-text

		0.5.2 (2020-07-22)
		------------------

		* Handle lxml Cleaner exceptions (a workaround for
		https://bugs.launchpad.net/lxml/+bug/1838497 );
		* Python 3.8 support;
		* testing improvements.

		0.5.1 (2019-05-27)
		@@ -231,1 +239,2 @@ ------------------
		Classifier: Programming Language :: Python :: 3.7
		Classifier: Programming Language :: Python :: 3.8

+1

-1

html_text/__init__.py

		# -*- coding: utf-8 -*-
		__version__ = '0.5.1'
		__version__ = '0.5.2'

		@@ -4,0 +4,0 @@ from .html_text import (etree_to_text, extract_text, selector_to_text,

+8

-1

html_text/html_text.py

		@@ -42,5 +42,12 @@ # -- coding: utf-8 --
		tree = parse_html(html)
		return cleaner.clean_html(tree)

		# Workaround for https://bugs.launchpad.net/lxml/+bug/1838497:
		# cleaner.clean_html may raise AssertionError; when it does, fall
		# back to the uncleaned tree.
		try:
		cleaned = cleaner.clean_html(tree)
		except AssertionError:
		cleaned = tree

		return cleaned


		def parse_html(html):
		@@ -47,0 +54,0 @@ """ Create an lxml.html.HtmlElement from a string with html.

+10

-1

PKG-INFO

		Metadata-Version: 1.1
		Name: html_text
		Version: 0.5.1
		Version: 0.5.2
		Summary: Extract text from HTML
		@@ -146,2 +146,10 @@ Home-page: https://github.com/TeamHG-Memex/html-text

		0.5.2 (2020-07-22)
		------------------

		* Handle lxml Cleaner exceptions (a workaround for
		https://bugs.launchpad.net/lxml/+bug/1838497 );
		* Python 3.8 support;
		* testing improvements.

		0.5.1 (2019-05-27)
		@@ -231,1 +239,2 @@ ------------------
		Classifier: Programming Language :: Python :: 3.7
		Classifier: Programming Language :: Python :: 3.8

+1

-1

setup.cfg

+2

-1

setup.py

		@@ -15,3 +15,3 @@ #!/usr/bin/env python
		name='html_text',
		version='0.5.1',
		version='0.5.2',
		description="Extract text from HTML",
		@@ -38,2 +38,3 @@ long_description=readme + '\n\n' + history,
		'Programming Language :: Python :: 3.7',
		'Programming Language :: Python :: 3.8',
		],
		@@ -40,0 +41,0 @@ test_suite='tests',

+7

-0

tests/test_html_text.py

		@@ -76,2 +76,9 @@ # -- coding: utf-8 --

		def test_extract_text_from_fail_html():
		html = "<html><frameset><frame></frameset></html>"
		tree = parse_html(html)
		node = tree.xpath('/html/frameset')[0]
		assert extract_text(node) == u''


		def test_punct_whitespace():
		@@ -78,0 +85,0 @@ html = u'<div><span>field</span>, and more</div>'

html-text - pypi Package Compare versions

Improved metrics