html-text
Advanced tools
+6
-0
@@ -5,2 +5,8 @@ ======= | ||
| 0.6.1 (2024-04-23) | ||
| ------------------ | ||
| * Fixed HTML comment and processing instruction handling. | ||
| * Use ``lxml-html-clean`` instead of ``lxml[html_clean]`` in setup.py, | ||
| to avoid https://github.com/jazzband/pip-tools/issues/2004 | ||
| 0.6.0 (2024-04-04) | ||
@@ -7,0 +13,0 @@ ------------------ |
| Metadata-Version: 2.1 | ||
| Name: html_text | ||
| Version: 0.6.0 | ||
| Version: 0.6.1 | ||
| Summary: Extract text from HTML | ||
@@ -20,3 +20,4 @@ Home-page: https://github.com/zytedata/html-text | ||
| License-File: LICENSE | ||
| Requires-Dist: lxml[html_clean] | ||
| Requires-Dist: lxml | ||
| Requires-Dist: lxml-html-clean | ||
@@ -164,2 +165,8 @@ ============ | ||
| 0.6.1 (2024-04-23) | ||
| ------------------ | ||
| * Fixed HTML comment and processing instruction handling. | ||
| * Use ``lxml-html-clean`` instead of ``lxml[html_clean]`` in setup.py, | ||
| to avoid https://github.com/jazzband/pip-tools/issues/2004 | ||
| 0.6.0 (2024-04-04) | ||
@@ -166,0 +173,0 @@ ------------------ |
@@ -1,1 +0,2 @@ | ||
| lxml[html_clean] | ||
| lxml | ||
| lxml-html-clean |
| # -*- coding: utf-8 -*- | ||
| __version__ = '0.6.0' | ||
| __version__ = '0.6.1' | ||
@@ -4,0 +4,0 @@ from .html_text import (etree_to_text, extract_text, selector_to_text, |
@@ -221,2 +221,8 @@ # -*- coding: utf-8 -*- | ||
| return '' | ||
| no_content_nodes = ( | ||
| lxml.html.HtmlComment, | ||
| lxml.html.HtmlProcessingInstruction | ||
| ) | ||
| if isinstance(html, no_content_nodes): | ||
| return '' | ||
| cleaned = _cleaned_html_tree(html) | ||
@@ -223,0 +229,0 @@ return etree_to_text( |
+9
-2
| Metadata-Version: 2.1 | ||
| Name: html_text | ||
| Version: 0.6.0 | ||
| Version: 0.6.1 | ||
| Summary: Extract text from HTML | ||
@@ -20,3 +20,4 @@ Home-page: https://github.com/zytedata/html-text | ||
| License-File: LICENSE | ||
| Requires-Dist: lxml[html_clean] | ||
| Requires-Dist: lxml | ||
| Requires-Dist: lxml-html-clean | ||
@@ -164,2 +165,8 @@ ============ | ||
| 0.6.1 (2024-04-23) | ||
| ------------------ | ||
| * Fixed HTML comment and processing instruction handling. | ||
| * Use ``lxml-html-clean`` instead of ``lxml[html_clean]`` in setup.py, | ||
| to avoid https://github.com/jazzband/pip-tools/issues/2004 | ||
| 0.6.0 (2024-04-04) | ||
@@ -166,0 +173,0 @@ ------------------ |
+1
-1
| [bumpversion] | ||
| current_version = 0.6.0 | ||
| current_version = 0.6.1 | ||
| commit = True | ||
@@ -4,0 +4,0 @@ tag = True |
+5
-2
@@ -15,3 +15,3 @@ #!/usr/bin/env python | ||
| name='html_text', | ||
| version='0.6.0', | ||
| version='0.6.1', | ||
| description="Extract text from HTML", | ||
@@ -24,3 +24,6 @@ long_description=readme + '\n\n' + history, | ||
| include_package_data=True, | ||
| install_requires=['lxml[html_clean]'], | ||
| install_requires=[ | ||
| 'lxml', | ||
| 'lxml-html-clean', | ||
| ], | ||
| license="MIT license", | ||
@@ -27,0 +30,0 @@ zip_safe=False, |
@@ -5,2 +5,3 @@ # -*- coding: utf-8 -*- | ||
| import lxml.html | ||
| import pytest | ||
@@ -56,2 +57,16 @@ | ||
| def test_comment_fragment(all_options): | ||
| node = lxml.html.fragment_fromstring("<!-- hello world -->") | ||
| assert extract_text(node, **all_options) == '' | ||
| def test_processing_instruction(all_options): | ||
| assert extract_text('<?dbfo label-width="width"?>', **all_options) == '' | ||
| def test_processing_instruction_fragment(all_options): | ||
| node = lxml.html.fragment_fromstring('<?dbfo label-width="width"?>') | ||
| assert extract_text(node, **all_options) == '' | ||
| def test_extract_text_from_tree(all_options): | ||
@@ -58,0 +73,0 @@ html = (u'<html><style>.div {}</style>' |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
201730
0.7%394
4.79%