Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo | Sign in
Socket

html-text

Package Overview
Dependencies
Maintainers
3
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

html-text - pypi Package Compare versions

Comparing version
0.5.0
to
0.5.1
+6
-0
CHANGES.rst

@@ -5,2 +5,8 @@ =======

0.5.1 (2019-05-27)
------------------
Fixed whitespace handling when ``guess_punct_space`` is False: html-text was
producing unnecessary spaces after newlines.
0.5.0 (2018-11-19)

@@ -7,0 +13,0 @@ ------------------

+7
-1
Metadata-Version: 1.1
Name: html-text
Version: 0.5.0
Version: 0.5.1
Summary: Extract text from HTML

@@ -146,2 +146,8 @@ Home-page: https://github.com/TeamHG-Memex/html-text

0.5.1 (2019-05-27)
------------------
Fixed whitespace handling when ``guess_punct_space`` is False: html-text was
producing unnecessary spaces after newlines.
0.5.0 (2018-11-19)

@@ -148,0 +154,0 @@ ------------------

+1
-1
# -*- coding: utf-8 -*-
__version__ = '0.5.0'
__version__ = '0.5.1'

@@ -4,0 +4,0 @@ from .html_text import (etree_to_text, extract_text, selector_to_text,

@@ -94,2 +94,4 @@ # -*- coding: utf-8 -*-

return False
if not guess_punct_space:
return True
if not _has_trailing_whitespace(prev):

@@ -101,3 +103,3 @@ if _has_punct_after(text) or _has_open_bracket_before(prev):

def get_space_between(text, prev):
if not text or not guess_punct_space:
if not text:
return ' '

@@ -104,0 +106,0 @@ return ' ' if should_add_space(text, prev) else ''

Metadata-Version: 1.1
Name: html_text
Version: 0.5.0
Version: 0.5.1
Summary: Extract text from HTML

@@ -146,2 +146,8 @@ Home-page: https://github.com/TeamHG-Memex/html-text

0.5.1 (2019-05-27)
------------------
Fixed whitespace handling when ``guess_punct_space`` is False: html-text was
producing unnecessary spaces after newlines.
0.5.0 (2018-11-19)

@@ -148,0 +154,0 @@ ------------------

[bumpversion]
current_version = 0.5.0
current_version = 0.5.1
commit = True

@@ -4,0 +4,0 @@ tag = True

@@ -15,3 +15,3 @@ #!/usr/bin/env python

name='html_text',
version='0.5.0',
version='0.5.1',
description="Extract text from HTML",

@@ -18,0 +18,0 @@ long_description=readme + '\n\n' + history,

@@ -141,4 +141,4 @@ # -*- coding: utf-8 -*-

text = ('title\n\n text_1.\n\n text_2 text_3\n\n text_4\n text_5'
'\n\n text_6 text_7 text_8\n\n text_9\n\n ...text_10')
text = ('title\n\ntext_1.\n\ntext_2 text_3\n\ntext_4\ntext_5'
'\n\ntext_6 text_7 text_8\n\ntext_9\n\n...text_10')
assert extract_text(html, guess_punct_space=False, guess_layout=True) == text

@@ -155,2 +155,10 @@

def test_basic_newline():
html = u'<div>a</div><div>b</div>'
assert extract_text(html, guess_punct_space=False, guess_layout=False) == 'a b'
assert extract_text(html, guess_punct_space=False, guess_layout=True) == 'a\nb'
assert extract_text(html, guess_punct_space=True, guess_layout=False) == 'a b'
assert extract_text(html, guess_punct_space=True, guess_layout=True) == 'a\nb'
def test_adjust_newline():

@@ -157,0 +165,0 @@ html = u'<div>text 1</div><p><div>text 2</div></p>'