Metadata-Version: 1.0
		Name: html-wrapper
		Version: 0.3.1
		Version: 0.3.2
		Summary: HTML parser with an lxml backend. Implements a subset of BeautifulSoup API and is an order of magnitude faster
		@@ -5,0 +5,0 @@ Home-page: https://github.com/thismachinechills/html_wrapper

+17

-16

html_wrapper/wrapper.py

		from typing import Union, Dict, AnyStr, Any, Optional, \
		Iterable, Tuple
		Iterable, Tuple, List
		from functools import lru_cache
		@@ -10,11 +10,12 @@ from abc import ABC

		BS4_TYPES = "Tag", "BeautifulSoup"
		STR_ENCODING = 'unicode'
		BS4_TYPES: Tuple[str] = "Tag", "BeautifulSoup"
		STR_ENCODING: str = 'unicode'

		NO_ATTRS: Dict[str, str] = {}
		NO_TEXT = ''
		NO_TEXT: str = ''
		SKIP_COMMA: int = -len(', ')
		COLLECTIONS: Tuple[type] = set, list, tuple


		Attrs = Union[str, Dict]
		Attrs = Union[str, Dict[str, str]]
		CssClassType = str
		@@ -215,3 +216,3 @@
		def get_xpath_str(tag: str, class_: CssClassType = None, **kwargs) -> str:
		tag_xp = f'.//{tag}'
		tags: List[str] = [f'.//{tag}']

		@@ -222,3 +223,3 @@ if class_:
		for attr, val in kwargs.items():
		tag_xp += '['
		tags.append('[')
		attr_xp = f'@{attr}'
		@@ -228,23 +229,23 @@
		if val:
		tag_xp += attr_xp
		tags.append(attr_xp)

		else:
		tag_xp += f'not({attr_xp})'
		tags.append(f'not({attr_xp})')

		elif isinstance(val, (set, list, tuple)):
		elif isinstance(val, COLLECTIONS):
		for item in val:
		val_xp = f'"{item}", '

		val_xp = val_xp[:SKIP_COMMA] if val else ''
		tag_xp += f'contains({attr_xp}, {val_xp})'
		val_xp = val_xp[:SKIP_COMMA] if val else NO_TEXT
		tags.append(f'contains({attr_xp}, {val_xp})')

		elif isinstance(val, str):
		tag_xp += f'contains({attr_xp}, "{val}")'
		tags.append(f'contains({attr_xp}, "{val}")')

		else:
		tag_xp += "{attr_xp}='{val}'"
		tags.append("{attr_xp}='{val}'")

		tag_xp += ']'
		tags.append(']')

		return tag_xp
		return ''.join(tags)

		@@ -251,0 +252,0 @@

+1

-1

PKG-INFO

		Metadata-Version: 1.0
		Name: html_wrapper
		Version: 0.3.1
		Version: 0.3.2
		Summary: HTML parser with an lxml backend. Implements a subset of BeautifulSoup API and is an order of magnitude faster
		@@ -5,0 +5,0 @@ Home-page: https://github.com/thismachinechills/html_wrapper

+13

-10

README.md

		# html_wrapper

		html_wrapper implements a small subset of the BeautifulSoup API that I use. It's anywhere from 10x-100x faster than bs4.
		`html_wrapper` implements a small subset of the `BeautifulSoup4` API. It can be anywhere from 10x-100x faster than `bs4` for some use cases.


		## Installation
		`pip3 install html_wrapper`
		`python3 -m pip install html_wrapper`


		## Example
		Faster to instantiate and parse HTML. Suits my needs.
		It's faster to instantiate and parse HTML. Suits my needs.

		```
		In [1]: import bs4
		```python3
		In [1]: from html_wrapper import HtmlWrapper

		In [2]: import html_wrapper
		In [2]: from bs4 import BeautifulSoup

		In [3]: %timeit html_wrapper.HtmlWrapper("<html><body><p>hi</p></body></html>").text
		10000 loops, best of 3: 20.4 µs per loop
		In [3]: from requests import get

		In [4]: %timeit bs4.BeautifulSoup("<html><body><p>hi</p></body></html>", "lxml").text
		1000 loops, best of 3: 232 µs per loop
		In [4]: html: bytes = get("https://en.wikipedia.org/wiki/HTML").content

		In [5]: %timeit HtmlWrapper(html).text
		23.4 ms ± 563 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

		In [6]: %timeit BeautifulSoup(html).text
		190 ms ± 29.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
		```
		@@ -25,0 +28,0 @@

+1

-1

setup.py

		@@ -5,3 +5,3 @@ from setuptools import setup
		setup(name="html_wrapper",
		version="0.3.1",
		version="0.3.2",
		description="HTML parser with an lxml backend. Implements a subset of BeautifulSoup API and is an order of magnitude faster",
		@@ -8,0 +8,0 @@ url="https://github.com/thismachinechills/html_wrapper",

html-wrapper - PyPI package: compare versions

Improved metrics