html2object
Advanced tools
| import unittest | ||
| from html2object.html2object import * | ||
| class TestHtml2Object(unittest.TestCase): | ||
| def test_get_element(self): | ||
| html = "<div class='container'><h1>Title</h1></div>" | ||
| element = get_element(html, name="h1") | ||
| self.assertEqual(element, "<h1>") | ||
| def test_remove_element(self): | ||
| html = "<div class='container'><h1>Title</h1></div>" | ||
| new_html = remove_element(html, name="h1") | ||
| self.assertEqual(new_html, "<div class='container'></div>") | ||
| def test_get_name(self): | ||
| html = "<div class='container'><h1>Title</h1></div>" | ||
| name = get_name(html) | ||
| self.assertEqual(name, "div") | ||
| def test_get_attributes(self): | ||
| html = "<div id='main' class='container'><h1>Title</h1></div>" | ||
| attributes = get_attributes(html) | ||
| expected_attributes = {"id": "'main'", "class": "'container'"} | ||
| self.assertEqual(attributes, expected_attributes) | ||
| def test_get_child(self): | ||
| html = "<div><span>Child</span></div>" | ||
| child = get_child(html, name="div") | ||
| self.assertEqual(child, "<span>Child</span>") | ||
| if __name__ == "__main__": | ||
| unittest.main() |
| Metadata-Version: 2.1 | ||
| Name: html2object | ||
| Version: 0.1.2 | ||
| Version: 0.1.3 | ||
| Summary: Tools to handle the CRUD of .html files as objects. | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/boterop/html2object |
@@ -12,2 +12,3 @@ LICENSE | ||
| tests/__init__.py | ||
| tests/test_html2object.py | ||
| tests/test_html_element.py |
@@ -23,6 +23,3 @@ import uuid | ||
| self.name = name | ||
| if id: | ||
| self.attributes = {"id": id, **attributes} | ||
| else: | ||
| self.attributes = attributes | ||
| self.attributes = {"id": id, **attributes} if id else attributes | ||
| self.children = children | ||
@@ -59,5 +56,3 @@ | ||
| result = None | ||
| if self.parent: | ||
| result = self.parent.find_element_by_id(id, pile) | ||
| result = self.parent.find_element_by_id(id, pile) if self.parent else None | ||
| if self.children and not result: | ||
@@ -74,8 +69,5 @@ for child in self.children: | ||
| children_html = "" | ||
| children = self.children if self.children else [] | ||
| children = self.children or [] | ||
| for child in children: | ||
| if type(child) is str: | ||
| children_html += child | ||
| else: | ||
| children_html += str(child) | ||
| children_html += child if type(child) is str else str(child) | ||
| children_html += "\n" | ||
@@ -107,5 +99,4 @@ children_html = children_html.strip() | ||
| children_html = html_u.get_child(html, name=self.name) | ||
| if children_html == None: | ||
| self.children = None | ||
| else: | ||
| self.children = None | ||
| if children_html is not None: | ||
| self.children = [] | ||
@@ -115,3 +106,3 @@ self._add_children(children_html) | ||
| def _add_children(self, html: str): | ||
| if html == "": | ||
| if not html: | ||
| return | ||
@@ -118,0 +109,0 @@ try: |
@@ -26,11 +26,4 @@ import re | ||
| def get_id(html: str) -> str: | ||
| id = re.search(r'id=[\'"]([^\'"]+)[\'"]', html) | ||
| if not id: | ||
| return "" | ||
| return id.group() | ||
| def get_attributes(html: str) -> dict: | ||
| attribs = re.search(r"<([a-zA-Z][a-zA-Z0-9]*)\b([^>]*)>", html).group(2).strip() | ||
| attribs = re.search(r"<([a-zA-Z][a-zA-Z0-9]*)\b([^>]*)>", html)[2].strip() | ||
| attribs_list = attribs.split(" ") | ||
@@ -41,3 +34,3 @@ attribs_list = _fix_attrs(attribs_list) | ||
| for attr in attribs_list: | ||
| if attr != "" and attr != "/": | ||
| if attr not in ["", "/"]: | ||
| attr_div = attr.strip().split("=", 1) | ||
@@ -47,3 +40,3 @@ key = attr_div[0] | ||
| key = last_key | ||
| value = attributes[key] + " " + attr_div[0] | ||
| value = f"{attributes[key]} {attr_div[0]}" | ||
| else: | ||
@@ -74,7 +67,7 @@ value = attr_div[1] | ||
| html = re.sub(rf"<\/{name}>", "<remove>", html, 1) | ||
| (_, start_index) = re.search(rf"<{name}\b[^>]*>", html).span() | ||
| (end_index, _) = re.search(rf"<\/{name}>", html).span() | ||
| html = re.sub("<remove>", f"</{name}>", html) | ||
| end_index = end_index - ((5 - len(name)) * count) | ||
| child = html[:end_index].replace(element, "").strip() | ||
| return child | ||
| return html[start_index:end_index].strip() | ||
@@ -89,3 +82,3 @@ | ||
| if mix: | ||
| attr = mix + " " + attr | ||
| attr = f"{mix} {attr}" | ||
| mix = None | ||
@@ -92,0 +85,0 @@ if attr.strip()[-1] in no_splitable: |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: html2object | ||
| Version: 0.1.2 | ||
| Version: 0.1.3 | ||
| Summary: Tools to handle the CRUD of .html files as objects. | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/boterop/html2object |
+1
-1
@@ -5,3 +5,3 @@ from setuptools import setup, find_packages | ||
| name="html2object", | ||
| version="0.1.2", | ||
| version="0.1.3", | ||
| author="boterop", | ||
@@ -8,0 +8,0 @@ author_email="boterop22@gmail.com", |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
20386
4.6%15
7.14%289
4.33%