parserutils
This is a library of utility functions designed to make a developer's life easier.
The functions in this library are written to be both performant and Pythonic, as well as compatible with Python 3.6 through 3.9.
They are both documented and covered thoroughly by unit tests that fully describe and prove their behavior.
In general, my philosophy is that utility functions should be fast and handle edge cases so the caller doesn't have to take all kinds of precautions or do type checking on results.
Thus, in this library, if None will break a function it is simply returned as is; if there's nothing to do for a value, the result is returned without processing; otherwise, values are either processed successfully or a standard exception is raised.
But this is just a starting point. I welcome feedback and requests for additional functionality.
Installation
Install with `pip install parserutils`.
Usage
Here's what you can do with `dict` objects and other collections.
from parserutils import collections
collections.accumulate_items([('key', 'val1'), ('key', 'val2'), ('key', 'val3')])
collections.accumulate_items(
[('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')], reduce_each=True
)
collections.setdefaults({}, 'a.b')
collections.setdefaults({}, ['a.b', 'a.c'])
collections.setdefaults({}, {'a.b': 'bbb', 'a.c': 'ccc'})
collections.filter_empty(x for x in (None, [], ['a'], '', {'b'}, 'c'))
collections.flatten_items(x for x in ('abc', ['a', 'b', 'c'], ('d', 'e')))
collections.remove_duplicates('abcdefabc')
collections.remove_duplicates('abcdefabc', in_reverse=True)
collections.remove_duplicates(['a', 'b', 'c', 'a'])
collections.remove_duplicates(('a', 'b', 'c', 'a'), in_reverse=True)
collections.remove_duplicates(x for x in 'abca')
collections.remove_duplicates((x for x in 'abca'), in_reverse=True)
collections.remove_duplicates((set(x) for x in 'abca'), is_unhashable=True)
collections.rindex('aba', 'a')
collections.rindex(['a', 'b', 'a'], 'a')
collections.rindex(('a', 'b', 'a'), 'a')
collections.rindex('xyz', 'a')
collections.rindex([x for x in 'xyz'], 'a')
collections.rfind('aba', 'a')
collections.rfind(['a', 'b', 'a'], 'a')
collections.rfind(('a', 'b', 'a'), 'a')
collections.rfind('xyz', 'a')
collections.rfind([x for x in 'xyz'], 'a')
collections.reduce_value(['abc'])
collections.reduce_value(('abc',))
collections.reduce_value({'abc'})
collections.reduce_value('abc')
collections.reduce_value({'a': 'aaa'})
collections.reduce_value([{'a': 'aaa'}])
collections.reduce_value(['a', 'b', 'c'])
collections.wrap_value(['abc'])
collections.wrap_value(('abc',))
collections.wrap_value('abc')
collections.wrap_value(x for x in 'abc')
collections.wrap_value({'a': 'aaa'})
collections.wrap_value(['a', 'b', 'c'])
Here's a little bit about dates and numbers.
from parserutils import dates
from parserutils import numbers
dates.parse_dates(None, default='today')
dates.parse_dates(None, default=None)
dates.parse_dates('nope', default=None)
dates.parse_dates(0)
dates.parse_dates('<date_format>')
numbers.is_number(0)
numbers.is_number(1.1)
numbers.is_number('2.2')
numbers.is_number(False)
numbers.is_number(False, if_bool=True)
numbers.is_number(float('inf'))
numbers.is_number(float('nan'))
Here's something about string and URL parsing helpers.
from parserutils import strings
from parserutils import urls
strings.camel_to_constant('toConstant')
strings.camel_to_constant('XMLConstant')
strings.camel_to_constant('withNumbers1And2')
strings.camel_to_snake('toSnake')
strings.camel_to_snake('withXMLAbbreviation')
strings.camel_to_snake('withNumbers3And4')
strings.snake_to_camel('from_snake')
strings.snake_to_camel('_leading_and_trailing_')
strings.snake_to_camel('extra___underscores')
strings.find_all('ab??ca??bc??', '??')
strings.find_all('ab??ca??bc??', '??', reverse=True)
strings.find_all('ab??ca??bc??', '??', limit=2, reverse=True)
strings.find_all('ab??ca??bc??', '??', start=4)
strings.find_all('ab??ca??bc??', '??', end=8)
strings.find_all('ab??ca??bc??', '??', start=4, end=8)
strings.splitany('ab:ca:bc', ',')
strings.splitany('ab:ca:bc', ',', 1)
strings.splitany('ab|ca:bc', '|:')
strings.splitany('ab|ca:bc', ':|', 1)
strings.splitany('0<=3<5', ['<', '<='])
strings.splitany('0<=3<5', ['<', '<='], 1)
strings.to_ascii_equivalent('smart quotes, etc.')
urls.get_base_url('http://www.params.com?a=aaa')
urls.get_base_url('http://www.path.com/test')
urls.get_base_url('http://www.path.com/test', include_path=True)
urls.get_base_url('http://www.params.com/test?a=aaa', True)
urls.update_url_params('http://www.params.com?a=aaa', a='aaa')
urls.update_url_params('http://www.params.com?a=aaa', a='xxx')
urls.update_url_params('http://www.params.com', b='bbb')
urls.update_url_params('http://www.params.com', c=['c', 'cc'])
urls.url_to_parts('http://www.params.com/test/path?a=aaa')
urls.parts_to_url(
{'netloc': 'www.params.com', 'query': {'a': 'aaa'}}
)
urls.parts_to_url(
urls.url_to_parts('http://www.params.com/test/path?a=aaa')
)
Finally, XML parsing is also supported, using the cElementTree and defusedxml libraries for performance and security.
from parserutils import elements
xml_string = '<root><parent><child>one</child><child>two</child><uglyChild>yuck</uglyChild></parent></root>'
xml_element = elements.get_element(xml_string)
elements.set_element_text(xml_element, 'parent/child', 'child text')
elements.set_element_attributes(xml_element, childHas='child attribute')
elements.remove_element(xml_element, 'parent/uglyChild')
elements.element_to_string(xml_element)
converted = elements.element_to_dict(xml_string, recurse=True)
reverted = elements.dict_to_element(converted)
reverted = elements.get_element(converted)
xml_string == elements.element_to_string(converted)
root, obj = elements.element_to_object(converted)
obj == {'root': {'parent': {'child': ['one', 'two'], 'uglyChild': 'yuck'}}}
with open('/path/to/file.xml', 'rb') as xml:
    xml_from_file = elements.get_element(xml)
elements.write_element(xml_from_file, '/path/to/updated/file.xml')
xml_from_web = elements.get_remote_element('http://en.wikipedia.org/wiki/XML')
elements.write_element(xml_from_web, '/path/to/new/file.xml')
xml_from_path = elements.get_remote_element('/path/to/file.xml')
elements.element_to_string(xml_from_path)