Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo · Sign in
Socket

mf2util

Package Overview
Dependencies
Maintainers
1
Versions
34
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

mf2util - pypi Package Compare versions

Comparing version
0.3.3
to
0.4.0
+1
-1
mf2util.egg-info/PKG-INFO
Metadata-Version: 1.1
Name: mf2util
Version: 0.3.3
Version: 0.4.0
Summary: Python Microformats2 utilities, a companion to mf2py

@@ -5,0 +5,0 @@ Home-page: http://indiewebcamp.com/mf2util

+129
-46

@@ -215,3 +215,3 @@ """Utilities for interpreting mf2 data.

def find_author(parsed, source_url=None, hentry=None):
def find_author(parsed, source_url=None, hentry=None, fetch_mf2_func=None):
"""Use the authorship discovery algorithm

@@ -223,4 +223,19 @@ https://indiewebcamp.com/authorship to determine an h-entry's

:param str source_url: the source of the parsed document.
:param hentry dict: optional, the h-entry we're examining, if omitted,
we'll just use the first one
:param fetch_mf2_func callable: optional function that takes a URL
and returns parsed mf2
:return: a dict containing the author's name, photo, and url
"""
def find_hentry_author(hentry):
for obj in hentry['properties'].get('author', []):
return parse_author(obj)
def find_parent_hfeed_author(hentry):
for hfeed in _find_all_entries(parsed, ['h-feed']):
# find the h-entry's parent h-feed
if hentry in hfeed.get('children', []):
for obj in hfeed['properties'].get('author', []):
return parse_author(obj)
if not hentry:

@@ -231,37 +246,71 @@ hentry = find_first_entry(parsed, ['h-entry'])

for obj in hentry['properties'].get('author', []):
return parse_author(obj)
author_page = None
# try to find an author of the top-level h-feed
for hfeed in (card for card in parsed['items']
if 'h-feed' in card.get('type', [])):
for obj in hfeed['properties'].get('author', []):
return parse_author(obj)
# 3. if the h-entry has an author property, use that
author = find_hentry_author(hentry)
# top-level h-cards
hcards = [card for card in parsed['items']
if 'h-card' in card.get('type', [])]
# 4. otherwise if the h-entry has a parent h-feed with author property,
# use that
if not author:
author = find_parent_hfeed_author(hentry)
if source_url:
for item in hcards:
if source_url in item['properties'].get('url', []):
return parse_author(item)
# 5. if an author property was found
if author:
# 5.2 otherwise if author property is an http(s) URL, let the
# author-page have that URL
if list(author.keys()) == ['url']:
author_page = author['url']
# 5.1 if it has an h-card, use it, exit.
# 5.3 otherwise use the author property as the author name,
# exit.
else:
return author
rel_mes = parsed.get('rels', {}).get("me", [])
for item in hcards:
urls = item['properties'].get('url', [])
if any(url in rel_mes for url in urls):
return parse_author(item)
# 6. if there is no author-page and the h-entry's page is a permalink page
if not author_page:
# 6.1 if the page has a rel-author link, let the author-page's
# URL be the href of the rel-author link
rel_authors = parsed.get('rels', {}).get('author', [])
if rel_authors:
author_page = rel_authors[0]
rel_authors = parsed.get('rels', {}).get("author", [])
for item in hcards:
urls = item['properties'].get('url', [])
if any(url in rel_authors for url in urls):
return parse_author(item)
# 7. if there is an author-page URL
if author_page:
if not fetch_mf2_func:
return {'url': author_page}
# just return the first h-card
if hcards:
return parse_author(hcards[0])
# 7.1 get the author-page from that URL and parse it for microformats2
parsed = fetch_mf2_func(author_page)
hcards = find_all_entries(parsed, ['h-card'])
# 7.2 if author-page has 1+ h-card with url == uid ==
# author-page's URL, then use first such h-card, exit.
for hcard in hcards:
hcard_url = get_plain_text(hcard['properties'].get('url'))
hcard_uid = get_plain_text(hcard['properties'].get('uid'))
if (hcard_url and hcard_uid and hcard_url == hcard_uid
and hcard_url == author_page):
return parse_author(hcard)
# 7.3 else if author-page has 1+ h-card with url property
# which matches the href of a rel-me link on the author-page
# (perhaps the same hyperlink element as the u-url, though not
# required to be), use first such h-card, exit.
rel_mes = parsed.get('rels', {}).get('me', [])
for hcard in hcards:
hcard_url = get_plain_text(hcard['properties'].get('url'))
if hcard_url and hcard_url in rel_mes:
return parse_author(hcard)
# 7.4 if the h-entry's page has 1+ h-card with url ==
# author-page URL, use first such h-card, exit.
for hcard in hcards:
hcard_url = get_plain_text(hcard['properties'].get('url'))
if hcard_url and hcard_url == author_page:
return parse_author(hcard)
# 8. otherwise no deterministic author can be found.
return None
def representative_hcard(parsed, source_url):

@@ -465,3 +514,5 @@ """Find the representative h-card for a URL

def _interpret_common_properties(parsed, source_url, base_href, hentry, use_rel_syndication, want_json):
def _interpret_common_properties(
parsed, source_url, base_href, hentry, use_rel_syndication,
want_json, fetch_mf2_func):
result = {}

@@ -487,3 +538,3 @@ for prop in ('url', 'uid', 'photo'):

author = find_author(parsed, source_url, hentry)
author = find_author(parsed, source_url, hentry, fetch_mf2_func)
if author:

@@ -504,2 +555,9 @@ result['author'] = author

summary_prop = hentry['properties'].get('summary')
if summary_prop:
if isinstance(summary_prop[0], dict):
result['summary'] = summary_prop[0]['value']
else:
result['summary'] = summary_prop[0]
# TODO handle h-adr and h-geo variants

@@ -527,3 +585,5 @@ locations = hentry['properties'].get('location')

def interpret_event(parsed, source_url, base_href=None, hevent=None, use_rel_syndication=True, want_json=False):
def interpret_event(
parsed, source_url, base_href=None, hevent=None,
use_rel_syndication=True, want_json=False, fetch_mf2_func=None):
"""Given a document containing an h-event, return a dictionary::

@@ -552,2 +612,4 @@

will be pure json with datetimes as strings instead of python objects
:param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
output for a given URL.
:return: a dict with some or all of the described properties

@@ -562,3 +624,4 @@ """

result = _interpret_common_properties(
parsed, source_url, base_href, hevent, use_rel_syndication, want_json)
parsed, source_url, base_href, hevent, use_rel_syndication, want_json,
fetch_mf2_func)
result['type'] = 'event'

@@ -571,3 +634,5 @@ name_value = get_plain_text(hevent['properties'].get('name'))

def interpret_entry(parsed, source_url, base_href=None, hentry=None, use_rel_syndication=True, want_json=False):
def interpret_entry(
parsed, source_url, base_href=None, hentry=None,
use_rel_syndication=True, want_json=False, fetch_mf2_func=None):
"""Given a document containing an h-entry, return a dictionary::

@@ -609,2 +674,4 @@

will be pure json with datetimes as strings instead of python objects
:param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
output for a given URL.
:return: a dict with some or all of the described properties

@@ -620,3 +687,4 @@ """

result = _interpret_common_properties(
parsed, source_url, base_href, hentry, use_rel_syndication, want_json)
parsed, source_url, base_href, hentry, use_rel_syndication, want_json,
fetch_mf2_func)
if 'h-cite' in hentry.get('type', []):

@@ -636,3 +704,4 @@ result['type'] = 'cite'

result.setdefault(prop, []).append(
interpret(parsed, source_url, base_href, url_val, want_json))
interpret(parsed, source_url, base_href, url_val,
want_json, fetch_mf2_func))
else:

@@ -646,3 +715,4 @@ result.setdefault(prop, []).append({

def interpret_feed(parsed, source_url, base_href=None, hfeed=None):
def interpret_feed(parsed, source_url, base_href=None, hfeed=None,
fetch_mf2_func=None):
"""Interpret a source page as an h-feed or as an top-level collection

@@ -655,4 +725,6 @@ of h-entries.

:param str base_href: (optional) the href value of the base tag
:param dict item: (optional) the item to be parsed. If provided,
this will be used instead of the first element on the page.
:param dict hfeed: (optional) the h-feed to be parsed. If provided,
this will be used instead of the first h-feed on the page.
:param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
output for a given URL.
:return: a dict containing 'entries', a list of entries, and possibly other

@@ -679,3 +751,3 @@ feed properties (like 'name').

parsed, source_url, base_href, item=child,
use_rel_syndication=False)
use_rel_syndication=False, fetch_mf2_func=fetch_mf2_func)
if entry:

@@ -688,3 +760,3 @@ entries.append(entry)

def interpret(parsed, source_url, base_href=None, item=None,
use_rel_syndication=True, want_json=False):
use_rel_syndication=True, want_json=False, fetch_mf2_func=None):
"""Interpret a permalink of unknown type. Finds the first interesting

@@ -706,2 +778,4 @@ h-* element, and delegates to :func:`interpret_entry` if it is an

will be pure json with datetimes as strings instead of python objects
:param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
output for a given URL.
:return: a dict as described by interpret_entry or interpret_event, or None

@@ -713,11 +787,17 @@ """

if item:
if 'h-event' in item.get('type', []):
types = item.get('type', [])
if 'h-event' in types:
return interpret_event(
parsed, source_url, base_href=base_href, hevent=item, use_rel_syndication=use_rel_syndication, want_json=want_json)
elif 'h-entry' in item.get('type', []) or 'h-cite' in item.get('type', []):
parsed, source_url, base_href=base_href, hevent=item,
use_rel_syndication=use_rel_syndication, want_json=want_json,
fetch_mf2_func=fetch_mf2_func)
elif 'h-entry' in types or 'h-cite' in types:
return interpret_entry(
parsed, source_url, base_href=base_href, hentry=item, use_rel_syndication=use_rel_syndication, want_json=want_json)
parsed, source_url, base_href=base_href, hentry=item,
use_rel_syndication=use_rel_syndication, want_json=want_json,
fetch_mf2_func=fetch_mf2_func)
def interpret_comment(parsed, source_url, target_urls, base_href=None, want_json=False):
def interpret_comment(parsed, source_url, target_urls, base_href=None,
want_json=False, fetch_mf2_func=None):
"""Interpret received webmentions, and classify as like, reply, or

@@ -741,2 +821,4 @@ repost (or a combination thereof). Returns a dict as described

will be pure json with datetimes as strings instead of python objects
:param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
output for a given URL.
:return: a dict as described above, or None

@@ -747,3 +829,4 @@ """

result = interpret_entry(parsed, source_url, base_href=base_href,
hentry=item, want_json=want_json)
hentry=item, want_json=want_json,
fetch_mf2_func=fetch_mf2_func)
if result:

@@ -750,0 +833,0 @@ result['comment_type'] = classify_comment(parsed, target_urls)

Metadata-Version: 1.1
Name: mf2util
Version: 0.3.3
Version: 0.4.0
Summary: Python Microformats2 utilities, a companion to mf2py

@@ -5,0 +5,0 @@ Home-page: http://indiewebcamp.com/mf2util

@@ -27,3 +27,3 @@ #!/usr/bin/env python

setup(name='mf2util',
version='0.3.3',
version='0.4.0',
description='Python Microformats2 utilities, a companion to mf2py',

@@ -55,3 +55,3 @@ long_description="""

py_modules=['mf2util'],
tests_require=['pytest'],
tests_require=['pytest', 'mf2py'],
cmdclass={'test': PyTest},

@@ -58,0 +58,0 @@ classifiers=[