
Security News
npm Adopts OIDC for Trusted Publishing in CI/CD Workflows
npm now supports Trusted Publishing with OIDC, enabling secure package publishing directly from CI/CD workflows without relying on long-lived tokens.
A smart and convenient dict-flattening library for complex containers made of nested dicts and lists
# extracting from json loads data..
sample_from_json = json.loads('sample.json')
count = sample_from_json['count']
data_list = sample_from_json.get('data_list')
for cityinfo in data_list:
for key, value in cityinfo.items():
if key == 'city':
city_name = value['names']['en']
if key == 'subdivisions':
subdiv = []
for subsubdivision in value:
sv = subsubdivision['names']['en']
subdiv.append(sv)
...
...
...
...
...
...
...
OMG...
...
....
pip install diselect
from diselect import diselect
# example
# list of dict in dict in list in dict in list of dict in....
sample_from_json = {
'count': 1,
'date': '2022-5-31',
'data_list': [
{
"city": {
"names": {
"en": "Songpa-gu"
}
},
"continent": {
"code": "AS",
"names": {"pt-BR": "Ásia", "de": "Asien", "en": "Asia",}
},
"country": {
"iso_code": "KR",
"names": {
"de": "Südkorea",
"en": "South Korea",
}
},
"location": {"latitude": 37.5013, "longitude": 127.1188, "time_zone": "Asia/Seoul"},
# multiple children in a list
"subdivisions": [
{
"iso_code": "11",
"names": {"zh-CN": "首尔特别市", "en": "Seoul", "ja": "ソウル特別市"}
},
{
"iso_code": "12",
"names": {"en": "Hangang"}
}
],
},
{
"city": {
"names": {
"en": "Songpa-gu2"
}
},
"continent": {
"code": "AS2",
"names": {"pt-BR": "Ásia2", "de": "Asien", "en": "Asia2",}
},
"country": {
"iso_code": "KR2",
"names": {
"de": "Südkorea2",
"en": "South Korea2",
}
},
"location": {"latitude": 37.5013, "longitude": 127.1188, "time_zone": "Asia/Seoul2"},
# multiple children in a list
"subdivisions": [
{
"iso_code": "112",
"names": {"zh-CN": "首尔特别市", "en": "Seoul2", "ja": "ソウル特別市"}
},
{
"iso_code": "122",
"names": {"en": "Hangang2"}
}
],
},
]
}
# Usage 1) Specify only the column name
# For taking top-level values in the container without the risk of duplication
query_only_key = ['count', 'date'] # key name to column
for r in diselect(sample_from_json, query_only_key):
print(r)
# results {'count': 1, 'date': '2022-5-31'}
# Usage 2) Extract nested values
# Each query is a tuple of parent keys leading to the target 'terminal' value
# If too few parent generations are given, duplicate matching may occur.
# An exception is raised when a duplicate occurs
query_deep_path = [('city', 'names', 'en'), ('country', 'names', 'en')] # en is key of terminal value
for r in diselect(sample_from_json, query_deep_path):
print(r)
# results
# {('city', 'names', 'en'): 'Songpa-gu', ('country', 'names', 'en'): 'South Korea'}
# {('city', 'names', 'en'): 'Songpa-gu2', ('country', 'names', 'en'): 'South Korea2'}
# Usage 3) Aliasing query to column name
# Change the query to a usable column name
query_aliases = {
('city', 'names', 'en'): 'city_name',
('country', 'names', 'en'): 'country_name',
('subdivisions', 'names', 'en'): 'subdivision_name'
}
# or
query_aliases = [
{('city', 'names', 'en'): 'city_name'},
{('country', 'names', 'en'): 'country_name'},
{('subdivisions', 'names', 'en'): 'subdivision_names'}
]
for r in diselect(sample_from_json, query_aliases):
print(r)
# results:
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': ['Seoul', 'Hangang']}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': ['Seoul2', 'Hangang2']}
# the multiple child values of subdivision_names have been coalesced into a list ['Seoul', 'Hangang']
# Usage 4) join listed children values
# pass a tuple of alias and function as the value
query_aliases_and_join_children = {
('city', 'names', 'en'): 'city_name',
('country', 'names', 'en'): 'country_name',
('subdivisions', 'names', 'en'): ('subdivision_names', ','.join), # alias, join function
}
for r in diselect(sample_from_json, query_aliases_and_join_children):
print(r)
# results
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'Seoul,Hangang'}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'Seoul2,Hangang2'}
# Seoul and Hangang have been joined with the separator ','
query_aliases_and_join_children = {
('city', 'names', 'en'): 'city_name',
('country', 'names', 'en'): 'country_name',
('subdivisions', 'names', 'en'): [
'subdivision_names',
','.join, str.upper # alias, chaining function
]
}
for r in diselect(sample_from_json, query_aliases_and_join_children):
print(r)
# results
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'SEOUL,HANGANG'}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'SEOUL2,HANGANG2'}
# Usage 5) merge multiple selections
query = {
(('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[
'address',
'/' # a str is a shortcut for a join function that uses it as the separator
],
(('latitude',), ('longitude',)): [
'coordinate'
]
}
for r in diselect(sample_from_json, query):
print(r)
# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': [37.5013, 127.1188]}
# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': [37.5013, 127.1188]}
# applying functions to coordinate...
query = {
(('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[ #tuple of multiple paths,
'address', '/'
],
(('latitude',), ('longitude',)): [
'coordinate',
str, # convert individual float type elements to str for join
','
]
}
for r in diselect(sample_from_json, query):
print(r)
# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': '37.5013,127.1188'}
# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': '37.5013,127.1188'}
# Usage 6) Summary
query = {
('city', 'names', 'en'): 'city_name',
('continent', 'code'): 'continent_code',
('continent', 'names', 'en'): 'continent_name',
('country', 'iso_code'): 'country_code',
('country', 'names', 'en'): 'country_name',
('location', 'time_zone'): 'timezone',
(('latitude',), ('longitude',)): [
'coordinate',
str, ','
],
('subdivisions', 'names', 'en'): [
'subdivision_name',
',', str.upper
]
}
for r in diselect(container=sample_from_json, query=query):
print(r)
# {'city_name': 'Songpa-gu', 'continent_code': 'AS', 'continent_name': 'Asia', 'country_code': 'KR', 'country_name': 'South Korea', 'timezone': 'Asia/Seoul', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL,HANGANG'}
# {'city_name': 'Songpa-gu2', 'continent_code': 'AS2', 'continent_name': 'Asia2', 'country_code': 'KR2', 'country_name': 'South Korea2', 'timezone': 'Asia/Seoul2', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL2,HANGANG2'}
> complex data nested with dicts and lists
query1 = {
key1, key2,
{(key3, key2): alias},
{(key4, key5): (alias2, apply)},
}
query2 = [
'column1', 'column2',
{
('path1', 'path2'): 'alias1',
('patt1', 'path2', 'path3'): ('alias2', dateutil.parser.parse),
},
'column4'
]
# date and count in the presented example data are single entities as top-level keys.
# 'count': 1,
# 'date': '2022-5-31',
# 'data_list': [ ...
# but data_list holds multiple row values
# Querying data from both tendencies at the same time leads to unpredictable behavior.
greedy_query = [
# query for top level single context value
'count', 'date',
# query for row values
{
('city', 'names', 'en'): 'city_name',
('continent', 'code'): 'continent_code',
('continent', 'names', 'en'): 'continent_name',
('country', 'iso_code'): 'country_code',
('country', 'names', 'en'): 'country_name',
('location', 'time_zone'): 'timezone',
('subdivisions', 'names', 'en'): ('subdivision_name', ','),
}
]
for r in diselect(sample_from_json, greedy_query):
print(r)
# results
# {'count': 1, 'date': '2022-5-31', 'city_name': ['Songpa-gu', 'Songpa-gu2'], 'continent_code': ['AS', 'AS2'], 'continent_name': ['Asia', 'Asia2'], 'country_code': ['KR', 'KR2'], 'country_name': ['South Korea', 'South Korea2'], 'timezone': ['Asia/Seoul', 'Asia/Seoul2'], 'subdivision_name': 'Seoul,Hangang,Seoul2,Hangang2'}
# The data is organized vertically with the top keys count and date. Maybe this is what you want.
# This can be used as a trick to get the column dataset
## Tip: separate the queries by structure to get both of them
query_context = ['count', 'date']
query_list = {
('city', 'names', 'en'): 'city_name',
('continent', 'code'): 'continent_code',
('continent', 'names', 'en'): 'continent_name',
('country', 'iso_code'): 'country_code',
('country', 'names', 'en'): 'country_name',
('location', 'time_zone'): 'timezone',
('subdivisions', 'names', 'en'): ('subdivision_name', ','),
}
[context_data] = list(diselect(sample_from_json, query_context)) # may one
count = context_data['count']
date = context_data['date']
# or it may be simpler and better to index directly when the values are easy to access
count = sample_from_json['count']
date = sample_from_json['date']
data_list = list(diselect(sample_from_json, query_list)) # many
- value typing via apply function
import dateutil
data = [
{
'place_id': 142213,
'visit_count': '5',
'visit_date': '2022/2/21',
'rating': '2.5',
},
{
'place_id': 154321,
'visit_count': '12',
'visit_date': '2022.3.7.',
'rating': '4.5',
},
]
parsed = diselect(data,
{
'place_id': ('place_id', str),
'visit_count': ('visit_count', int),
'rating': ('point', float),
'visit_date': ('visit_count', dateutil.parser.parse),
})
for row in parsed:
print(row)
# results
# {'place_id': '142213', 'visit_count': datetime.datetime(2022, 2, 21, 0, 0), 'point': 2.5}
# {'place_id': '154321', 'visit_count': datetime.datetime(2022, 3, 7, 0, 0), 'point': 4.5}
FAQs
A smart and convenient dict-flattening library for complex containers made of nested dicts and lists
We found that diselect demonstrated a healthy version release cadence and project activity because the last version was released less than a year ago. It has 1 open source maintainer collaborating on the project.
Did you know?
Socket for GitHub automatically highlights issues in each pull request and monitors the health of all your open source dependencies. Discover the contents of your packages and block harmful activity before you install or update your dependencies.
Security News
npm now supports Trusted Publishing with OIDC, enabling secure package publishing directly from CI/CD workflows without relying on long-lived tokens.
Research
/Security News
A RubyGems malware campaign used 60 malicious packages posing as automation tools to steal credentials from social media and marketing tool users.
Security News
The CNA Scorecard ranks CVE issuers by data completeness, revealing major gaps in patch info and software identifiers across thousands of vulnerabilities.