You're Invited: Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26. RSVP
Socket
Book a DemoSign in
Socket

githubdata

Package Overview
Dependencies
Maintainers
1
Versions
47
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

githubdata - pypi Package Compare versions

Comparing version
11.1.0
to
12.0.0
+83
src/githubdata/github_data_repo.py
"""
"""
from pathlib import Path
from giteasy import GitHubRepo
from mirutil.df import read_data_according_to_type as rdatt
from mirutil.files import read_json_file as rjf
# Supported data-file suffixes, listed in the order they are probed.
# Only the keys (and their order) carry meaning; every value is None.
data_file_suffixes = dict.fromkeys(('.xlsx' , '.prq' , '.csv'))
class GithubDataRepo(GitHubRepo) :
    """
    A GitHub repository that stores a dataset, plus helpers to find and read it.

    Attributes set by this class:

    - ``data_suf``: suffix of the detected data files (one of the keys of
      ``data_file_suffixes``) or ``None`` when no data file was found.
    - ``data_fp``: a single ``Path`` when exactly one data file matches,
      a sorted list of ``Path`` objects when several match, or ``None``
      when none was found.
    - ``meta_fp`` / ``meta``: path and parsed content of the first ``.json``
      file, filled in by :meth:`read_metadata`; ``None`` until then.

    NOTE(review): ``self.local_path`` is expected to be provided by the
    ``GitHubRepo`` base class -- confirm against giteasy.
    """

    def __init__(self , repo_url , committing_usr = None , token = None) :
        """
        :param repo_url: forwarded to ``GitHubRepo.__init__``
        :param committing_usr: forwarded to ``GitHubRepo.__init__``
        :param token: forwarded to ``GitHubRepo.__init__``
        """
        super().__init__(repo_url = repo_url ,
                         committing_usr = committing_usr ,
                         token = token)
        # Give every attribute a value BEFORE scanning the local directory:
        # set_data_fps() reads self.data_suf, and resetting it afterwards
        # would throw away whatever the scan has just detected.
        self.data_suf = None
        self.data_fp = None
        self.meta_fp = None
        self.meta = None
        self.set_data_fps()

    def clone_overwrite(self , depth = 1) :
        """
        Delegate to the base-class ``clone_overwrite`` and re-scan data files.

        :param depth: forwarded to the base-class ``clone_overwrite``
        """
        super().clone_overwrite(depth = depth)
        self.set_data_fps()

    def _set_defualt_data_suffix(self) :
        """
        Set ``data_suf`` to the first known suffix that matches a local file.

        Suffixes are tried in the order of ``data_file_suffixes``; when none
        matches, ``data_suf`` is reset to ``None`` so no stale value survives.
        """
        for ky in data_file_suffixes :
            if self.ret_sorted_fpns_by_suf(ky) :
                self.data_suf = ky
                return
        self.data_suf = None

    def set_data_fps(self) :
        """ Refresh ``data_suf`` and ``data_fp`` from the local directory. """
        self._set_defualt_data_suffix()
        if self.data_suf is None :
            # Nothing to read: do not keep a path from an earlier scan.
            self.data_fp = None
            return
        fps = self.ret_sorted_fpns_by_suf(self.data_suf)
        # A single match is exposed as a bare Path, several as a list.
        self.data_fp = fps[0] if len(fps) == 1 else fps

    def ret_sorted_fpns_by_suf(self , suffix) :
        """
        Return the sorted paths directly under ``local_path`` ending in *suffix*.

        :param suffix: file-name ending to match, e.g. ``'.csv'``
        :return: sorted list of ``Path`` objects (empty when nothing matches)
        """
        return sorted(self.local_path.glob(f'*{suffix}'))

    def read_metadata(self) :
        """
        Load the first ``.json`` file (in sorted order) as repository metadata.

        :return: the value returned by ``rjf`` for that file, or ``None``
                 when the local directory holds no ``.json`` file
        """
        fps = self.ret_sorted_fpns_by_suf('.json')
        if not fps :
            return None
        self.meta_fp = fps[0]
        self.meta = rjf(self.meta_fp)
        return self.meta

    def read_data(self) :
        """
        Read the dataset, cloning the repository first if it is not local yet.

        :return: whatever ``rdatt`` returns for the single data file
                 (presumably a pandas.DataFrame -- confirm against mirutil),
                 or ``None`` when there is no data file or more than one
        """
        if not self.local_path.exists() :
            self.clone_overwrite()
        if isinstance(self.data_fp , Path) :
            return rdatt(self.data_fp)
        return None
def get_data_from_github(github_url) :
    """
    Read the dataset stored in a GitHub repository and clean up afterwards.

    :param github_url: URL of the GitHub repository, passed to ``GithubDataRepo``
    :return: pandas.DataFrame (the result of ``GithubDataRepo.read_data``)
    """
    gd = GithubDataRepo(github_url)
    try :
        return gd.read_data()
    finally :
        # Always call rmdir(), even when reading fails, so a failed read
        # does not skip the cleanup step.
        gd.rmdir()
+2
-1
Metadata-Version: 2.1
Name: githubdata
Version: 11.1.0
Version: 12.0.0
Summary: A simple tool to get the latest version of a dataset in a Github repository

@@ -50,2 +50,3 @@ Project-URL: Homepage, https://github.com/imahdimir/githubdata

Requires-Dist: giteasy
Requires-Dist: mirutil
Requires-Dist: openpyxl

@@ -52,0 +53,0 @@ Requires-Dist: pandas

@@ -7,3 +7,3 @@ [build-system]

name = "githubdata"
version = "11.1.0"
version = "12.0.0"
authors = [{ name = "Mahdi Mir", email = "imahdimir@gmail.com" }]

@@ -19,2 +19,3 @@ description = "A simple tool to get the latest version of a dataset in a Github repository"

"openpyxl",
"mirutil"
]

@@ -21,0 +22,0 @@ classifiers = [

@@ -1,2 +0,2 @@

from .githubdata import get_data_from_github
from .githubdata import GithubData
from .github_data_repo import get_data_from_github
from .github_data_repo import GithubDataRepo

@@ -7,8 +7,6 @@ """

from src.githubdata import githubdata
importlib.reload(githubdata)
from src.githubdata.githubdata import *
from src.githubdata.github_data_repo import *

@@ -22,4 +20,4 @@

u = 'https://github.com/imahdimir/d-TSETMC_ID-2-FirmTicker'
repo = GithubData(u)
repo.overwriting_clone()
repo = GithubDataRepo(u)
repo.clone_overwrite()

@@ -31,4 +29,4 @@ ##

u = 'https://github.com/imahdimir/test-public'
repo = GithubData(u)
repo.overwriting_clone()
repo = GithubDataRepo(u)
repo.clone_overwrite()

@@ -44,4 +42,4 @@ ##

ur = 'https://github.com/imahdimir/test-private'
rp = GithubData(ur)
rp.overwriting_clone()
rp = GithubDataRepo(ur)
rp.clone_overwrite()

@@ -48,0 +46,0 @@ ##

pandas
giteasy
IPython
pyarrow
fastparquet
openpyxl
build
twine
"""
"""
import json
from pathlib import Path
import pandas as pd
from giteasy.repo import Repo
# Recognised data-file suffixes, in the order they are tried.
# The mapping is used as an ordered set: values are always None.
data_file_suffixes = {suf : None for suf in ('.xlsx' , '.prq' , '.csv')}
class GithubData(Repo) :
    """
    A GitHub repository that stores a dataset, plus helpers to find and read it.

    Attributes set by this class:

    - ``data_suf``: suffix of the detected data files (one of the keys of
      ``data_file_suffixes``) or ``None`` when no data file was found.
    - ``data_fp``: a single ``Path`` when exactly one data file matches,
      a sorted list of ``Path`` objects when several match, or ``None``
      when none was found.
    - ``meta_fp`` / ``meta``: path and parsed content of the first ``.json``
      file, or ``None`` when there is none.

    NOTE(review): ``self.local_path`` is expected to be provided by the
    ``Repo`` base class -- confirm against giteasy.
    """

    def __init__(self , src_url , github_usr = None , usr_tok_json_fp = None) :
        """
        :param src_url: forwarded to ``Repo.__init__``
        :param github_usr: forwarded to ``Repo.__init__``
        :param usr_tok_json_fp: forwarded to ``Repo.__init__``
        """
        super().__init__(src_url = src_url ,
                         github_usr = github_usr ,
                         usr_tok_json_fp = usr_tok_json_fp)
        # Give every attribute a value up front so later reads (notably in
        # read_data) cannot hit an AttributeError when the repository holds
        # no data or metadata file.
        self.data_suf = None
        self.data_fp = None
        self.meta_fp = None
        self.meta = None
        self.set_data_fps()
        self.read_metadata()

    def overwriting_clone(self , overwrite = True , depth = 1) :
        """
        Delegate to the base-class ``overwriting_clone`` and re-scan data files.

        :param overwrite: forwarded to the base-class ``overwriting_clone``
        :param depth: forwarded to the base-class ``overwriting_clone``
        """
        super().overwriting_clone(overwrite = overwrite , depth = depth)
        self.set_data_fps()

    def _set_defualt_data_suffix(self) :
        """
        Set ``data_suf`` to the first known suffix that matches a local file.

        Suffixes are tried in the order of ``data_file_suffixes``; when none
        matches, ``data_suf`` is set to ``None``.
        """
        for ky in data_file_suffixes :
            if self.ret_sorted_fpns_by_suf(ky) :
                self.data_suf = ky
                return
        self.data_suf = None

    def set_data_fps(self) :
        """ Refresh ``data_suf`` and ``data_fp`` from the local directory. """
        self._set_defualt_data_suffix()
        if not self.data_suf :
            # Nothing to read: do not keep a path from an earlier scan.
            self.data_fp = None
            return
        fps = self.ret_sorted_fpns_by_suf(self.data_suf)
        # A single match is exposed as a bare Path, several as a list.
        self.data_fp = fps[0] if len(fps) == 1 else fps

    def ret_sorted_fpns_by_suf(self , suffix) :
        """
        Return the sorted paths directly under ``local_path`` ending in *suffix*.

        :param suffix: file-name ending to match, e.g. ``'.csv'``
        :return: sorted list of ``Path`` objects (empty when nothing matches)
        """
        return sorted(self.local_path.glob(f'*{suffix}'))

    def read_metadata(self) :
        """
        Load the first ``.json`` file (in sorted order) as repository metadata.

        :return: the parsed JSON content, or ``None`` when the local
                 directory holds no ``.json`` file
        """
        fps = self.ret_sorted_fpns_by_suf('.json')
        if not fps :
            return None
        self.meta_fp = fps[0]
        # Decode explicitly as UTF-8 instead of the platform default
        # encoding, so the result does not depend on the host locale.
        with open(self.meta_fp , 'r' , encoding = 'utf-8') as fi :
            self.meta = json.load(fi)
        return self.meta

    def read_data(self) :
        """
        Read the dataset, cloning the repository first if it is not local yet.

        :return: pandas.DataFrame read from the single data file, or ``None``
                 when there is no data file or more than one
        """
        if not self.local_path.exists() :
            self.overwriting_clone()
        if not isinstance(self.data_fp , Path) :
            return None
        if self.data_suf == '.xlsx' :
            return pd.read_excel(self.data_fp , engine = 'openpyxl')
        if self.data_suf == '.prq' :
            return pd.read_parquet(self.data_fp)
        if self.data_suf == '.csv' :
            return pd.read_csv(self.data_fp)
        return None
def get_data_from_github(github_url) :
    """
    Read the dataset stored in a GitHub repository and clean up afterwards.

    :param github_url: URL of the GitHub repository, passed to ``GithubData``
    :return: pandas.DataFrame (the result of ``GithubData.read_data``)
    """
    gd = GithubData(github_url)
    try :
        return gd.read_data()
    finally :
        # Always call rmdir(), even when reading fails, so a failed read
        # does not skip the cleanup step.
        gd.rmdir()