githubdata
Advanced tools
| """ | ||
| """ | ||
| from pathlib import Path | ||
| from giteasy import GitHubRepo | ||
| from mirutil.df import read_data_according_to_type as rdatt | ||
| from mirutil.files import read_json_file as rjf | ||
| # Data-file suffixes probed in this (insertion) order; only the keys are read in this module, the None values are unused placeholders. | ||
| data_file_suffixes = { | ||
| '.xlsx' : None , | ||
| '.prq' : None , | ||
| '.csv' : None , | ||
| } | ||
class GithubDataRepo(GitHubRepo) :
    """ A GitHub repository treated as a container for a dataset.

    On top of the base repo it tracks which file(s) in the local copy hold
    the data and which JSON file holds the metadata.

    Attributes
    ----------
    data_suf : str or None
        Suffix of the detected data files, or None when none was detected.
    data_fp : Path, list of Path, or None
        The single data file, the sorted list of data files when there is
        not exactly one, or None when no suffix has been detected yet.
    meta_fp : Path or None
        Path of the metadata JSON file once `read_metadata` has found one.
    meta : dict or None
        Parsed metadata once `read_metadata` has found a JSON file.
    """

    def __init__(self , repo_url , committing_usr = None , token = None) :
        """ Set up the repo and probe the local directory for data files.

        :param repo_url: URL of the GitHub repository
        :param committing_usr: forwarded unchanged to the base class
        :param token: forwarded unchanged to the base class
        """
        super().__init__(repo_url = repo_url ,
                         committing_usr = committing_usr ,
                         token = token)

        # Every attribute gets a value *before* the directory is probed:
        # set_data_fps() reads self.data_suf, and on a repo that has not been
        # cloned yet nothing is found, so nothing would assign it.
        self.data_suf = None
        self.data_fp = None
        self.meta_fp = None
        self.meta = None

        self.set_data_fps()

    def clone_overwrite(self , depth = 1) :
        """ Clone through the base class, then re-detect the data files.

        :param depth: forwarded unchanged to the base class
        """
        super().clone_overwrite(depth = depth)
        self.set_data_fps()

    def _set_defualt_data_suffix(self) :
        """ Pick the first known suffix that has at least one matching file.

        Suffixes are tried in the order of `data_file_suffixes`. When none
        matches, `data_suf` is left as it was.
        """
        for suf in data_file_suffixes :
            if self.ret_sorted_fpns_by_suf(suf) :
                self.data_suf = suf
                return

    def set_data_fps(self) :
        """ Refresh `data_suf` and `data_fp` from the local directory.

        `data_fp` becomes a single Path when exactly one file matches the
        suffix, otherwise the sorted list of matches.
        """
        self._set_defualt_data_suffix()

        if self.data_suf is None :
            return

        fps = self.ret_sorted_fpns_by_suf(self.data_suf)
        self.data_fp = fps[0] if len(fps) == 1 else fps

    def ret_sorted_fpns_by_suf(self , suffix) :
        """ Return the sorted paths directly under `local_path` ending in suffix.

        :param suffix: file-name ending to match, e.g. '.csv'
        :return: sorted list of Path (empty when nothing matches)
        """
        return sorted(self.local_path.glob(f'*{suffix}'))

    def read_metadata(self) :
        """ Load the first (in sorted order) '.json' file as metadata.

        :return: the parsed metadata, or None when there is no JSON file
        """
        fps = self.ret_sorted_fpns_by_suf('.json')
        if not fps :
            return None

        self.meta_fp = fps[0]
        self.meta = rjf(self.meta_fp)
        return self.meta

    def read_data(self) :
        """ Read the repo's data file, cloning first if there is no local copy.

        :return: whatever `rdatt` returns for the data file (presumably a
            pandas.DataFrame - confirm against mirutil), or None when the
            repo does not hold exactly one data file
        """
        if not self.local_path.exists() :
            self.clone_overwrite()

        # Only the single-file case is readable; a list of files or a
        # missing data file yields None.
        if isinstance(self.data_fp , Path) :
            return rdatt(self.data_fp)

        return None
def get_data_from_github(github_url) :
    """ Fetch the dataset stored in a GitHub repository and clean up after.

    :param github_url: URL of the GitHub repository holding the data
    :return: the result of `GithubDataRepo.read_data()` (documented by the
        original author as a pandas.DataFrame)
    """
    gd = GithubDataRepo(github_url)
    try :
        return gd.read_data()
    finally :
        # Remove the local copy even when reading fails, so an error does
        # not leave a clone behind. Skip it when nothing is on disk, so a
        # failed clone does not mask the original error.
        if gd.local_path.exists() :
            gd.rmdir()
+2
-1
| Metadata-Version: 2.1 | ||
| Name: githubdata | ||
| Version: 11.1.0 | ||
| Version: 12.0.0 | ||
| Summary: A simple tool to get the latest version of a dataset in a Github repository | ||
@@ -50,2 +50,3 @@ Project-URL: Homepage, https://github.com/imahdimir/githubdata | ||
| Requires-Dist: giteasy | ||
| Requires-Dist: mirutil | ||
| Requires-Dist: openpyxl | ||
@@ -52,0 +53,0 @@ Requires-Dist: pandas |
+2
-1
@@ -7,3 +7,3 @@ [build-system] | ||
| name = "githubdata" | ||
| version = "11.1.0" | ||
| version = "12.0.0" | ||
| authors = [{ name = "Mahdi Mir", email = "imahdimir@gmail.com" }] | ||
@@ -19,2 +19,3 @@ description = "A simple tool to get the latest version of a dataset in a Github repository" | ||
| "openpyxl", | ||
| "mirutil" | ||
| ] | ||
@@ -21,0 +22,0 @@ classifiers = [ |
@@ -1,2 +0,2 @@ | ||
| from .githubdata import get_data_from_github | ||
| from .githubdata import GithubData | ||
| from .github_data_repo import get_data_from_github | ||
| from .github_data_repo import GithubDataRepo |
+7
-9
@@ -7,8 +7,6 @@ """ | ||
| from src.githubdata import githubdata | ||
| importlib.reload(githubdata) | ||
| from src.githubdata.githubdata import * | ||
| from src.githubdata.github_data_repo import * | ||
@@ -22,4 +20,4 @@ | ||
| u = 'https://github.com/imahdimir/d-TSETMC_ID-2-FirmTicker' | ||
| repo = GithubData(u) | ||
| repo.overwriting_clone() | ||
| repo = GithubDataRepo(u) | ||
| repo.clone_overwrite() | ||
@@ -31,4 +29,4 @@ ## | ||
| u = 'https://github.com/imahdimir/test-public' | ||
| repo = GithubData(u) | ||
| repo.overwriting_clone() | ||
| repo = GithubDataRepo(u) | ||
| repo.clone_overwrite() | ||
@@ -44,4 +42,4 @@ ## | ||
| ur = 'https://github.com/imahdimir/test-private' | ||
| rp = GithubData(ur) | ||
| rp.overwriting_clone() | ||
| rp = GithubDataRepo(ur) | ||
| rp.clone_overwrite() | ||
@@ -48,0 +46,0 @@ ## |
| pandas | ||
| giteasy | ||
| IPython | ||
| pyarrow | ||
| fastparquet | ||
| openpyxl | ||
| build | ||
| twine |
| """ | ||
| """ | ||
| import json | ||
| from pathlib import Path | ||
| import pandas as pd | ||
| from giteasy.repo import Repo | ||
| # Data-file suffixes probed in this (insertion) order; only the keys are read in this module, the None values are unused placeholders. | ||
| data_file_suffixes = { | ||
| '.xlsx' : None , | ||
| '.prq' : None , | ||
| '.csv' : None , | ||
| } | ||
class GithubData(Repo) :
    """ A GitHub repository treated as a container for a dataset.

    Attributes
    ----------
    data_suf : str or None
        Suffix of the detected data files, or None when none was detected.
    data_fp : Path, list of Path, or None
        The single data file, the sorted list of data files when there is
        not exactly one, or None when no data file was detected.
    meta_fp : Path or None
        Path of the metadata JSON file, when one exists.
    meta : dict or None
        Parsed metadata, when a JSON file exists.
    """

    def __init__(self , src_url , github_usr = None , usr_tok_json_fp = None) :
        """ Set up the repo, then probe the local directory for data/metadata.

        :param src_url: URL of the GitHub repository
        :param github_usr: forwarded unchanged to the base class
        :param usr_tok_json_fp: forwarded unchanged to the base class
        """
        super().__init__(src_url = src_url ,
                         github_usr = github_usr ,
                         usr_tok_json_fp = usr_tok_json_fp)

        # Defaults first: on a repo without a local copy nothing is found,
        # and read_data() must still be able to inspect these attributes.
        self.data_suf = None
        self.data_fp = None
        self.meta_fp = None
        self.meta = None

        self.set_data_fps()
        self.read_metadata()

    def overwriting_clone(self , overwrite = True , depth = 1) :
        """ Clone through the base class, then re-detect the data files.

        :param overwrite: forwarded unchanged to the base class
        :param depth: forwarded unchanged to the base class
        """
        super().overwriting_clone(overwrite = overwrite , depth = depth)
        self.set_data_fps()

    def _set_defualt_data_suffix(self) :
        """ Pick the first known suffix that has at least one matching file.

        Suffixes are tried in the order of `data_file_suffixes`; `data_suf`
        is set to None when none of them matches.
        """
        for suf in data_file_suffixes :
            if self.ret_sorted_fpns_by_suf(suf) :
                self.data_suf = suf
                return

        self.data_suf = None

    def set_data_fps(self) :
        """ Refresh `data_suf` and `data_fp` from the local directory.

        `data_fp` becomes a single Path when exactly one file matches the
        suffix, the sorted list of matches when several do, and None when no
        data file is present.
        """
        self._set_defualt_data_suffix()

        if not self.data_suf :
            # Drop any path remembered from an earlier scan so that it
            # cannot outlive the files it pointed to.
            self.data_fp = None
            return

        fps = self.ret_sorted_fpns_by_suf(self.data_suf)
        self.data_fp = fps[0] if len(fps) == 1 else fps

    def ret_sorted_fpns_by_suf(self , suffix) :
        """ Return the sorted paths directly under `local_path` ending in suffix.

        :param suffix: file-name ending to match, e.g. '.csv'
        :return: sorted list of Path (empty when nothing matches)
        """
        return sorted(self.local_path.glob(f'*{suffix}'))

    def read_metadata(self) :
        """ Load the first (in sorted order) '.json' file as metadata.

        :return: the parsed JSON content, or None when there is no JSON file
        """
        fps = self.ret_sorted_fpns_by_suf('.json')
        if not fps :
            return None

        self.meta_fp = fps[0]

        # JSON text is read as UTF-8 explicitly rather than relying on the
        # platform default encoding.
        with open(self.meta_fp , 'r' , encoding = 'utf-8') as fi :
            self.meta = json.load(fi)

        return self.meta

    def read_data(self) :
        """ Read the repo's data file, cloning first if there is no local copy.

        :return: pandas.DataFrame, or None when the repo does not hold
            exactly one data file of a known type
        """
        if not self.local_path.exists() :
            self.overwriting_clone()

        # Only the single-file case is readable; a list of files or a
        # missing data file yields None.
        if not isinstance(self.data_fp , Path) :
            return None

        if self.data_suf == '.xlsx' :
            return pd.read_excel(self.data_fp , engine = 'openpyxl')
        elif self.data_suf == '.prq' :
            return pd.read_parquet(self.data_fp)
        elif self.data_suf == '.csv' :
            return pd.read_csv(self.data_fp)

        return None
def get_data_from_github(github_url) :
    """ Fetch the dataset stored in a GitHub repository and clean up after.

    :param github_url: URL of the GitHub repository holding the data
    :return: pandas.DataFrame, or None when `GithubData.read_data()` finds
        no single readable data file
    """
    gd = GithubData(github_url)
    try :
        return gd.read_data()
    finally :
        # Remove the local copy even when reading fails, so an error does
        # not leave a clone behind. Skip it when nothing is on disk, so a
        # failed clone does not mask the original error.
        if gd.local_path.exists() :
            gd.rmdir()
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
12357
-2.15%9
-10%96
-5.88%