csvapi
Advanced tools
| from pathlib import Path | ||
| import pandas as pd | ||
| import sqlite3 | ||
| from pandas_profiling import ProfileReport | ||
| from csvapi.errors import APIError | ||
| from csvapi.utils import get_db_info | ||
| import json | ||
class CSVAPIProfileReport:
    """Build a minimal pandas-profiling report for a parsed CSV stored in SQLite."""

    def get_dataframe(self, db_info):
        """Load the whole table described by ``db_info`` into a DataFrame.

        :param db_info: dict with at least ``db_path`` and ``table_name`` keys
                        (shape per ``csvapi.utils.get_db_info``).
        :return: a ``pandas.DataFrame`` holding the table's full contents.
        """
        # immutable=1: open the DB read-only without locking — the parsed DB
        # is only ever read here.
        dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
        conn = sqlite3.connect(dsn, uri=True)
        try:
            sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
            return pd.read_sql_query(sql, con=conn)
        finally:
            # Always release the connection, even when the query raises
            # (the original leaked it unconditionally).
            conn.close()

    async def get_minimal_profile(self, urlhash: str) -> dict:
        """Compute a minimal profiling report for the DB identified by ``urlhash``.

        :param urlhash: hash identifying the parsed CSV's sqlite DB.
        :return: the profiling report as a plain dict (parsed from JSON).
        :raises APIError: 404 when the DB file is missing, 400 on SQL errors.
        """
        db_info = get_db_info(urlhash)
        if not Path(db_info['db_path']).exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        try:
            df = self.get_dataframe(db_info)
            profile = ProfileReport(
                df, minimal=True,
                vars=dict(num={"low_categorical_threshold": 0}),
                plot=dict(histogram={"bins": 10}),
                # pool_size=1 disables the ThreadPoolExecutor inside
                # pandas-profiling; remove it or set it to 0 to use the
                # number of CPUs as the pool size.
                pool_size=1,
                progress_bar=False,
            )
            return json.loads(profile.to_json())
        except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
            raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
+1
-5
@@ -31,4 +31,2 @@ import click | ||
| help='Do not parse CSV again if DB already exists') | ||
| @click.option('-w', '--max-workers', default=3, | ||
| help='Max number of ThreadPoolExecutor workers') | ||
| @click.option('--ssl-cert', default=None, | ||
@@ -39,3 +37,3 @@ help='Path to SSL certificate') | ||
| @cli.command() | ||
| def serve(dbs, host, port, debug, reload, cache, max_workers, ssl_cert, ssl_key): | ||
| def serve(dbs, host, port, debug, reload, cache, ssl_cert, ssl_key): | ||
| ssl_context = None | ||
@@ -48,7 +46,5 @@ if ssl_cert and ssl_key: | ||
| 'CSV_CACHE_ENABLED': cache, | ||
| 'MAX_WORKERS': max_workers, | ||
| 'DEBUG': debug, | ||
| # TODO this probably does not exist in Quart | ||
| 'RESPONSE_TIMEOUT': RESPONSE_TIMEOUT, | ||
| }) | ||
| app.run(host=host, port=port, debug=debug, use_reloader=reload, ssl=ssl_context) |
+3
-10
@@ -11,6 +11,5 @@ import os | ||
| from csvapi.profileview import ProfileView | ||
| from csvapi.errors import APIError | ||
| from csvapi.parser import parse | ||
| from csvapi.profiling import CSVAPIProfileReport | ||
| from csvapi.utils import ( | ||
@@ -78,8 +77,3 @@ already_exists, | ||
| profileViewInstance = ProfileView() | ||
| profile_report = await profileViewInstance.get_minimal_profile( | ||
| profileViewInstance, | ||
| urlhash=urlhash, | ||
| csv_detective_report=csv_detective_report | ||
| ) | ||
| profile_report = await CSVAPIProfileReport().get_minimal_profile(urlhash) | ||
@@ -101,3 +95,3 @@ if not check_profile_report_structure(profile_report): | ||
| if not is_csv and analysis and analysis == 'yes': | ||
| conn = create_connection(app.config['DB_ROOT_DIR'] + '/' + urlhash + '.db') | ||
| conn = create_connection(f"{app.config['DB_ROOT_DIR']}/{urlhash}.db") | ||
| general_infos = [ | ||
@@ -145,3 +139,2 @@ { | ||
| 'endpoint': f"{scheme}://{request.host}/api/{urlhash}", | ||
| 'profile_endpoint': f"{scheme}://{request.host}/profile/{urlhash}", | ||
| }) |
+0
-3
@@ -18,6 +18,4 @@ import hashlib | ||
| db_storage = storage or app.config['DB_ROOT_DIR'] | ||
| profile_storage = app.config['PROFILES_ROOT_DIR'] | ||
| db_path = f"{db_storage}/{urlhash}.db" | ||
| profile_path = f"{profile_storage}/{urlhash}.html" | ||
| return { | ||
@@ -28,3 +26,2 @@ 'dsn': f"sqlite:///{db_path}", | ||
| 'db_path': db_path, | ||
| 'profile_path': profile_path, | ||
| } | ||
@@ -31,0 +28,0 @@ |
@@ -13,3 +13,2 @@ import os | ||
| from csvapi.parseview import ParseView | ||
| from csvapi.profileview import ProfileView | ||
| from csvapi.security import filter_referrers | ||
@@ -21,3 +20,2 @@ | ||
| app.add_url_rule('/api/<urlhash>', view_func=TableView.as_view('table')) | ||
| app.add_url_rule('/profile/<urlhash>', view_func=ProfileView.as_view('profile')) | ||
| app.add_url_rule('/api/<urlhash>/export', view_func=ExportView.as_view('export')) | ||
@@ -24,0 +22,0 @@ app.add_url_rule('/apify', view_func=ParseView.as_view('parse')) |
+2
-1
| Metadata-Version: 2.1 | ||
| Name: csvapi | ||
| Version: 2.1.1 | ||
| Version: 2.1.1857 | ||
| Summary: An instant JSON API for your CSV | ||
@@ -13,2 +13,3 @@ License: MIT | ||
| Classifier: Programming Language :: Python :: 3.10 | ||
| Classifier: Programming Language :: Python :: 3.11 | ||
| Requires-Dist: Quart (>=0.18.0,<0.19.0) | ||
@@ -15,0 +16,0 @@ Requires-Dist: agate (>=1.6.3,<1.7.0) |
+1
-1
| [tool.poetry] | ||
| name = "csvapi" | ||
| version = "2.1.1" | ||
| version = "2.1.1857" | ||
| description = "An instant JSON API for your CSV" | ||
@@ -5,0 +5,0 @@ authors = ["Opendatateam <opendatateam@data.gouv.fr>"] |
+1
-1
@@ -35,3 +35,3 @@ # -*- coding: utf-8 -*- | ||
| 'name': 'csvapi', | ||
| 'version': '2.1.1', | ||
| 'version': '2.1.1857', | ||
| 'description': 'An instant JSON API for your CSV', | ||
@@ -38,0 +38,0 @@ 'long_description': 'None', |
| from pathlib import Path | ||
| import pandas as pd | ||
| import sqlite3 | ||
| from quart import send_from_directory | ||
| from quart.views import MethodView | ||
| from pandas_profiling import ProfileReport | ||
| from csvapi.errors import APIError | ||
| from csvapi.utils import get_db_info | ||
| from quart import current_app as app | ||
| import json | ||
class ProfileView(MethodView):
    """Serve pandas-profiling reports for a parsed CSV stored in SQLite."""

    def get_dataframe(self, db_info):
        """Load the whole table described by ``db_info`` into a DataFrame.

        :param db_info: dict with at least ``db_path`` and ``table_name`` keys.
        :return: a ``pandas.DataFrame`` holding the table's full contents.
        """
        # immutable=1: open the DB read-only without locking — only reads here.
        dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
        conn = sqlite3.connect(dsn, uri=True)
        try:
            sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
            return pd.read_sql_query(sql, con=conn)
        finally:
            # Always release the connection, even when the query raises
            # (the original leaked it unconditionally).
            conn.close()

    def make_profile(self, db_info):
        """Generate the HTML profile report on disk and return its path."""
        df = self.get_dataframe(db_info)
        if app.config['PANDAS_PROFILING_CONFIG_MIN']:
            profile = ProfileReport(df, config_file="profiling-minimal.yml")
        else:
            profile = ProfileReport(df)
        profile.to_file(db_info['profile_path'])
        return Path(db_info['profile_path'])

    async def get(self, urlhash):
        """GET /profile/<urlhash>: serve the (cached, else freshly built) HTML report.

        :raises APIError: 404 when the sqlite DB is missing, 400 on SQL errors.
        """
        db_info = get_db_info(urlhash)
        if not Path(db_info['db_path']).exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        path = Path(db_info['profile_path'])
        if not path.exists():
            try:
                path = self.make_profile(db_info)
            except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
                raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
        return await send_from_directory(path.parent, path.name)

    async def get_minimal_profile(self, url: str, urlhash: str, csv_detective_report: dict):
        """Compute a minimal profiling report as a dict.

        ``url`` and ``csv_detective_report`` are unused in this body; they are
        kept so the caller-facing signature stays unchanged.

        :return: the report parsed from JSON when no cached HTML profile
                 exists; otherwise a file response for the cached profile.
                 NOTE(review): that fallback looks like refactor leftover —
                 callers likely always expect the dict; confirm upstream.
        :raises APIError: 404 when the sqlite DB is missing, 400 on SQL errors.
        """
        db_info = get_db_info(urlhash)
        if not Path(db_info['db_path']).exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        path = Path(db_info['profile_path'])
        if not path.exists():
            try:
                df = self.get_dataframe(db_info)
                profile = ProfileReport(
                    df, minimal=True,
                    vars=dict(num={"low_categorical_threshold": 0}),
                    plot=dict(histogram={"bins": 10})
                )
                return json.loads(profile.to_json())
            except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
                raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
        return await send_from_directory(path.parent, path.name)
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
43296
-4%979
-3.64%