csvapi
Advanced tools
| from pathlib import Path | ||
| import pandas as pd | ||
| import sqlite3 | ||
| from pandas_profiling import ProfileReport | ||
| from csvapi.errors import APIError | ||
| from csvapi.utils import get_db_info | ||
| import json | ||
class CSVAPIProfileReport:
    """Build a minimal pandas-profiling report for a parsed CSV stored in SQLite."""

    def get_dataframe(self, db_info):
        """Load the whole table described by ``db_info`` into a DataFrame.

        :param db_info: dict with at least ``db_path`` and ``table_name`` keys
                        (shape per ``csvapi.utils.get_db_info``).
        :return: a ``pandas.DataFrame`` holding the table's full contents.
        """
        # immutable=1: open the DB read-only without locking — the parsed DB
        # is only ever read here.
        dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
        conn = sqlite3.connect(dsn, uri=True)
        try:
            sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
            return pd.read_sql_query(sql, con=conn)
        finally:
            # Always release the connection, even when the query raises
            # (the original leaked it unconditionally).
            conn.close()

    async def get_minimal_profile(self, urlhash: str) -> dict:
        """Compute a minimal profiling report for the DB identified by ``urlhash``.

        :param urlhash: hash identifying the parsed CSV's sqlite DB.
        :return: the profiling report as a plain dict (parsed from JSON).
        :raises APIError: 404 when the DB file is missing, 400 on SQL errors.
        """
        db_info = get_db_info(urlhash)
        if not Path(db_info['db_path']).exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        try:
            df = self.get_dataframe(db_info)
            profile = ProfileReport(
                df, minimal=True,
                vars=dict(num={"low_categorical_threshold": 0}),
                plot=dict(histogram={"bins": 10}),
                # pool_size=1 disables the ThreadPoolExecutor inside
                # pandas-profiling; remove it or set it to 0 to use the
                # number of CPUs as the pool size.
                pool_size=1,
                progress_bar=False,
            )
            return json.loads(profile.to_json())
        except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
            raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
+1
-5
@@ -31,4 +31,2 @@ import click | ||
| help='Do not parse CSV again if DB already exists') | ||
| @click.option('-w', '--max-workers', default=3, | ||
| help='Max number of ThreadPoolExecutor workers') | ||
| @click.option('--ssl-cert', default=None, | ||
@@ -39,3 +37,3 @@ help='Path to SSL certificate') | ||
| @cli.command() | ||
| def serve(dbs, host, port, debug, reload, cache, max_workers, ssl_cert, ssl_key): | ||
| def serve(dbs, host, port, debug, reload, cache, ssl_cert, ssl_key): | ||
| ssl_context = None | ||
@@ -48,7 +46,5 @@ if ssl_cert and ssl_key: | ||
| 'CSV_CACHE_ENABLED': cache, | ||
| 'MAX_WORKERS': max_workers, | ||
| 'DEBUG': debug, | ||
| # TODO this probably does not exist in Quart | ||
| 'RESPONSE_TIMEOUT': RESPONSE_TIMEOUT, | ||
| }) | ||
| app.run(host=host, port=port, debug=debug, use_reloader=reload, ssl=ssl_context) |
+3
-10
@@ -11,6 +11,5 @@ import os | ||
| from csvapi.profileview import ProfileView | ||
| from csvapi.errors import APIError | ||
| from csvapi.parser import parse | ||
| from csvapi.profiling import CSVAPIProfileReport | ||
| from csvapi.utils import ( | ||
@@ -78,8 +77,3 @@ already_exists, | ||
| profileViewInstance = ProfileView() | ||
| profile_report = await profileViewInstance.get_minimal_profile( | ||
| profileViewInstance, | ||
| urlhash=urlhash, | ||
| csv_detective_report=csv_detective_report | ||
| ) | ||
| profile_report = await CSVAPIProfileReport().get_minimal_profile(urlhash) | ||
@@ -101,3 +95,3 @@ if not check_profile_report_structure(profile_report): | ||
| if not is_csv and analysis and analysis == 'yes': | ||
| conn = create_connection(app.config['DB_ROOT_DIR'] + '/' + urlhash + '.db') | ||
| conn = create_connection(f"{app.config['DB_ROOT_DIR']}/{urlhash}.db") | ||
| general_infos = [ | ||
@@ -145,3 +139,2 @@ { | ||
| 'endpoint': f"{scheme}://{request.host}/api/{urlhash}", | ||
| 'profile_endpoint': f"{scheme}://{request.host}/profile/{urlhash}", | ||
| }) |
+0
-3
@@ -18,6 +18,4 @@ import hashlib | ||
| db_storage = storage or app.config['DB_ROOT_DIR'] | ||
| profile_storage = app.config['PROFILES_ROOT_DIR'] | ||
| db_path = f"{db_storage}/{urlhash}.db" | ||
| profile_path = f"{profile_storage}/{urlhash}.html" | ||
| return { | ||
@@ -28,3 +26,2 @@ 'dsn': f"sqlite:///{db_path}", | ||
| 'db_path': db_path, | ||
| 'profile_path': profile_path, | ||
| } | ||
@@ -31,0 +28,0 @@ |
@@ -13,3 +13,2 @@ import os | ||
| from csvapi.parseview import ParseView | ||
| from csvapi.profileview import ProfileView | ||
| from csvapi.security import filter_referrers | ||
@@ -21,3 +20,2 @@ | ||
| app.add_url_rule('/api/<urlhash>', view_func=TableView.as_view('table')) | ||
| app.add_url_rule('/profile/<urlhash>', view_func=ProfileView.as_view('profile')) | ||
| app.add_url_rule('/api/<urlhash>/export', view_func=ExportView.as_view('export')) | ||
@@ -24,0 +22,0 @@ app.add_url_rule('/apify', view_func=ParseView.as_view('parse')) |
+2
-1
| Metadata-Version: 2.1 | ||
| Name: csvapi | ||
| Version: 2.1.1 | ||
| Version: 2.1.1857 | ||
| Summary: An instant JSON API for your CSV | ||
@@ -13,2 +13,3 @@ License: MIT | ||
| Classifier: Programming Language :: Python :: 3.10 | ||
| Classifier: Programming Language :: Python :: 3.11 | ||
| Requires-Dist: Quart (>=0.18.0,<0.19.0) | ||
@@ -15,0 +16,0 @@ Requires-Dist: agate (>=1.6.3,<1.7.0) |
+1
-1
| [tool.poetry] | ||
| name = "csvapi" | ||
| version = "2.1.1" | ||
| version = "2.1.1857" | ||
| description = "An instant JSON API for your CSV" | ||
@@ -5,0 +5,0 @@ authors = ["Opendatateam <opendatateam@data.gouv.fr>"] |
+1
-1
@@ -35,3 +35,3 @@ # -*- coding: utf-8 -*- | ||
| 'name': 'csvapi', | ||
| 'version': '2.1.1', | ||
| 'version': '2.1.1857', | ||
| 'description': 'An instant JSON API for your CSV', | ||
@@ -38,0 +38,0 @@ 'long_description': 'None', |
| from pathlib import Path | ||
| import pandas as pd | ||
| import sqlite3 | ||
| from quart import send_from_directory | ||
| from quart.views import MethodView | ||
| from pandas_profiling import ProfileReport | ||
| from csvapi.errors import APIError | ||
| from csvapi.utils import get_db_info | ||
| from quart import current_app as app | ||
| import json | ||
class ProfileView(MethodView):
    """Serve pandas-profiling reports for a parsed CSV stored in SQLite."""

    def get_dataframe(self, db_info):
        """Load the whole table described by ``db_info`` into a DataFrame.

        :param db_info: dict with at least ``db_path`` and ``table_name`` keys.
        :return: a ``pandas.DataFrame`` holding the table's full contents.
        """
        # immutable=1: open the DB read-only without locking — only reads here.
        dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
        conn = sqlite3.connect(dsn, uri=True)
        try:
            sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
            return pd.read_sql_query(sql, con=conn)
        finally:
            # Always release the connection, even when the query raises
            # (the original leaked it unconditionally).
            conn.close()

    def make_profile(self, db_info):
        """Generate the HTML profile report on disk and return its path."""
        df = self.get_dataframe(db_info)
        if app.config['PANDAS_PROFILING_CONFIG_MIN']:
            profile = ProfileReport(df, config_file="profiling-minimal.yml")
        else:
            profile = ProfileReport(df)
        profile.to_file(db_info['profile_path'])
        return Path(db_info['profile_path'])

    async def get(self, urlhash):
        """GET /profile/<urlhash>: serve the (cached, else freshly built) HTML report.

        :raises APIError: 404 when the sqlite DB is missing, 400 on SQL errors.
        """
        db_info = get_db_info(urlhash)
        if not Path(db_info['db_path']).exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        path = Path(db_info['profile_path'])
        if not path.exists():
            try:
                path = self.make_profile(db_info)
            except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
                raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
        return await send_from_directory(path.parent, path.name)

    async def get_minimal_profile(self, url: str, urlhash: str, csv_detective_report: dict):
        """Compute a minimal profiling report as a dict.

        ``url`` and ``csv_detective_report`` are unused in this body; they are
        kept so the caller-facing signature stays unchanged.

        :return: the report parsed from JSON when no cached HTML profile
                 exists; otherwise a file response for the cached profile.
                 NOTE(review): that fallback looks like refactor leftover —
                 callers likely always expect the dict; confirm upstream.
        :raises APIError: 404 when the sqlite DB is missing, 400 on SQL errors.
        """
        db_info = get_db_info(urlhash)
        if not Path(db_info['db_path']).exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        path = Path(db_info['profile_path'])
        if not path.exists():
            try:
                df = self.get_dataframe(db_info)
                profile = ProfileReport(
                    df, minimal=True,
                    vars=dict(num={"low_categorical_threshold": 0}),
                    plot=dict(histogram={"bins": 10})
                )
                return json.loads(profile.to_json())
            except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
                raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
        return await send_from_directory(path.parent, path.name)
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
43296
-4%979
-3.64%