New Research: Supply Chain Attack on Axios Pulls Malicious Dependency from npm.Details →
Socket
Book a DemoSign in
Socket

csvapi

Package Overview
Dependencies
Maintainers
2
Versions
38
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

csvapi - pypi Package Compare versions

Comparing version
2.1.1
to
2.1.1857
+43
csvapi/profiling.py
from pathlib import Path
import pandas as pd
import sqlite3
from pandas_profiling import ProfileReport
from csvapi.errors import APIError
from csvapi.utils import get_db_info
import json
class CSVAPIProfileReport:
    """Build a minimal pandas-profiling report for a CSV previously parsed
    into a per-urlhash sqlite database."""

    def get_dataframe(self, db_info):
        """Load the entire table described by *db_info* into a DataFrame.

        :param db_info: mapping with at least ``db_path`` (path to the sqlite
            file) and ``table_name`` keys (as produced by ``get_db_info``).
        :return: ``pandas.DataFrame`` with every row of the table.
        """
        # immutable=1 opens the database read-only and tells sqlite the file
        # will not change, skipping locking.
        dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
        conn = sqlite3.connect(dsn, uri=True)
        try:
            # Table name is interpolated (bracket-quoted), not parameterized:
            # sqlite placeholders cannot bind identifiers. db_info comes from
            # get_db_info, not directly from user input.
            sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
            return pd.read_sql_query(sql, con=conn)
        finally:
            # The original leaked this handle; always release it, even when
            # the query raises.
            conn.close()

    async def get_minimal_profile(self, urlhash: str) -> dict:
        """Return the minimal profiling report for *urlhash* as a dict.

        :param urlhash: hash identifying the parsed CSV / sqlite database.
        :return: the pandas-profiling report, decoded from its JSON form.
        :raises APIError: 404 if the database file is missing, 400 if the
            SELECT fails (e.g. corrupted or incompatible database).
        """
        db_info = get_db_info(urlhash)
        p = Path(db_info['db_path'])
        if not p.exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        try:
            df = self.get_dataframe(db_info)
            profile = ProfileReport(
                df, minimal=True,
                vars=dict(num={"low_categorical_threshold": 0}),
                plot=dict(histogram={"bins": 10}),
                # this disables the ThreadPoolExecutor in pandas-profiling
                # remove it or set it to 0 to use the number of CPUs as pool size
                pool_size=1,
                progress_bar=False,
            )
            profile_report = json.loads(profile.to_json())
            return profile_report
        except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
            raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
+1
-5

@@ -31,4 +31,2 @@ import click

help='Do not parse CSV again if DB already exists')
@click.option('-w', '--max-workers', default=3,
help='Max number of ThreadPoolExecutor workers')
@click.option('--ssl-cert', default=None,

@@ -39,3 +37,3 @@ help='Path to SSL certificate')

@cli.command()
def serve(dbs, host, port, debug, reload, cache, max_workers, ssl_cert, ssl_key):
def serve(dbs, host, port, debug, reload, cache, ssl_cert, ssl_key):
ssl_context = None

@@ -48,7 +46,5 @@ if ssl_cert and ssl_key:

'CSV_CACHE_ENABLED': cache,
'MAX_WORKERS': max_workers,
'DEBUG': debug,
# TODO this probably does not exist in Quart
'RESPONSE_TIMEOUT': RESPONSE_TIMEOUT,
})
app.run(host=host, port=port, debug=debug, use_reloader=reload, ssl=ssl_context)

@@ -11,6 +11,5 @@ import os

from csvapi.profileview import ProfileView
from csvapi.errors import APIError
from csvapi.parser import parse
from csvapi.profiling import CSVAPIProfileReport
from csvapi.utils import (

@@ -78,8 +77,3 @@ already_exists,

profileViewInstance = ProfileView()
profile_report = await profileViewInstance.get_minimal_profile(
profileViewInstance,
urlhash=urlhash,
csv_detective_report=csv_detective_report
)
profile_report = await CSVAPIProfileReport().get_minimal_profile(urlhash)

@@ -101,3 +95,3 @@ if not check_profile_report_structure(profile_report):

if not is_csv and analysis and analysis == 'yes':
conn = create_connection(app.config['DB_ROOT_DIR'] + '/' + urlhash + '.db')
conn = create_connection(f"{app.config['DB_ROOT_DIR']}/{urlhash}.db")
general_infos = [

@@ -145,3 +139,2 @@ {

'endpoint': f"{scheme}://{request.host}/api/{urlhash}",
'profile_endpoint': f"{scheme}://{request.host}/profile/{urlhash}",
})

@@ -18,6 +18,4 @@ import hashlib

db_storage = storage or app.config['DB_ROOT_DIR']
profile_storage = app.config['PROFILES_ROOT_DIR']
db_path = f"{db_storage}/{urlhash}.db"
profile_path = f"{profile_storage}/{urlhash}.html"
return {

@@ -28,3 +26,2 @@ 'dsn': f"sqlite:///{db_path}",

'db_path': db_path,
'profile_path': profile_path,
}

@@ -31,0 +28,0 @@

@@ -13,3 +13,2 @@ import os

from csvapi.parseview import ParseView
from csvapi.profileview import ProfileView
from csvapi.security import filter_referrers

@@ -21,3 +20,2 @@

app.add_url_rule('/api/<urlhash>', view_func=TableView.as_view('table'))
app.add_url_rule('/profile/<urlhash>', view_func=ProfileView.as_view('profile'))
app.add_url_rule('/api/<urlhash>/export', view_func=ExportView.as_view('export'))

@@ -24,0 +22,0 @@ app.add_url_rule('/apify', view_func=ParseView.as_view('parse'))

Metadata-Version: 2.1
Name: csvapi
Version: 2.1.1
Version: 2.1.1857
Summary: An instant JSON API for your CSV

@@ -13,2 +13,3 @@ License: MIT

Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Requires-Dist: Quart (>=0.18.0,<0.19.0)

@@ -15,0 +16,0 @@ Requires-Dist: agate (>=1.6.3,<1.7.0)

[tool.poetry]
name = "csvapi"
version = "2.1.1"
version = "2.1.1857"
description = "An instant JSON API for your CSV"

@@ -5,0 +5,0 @@ authors = ["Opendatateam <opendatateam@data.gouv.fr>"]

@@ -35,3 +35,3 @@ # -*- coding: utf-8 -*-

'name': 'csvapi',
'version': '2.1.1',
'version': '2.1.1857',
'description': 'An instant JSON API for your CSV',

@@ -38,0 +38,0 @@ 'long_description': 'None',

from pathlib import Path
import pandas as pd
import sqlite3
from quart import send_from_directory
from quart.views import MethodView
from pandas_profiling import ProfileReport
from csvapi.errors import APIError
from csvapi.utils import get_db_info
from quart import current_app as app
import json
class ProfileView(MethodView):
    """Quart view serving pandas-profiling reports for a parsed CSV.

    ``GET /profile/<urlhash>`` renders (and caches) a full HTML report;
    ``get_minimal_profile`` produces a JSON-shaped minimal report dict.
    """

    def get_dataframe(self, db_info):
        """Load the entire table described by *db_info* into a DataFrame.

        :param db_info: mapping with at least ``db_path`` (path to the sqlite
            file) and ``table_name`` keys (as produced by ``get_db_info``).
        :return: ``pandas.DataFrame`` with every row of the table.
        """
        # immutable=1 opens the database read-only and tells sqlite the file
        # will not change, skipping locking.
        dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
        conn = sqlite3.connect(dsn, uri=True)
        try:
            sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
            return pd.read_sql_query(sql, con=conn)
        finally:
            # The original leaked this handle; always release it, even when
            # the query raises.
            conn.close()

    def make_profile(self, db_info):
        """Generate the full HTML profile and write it to ``profile_path``.

        :return: ``Path`` to the written HTML file.
        """
        df = self.get_dataframe(db_info)
        if app.config['PANDAS_PROFILING_CONFIG_MIN']:
            profile = ProfileReport(df, config_file="profiling-minimal.yml")
        else:
            profile = ProfileReport(df)
        profile.to_file(db_info['profile_path'])
        return Path(db_info['profile_path'])

    async def get(self, urlhash):
        """Serve the cached HTML profile, generating it on first request.

        :raises APIError: 404 if the database file is missing, 400 if the
            SELECT fails.
        """
        db_info = get_db_info(urlhash)
        p = Path(db_info['db_path'])
        if not p.exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        path = Path(db_info['profile_path'])
        if not path.exists():
            try:
                path = self.make_profile(db_info)
            except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
                raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
        return await send_from_directory(path.parent, path.name)

    async def get_minimal_profile(self, url: str, urlhash: str, csv_detective_report: dict) -> dict:
        """Return the minimal profiling report for *urlhash* as a dict.

        ``url`` and ``csv_detective_report`` are accepted for caller
        compatibility but are not used by this implementation.
        (Annotation fixed: the original declared ``-> None`` while
        returning a dict.)

        :raises APIError: 404 if the database file is missing, 400 if the
            SELECT fails.
        """
        db_info = get_db_info(urlhash)
        p = Path(db_info['db_path'])
        if not p.exists():
            raise APIError('Database has probably been removed or does not exist yet.', status=404)
        path = Path(db_info['profile_path'])
        if not path.exists():
            try:
                df = self.get_dataframe(db_info)
                profile = ProfileReport(
                    df, minimal=True,
                    vars=dict(num={"low_categorical_threshold": 0}),
                    plot=dict(histogram={"bins": 10})
                )
                profile_report = json.loads(profile.to_json())
                return profile_report
            except (sqlite3.OperationalError, sqlite3.IntegrityError) as e:
                raise APIError('Error selecting data', status=400, payload=dict(details=str(e)))
        # NOTE(review): when the HTML profile already exists this returns a
        # file response instead of the minimal-report dict — looks
        # inconsistent with the declared return type; confirm against callers
        # before changing.
        return await send_from_directory(path.parent, path.name)