Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

elg

Package Overview
Dependencies
Maintainers
2
Versions
27
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

elg - npm Package Compare versions

Comparing version
0.4.16
to
0.4.17
+147
elg/model/base/StandardMessages.py
from .StatusMessage import StatusMessage
class StandardMessages:
    """
    This class provides easy access to the standard set of ELG status messages that are provided by default by
    the platform and should be fully translated in the ELG user interface. If you use codes other than these
    standard ones in your services then you should also try to contribute translations of your messages into as
    many languages as possible for the benefit of other ELG users.

    Implementation note: This class is auto-generated from elg-messages.properties - to add new message codes you
    should edit the property files, then run /utils/generate_standard_messages.py. Do not edit this class directly.
    """

    @classmethod
    def _generate(cls, code, text, params, detail, lang):
        """Build a StatusMessage for *code*.

        Args:
            code: the ELG status code string.
            text: mapping of language code -> localised message text.
            params: values to fill the ``{0}``-style placeholders, or None for none.
            detail: arbitrary extra detail dict, or None for none.
            lang: language to select from *text*; falls back to English when the
                requested language has no translation.

        Returns:
            StatusMessage: the populated message object.
        """
        # None-sentinel defaults avoid sharing mutable [] / {} objects between
        # calls; the resulting StatusMessage is identical to passing [] / {}.
        return StatusMessage(
            code=code,
            text=text.get(lang, text["en"]),
            params=params if params is not None else [],
            detail=detail if detail is not None else {},
        )

    @classmethod
    def generate_elg_request_invalid(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.invalid"""
        return cls._generate(
            "elg.request.invalid",
            {"en": "Invalid request message", "es": "Mensaje de petici\u00f3n inv\u00e1lido"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_missing(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.missing"""
        return cls._generate(
            "elg.request.missing",
            {"en": "No request provided in message", "es": "Ninguna petici\u00f3n provista en el mensaje"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_type_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.type.unsupported"""
        return cls._generate(
            "elg.request.type.unsupported",
            {
                "en": "Request type {0} not supported by this service",
                "es": "Tipo de petici\u00f3n {0} no soportada por este servicio",
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_property_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.property.unsupported"""
        return cls._generate(
            "elg.request.property.unsupported",
            {"en": "Unsupported property {0} in request", "es": "Propiedad no soportada {0} en la petici\u00f3n"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_too_large(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.too.large"""
        return cls._generate(
            "elg.request.too.large",
            {"en": "Request size too large", "es": "Tama\u00f1o de petici\u00f3n demasiado grande"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_text_mimetype_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.text.mimeType.unsupported"""
        return cls._generate(
            "elg.request.text.mimeType.unsupported",
            {
                "en": "MIME type {0} not supported by this service",
                "es": "Tipo MIME {0} no soportado por este servicio",
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_audio_format_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.audio.format.unsupported"""
        return cls._generate(
            "elg.request.audio.format.unsupported",
            {
                "en": "Audio format {0} not supported by this service",
                "es": "Formato de audio {0} no soportado por este servicio",
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_audio_samplerate_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.audio.sampleRate.unsupported"""
        return cls._generate(
            "elg.request.audio.sampleRate.unsupported",
            {
                "en": "Audio sample rate {0} not supported by this service",
                "es": "Tasa de sampleo de audio {0} no soportado por este servicio",
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_request_structuredtext_property_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.request.structuredText.property.unsupported"""
        return cls._generate(
            "elg.request.structuredText.property.unsupported",
            {
                "en": 'Unsupported property {0} in "texts" of structuredText request',
                "es": 'Propiedad no soportada {0} en "texts" de la petici\u00f3n structuredText',
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_response_property_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.response.property.unsupported"""
        return cls._generate(
            "elg.response.property.unsupported",
            {"en": "Unsupported property {0} in response", "es": "Propiedad no soportada {0} en la respuesta"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_response_texts_property_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.response.texts.property.unsupported"""
        return cls._generate(
            "elg.response.texts.property.unsupported",
            {
                "en": 'Unsupported property {0} in "texts" of texts response',
                "es": 'Propiedad no soportada {0} en "texts" de la respuesta de textos',
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_response_classification_property_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.response.classification.property.unsupported"""
        return cls._generate(
            "elg.response.classification.property.unsupported",
            {
                "en": 'Unsupported property {0} in "classes" of classification response',
                "es": 'Propiedad no soportada {0} en "classes" de la respuesta de clasificaci\u00f3n',
            },
            params, detail, lang,
        )

    @classmethod
    def generate_elg_response_invalid(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.response.invalid"""
        return cls._generate(
            "elg.response.invalid",
            {"en": "Invalid response message", "es": "Mensaje de respuesta inv\u00e1lido"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_response_type_unsupported(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.response.type.unsupported"""
        return cls._generate(
            "elg.response.type.unsupported",
            {"en": "Response type {0} not supported", "es": "Tipo de respuesta {0} no soportada"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_service_not_found(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.service.not.found"""
        return cls._generate(
            "elg.service.not.found",
            {"en": "Service {0} not found", "es": "Servicio {0} no se encontr\u00f3"},
            params, detail, lang,
        )

    @classmethod
    def generate_elg_service_internalerror(cls, params=None, detail=None, lang="en"):
        """Generate StatusMessage for code: elg.service.internalError"""
        return cls._generate(
            "elg.service.internalError",
            {"en": "Internal error during processing: {0}", "es": "Error interno durante el procesamiento: {0}"},
            params, detail, lang,
        )
def to_camel(string: str) -> str:
    """
    Convert a `snake_case` string to `lowerCamelCase`.

    The first underscore-separated segment is kept untouched; every later
    segment is run through `str.capitalize()` (first letter upper-cased,
    remainder lower-cased) and the pieces are concatenated.
    """
    head, *tail = string.split("_")
    return head + "".join(segment.capitalize() for segment in tail)
import configparser
import json

"""
Fairly basic script to generate elg/model/base/StandardMessages.
Left simple to make maintenance/editing easy.
To run from the root directory of this project: `python elg/utils/generate_standard_messages.py`.
"""

# Parse the per-language message catalogue; each INI section is a language code
# and each key in a section is an ELG status code.
config = configparser.RawConfigParser()
# Don't lower-case the keys
config.optionxform = lambda x: x
config.read("elg/model/messages/errors.ini")
details_dicts = {lang: dict(config.items(lang)) for lang in config.sections()}
# Invert the nesting of the dicts, to pass from:
#   {'es': {1: 'q', 2: 'w'}, 'en': {1: 's', 2: 'r'}}
# to:
#   {1: {'es': 'q', 'en': 's'}, 2: {'es': 'w', 'en': 'r'}}
# i.e. from language -> code -> text to code -> language -> text.
keys = list(details_dicts.values())[0].keys()
details_dicts_reverted = {k: {} for k in keys}
for lang, d in details_dicts.items():
    # assert all the language sections define the same set of message codes
    assert d.keys() == keys
    for k in keys:
        details_dicts_reverted[k][lang] = d[k]

# Emit the StandardMessages class: a fixed header plus one generated
# classmethod per status code (dots in the code become underscores in the
# method name).  Output uses tab indentation.
with open("elg/model/base/StandardMessages.py", "w+") as file:
    file.write("from .StatusMessage import StatusMessage\n\n")
    file.write("\nclass StandardMessages:\n")
    file.write(
        '\n\t"""\n'
        + "\tThis class provides easy access to the standard set of ELG status messages that are provided by default by \n"
        + "\tthe platform and should be fully translated in the ELG user interface. If you use codes other than these \n"
        + "\tstandard ones in your services then you should also try to contribute translations of your messages into as \n"
        + "\tmany languages as possible for the benefit of other ELG users.\n\n"
        "\tImplementation note: This class is auto-generated from elg-messages.properties - to add new message codes you \n"
        + "\tshould edit the property files, then run /utils/generate_standard_messages.py. Do not edit this class directly.\n"
        + '\t"""\n'
    )
    for key in details_dicts_reverted:
        file.write("\n\t@classmethod")
        file.write("\n\tdef generate_" + key.replace(".", "_").lower() + '(cls, params=[], detail={}, lang="en"):')
        file.write('\n\t\t"""Generate StatusMessage for code: ' + key + '"""')
        file.write('\n\t\tcode="' + key + '"')
        # json.dumps gives us a valid Python dict literal for the text mapping
        file.write("\n\t\ttext=" + json.dumps(details_dicts_reverted[key]))
        file.write("\n\t\treturn StatusMessage(code=code,text=text[lang],params=params,detail=detail)")
        file.write("\n")
from datetime import date, datetime, time
from json import JSONEncoder
from uuid import UUID
from pydantic import BaseModel
# Compatibility shims: prefer the modern module locations and fall back to
# no-op stand-ins on very old interpreters.
try:
    import collections.abc as collections_abc  # Python 3.3+
except ImportError:
    import collections as collections_abc

try:
    from dataclasses import asdict, is_dataclass
except ImportError:
    # dataclasses module unavailable: nothing is ever a dataclass, and
    # converting one yields an empty dict.
    def is_dataclass(x):
        return False

    def asdict(x):
        return dict()
def json_encoder(service):
    """Create a JSON encoder class whose ``default`` method consults the
    ``to_json`` hook of the given FlaskService/QuartService before applying
    the built-in conversions."""

    class ELGJsonEncoder(JSONEncoder):
        def default(self, o):
            # The service hook gets first refusal; a non-None return value is
            # used verbatim as the substitute object.
            substitute = service.to_json(o)
            if substitute is not None:
                return substitute
            # Hook declined, so fall back on the standard conversions.
            # NOTE: the order of these checks matters and is preserved from
            # the original (e.g. an iterable dataclass is listified).
            if isinstance(o, BaseModel):
                return o.dict(by_alias=True, exclude_none=True)
            if isinstance(o, collections_abc.Iterable):
                # Any iterable is serialized as a list.
                return list(o)
            if isinstance(o, (datetime, date, time)):
                return o.isoformat()
            if isinstance(o, UUID):
                return str(o)
            if is_dataclass(o):
                return asdict(o)
            if hasattr(o, "__json__"):
                return o.__json__()
            if hasattr(o, "for_json"):
                return o.for_json()
            # Nothing matched: let JSONEncoder raise its usual TypeError.
            return super().default(o)

    return ELGJsonEncoder
+1
-1
Metadata-Version: 2.1
Name: elg
Version: 0.4.16
Version: 0.4.17
Summary: Use the European Language Grid in your Python projects

@@ -5,0 +5,0 @@ Home-page: https://gitlab.com/european-language-grid/platform/python-client

@@ -9,3 +9,2 @@ requests>=2.25

flask>=2.0
Flask-JSON>=0.3
docker>=5.0

@@ -20,4 +19,3 @@ requests_toolbelt>=0.9

quart>=0.15.1
aiohttp>=3.7
docker>=5.0
requests_toolbelt>=0.9

@@ -34,6 +34,7 @@ README.md

elg/model/base/Request.py
elg/model/base/Response.py
elg/model/base/ResponseObject.py
elg/model/base/StandardMessages.py
elg/model/base/StatusMessage.py
elg/model/base/__init__.py
elg/model/base/utils.py
elg/model/request/AudioRequest.py

@@ -52,4 +53,6 @@ elg/model/request/StructuredTextRequest.py

elg/utils/__init__.py
elg/utils/_generate_standard_messages.py
elg/utils/docker.py
elg/utils/errors.py
elg/utils/json_encoder.py
elg/utils/utils.py

@@ -1,2 +0,2 @@

__version__ = "0.4.16"
__version__ = "0.4.17"

@@ -9,3 +9,2 @@ import importlib.util

_quart_available = importlib.util.find_spec("quart") is not None
_aiohttp_available = importlib.util.find_spec("aiohttp") is not None
_requests_toolbelt_available = importlib.util.find_spec("requests_toolbelt") is not None

@@ -26,3 +25,3 @@

if _docker_available and _quart_available and _aiohttp_available and _requests_toolbelt_available:
if _docker_available and _quart_available and _requests_toolbelt_available:
from .quart_service import QuartService

@@ -5,2 +5,3 @@ import urllib

import requests
import urllib3

@@ -11,3 +12,4 @@ from .entity import Entity

from .utils import get_domain
from .utils.errors import catch_requests_error, ensure_response_ok
from .utils.errors import (ClientException, ConnectException,
catch_requests_error, ensure_response_ok)

@@ -154,3 +156,3 @@ ISO639 = iso639()

):
"""Method to send a search request to the API.
"""Generator to iterate through search results via the API.

@@ -172,8 +174,8 @@ Args:

Returns:
List[elg.Entity]: list of the results.
Yields:
elg.Entity: search results one entity at a time.
Examples::
results = catalog.search(
results = [ r for r in catalog.search(
resource = "Tool/Service",

@@ -183,4 +185,4 @@ function = "Machine Translation",

limit = 100,
)
results = catalog.search(
)]
results = [ r for r in catalog.search(
resource = "Corpus",

@@ -190,10 +192,12 @@ languages = ["German"],

limit = 100,
)
)]
"""
results = []
# results = []
finished = False
page = 1
while len(results) < limit and finished is False:
attempts_remaining = 100
yielded = 0
while limit <= 0 or yielded < limit:
try:
r = self._search(
page_results = self._search(
entity=entity,

@@ -207,8 +211,23 @@ search=search,

)
except:
finished = True
except ConnectException as e:
if attempts_remaining == 0:
raise e
else:
attempts_remaining -= 1
pass
continue
results.extend(r)
except ClientException as e:
if e.trigger == 404:
return
else:
raise e
except Exception as e:
raise e
for r in page_results:
if limit > 0 and yielded == limit:
return
yielded += 1
yield r
page += 1
return results[:limit]
return

@@ -215,0 +234,0 @@ def interactive_search(

@@ -21,2 +21,6 @@ import sys

service_type=args.service_type,
log_level=args.log_level,
workers=args.workers,
timeout=args.timeout,
worker_class=args.worker_class,
)

@@ -98,2 +102,32 @@

)
info_parser.add_argument(
"--log_level",
type=str,
default="INFO",
required=None,
help="The minimum severity level from which logged messages should be displayed.",
choices=["INFO", "DEBUG", "TRACE"],
)
info_parser.add_argument(
"--workers",
type=int,
default=1,
required=None,
help="Number of Gunicorn workers. Only used for the FlaskService class.",
)
info_parser.add_argument(
"--timeout",
type=int,
default=30,
required=None,
help="Timeout value for the Gunicorn worker. Only used for the FlaskService class.",
)
info_parser.add_argument(
"--worker_class",
type=str,
default="sync",
required=None,
help="Worker class value for the Gunicorn worker. Only used for the FlaskService class.",
choices=["sync", "gthread"],
)

@@ -113,2 +147,6 @@ info_parser.set_defaults(func=docker_create_command_factory)

service_type: str = "flask",
log_level: str = "INFO",
workers: int = 1,
timeout: int = 30,
worker_class: str = "sync",
):

@@ -124,2 +162,6 @@ self._classname = classname

self._service_type = service_type
self._log_level = log_level
self._workers = workers
self._timeout = timeout
self._worker_class = worker_class

@@ -175,2 +217,6 @@ def run(self):

path=self._path,
log_level=self._log_level,
workers=self._workers,
timeout=self._timeout,
worker_class=self._worker_class,
)

@@ -184,2 +230,3 @@ elif self._service_type == "quart":

path=self._path,
log_level=self._log_level,
)

@@ -186,0 +233,0 @@ except Exception as e:

@@ -32,3 +32,2 @@ from textwrap import TextWrapper

downloads: int,
size: int,
service_execution_count: int,

@@ -39,2 +38,3 @@ status: str,

record: dict = None,
size: int = 0,
**kwargs,

@@ -41,0 +41,0 @@ ):

import inspect
import json
import re
import sys
from collections.abc import Iterable

@@ -14,3 +14,2 @@ from pathlib import Path

from flask import request as flask_request
from flask_json import JsonError
from requests_toolbelt import MultipartDecoder

@@ -22,5 +21,7 @@ except:

from .model import (AudioRequest, Failure, Progress, ResponseObject,
StructuredTextRequest, TextRequest)
from .utils.docker import COPY_FOLDER, DOCKERFILE, ENTRYPOINT_FLASK
StandardMessages, StructuredTextRequest, TextRequest)
from .utils.docker import COPY_FOLDER, DOCKERFILE, ENTRYPOINT_FLASK, ENV_FLASK
from .utils.json_encoder import json_encoder

@@ -49,4 +50,38 @@

self.app.config["JSON_SORT_KEYS"] = False
self.app.json_encoder = json_encoder(self)
self.app.add_url_rule("/process", "process", self.process, methods=["POST"])
def to_json(self, obj):
"""Hook that can be overridden by subclasses to customise JSON encoding.
FlaskService can convert the following types to JSON by default, in
addition to the types handled natively by `json.dump`:
- date, time, datetime (via `.isoformat()`)
- uuid.UUID (converted to `str`)
- any pydantic.BaseModel including the ELG message types (via
`.dict(by_alias=True, exclude_none=True)`)
- anything Iterable (as a list)
- any `dataclass` (converted to a `dict` via `dataclasses.asdict`)
- anything with a `__json__` or `for_json` method (which is expected to
return a serializable type)
To handle other types, or to change the standard behaviour for any of
the above types, subclasses can override this method, which will be
called whenever an object other than a string, number, bool, list or
dict must be serialized and is expected to return a JSON-serializable
object to be used in place of the original, or `None` to fall back to
the default behaviour.
The default implementation of this method always returns `None`.
Args:
obj: the object to convert
Returns:
a substitute object suitable for JSON serialization, or `None` to
use the default behaviour.
"""
return None
def run(self):

@@ -64,4 +99,4 @@ """

even_stream = True if "text/event-stream" in flask_request.accept_mimetypes else False
logger.debug(f"Accept MimeTypes: {flask_request.accept_mimetypes}")
logger.info(f"Accept even-stream: {even_stream}")
logger.debug("Accept MimeTypes: {mimetypes}", mimetypes=flask_request.accept_mimetypes)
logger.debug("Accept even-stream: {even_stream}", even_stream=even_stream)
if "application/json" in flask_request.content_type:

@@ -84,39 +119,52 @@ data = flask_request.get_json()

logger.debug(f"Data type: {data.get('type')}")
if data.get("type") == "audio":
request = AudioRequest(**data)
elif data.get("type") == "text":
request = TextRequest(**data)
elif data.get("type") == "structuredText":
request = StructuredTextRequest(**data)
request_type = data.get("type")
logger.debug("Data type: {request_type}", request_type=request_type)
if request_type in ["audio", "text", "structuredText"]:
if request_type == "audio":
request = AudioRequest(**data)
elif request_type == "text":
request = TextRequest(**data)
else:
request = StructuredTextRequest(**data)
logger.trace("Call with the input: {request}", request=request)
try:
response = self.process_request(request)
except:
logger.opt(exception=True).warning("Error during the request processing.")
response = Failure(
errors=[StandardMessages.generate_elg_service_internalerror(params=[str(sys.exc_info()[1])])]
)
else:
self.invalid_request_error()
logger.info(f"Call with the input: {request}")
try:
response = self.process_request(request)
except:
logger.debug("Error during the request processing.")
response = Failure(
errors=[
{"code": "elg.internalError", "text": "Internal error during processing", "params": ["message"]}
]
)
response = Failure(errors=[StandardMessages.generate_elg_request_invalid()])
if isinstance(response, Failure):
logger.info(f"Get error message: {response}")
response = {"failure": response.dict(by_alias=True)}
logger.info(f"Return: {response}")
logger.debug("Get error message")
response = {"failure": response}
logger.trace("Return: {response}", response=response)
logger.info("Response returned")
return response
elif isinstance(response, ResponseObject):
logger.info(f"Get response: {response}")
response = {"response": response.dict(by_alias=True)}
logger.info(f"Return: {response}")
logger.debug("Get response")
response = {"response": response}
logger.trace("Return: {response}", response=response)
logger.info("Response returned")
return response
elif isinstance(response, Iterable):
logger.info(f"Get iterable response")
logger.debug("Get iterable response")
if even_stream:
logger.info("Streaming response returned")
return self.app.response_class(self.generator_mapping(response), mimetype="text/event-stream")
else:
response = self.get_response_from_generator(response)
logger.info(f"Get response: {response}")
response = {"response": response.dict(by_alias=True)}
logger.info(f"Return: {response}")
if isinstance(response, ResponseObject):
logger.debug("Get response")
response = {"response": response}
elif isinstance(response, Failure):
logger.debug("Get error message")
response = {"failure": response}
else:
logger.debug("Response type not known")
response = {"failure": Failure(errors=[StandardMessages.generate_elg_invalid_response()])}
logger.trace("Return: {response}", response=response)
logger.info("Response returned")
return response

@@ -126,10 +174,2 @@ else:

def invalid_request_error(self):
"""
Generates a valid ELG "failure" response if the request cannot be parsed
"""
raise JsonError(
status_=400, failure={"errors": [{"code": "elg.request.invalid", "text": "Invalid request message"}]}
)
def process_request(self, request):

@@ -140,9 +180,11 @@ """

if request.type == "text":
logger.info("Process text request")
logger.debug("Process text request")
return self.process_text(request)
elif request.type == "structuredText":
logger.debug("Process structuredText request")
return self.process_structured_text(request)
elif request.type == "audio":
logger.debug("Process audio request")
return self.process_audio(request)
self.invalid_request_error()
return Failure(errors=[StandardMessages.generate_elg_request_invalid()])

@@ -176,4 +218,3 @@ def process_text(self, request: TextRequest):

@staticmethod
def generator_mapping(generator):
def generator_mapping(self, generator):
end = False

@@ -184,33 +225,26 @@ try:

logger.warning(
(
"The service has already returned a ResponseObject or Failure message but continue to return the following message:\n"
f"{message.dict(by_alias=True)}\nThis message will be ignored and not returned to the user."
)
"The service has already returned a ResponseObject or Failure message but continue to return a message. This message will be ignored and not returned to the user."
)
continue
if isinstance(message, Failure):
logger.info(f"Get failure: {message}")
message = json.dumps({"failure": message.dict(by_alias=True)})
logger.debug("Get failure message")
message = json.dumps({"failure": message}, cls=self.app.json_encoder)
end = True
elif isinstance(message, Progress):
logger.info(f"Get progress: {message}")
message = json.dumps({"progress": message.dict(by_alias=True)})
logger.debug("Get progress message")
message = json.dumps({"progress": message}, cls=self.app.json_encoder)
elif isinstance(message, ResponseObject):
logger.info(f"Get response: {message}")
message = json.dumps({"response": message.dict(by_alias=True)})
logger.debug("Get response")
message = json.dumps({"response": message}, cls=self.app.json_encoder)
end = True
yield f"data:{message}\r\n\r\n"
except:
logger.opt(exception=True).warning("Exception in generator")
message = json.dumps(
{
"failure": Failure(
errors=[
{
"code": "elg.internalError",
"text": "Internal error during processing",
"params": ["message"],
}
]
).dict(by_alias=True)
}
errors=[StandardMessages.generate_elg_service_internalerror(params=[str(sys.exc_info()[1])])]
)
},
cls=self.app.json_encoder,
)

@@ -225,6 +259,3 @@ yield f"data:{message}\r\n\r\n"

logger.warning(
(
"The service has already returned a ResponseObject or Failure message but continue to return the following message:\n"
f"{message.dict(by_alias=True)}\nThis message will be ignored and not returned to the user."
)
"The service has already returned a ResponseObject or Failure message but continue to return a message. This message will be ignored and not returned to the user."
)

@@ -235,5 +266,3 @@ continue

if response is None:
return Failure(
errors=[{"code": "elg.response.invalid", "text": "Invalid response message", "params": ["message"]}]
)
return Failure(errors=[StandardMessages.generate_elg_invalid_response()])
return response

@@ -266,2 +295,6 @@

path: str = None,
log_level: str = "INFO",
workers: int = 1,
timeout: int = 30,
worker_class: str = "sync",
):

@@ -276,2 +309,6 @@ """Class method to create the correct Dockerfile.

path (str, optional): Path where to generate the file. Defaults to None.
log_level (str, optional): The minimum severity level from which logged messages should be displayed. Defaults to 'INFO'.
workers (int, optional): Number of Gunicorn workers. Defaults to 1.
timeout (int, optional): Timeout value for the Gunicorn worker. Defaults to 30.
worker_class (str, optional): Worker class value for the Gunicorn worker. Defaults to 'sync'.
"""

@@ -300,2 +337,5 @@ if path == None:

commands="\n".join(commands),
env=ENV_FLASK.format(
workers=workers, timeout=timeout, worker_class=worker_class, log_level=log_level
),
)

@@ -318,3 +358,10 @@ )

@classmethod
def docker_push_image(cls, repository: str, tag: str, username: str = None, password: str = None, **kwargs):
def docker_push_image(
cls,
repository: str,
tag: str,
username: str = None,
password: str = None,
**kwargs,
):
"""

@@ -326,3 +373,9 @@ Class method to do `docker push ...` in python.

auth_config = {"username": username, "password": password}
client.images.push(repository=repository, tag=tag, auth_config=auth_config, stream=True, **kwargs)
client.images.push(
repository=repository,
tag=tag,
auth_config=auth_config,
stream=True,
**kwargs,
)
client.images.push(repository=repository, tag=tag, stream=True, **kwargs)

@@ -343,3 +396,9 @@ return

cls.docker_build_image(tag=f"{repository}:{tag}", pull=pull, **build_kwargs)
cls.docker_push_image(repository=repository, tag=tag, username=username, password=password, **push_kwargs)
cls.docker_push_image(
repository=repository,
tag=tag,
username=username,
password=password,
**push_kwargs,
)
return None

@@ -1,3 +0,3 @@

from .base import (Annotation, Failure, Progress, Request, Response,
ResponseObject, StatusMessage)
from .base import (Annotation, Failure, Progress, Request, ResponseObject,
StandardMessages, StatusMessage)
from .request import AudioRequest, StructuredTextRequest, TextRequest

@@ -4,0 +4,0 @@ from .response import (AnnotationsResponse, AudioResponse, ClassesResponse,

@@ -5,4 +5,4 @@ from .Annotation import Annotation

from .Request import Request
from .Response import Response
from .ResponseObject import ResponseObject
from .StandardMessages import StandardMessages
from .StatusMessage import StatusMessage

@@ -1,6 +0,8 @@

from typing import Dict, List
from numbers import Number
from pydantic import BaseModel
from .utils import to_camel
class Annotation(BaseModel):

@@ -12,13 +14,17 @@ """

----------
start (int, required): annotation start location (in response)
end (int, required): annotation end location (in response)
source_start (int, required in cases): annotation start location (in source)
source_end (int, required in cases): annotation end location (in source)
start (Number, required): annotation start location (in response)
end (Number, required): annotation end location (in response)
source_start (Number, required in cases): annotation start location (in source)
source_end (Number, required in cases): annotation end location (in source)
features (dict, optional): arbitrary json metadata about content
"""
start: int
end: int
sourceStart: int = None
sourceEnd: int = None
start: Number
end: Number
source_start: Number = None
source_end: Number = None
features: dict = None
class Config:
alias_generator = to_camel
arbitrary_types_allowed = True

@@ -6,2 +6,3 @@ from typing import List

from .StatusMessage import StatusMessage
from .utils import to_camel

@@ -12,8 +13,8 @@

Details of a failed task
Attributes
----------
errors: List[StatusMessage]: list of status messages describing failure
"""
errors: List[StatusMessage]
"""*(required)* List of status messages describing the failure"""
class Config:
alias_generator = to_camel
from pydantic import BaseModel
from .StatusMessage import StatusMessage
from .utils import to_camel

@@ -9,10 +10,11 @@

Details of an in progress task
Attributes
----------
percent (float, required): completion percentage
message (StatusMessage, optional): message describing progress report
Some LT services can take a long time to process each request - likely useful to keep caller updated
"""
percent: float
"""*(required)* completion percentage"""
message: StatusMessage = None
"""*(optional)* message describing progress report"""
class Config:
alias_generator = to_camel
from pydantic import BaseModel
from .utils import to_camel
class Request(BaseModel):
"""
Representation of a service invocation request
Representation of a service invocation request.
Intended to be abstract, subclasses should be initiated with their specific type
Subclasses
----------
request.AudioRequest
request.TextRequest
request.StructuredTextRequest
**Subclasses**
Attributes
----------
type (str, required on subclass instantiation): the type of request
params (dict, optional): vendor specific requirements
* :class:`elg.model.request.AudioRequest`
* :class:`elg.model.request.TextRequest`
* :class:`elg.model.request.StructuredTextRequest`
"""
type: str = None
"""*(required in subclass)* the type of request"""
params: dict = None
"""*(optional)* vendor specific params, up to service implementor to decide how to interpret these """
class Config:
alias_generator = to_camel
def __str__(self):
"""Override string to display dictionary"""
return " - ".join([f"{k}: {v}" for k, v in self.dict().items() if v is not None])

@@ -6,2 +6,3 @@ from typing import List

from .StatusMessage import StatusMessage
from .utils import to_camel

@@ -12,19 +13,19 @@

Representation of a successful completion response.
Abstract, subclasses must instantiate this with their own type
Abstract, subclasses should instantiate this with their own type
Subclasses
----------
response.AnnotationsResponse
response.AudioResponse
response.ClassificationResponse
response.StoredResponse
response.TextsResponse
**Subclasses**
Attributes
----------
type (string, required on subclass): type of response
warnings (List[StatusMessage], optional): messages describing any warnings on responses
* :class:`elg.model.response.AnnotationsResponse`
* :class:`elg.model.response.AudioResponse`
* :class:`elg.model.response.ClassificationResponse`
* :class:`elg.model.response.TextsResponse`
"""
type: str
"""*(required in subclass)* the type of response"""
warnings: List[StatusMessage] = None
"""*(optional)* messages describing any warnings on response"""
class Config:
alias_generator = to_camel

@@ -5,3 +5,5 @@ from typing import Dict, List

from .utils import to_camel
class StatusMessage(BaseModel):

@@ -15,14 +17,17 @@ """

code cannot be found in the lookup table.
Attributes
----------
code: (str, required) status code to be found in lookup table
params: (List[str], required) values to fill in message placeholder
text: (str, required) fallback text to be used if specified code cannot be found in lookup table
detail: (Dict, optional) arbitrary further details that don't need translation (e.g: stacktrace)
"""
code: str
"""*(required)* status code to be found in lookup table"""
params: List[str]
"""*(required)* values to fill in message placeholder"""
text: str
"""*(required)* fallback text to be used if specified code cannot be found in lookup table"""
detail: Dict = None
"""*(optional)* arbitrary further details that don't need translation (e.g. stacktrace)"""
class Config:
alias_generator = to_camel

@@ -0,3 +1,4 @@

from collections.abc import AsyncIterable, Iterable
from pathlib import Path
from typing import Dict, List
from typing import Any, Dict, List

@@ -12,21 +13,25 @@ from pydantic import validator

Request representing a piece of audio - the actual audio data will be sent as a separate request.
Subclass of Request
Attributes
----------
type (str, required): the type of request. must be "audio"
params (dict, optional): vendor specific requirements
format (str, required): the format of the audio request
sampleRate (int, required): sample rate of audio
features (dict, optional): arbitrary json metadata about content
annotations (Dict[str, List[Annotation]], optional): optional annotations on request
Subclass of :class:`elg.model.base.Request.Request`
"""
type: str = "audio"
"""*(required)* the type of request must be \"audio\""""
content: bytes = None
"""*(optional)* audio itself, if not being sent as separate stream"""
generator: Any = None
"""*(optional)* generator that provide the audio itself"""
format: str = "LINEAR16"
"""*(required)* format of audio request. must be either \"LINEAR16\" (default) or \"MP3\""""
sample_rate: int = None
"""*(optional)* sample rate of audio"""
features: Dict = None
"""*(optional)* arbitrary json metadata about content"""
annotations: Dict[str, List[Annotation]] = None
"""*(optional)* optional annotations on request"""

@@ -36,3 +41,3 @@ @validator("format")

"""
validator: ensures the format of the audio request is either "LINEAR16" or "MP3"
*(validator)* ensures the format of the audio request is either "LINEAR16" or "MP3"
"""

@@ -45,2 +50,21 @@ acceptable_formats = ["LINEAR16", "MP3"]

@validator("generator")
def generator_must_be_iterable(cls, v):
    """
    *(validator)* ensures the generator field of the audio request is either None or an Iterable
    """
    # None is allowed: the audio may instead be supplied inline via `content`.
    if v is None:
        return v
    # Accept both sync and async iterables so callers can stream the audio.
    if isinstance(v, (AsyncIterable, Iterable)):
        return v
    raise ValueError(f"The generator must be None or an Iterable, not {type(v)}")
@staticmethod
def generator_from_file(filename, blocksize=1024):
    """Lazily read *filename* in binary mode, yielding successive chunks
    of at most ``blocksize`` bytes until the file is exhausted."""
    with open(filename, "rb") as handle:
        # iter() with a b"" sentinel stops as soon as read() returns empty.
        for chunk in iter(lambda: handle.read(blocksize), b""):
            yield chunk
@classmethod

@@ -54,8 +78,18 @@ def from_file(

annotations: Dict[str, List[Annotation]] = None,
streaming: bool = False,
blocksize: int = 1024,
):
"""
allows you to generate audio request from file
"""
filename = Path(filename)
if not filename.is_file():
raise ValueError(f"(unknown) must be the path to a file.")
with open(filename, "rb") as f:
content = f.read()
if streaming:
generator = cls.generator_from_file(filename=filename, blocksize=blocksize)
content = None
else:
with open(filename, "rb") as f:
content = f.read()
generator = None
if format is None:

@@ -65,2 +99,3 @@ format = "MP3" if filename.suffix == ".mp3" else "LINEAR16"

content=content,
generator=generator,
format=format,

@@ -73,2 +108,4 @@ sample_rate=sample_rate,

def __str__(self):
return " - ".join([f"{k}: {v}" for k, v in self.dict().items() if v is not None and k != "content"])
return " - ".join(
[f"{k}: {v}" for k, v in self.dict(exclude={"content", "generator"}).items() if v is not None]
) + ((" - content " + str(len(self.content))) if self.content else (" - content generator"))

@@ -1,8 +0,7 @@

from __future__ import annotations
from typing import Dict, List
from typing import Dict, List, Text
from pydantic import BaseModel, root_validator
from .. import Annotation, Request
from ..base.utils import to_camel

@@ -17,22 +16,26 @@

original source material from the corresponding request.
Attributes
----------
content: (str, optional) text content
mime_type: (str, optional) mime type of request, default text/plain
features (dict, optional) arbitrary json metadata about content
annotations (Dict[str, List[Annotation]], optional): optional annotations on request
texts: (List[Text], optional): recursive, same structure again
"""
content: str = None
"""*(optional)* text content"""
mime_type: str = "text/plain"
"""*(optional)* mime type of request, default \"text/plain\""""
features: dict = None
"""*(optional)* arbitrary json metadata about content"""
annotations: Dict[str, List[Annotation]] = None
texts: List[Text] = None
"""*(optional)* optional annotations on request"""
texts: List = None
"""*(optional)* recursive, same structure (should be List[Text] but postponed annotations introduced post python 3.6)"""
class Config:
alias_generator = to_camel
@root_validator()
def either_content_or_text(cls, values):
"""
validator: ensures only either the "content" or the "text" fields are present
ensures only either the "content" or the "text" fields are present
"""

@@ -51,16 +54,14 @@ content, texts = values.get("content"), values.get("texts")

"""
Request representing text with some structure,
Request representing text with some structure.
Subclass of :class:`elg.model.base.Request.Request`
For example a list of paragraphs or sentences, or a corpus of documents, each divided into sentences.
While this could be represented as standoff annotations in a plain "text" request, the structured format is more
Whilst this could be represented as standoff annotations in a plain "text" request, the structured format is more
suitable for certain types of tools.
Attributes
----------
type (str, required): the type of request. must be "structuredText"
params (dict, optional): vendor specific requirements
texts: (List[Text], required): the actual text object with the text content
"""
type: str = "structuredText"
"""*(required)* the type of request must be \"structuredText\""""
texts: List[Text]
"""*(required)* the actual text object with the text content"""
from pathlib import Path
from typing import Dict, List
from pydantic import validator
from .. import Annotation, Request

@@ -12,2 +10,3 @@

Request representing a single piece of text, optionally with associated markup
Subclass of :class:`elg.model.base.Request.Request`

@@ -17,18 +16,18 @@ For example a list of paragraphs or sentences, or a corpus of documents, each divided into sentences.

suitable for certain types of tools.
Attributes
----------
type (str, required): the type of request. must be "text"
params (Dict, optional): vendor specific requirements
content: (str, optional) text content
mimeType: (str, optional) mime type of request, default text/plain
features (Dict, optional): arbitrary json metadata about content
annotations (Dict[str, List[Annotation]], optional): optional annotations on request
"""
type: str = "text"
"""*(required)* the type of request must be \"text\""""
content: str
"""*(optional)* text content"""
mimeType: str = "text/plain"
"""*(optional)* mime type of request, default \"text/plain\""""
features: Dict = None
"""*(optional)* arbitrary json metadata about content"""
annotations: Dict[str, List[Annotation]] = None
"""*(optional)* optional annotations on request"""

@@ -35,0 +34,0 @@ @classmethod

@@ -11,23 +11,22 @@ from typing import Dict, List

Response representing standoff annotations over a single stream of data (e.g. information extraction results).
Attributes
----------
type (string, required): type of response
warnings (List[StatusMessage], optional): messages describing any warnings on responses
features (dict, optional): arbitrary json metadata about content
annotations (Dict[str, List[Annotation]], optional): optional annotations on request
Subclass of :class:`elg.model.base.ResponseObject.ResponseObject`
"""
type: str = "annotations"
"""*(required)* the type of response must be \"annotations\""""
features: dict = None
"""*(optional)* arbitrary json metadata about content"""
annotations: Dict[str, List[Annotation]] = None
"""*(optional)* optional annotations on request"""
@root_validator()
def either_features_or_annotations(cls, values):
""" "
validator: ensures either the "features" or "annotations" fields are present
"""
*(validator)* ensures either the "features" or "annotations" fields are present
"""
features, annotations = values.get("features"), values.get("annotations")
if features is None and annotations is None:
raise ValueError('A annotation response must have either "features" or "annotations" fields')
raise ValueError('An annotation response must have either "features" or "annotations" fields')
return values

@@ -34,0 +33,0 @@

@@ -13,18 +13,20 @@ from base64 import b64decode

Response representing audio data with optional standoff annotations (e.g. text-to-speech results)
Subclass of :class:`elg.model.base.ResponseObject.ResponseObject`
Attributes
----------
type (string, required): type of response
warnings (List[StatusMessage], optional): messages describing any warnings on responses
content (str, required): base64 encoded audio for short audio snippets
format (str, required): specifies audio format used: either "LINEAR16" or "MP3"
features (dict, optional): arbitrary json metadata about content
annotations (Dict[str, List[Annotation]], optional): optional annotations on request
"""
type: str = "audio"
"""*(required)* type of response"""
content: str
"""*(required)* base64 encoded audio for short audio snippets"""
format: str
"""*(required)* specifies audio format used: either \"LINEAR16\" or \"MP3\""""
features: dict = None
"""*(required)* arbitrary json metadata about content"""
annotations: Dict[str, List[Annotation]] = None
"""*(required)* optional annotations on response"""

@@ -34,10 +36,13 @@ @validator("format")

"""
validator: ensures the format of the audio response is either "LINEAR16" or "MP3"
*(validator)* ensures the format of the audio response is either \"LINEAR16\" or \"MP3\"
"""
acceptable_formats = ["LINEAR16", "MP3"]
if v.lower() not in acceptable_formats:
raise ValueError("The format given is not supported")
return v.lower()
if v.upper() not in acceptable_formats:
raise ValueError(f"The format given ({v}) is not supported")
return v.upper()
def to_file(self, filename):
"""
*(validator)* writes audio response to file
"""
filename = Path(filename)

@@ -44,0 +49,0 @@ with open(filename, "wb") as f:

@@ -6,2 +6,3 @@ from typing import List

from .. import ResponseObject
from ..base.utils import to_camel

@@ -12,13 +13,15 @@

Classification object: classification and score (optional likelihood of classification)
Attributes
----------
class: (str, required) labelled class
score: (float, optional) confidence score in class
Subclass of :class:`elg.model.base.ResponseObject.ResponseObject`
"""
class_field: str = Field(None, alias="class")
"""*(required)* labelled class"""
score: float = None
"""*(optional)* confidence score in class"""
class Config:
alias_generator = to_camel
class ClassificationResponse(ResponseObject):

@@ -28,12 +31,9 @@ """

attached.
Attributes
----------
type (string, required): type of response
warnings (List[StatusMessage], optional): messages describing any warnings on responses
classes: (List[ClassesResponse], optional) : list of classifications, zero or more allowed
"""
type: str = "classification"
"""*(required)* type of response"""
classes: List[ClassesResponse] = None
"""*(optional)* list of classifications, zero or more allowed"""

@@ -40,0 +40,0 @@ def auto_content(self):

@@ -6,2 +6,3 @@ from typing import Dict, List

from .. import Annotation, ResponseObject
from ..base.utils import to_camel

@@ -12,25 +13,30 @@

Object representing a structured piece of text. Recursive.
Attributes
----------
role: (str, optional) the role of this node in the response
content:(str, optional) string of translated/transcribed text
texts: (List[TextsResponseObject], optional) list of same structures, recursive
score: (int, optional) confidence of response
features (dict, optional): arbitrary JSON metadata about content
annotations (Dict[str, List[Annotation]], optional): optional annotations on request
"""
role: str = None
"""*(optional)* the role of this node in the response"""
content: str = None
"""*(optional)* string of translated/transcribed text"""
texts: List = None
"""*(optional)* list of same structures, recursive"""
score: int = None
"""*(optional)* confidence of response"""
features: dict = None
"""*(optional)* arbitrary JSON metadata about content"""
annotations: Dict[str, List[Annotation]] = None
"""*(optional)* optional annotations on request"""
class Config:
alias_generator = to_camel
@root_validator()
def either_content_or_text(cls, values):
""" "
validator: ensures either the "content" or "text" fields are present
"""
*(validator)* ensures either the \"content\" or \"text\" fields are present
"""
content, texts, score = values.get("content"), values.get("texts"), values.get("score")

@@ -62,15 +68,13 @@ if content is None and texts is None:

Response consisting of a set of one or more new texts, each with optional annotations attached to it.
Subclass of :class:`elg.model.base.ResponseObject.ResponseObject`
For example a set of possible translations produced by a translation tool or possible transcriptions produced by a
speech-to-text recogniser.
Attributes
----------
type (str, required): type of response
warnings (List[StatusMessage], optional): messages describing any warnings on responses
texts (List[TextsResponseObject], optional): list of objects representing a structured text response
"""
type: str = "texts"
"""*(optional)* type of response, must be \"texts\""""
texts: List[TextsResponseObject]
"""*(optional)* list of objects representing a structured text response"""

@@ -77,0 +81,0 @@ def auto_content(self):

@@ -0,1 +1,2 @@

from .. import Failure
from .AnnotationsResponse import AnnotationsResponse

@@ -9,13 +10,17 @@ from .AudioResponse import AudioResponse

try:
response = response["response"]
if response["type"] == "annotations":
return AnnotationsResponse(**response)
elif response["type"] == "audio":
return AudioResponse(**response)
elif response["type"] == "classification":
return ClassificationResponse(**response)
elif response["type"] == "texts":
return TextsResponse(**response)
else:
raise ValueError(f"Response type {response['type']} not known.")
if "response" in response:
response = response["response"]
if response["type"] == "annotations":
return AnnotationsResponse(**response)
elif response["type"] == "audio":
return AudioResponse(**response)
elif response["type"] == "classification":
return ClassificationResponse(**response)
elif response["type"] == "texts":
return TextsResponse(**response)
else:
raise ValueError(f"Response type {response['type']} not known.")
elif "failure" in response:
response = response["failure"]
return Failure(**response)
except Exception as e:

@@ -22,0 +27,0 @@ if e == ValueError:

import inspect
import json
import sys
from collections.abc import AsyncIterable

@@ -10,7 +11,10 @@ from pathlib import Path

try:
import aiohttp
import docker
from quart import Quart, make_response
from quart import Quart, current_app, make_response
from quart import request as input_request
from requests_toolbelt import MultipartDecoder
from werkzeug.exceptions import BadRequest, RequestEntityTooLarge
from werkzeug.http import parse_options_header
from werkzeug.sansio.multipart import (Data, Epilogue, Field, File,
MultipartDecoder, NeedData)
except:

@@ -21,27 +25,24 @@ raise ImportError(

from werkzeug.exceptions import BadRequest, RequestEntityTooLarge
from .model import (AudioRequest, Failure, Progress, ResponseObject,
StructuredTextRequest, TextRequest)
from .utils.docker import COPY_FOLDER, DOCKERFILE, ENTRYPOINT_QUART
StandardMessages, StructuredTextRequest, TextRequest)
from .utils.docker import COPY_FOLDER, DOCKERFILE, ENTRYPOINT_QUART, ENV_QUART
from .utils.json_encoder import json_encoder
class ProcessingError(Exception):
def __init__(self, status_code, code, text, *params):
def __init__(self, status_code, standard_message):
self.status_code = status_code
self.code = code
self.text = text
self.params = params
self.standard_message = standard_message
@staticmethod
def InternalError(text):
return ProcessingError(500, "elg.service.internalError", "Internal error during processing: {0}", text)
return ProcessingError(500, StandardMessages.generate_elg_service_internalerror(params=[text]))
@staticmethod
def InvalidRequest():
return ProcessingError(400, "elg.request.invalid", "Invalid request message")
return ProcessingError(400, StandardMessages.generate_elg_request_invalid())
@staticmethod
def TooLarge():
return ProcessingError(413, "elg.request.too.large", "Request size too large")
return ProcessingError(413, StandardMessages.generate_elg_request_too_large())

@@ -51,3 +52,4 @@ @staticmethod

return ProcessingError(
400, "elg.request.text.mimeType.unsupported", "MIME type {0} not supported by this service", mime
400,
StandardMessages.generate_elg_request_text_mimetype_unsupported(params=[mime]),
)

@@ -58,17 +60,8 @@

return ProcessingError(
400, "elg.request.type.unsupported", "Request type {0} not supported by this service", request_type
400,
StandardMessages.generate_elg_request_type_unsupported(params=[request_type]),
)
def to_json(self):
return {
"failure": {
"errors": [
{
"code": self.code,
"text": self.text,
"params": self.params,
}
]
}
}
return {"failure": Failure(errors=[self.standard_message])}

@@ -80,5 +73,69 @@

Extra dependencies need to be install to use the QuartService class. Please run: `pip install elg[quart]`.
The QuartService class is suitable for services that execute the request directly, for example of a simple language detection service::
from elg import QuartService
from elg.model import AnnotationsResponse
import langdetect
class ELGService(QuartService):
async def process_text(self, content):
langs = langdetect.detect_langs(content.content)
ld = {}
for l in langs:
ld[l.lang] = l.prob
return AnnotationsResponse(features=ld)
service = ELGService("LangDetection")
app = service.app
The QuartService class is also particularly useful for proxy services that forward the request to the actual LT service.
For example a proxy for a Speech-to-text service running outside the ELG cluster::
import traceback
import aiohttp
from loguru import logger
from elg import QuartService
from elg.model import TextsResponse
from elg.quart_service import ProcessingError
class Proxy(QuartService):
consume_generator = False
async def setup(self):
self.session = aiohttp.ClientSession()
async def shutdown(self):
if self.session is not None:
await self.session.close()
async def process_audio(self, content):
try:
# Make the remote call
async with self.session.post("https://example.com/endpoint", data=content.generator) as client_response:
status_code = client_response.status
content = await client_response.json()
except:
traceback.print_exc()
raise ProcessingError.InternalError('Error calling API')
if status_code >= 400:
# if your API returns sensible error messages you could include that
# instead of the generic message
raise ProcessingError.InternalError('Error calling API')
logger.info("Return the text response")
return TextsResponse(texts=[{"content": content["text"]}])
service = Proxy("Proxy")
app = service.app
"""
requirements = ["elg[quart]"]
consume_generator = True

@@ -98,2 +155,3 @@ def __init__(self, name: str, request_size_limit: int = None):

self.app.config["JSON_SORT_KEYS"] = False
self.app.json_encoder = json_encoder(self)

@@ -107,3 +165,4 @@ if request_size_limit is not None:

self.app.register_error_handler(
RequestEntityTooLarge, lambda err: self.error_message(ProcessingError.TooLarge())
RequestEntityTooLarge,
lambda err: self.error_message(ProcessingError.TooLarge()),
)

@@ -117,2 +176,35 @@

def to_json(self, obj):
"""Hook that can be overridden by subclasses to customise JSON encoding.
FlaskService can convert the following types to JSON by default, in
addition to the types handled natively by `json.dump`:
- date, time, datetime (via `.isoformat()`)
- uuid.UUID (converted to `str`)
- any pydantic.BaseModel including the ELG message types (via
`.dict(by_alias=True, exclude_none=True)`)
- anything Iterable (as a list)
- any `dataclass` (converted to a `dict` via `dataclasses.asdict`)
- anything with a `__json__` or `for_json` method (which is expected to
return a serializable type)
To handle other types, or to change the standard behaviour for any of
the above types, subclasses can override this method, which will be
called whenever an object other than a string, number, bool, list or
dict must be serialized and is expected to return a JSON-serializable
object to be used in place of the original, or `None` to fall back to
the default behaviour.
The default implementation of this method always returns `None`.
Args:
obj: the object to convert
Returns:
a substitute object suitable for JSON serialization, or `None` to
use the default behaviour.
"""
return None
def run(self):

@@ -126,2 +218,3 @@ """

def error_message(err):
logger.error("Return error message: {err}", err=err)
return err.to_json(), err.status_code

@@ -132,9 +225,7 @@

One-time setup tasks that must happen before the first request is
handled, but require access to the event loop so cannot happen at the top
level.
handled. For example, it is possible to open an `aiohttp` `ClientSessions()`
to use it :
`self.session = aiohttp.ClientSession()`
"""
# Create the shared aiohttp session
self.session = aiohttp.ClientSession()
# or you may wish to configure things like default headers, e.g.
# session = aiohttp.ClientSession(headers = {'X-API-Key':os.environ.get('APIKEY')})
pass

@@ -144,10 +235,92 @@ async def shutdown(self):

Logic that must run at shutdown time, after the last request has been
handled.
"""
handled. For example closing the `aiohttp` `ClientSessions()`:
```
if self.session is not None:
await self.session.close()
```
"""
pass
async def convert_async_generator_to_bytes(self, generator):
    """
    Drain an async generator of ``bytes`` chunks and return the whole payload.

    Args:
        generator: async iterable yielding ``bytes`` chunks.

    Returns:
        bytes: all chunks concatenated in arrival order (b"" if none).
    """
    # Collect chunks and join once at the end: repeated ``bytes +=`` copies
    # the whole accumulator on every iteration (quadratic for many chunks).
    chunks = []
    async for chunk in generator:
        chunks.append(chunk)
    return b"".join(chunks)
def health(self):
    """Liveness-probe payload: this service always reports itself alive."""
    status = dict(alive=True)
    return status
@staticmethod
async def parse_plain(audioformat):
    # Build the minimal audio-request envelope for a raw (non-multipart)
    # body and hand back the request body stream untouched so the audio
    # bytes can be consumed by the caller.
    # NOTE(review): `input_request` is the module-level Quart request proxy
    # imported at the top of the file — confirm against surrounding file.
    return {"type": "audio", "format": audioformat}, input_request.body
@staticmethod
async def parse_multipart():
    """Parse a streaming multipart/form-data request.

    Returns a 2-tuple of (parsed JSON request dict, active async generator
    yielding the remaining binary content chunks). Raises
    ProcessingError.InvalidRequest() on any structural problem.
    """
    boundary = input_request.mimetype_params.get("boundary", "").encode("ascii")
    if not boundary:
        raise ProcessingError.InvalidRequest()

    async def multipart_gen():
        """
        Logic: the request should consist of first a "form field" part named
        "request" containing JSON, and second a "file upload" part named
        "content" containing the audio. This generator fully parses the JSON
        part and yields that as a dict, then subsequently yields chunks of the
        audio data until they run out. We create the generator and consume its
        first yield (the parsed JSON), then return the active generator so the
        rest of the binary chunks can be consumed by the caller in an async for.
        This logic is heavily inspired by quart.formparser but we don't use that
        directly as it would attempt to buffer the binary data rather than
        allowing it to stream directly from the request.
        """
        # werkzeug sans-IO decoder: fed raw bytes, emits parse events.
        parser = MultipartDecoder(boundary, current_app.config["MAX_CONTENT_LENGTH"])
        found_request = False
        request_buf = []
        in_content = False
        async for data in input_request.body:
            parser.receive_data(data)
            event = parser.next_event()
            # Drain every event the new data made available; NeedData means
            # we must await more body bytes, Epilogue means we are done.
            while not isinstance(event, (Epilogue, NeedData)):
                if isinstance(event, Field):
                    # this should be the "request" section
                    if event.name != "request":
                        raise ProcessingError.InvalidRequest()
                    found_request = True
                    request_charset = "utf-8"
                    request_content_type = event.headers.get("content-type")
                    if request_content_type:
                        mimetype, ct_params = parse_options_header(request_content_type)
                        if mimetype != "application/json":
                            raise ProcessingError.InvalidRequest()
                        request_charset = ct_params.get("charset", request_charset)
                elif isinstance(event, File):
                    # a File before the request metadata is malformed
                    if not found_request:
                        raise ProcessingError.InvalidRequest()
                    # this should be the "content" section
                    if event.name != "content":
                        raise ProcessingError.InvalidRequest()
                    in_content = True
                elif isinstance(event, Data):
                    if in_content:
                        # we're streaming the content now
                        yield event.data
                    elif found_request:
                        request_buf.append(event.data)
                        if not event.more_data:
                            # finished the request section, so parse it
                            try:
                                yield json.loads(b"".join(request_buf).decode(request_charset))
                            except:
                                raise ProcessingError.InvalidRequest()
                            # allow the JSON buffer to be freed by the GC
                            request_buf = []
                event = parser.next_event()

    mp_gen = multipart_gen()
    # generator yields first the parsed JSON request, then the content as
    # chunks of bytes
    return (await mp_gen.asend(None)), mp_gen
async def process(self):

@@ -159,23 +332,22 @@ """

even_stream = True if "text/event-stream" in input_request.accept_mimetypes else False
logger.debug(f"Accept MimeTypes: {input_request.accept_mimetypes}")
logger.info(f"Accept even-stream: {even_stream}")
if "application/json" in input_request.content_type:
logger.debug("Accept MimeTypes: {mimetypes}", mimetypes=input_request.accept_mimetypes)
logger.debug("Accept even-stream: {even_stream}", even_stream=even_stream)
if input_request.mimetype == "multipart/form-data":
data, generator = await self.parse_multipart()
if self.consume_generator:
data["content"] = await self.convert_async_generator_to_bytes(generator)
else:
data["generator"] = generator
elif input_request.mimetype == "audio/mpeg":
data, content = await self.parse_plain("MP3")
data["content"] = content
elif input_request.mimetype == "audio/wav" or input_request.mimetype == "audio/x-wav":
data, content = await self.parse_plain("LINEAR16")
data["content"] = content
elif input_request.mimetype == "application/json":
data = await input_request.get_json()
elif "multipart/form-data" in input_request.content_type:
input_request_data = await input_request.get_data()
decoder = MultipartDecoder(input_request_data, input_request.content_type)
data = {}
for part in decoder.parts:
headers = {k.decode(): v.decode() for k, v in part.headers.items()}
if "application/json" in headers["Content-Type"]:
for k, v in json.loads(part.content.decode()).items():
data[k] = v
elif "audio" in headers["Content-Type"]:
data["content"] = part.content
else:
raise ProcessingError.UnsupportedType(str(headers["Content-Type"]))
else:
raise ProcessingError.UnsupportedType(input_request.content_type)
logger.debug(f"Data type: {data.get('type')}")
logger.debug("Data type: {request_type}", request_type=data.get("type"))
if data.get("type") == "audio":

@@ -189,22 +361,33 @@ request = AudioRequest(**data)

raise ProcessingError.InvalidRequest()
logger.info(f"Call with the input: {request}")
logger.info("Await for the coroutine...")
logger.trace("Call with the input: {request}", request=request)
logger.debug("Await for the coroutine...")
try:
response = await self.process_request(request)
except ProcessingError:
# if the code raises its own ProcessingError with a valid ELG error
# message, just pass that along as-is rather than converting to a
# generic internalError
raise
except:
raise ProcessingError.InvalidRequest()
logger.opt(exception=True).warning("Exception during processing")
raise ProcessingError.InternalError(str(sys.exc_info()[1]))
if isinstance(response, Failure):
logger.info(f"Get error message: {response}")
response = {"failure": response.dict(by_alias=True)}
logger.info(f"Return: {response}")
logger.debug("Get error message")
response = {"failure": response}
logger.trace("Return: {response}", response=response)
logger.info("Response returned")
return response
elif isinstance(response, ResponseObject):
logger.info(f"Get response: {response}")
response = {"response": response.dict(by_alias=True)}
logger.info(f"Return: {response}")
logger.debug("Get response")
response = {"response": response}
logger.trace("Return: {response}", response=response)
logger.info("Response returned")
return response
elif isinstance(response, AsyncIterable):
logger.info(f"Get async iterable response")
logger.debug("Get async iterable response")
if even_stream:
response = await make_response(
self.generator_mapping(response), 200, {"Content-Type": "text/event-stream"}
self.generator_mapping(response),
200,
{"Content-Type": "text/event-stream"},
)

@@ -214,13 +397,17 @@ # Quart will by default time-out long responses, may be necessary to disable that

response.timeout = None
logger.info("Streaming response returned")
return response
else:
response = await self.get_response_from_generator(response)
logger.info(f"Get response: {response}")
if isinstance(response, ResponseObject):
response = {"response": response.dict(by_alias=True)}
logger.debug("Get response")
response = {"response": response}
elif isinstance(response, Failure):
response = {"failure": response.dict(by_alias=True)}
logger.debug("Get error message")
response = {"failure": response}
else:
logger.debug("Response type not known")
raise ProcessingError.InvalidRequest()
logger.info(f"Return: {response}")
logger.trace("Return: {response}", response=response)
logger.info("Response returned")
return response

@@ -235,19 +422,10 @@ else:

if request.type == "text":
logger.debug("Process text")
try:
return await self.process_text(request)
except:
return self.process_text(request)
logger.debug("Process text request")
return await self.process_text(request)
if request.type == "structuredText":
logger.debug("Process structured text")
try:
return await self.process_structured_text(request)
except:
return self.process_structured_text(request)
logger.debug("Process structuredText request")
return await self.process_structured_text(request)
if request.type == "audio":
logger.debug("Process audio")
try:
return await self.process_audio(request)
except:
return self.process_audio(request)
logger.debug("Process audio request")
return await self.process_audio(request)
raise ProcessingError.InvalidRequest()

@@ -257,3 +435,3 @@

"""
Method to implement if the service takes text as input.
Method to implement if the service takes text as input. This method must be implemented as async.

@@ -267,3 +445,3 @@ Args:

"""
Method to implement if the service takes structured text as input.
Method to implement if the service takes structured text as input. This method must be implemented as async.

@@ -277,3 +455,3 @@ Args:

"""
Method to implement if the service takes audio as input.
Method to implement if the service takes audio as input. This method must be implemented as async.

@@ -285,4 +463,3 @@ Args:

@staticmethod
async def generator_mapping(generator):
async def generator_mapping(self, generator):
end = False

@@ -293,35 +470,28 @@ try:

logger.warning(
(
"The service has already returned a ResponseObject or Failure message but continue to return the following message:\n"
f"{message.dict(by_alias=True)}\nThis message will be ignored and not returned to the user."
)
"The service has already returned a ResponseObject or Failure message but continue to return a message. This message will be ignored and not returned to the user."
)
continue
if isinstance(message, Failure):
logger.info(f"Get failure: {message}")
message = json.dumps({"failure": message.dict(by_alias=True)})
logger.debug("Get failure message")
message = json.dumps({"failure": message}, cls=self.app.json_encoder)
end = True
elif isinstance(message, Progress):
logger.info(f"Get progress: {message}")
message = json.dumps({"progress": message.dict(by_alias=True)})
logger.debug("Get progress message")
message = json.dumps({"progress": message}, cls=self.app.json_encoder)
elif isinstance(message, ResponseObject):
logger.info(f"Get response: {message}")
message = json.dumps({"response": message.dict(by_alias=True)})
logger.debug("Get response")
message = json.dumps({"response": message}, cls=self.app.json_encoder)
end = True
yield f"data:{message}\r\n\r\n"
yield f"data:{message}\r\n\r\n".encode("utf-8")
except:
logger.opt(exception=True).warning("Exception in generator")
message = json.dumps(
{
"failure": Failure(
errors=[
{
"code": "elg.internalError",
"text": "Internal error during processing",
"params": ["message"],
}
]
).dict(by_alias=True)
}
errors=[StandardMessages.generate_elg_service_internalerror(params=[str(sys.exc_info()[1])])]
)
},
cls=self.app.json_encoder,
)
yield f"data:{message}\r\n\r\n"
yield f"data:{message}\r\n\r\n".encode("utf-8")

@@ -334,6 +504,3 @@ @staticmethod

logger.warning(
(
"The service has already returned a ResponseObject or Failure message but continue to return the following message:\n"
f"{message.dict(by_alias=True)}\nThis message will be ignored and not returned to the user."
)
"The service has already returned a ResponseObject or Failure message but continue to return a message. This message will be ignored and not returned to the user."
)

@@ -344,5 +511,3 @@ continue

if response is None:
return Failure(
errors=[{"code": "elg.response.invalid", "text": "Invalid response message", "params": ["message"]}]
)
return Failure(errors=[StandardMessages.generate_elg_invalid_response()])
return response

@@ -375,2 +540,3 @@

path: str = None,
log_level: str = "INFO",
):

@@ -385,2 +551,3 @@ """Class method to create the correct Dockerfile.

path (str, optional): Path where to generate the file. Defaults to None.
log_level (str, optional): The minimum severity level from which logged messages should be displayed. Defaults to 'INFO'.
"""

@@ -409,2 +576,3 @@ if path == None:

commands="\n".join(commands),
env=ENV_QUART.format(log_level=log_level),
)

@@ -427,3 +595,10 @@ )

@classmethod
def docker_push_image(cls, repository: str, tag: str, username: str = None, password: str = None, **kwargs):
def docker_push_image(
cls,
repository: str,
tag: str,
username: str = None,
password: str = None,
**kwargs,
):
"""

@@ -435,3 +610,9 @@ Class method to do `docker push ...` in python. Better to use the docker cli instead of this method.

auth_config = {"username": username, "password": password}
client.images.push(repository=repository, tag=tag, auth_config=auth_config, stream=True, **kwargs)
client.images.push(
repository=repository,
tag=tag,
auth_config=auth_config,
stream=True,
**kwargs,
)
client.images.push(repository=repository, tag=tag, stream=True, **kwargs)

@@ -452,3 +633,9 @@ return

cls.docker_build_image(tag=f"{repository}:{tag}", pull=pull, **build_kwargs)
cls.docker_push_image(repository=repository, tag=tag, username=username, password=password, **push_kwargs)
cls.docker_push_image(
repository=repository,
tag=tag,
username=username,
password=password,
**push_kwargs,
)
return None

@@ -352,3 +352,3 @@ import hashlib

a list of texts, or directly a Request object.
request_type (str, optional): precise the type of the request. Can be "text", "structuredText", or "audio".
request_type (str, optional): precise the type of the request. Can be "text", "structuredText", "audio", or "audioStream".
It is only used if request_input is not a Request object. Defaults to "text".

@@ -387,5 +387,7 @@ sync_mode (bool, optional): True to use the sync_mode. Defaults to False.

request = AudioRequest.from_file(request_input)
elif request_type == "audioStream":
request = AudioRequest.from_file(request_input, streaming=True)
else:
raise ValueError(
"Invalid value for request_type. It musts be 'text', 'structuredText', or 'audio'."
"Invalid value for request_type. It musts be 'text', 'structuredText', 'audio', or 'audioStream'."
)

@@ -445,3 +447,3 @@ else:

elif isinstance(request, AudioRequest):
data = request.content
data = request.content if request.content else request.generator
headers["Content-Type"] = "audio/x-wav" if request.format == "LINEAR16" else "audio/mpeg"

@@ -508,3 +510,5 @@ elif isinstance(request, TextRequest) or isinstance(request, StructuredTextRequest):

while response.ok and "progress" in response.json().keys():
print(f"Progress: {response.json()['progress']['percent']}%")
percent = response.json()["progress"]["percent"]
if percent != 0:
print(f"Progress: {percent}%")
sleep(1)

@@ -511,0 +515,0 @@ response = requests.get(uri, headers=headers, timeout=timeout)

ENTRYPOINT_FLASK = """\
#!/bin/sh\nexec /sbin/tini -- venv/bin/gunicorn --bind=0.0.0.0:8000 "--workers=$WORKERS" "--timeout=$TIMEOUT" --worker-tmp-dir=/dev/shm "$@" {service_script}:app\n"""
#!/bin/sh\nexec /sbin/tini -- venv/bin/gunicorn --bind=0.0.0.0:8000 "--workers=$WORKERS" "--timeout=$TIMEOUT" "--worker-class=$WORKER_CLASS" --worker-tmp-dir=/dev/shm "$@" {service_script}:app\n"""

@@ -33,4 +33,3 @@ ENTRYPOINT_QUART = """\

ENV WORKERS=1
ENV TIMEOUT=30
{env}

@@ -42,1 +41,12 @@ RUN chmod +x ./docker-entrypoint.sh

COPY_FOLDER = "COPY --chown=elg:elg {folder_name} /elg/{folder_name}/"
ENV_FLASK = """\
ENV WORKERS={workers}
ENV TIMEOUT={timeout}
ENV WORKER_CLASS={worker_class}
ENV LOGURU_LEVEL={log_level}
"""
ENV_QUART = """\
ENV LOGURU_LEVEL={log_level}
"""

@@ -6,4 +6,5 @@ import requests

class ElgException(Exception):
def __init__(self, message):
def __init__(self, message, trigger):
super().__init__(message)
self.trigger = trigger

@@ -51,3 +52,3 @@

self.message = f"Impossible to connect to the API (Caused by {error})"
super().__init__(self.message)
super().__init__(self.message, error)

@@ -57,4 +58,4 @@

class APIException(ElgException):
def __init__(self, message):
super().__init__(message)
def __init__(self, message, trigger):
super().__init__(message, trigger)

@@ -66,3 +67,3 @@

self.message = f"Error {status_code} with the client request: {response}"
super().__init__(self.message)
super().__init__(self.message, status_code)

@@ -74,3 +75,3 @@

self.message = f"Error {status_code} on the server side: {response}"
super().__init__(self.message)
super().__init__(self.message, status_code)

@@ -77,0 +78,0 @@

@@ -6,7 +6,5 @@ import hashlib

from pathlib import Path
from typing import Dict, List
import requests
from ..model import Response
from .errors import (MissingInformationException, catch_requests_error,

@@ -13,0 +11,0 @@ ensure_response_ok)

Metadata-Version: 2.1
Name: elg
Version: 0.4.16
Version: 0.4.17
Summary: Use the European Language Grid in your Python projects

@@ -5,0 +5,0 @@ Home-page: https://gitlab.com/european-language-grid/platform/python-client

@@ -5,3 +5,3 @@ from setuptools import find_packages, setup

name="elg",
version="0.4.16",
version="0.4.17",
author="ELG Technical Team",

@@ -26,3 +26,2 @@ url="https://gitlab.com/european-language-grid/platform/python-client",

"flask>=2.0",
"Flask-JSON>=0.3",
"docker>=5.0",

@@ -33,3 +32,2 @@ "requests_toolbelt>=0.9",

"quart>=0.15.1",
"aiohttp>=3.7",
"docker>=5.0",

@@ -36,0 +34,0 @@ "requests_toolbelt>=0.9",

from pydantic import BaseModel
from .ResponseObject import ResponseObject
class Response(BaseModel):
"""
Representation of a successful completion response.
Abstract, subclasses should instantiate this with their own type
Attributes
----------
response (ResponseObject, required):
"""
response: ResponseObject