@axa-fr/axa-fr-splitter
About
The axa-fr-splitter package provides Python tools for converting several types of documents (PDF, TIFF, ...) into images.
Quick Start
pip install axa-fr-splitter
from pathlib import Path
from splitter import FileHandler
from splitter.image.tiff_handler import TifHandler
from splitter.pdf.pdf_handler import FitzPdfHandler
def create_file_handler() -> FileHandler:
    """Build a FileHandler with the PDF and TIFF converters registered.

    Returns:
        A ``FileHandler`` on which ``FitzPdfHandler`` is registered for
        ``.pdf`` / ``application/pdf`` and ``TifHandler`` is registered for
        ``.tif``, ``.tiff`` / ``image/tiff``.
    """
    handler = FileHandler()
    # One row per converter: (converter instance, file extensions, MIME types).
    registrations = (
        (FitzPdfHandler(), ['.pdf'], ['application/pdf']),
        (TifHandler(), ['.tif', '.tiff'], ['image/tiff']),
    )
    for converter, extensions, mime_types in registrations:
        handler.register_converter(
            converter,
            extensions=extensions,
            mime_types=mime_types,
        )
    return handler
def main(filepath, output_path):
    """Split a document and write every resulting file under ``output_path``.

    Args:
        filepath: Path of the document to split.
        output_path: Directory that receives the exported files; it is
            created (along with any missing sub-directories) if needed.
    """
    file_handler = create_file_handler()
    output_dir = Path(output_path)
    for file_or_exception in file_handler.split_document(filepath):
        # Each item wraps either a file or an error; unwrap() hands back the
        # file (presumably raising when the item holds a failure -- see the
        # `returns` library documentation).
        file = file_or_exception.unwrap()
        print(file.metadata)
        export_path = output_dir.joinpath(file.relative_path)
        # write_bytes() does not create directories, so make sure the target
        # folder exists before writing.
        export_path.parent.mkdir(parents=True, exist_ok=True)
        export_path.write_bytes(file.file_bytes)
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    # NOTE: MY_OUTPUT_PATH is not defined in this snippet; replace it with
    # the directory where the split files should be written.
    main(r"tests/inputs/specimen.tiff", MY_OUTPUT_PATH)
You can use the `match` statement (Python 3.10+) to handle exceptions in a different way:
from returns.result import Failure, Success
...
def main(filepath, output_path):
file_handler = create_file_handler()
output_path = Path(output_path)
for file_or_exception in file_handler.split_document(filepath):
match file_or_exception:
case Success(file):
print(file.metadata)
export_path = output_path.joinpath(file.relative_path)
export_path.write_bytes(file.file_bytes)
case Failure(exception):
raise exception
Contribute