Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

abogen

Package Overview
Dependencies
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

abogen - npm Package Compare versions

Comparing version
1.3.0
to
1.3.1
.env.example

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

+31
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg height="800px" width="800px" version="1.1" id="_x32_" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 512 512" xml:space="preserve">
<style type="text/css">
.st0{fill:#808080;}
</style>
<g>
<path class="st0" d="M502.325,307.303l-39.006-30.805c-6.215-4.908-9.665-12.429-9.668-20.348c0-0.084,0-0.168,0-0.252
c-0.014-7.936,3.44-15.478,9.667-20.396l39.007-30.806c8.933-7.055,12.093-19.185,7.737-29.701l-17.134-41.366
c-4.356-10.516-15.167-16.86-26.472-15.532l-49.366,5.8c-7.881,0.926-15.656-1.966-21.258-7.586
c-0.059-0.06-0.118-0.119-0.177-0.178c-5.597-5.602-8.476-13.36-7.552-21.225l5.799-49.363
c1.328-11.305-5.015-22.116-15.531-26.472L337.004,1.939c-10.516-4.356-22.646-1.196-29.701,7.736l-30.805,39.005
c-4.908,6.215-12.43,9.665-20.349,9.668c-0.084,0-0.168,0-0.252,0c-7.935,0.014-15.477-3.44-20.395-9.667L204.697,9.675
c-7.055-8.933-19.185-12.092-29.702-7.736L133.63,19.072c-10.516,4.356-16.86,15.167-15.532,26.473l5.799,49.366
c0.926,7.881-1.964,15.656-7.585,21.257c-0.059,0.059-0.118,0.118-0.178,0.178c-5.602,5.598-13.36,8.477-21.226,7.552
l-49.363-5.799c-11.305-1.328-22.116,5.015-26.472,15.531L1.939,174.996c-4.356,10.516-1.196,22.646,7.736,29.701l39.006,30.805
c6.215,4.908,9.665,12.429,9.668,20.348c0,0.084,0,0.167,0,0.251c0.014,7.935-3.44,15.477-9.667,20.395L9.675,307.303
c-8.933,7.055-12.092,19.185-7.736,29.701l17.134,41.365c4.356,10.516,15.168,16.86,26.472,15.532l49.366-5.799
c7.882-0.926,15.656,1.965,21.258,7.586c0.059,0.059,0.118,0.119,0.178,0.178c5.597,5.603,8.476,13.36,7.552,21.226l-5.799,49.364
c-1.328,11.305,5.015,22.116,15.532,26.472l41.366,17.134c10.516,4.356,22.646,1.196,29.701-7.736l30.804-39.005
c4.908-6.215,12.43-9.665,20.348-9.669c0.084,0,0.168,0,0.251,0c7.936-0.014,15.478,3.44,20.396,9.667l30.806,39.007
c7.055,8.933,19.185,12.093,29.701,7.736l41.366-17.134c10.516-4.356,16.86-15.168,15.532-26.472l-5.8-49.366
c-0.926-7.881,1.965-15.656,7.586-21.257c0.059-0.059,0.119-0.119,0.178-0.178c5.602-5.597,13.36-8.476,21.225-7.552l49.364,5.799
c11.305,1.328,22.117-5.015,26.472-15.531l17.134-41.365C514.418,326.488,511.258,314.358,502.325,307.303z M281.292,329.698
c-39.68,16.436-85.172-2.407-101.607-42.087c-16.436-39.68,2.407-85.171,42.087-101.608c39.68-16.436,85.172,2.407,101.608,42.088
C339.815,267.771,320.972,313.262,281.292,329.698z"/>
</g>
</svg>

Sorry, the diff of this file is not supported yet

import os
import re
import logging
import textwrap
import urllib.parse
from abc import ABC, abstractmethod
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup, NavigableString
import fitz # PyMuPDF
import markdown
from abogen.utils import detect_encoding
from abogen.subtitle_utils import clean_text, calculate_text_length
# Pre-compile frequently used regex patterns
# Inline citation/footnote markers like "[12]" left behind by PDF extraction.
_BRACKETED_NUMBERS_PATTERN = re.compile(r"\[\s*\d+\s*\]")
# Lines that consist of nothing but a page number.
_STANDALONE_PAGE_NUMBERS_PATTERN = re.compile(r"^\s*\d+\s*$", re.MULTILINE)
# Trailing page numbers at the end of a line.
_PAGE_NUMBERS_AT_END_PATTERN = re.compile(r"\s+\d+\s*$", re.MULTILINE)
# Trailing page numbers wrapped in dashes, e.g. "- 12 -" (hyphen/en/em dash).
_PAGE_NUMBERS_WITH_DASH_PATTERN = re.compile(
    r"\s+[-–—]\s*\d+\s*[-–—]?\s*$", re.MULTILINE
)
class BaseBookParser(ABC):
    """
    Abstract base class for parsing different book formats.

    Subclasses implement ``load`` and ``process_content`` and fill in
    ``content_texts``/``content_lengths`` (keyed by chapter id) plus the
    unified ``processed_nav_structure`` tree.
    """

    def __init__(self, book_path):
        self.book_path = os.path.normpath(os.path.abspath(book_path))
        self.content_texts = {}
        self.content_lengths = {}
        self.book_metadata = {}
        # Unified navigation tree: list of dicts shaped like
        # { 'title': str, 'src': str, 'children': [], 'has_content': bool }
        self.processed_nav_structure = []
        self.load()

    @abstractmethod
    def load(self):
        """Load the book file."""
        pass

    def close(self):
        """Close any open file handles."""
        pass

    def __enter__(self):
        # Loading already happened in __init__ (or lazily); nothing to do.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @abstractmethod
    def process_content(self, replace_single_newlines=True):
        """Process the book content to extract text and structure."""
        pass

    @property
    @abstractmethod
    def file_type(self):
        """Return the type of the file (pdf, epub, markdown)."""
        pass

    def get_chapters(self):
        """Return a list of (chapter_id, chapter_name) tuples."""
        if not self.processed_nav_structure:
            # Generic fallback when no navigation tree was built: every
            # content entry becomes its own chapter, named by its id.
            return [(ch_id, ch_id) for ch_id in self.content_texts]
        # Depth-first pre-order walk of the navigation tree, iteratively.
        chapters = []
        pending = list(self.processed_nav_structure)
        while pending:
            node = pending.pop(0)
            if node.get("has_content"):
                chapters.append((node["src"], node["title"]))
            # Visit children before the remaining siblings.
            pending[0:0] = node.get("children") or []
        return chapters

    def get_formatted_text(self):
        """
        Returns the full text of the book formatted with chapter markers.
        """
        parts = []
        for chapter_id, chapter_name in self.get_chapters():
            text = self.content_texts.get(chapter_id, "")
            if text:
                parts.append(f"\n<<CHAPTER_MARKER:{chapter_name}>>\n")
                parts.append(text)
        return "\n".join(parts)

    def get_metadata(self):
        """Return extracted metadata."""
        return self.book_metadata
class PdfParser(BaseBookParser):
    """Parser for PDF files backed by PyMuPDF (fitz)."""

    def __init__(self, book_path):
        self.pdf_doc = None
        super().__init__(book_path)

    @property
    def file_type(self):
        return "pdf"

    def load(self):
        """Open the PDF document; logs and re-raises on failure."""
        try:
            self.pdf_doc = fitz.open(self.book_path)
        except Exception as e:
            logging.error(f"Error loading PDF {self.book_path}: {e}")
            raise

    def close(self):
        if self.pdf_doc:
            self.pdf_doc.close()
            self.pdf_doc = None

    def _extract_book_metadata(self):
        # PDF metadata extraction can be added here if needed
        # For now, base class metadata is empty dict
        pass

    def process_content(self, replace_single_newlines=True):
        """Extract per-page text and build the navigation structure.

        Returns:
            (content_texts, content_lengths), both keyed by "page_<n>" ids.
        """
        if not self.pdf_doc:
            self.load()
        # 1. Extract text from all pages first.
        for page_num in range(len(self.pdf_doc)):
            text = clean_text(self.pdf_doc[page_num].get_text())
            # Clean up common PDF artifacts (citations, page numbers):
            text = _BRACKETED_NUMBERS_PATTERN.sub("", text)
            text = _STANDALONE_PAGE_NUMBERS_PATTERN.sub("", text)
            text = _PAGE_NUMBERS_AT_END_PATTERN.sub("", text)
            text = _PAGE_NUMBERS_WITH_DASH_PATTERN.sub("", text)
            page_id = f"page_{page_num + 1}"
            self.content_texts[page_id] = text
            self.content_lengths[page_id] = calculate_text_length(text)
        # 2. Build the navigation structure from the TOC, falling back to a
        # single flat "Pages" node when the PDF has no TOC.
        toc = self.pdf_doc.get_toc()
        if not toc:
            self.processed_nav_structure = []
            pages_node = {
                "title": "Pages",
                "src": None,
                "children": [],
                "has_content": False,
            }
            # Add all pages as children
            for page_num in range(len(self.pdf_doc)):
                page_id = f"page_{page_num + 1}"
                title = self._get_page_title(page_num, self.content_texts.get(page_id, ""))
                pages_node["children"].append({
                    "title": title,
                    "src": page_id,
                    "children": [],
                    "has_content": True,
                })
            self.processed_nav_structure.append(pages_node)
        else:
            self.processed_nav_structure = self._build_structure_from_toc(toc)
        return self.content_texts, self.content_lengths

    def _get_page_title(self, page_num, text):
        """Build a display title: "Page N" plus the first line when short."""
        title = f"Page {page_num + 1}"
        if text:
            first_line = text.split("\n", 1)[0].strip()
            if first_line and len(first_line) < 100:
                title += f" - {first_line}"
        return title

    def _build_structure_from_toc(self, toc):
        """Convert a fitz TOC ([[level, title, page, ...], ...]) into the
        unified nav tree, attaching non-bookmarked "gap" pages as children
        of the most recent bookmark seen before them.
        """
        # 1. Normalize TOC entries to {level, title, page} with 0-based pages.
        bookmarks = []
        for entry in toc:
            lvl, title, page = entry[:3]
            if isinstance(page, int):
                page_idx = page - 1
            else:
                # PyMuPDF documents the page target as an int; anything else
                # is marked invalid and skipped below.
                page_idx = -1
            if page_idx >= 0:
                bookmarks.append({"level": lvl, "title": title, "page": page_idx})
        root_children = []
        # Stack of (level, children-list to append to); level 0 is the root
        # sentinel and is never popped because TOC levels start at 1.
        stack = [(0, root_children)]
        # 2. Build the skeleton tree, keeping a flat (page_idx, node) list so
        # pages can be associated with bookmarks afterwards.
        processed_nodes = []
        for entry in bookmarks:
            node = {
                "title": entry["title"],
                "src": f"page_{entry['page'] + 1}",
                "children": [],
                "has_content": True,
            }
            level = entry["level"]
            # Pop until the top of the stack is a strict ancestor level.
            while stack and stack[-1][0] >= level:
                stack.pop()
            parent_list = stack[-1][1]
            parent_list.append(node)
            stack.append((level, node["children"]))
            processed_nodes.append((entry["page"], node))
        # 3. Attach gap pages. Index bookmark nodes by the page they start on.
        bookmarks_by_page = {}
        for p, node in processed_nodes:
            bookmarks_by_page.setdefault(p, []).append(node)
        current_node = None
        for page_num in range(len(self.pdf_doc)):
            page_id = f"page_{page_num + 1}"
            if page_num in bookmarks_by_page:
                # Page starts one or more bookmarks; the innermost (last)
                # becomes the parent for the gap pages that follow.
                current_node = bookmarks_by_page[page_num][-1]
                continue
            # Not a bookmark page: add it under the current bookmark.
            title = self._get_page_title(page_num, self.content_texts.get(page_id, ""))
            page_node = {
                "title": title,
                "src": page_id,
                "children": [],
                "has_content": True,
            }
            if current_node:
                current_node["children"].append(page_node)
            else:
                # No preceding bookmark: attach directly to the root.
                root_children.append(page_node)
        return root_children
class MarkdownParser(BaseBookParser):
    """Parser for Markdown files.

    Uses python-markdown's "toc" extension to derive the chapter structure
    from headings, then slices the rendered HTML between header anchors to
    produce per-chapter plain text.
    """

    def __init__(self, book_path):
        # Raw markdown source; None until load() has run.
        self.markdown_text = None
        super().__init__(book_path)

    @property
    def file_type(self):
        return "markdown"

    def load(self):
        """Read the markdown file with detected encoding.

        On failure the text is set to "" instead of raising, so downstream
        processing degrades to an empty book.
        """
        try:
            encoding = detect_encoding(self.book_path)
            with open(self.book_path, "r", encoding=encoding, errors="replace") as f:
                self.markdown_text = f.read()
        except Exception as e:
            logging.error(f"Error reading markdown file: {e}")
            self.markdown_text = ""

    def process_content(self, replace_single_newlines=True):
        if self.markdown_text is None:
            self.load()
        self._process_markdown_content()
        return self.content_texts, self.content_lengths

    def _convert_markdown_toc_to_nav(self, toc_tokens):
        """Recursively map python-markdown toc_tokens to the unified nav tree."""
        nav_nodes = []
        for token in toc_tokens:
            node = {
                "title": token["name"],
                "src": token["id"],
                "children": self._convert_markdown_toc_to_nav(
                    token.get("children", [])
                ),
                "has_content": True,
            }
            nav_nodes.append(node)
        return nav_nodes

    def _process_markdown_content(self):
        """Render the markdown and split it into chapters by header anchors."""
        if not self.markdown_text:
            return
        original_text = textwrap.dedent(self.markdown_text)
        md = markdown.Markdown(extensions=["toc", "fenced_code"])
        html = md.convert(original_text)
        markdown_toc = md.toc_tokens
        # Convert markdown TOC tokens to our unified navigation structure
        self.processed_nav_structure = self._convert_markdown_toc_to_nav(markdown_toc)
        cleaned_full_text = clean_text(original_text)
        # If no TOC found, treat as single chapter
        if not self.processed_nav_structure:
            chapter_id = "markdown_content"
            self.content_texts[chapter_id] = cleaned_full_text
            self.content_lengths[chapter_id] = calculate_text_length(cleaned_full_text)
            return
        # Flatten the nav tree so every header (at any depth) gets a slice.
        all_headers = []

        def flatten_nav_internal(nodes):
            for node in nodes:
                all_headers.append(node)
                if node.get("children"):
                    flatten_nav_internal(node["children"])

        flatten_nav_internal(self.processed_nav_structure)
        # Locate each header's opening tag position in the rendered HTML.
        header_positions = []
        for node in all_headers:
            header_id = node["src"]
            id_pattern = f'id="{header_id}"'
            pos = html.find(id_pattern)
            if pos != -1:
                tag_start = html.rfind("<", 0, pos)
                header_positions.append(
                    {"id": header_id, "start": tag_start, "name": node["title"]}
                )
        header_positions.sort(key=lambda x: x["start"])
        # Each chapter is the HTML from its header up to the next header.
        for i, header_pos in enumerate(header_positions):
            header_id = header_pos["id"]
            header_name = header_pos["name"]
            content_start = header_pos["start"]
            content_end = (
                header_positions[i + 1]["start"]
                if i + 1 < len(header_positions)
                else len(html)
            )
            section_html = html[content_start:content_end]
            section_soup = BeautifulSoup(section_html, "html.parser")
            # Drop the header tag itself; its text is re-added as a prefix.
            header_tag = section_soup.find(attrs={"id": header_id})
            if header_tag:
                header_tag.decompose()
            section_text = clean_text(section_soup.get_text()).strip()
            chapter_id = header_id
            if section_text:
                full_content = f"{header_name}\n\n{section_text}"
                self.content_texts[chapter_id] = full_content
                self.content_lengths[chapter_id] = calculate_text_length(full_content)
            else:
                # Header with no body still becomes a (title-only) chapter.
                self.content_texts[chapter_id] = header_name
                self.content_lengths[chapter_id] = calculate_text_length(header_name)

    def get_chapters(self):
        chapters = super().get_chapters()
        if not chapters and "markdown_content" in self.content_texts:
            chapters.append(("markdown_content", "Content"))
        return chapters
class EpubParser(BaseBookParser):
    """Parser for EPUB files using ebooklib and BeautifulSoup.

    Navigation is read from an HTML5 nav document or an NCX table of
    contents; chapter text is produced by slicing the spine documents at
    the positions the navigation entries point to. When navigation
    processing fails entirely, a plain spine-order fallback is used.
    """

    def __init__(self, book_path):
        self.book = None
        # href -> raw HTML of each cached document.
        self.doc_content = {}
        super().__init__(book_path)

    @property
    def file_type(self):
        return "epub"

    def load(self):
        """Read the EPUB; if the archive references missing files, retry
        with a temporarily patched ebooklib reader that returns empty bytes
        for them."""
        try:
            self.book = epub.read_epub(self.book_path)
        except KeyError as e:
            # TODO: should we just patch the ebooklib pre-emptively to avoid the need to catch this exception?
            logging.warning(f"EPUB missing referenced file: {e}. Attempting to patch.")
            # Patch ebooklib to skip missing files
            from ebooklib import epub as _epub_module

            reader_class = _epub_module.EpubReader
            orig_read_file = reader_class.read_file

            def safe_read_file(self, name):
                try:
                    return orig_read_file(self, name)
                except KeyError:
                    logging.warning(
                        f"Missing file in EPUB: {name}. Returning empty bytes."
                    )
                    return b""

            reader_class.read_file = safe_read_file
            try:
                self.book = epub.read_epub(self.book_path)
            finally:
                # Always restore the original reader, even if the retry fails.
                reader_class.read_file = orig_read_file

    def process_content(self, replace_single_newlines=True):
        """Extract chapters via the navigation document, falling back to
        plain spine order when nav processing raises."""
        if not self.book:
            self.load()
        self.book_metadata = self._extract_book_metadata()
        try:
            nav_item, nav_type = self._identify_nav_item()
            self._execute_nav_parsing_logic(nav_item, nav_type)
        except Exception as e:
            logging.warning(f"EPUB nav processing failed: {e}. Falling back to spine.")
            self._process_epub_content_spine_fallback()
        return self.content_texts, self.content_lengths

    def _extract_book_metadata(self):
        """Return {title, author, language} from Dublin Core metadata,
        with per-field fallbacks when entries are missing."""
        metadata = {}
        if not self.book:
            return metadata
        try:
            metadata["title"] = self.book.get_metadata("DC", "title")[0][0]
        except Exception:
            metadata["title"] = os.path.splitext(os.path.basename(self.book_path))[0]
        try:
            metadata["author"] = self.book.get_metadata("DC", "creator")[0][0]
        except Exception:
            metadata["author"] = "Unknown Author"
        try:
            metadata["language"] = self.book.get_metadata("DC", "language")[0][0]
        except Exception:
            metadata["language"] = "en"
        return metadata

    def _find_doc_key(self, base_href, doc_order, doc_order_decoded):
        """Resolve a nav href against the spine maps.

        Tries the raw href, its URL-decoded form, and finally any spine key
        with the same basename. Returns (matched_key, spine_index) or
        (None, None).
        """
        candidates = [
            base_href,
            urllib.parse.unquote(base_href),
        ]
        base_name = os.path.basename(base_href).lower()
        for k in list(doc_order.keys()) + list(doc_order_decoded.keys()):
            if os.path.basename(k).lower() == base_name:
                candidates.append(k)
        for candidate in candidates:
            if candidate in doc_order:
                return candidate, doc_order[candidate]
            elif candidate in doc_order_decoded:
                return candidate, doc_order_decoded[candidate]
        return None, None

    def _find_position_robust(self, doc_href, fragment_id):
        """Find the character offset of anchor ``fragment_id`` inside the
        cached HTML of ``doc_href``.

        Tries BeautifulSoup first, then a regex over id/name attributes,
        then a literal substring search; returns 0 when nothing matches.
        """
        if doc_href not in self.doc_content:
            logging.warning(f"Document '{doc_href}' not found in cached content.")
            return 0
        html_content = self.doc_content[doc_href]
        if not fragment_id:
            # No anchor: the entry points at the start of the document.
            return 0
        try:
            temp_soup = BeautifulSoup(f"<div>{html_content}</div>", "html.parser")
            target_element = temp_soup.find(id=fragment_id)
            if target_element:
                # Locate the element's serialized form in the raw HTML
                # (first 200 chars is enough to be unique in practice).
                tag_str = str(target_element)
                pos = html_content.find(tag_str[: min(len(tag_str), 200)])
                if pos != -1:
                    return pos
        except Exception as e:
            logging.warning(f"BeautifulSoup failed to find id='{fragment_id}': {e}")
        # Regex fallback: match id= or name= with either quote style handled
        # by re.escape-ing the fragment.
        safe_fragment_id = re.escape(fragment_id)
        id_name_pattern = re.compile(
            f"<[^>]+(?:id|name)\\s*=\\s*[\"']{safe_fragment_id}[\"']", re.IGNORECASE
        )
        match = id_name_pattern.search(html_content)
        if match:
            return match.start()
        # Last resort: literal substring search for the attribute text.
        id_match_str = f'id="{fragment_id}"'
        name_match_str = f'name="{fragment_id}"'
        id_pos = html_content.find(id_match_str)
        name_pos = html_content.find(name_match_str)
        pos = -1
        if id_pos != -1 and name_pos != -1:
            pos = min(id_pos, name_pos)
        elif id_pos != -1:
            pos = id_pos
        elif name_pos != -1:
            pos = name_pos
        if pos != -1:
            # Back up to the opening "<" so the slice starts at the tag.
            tag_start_pos = html_content.rfind("<", 0, pos)
            final_pos = tag_start_pos if tag_start_pos != -1 else 0
            return final_pos
        logging.warning(
            f"Anchor '{fragment_id}' not found in {doc_href}. Defaulting to position 0."
        )
        return 0

    def _parse_ncx_navpoint(
        self,
        nav_point,
        ordered_entries,
        doc_order,
        doc_order_decoded,
        tree_structure_list,
        find_position_func,
    ):
        """
        Recursive parsing of NCX navigation nodes.
        Logic tested by: tests/test_epub_ncx_parsing.py

        Appends flat slice info to ``ordered_entries`` and tree nodes to
        ``tree_structure_list``. Nodes with neither content nor children
        are dropped.
        """
        nav_label = nav_point.find("navLabel")
        content = nav_point.find("content")
        title = (
            nav_label.find("text").get_text(strip=True)
            if nav_label and nav_label.find("text")
            else "Untitled Section"
        )
        src = content["src"] if content and "src" in content.attrs else None
        current_entry_node = {"title": title, "src": src, "children": []}
        if src:
            base_href, fragment = src.split("#", 1) if "#" in src else (src, None)
            doc_key, doc_idx = self._find_doc_key(
                base_href, doc_order, doc_order_decoded
            )
            if not doc_key:
                current_entry_node["has_content"] = False
            else:
                position = find_position_func(doc_key, fragment)
                entry_data = {
                    "src": src,
                    "title": title,
                    "doc_href": doc_key,
                    "position": position,
                    "doc_order": doc_idx,
                }
                ordered_entries.append(entry_data)
                current_entry_node["has_content"] = True
        else:
            current_entry_node["has_content"] = False
        child_navpoints = nav_point.find_all("navPoint", recursive=False)
        if child_navpoints:
            for child_np in child_navpoints:
                self._parse_ncx_navpoint(
                    child_np,
                    ordered_entries,
                    doc_order,
                    doc_order_decoded,
                    current_entry_node["children"],
                    find_position_func,
                )
        # Only keep nodes that carry content themselves or via children.
        if title and (
            current_entry_node.get("has_content", False)
            or current_entry_node["children"]
        ):
            tree_structure_list.append(current_entry_node)

    def _extract_nav_li_title(self, li_element, link_element=None, span_element=None):
        """Helper to extract title from a nav <li> element, handling various structures."""
        title = "Untitled Section"
        if link_element:
            title = link_element.get_text(strip=True) or title
        elif span_element:
            title = span_element.get_text(strip=True) or title
        # Fallback to direct text if title is empty or default
        # If we used link/span but got empty string, we try fallback.
        # If we didn't use link/span, we try fallback.
        if not title.strip() or title == "Untitled Section":
            li_text = "".join(
                t for t in li_element.contents if isinstance(t, NavigableString)
            ).strip()
            if li_text:
                title = li_text
        # Second fallback: if we have a span but title is still empty, try span text again
        # (covered by logic above mostly, but mirroring original logic's intense fallback)
        if (not title.strip() or title == "Untitled Section") and span_element:
            title = span_element.get_text(strip=True) or title
        return title

    def _parse_html_nav_li(
        self,
        li_element,
        ordered_entries,
        doc_order,
        doc_order_decoded,
        tree_structure_list,
        find_position_func,
    ):
        """
        Recursive parsing of HTML5 Navigation (li) nodes.
        Logic tested by: tests/test_epub_html_nav_parsing.py

        Unlike the NCX parser, every <li> is kept in the tree even when it
        has no resolvable content.
        """
        link = li_element.find("a", recursive=False)
        span_text = li_element.find("span", recursive=False)
        src = None
        current_entry_node = {"children": []}
        if link and "href" in link.attrs:
            src = link["href"]
        title = self._extract_nav_li_title(li_element, link, span_text)
        current_entry_node["title"] = title
        current_entry_node["src"] = src
        doc_key = None
        doc_idx = None
        position = 0
        fragment = None
        if src:
            base_href, fragment = src.split("#", 1) if "#" in src else (src, None)
            doc_key, doc_idx = self._find_doc_key(
                base_href, doc_order, doc_order_decoded
            )
            if doc_key is not None:
                position = find_position_func(doc_key, fragment)
                entry_data = {
                    "src": src,
                    "title": title,
                    "doc_href": doc_key,
                    "position": position,
                    "doc_order": doc_idx,
                }
                ordered_entries.append(entry_data)
                current_entry_node["has_content"] = True
            else:
                current_entry_node["has_content"] = False
        else:
            current_entry_node["has_content"] = False
        # Nested <ol> lists carry the sub-sections.
        for child_ol in li_element.find_all("ol", recursive=False):
            for child_li in child_ol.find_all("li", recursive=False):
                self._parse_html_nav_li(
                    child_li,
                    ordered_entries,
                    doc_order,
                    doc_order_decoded,
                    current_entry_node["children"],
                    find_position_func,
                )
        tree_structure_list.append(current_entry_node)

    def _identify_nav_item(self):
        """Identify the navigation item (HTML Nav or NCX) and its type."""
        nav_item = None
        nav_type = None
        # 1. Check ITEM_NAVIGATION
        nav_items = list(self.book.get_items_of_type(ebooklib.ITEM_NAVIGATION))
        # 1.1 Support for EPUB 3 EpubNav which might be ITEM_DOCUMENT (9) but with properties=['nav']
        if not nav_items:
            # Look in ITEM_DOCUMENT for items with 'nav' property
            for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                if (
                    hasattr(item, "get_type")
                    and item.get_type() == ebooklib.ITEM_DOCUMENT
                ):
                    # Check properties - ebooklib stores opf properties in list
                    # Some versions use item.properties, some need checking
                    props = getattr(item, "properties", [])
                    if "nav" in props:
                        nav_items.append(item)
        if nav_items:
            # Prefer an item whose name contains "nav", then any html item.
            nav_item = next(
                (
                    item
                    for item in nav_items
                    if "nav" in item.get_name().lower()
                    and item.get_name().lower().endswith((".xhtml", ".html"))
                ),
                None,
            ) or next(
                (
                    item
                    for item in nav_items
                    if item.get_name().lower().endswith((".xhtml", ".html"))
                ),
                None,
            )
            if nav_item:
                nav_type = "html"
        # 2. NCX in NAV
        if not nav_item and nav_items:
            ncx_in_nav = next(
                (
                    item
                    for item in nav_items
                    if item.get_name().lower().endswith(".ncx")
                ),
                None,
            )
            if ncx_in_nav:
                nav_item = ncx_in_nav
                nav_type = "ncx"
        # 3. ITEM_NCX or Fallback
        # If no explicit navigation item found, try to find a standard NCX file
        if not nav_item:
            ncx_constant = getattr(epub, "ITEM_NCX", None)
            if ncx_constant is not None:
                ncx_items = list(self.book.get_items_of_type(ncx_constant))
                if ncx_items:
                    nav_item = ncx_items[0]
                    nav_type = "ncx"
        # 4. Heuristic Search
        # Scan documents for something that looks like a TOC if standard methods fail
        if not nav_item:
            for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                try:
                    html_content = item.get_content().decode("utf-8", errors="ignore")
                    if "<nav" in html_content and 'epub:type="toc"' in html_content:
                        nav_item = item
                        nav_type = "html"
                        break
                except Exception:
                    continue
        if not nav_item or not nav_type:
            raise ValueError("No navigation document found")
        return nav_item, nav_type

    def _execute_nav_parsing_logic(self, nav_item, nav_type):
        """Parse the identified navigation item and slice content accordingly."""
        parser_type = "html.parser" if nav_type == "html" else "xml"
        try:
            nav_content = nav_item.get_content().decode("utf-8", errors="ignore")
            nav_soup = BeautifulSoup(nav_content, parser_type)
        except Exception as e:
            raise ValueError(f"Failed to parse navigation content: {e}")
        self.doc_content = {}
        spine_docs = []
        for spine_item_tuple in self.book.spine:
            item_id = spine_item_tuple[0]
            item = self.book.get_item_with_id(item_id)
            if item:
                spine_docs.append(item.get_name())
        doc_order = {href: i for i, href in enumerate(spine_docs)}
        doc_order_decoded = {
            urllib.parse.unquote(href): i for href, i in doc_order.items()
        }
        self.content_texts = {}
        self.content_lengths = {}
        # Hoisted: the nav references are invariant across documents, so
        # collect them once instead of re-scanning nav_soup per item.
        # NOTE(review): <a> tags carry "href", not "src", so the second
        # check mostly matters for NCX <content src="..."> — confirm intent.
        nav_refs = [
            nav_point.get("src", "")
            for nav_point in nav_soup.find_all(["content", "a"])
        ]
        for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            href = item.get_name()
            if href in doc_order or any(href in ref for ref in nav_refs):
                try:
                    self.doc_content[href] = item.get_content().decode(
                        "utf-8", errors="ignore"
                    )
                except Exception:
                    self.doc_content[href] = ""
        ordered_nav_entries = []
        parse_successful = False
        if nav_type == "ncx":
            nav_map = nav_soup.find("navMap")
            if nav_map:
                for nav_point in nav_map.find_all("navPoint", recursive=False):
                    self._parse_ncx_navpoint(
                        nav_point,
                        ordered_nav_entries,
                        doc_order,
                        doc_order_decoded,
                        self.processed_nav_structure,
                        self._find_position_robust,
                    )
                parse_successful = bool(ordered_nav_entries)
        elif nav_type == "html":
            toc_nav = nav_soup.find("nav", attrs={"epub:type": "toc"})
            if not toc_nav:
                # Fall back to the first <nav> that holds an <ol>.
                for nav in nav_soup.find_all("nav"):
                    if nav.find("ol"):
                        toc_nav = nav
                        break
            if toc_nav:
                top_ol = toc_nav.find("ol", recursive=False)
                if top_ol:
                    for li in top_ol.find_all("li", recursive=False):
                        self._parse_html_nav_li(
                            li,
                            ordered_nav_entries,
                            doc_order,
                            doc_order_decoded,
                            self.processed_nav_structure,
                            self._find_position_robust,
                        )
                    parse_successful = bool(ordered_nav_entries)
        if not parse_successful:
            raise ValueError("No valid navigation entries found after parsing")
        # Slice each entry from its own position up to the next entry,
        # spanning intermediate spine documents when needed.
        ordered_nav_entries.sort(key=lambda x: (x["doc_order"], x["position"]))
        num_entries = len(ordered_nav_entries)
        for i in range(num_entries):
            current_entry = ordered_nav_entries[i]
            current_src = current_entry["src"]
            current_doc = current_entry["doc_href"]
            current_pos = current_entry["position"]
            current_doc_html = self.doc_content.get(current_doc, "")
            start_slice_pos = current_pos
            slice_html = ""
            next_entry = ordered_nav_entries[i + 1] if (i + 1) < num_entries else None
            if next_entry:
                next_doc = next_entry["doc_href"]
                next_pos = next_entry["position"]
                if current_doc == next_doc:
                    slice_html = current_doc_html[start_slice_pos:next_pos]
                else:
                    slice_html = current_doc_html[start_slice_pos:]
                    docs_between = []
                    try:
                        idx_current = spine_docs.index(current_doc)
                        idx_next = spine_docs.index(next_doc)
                        if idx_current < idx_next:
                            docs_between = [
                                spine_docs[k] for k in range(idx_current + 1, idx_next)
                            ]
                        elif idx_current > idx_next:
                            # Next entry appears earlier in the spine: wrap
                            # around the end of the spine.
                            docs_between = [
                                spine_docs[k]
                                for k in range(idx_current + 1, len(spine_docs))
                            ]
                            docs_between.extend(
                                [spine_docs[k] for k in range(0, idx_next)]
                            )
                    except ValueError:
                        pass
                    for doc_href in docs_between:
                        slice_html += self.doc_content.get(doc_href, "")
                    next_doc_html = self.doc_content.get(next_doc, "")
                    slice_html += next_doc_html[:next_pos]
            else:
                # Last entry: take the rest of its document plus every
                # remaining spine document.
                slice_html = current_doc_html[start_slice_pos:]
                try:
                    idx_current = spine_docs.index(current_doc)
                    for doc_idx in range(idx_current + 1, len(spine_docs)):
                        slice_html += self.doc_content.get(spine_docs[doc_idx], "")
                except ValueError:
                    pass
            if not slice_html.strip() and current_doc_html:
                # Empty slice (e.g. bad anchor math): fall back to the doc.
                slice_html = current_doc_html
            if slice_html.strip():
                slice_soup = BeautifulSoup(slice_html, "html.parser")
                # Force paragraph breaks so get_text keeps block structure.
                for tag in slice_soup.find_all(["p", "div"]):
                    tag.append("\n\n")
                # Number ordered-list items explicitly ("1) item").
                for ol in slice_soup.find_all("ol"):
                    start = int(ol.get("start", 1))
                    for idx, li in enumerate(ol.find_all("li", recursive=False)):
                        number_text = f"{start + idx}) "
                        if li.string:
                            li.string.replace_with(number_text + li.string)
                        else:
                            li.insert(0, NavigableString(number_text))
                # Footnote markers etc. are noise for TTS output.
                for tag in slice_soup.find_all(["sup", "sub"]):
                    tag.decompose()
                text = clean_text(slice_soup.get_text()).strip()
                if text:
                    self.content_texts[current_src] = text
                    self.content_lengths[current_src] = calculate_text_length(text)
                else:
                    self.content_texts[current_src] = ""
                    self.content_lengths[current_src] = 0
            else:
                self.content_texts[current_src] = ""
                self.content_lengths[current_src] = 0
        # Any content before the first nav entry (cover, title page, ...)
        # becomes a synthetic "Introduction" chapter.
        if ordered_nav_entries:
            first_entry = ordered_nav_entries[0]
            first_doc_href = first_entry["doc_href"]
            first_pos = first_entry["position"]
            first_doc_order = first_entry["doc_order"]
            prefix_html = ""
            for doc_idx in range(first_doc_order):
                if doc_idx < len(spine_docs):
                    intermediate_doc_href = spine_docs[doc_idx]
                    prefix_html += self.doc_content.get(intermediate_doc_href, "")
            first_doc_html = self.doc_content.get(first_doc_href, "")
            prefix_html += first_doc_html[:first_pos]
            if prefix_html.strip():
                prefix_soup = BeautifulSoup(prefix_html, "html.parser")
                for tag in prefix_soup.find_all(["sup", "sub"]):
                    tag.decompose()
                prefix_text = clean_text(prefix_soup.get_text()).strip()
                if prefix_text:
                    prefix_chapter_src = "internal:prefix_content"
                    self.content_texts[prefix_chapter_src] = prefix_text
                    # Fixed: was len(prefix_text); use the same length metric
                    # as every other content_lengths entry.
                    self.content_lengths[prefix_chapter_src] = calculate_text_length(
                        prefix_text
                    )
                    self.processed_nav_structure.insert(
                        0,
                        {
                            "src": prefix_chapter_src,
                            "title": "Introduction",
                            "children": [],
                            "has_content": True,
                        },
                    )

    def _process_epub_content_spine_fallback(self):
        """
        Process EPUB content using the spine (linear reading order)
        when navigation processing fails.
        """
        logging.info("Using spine fallback for EPUB processing.")
        self.doc_content = {}
        spine_docs = []
        for spine_item_tuple in self.book.spine:
            item_id = spine_item_tuple[0]
            item = self.book.get_item_with_id(item_id)
            if item:
                spine_docs.append(item.get_name())
            else:
                logging.warning(f"Spine item with id '{item_id}' not found.")
        for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            href = item.get_name()
            if href in spine_docs:
                try:
                    html_content = item.get_content().decode("utf-8", errors="ignore")
                    self.doc_content[href] = html_content
                except Exception:
                    self.doc_content[href] = ""
        self.content_texts = {}
        self.content_lengths = {}
        for i, doc_href in enumerate(spine_docs):
            html_content = self.doc_content.get(doc_href, "")
            if html_content:
                soup = BeautifulSoup(html_content, "html.parser")
                # Handle ordered lists (same numbering as the nav path).
                for ol in soup.find_all("ol"):
                    start = int(ol.get("start", 1))
                    for idx, li in enumerate(ol.find_all("li", recursive=False)):
                        number_text = f"{start + idx}) "
                        if li.string:
                            li.string.replace_with(number_text + li.string)
                        else:
                            li.insert(0, NavigableString(number_text))
                # Remove sup/sub
                for tag in soup.find_all(["sup", "sub"]):
                    tag.decompose()
                text = clean_text(soup.get_text()).strip()
                if text:
                    self.content_texts[doc_href] = text
                    self.content_lengths[doc_href] = calculate_text_length(text)

    def get_chapters(self):
        chapters = super().get_chapters()
        if not chapters:
            # Use spine order fallback if no Nav structure
            if self.book:
                for spine_item_tuple in self.book.spine:
                    item_id = spine_item_tuple[0]
                    item = self.book.get_item_with_id(item_id)
                    if item:
                        href = item.get_name()
                        if href in self.content_texts:
                            chapters.append((href, href))
        return chapters
def get_book_parser(book_path, file_type=None):
    """
    Factory function to get the appropriate parser instance.
    """
    book_path = os.path.normpath(os.path.abspath(book_path))
    if not file_type:
        # Infer the format from the extension; EPUB is the default.
        lowered = book_path.lower()
        if lowered.endswith(".pdf"):
            file_type = "pdf"
        elif lowered.endswith((".md", ".markdown")):
            file_type = "markdown"
        else:
            file_type = "epub"
    if file_type == "pdf":
        return PdfParser(book_path)
    if file_type == "markdown":
        return MarkdownParser(book_path)
    if file_type == "epub":
        return EpubParser(book_path)
    raise ValueError(f"Unsupported file type: {file_type}")
import sys
import os
import platform
import ctypes
import importlib.util
def check_cuda_with_fix():
    """
    Check if CUDA is available, with a fix for PyTorch DLL loading issue
    ([WinError 1114]) on Windows.
    """
    # Work around [WinError 1114] by pre-loading torch's c10.dll on Windows.
    try:
        if platform.system() == "Windows":
            torch_spec = importlib.util.find_spec("torch")
            origin = torch_spec.origin if torch_spec else None
            if origin:
                candidate = os.path.join(os.path.dirname(origin), "lib", "c10.dll")
                if os.path.exists(candidate):
                    ctypes.CDLL(os.path.normpath(candidate))
    except Exception:
        # Best effort only: a failed pre-load must not block the check.
        pass
    try:
        from torch.cuda import is_available

        print(is_available())
    except ImportError:
        # torch not installed at all: report no CUDA.
        print("False")


if __name__ == "__main__":
    check_cuda_with_fix()
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, Iterable, Iterator, List, Literal, Optional, Tuple
from typing import Pattern
import re
from abogen.kokoro_text_normalization import ApostropheConfig, normalize_for_pipeline
from abogen.normalization_settings import build_apostrophe_config, get_runtime_settings
# Chunking granularity accepted by chunk_text().
ChunkLevel = Literal["paragraph", "sentence"]
# Split after terminal punctuation followed by whitespace, unless the
# character just before it is a lone capital letter (initials like "J. Doe").
_SENTENCE_SPLIT_REGEX = re.compile(r"(?<!\b[A-Z])[.!?][\s\n]+")
_WHITESPACE_REGEX = re.compile(r"\s+")
# Paragraphs are separated by one or more blank lines.
_PARAGRAPH_SPLIT_REGEX = re.compile(r"(?:\r?\n){2,}")
# Honorifics/abbreviations ending in "." that do not terminate a sentence.
_ABBREVIATION_END_RE = re.compile(
    r"\b(?:Mr|Mrs|Ms|Dr|Prof|Rev|Sr|Jr|St|Gen|Lt|Col|Sgt|Capt|Adm|Cmdr|vs|etc)\.$",
    re.IGNORECASE,
)
# Default apostrophe handling applied during chunk normalization.
_PIPELINE_APOSTROPHE_CONFIG = ApostropheConfig()
@dataclass(frozen=True)
class Chunk:
    """Immutable unit of narration text produced by the chunker."""

    id: str
    chapter_index: int
    chunk_index: int
    level: ChunkLevel
    text: str
    speaker_id: str = "narrator"
    voice: Optional[str] = None
    voice_profile: Optional[str] = None
    voice_formula: Optional[str] = None
    display_text: Optional[str] = None

    def as_dict(self) -> Dict[str, object]:
        """Return the chunk as a plain dictionary, one key per field."""
        keys = (
            "id",
            "chapter_index",
            "chunk_index",
            "level",
            "text",
            "speaker_id",
            "voice",
            "voice_profile",
            "voice_formula",
            "display_text",
        )
        return {key: getattr(self, key) for key in keys}
def _iter_paragraphs(text: str) -> Iterator[str]:
    """Yield each non-empty, stripped paragraph of *text* (blank-line split)."""
    segments = (part.strip() for part in _PARAGRAPH_SPLIT_REGEX.split(text.strip()))
    yield from (segment for segment in segments if segment)
def _iter_sentences(paragraph: str) -> Iterator[Tuple[str, str]]:
    """Yield (stripped, raw) sentence pairs from *paragraph*, in order."""
    if not paragraph:
        return
    cursor = 0
    for boundary in _SENTENCE_SPLIT_REGEX.finditer(paragraph):
        raw = paragraph[cursor : boundary.end()]
        stripped = raw.strip()
        if stripped:
            yield stripped, raw
        cursor = boundary.end()
    # Whatever remains after the last boundary is the final sentence.
    remainder_raw = paragraph[cursor:]
    remainder = remainder_raw.strip()
    if remainder:
        yield remainder, remainder_raw
def _normalize_whitespace(value: str) -> str:
    """Collapse whitespace runs into single spaces and trim both ends."""
    collapsed = _WHITESPACE_REGEX.sub(" ", value)
    return collapsed.strip()
def _normalize_chunk_text(value: str) -> str:
    """Run pipeline text normalization on *value*, then collapse whitespace."""
    runtime_settings = get_runtime_settings()
    apostrophes = build_apostrophe_config(
        settings=runtime_settings, base=_PIPELINE_APOSTROPHE_CONFIG
    )
    piped = normalize_for_pipeline(
        value, config=apostrophes, settings=runtime_settings
    )
    return _normalize_whitespace(piped)
def _split_sentences(paragraph: str) -> List[Tuple[str, str]]:
    """Split *paragraph* into (normalized, raw) sentences, merging sentences
    that end in an abbreviation with the sentence that follows them."""
    pairs = list(_iter_sentences(paragraph))
    if not pairs:
        return []
    merged: List[Tuple[str, str]] = []
    pending_norm: List[str] = []
    pending_raw: List[str] = []
    for norm, raw in pairs:
        pending_norm.append(norm)
        pending_raw.append(raw)
        # "Dr." etc. is a false boundary: keep accumulating.
        if _ABBREVIATION_END_RE.search(norm.rstrip()):
            continue
        merged.append((" ".join(pending_norm), "".join(pending_raw)))
        pending_norm = []
        pending_raw = []
    if pending_norm:
        merged.append((" ".join(pending_norm), "".join(pending_raw)))
    return merged
def chunk_text(
    *,
    chapter_index: int,
    chapter_title: str,
    text: str,
    level: ChunkLevel,
    speaker_id: str = "narrator",
    voice: Optional[str] = None,
    voice_profile: Optional[str] = None,
    voice_formula: Optional[str] = None,
    chunk_prefix: Optional[str] = None,
) -> List[Dict[str, object]]:
    """Split text into ordered chunk dictionaries.

    Args:
        chapter_index: Zero-based chapter number recorded on every chunk.
        chapter_title: Chapter title (not used in this function's body —
            presumably kept for API symmetry; confirm callers pass it).
        text: Raw chapter text to split.
        level: "paragraph" or "sentence" granularity.
        speaker_id: Speaker tag stored on each chunk.
        voice: Optional voice id copied onto each chunk.
        voice_profile: Optional voice profile copied onto each chunk.
        voice_formula: Optional voice formula copied onto each chunk.
        chunk_prefix: Override for the id prefix (defaults to "chapNNNN").

    Returns:
        List of dicts (Chunk.as_dict() plus "normalized_text" and
        "original_text"; sentence-level chunks also carry "display_text").
    """
    prefix = chunk_prefix or f"chap{chapter_index:04d}"
    chunks: List[Dict[str, object]] = []
    if level == "paragraph":
        # Fall back to the whole text when no blank-line paragraphs exist.
        paragraphs = list(_iter_paragraphs(text)) or [text.strip()]
        for para_index, paragraph in enumerate(paragraphs):
            normalized = _normalize_whitespace(paragraph)
            if not normalized:
                continue
            chunk_id = f"{prefix}_p{para_index:04d}"
            payload = Chunk(
                id=chunk_id,
                chapter_index=chapter_index,
                chunk_index=len(chunks),
                level=level,
                text=normalized,
                speaker_id=speaker_id,
                voice=voice,
                voice_profile=voice_profile,
                voice_formula=voice_formula,
            ).as_dict()
            payload["normalized_text"] = _normalize_chunk_text(paragraph)
            payload["original_text"] = paragraph
            chunks.append(payload)
        _attach_display_text(text, chunks)
        return chunks
    # Sentence level – flatten paragraphs into individual sentences
    sentence_index = 0
    for para_index, paragraph in enumerate(
        list(_iter_paragraphs(text)) or [text.strip()]
    ):
        normalized_para = _normalize_whitespace(paragraph)
        if not normalized_para:
            continue
        # Treat the whole paragraph as one sentence if splitting found none.
        sentence_pairs = _split_sentences(paragraph) or [(normalized_para, paragraph)]
        for sent_local_index, (normalized_sentence, raw_sentence) in enumerate(
            sentence_pairs
        ):
            normalized_sentence = _normalize_whitespace(normalized_sentence)
            if not normalized_sentence:
                continue
            chunk_id = f"{prefix}_p{para_index:04d}_s{sent_local_index:04d}"
            payload = Chunk(
                id=chunk_id,
                chapter_index=chapter_index,
                chunk_index=sentence_index,
                level=level,
                text=normalized_sentence,
                speaker_id=speaker_id,
                voice=voice,
                voice_profile=voice_profile,
                voice_formula=voice_formula,
            ).as_dict()
            payload["normalized_text"] = _normalize_chunk_text(raw_sentence)
            payload["display_text"] = raw_sentence
            payload["original_text"] = raw_sentence
            chunks.append(payload)
            sentence_index += 1
    _attach_display_text(text, chunks)
    return chunks
_DISPLAY_PATTERN_CACHE: Dict[str, Pattern[str]] = {}
def _build_display_pattern(text: str) -> Pattern[str]:
cached = _DISPLAY_PATTERN_CACHE.get(text)
if cached is not None:
return cached
escaped = re.escape(text)
escaped = escaped.replace(r"\ ", r"\s+")
pattern = re.compile(r"(\s*" + escaped + r"\s*)", re.DOTALL)
_DISPLAY_PATTERN_CACHE[text] = pattern
return pattern
def _search_source_span(
    source: str, normalized: str, start: int
) -> Optional[Tuple[int, int]]:
    """Locate *normalized* in *source* at/after *start*, whitespace-tolerantly.

    Returns the (start, end) span of the match (including surrounding
    whitespace) or None when not found.
    """
    if not normalized:
        return None
    found = _build_display_pattern(normalized).search(source, start)
    if found is None:
        return None
    return found.start(1), found.end(1)
def _attach_display_text(source: str, chunks: List[Dict[str, object]]) -> None:
    """Rewrite each chunk's display/original text with the exact span found
    in *source*, scanning forward so repeated chunks match in order."""
    if not source or not chunks:
        return
    cursor = 0
    for chunk in chunks:
        candidate = str(chunk.get("display_text") or chunk.get("text") or "")
        if not candidate:
            continue
        match = _search_source_span(source, candidate, cursor)
        if match is None and cursor:
            # Retry from the beginning in case the chunk appears earlier.
            match = _search_source_span(source, candidate, 0)
        if match is None:
            # Fallback: keep the candidate text. dict.setdefault would NOT
            # replace the existing None that Chunk.as_dict() seeds for
            # "display_text", so check the value explicitly.
            if chunk.get("display_text") is None:
                chunk["display_text"] = candidate
            if chunk.get("original_text") is None:
                chunk["original_text"] = chunk.get("display_text") or candidate
            continue
        start, end = match
        chunk["display_text"] = source[start:end]
        chunk["original_text"] = source[start:end]
        cursor = end
def build_chunks_for_chapters(
    chapters: Iterable[Dict[str, object]],
    *,
    level: ChunkLevel,
    speaker_id: str = "narrator",
) -> List[Dict[str, object]]:
    """Generate chunk dictionaries for a sequence of chapter payloads."""
    collected: List[Dict[str, object]] = []
    for chapter_index, entry in enumerate(chapters):
        # Defensive: ignore anything that is not a chapter mapping.
        if not isinstance(entry, dict):
            continue
        text = str(entry.get("text", "") or "").strip()
        if not text:
            continue
        voice = entry.get("voice")
        profile = entry.get("voice_profile")
        formula = entry.get("voice_formula")
        prefix = entry.get("id") or f"chap{chapter_index:04d}"
        title = str(entry.get("title") or f"Chapter {chapter_index + 1}")
        collected.extend(
            chunk_text(
                chapter_index=chapter_index,
                chapter_title=title,
                text=text,
                level=level,
                speaker_id=speaker_id,
                voice=str(voice) if voice else None,
                voice_profile=str(profile) if profile else None,
                voice_formula=str(formula) if formula else None,
                chunk_prefix=str(prefix),
            )
        )
    return collected
from abogen.utils import get_version
# Program Information
PROGRAM_NAME = "abogen"
PROGRAM_DESCRIPTION = "Generate audiobooks from EPUBs, PDFs, text and subtitles with synchronized captions."
GITHUB_URL = "https://github.com/denizsafak/abogen"
VERSION = get_version()
# Settings
CHAPTER_OPTIONS_COUNTDOWN = 30 # Countdown seconds for chapter options
SUBTITLE_FORMATS = [
("srt", "SRT (standard)"),
("ass_wide", "ASS (wide)"),
("ass_narrow", "ASS (narrow)"),
("ass_centered_wide", "ASS (centered wide)"),
("ass_centered_narrow", "ASS (centered narrow)"),
]
# Language description mapping
LANGUAGE_DESCRIPTIONS = {
"a": "American English",
"b": "British English",
"e": "Spanish",
"f": "French",
"h": "Hindi",
"i": "Italian",
"j": "Japanese",
"p": "Brazilian Portuguese",
"z": "Mandarin Chinese",
}
# Supported sound formats
SUPPORTED_SOUND_FORMATS = [
"wav",
"mp3",
"opus",
"m4b",
"flac",
]
# Supported subtitle formats
SUPPORTED_SUBTITLE_FORMATS = [
"srt",
"ass",
"vtt",
]
# Supported input formats
SUPPORTED_INPUT_FORMATS = [
"epub",
"pdf",
"txt",
"srt",
"ass",
"vtt",
]
# Supported languages for subtitle generation.
# NOTE(review): the Kokoro pipeline historically generated the timestamped
# tokens needed for subtitles only for 'a (American English)' and
# 'b (British English)' — see pipeline.py (~line 384: `if self.lang_code in 'ab':`):
# https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py
# The list below now includes every language; confirm non-English subtitle
# generation actually works before relying on it.
SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = list(LANGUAGE_DESCRIPTIONS.keys())
# Voice and sample text constants
VOICES_INTERNAL = [
"af_alloy",
"af_aoede",
"af_bella",
"af_heart",
"af_jessica",
"af_kore",
"af_nicole",
"af_nova",
"af_river",
"af_sarah",
"af_sky",
"am_adam",
"am_echo",
"am_eric",
"am_fenrir",
"am_liam",
"am_michael",
"am_onyx",
"am_puck",
"am_santa",
"bf_alice",
"bf_emma",
"bf_isabella",
"bf_lily",
"bm_daniel",
"bm_fable",
"bm_george",
"bm_lewis",
"ef_dora",
"em_alex",
"em_santa",
"ff_siwis",
"hf_alpha",
"hf_beta",
"hm_omega",
"hm_psi",
"if_sara",
"im_nicola",
"jf_alpha",
"jf_gongitsune",
"jf_nezumi",
"jf_tebukuro",
"jm_kumo",
"pf_dora",
"pm_alex",
"pm_santa",
"zf_xiaobei",
"zf_xiaoni",
"zf_xiaoxiao",
"zf_xiaoyi",
"zm_yunjian",
"zm_yunxi",
"zm_yunxia",
"zm_yunyang",
]
# Voice and sample text mapping
SAMPLE_VOICE_TEXTS = {
"a": "This is a sample of the selected voice.",
"b": "This is a sample of the selected voice.",
"e": "Este es una muestra de la voz seleccionada.",
"f": "Ceci est un exemple de la voix sélectionnée.",
"h": "यह चयनित आवाज़ का एक नमूना है।",
"i": "Questo è un esempio della voce selezionata.",
"j": "これは選択した声のサンプルです。",
"p": "Este é um exemplo da voz selecionada.",
"z": "这是所选语音的示例。",
}
COLORS = {
"BLUE": "#007dff",
"RED": "#c0392b",
"ORANGE": "#FFA500",
"GREEN": "#42ad4a",
"GREEN_BG": "rgba(66, 173, 73, 0.1)",
"GREEN_BG_HOVER": "rgba(66, 173, 73, 0.15)",
"GREEN_BORDER": "#42ad4a",
"BLUE_BG": "rgba(0, 102, 255, 0.05)",
"BLUE_BG_HOVER": "rgba(0, 102, 255, 0.1)",
"BLUE_BORDER_HOVER": "#6ab0de",
"YELLOW_BACKGROUND": "rgba(255, 221, 51, 0.40)",
"GREY_BACKGROUND": "rgba(128, 128, 128, 0.15)",
"GREY_BORDER": "#808080",
"RED_BACKGROUND": "rgba(232, 78, 60, 0.15)",
"RED_BG": "rgba(232, 78, 60, 0.10)",
"RED_BG_HOVER": "rgba(232, 78, 60, 0.15)",
# Theme palette colors
"DARK_BG": "#202326",
"DARK_BASE": "#141618",
"DARK_ALT": "#2c2f31",
"DARK_BUTTON": "#292c30",
"DARK_DISABLED": "#535353",
"LIGHT_BG": "#eff0f1",
"LIGHT_DISABLED": "#9a9999",
}
"""Backwards-compatible re-export of conversion module.
The PyQt-based implementation lives in abogen.pyqt.conversion.
The web-based implementation is in abogen.webui.conversion_runner.
"""
from __future__ import annotations
# Re-export PyQt conversion classes for backwards compatibility
from abogen.pyqt.conversion import ( # noqa: F401
ConversionThread,
VoicePreviewThread,
PlayAudioThread,
)
__all__ = ["ConversionThread", "VoicePreviewThread", "PlayAudioThread"]
from __future__ import annotations
import tempfile
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Sequence
# Delimiters wrapped around each sample's stable code in generated text.
MARKER_PREFIX = "[[ABOGEN-DBG:"
MARKER_SUFFIX = "]]"
@dataclass(frozen=True)
class DebugTTSSample:
    """One TTS regression sample with a stable reporting code."""
    # Stable identifier (e.g. "APOS_001") used to report failures precisely.
    code: str
    # Human-readable description of what the sample exercises.
    label: str
    # The sentence fed to the TTS pipeline.
    text: str
DEBUG_TTS_SAMPLES: Sequence[DebugTTSSample] = (
DebugTTSSample(
code="APOS_001",
label="Apostrophes & contractions (1)",
text="It's a beautiful day, isn't it? Let's see what we'll do.",
),
DebugTTSSample(
code="APOS_002",
label="Apostrophes & contractions (2)",
text="I'm sure you're ready; we'd better go before it's too late.",
),
DebugTTSSample(
code="APOS_003",
label="Apostrophes & contractions (3)",
text="He'll say it's fine, but I can't promise it'll work.",
),
DebugTTSSample(
code="APOS_004",
label="Apostrophes & contractions (4)",
text="They've done it, and I'd agree they've earned it.",
),
DebugTTSSample(
code="APOS_005",
label="Apostrophes & contractions (5)",
text="She's here, we're late, they're waiting, and you're right.",
),
DebugTTSSample(
code="POS_001",
label="Plural possessives (1)",
text="The dogs' bowls were empty, but the boss's office was quiet.",
),
DebugTTSSample(
code="POS_002",
label="Plural possessives (2)",
text="The teachers' lounge was closed during the students' exams.",
),
DebugTTSSample(
code="POS_003",
label="Plural possessives (3)",
text="The actresses' roles changed, and the directors' notes piled up.",
),
DebugTTSSample(
code="POS_004",
label="Plural possessives (4)",
text="The Joneses' car was parked by the neighbors' fence.",
),
DebugTTSSample(
code="POS_005",
label="Plural possessives (5)",
text="The bosses' meeting ended before the witnesses' statements began.",
),
DebugTTSSample(
code="NUM_001",
label="Grouped numbers (1)",
text="There are 1,234 apples, 56 oranges, and 7.89 liters of juice.",
),
DebugTTSSample(
code="NUM_002",
label="Grouped numbers (2)",
text="The population is 10,000,000 and the area is 123.45 square miles.",
),
DebugTTSSample(
code="NUM_003",
label="Grouped numbers (3)",
text="Set the timer for 0.5 seconds, then wait 2.0 minutes.",
),
DebugTTSSample(
code="NUM_004",
label="Grouped numbers (4)",
text="We measured 3.1415 radians and wrote down 2,718.28 as well.",
),
DebugTTSSample(
code="NUM_005",
label="Grouped numbers (5)",
text="The sequence is 1, 2, 3, 4, 5, and then 13.",
),
DebugTTSSample(
code="YEAR_001",
label="Years and decades (1)",
text="In 1999, people said the '90s were over.",
),
DebugTTSSample(
code="YEAR_002",
label="Years and decades (2)",
text="In 2001, the show premiered; by 2010 it was everywhere.",
),
DebugTTSSample(
code="YEAR_003",
label="Years and decades (3)",
text="The 1980s were loud, and the 1970s were groovy.",
),
DebugTTSSample(
code="YEAR_004",
label="Years and decades (4)",
text="She loved the '80s, but he preferred the '60s.",
),
DebugTTSSample(
code="YEAR_005",
label="Years and decades (5)",
text="In 2024, we looked back at 2020 and planned for 2030.",
),
DebugTTSSample(
code="DATE_001",
label="Dates (1)",
text="On 2023-01-01, we celebrated the new year.",
),
DebugTTSSample(
code="DATE_002",
label="Dates (2)",
text="The deadline is 1999-12-31 at midnight.",
),
DebugTTSSample(
code="DATE_003",
label="Dates (3)",
text="Leap day happens on 2024-02-29.",
),
DebugTTSSample(
code="DATE_004",
label="Dates (4)",
text="Some formats look like 01/02/2003 and can be ambiguous.",
),
DebugTTSSample(
code="DATE_005",
label="Dates (5)",
text="We met on March 5, 2020 and again on Apr. 7, 2021.",
),
DebugTTSSample(
code="CUR_001",
label="Currency symbols (1)",
text="The price is $10.50, but it was £8.00 yesterday.",
),
DebugTTSSample(
code="CUR_002",
label="Currency symbols (2)",
text="Tickets cost €12, and the fine was $0.99.",
),
DebugTTSSample(
code="CUR_003",
label="Currency symbols (3)",
text="The bill was ¥500 and the refund was $-3.25.",
),
DebugTTSSample(
code="CUR_004",
label="Currency symbols (4)",
text="He paid £1,234.56 for the instrument.",
),
DebugTTSSample(
code="CUR_005",
label="Currency symbols (5)",
text="The subscription is $5 per month, or $50 per year.",
),
DebugTTSSample(
code="TITLE_001",
label="Titles and abbreviations (1)",
text="Dr. Smith lives on Elm St. near the U.S. border.",
),
DebugTTSSample(
code="TITLE_002",
label="Titles and abbreviations (2)",
text="Mr. and Mrs. Doe met Prof. Adams at 5 p.m.",
),
DebugTTSSample(
code="TITLE_003",
label="Titles and abbreviations (3)",
text="Gen. Smith spoke to Sgt. Rivera on Main St.",
),
DebugTTSSample(
code="TITLE_004",
label="Titles and abbreviations (4)",
text="The report came from the U.K. office, not the U.S.A. team.",
),
DebugTTSSample(
code="TITLE_005",
label="Titles and abbreviations (5)",
text="St. John's is different from St. Louis.",
),
DebugTTSSample(
code="PUNC_001",
label="Terminal punctuation (1)",
text="This sentence ends without punctuation",
),
DebugTTSSample(
code="PUNC_002",
label="Terminal punctuation (2)",
text="An ellipsis is already present...",
),
DebugTTSSample(
code="PUNC_003",
label="Terminal punctuation (3)",
text="A question without a mark",
),
DebugTTSSample(
code="PUNC_004",
label="Terminal punctuation (4)",
text="An exclamation without a bang",
),
DebugTTSSample(
code="PUNC_005",
label="Terminal punctuation (5)",
text='A quote ends here"',
),
DebugTTSSample(
code="QUOTE_001",
label="ALL CAPS inside quotes (1)",
text='He shouted, "THIS IS IMPORTANT!" and then whispered, "ok."',
),
DebugTTSSample(
code="QUOTE_002",
label="ALL CAPS inside quotes (2)",
text='She said, "NO WAY", but he replied, "maybe".',
),
DebugTTSSample(
code="QUOTE_003",
label="ALL CAPS inside quotes (3)",
text='The sign read "DO NOT ENTER" and the note read "pls knock".',
),
DebugTTSSample(
code="QUOTE_004",
label="ALL CAPS inside quotes (4)",
text='He muttered, "OK", then yelled, "STOP!"',
),
DebugTTSSample(
code="QUOTE_005",
label="ALL CAPS inside quotes (5)",
text='They chanted, "USA!" and someone wrote "idk".',
),
DebugTTSSample(
code="FOOT_001",
label="Footnote indicators (1)",
text="This is a sentence with a footnote[1] and another[12].",
),
DebugTTSSample(
code="FOOT_002",
label="Footnote indicators (2)",
text="Some books use multiple footnotes like this[2][3] in a row.",
),
DebugTTSSample(
code="FOOT_003",
label="Footnote indicators (3)",
text="A footnote can appear mid-sentence[4] and continue afterward.",
),
DebugTTSSample(
code="FOOT_004",
label="Footnote indicators (4)",
text="Edge cases include [0] or very large indices like [1234].",
),
DebugTTSSample(
code="FOOT_005",
label="Footnote indicators (5)",
text="Sometimes a footnote follows punctuation.[5] Sometimes it doesn't[6]",
),
)
def marker_for(code: str) -> str:
    """Wrap *code* in the debug marker delimiters."""
    return "".join((MARKER_PREFIX, code, MARKER_SUFFIX))
def build_debug_epub(dest_path: Path, *, title: str = "abogen debug samples") -> Path:
    """Create a tiny EPUB containing all debug samples.

    The text includes stable marker codes so developers can report failures
    precisely.

    Args:
        dest_path: Destination .epub path; parent directories are created.
        title: Title used for the generated chapter heading.

    Returns:
        The destination path that was written.
    """

    def _xml_text(value: str) -> str:
        # Minimal XML text-node escaping; bundled sample texts are plain,
        # but a caller-supplied title could contain markup characters.
        return value.replace("&", "&amp;").replace("<", "&lt;")

    dest_path = Path(dest_path)
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    safe_title = _xml_text(title)
    chapter_lines: List[str] = [
        '<?xml version="1.0" encoding="utf-8"?>',
        "<!DOCTYPE html>",
        '<html xmlns="http://www.w3.org/1999/xhtml">',
        "<head>",
        f" <title>{safe_title}</title>",
        ' <meta charset="utf-8" />',
        "</head>",
        "<body>",
        f" <h1>{safe_title}</h1>",
        " <p>Each paragraph begins with a stable debug code marker.</p>",
    ]
    for sample in DEBUG_TTS_SAMPLES:
        safe_label = sample.label.replace("&", "and")
        chapter_lines.append(f" <h2>{safe_label}</h2>")
        chapter_lines.append(
            " <p><strong>"
            + marker_for(sample.code)
            + "</strong> "
            + _xml_text(sample.text)
            + "</p>"
        )
    chapter_lines += ["</body>", "</html>"]
    chapter_xhtml = "\n".join(chapter_lines)
    container_xml = """<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
"""
    # NOTE(review): dc:title stays fixed even when *title* differs — confirm
    # whether the package metadata should follow the parameter.
    content_opf = """<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="bookid" version="3.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:identifier id="bookid">abogen-debug-samples</dc:identifier>
<dc:title>abogen debug samples</dc:title>
<dc:language>en</dc:language>
</metadata>
<manifest>
<item id="chapter" href="chapter.xhtml" media-type="application/xhtml+xml" />
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav" />
</manifest>
<spine>
<itemref idref="chapter" />
</spine>
</package>
"""
    # The epub: prefix used by epub:type must be declared, otherwise the nav
    # document is not well-formed XML and strict readers reject it.
    nav_xhtml = """<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>Navigation</title>
<meta charset="utf-8" />
</head>
<body>
<nav epub:type="toc" id="toc">
<h2>Table of Contents</h2>
<ol>
<li><a href="chapter.xhtml">Debug samples</a></li>
</ol>
</nav>
</body>
</html>
"""
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        (tmp_path / "mimetype").write_text("application/epub+zip", encoding="utf-8")
        meta_inf = tmp_path / "META-INF"
        meta_inf.mkdir(parents=True, exist_ok=True)
        (meta_inf / "container.xml").write_text(container_xml, encoding="utf-8")
        oebps = tmp_path / "OEBPS"
        oebps.mkdir(parents=True, exist_ok=True)
        (oebps / "content.opf").write_text(content_opf, encoding="utf-8")
        (oebps / "chapter.xhtml").write_text(chapter_xhtml, encoding="utf-8")
        (oebps / "nav.xhtml").write_text(nav_xhtml, encoding="utf-8")
        # Per EPUB spec: mimetype must be the first entry and stored (no compression).
        with zipfile.ZipFile(dest_path, "w") as zf:
            zf.write(
                tmp_path / "mimetype", "mimetype", compress_type=zipfile.ZIP_STORED
            )
            for source in (
                meta_inf / "container.xml",
                oebps / "content.opf",
                oebps / "chapter.xhtml",
                oebps / "nav.xhtml",
            ):
                arcname = str(source.relative_to(tmp_path)).replace("\\", "/")
                zf.write(source, arcname, compress_type=zipfile.ZIP_DEFLATED)
    return dest_path
def iter_expected_codes() -> Iterable[str]:
    """Yield every sample's stable debug code, in definition order."""
    yield from (sample.code for sample in DEBUG_TTS_SAMPLES)
from __future__ import annotations
import hashlib
import os
import re
import threading
import time
from collections import Counter
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
try: # pragma: no cover - fallback when spaCy not available during tests
import spacy # type: ignore[import-not-found]
except Exception: # pragma: no cover - spaCy optional during runtime bootstrap
spacy = None
_Language = Any # type: ignore[misc,assignment]
Doc = Any # type: ignore[misc,assignment]
Span = Any # type: ignore[misc,assignment]
_TITLE_PREFIXES = (
"mr",
"mrs",
"ms",
"miss",
"dr",
"prof",
"sir",
"madam",
"lady",
"lord",
"capt",
"captain",
"col",
"colonel",
"maj",
"major",
"sgt",
"sergeant",
"rev",
"father",
"mother",
"brother",
"sister",
)
_STOP_LABELS = {
"the",
"that",
"this",
"those",
"these",
"there",
"here",
"then",
"and",
"but",
"or",
"nor",
"so",
"yet",
"dr",
"mr",
"mrs",
"ms",
"miss",
"sir",
"madam",
"lady",
"lord",
}
_EXCLUDED_NER_LABELS = {
"CARDINAL",
"DATE",
"ORDINAL",
"PERCENT",
"TIME",
"LAW",
"MONEY",
"QUANTITY",
}
_TITLE_PATTERN = re.compile(
r"^(?:" + "|".join(re.escape(prefix) for prefix in _TITLE_PREFIXES) + r")\.?\s+",
re.IGNORECASE,
)
_POSSESSIVE_PATTERN = re.compile(r"(?:'s|’s|\u2019s)$", re.IGNORECASE)
_NON_WORD_PATTERN = re.compile(r"[^\w\s'-]+")
_MULTI_SPACE_PATTERN = re.compile(r"\s+")
_SUFFIX_PATTERN = re.compile(
r",?\s+(?:jr|sr|ii|iii|iv|v|vi|md|phd|esq|esquire|dds|dvm)\.?$",
re.IGNORECASE,
)
@dataclass(slots=True)
class EntityRecord:
    """Aggregated occurrences of one normalized entity label."""
    # (category, normalized-token) pair; the record's unique identity.
    key: Tuple[str, str]
    # Cleaned display label for the entity.
    label: str
    # spaCy entity label (e.g. "PERSON"), or "PROPN" for bare proper nouns.
    kind: str
    # Either "people" or "entities".
    category: str
    count: int = 0
    # Up to five sample excerpts, each with its chapter index.
    samples: List[Dict[str, Any]] = field(default_factory=list)
    chapter_indices: set[int] = field(default_factory=set)
    # Frequency of each exact surface spelling encountered.
    forms: Counter = field(default_factory=Counter)
    # (chapter_index, token position) of the first occurrence.
    first_position: Optional[Tuple[int, int]] = None
    def register(
        self, *, chapter_index: int, position: int, text: str, sentence: Optional[str]
    ) -> None:
        """Record one occurrence of the entity."""
        self.count += 1
        self.chapter_indices.add(chapter_index)
        self.forms[text] += 1
        if self.first_position is None:
            self.first_position = (chapter_index, position)
        # Keep at most five distinct sample excerpts.
        if sentence and len(self.samples) < 5:
            payload = {
                "excerpt": sentence.strip(),
                "chapter_index": chapter_index,
            }
            if payload not in self.samples:
                self.samples.append(payload)
    def as_dict(self, ordinal: int) -> Dict[str, Any]:
        """Serialize for the summary payload; *ordinal* makes the id unique."""
        chapter_indices = sorted(self.chapter_indices)
        first_chapter = chapter_indices[0] if chapter_indices else None
        return {
            "id": f"{self.category}_{ordinal}",
            "label": self.label,
            "normalized": self.key[1],
            "category": self.category,
            "kind": self.kind,
            "count": self.count,
            "samples": list(self.samples),
            "chapter_indices": chapter_indices,
            "first_chapter": first_chapter,
            "forms": self.forms.most_common(6),
        }
@dataclass(slots=True)
class EntityExtractionResult:
    """Outcome of extract_entities(): summary payload plus bookkeeping."""
    # JSON-serializable summary ("people"/"entities"/"index"/"stats"/"model").
    summary: Dict[str, Any]
    # SHA-1 over the chapter texts + indices; identifies this input set.
    cache_key: str
    # Wall-clock seconds spent extracting.
    elapsed: float
    # Human-readable error messages (e.g. a missing spaCy model).
    errors: List[str]
class EntityModelError(RuntimeError):
    """Raised when spaCy or the required language model is unavailable."""
    pass
# Loaded spaCy pipelines keyed by lower-cased model name.
_MODEL_CACHE: Dict[str, Any] = {}
# Guards _MODEL_CACHE; RLock so the holder may re-enter.
_MODEL_LOCK = threading.RLock()
def _resolve_model_name(language: str) -> str:
override = os.environ.get("ABOGEN_SPACY_MODEL")
if override:
return override.strip()
lowered = language.strip().lower()
if lowered.startswith("en"):
return "en_core_web_sm"
return "en_core_web_sm"
def _load_model(language: str) -> Any:
    """Load (and cache) the spaCy pipeline for *language*.

    Raises:
        EntityModelError: If spaCy is missing or the resolved model is
            not installed.
    """
    if spacy is None:
        raise EntityModelError(
            "spaCy is not available. Install spaCy to enable entity extraction."
        )
    model_name = _resolve_model_name(language)
    cache_key = model_name.lower()
    with _MODEL_LOCK:
        if cache_key in _MODEL_CACHE:
            return _MODEL_CACHE[cache_key]
        try:
            nlp = spacy.load(model_name)  # type: ignore[arg-type]
        except OSError as exc:  # pragma: no cover - external dependency failure
            raise EntityModelError(
                f"spaCy model '{model_name}' is not installed. Download it with "
                "`python -m spacy download en_core_web_sm`."
            ) from exc
        # Allow long book chapters: raise the limit but never lower it.
        nlp.max_length = max(nlp.max_length, 2_000_000)
        _MODEL_CACHE[cache_key] = nlp
        return nlp
def _normalize_label(text: str) -> str:
    """Clean an entity surface form into a canonical display label.

    Strips surrounding quotes, honorific prefixes, name suffixes,
    possessives and punctuation, collapses whitespace, drops stop-words
    and single letters, then title-cases while preserving all-caps
    abbreviations. Returns "" when nothing usable remains.
    """
    if not text:
        return ""
    stripped = text.strip().strip("\"'`“”’")
    if not stripped:
        return ""
    # Order matters: strip the leading title before the trailing suffix and
    # possessive, and only then remove punctuation/extra whitespace so the
    # anchored patterns can still match.
    stripped = _TITLE_PATTERN.sub("", stripped)
    stripped = _SUFFIX_PATTERN.sub("", stripped)
    stripped = _POSSESSIVE_PATTERN.sub("", stripped)
    stripped = _NON_WORD_PATTERN.sub(" ", stripped)
    stripped = _MULTI_SPACE_PATTERN.sub(" ", stripped)
    stripped = stripped.strip()
    if not stripped or stripped.lower() in _STOP_LABELS:
        return ""
    parts = stripped.split()
    if not parts:
        return ""
    # A lone single character is never a useful label.
    if len(parts) == 1 and len(parts[0]) <= 1:
        return ""
    # Normalise casing: preserve uppercase abbreviations, otherwise title case.
    normalized_parts = []
    for index, part in enumerate(parts):
        if part.isupper():
            normalized_parts.append(part)
        elif part[:1].isupper():
            normalized_parts.append(part[:1].upper() + part[1:])
        elif index == 0:
            # Capitalize an all-lowercase first word.
            normalized_parts.append(part[:1].upper() + part[1:])
        else:
            normalized_parts.append(part)
    normalized = " ".join(normalized_parts).strip()
    if normalized.lower() in _STOP_LABELS:
        return ""
    return normalized
def _token_key(value: str) -> str:
    """Canonical lookup key: lower-cased with whitespace runs collapsed."""
    lowered = value.lower().strip()
    return _MULTI_SPACE_PATTERN.sub(" ", lowered).strip()
def _iter_named_entities(doc: Any) -> Iterable[Any]: # type: ignore[override]
for ent in getattr(doc, "ents", ()):
if ent.label_ == "":
continue
yield ent
def _extract_propn_tokens(doc: Any) -> Iterable[Any]:  # type: ignore[override]
    """Yield single-token PROPN spans that NER did not already cover."""
    seen: set[Tuple[int, int]] = set()
    for ent in getattr(doc, "ents", ()):  # guard multi-token spans
        seen.add((ent.start, ent.end))
    for token in doc:
        if token.pos_ != "PROPN":
            continue
        span_key = (token.i, token.i + 1)
        if span_key in seen:
            continue
        if token.is_stop:
            continue
        text = token.text.strip()
        if not text:
            continue
        # Skip tokens that sit inside any named entity (even multi-token).
        if token.ent_type_:
            continue
        # Slice rather than yield the token so callers get a Span API.
        yield doc[token.i : token.i + 1]
def _empty_result(
    cache_key: str, error: Optional[str] = None
) -> EntityExtractionResult:
    """Build a result with empty payloads, optionally carrying one error."""
    summary: Dict[str, Any] = {
        "people": [],
        "entities": [],
        "index": {"tokens": []},
        "stats": {"tokens": 0, "chapters": 0, "processed": False},
        "model": None,
    }
    return EntityExtractionResult(
        summary=summary,
        cache_key=cache_key,
        elapsed=0.0,
        errors=[error] if error else [],
    )
def extract_entities(
    chapters: Iterable[Mapping[str, Any]],
    *,
    language: str = "en",
) -> EntityExtractionResult:
    """Run spaCy over chapter texts and summarize people and other entities.

    Args:
        chapters: Chapter payload mappings; each may carry "text" and an
            optional integer "index".
        language: Language code used to resolve the spaCy model.

    Returns:
        EntityExtractionResult whose summary holds "people", "entities",
        a token index, stats and model metadata. When no chapters are
        supplied or the model cannot load, an empty result is returned
        (with any load error recorded in ``errors``).
    """
    start = time.perf_counter()
    normalized_language = language or "en"
    # Hash all chapter texts + indices so callers can cache by content.
    combined_hasher = hashlib.sha1()
    chapter_texts: List[Tuple[int, str]] = []
    for idx, chapter in enumerate(chapters):
        text = chapter.get("text") if isinstance(chapter, Mapping) else None
        text_value = str(text or "")
        original_index = idx
        if isinstance(chapter, Mapping):
            try:
                original_index = int(chapter.get("index", idx))
            except (TypeError, ValueError):
                original_index = idx
        chapter_texts.append((original_index, text_value))
        if text_value:
            combined_hasher.update(text_value.encode("utf-8", "ignore"))
            combined_hasher.update(str(original_index).encode("utf-8", "ignore"))
    cache_key = combined_hasher.hexdigest()
    if not chapter_texts:
        return _empty_result(cache_key)
    try:
        nlp = _load_model(normalized_language)
    except EntityModelError as exc:
        return _empty_result(cache_key, str(exc))
    records: Dict[Tuple[str, str], EntityRecord] = {}
    tokens_for_index: Dict[str, Dict[str, Any]] = {}
    processed_tokens = 0
    for chapter_index, text in chapter_texts:
        trimmed = text.strip()
        if not trimmed:
            continue
        # Grow the pipeline's limit so very long chapters do not raise.
        if len(trimmed) + 1024 > nlp.max_length:
            nlp.max_length = len(trimmed) + 1024
        doc = nlp(trimmed)
        # Closure over chapter_index: registers one NER span or PROPN token
        # into both the record table and the searchable token index.
        def _register_span(span: Any, category_hint: Optional[str] = None) -> None:
            nonlocal processed_tokens
            if category_hint is None and span.label_ in _EXCLUDED_NER_LABELS:
                return
            cleaned = _normalize_label(span.text)
            if not cleaned:
                return
            key = _token_key(cleaned)
            if not key:
                return
            category = category_hint or (
                "people" if span.label_ == "PERSON" else "entities"
            )
            record_key = (category, key)
            record = records.get(record_key)
            if record is None:
                record = EntityRecord(
                    key=record_key,
                    label=cleaned,
                    kind=span.label_
                    or ("PROPN" if category == "entities" else "PERSON"),
                    category=category,
                )
                records[record_key] = record
            sentence = (
                span.sent.text
                if hasattr(span, "sent") and span.sent is not None
                else None
            )
            record.register(
                chapter_index=chapter_index,
                position=span.start,
                text=span.text,
                sentence=sentence,
            )
            processed_tokens += 1
            index_entry = tokens_for_index.get(key)
            if index_entry is None:
                index_entry = {
                    "token": record.label,
                    "normalized": key,
                    "category": category,
                    "count": 0,
                    "samples": [],
                }
                tokens_for_index[key] = index_entry
            index_entry["count"] += 1
            # Keep at most three distinct sentence samples per token.
            if sentence and len(index_entry["samples"]) < 3:
                if sentence not in index_entry["samples"]:
                    index_entry["samples"].append(sentence)
        for ent in _iter_named_entities(doc):
            _register_span(ent)
        for span in _extract_propn_tokens(doc):
            _register_span(span, category_hint="entities")
    elapsed = time.perf_counter() - start
    people_records = [
        record for record in records.values() if record.category == "people"
    ]
    people_keys = {record.key[1] for record in people_records}
    # Drop "entities" that duplicate a person (same key or PERSON kind).
    entity_records = [
        record
        for record in records.values()
        if record.category == "entities"
        and record.key[1] not in people_keys
        and record.kind != "PERSON"
    ]
    # Most frequent first; ties broken alphabetically.
    people_records.sort(key=lambda rec: (-rec.count, rec.label))
    entity_records.sort(key=lambda rec: (-rec.count, rec.label))
    people_payload = [
        record.as_dict(index + 1) for index, record in enumerate(people_records)
    ]
    entity_payload = [
        record.as_dict(index + 1) for index, record in enumerate(entity_records)
    ]
    index_payload = sorted(
        tokens_for_index.values(), key=lambda item: (-item["count"], item["token"])
    )
    summary = {
        "people": people_payload,
        "entities": entity_payload,
        "index": {"tokens": index_payload},
        "stats": {
            "tokens": processed_tokens,
            "chapters": len(chapter_texts),
            "processed": True,
            "people": len(people_payload),
            "entities": len(entity_payload),
        },
        "model": {
            "name": getattr(nlp, "meta", {}).get("name", "unknown"),
            "version": getattr(nlp, "meta", {}).get("version", "unknown"),
            "lang": getattr(nlp, "meta", {}).get("lang", normalized_language),
        },
    }
    return EntityExtractionResult(
        summary=summary, cache_key=cache_key, elapsed=elapsed, errors=[]
    )
def search_tokens(
    index: Mapping[str, Any], query: str, *, limit: int = 15
) -> List[Dict[str, Any]]:
    """Return up to *limit* index entries whose token (case-insensitive) or
    normalized form contains *query*. A whitespace-only query returns the
    first *limit* entries; an empty query or malformed index returns []."""
    tokens = index.get("tokens") if isinstance(index, Mapping) else None
    if not isinstance(tokens, list) or not query:
        return []
    needle = query.strip().lower()
    if not needle:
        return tokens[:limit]
    matches: List[Dict[str, Any]] = []
    for entry in tokens:
        label_lower = str(entry.get("token", "")).lower()
        normalized_form = str(entry.get("normalized", ""))
        if needle in label_lower or needle in normalized_form:
            matches.append(entry)
            if len(matches) >= limit:
                break
    return matches
def merge_override(
    summary: Mapping[str, Any], overrides: Mapping[str, Mapping[str, Any]]
) -> Dict[str, Any]:
    """Return a copy of *summary* with per-token override payloads attached.

    For every entry in the "people" and "entities" lists, look up its
    normalized token in *overrides* and, when present, attach a copy of the
    override under the ``"override"`` key. Non-mapping entries are dropped.
    """
    if not isinstance(summary, Mapping):
        return {"people": [], "entities": []}
    result: Dict[str, Any] = dict(summary)
    for section in ("people", "entities"):
        source_items = summary.get(section)
        if not isinstance(source_items, list):
            continue
        rebuilt: List[Dict[str, Any]] = []
        for item in source_items:
            if not isinstance(item, Mapping):
                continue
            # Prefer the pre-normalized token; fall back to the display label.
            key = _token_key(str(item.get("normalized") or item.get("label") or ""))
            combined = dict(item)
            if key and key in overrides:
                combined["override"] = dict(overrides[key])
            rebuilt.append(combined)
        result[section] = rebuilt
    return result
def normalize_token(token: str) -> str:
    """Canonical lookup key for *token*: label normalization, then key folding."""
    normalized_label = _normalize_label(token)
    return _token_key(normalized_label)
def normalize_manual_override_token(token: str) -> str:
    """Normalize a user-typed override token.

    Trims surrounding whitespace and wrapping quote characters, lower-cases
    the text, and collapses internal whitespace runs to single spaces.
    Returns "" for empty or quote-only input.
    """
    if not token:
        return ""
    stripped = token.strip().strip("\"'`“”’")
    if not stripped:
        return ""
    collapsed = _MULTI_SPACE_PATTERN.sub(" ", stripped.lower())
    return collapsed.strip()
# Re-export the EPUB 3 builder API as this package's public surface.
from .exporter import EPUB3PackageBuilder, build_epub3_package

__all__ = ["EPUB3PackageBuilder", "build_epub3_package"]
from __future__ import annotations
import html
import re
import shutil
import uuid
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
import zipfile
from abogen.text_extractor import ExtractedChapter, ExtractionResult
@dataclass(slots=True)
class ChunkOverlay:
    """One synchronized text/audio fragment (rendered as an XHTML span + SMIL par)."""
    id: str  # sanitized XML id, unique within its chapter
    text: str  # display text inserted into the XHTML span
    original_text: Optional[str]  # pre-normalization source text, if known
    start: Optional[float]  # audio clip begin, seconds (None => 0 in SMIL)
    end: Optional[float]  # audio clip end, seconds (None => 0 in SMIL)
    speaker_id: str  # logical speaker, e.g. "narrator"
    voice: Optional[str]  # resolved TTS voice name, if any
    level: Optional[str] = None  # chunk granularity marker, e.g. "sentence"
    group_id: Optional[str] = None  # shared key grouping spans into one paragraph
@dataclass(slots=True)
class ChapterDocument:
    """Render-ready description of one chapter's XHTML + SMIL pair."""
    index: int  # zero-based
    title: str  # chapter heading shown in XHTML and navigation
    xhtml_name: str  # file name under OEBPS/text/
    smil_name: str  # file name under OEBPS/smil/
    chunks: List[ChunkOverlay]  # overlays rendered into this chapter
    start: Optional[float]  # chapter start time in seconds, if known
    end: Optional[float]  # chapter end time in seconds, if known
class EPUB3PackageBuilder:
    """Constructs an EPUB 3 package with media overlays.

    Renders one XHTML + SMIL pair per extracted chapter, embeds the
    narration audio (and optional cover image), writes the navigation
    document and OPF manifest, then zips everything into a valid .epub
    at ``output_path``.
    """

    def __init__(
        self,
        *,
        output_path: Path,
        book_id: str,
        extraction: ExtractionResult,
        metadata_tags: Dict[str, Any],
        chapter_markers: Sequence[Dict[str, Any]],
        chunk_markers: Sequence[Dict[str, Any]],
        chunks: Iterable[Dict[str, Any]],
        audio_path: Path,
        speaker_mode: str = "single",
        cover_image_path: Optional[Path] = None,
        cover_image_mime: Optional[str] = None,
    ) -> None:
        """Capture inputs and precompute title/author/duration metadata."""
        self.output_path = output_path
        # Never emit a package without a unique identifier.
        self.book_id = book_id or str(uuid.uuid4())
        self.extraction = extraction
        self.metadata_tags = _normalize_metadata(metadata_tags)
        self.chapter_markers = list(chapter_markers or [])
        self.chunk_markers = list(chunk_markers or [])
        self.chunks = list(chunks or [])
        self.audio_path = audio_path
        self.speaker_mode = speaker_mode or "single"
        # Drop the cover reference entirely when the file is missing on disk.
        self.cover_image_path = cover_image_path if cover_image_path and cover_image_path.exists() else None
        self.cover_image_mime = cover_image_mime
        # Explicit tags override metadata discovered during extraction.
        self._combined_metadata = _combine_metadata(extraction.metadata, self.metadata_tags)
        self._title = self._combined_metadata.get("title") or self._fallback_title()
        self._authors = _split_authors(self._combined_metadata)
        self._language = self._determine_language()
        self._publisher = self._combined_metadata.get("publisher") or ""
        self._description = self._combined_metadata.get("comment")
        self._duration = _calculate_total_duration(self.chunk_markers, self.chapter_markers)
        self._modified = _utc_now_iso()

    def build(self) -> Path:
        """Assemble the package in a temp dir and zip it to ``output_path``.

        Raises:
            FileNotFoundError: when the narration audio file is missing.
        """
        if not self.audio_path or not self.audio_path.exists():
            raise FileNotFoundError(f"Audio asset missing: {self.audio_path}")
        chapter_documents = self._build_chapter_documents()
        with TemporaryDirectory() as tmp_dir:
            root = Path(tmp_dir)
            oebps = root / "OEBPS"
            text_dir = oebps / "text"
            smil_dir = oebps / "smil"
            audio_dir = oebps / "audio"
            image_dir = oebps / "images"
            stylesheet_dir = oebps / "styles"
            for directory in (oebps, text_dir, smil_dir, audio_dir, stylesheet_dir):
                directory.mkdir(parents=True, exist_ok=True)
            if self.cover_image_path:
                # images/ is only created when a cover is actually embedded.
                image_dir.mkdir(parents=True, exist_ok=True)
            _write_mimetype(root)
            _write_container_xml(root)
            audio_filename = self.audio_path.name
            embedded_audio = audio_dir / audio_filename
            shutil.copy2(self.audio_path, embedded_audio)
            if self.cover_image_path:
                shutil.copy2(self.cover_image_path, image_dir / self.cover_image_path.name)
            stylesheet_path = stylesheet_dir / "style.css"
            stylesheet_path.write_text(_DEFAULT_STYLESHEET, encoding="utf-8")
            for chapter in chapter_documents:
                chapter_path = text_dir / chapter.xhtml_name
                chapter_path.write_text(
                    self._render_chapter_xhtml(chapter),
                    encoding="utf-8",
                )
                smil_path = smil_dir / chapter.smil_name
                smil_path.write_text(
                    self._render_chapter_smil(chapter, f"audio/{audio_filename}"),
                    encoding="utf-8",
                )
            nav_path = oebps / "nav.xhtml"
            nav_path.write_text(self._render_nav(chapter_documents), encoding="utf-8")
            opf_path = oebps / "content.opf"
            opf_path.write_text(
                self._render_opf(
                    chapter_documents,
                    audio_filename,
                    has_cover=self.cover_image_path is not None,
                    stylesheet_path=stylesheet_path.relative_to(oebps),
                ),
                encoding="utf-8",
            )
            self.output_path.parent.mkdir(parents=True, exist_ok=True)
            with zipfile.ZipFile(self.output_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
                # Ensure mimetype is the first entry and stored without compression
                mimetype_path = root / "mimetype"
                info = zipfile.ZipInfo("mimetype")
                info.compress_type = zipfile.ZIP_STORED
                archive.writestr(info, mimetype_path.read_bytes())
                for file_path in sorted(root.rglob("*")):
                    if file_path == mimetype_path or file_path.is_dir():
                        continue
                    archive.write(file_path, file_path.relative_to(root))
        return self.output_path

    # ------------------------------------------------------------------
    def _build_chapter_documents(self) -> List[ChapterDocument]:
        """Create one ChapterDocument per extracted chapter.

        When a chapter has no timing markers, untimed markers are derived
        from the raw chunks; failing that, a single synthetic whole-chapter
        marker is used so every chapter still renders.
        """
        chunk_lookup = _build_chunk_lookup(self.chunks)
        markers_by_chapter = _group_markers_by_chapter(self.chunk_markers)
        # chapter_markers carry a 1-based "index"; convert to 0-based keys.
        chapter_meta = {int(entry.get("index", idx + 1)) - 1: dict(entry) for idx, entry in enumerate(self.chapter_markers)}
        documents: List[ChapterDocument] = []
        for chapter_index, chapter in enumerate(self.extraction.chapters):
            markers = markers_by_chapter.get(chapter_index, [])
            if not markers and chunk_lookup.by_chapter.get(chapter_index):
                # No timing markers: derive untimed ones from the chunks.
                markers = [
                    {
                        "id": item.get("id"),
                        "chapter_index": chapter_index,
                        "chunk_index": item.get("chunk_index"),
                        "start": None,
                        "end": None,
                        "speaker_id": item.get("speaker_id", "narrator"),
                        "voice": item.get("voice"),
                    }
                    for item in chunk_lookup.by_chapter.get(chapter_index, [])
                ]
            if not markers:
                # Still nothing: one synthetic marker spanning the chapter.
                markers = [
                    {
                        "id": f"chap{chapter_index:04d}_auto0000",
                        "chapter_index": chapter_index,
                        "chunk_index": 0,
                        "start": chapter_meta.get(chapter_index, {}).get("start"),
                        "end": chapter_meta.get(chapter_index, {}).get("end"),
                        "speaker_id": "narrator",
                        "voice": None,
                    }
                ]
            overlays = self._build_overlays_for_chapter(
                chapter_index,
                markers,
                chunk_lookup,
            )
            xhtml_name = f"chapter_{chapter_index + 1:04d}.xhtml"
            smil_name = f"chapter_{chapter_index + 1:04d}.smil"
            chapter_start = chapter_meta.get(chapter_index, {}).get("start")
            chapter_end = chapter_meta.get(chapter_index, {}).get("end")
            documents.append(
                ChapterDocument(
                    index=chapter_index,
                    title=chapter.title or f"Chapter {chapter_index + 1}",
                    xhtml_name=xhtml_name,
                    smil_name=smil_name,
                    chunks=overlays,
                    start=chapter_start,
                    end=chapter_end,
                )
            )
        return documents

    def _build_overlays_for_chapter(
        self,
        chapter_index: int,
        markers: Sequence[Dict[str, Any]],
        chunk_lookup: "ChunkLookup",
    ) -> List[ChunkOverlay]:
        """Resolve each marker to a ChunkOverlay with a unique XML id."""
        overlays: List[ChunkOverlay] = []
        used_ids: set[str] = set()
        chapter_chunks = list(chunk_lookup.by_chapter.get(chapter_index, []))
        chapter_chunks.sort(key=lambda entry: _safe_int(entry.get("chunk_index")))
        for position, marker in enumerate(markers):
            chunk_id = marker.get("id")
            chunk_entry = None
            # Resolve the chunk payload: by id, then by chunk_index, then
            # positionally as a last resort.
            if chunk_id and chunk_id in chunk_lookup.by_id:
                chunk_entry = chunk_lookup.by_id[chunk_id]
            else:
                candidate_index = _safe_int(marker.get("chunk_index"))
                chunk_entry = _find_chunk_by_index(chapter_chunks, candidate_index)
            if chunk_entry is None and chapter_chunks and position < len(chapter_chunks):
                chunk_entry = chapter_chunks[position]
            level = None
            if chunk_entry is None:
                # No chunk payload at all: fall back to whole-chapter text.
                text = self.extraction.chapters[chapter_index].text
                speaker_id = str(marker.get("speaker_id") or "narrator")
                voice = marker.get("voice")
            else:
                display_text = chunk_entry.get("display_text")
                text = str(chunk_entry.get("text") or "")
                speaker_id = str(chunk_entry.get("speaker_id") or marker.get("speaker_id") or "narrator")
                voice = chunk_entry.get("voice") or chunk_entry.get("resolved_voice") or marker.get("voice")
                level = chunk_entry.get("level") or None
            if chunk_entry is None:
                level = None
            normalized_id = _normalize_chunk_id(chunk_id) if chunk_id else None
            if not normalized_id:
                normalized_id = f"chap{chapter_index:04d}_chunk{position:04d}"
            # Suffix duplicates so every SMIL par targets a distinct id.
            while normalized_id in used_ids:
                normalized_id = f"{normalized_id}_dup"
            used_ids.add(normalized_id)
            raw_group_key = chunk_entry.get("id") if chunk_entry else chunk_id
            group_id = _derive_group_id(raw_group_key, level)
            normalized_group_id = _normalize_chunk_id(group_id) if group_id else None
            original_text = None
            if chunk_entry is not None:
                original_text = chunk_entry.get("original_text") or chunk_entry.get("display_text")
            overlays.append(
                ChunkOverlay(
                    id=normalized_id,
                    text=text or self.extraction.chapters[chapter_index].text,
                    original_text=str(original_text) if original_text is not None else None,
                    start=_safe_float(marker.get("start")),
                    end=_safe_float(marker.get("end")),
                    speaker_id=speaker_id,
                    voice=str(voice) if voice else None,
                    level=str(level) if level else None,
                    group_id=normalized_group_id,
                )
            )
        chapter_text = ""
        if 0 <= chapter_index < len(self.extraction.chapters):
            chapter_entry = self.extraction.chapters[chapter_index]
            chapter_text = getattr(chapter_entry, "text", "") or ""
        # Re-align overlay text with the raw chapter source text.
        _restore_original_chunk_text(chapter_text, overlays)
        return overlays

    def _render_chapter_xhtml(self, chapter: ChapterDocument) -> str:
        """Render one chapter as an XHTML document with chunk spans.

        A hidden <pre> holding the original (pre-normalization) text is
        appended when any overlay carries original text.
        """
        language = html.escape(self._language or "en")
        title = html.escape(chapter.title)
        grouped_chunks = _group_chunks_for_render(chapter.chunks)
        chunk_html = "\n".join(
            _render_chunk_group_html(group_id, items) for group_id, items in grouped_chunks
        )
        if not chunk_html:
            chunk_html = "<p></p>"
        original_block = ""
        if chapter.chunks:
            original_text = "".join((chunk.original_text if chunk.original_text is not None else (chunk.text or "")) for chunk in chapter.chunks)
            if original_text:
                safe_original = html.escape(original_text)
                original_block = (
                    "    <pre class=\"chapter-original\" hidden=\"hidden\" aria-hidden=\"true\">\n"
                    f"{safe_original}\n"
                    "    </pre>"
                )
        return (
            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
            "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"{lang}\" lang=\"{lang}\">\n"
            "  <head>\n"
            "    <title>{title}</title>\n"
            "    <meta charset=\"utf-8\"/>\n"
            "    <link rel=\"stylesheet\" type=\"text/css\" href=\"styles/style.css\"/>\n"
            "  </head>\n"
            "  <body>\n"
            "    <section epub:type=\"chapter\" id=\"chapter-{index:04d}\">\n"
            "      <h1>{title}</h1>\n"
            "      {chunks}\n"
            "{original_block}"
            "    </section>\n"
            "  </body>\n"
            "</html>\n"
        ).format(
            lang=language,
            title=title,
            index=chapter.index + 1,
            chunks=chunk_html,
            original_block=("" if not original_block else f"{original_block}\n"),
        )

    def _render_chapter_smil(self, chapter: ChapterDocument) -> str:  # noqa: D401 (signature documented below)
        """(See override below.)"""

    def _render_chapter_smil(self, chapter: ChapterDocument, audio_href: str) -> str:
        """Render the SMIL media-overlay document pairing spans with audio clips."""
        par_lines = []
        for chunk in chapter.chunks:
            par_lines.append(
                "      <par id=\"par-{chunk_id}\">\n"
                "        <text src=\"text/{xhtml}#{chunk_id}\"/>\n"
                "        <audio src=\"{audio}\" clipBegin=\"{start}\" clipEnd=\"{end}\"/>\n"
                "      </par>".format(
                    chunk_id=html.escape(chunk.id),
                    xhtml=html.escape(chapter.xhtml_name),
                    audio=html.escape(audio_href),
                    start=_format_smil_time(chunk.start),
                    end=_format_smil_time(chunk.end),
                )
            )
        return (
            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
            "<smil xmlns=\"http://www.w3.org/2001/SMIL20/Language\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
            "  <head>\n"
            "    <meta name=\"dc:title\" content=\"{title}\"/>\n"
            "    <meta name=\"dtb:uid\" content=\"{book_id}\"/>\n"
            "    <meta name=\"dtb:generator\" content=\"Abogen\"/>\n"
            "  </head>\n"
            "  <body>\n"
            "    <seq id=\"seq-{index:04d}\" epub:textref=\"text/{xhtml}\">\n"
            "{pars}\n"
            "    </seq>\n"
            "  </body>\n"
            "</smil>\n"
        ).format(
            title=html.escape(chapter.title),
            book_id=html.escape(self.book_id),
            index=chapter.index + 1,
            xhtml=html.escape(chapter.xhtml_name),
            pars="\n".join(par_lines) if par_lines else "      <par/>",
        )

    def _render_nav(self, chapters: Sequence[ChapterDocument]) -> str:
        """Render the EPUB navigation (table-of-contents) document."""
        items = []
        for chapter in chapters:
            href = f"text/{chapter.xhtml_name}"
            items.append(
                "        <li><a href=\"{href}\">{title}</a></li>".format(
                    href=html.escape(href),
                    title=html.escape(chapter.title),
                )
            )
        return (
            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
            "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"{lang}\">\n"
            "  <head>\n"
            "    <title>Navigation</title>\n"
            "    <meta charset=\"utf-8\"/>\n"
            "  </head>\n"
            "  <body>\n"
            "    <nav epub:type=\"toc\" id=\"toc\">\n"
            "      <h1>{title}</h1>\n"
            "      <ol>\n"
            "{items}\n"
            "      </ol>\n"
            "    </nav>\n"
            "  </body>\n"
            "</html>\n"
        ).format(
            lang=html.escape(self._language or "en"),
            title=html.escape(self._title),
            items="\n".join(items) if items else "        <li><a href=\"text/chapter_0001.xhtml\">Chapter 1</a></li>",
        )

    def _render_opf(
        self,
        chapters: Sequence[ChapterDocument],
        audio_filename: str,
        *,
        has_cover: bool,
        stylesheet_path: Path,
    ) -> str:
        """Render content.opf: package metadata, manifest, and spine."""
        manifest_items = []
        spine_refs = []
        for chapter in chapters:
            item_id = f"chap{chapter.index + 1:04d}"
            overlay_id = f"mo-{chapter.index + 1:04d}"
            manifest_items.append(
                "    <item id=\"{item_id}\" href=\"text/{href}\" media-type=\"application/xhtml+xml\" media-overlay=\"{overlay_id}\"/>".format(
                    item_id=item_id,
                    href=html.escape(chapter.xhtml_name),
                    overlay_id=overlay_id,
                )
            )
            manifest_items.append(
                "    <item id=\"{overlay_id}\" href=\"smil/{smil}\" media-type=\"application/smil+xml\"/>".format(
                    overlay_id=overlay_id,
                    smil=html.escape(chapter.smil_name),
                )
            )
            spine_refs.append(f"    <itemref idref=\"{item_id}\"/>")
        audio_item_id = "primary-audio"
        manifest_items.append(
            "    <item id=\"{item_id}\" href=\"audio/{href}\" media-type=\"{mime}\"/>".format(
                item_id=audio_item_id,
                href=html.escape(audio_filename),
                mime=_detect_audio_mime(audio_filename),
            )
        )
        manifest_items.append(
            "    <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>"
        )
        manifest_items.append(
            "    <item id=\"style\" href=\"{href}\" media-type=\"text/css\"/>".format(
                # stylesheet_path is OEBPS-relative; normalize Windows slashes.
                href=html.escape(str(stylesheet_path).replace("\\", "/")),
            )
        )
        if has_cover and self.cover_image_path:
            cover_id = "cover-image"
            manifest_items.append(
                "    <item id=\"{item_id}\" href=\"images/{href}\" media-type=\"{mime}\" properties=\"cover-image\"/>".format(
                    item_id=cover_id,
                    href=html.escape(self.cover_image_path.name),
                    mime=self.cover_image_mime or _detect_image_mime(self.cover_image_path.suffix),
                )
            )
        metadata_elements = _render_metadata_xml(
            self._title,
            self._authors,
            self._language,
            self.book_id,
            duration=self._duration,
            publisher=self._publisher,
            description=self._description,
            speaker_mode=self.speaker_mode,
            modified=self._modified,
        )
        return (
            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
            "<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\" unique-identifier=\"book-id\">\n"
            "  <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\" xmlns:media=\"http://www.idpf.org/epub/vocab/mediaoverlays/#\" xmlns:abogen=\"https://abogen.app/ns#\" xmlns:dcterms=\"http://purl.org/dc/terms/\">\n"
            "{metadata}\n"
            "  </metadata>\n"
            "  <manifest>\n"
            "{manifest}\n"
            "  </manifest>\n"
            "  <spine>\n"
            "{spine}\n"
            "  </spine>\n"
            "</package>\n"
        ).format(
            metadata="\n".join(metadata_elements),
            manifest="\n".join(manifest_items),
            spine="\n".join(spine_refs) if spine_refs else "    <itemref idref=\"chap0001\"/>",
        )

    def _fallback_title(self) -> str:
        """Title to use when metadata has none: first chapter title, else a default."""
        if self.extraction.chapters:
            first_title = self.extraction.chapters[0].title
            if first_title:
                return first_title
        return "Generated Audiobook"

    def _determine_language(self) -> str:
        """Language code from combined metadata, defaulting to English."""
        language = self._combined_metadata.get("language")
        if language:
            return language
        return "en"
def build_epub3_package(
    *,
    output_path: Path,
    book_id: str,
    extraction: ExtractionResult,
    metadata_tags: Dict[str, Any],
    chapter_markers: Sequence[Dict[str, Any]],
    chunk_markers: Sequence[Dict[str, Any]],
    chunks: Iterable[Dict[str, Any]],
    audio_path: Path,
    speaker_mode: str = "single",
    cover_image_path: Optional[Path] = None,
    cover_image_mime: Optional[str] = None,
) -> Path:
    """Convenience wrapper: configure an EPUB3PackageBuilder and run it.

    All arguments are forwarded verbatim; returns the written .epub path.
    """
    builder_kwargs = dict(
        output_path=output_path,
        book_id=book_id,
        extraction=extraction,
        metadata_tags=metadata_tags,
        chapter_markers=chapter_markers,
        chunk_markers=chunk_markers,
        chunks=chunks,
        audio_path=audio_path,
        speaker_mode=speaker_mode,
        cover_image_path=cover_image_path,
        cover_image_mime=cover_image_mime,
    )
    return EPUB3PackageBuilder(**builder_kwargs).build()
# ---------------------------------------------------------------------------
# Helpers
@dataclass
class ChunkLookup:
    """Index of chunk payload dicts, by chunk id and by chapter index."""
    by_id: Dict[str, Dict[str, Any]]  # chunk id -> chunk payload copy
    by_chapter: Dict[int, List[Dict[str, Any]]]  # chapter index -> chunk payload copies
def _normalize_metadata(metadata: Optional[Dict[str, Any]]) -> Dict[str, str]:
normalized: Dict[str, str] = {}
for key, value in (metadata or {}).items():
if value is None:
continue
normalized[str(key).lower()] = str(value)
return normalized
def _combine_metadata(*sources: Dict[str, Any]) -> Dict[str, str]:
combined: Dict[str, str] = {}
for source in sources:
for key, value in (source or {}).items():
if value is None:
continue
combined[str(key).lower()] = str(value)
return combined
def _split_authors(metadata: Dict[str, str]) -> List[str]:
candidates = []
for key in ("artist", "author", "authors", "album_artist", "creator"):
value = metadata.get(key)
if value:
candidates.extend(part.strip() for part in value.replace(";", ",").split(","))
return [author for author in candidates if author]
def _calculate_total_duration(
    chunk_markers: Sequence[Dict[str, Any]],
    chapter_markers: Sequence[Dict[str, Any]],
) -> Optional[float]:
    """Return the latest known 'end' timestamp across all markers, or None."""
    ends: List[float] = []
    for marker in (*(chunk_markers or ()), *(chapter_markers or ())):
        end_value = _safe_float(marker.get("end"))
        if end_value is not None:
            ends.append(end_value)
    return max(ends) if ends else None
def _write_mimetype(root: Path) -> None:
(root / "mimetype").write_text("application/epub+zip", encoding="utf-8")
def _write_container_xml(root: Path) -> None:
meta_inf = root / "META-INF"
meta_inf.mkdir(parents=True, exist_ok=True)
container = meta_inf / "container.xml"
container.write_text(
(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n"
" <rootfiles>\n"
" <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n"
" </rootfiles>\n"
"</container>\n"
),
encoding="utf-8",
)
def _build_chunk_lookup(chunks: Iterable[Dict[str, Any]]) -> ChunkLookup:
    """Index chunk payload dicts by id and by chapter for marker resolution."""
    id_index: Dict[str, Dict[str, Any]] = {}
    chapter_index_map: Dict[int, List[Dict[str, Any]]] = {}
    for payload in chunks or []:
        if not isinstance(payload, dict):
            continue
        identifier = payload.get("id")
        if identifier:
            # Store an independent copy keyed by the stringified id.
            id_index[str(identifier)] = dict(payload)
        bucket = chapter_index_map.setdefault(_safe_int(payload.get("chapter_index")), [])
        bucket.append(dict(payload))
    return ChunkLookup(by_id=id_index, by_chapter=chapter_index_map)
def _group_markers_by_chapter(markers: Iterable[Dict[str, Any]]) -> Dict[int, List[Dict[str, Any]]]:
    """Bucket chunk markers by chapter, ordered by chunk index then start time."""
    buckets: Dict[int, List[Dict[str, Any]]] = {}
    for marker in markers or []:
        if not isinstance(marker, dict):
            continue
        buckets.setdefault(_safe_int(marker.get("chapter_index")), []).append(dict(marker))

    def _order(payload: Dict[str, Any]) -> Tuple[int, float]:
        # Missing/invalid start times sort as 0.0.
        return (_safe_int(payload.get("chunk_index")), _safe_float(payload.get("start")) or 0.0)

    for bucket in buckets.values():
        bucket.sort(key=_order)
    return buckets
def _find_chunk_by_index(
chapter_chunks: Sequence[Dict[str, Any]],
chunk_index: Optional[int],
) -> Optional[Dict[str, Any]]:
if chunk_index is None:
return None
for entry in chapter_chunks:
if _safe_int(entry.get("chunk_index")) == chunk_index:
return entry
return None
def _normalize_chunk_id(chunk_id: Optional[Any]) -> Optional[str]:
if chunk_id is None:
return None
text = str(chunk_id).strip()
if not text:
return None
safe = "".join(ch if ch.isalnum() or ch in {"_", "-"} else "_" for ch in text)
return safe[:120]
def _derive_group_id(chunk_id: Optional[Any], level: Optional[Any]) -> Optional[str]:
if chunk_id is None:
return None
text = str(chunk_id).strip()
if not text:
return None
if str(level or "").lower() == "sentence":
match = re.match(r"(.+?)_s\d+(?:_.*)?$", text)
if match:
return match.group(1)
return text
def _group_chunks_for_render(chunks: Sequence[ChunkOverlay]) -> List[Tuple[Optional[str], List[ChunkOverlay]]]:
groups: List[Tuple[Optional[str], List[ChunkOverlay]]] = []
current_key: Optional[str] = None
current_items: List[ChunkOverlay] = []
for chunk in chunks:
key = chunk.group_id or chunk.id
if current_items and key != current_key:
groups.append((current_key, current_items))
current_items = []
if not current_items:
current_key = key
current_items.append(chunk)
if current_items:
groups.append((current_key, current_items))
return groups
def _render_chunk_inline(chunk: ChunkOverlay) -> str:
escaped_id = html.escape(chunk.id)
speaker_attr = f" data-speaker=\"{html.escape(chunk.speaker_id)}\"" if chunk.speaker_id else ""
voice_attr = f" data-voice=\"{html.escape(chunk.voice)}\"" if chunk.voice else ""
level_attr = f" data-level=\"{html.escape(chunk.level)}\"" if chunk.level else ""
raw_text = chunk.text or ""
escaped_text = html.escape(raw_text)
if not escaped_text:
escaped_text = "&nbsp;"
return (
f"<span class=\"chunk\" id=\"{escaped_id}\"{speaker_attr}{voice_attr}{level_attr}>"
f"{escaped_text}"
"</span>"
)
def _render_chunk_group_html(group_id: Optional[str], chunks: Sequence[ChunkOverlay]) -> str:
if not chunks:
return ""
group_attr = f" data-group=\"{html.escape(group_id)}\"" if group_id else ""
inline_html = "".join(_render_chunk_inline(chunk) for chunk in chunks)
if not inline_html:
inline_html = "&nbsp;"
return f" <p class=\"chunk-group\"{group_attr}>{inline_html}</p>"
def _format_smil_time(value: Optional[float]) -> str:
if value is None or value < 0:
value = 0.0
total_ms = int(round(value * 1000))
hours, remainder = divmod(total_ms, 3600_000)
minutes, remainder = divmod(remainder, 60_000)
seconds, milliseconds = divmod(remainder, 1000)
return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
def _safe_int(value: Any, default: int = 0) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def _safe_float(value: Any) -> Optional[float]:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None
def _restore_original_chunk_text(chapter_text: str, overlays: List[ChunkOverlay]) -> None:
    """Re-align overlay text with the raw chapter source text.

    Overlays with a recorded original_text simply get it cleaned for display.
    Otherwise the (normalized) overlay text is located in *chapter_text* via a
    whitespace-tolerant search and the exact source segment is restored.
    Mutates the overlays in place; returns None.
    """
    if not chapter_text or not overlays:
        return
    # Chunks normally appear in document order, so resume each search at the
    # end of the previous match to stay linear and avoid false rematches.
    cursor = 0
    for chunk in overlays:
        if chunk.original_text is not None:
            prepared = _prepare_display_text(chunk.original_text)
            chunk.text = prepared
            continue
        candidate = chunk.text or ""
        if not candidate:
            continue
        match = _search_original_span(chapter_text, candidate, cursor)
        if match is None and cursor:
            # Retry from the top in case this chunk precedes the cursor.
            match = _search_original_span(chapter_text, candidate, 0)
        if match is None:
            # No source span found: fall back to the normalized text itself.
            # (original_text is necessarily None on this path.)
            if chunk.original_text is None:
                chunk.original_text = chunk.text
            chunk.text = _prepare_display_text(chunk.text or "")
            continue
        start, end = match
        segment = chapter_text[start:end]
        chunk.original_text = segment
        chunk.text = _prepare_display_text(segment)
        cursor = end
def _prepare_display_text(value: str) -> str:
if not value:
return ""
cleaned = re.sub(r"(?:[ \t]*\r?\n)+\Z", "", value)
return cleaned if cleaned else ""
def _search_original_span(source: str, normalized: str, start: int) -> Optional[Tuple[int, int]]:
if not normalized:
return None
pattern = _build_chunk_pattern(normalized)
match = pattern.search(source, start)
if not match:
return None
return match.start(1), match.end(1)
_CHUNK_REGEX_CACHE: Dict[str, Pattern[str]] = {}
def _build_chunk_pattern(text: str) -> Pattern[str]:
cached = _CHUNK_REGEX_CACHE.get(text)
if cached is not None:
return cached
escaped = re.escape(text)
escaped = escaped.replace(r"\ ", r"\s+")
pattern = re.compile(r"(\s*" + escaped + r"\s*)", re.DOTALL)
_CHUNK_REGEX_CACHE[text] = pattern
return pattern
def _render_metadata_xml(
title: str,
authors: Sequence[str],
language: str,
book_id: str,
*,
duration: Optional[float],
publisher: Optional[str],
description: Optional[str],
speaker_mode: Optional[str],
modified: Optional[str],
) -> List[str]:
elements = [
f" <dc:identifier id=\"book-id\">{html.escape(book_id)}</dc:identifier>",
f" <dc:title>{html.escape(title)}</dc:title>",
f" <dc:language>{html.escape(language or 'en')}</dc:language>",
]
for author in authors or ["Unknown"]:
elements.append(f" <dc:creator>{html.escape(author)}</dc:creator>")
if publisher:
elements.append(f" <dc:publisher>{html.escape(publisher)}</dc:publisher>")
if description:
elements.append(f" <dc:description>{html.escape(description)}</dc:description>")
if duration is not None:
elements.append(f" <meta property=\"media:duration\">{_format_iso_duration(duration)}</meta>")
if speaker_mode:
elements.append(
" <meta property=\"abogen:speakerMode\">{}</meta>".format(
html.escape(str(speaker_mode))
)
)
if modified:
elements.append(f" <meta property=\"dcterms:modified\">{html.escape(modified)}</meta>")
return elements
def _format_iso_duration(value: float) -> str:
total_seconds = int(value)
remainder = value - total_seconds
hours, remainder_seconds = divmod(total_seconds, 3600)
minutes, seconds = divmod(remainder_seconds, 60)
seconds_with_fraction = seconds + remainder
if seconds_with_fraction.is_integer():
seconds_text = f"{int(seconds_with_fraction)}"
else:
seconds_text = f"{seconds_with_fraction:.3f}".rstrip("0").rstrip(".")
return f"PT{hours}H{minutes}M{seconds_text}S"
def _detect_audio_mime(audio_filename: str) -> str:
suffix = Path(audio_filename).suffix.lower()
return {
".mp3": "audio/mpeg",
".m4a": "audio/mp4",
".m4b": "audio/mp4",
".aac": "audio/aac",
".wav": "audio/wav",
".flac": "audio/flac",
".ogg": "audio/ogg",
".opus": "audio/ogg",
}.get(suffix, "audio/mpeg")
def _detect_image_mime(suffix: str) -> str:
normalized = suffix.lower()
return {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
}.get(normalized, "image/jpeg")
def _utc_now_iso() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
_DEFAULT_STYLESHEET = """
body {
font-family: 'Georgia', serif;
line-height: 1.6;
margin: 1.5em;
}
h1 {
font-size: 1.5em;
margin-bottom: 0.5em;
}
.chunk-group {
margin: 0.5em 0;
}
.chunk-group .chunk {
white-space: pre-wrap;
}
"""
"""Backwards-compatible re-export of the PyQt GUI.
The actual implementation lives in abogen.pyqt.gui.
"""
from __future__ import annotations
from abogen.pyqt.gui import *  # noqa: F401, F403
from abogen.pyqt.gui import abogen
# Only the application entry point is part of the stable public API.
__all__ = ["abogen"]
from __future__ import annotations
import hashlib
import re
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
try: # pragma: no cover - optional dependency
import spacy # type: ignore
except Exception: # pragma: no cover - spaCy may be unavailable in minimal environments
spacy = None
@dataclass(frozen=True)
class HeteronymVariant:
    """One pronunciation alternative for a heteronym token."""
    key: str  # stable choice identifier, e.g. "noun" / "verb" / "past"
    label: str  # human-readable description shown in the UI
    replacement_token: str  # token substituted into the sentence for TTS
    example_sentence: str  # illustrative usage shown alongside the option
@dataclass(frozen=True)
class HeteronymSpec:
    """A heteronym token paired with its two pronunciation variants."""
    token: str
    variants: Tuple[HeteronymVariant, HeteronymVariant]

    def default_choice_for_token(self, spacy_token: Any) -> str:
        """Return the most likely variant key for this token."""
        pos = (getattr(spacy_token, "pos_", "") or "").upper()
        tag = (getattr(spacy_token, "tag_", "") or "").upper()
        word = self.token.casefold()
        if word == "read":
            # VBD/VBN => /rɛd/
            return "past" if tag in {"VBD", "VBN"} else "present"
        if word in {"wind", "tear"}:
            # VERB => verb pronunciation, otherwise assume the noun.
            return "verb" if pos == "VERB" else "noun"
        if word == "close":
            return "verb" if pos == "VERB" else "adj"
        if word == "lead":
            # Default to verb unless POS suggests noun (the metal).
            return "metal" if pos == "NOUN" else "verb"
        # Unknown token: fall back to the first declared variant.
        return self.variants[0].key
# Minimal, high-confidence starter set.
# NOTE: These replacements intentionally prioritize speech output.
# Some replacements may not be appropriate for subtitles/text exports.
# Keyed by the casefolded surface token; each spec pairs exactly two variants.
_HETERONYM_SPECS: Dict[str, HeteronymSpec] = {
    "wind": HeteronymSpec(
        token="wind",
        variants=(
            HeteronymVariant(
                key="noun",
                label="Noun (the wind)",
                replacement_token="wind",
                example_sentence="Listen to the wind.",
            ),
            HeteronymVariant(
                key="verb",
                label="Verb (to wind)",
                replacement_token="wynd",
                example_sentence="I need to wind the watch.",
            ),
        ),
    ),
    "read": HeteronymSpec(
        token="read",
        variants=(
            HeteronymVariant(
                key="present",
                label="Present (I read every day)",
                replacement_token="read",
                example_sentence="I read every day.",
            ),
            HeteronymVariant(
                key="past",
                label="Past (I read it yesterday)",
                replacement_token="red",
                example_sentence="I read it yesterday.",
            ),
        ),
    ),
    "tear": HeteronymSpec(
        token="tear",
        variants=(
            HeteronymVariant(
                key="noun",
                label="Noun (a tear /crying/)",
                replacement_token="tier",
                example_sentence="A tear rolled down her cheek.",
            ),
            HeteronymVariant(
                key="verb",
                label="Verb (to tear /rip/)",
                replacement_token="tear",
                example_sentence="Please don't tear the page.",
            ),
        ),
    ),
    "close": HeteronymSpec(
        token="close",
        variants=(
            HeteronymVariant(
                key="adj",
                label="Adjective (close /near/)",
                replacement_token="close",
                example_sentence="We are close to the station.",
            ),
            HeteronymVariant(
                key="verb",
                label="Verb (close /klohz/)",
                replacement_token="cloze",
                example_sentence="Please close the door.",
            ),
        ),
    ),
    "lead": HeteronymSpec(
        token="lead",
        variants=(
            HeteronymVariant(
                key="verb",
                label="Verb (to lead)",
                replacement_token="lead",
                example_sentence="They will lead the way.",
            ),
            HeteronymVariant(
                key="metal",
                label="Noun (lead /metal/)",
                replacement_token="led",
                example_sentence="The pipe was made of lead.",
            ),
        ),
    ),
}
def _hash_id(*parts: str) -> str:
digest = hashlib.sha1("\n".join(parts).encode("utf-8")).hexdigest()
return digest[:12]
_WORD_BOUNDARY_CACHE: Dict[str, re.Pattern[str]] = {}
def _word_boundary_pattern(token: str) -> re.Pattern[str]:
key = token.casefold()
cached = _WORD_BOUNDARY_CACHE.get(key)
if cached is not None:
return cached
escaped = re.escape(token)
pattern = re.compile(
rf"(?i)(?<!\w){escaped}(?P<possessive>'s|\u2019s|\u2019)?(?!\w)"
)
_WORD_BOUNDARY_CACHE[key] = pattern
return pattern
def _preserve_case(replacement: str, original: str) -> str:
if not replacement:
return replacement
if original.isupper():
return replacement.upper()
if original[:1].isupper():
return replacement[:1].upper() + replacement[1:]
return replacement
def _build_replacement_sentence(
    sentence: str, token: str, replacement_token: str
) -> str:
    """Replace every whole-word occurrence of *token* in *sentence*.

    The replacement inherits the matched word's casing, and any trailing
    possessive suffix is preserved verbatim.
    """
    def _substitute(match: re.Match[str]) -> str:
        whole = match.group(0) or ""
        possessive = match.group("possessive") or ""
        stem = whole[: len(whole) - len(possessive)] if possessive else whole
        return _preserve_case(replacement_token, stem) + possessive

    return _word_boundary_pattern(token).sub(_substitute, sentence)
def _load_spacy(language: str) -> Any:
    """Best-effort spaCy pipeline: English small model, else a blank pipeline.

    Returns None when spaCy itself is unavailable.
    """
    if spacy is None:
        return None
    # English only for now; anything else gets a multi-language blank model.
    normalized = (language or "en").lower()
    if not normalized.startswith("en"):
        return spacy.blank("xx")
    try:
        return spacy.load("en_core_web_sm")
    except Exception:
        # Model not installed: fall back to a bare English tokenizer.
        return spacy.blank("en")
def extract_heteronym_overrides(
    chapters: Sequence[Mapping[str, Any]],
    *,
    language: str,
    existing: Optional[Iterable[Mapping[str, Any]]] = None,
) -> List[Dict[str, Any]]:
    """Extract distinct heteronym-containing sentences from chapters.
    Returns entries shaped for persistence + UI.
    Each entry contains:
    - id
    - token
    - sentence
    - options: [{key,label,replacement_token,replacement_sentence,example_sentence}]
    - default_choice
    - choice
    """
    # Heteronym handling is English-only and needs spaCy for sentence
    # segmentation; bail out early otherwise.
    lang = (language or "en").lower()
    if not lang.startswith("en"):
        return []
    if spacy is None:
        return []
    nlp = _load_spacy(lang)
    if nlp is None:
        return []
    # Carry forward user selections from a previous extraction, keyed by the
    # stable entry id produced by _hash_id below.
    previous_choices: Dict[str, str] = {}
    if existing:
        for item in existing:
            if not isinstance(item, Mapping):
                continue
            entry_id = str(item.get("id") or "").strip()
            choice = str(item.get("choice") or "").strip()
            if entry_id and choice:
                previous_choices[entry_id] = choice
    results: List[Dict[str, Any]] = []
    # Deduplicate on (lowercased token, sentence) so a sentence is surfaced at
    # most once per heteronym.
    seen: set[tuple[str, str]] = set()
    for chapter in chapters:
        if not isinstance(chapter, Mapping):
            continue
        text = str(chapter.get("text") or "")
        if not text.strip():
            continue
        doc = nlp(text)
        # Blank pipelines may not expose .sents; getattr keeps this safe.
        for sent in getattr(doc, "sents", []):
            sentence = str(getattr(sent, "text", "") or "").strip()
            if not sentence:
                continue
            for token in sent:
                token_text = str(getattr(token, "text", "") or "")
                if not token_text:
                    continue
                token_key = token_text.casefold()
                spec = _HETERONYM_SPECS.get(token_key)
                if not spec:
                    continue
                dedupe_key = (token_key, sentence)
                if dedupe_key in seen:
                    continue
                seen.add(dedupe_key)
                # Stable id lets previously saved choices survive re-extraction.
                entry_id = _hash_id(token_key, sentence)
                default_choice = spec.default_choice_for_token(token)
                choice = previous_choices.get(entry_id, default_choice)
                options: List[Dict[str, Any]] = []
                for variant in spec.variants:
                    # Pre-render the sentence with each variant's replacement
                    # so the UI can preview every pronunciation.
                    replacement_sentence = _build_replacement_sentence(
                        sentence,
                        token=spec.token,
                        replacement_token=variant.replacement_token,
                    )
                    options.append(
                        {
                            "key": variant.key,
                            "label": variant.label,
                            "replacement_token": variant.replacement_token,
                            "replacement_sentence": replacement_sentence,
                            "example_sentence": variant.example_sentence,
                        }
                    )
                results.append(
                    {
                        "id": entry_id,
                        "token": token_text,
                        "token_lower": token_key,
                        "sentence": sentence,
                        "options": options,
                        "default_choice": default_choice,
                        "choice": choice,
                    }
                )
    return results
# Optional hooks wired in by the GUI layer: a logging callback and a Qt-style
# signal emitter used to surface long-running download warnings.
log_callback = None
show_warning_signal_emitter = None  # Renamed for clarity


def set_log_callback(cb):
    """Register a callable that receives log messages from this module."""
    global log_callback
    log_callback = cb


def set_show_warning_signal_emitter(emitter):  # Renamed for clarity
    """Register the signal emitter used to show download warnings in the UI."""
    global show_warning_signal_emitter
    show_warning_signal_emitter = emitter
from huggingface_hub import hf_hub_download
def tracked_hf_hub_download(*args, **kwargs):
    """Wrapper around huggingface_hub.hf_hub_download that announces downloads.

    First probes the local cache (``local_files_only=True``); when the file is
    not cached, emits a user-facing message (GUI signal and/or log callback)
    before delegating to the real download. Returns whatever the real
    ``hf_hub_download`` returns.
    """
    try:
        # Probe the cache only - raises when the file is not available locally.
        local_kwargs = dict(kwargs)
        local_kwargs["local_files_only"] = True
        hf_hub_download(*args, **local_kwargs)
    except Exception:
        repo_id = kwargs.get("repo_id", "<unknown repo>")
        filename = kwargs.get("filename", "<unknown file>")
        # Bug fix: the messages previously contained the literal text
        # "(unknown)" instead of interpolating the filename.
        if isinstance(filename, str) and filename.endswith(".pth"):
            # Model weights are large; warn prominently via the GUI if wired up.
            msg = f"\nDownloading model '{filename}' from Hugging Face ({repo_id}). This may take a while. Please wait..."
            if show_warning_signal_emitter:  # Check if the emitter is set
                show_warning_signal_emitter.emit(
                    "Downloading Model",
                    f"Downloading model '{filename}' from Hugging Face repository '{repo_id}'. This may take a while, please wait.",
                )
        else:
            msg = f"\nDownloading '{filename}' from Hugging Face ({repo_id}). Please wait..."
        if log_callback:
            print(msg, flush=True)
            log_callback(msg)
        else:
            print(msg, flush=True)
    return hf_hub_download(*args, **kwargs)
# Monkeypatch the hub module so every download performed anywhere in this
# process goes through the tracking wrapper above.
import huggingface_hub

huggingface_hub.hf_hub_download = tracked_hf_hub_download
"""Integration clients for external services."""
from __future__ import annotations
import json
import logging
import math
import mimetypes
import re
from contextlib import ExitStack
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
import httpx
# Module-level logger for the integration clients below.
logger = logging.getLogger(__name__)
# Single error type surfaced to callers for any Audiobookshelf failure
# (network, HTTP status, or configuration problems).
class AudiobookshelfUploadError(RuntimeError):
    """Raised when an upload to Audiobookshelf fails."""
@dataclass(frozen=True)
class AudiobookshelfConfig:
    """Immutable connection and upload settings for an Audiobookshelf server."""

    base_url: str
    api_token: str
    library_id: Optional[str] = None
    collection_id: Optional[str] = None
    folder_id: Optional[str] = None
    verify_ssl: bool = True
    send_cover: bool = True
    send_chapters: bool = True
    send_subtitles: bool = True
    timeout: float = 3600.0

    def normalized_base_url(self) -> str:
        """Return the base URL stripped of trailing slashes and a '/api' suffix.

        Raises ValueError when no base URL is configured.
        """
        raw = (self.base_url or "").strip()
        if not raw:
            raise ValueError("Audiobookshelf base URL is required")
        trimmed = raw.rstrip("/")
        # The web UI historically suggested including '/api' in the base URL; trim
        # it here so we can safely append `/api/...` endpoints below.
        if trimmed.lower().endswith("/api"):
            trimmed = trimmed[: -len("/api")]
        return trimmed or raw
class AudiobookshelfClient:
    """Client for the legacy Audiobookshelf multipart upload endpoint."""

    def __init__(self, config: AudiobookshelfConfig) -> None:
        """Validate credentials and precompute the base URLs used by httpx."""
        if not config.api_token:
            raise ValueError("Audiobookshelf API token is required")
        # library_id is now optional for discovery
        self._config = config
        normalized = config.normalized_base_url() or ""
        self._base_url = normalized.rstrip("/") or normalized
        # httpx joins relative routes against base_url; it must end with '/'.
        self._client_base_url = f"{self._base_url}/"
        # Lazily-resolved (folder_id, folder_name, library_name) triple.
        self._folder_cache: Optional[Tuple[str, str, str]] = None

    def get_libraries(self) -> List[Dict[str, Any]]:
        """Fetch all libraries from the Audiobookshelf server."""
        route = self._api_path("libraries")
        try:
            with self._open_client() as client:
                response = client.get(route)
                response.raise_for_status()
                data = response.json()
                # data['libraries'] is a list of library objects
                return data.get("libraries", [])
        except httpx.HTTPError as exc:
            raise AudiobookshelfUploadError(f"Failed to fetch libraries: {exc}") from exc

    def _api_path(self, suffix: str = "") -> str:
        """Join the API prefix with the provided suffix without losing proxies."""
        clean_suffix = suffix.lstrip("/")
        return f"api/{clean_suffix}" if clean_suffix else "api"

    def upload_audiobook(
        self,
        audio_path: Path,
        *,
        metadata: Dict[str, Any],
        cover_path: Optional[Path] = None,
        chapters: Optional[Iterable[Dict[str, Any]]] = None,
        subtitles: Optional[Iterable[Path]] = None,
    ) -> Dict[str, Any]:
        """Upload an audiobook (plus optional cover/subtitles) via multipart POST.

        Raises AudiobookshelfUploadError on any transport or HTTP failure.
        Returns an empty dict on success.
        """
        if not audio_path.exists():
            raise AudiobookshelfUploadError(f"Audio path does not exist: {audio_path}")
        form_fields = self._build_upload_fields(audio_path, metadata, chapters)
        file_entries = self._build_file_entries(audio_path, cover_path, subtitles)
        route = self._api_path("upload")
        try:
            # ExitStack keeps every file handle open for the duration of the POST.
            with self._open_client() as client, ExitStack() as stack:
                files_payload = self._open_file_handles(file_entries, stack)
                response = client.post(route, data=form_fields, files=files_payload)
                response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            status = exc.response.status_code
            detail = (exc.response.text or "").strip()
            if detail:
                # Cap the server's response snippet so the error stays readable.
                detail = detail[:200]
                message = f"Audiobookshelf upload failed with status {status}: {detail}"
            else:
                message = f"Audiobookshelf upload failed with status {status}"
            raise AudiobookshelfUploadError(
                message
            ) from exc
        except httpx.HTTPError as exc:
            raise AudiobookshelfUploadError(f"Audiobookshelf upload failed: {exc}") from exc
        return {}

    def _open_client(self) -> httpx.Client:
        """Create an httpx client preconfigured with bearer auth and TLS settings."""
        headers = {
            "Authorization": f"Bearer {self._config.api_token}",
            "Accept": "application/json",
        }
        return httpx.Client(
            base_url=self._client_base_url,
            headers=headers,
            timeout=self._config.timeout,
            verify=self._config.verify_ssl,
        )

    def _build_upload_fields(
        self,
        audio_path: Path,
        metadata: Dict[str, Any],
        chapters: Optional[Iterable[Dict[str, Any]]],
    ) -> Dict[str, str]:
        """Assemble the non-file multipart form fields for the upload endpoint."""
        folder_id, _, _ = self._ensure_folder()
        title = self._extract_title(metadata, audio_path)
        author = self._extract_author(metadata)
        series = self._extract_series(metadata)
        series_sequence = self._extract_series_sequence(metadata)
        fields: Dict[str, str] = {
            "library": self._config.library_id,
            "folder": folder_id,
            "title": title,
        }
        if author:
            fields["author"] = author
        if series:
            fields["series"] = series
        if series_sequence:
            fields["seriesSequence"] = series_sequence
        if self._config.collection_id:
            fields["collectionId"] = self._config.collection_id
        metadata_payload: Dict[str, Any] = metadata or {}
        if chapters and self._config.send_chapters:
            # Copy before adding chapters so the caller's dict is not mutated here.
            metadata_payload = dict(metadata_payload)
            metadata_payload["chapters"] = list(chapters)
        if metadata_payload:
            # Ensure authors is a list of strings in the JSON payload if it exists
            # NOTE(review): when no chapters copy was made above, this writes into
            # the caller's metadata dict - confirm that is intended.
            if "authors" in metadata_payload:
                authors_val = metadata_payload["authors"]
                if isinstance(authors_val, str):
                    metadata_payload["authors"] = [a.strip() for a in authors_val.split(",") if a.strip()]
                elif isinstance(authors_val, list):
                    metadata_payload["authors"] = [str(a).strip() for a in authors_val if str(a).strip()]
            try:
                fields["metadata"] = json.dumps(metadata_payload, ensure_ascii=False)
            except (TypeError, ValueError):
                logger.debug("Failed to serialize Audiobookshelf metadata payload")
        return fields

    def _build_file_entries(
        self,
        audio_path: Path,
        cover_path: Optional[Path],
        subtitles: Optional[Iterable[Path]],
    ) -> List[Tuple[str, Path]]:
        """Return (form-field-name, path) pairs: audio first, then cover/subtitles."""
        entries: List[Tuple[str, Path]] = [("file0", audio_path)]
        index = 1
        if cover_path and self._config.send_cover and cover_path.exists():
            entries.append((f"file{index}", cover_path))
            index += 1
        if subtitles and self._config.send_subtitles:
            for subtitle in subtitles:
                if subtitle.exists():
                    entries.append((f"file{index}", subtitle))
                    index += 1
        return entries

    def _open_file_handles(
        self,
        entries: Sequence[Tuple[str, Path]],
        stack: ExitStack,
    ) -> List[Tuple[str, Tuple[str, Any, str]]]:
        """Open each entry for reading and register the handle with *stack*."""
        files: List[Tuple[str, Tuple[str, Any, str]]] = []
        for field_name, path in entries:
            mime_type, _ = mimetypes.guess_type(path.name)
            mime_type = mime_type or "application/octet-stream"
            handle = stack.enter_context(path.open("rb"))
            files.append((field_name, (path.name, handle, mime_type)))
        return files

    def find_existing_items(
        self,
        title: str,
        *,
        folder_id: Optional[str] = None,
    ) -> List[Mapping[str, Any]]:
        """Search the server for items whose title matches *title* exactly.

        Tries several known search routes in order; matches can be filtered
        to the given (or configured) folder. Lookup failures other than
        401/403 are treated as best-effort and yield no matches.
        """
        normalized_title = self._normalize_title_value(title)
        if not normalized_title:
            return []
        folder_hint = folder_id or self._config.folder_id
        target_folders = set()
        if folder_hint:
            folder_token = str(folder_hint).strip().lower()
            if folder_token:
                target_folders.add(folder_token)
        requests = self._candidate_search_requests(title, folder_hint)
        if not requests:
            return []
        matches: List[Mapping[str, Any]] = []
        try:
            with self._open_client() as client:
                for route, params in requests:
                    try:
                        response = client.get(route, params=params)
                    except httpx.HTTPError as exc:
                        logger.debug("Audiobookshelf lookup failed for %s: %s", route, exc)
                        continue
                    if response.status_code == 404:
                        # Route not supported by this server version; try the next one.
                        continue
                    try:
                        response.raise_for_status()
                    except httpx.HTTPStatusError as exc:
                        status = exc.response.status_code
                        if status in {401, 403}:
                            raise AudiobookshelfUploadError(
                                "Audiobookshelf authentication failed while checking for existing items."
                            ) from exc
                        logger.debug("Audiobookshelf lookup error %s for %s", status, route)
                        continue
                    try:
                        payload = response.json()
                    except ValueError:
                        continue
                    candidates = self._extract_candidate_items(payload)
                    for item in candidates:
                        item_title = self._normalize_item_title(item)
                        if not item_title or item_title != normalized_title:
                            continue
                        if target_folders:
                            item_folder = self._normalize_folder_id(item)
                            if item_folder and item_folder not in target_folders:
                                continue
                        matches.append(item)
                    if matches:
                        break
        except AudiobookshelfUploadError:
            raise
        except Exception:
            # Best-effort lookup: unexpected failures simply yield no matches.
            logger.debug(
                "Unexpected error while checking Audiobookshelf for existing items",
                exc_info=True,
            )
        return matches

    def delete_items(self, items: Iterable[Mapping[str, Any] | str]) -> None:
        """Delete the given items (mappings or raw id strings) from the server."""
        to_delete: List[str] = []
        for entry in items:
            if isinstance(entry, Mapping):
                item_id = self._extract_item_id(entry)
            else:
                item_id = str(entry).strip()
            if item_id:
                to_delete.append(item_id)
        if not to_delete:
            return
        with self._open_client() as client:
            for item_id in to_delete:
                self._delete_single_item(client, item_id)

    def _candidate_search_requests(
        self,
        title: str,
        folder_id: Optional[str],
    ) -> List[Tuple[str, Dict[str, Any]]]:
        """Build the ordered list of (route, params) search attempts for *title*."""
        query = (title or "").strip()
        if not query:
            return []
        library_id = self._config.library_id
        folder_token = (folder_id or self._config.folder_id or "").strip()
        requests: List[Tuple[str, Dict[str, Any]]] = []
        seen_routes: set[str] = set()

        def _append(route: str, params: Dict[str, Any]) -> None:
            # Deduplicate by route so the same endpoint is never queried twice.
            if route in seen_routes:
                return
            seen_routes.add(route)
            requests.append((route, params))

        if folder_token:
            _append(
                self._api_path(f"folders/{folder_token}/items"),
                {"library": library_id, "search": query},
            )
        _append(self._api_path(f"libraries/{library_id}/items"), {"search": query})
        _append(self._api_path("items"), {"library": library_id, "search": query})
        _append(
            self._api_path("search"),
            {"query": query, "library": library_id, "media": "audiobook"},
        )
        return requests

    def _delete_single_item(self, client: httpx.Client, item_id: str) -> None:
        """Try known delete routes for *item_id*; raise only on a hard HTTP error."""
        routes = [
            self._api_path(f"items/{item_id}"),
            self._api_path(f"libraries/{self._config.library_id}/items/{item_id}"),
        ]
        for route in routes:
            try:
                response = client.delete(route)
            except httpx.HTTPError as exc:
                logger.debug("Audiobookshelf delete failed for %s: %s", route, exc)
                continue
            if response.status_code in (200, 202, 204):
                return
            if response.status_code == 404:
                # Route (or item) unknown on this server; try the next route.
                continue
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                raise AudiobookshelfUploadError(
                    f"Failed to delete Audiobookshelf item '{item_id}': {exc}"
                ) from exc
        logger.debug("Audiobookshelf item %s could not be confirmed deleted", item_id)

    def resolve_folder(self) -> Tuple[str, str, str]:
        """Return the resolved folder (id, name, library name)."""
        return self._ensure_folder()

    def list_folders(self) -> List[Dict[str, str]]:
        """Return all folders for the configured library."""
        library_name, folders = self._load_library_metadata()
        results: List[Dict[str, str]] = []
        for folder in folders:
            folder_id = str(folder.get("id") or "").strip()
            if not folder_id:
                continue
            name = self._folder_display_name(folder)
            path = self._select_folder_path(folder)
            results.append(
                {
                    "id": folder_id,
                    "name": name,
                    "path": path,
                    "library": library_name,
                }
            )
        # Sort case-insensitively by path, then name, then id.
        results.sort(key=lambda entry: (entry.get("path") or entry.get("name") or entry.get("id") or "").lower())
        return results

    def _ensure_folder(self) -> Tuple[str, str, str]:
        """Resolve the configured folder identifier to (id, name, library name).

        Accepts a raw folder ID, an exact folder name, a full path, or a
        trailing path segment; the result is cached for the client lifetime.
        Raises AudiobookshelfUploadError when nothing matches.
        """
        if self._folder_cache:
            return self._folder_cache
        identifier = (self._config.folder_id or "").strip()
        if not identifier:
            raise AudiobookshelfUploadError(
                "Audiobookshelf folder is required; enter the folder name or ID in Settings."
            )
        identifier_norm = self._normalize_identifier(identifier)
        library_name, folders = self._load_library_metadata()
        # direct ID match
        for folder in folders:
            folder_id = str(folder.get("id") or "").strip()
            if folder_id and folder_id == identifier:
                folder_name = self._folder_display_name(folder) or folder_id
                self._folder_cache = (folder_id, folder_name, library_name)
                return self._folder_cache
        has_path_component = "/" in identifier_norm
        for folder in folders:
            folder_id = str(folder.get("id") or "").strip()
            if not folder_id:
                continue
            folder_name = self._folder_display_name(folder)
            name_norm = self._normalize_identifier(folder_name)
            # Exact (normalized) folder-name match.
            if name_norm and name_norm == identifier_norm:
                self._folder_cache = (folder_id, folder_name or folder_id, library_name)
                return self._folder_cache
            for candidate in self._folder_path_candidates(folder):
                candidate_norm = self._normalize_identifier(candidate)
                if not candidate_norm:
                    continue
                if candidate_norm == identifier_norm:
                    self._folder_cache = (folder_id, folder_name or folder_id, library_name)
                    return self._folder_cache
                # A multi-segment identifier may match the tail of a full path.
                if has_path_component and candidate_norm.endswith(identifier_norm):
                    self._folder_cache = (folder_id, folder_name or folder_id, library_name)
                    return self._folder_cache
                if not has_path_component:
                    # A single-segment identifier may match the last path segment.
                    tail = candidate_norm.split("/")[-1]
                    if tail and tail == identifier_norm:
                        self._folder_cache = (folder_id, folder_name or folder_id, library_name)
                        return self._folder_cache
        raise AudiobookshelfUploadError(
            f"Folder '{identifier}' was not found in library '{library_name}'. "
            "Enter the folder name exactly as it appears in Audiobookshelf, a trailing path segment, or paste the folder ID."
        )

    def _load_library_metadata(self) -> Tuple[str, List[Mapping[str, Any]]]:
        """Fetch the configured library and return (library name, folder list)."""
        try:
            with self._open_client() as client:
                response = client.get(self._api_path(f"libraries/{self._config.library_id}"))
                response.raise_for_status()
                payload = response.json()
        except httpx.HTTPStatusError as exc:
            status = exc.response.status_code
            if status == 404:
                message = f"Audiobookshelf library '{self._config.library_id}' not found."
            else:
                detail = (exc.response.text or "").strip()
                if detail:
                    detail = detail[:200]
                    message = (
                        f"Failed to load Audiobookshelf library '{self._config.library_id}' "
                        f"(status {status}): {detail}"
                    )
                else:
                    message = (
                        f"Failed to load Audiobookshelf library '{self._config.library_id}' "
                        f"(status {status})."
                    )
            raise AudiobookshelfUploadError(message) from exc
        except httpx.HTTPError as exc:
            raise AudiobookshelfUploadError(
                f"Failed to reach Audiobookshelf library '{self._config.library_id}': {exc}"
            ) from exc
        if not isinstance(payload, Mapping):
            return self._config.library_id, []
        library_name = str(payload.get("name") or payload.get("label") or self._config.library_id)
        # Server versions differ on which key holds the folder list.
        raw_folders = payload.get("libraryFolders") or payload.get("folders") or []
        folders = [entry for entry in raw_folders if isinstance(entry, Mapping)]
        return library_name, folders

    @staticmethod
    def _folder_path_candidates(folder: Mapping[str, Any]) -> List[str]:
        """Collect every path-like string the server reports for *folder*."""
        candidates: List[str] = []
        for key in ("fullPath", "fullpath", "path", "folderPath", "virtualPath"):
            value = folder.get(key)
            if isinstance(value, str) and value.strip():
                candidates.append(value)
        return candidates

    @staticmethod
    def _folder_display_name(folder: Mapping[str, Any]) -> str:
        """Best human-readable name for *folder*: name, then path tail, then id."""
        name = str(folder.get("name") or folder.get("label") or "").strip()
        if name:
            return name
        path = AudiobookshelfClient._select_folder_path(folder)
        if path:
            tail = path.strip("/ ")
            tail = tail.split("/")[-1] if tail else ""
            if tail:
                return tail
        return str(folder.get("id") or "").strip()

    @staticmethod
    def _select_folder_path(folder: Mapping[str, Any]) -> str:
        """Return the first non-empty folder path with backslashes normalized."""
        for candidate in AudiobookshelfClient._folder_path_candidates(folder):
            normalized = candidate.replace("\\", "/").strip()
            if normalized:
                return normalized
        return ""

    @staticmethod
    def _normalize_identifier(value: str) -> str:
        """Normalize a folder name/path/id for comparison (slashes, drive, case)."""
        token = (value or "").strip()
        token = token.replace("\\", "/")
        # Drop a Windows drive prefix such as 'C:'.
        if len(token) > 1 and token[1] == ":":
            token = token[2:]
        token = token.strip("/ ")
        return token.lower()

    @staticmethod
    def _normalize_title_value(value: Optional[str]) -> str:
        """Collapse whitespace and casefold a title for exact-match comparison."""
        if not isinstance(value, str):
            return ""
        normalized = re.sub(r"\s+", " ", value).strip()
        return normalized.casefold() if normalized else ""

    @staticmethod
    def _normalize_item_title(item: Mapping[str, Any]) -> str:
        """Extract and normalize an item's title, recursing into 'libraryItem'."""
        if not isinstance(item, Mapping):
            return ""
        for key in ("title", "name", "label"):
            candidate = item.get(key)
            if isinstance(candidate, str) and candidate.strip():
                return AudiobookshelfClient._normalize_title_value(candidate)
        library_item = item.get("libraryItem")
        if isinstance(library_item, Mapping):
            return AudiobookshelfClient._normalize_item_title(library_item)
        return ""

    @staticmethod
    def _normalize_folder_id(item: Mapping[str, Any]) -> Optional[str]:
        """Extract a lowercased folder id from *item*, recursing into 'libraryItem'."""
        if not isinstance(item, Mapping):
            return None
        for key in ("folderId", "libraryFolderId", "folder_id", "folder"):
            value = item.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip().lower()
            if isinstance(value, (int, float)):
                return str(value).strip().lower()
        library_item = item.get("libraryItem")
        if isinstance(library_item, Mapping):
            return AudiobookshelfClient._normalize_folder_id(library_item)
        return None

    @staticmethod
    def _extract_item_id(item: Mapping[str, Any]) -> Optional[str]:
        """Extract an item id from *item*, recursing into 'libraryItem'."""
        if not isinstance(item, Mapping):
            return None
        for key in ("id", "libraryItemId", "itemId"):
            value = item.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
            if isinstance(value, (int, float)):
                return str(value).strip()
        library_item = item.get("libraryItem")
        if isinstance(library_item, Mapping):
            return AudiobookshelfClient._extract_item_id(library_item)
        return None

    @staticmethod
    def _extract_candidate_items(payload: Any) -> List[Mapping[str, Any]]:
        """Walk an arbitrary JSON payload and collect item-like mappings.

        A mapping counts as an item when it yields both a title and an id;
        duplicates (by id) and already-visited objects are skipped.
        """
        items: List[Mapping[str, Any]] = []
        seen_ids: set[str] = set()
        visited: set[int] = set()

        def _visit(obj: Any) -> None:
            if isinstance(obj, Mapping):
                obj_id = id(obj)
                # Guard against cycles / shared sub-objects.
                if obj_id in visited:
                    return
                visited.add(obj_id)
                title = AudiobookshelfClient._normalize_item_title(obj)
                item_id = AudiobookshelfClient._extract_item_id(obj)
                if title and item_id:
                    key = item_id.strip().lower()
                    if key not in seen_ids:
                        seen_ids.add(key)
                        items.append(obj)
                for value in obj.values():
                    _visit(value)
            elif isinstance(obj, list):
                for entry in obj:
                    _visit(entry)

        _visit(payload)
        return items

    @staticmethod
    def _extract_title(metadata: Mapping[str, Any], audio_path: Path) -> str:
        """Pick the upload title: metadata['title'] or the audio file's stem."""
        title = metadata.get("title") if isinstance(metadata, Mapping) else None
        candidate = str(title).strip() if isinstance(title, str) else ""
        if candidate:
            return candidate
        return audio_path.stem or audio_path.name

    @staticmethod
    def _extract_author(metadata: Mapping[str, Any]) -> str:
        """Return the authors field flattened to a single comma-separated string."""
        authors = metadata.get("authors") if isinstance(metadata, Mapping) else None
        if isinstance(authors, str):
            candidate = authors.strip()
            return candidate
        if isinstance(authors, Iterable) and not isinstance(authors, (str, Mapping)):
            names = [str(entry).strip() for entry in authors if isinstance(entry, str) and entry.strip()]
            if names:
                # ABS expects a comma-separated string for multiple authors.
                return ", ".join(names)
        return ""

    @staticmethod
    def _extract_series(metadata: Mapping[str, Any]) -> str:
        """Return the series name from metadata, or an empty string."""
        series_name = metadata.get("seriesName") if isinstance(metadata, Mapping) else None
        if isinstance(series_name, str) and series_name.strip():
            return series_name.strip()
        return ""

    @staticmethod
    def _extract_series_sequence(metadata: Mapping[str, Any]) -> str:
        """Return the first usable series-sequence value from known metadata keys."""
        if not isinstance(metadata, Mapping):
            return ""
        preferred_keys = (
            "seriesSequence",
            "series_sequence",
            "seriesIndex",
            "series_index",
            "seriesNumber",
            "series_number",
            "bookNumber",
            "book_number",
        )
        for key in preferred_keys:
            if key not in metadata:
                continue
            normalized = AudiobookshelfClient._normalize_series_sequence(metadata.get(key))
            if normalized:
                return normalized
        return ""

    @staticmethod
    def _normalize_series_sequence(raw: Any) -> str:
        """Normalize a raw sequence value (e.g. 'Book 2', '1,5', 3.0) to '2', '1.5', '3'."""
        if raw is None:
            return ""
        if isinstance(raw, (int, float)):
            if isinstance(raw, float) and (math.isnan(raw) or math.isinf(raw)):
                return ""
            text = str(raw)
        else:
            text = str(raw).strip()
        if not text:
            return ""
        # Accept a comma as decimal separator before extracting the number.
        candidate = text.replace(",", ".")
        match = re.search(r"\d+(?:\.\d+)?", candidate)
        if not match:
            return ""
        normalized = match.group(0)
        if "." in normalized:
            # Trim trailing zeros and a dangling dot ('2.50' -> '2.5').
            normalized = normalized.rstrip("0").rstrip(".")
            if not normalized:
                normalized = "0"
            return normalized
        try:
            # Strip leading zeros via int round-trip ('007' -> '7').
            return str(int(normalized))
        except ValueError:
            cleaned = normalized.lstrip("0")
            return cleaned or "0"
from __future__ import annotations
import dataclasses
import html
import re
import unicodedata
import xml.etree.ElementTree as ET
from collections import deque
from dataclasses import dataclass, field
from pathlib import PurePosixPath
from typing import Any, Deque, Dict, Iterable, Iterator, List, Mapping, Optional, Set, Tuple, Union
from urllib.parse import quote, urljoin, urlparse
import httpx
# XML namespaces found in OPDS feeds: Atom core, the OPDS catalog spec,
# Dublin Core terms, and Calibre's catalog/metadata extensions.
ATOM_NS = "http://www.w3.org/2005/Atom"
OPDS_NS = "http://opds-spec.org/2010/catalog"
DC_NS = "http://purl.org/dc/terms/"
CALIBRE_CATALOG_NS = "http://calibre.kovidgoyal.net/2009/catalog"
CALIBRE_METADATA_NS = "http://calibre.kovidgoyal.net/2009/metadata"
# Prefix map used for namespaced ElementTree lookups.
NS = {
    "atom": ATOM_NS,
    "opds": OPDS_NS,
    "dc": DC_NS,
    "calibre": CALIBRE_CATALOG_NS,
    "calibre_md": CALIBRE_METADATA_NS,
}
# Removes any HTML/XML tag.
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
# Heuristics for pulling series names/numbers out of titles and summaries.
_SERIES_PREFIX_RE = re.compile(r"^\s*(series|books?)\s*[:\-]\s*", re.IGNORECASE)
_SERIES_NUMBER_BRACKET_RE = re.compile(r"[\[(]\s*(?:book\s*)?(\d+(?:\.\d+)?)\s*[\])]", re.IGNORECASE)
_SERIES_NUMBER_HASH_RE = re.compile(r"#\s*(\d+(?:\.\d+)?)")
_SERIES_NUMBER_BOOK_RE = re.compile(r"\bbook\s+(\d+(?:\.\d+)?)\b", re.IGNORECASE)
_SERIES_LINE_TEXT_RE = re.compile(r"^\s*series\s*[:\-]\s*(.+)$", re.IGNORECASE)
# Matches 'LABEL: value' metadata lines sometimes embedded in summaries.
_SUMMARY_METADATA_LINE_RE = re.compile(r"^([A-Z][A-Z0-9&/\- +'\u2019]{1,40})\s*[:\-]\s*(.+)$")
# MIME types servers commonly report for EPUB downloads (some report plain zip).
_EPUB_MIME_TYPES = {
    "application/epub+zip",
    "application/zip",
    "application/x-zip",
    "application/x-zip-compressed",
}
_SUPPORTED_DOWNLOAD_MIME_TYPES = set(_EPUB_MIME_TYPES) | {"application/pdf"}
_SUPPORTED_DOWNLOAD_EXTENSIONS = {".epub", ".pdf"}
# Common English words - presumably used to de-weight tokens when matching
# titles during local search; the consuming code is below the visible chunk.
_STOP_WORDS = {
    "a", "an", "the", "and", "or", "but", "if", "then", "else", "when",
    "at", "by", "for", "from", "in", "into", "of", "off", "on", "onto",
    "to", "with", "is", "are", "was", "were", "be", "been", "being",
    "that", "this", "these", "those", "it", "its"
}
# Single error type surfaced by CalibreOPDSClient for fetch/parse failures.
class CalibreOPDSError(RuntimeError):
    """Raised when the Calibre OPDS client encounters an unrecoverable error."""
@dataclass
class OPDSLink:
    """A single <link> element parsed from an OPDS feed."""

    href: str
    rel: Optional[str] = None
    type: Optional[str] = None
    title: Optional[str] = None

    def to_dict(self) -> Dict[str, Optional[str]]:
        """Return a JSON-serializable mapping of the link attributes."""
        attribute_names = ("href", "rel", "type", "title")
        return {name: getattr(self, name) for name in attribute_names}
@dataclass
class OPDSEntry:
    """A catalog entry (book or navigation item) parsed from an OPDS feed."""

    id: str
    title: str
    position: Optional[int] = None
    authors: List[str] = field(default_factory=list)
    subtitle: Optional[str] = None
    updated: Optional[str] = None
    published: Optional[str] = None
    summary: Optional[str] = None
    download: Optional[OPDSLink] = None
    alternate: Optional[OPDSLink] = None
    thumbnail: Optional[OPDSLink] = None
    links: List[OPDSLink] = field(default_factory=list)
    series: Optional[str] = None
    series_index: Optional[float] = None
    tags: List[str] = field(default_factory=list)
    rating: Optional[float] = None
    rating_max: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable mapping; nested OPDSLink values become dicts."""

        def _link(value: Optional[OPDSLink]) -> Optional[Dict[str, Optional[str]]]:
            # Optional single links serialize to None when absent.
            return value.to_dict() if value else None

        return {
            "id": self.id,
            "title": self.title,
            "position": self.position,
            "authors": list(self.authors),
            "subtitle": self.subtitle,
            "updated": self.updated,
            "published": self.published,
            "summary": self.summary,
            "download": _link(self.download),
            "alternate": _link(self.alternate),
            "thumbnail": _link(self.thumbnail),
            "links": [item.to_dict() for item in self.links],
            "series": self.series,
            "series_index": self.series_index,
            "tags": list(self.tags),
            "rating": self.rating,
            "rating_max": self.rating_max,
        }
@dataclass
class OPDSFeed:
    """A parsed OPDS feed: identity, entries, and rel-keyed navigation links."""

    id: Optional[str]
    title: Optional[str]
    entries: List[OPDSEntry]
    links: Dict[str, OPDSLink] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable mapping of the feed contents."""
        serialized_links = {rel: link.to_dict() for rel, link in self.links.items()}
        return {
            "id": self.id,
            "title": self.title,
            "entries": [entry.to_dict() for entry in self.entries],
            "links": serialized_links,
        }
def feed_to_dict(feed: OPDSFeed) -> Dict[str, Any]:
    """Module-level convenience wrapper: serialize *feed* via its to_dict()."""
    return feed.to_dict()
@dataclass
class DownloadedResource:
    """An in-memory file fetched from the catalog."""

    # Suggested filename for saving the resource.
    filename: str
    # MIME type as reported by the server.
    mime_type: str
    # Raw file bytes.
    content: bytes
class CalibreOPDSClient:
"""Client for interacting with a Calibre-Web OPDS catalog."""
def __init__(
self,
base_url: str,
*,
username: Optional[str] = None,
password: Optional[str] = None,
timeout: float = 15.0,
verify: bool = True,
) -> None:
if not base_url:
raise ValueError("Calibre OPDS base URL is required")
normalized = base_url.strip()
if not normalized:
raise ValueError("Calibre OPDS base URL is required")
# Store the original URL without forcing a trailing slash.
# Some servers (e.g., Booklore) return 404 for URLs with trailing slashes.
self._base_url = normalized.rstrip("/")
self._auth = None
if username:
self._auth = httpx.BasicAuth(username, password or "")
self._timeout = timeout
self._verify = verify
self._headers = {
"User-Agent": "abogen-calibre-opds/1.0",
"Accept": "application/atom+xml,application/xml;q=0.9,*/*;q=0.8",
}
@staticmethod
def _strip_html(value: Optional[str]) -> Optional[str]:
if not value:
return None
cleaned = _TAG_STRIP_RE.sub("", value)
return html.unescape(cleaned).strip() or None
def _make_url(self, href: Optional[str]) -> str:
if not href:
return self._base_url
href = href.strip()
if href.startswith("http://") or href.startswith("https://"):
return href
if href.startswith("/"):
# Absolute path - join with origin only
parsed = urlparse(self._base_url)
return f"{parsed.scheme}://{parsed.netloc}{href}"
if href.startswith("?") or href.startswith("#"):
return f"{self._base_url}{href}"
if href.startswith("./") or href.startswith("../"):
# For relative paths, we need a trailing slash for urljoin to work correctly
base_with_slash = self._base_url if self._base_url.endswith("/") else f"{self._base_url}/"
return urljoin(base_with_slash, href)
# Relative path like "search" or "catalog?page=1" - treat as sibling
base_with_slash = self._base_url if self._base_url.endswith("/") else f"{self._base_url}/"
return urljoin(base_with_slash, href)
def _open_client(self) -> httpx.Client:
return httpx.Client(
auth=self._auth,
headers=dict(self._headers),
timeout=self._timeout,
verify=self._verify,
)
def fetch_feed(self, href: Optional[str] = None, *, params: Optional[Mapping[str, Any]] = None) -> OPDSFeed:
target = self._make_url(href)
try:
with self._open_client() as client:
response = client.get(target, params=params, follow_redirects=True)
response.raise_for_status()
except httpx.HTTPStatusError as exc: # pragma: no cover - thin wrapper
raise CalibreOPDSError(f"Calibre OPDS request failed: {exc.response.status_code}") from exc
except httpx.HTTPError as exc: # pragma: no cover - thin wrapper
raise CalibreOPDSError(f"Calibre OPDS request failed: {exc}") from exc
return self._parse_feed(response.text, base_url=target)
def _fetch_opensearch_template(self, href: str) -> Optional[str]:
target = self._make_url(href)
try:
with self._open_client() as client:
response = client.get(target, follow_redirects=True)
response.raise_for_status()
# Simple XML parsing to find the Url template
# We avoid full namespace handling for robustness
root = ET.fromstring(response.text)
for node in root.iter():
if node.tag.endswith("Url"):
template = node.attrib.get("template")
if template and "{searchTerms}" in template:
mime = node.attrib.get("type", "")
# Prefer atom/xml feeds
if "atom" in mime or "xml" in mime:
return template
return None
except Exception:
return None
    def _find_best_seed_feed(self, root_feed: OPDSFeed) -> OPDSFeed:
        """Pick a feed likely to contain acquisition entries for local search.

        Falls back to *root_feed* when no better navigation target is found
        or fetching the candidate fails.
        """
        # If the root feed already has books, use it
        for entry in root_feed.entries:
            if any("acquisition" in (link.rel or "") for link in entry.links):
                return root_feed
        # Otherwise, look for a "By Title" or "All" navigation entry
        candidates = ["title", "all", "books", "catalog"]
        best_href = None
        for entry in root_feed.entries:
            title_lower = (entry.title or "").lower()
            if any(c in title_lower for c in candidates):
                # Check if it has a navigation link
                for link in entry.links:
                    if self._is_navigation_link(link):
                        best_href = link.href
                        # Prefer "By Title" explicitly
                        if "title" in title_lower:
                            break
                # Stop scanning entries once a "title" navigation entry matched.
                if best_href and "title" in title_lower:
                    break
        if best_href:
            try:
                return self.fetch_feed(best_href)
            except CalibreOPDSError:
                # Candidate feed unreachable - fall back to the root feed.
                pass
        return root_feed
def search(self, query: str, start_href: Optional[str] = None) -> OPDSFeed:
cleaned = (query or "").strip()
if not cleaned:
return self.fetch_feed(start_href) if start_href else self.fetch_feed()
base_feed: Optional[OPDSFeed] = None
try:
base_feed = self.fetch_feed()
except CalibreOPDSError:
pass
# 1. Try explicit search link from feed
if base_feed:
# Check for OpenSearch description first
search_link = self._resolve_link(base_feed.links, "search")
if search_link and search_link.type == "application/opensearchdescription+xml":
template = self._fetch_opensearch_template(search_link.href)
if template:
search_url = template.replace("{searchTerms}", quote(cleaned))
try:
feed = self.fetch_feed(search_url)
if feed.entries:
filtered = self._filter_feed_entries(feed, cleaned)
if filtered.entries:
return filtered
except CalibreOPDSError:
pass
# Check for direct template
search_url = self._resolve_search_url(base_feed, cleaned)
if search_url:
try:
feed = self.fetch_feed(search_url)
if feed.entries:
filtered = self._filter_feed_entries(feed, cleaned)
if filtered.entries:
return filtered
except CalibreOPDSError:
pass
# 2. Try common guesses if explicit link failed
candidates: List[Tuple[Optional[str], Optional[Mapping[str, Any]]]] = [
("search", {"query": cleaned}),
("search", {"q": cleaned}),
(None, {"search": cleaned}),
]
last_error: Optional[Exception] = None
for path, params in candidates:
try:
feed = self.fetch_feed(path, params=params)
if feed.entries:
# Check if the server ignored the query and returned the default feed
if base_feed and feed.title == base_feed.title:
# Compare first entry ID to see if it's the same feed
if feed.entries[0].id == base_feed.entries[0].id:
continue
filtered = self._filter_feed_entries(feed, cleaned)
if filtered.entries:
return filtered
except CalibreOPDSError as exc:
last_error = exc
continue
# 3. Fallback to local search (crawling)
seed_feed: Optional[OPDSFeed] = None
if start_href:
try:
seed_feed = self.fetch_feed(start_href)
except CalibreOPDSError:
pass
if not seed_feed and base_feed:
# If we are falling back to base_feed (Root), try to find a better seed
seed_feed = self._find_best_seed_feed(base_feed)
if not seed_feed:
try:
seed_feed = self.fetch_feed()
except CalibreOPDSError as exc:
if last_error:
raise last_error
raise exc
# Heuristic: If the seed feed has acquisition links, use linear scan.
# Otherwise, use BFS to find content.
has_books = False
if seed_feed and seed_feed.entries:
for entry in seed_feed.entries[:5]:
for link in entry.links:
if "acquisition" in (link.rel or ""):
has_books = True
break
if has_books:
break
if has_books:
return self._collect_search_results(seed_feed, cleaned)
else:
return self._local_search(cleaned, seed_feed=seed_feed)
    def _collect_search_results(
        self,
        seed_feed: OPDSFeed,
        query: str,
        *,
        max_pages: int = 40,
    ) -> OPDSFeed:
        """Linearly page through *seed_feed*, gathering entries matching *query*.

        Follows "next" links for at most *max_pages* pages, de-duplicating
        matches by entry id. Returns a copy of *seed_feed* whose entries are
        the collected matches (possibly empty).
        """
        normalized = (query or "").strip()
        if not normalized:
            return seed_feed
        seen_ids: Set[str] = set()
        collected: List[OPDSEntry] = []
        for page in self._iter_paginated_feeds(seed_feed, max_pages=max_pages):
            filtered = self._filter_feed_entries(page, normalized)
            for entry in filtered.entries:
                entry_id = (entry.id or "").strip()
                if entry_id:
                    if entry_id in seen_ids:
                        continue
                    seen_ids.add(entry_id)
                # NOTE(review): entries without an id are never de-duplicated.
                collected.append(entry)
        return dataclasses.replace(seed_feed, entries=collected)
    def _iter_paginated_feeds(self, seed_feed: OPDSFeed, *, max_pages: int = 40) -> Iterator[OPDSFeed]:
        """Yield *seed_feed* and then each successive page reachable via "next".

        Stops after *max_pages* fetched pages, on a repeated URL (cycle
        guard), on a missing/blank "next" href, or on the first fetch failure.
        """
        yield seed_feed
        next_link = self._resolve_link(seed_feed.links, "next")
        visited: Set[str] = set()
        pages_examined = 0
        while next_link and pages_examined < max_pages:
            href = (next_link.href or "").strip()
            if not href:
                break
            absolute = self._make_url(href)
            if absolute in visited:
                # Cycle guard: some servers link "next" back to an earlier page.
                break
            visited.add(absolute)
            pages_examined += 1
            try:
                page = self.fetch_feed(absolute)
            except CalibreOPDSError:
                break
            yield page
            next_link = self._resolve_link(page.links, "next")
@staticmethod
def _merge_feed_entries(primary: OPDSFeed, secondary: OPDSFeed) -> OPDSFeed:
if primary is secondary or not secondary.entries:
return primary
seen_ids: Set[str] = set()
combined: List[OPDSEntry] = list(primary.entries)
for entry in primary.entries:
entry_id = (entry.id or "").strip()
if entry_id:
seen_ids.add(entry_id)
for entry in secondary.entries:
entry_id = (entry.id or "").strip()
if entry_id and entry_id in seen_ids:
continue
if entry_id:
seen_ids.add(entry_id)
combined.append(entry)
return dataclasses.replace(primary, entries=combined)
    def _local_search(
        self,
        query: str,
        *,
        seed_feed: Optional[OPDSFeed] = None,
        max_pages: int = 40,
    ) -> OPDSFeed:
        """Client-side search: crawl navigation links breadth-first, filtering pages.

        Used when the server offers no working search endpoint. Starting from
        *seed_feed* (or the root feed), follows navigation/collection links
        for at most *max_pages* fetches and collects entries matching *query*,
        de-duplicated by id. Returns a copy of the start feed whose entries
        are the matches (possibly empty).
        """
        normalized = (query or "").strip()
        if not normalized:
            return seed_feed or self.fetch_feed()
        tokens = [token for token in re.split(r"\s+", normalized.lower()) if token]
        if not tokens:
            return seed_feed or self.fetch_feed()
        start_feed = seed_feed or self.fetch_feed()
        collected: List[OPDSEntry] = []
        seen_match_ids: Set[str] = set()
        def add_matches(feed: OPDSFeed) -> None:
            # Record this page's matching entries, de-duplicated by id.
            filtered = self._filter_feed_entries(feed, normalized)
            for entry in filtered.entries:
                entry_id = (entry.id or "").strip()
                if entry_id:
                    if entry_id in seen_match_ids:
                        continue
                    seen_match_ids.add(entry_id)
                collected.append(entry)
        add_matches(start_feed)
        queue: Deque[str] = deque()
        queued: Set[str] = set()
        visited: Set[str] = set()
        def is_navigation_link(rel_hint: Optional[str], link: OPDSLink) -> bool:
            # Decide whether a link leads to another catalogue page worth
            # crawling. rel_hint is the mapping key the link was stored under,
            # which may differ from link.rel.
            rel_candidates: List[str] = []
            if rel_hint:
                rel_candidates.append(rel_hint)
            if link.rel and link.rel not in rel_candidates:
                rel_candidates.append(link.rel)
            rel_candidates = [(rel or "").strip().lower() for rel in rel_candidates if rel]
            link_type = (link.type or "").strip().lower()
            if link_type and "opds-catalog" in link_type:
                return True
            for rel_value in rel_candidates:
                if not rel_value:
                    continue
                if "acquisition" in rel_value:
                    return False
                if rel_value == "self":
                    continue
                if rel_value == "next":
                    return True
                if rel_value in {"start", "up", "down"}:
                    return True
                if rel_value.endswith("navigation") or rel_value.endswith("collection"):
                    return True
                if rel_value.startswith("http://opds-spec.org/"):
                    if rel_value.startswith("http://opds-spec.org/group") or rel_value.startswith(
                        "http://opds-spec.org/sort"
                    ):
                        return True
                    # NOTE(review): this duplicate suffix check is unreachable —
                    # the identical test above already returned True.
                    if rel_value.endswith("navigation") or rel_value.endswith("collection"):
                        return True
            return False
        def enqueue_link(link: OPDSLink, rel_hint: Optional[str] = None) -> None:
            # Queue a crawlable link exactly once, keyed by absolute URL.
            if not is_navigation_link(rel_hint, link):
                return
            href = (link.href or "").strip()
            if not href:
                return
            absolute = self._make_url(href)
            if absolute in queued or absolute in visited:
                return
            queued.add(absolute)
            queue.append(absolute)
        for rel_key, link in (start_feed.links or {}).items():
            enqueue_link(link, rel_key)
        for entry in start_feed.entries:
            for link in entry.links:
                enqueue_link(link, link.rel)
        pages_examined = 0
        # Breadth-first crawl, bounded by max_pages fetches; fetch failures
        # skip the page rather than aborting the whole search.
        while queue and pages_examined < max_pages:
            href = queue.popleft()
            if href in visited:
                continue
            visited.add(href)
            pages_examined += 1
            try:
                feed = self.fetch_feed(href)
            except CalibreOPDSError:
                continue
            add_matches(feed)
            for rel_key, link in (feed.links or {}).items():
                enqueue_link(link, rel_key)
            for entry in feed.entries:
                for link in entry.links:
                    enqueue_link(link, link.rel)
        if collected:
            return dataclasses.replace(start_feed, entries=collected)
        return dataclasses.replace(start_feed, entries=[])
    def download(self, href: str) -> DownloadedResource:
        """Download the resource at *href*, fully buffered in memory.

        Raises ValueError when *href* is empty and CalibreOPDSError on any
        HTTP failure (status errors carry the status code in the message).
        """
        if not href:
            raise ValueError("Download link missing")
        target = self._make_url(href)
        try:
            with self._open_client() as client:
                response = client.get(target, follow_redirects=True)
                response.raise_for_status()
        except httpx.HTTPStatusError as exc:  # pragma: no cover - thin wrapper
            raise CalibreOPDSError(
                f"Download failed with status {exc.response.status_code}"
            ) from exc
        except httpx.HTTPError as exc:  # pragma: no cover - thin wrapper
            raise CalibreOPDSError(f"Download failed: {exc}") from exc
        # Non-streaming get() reads the body eagerly, so using the response
        # after the client context has closed is safe.
        mime_type = response.headers.get("Content-Type", "application/octet-stream").split(";")[0].strip()
        filename = self._deduce_filename(response, target, mime_type)
        return DownloadedResource(filename=filename, mime_type=mime_type, content=response.content)
def _deduce_filename(self, response: httpx.Response, url: str, mime_type: str) -> str:
header = response.headers.get("Content-Disposition", "")
match = re.search(r'filename="?([^";]+)"?', header)
if match:
candidate = match.group(1).strip()
if candidate:
return candidate
parsed = urlparse(url)
stem = (parsed.path or "").strip("/").split("/")[-1]
if not stem:
stem = "download"
if "." not in stem:
extension = self._extension_for_mime(mime_type)
if extension:
stem = f"{stem}{extension}"
return stem
@staticmethod
def _extension_for_mime(mime_type: str) -> str:
normalized = mime_type.lower()
if normalized in _EPUB_MIME_TYPES:
return ".epub"
if normalized == "application/pdf":
return ".pdf"
if normalized in {"text/plain", "text/html"}:
return ".txt"
return ""
    def _parse_feed(self, xml_payload: str, *, base_url: str) -> OPDSFeed:
        """Parse an OPDS/Atom XML document into an OPDSFeed.

        Only entries that are either supported downloads or navigable
        catalogue nodes are kept. *base_url* resolves relative link hrefs.
        Raises CalibreOPDSError on malformed XML.
        """
        try:
            root = ET.fromstring(xml_payload)
        except ET.ParseError as exc:
            raise CalibreOPDSError(f"Unable to parse OPDS feed: {exc}") from exc
        feed_id = root.findtext("atom:id", default=None, namespaces=NS)
        feed_title = root.findtext("atom:title", default=None, namespaces=NS)
        links_list = self._extract_links(root.findall("atom:link", NS), base_url)
        links = self._links_to_dict(links_list)
        parsed_entries = [self._parse_entry(node, base_url) for node in root.findall("atom:entry", NS)]
        entries: List[OPDSEntry] = []
        for entry in parsed_entries:
            # Keep book entries we can actually download...
            if entry.download and self._is_supported_download(entry.download):
                entries.append(entry)
                continue
            # ...and folder-like entries we can navigate into; drop the rest.
            if self._has_navigation_link(entry):
                entries.append(entry)
        return OPDSFeed(id=feed_id, title=feed_title, entries=entries, links=links)
    def _parse_entry(self, node: ET.Element, base_url: str) -> OPDSEntry:
        """Convert a single <atom:entry> element into an OPDSEntry.

        Pulls title/authors/summary plus Calibre-specific extras (series,
        series index, tags, rating), falling back through several element
        names and, for series, through categories and summary text.
        """
        entry_id = node.findtext("atom:id", default="", namespaces=NS).strip()
        title = node.findtext("atom:title", default="Untitled", namespaces=NS).strip() or "Untitled"
        subtitle = (
            node.findtext("calibre_md:subtitle", default=None, namespaces=NS)
            or node.findtext("calibre:subtitle", default=None, namespaces=NS)
            or node.findtext("atom:subtitle", default=None, namespaces=NS)
        )
        subtitle = self._strip_html(subtitle.strip()) if subtitle else None
        position_value = self._extract_position(node)
        updated = node.findtext("atom:updated", default=None, namespaces=NS)
        published = (
            node.findtext("dc:date", default=None, namespaces=NS)
            or node.findtext("atom:published", default=None, namespaces=NS)
        )
        if published:
            published = published.strip() or None
        # Summary may live in atom:summary, atom:content or dc:description.
        summary_text = (
            self._extract_text(node.find("atom:summary", NS))
            or self._extract_text(node.find("atom:content", NS))
            or self._extract_text(node.find("dc:description", NS))
        )
        summary_metadata: Dict[str, str] = {}
        summary_body: Optional[str] = None
        if summary_text:
            # Calibre summaries can start with "KEY: value" metadata lines
            # (TAGS/RATING are consumed below).
            summary_metadata, summary_body = self._split_summary_metadata(summary_text)
        cleaned_summary = self._strip_html(summary_body or summary_text)
        authors: List[str] = []
        for author_node in node.findall("atom:author", NS):
            name = author_node.findtext("atom:name", default="", namespaces=NS).strip()
            if name:
                authors.append(name)
        if not authors:
            # Fall back to Dublin Core creators when no atom authors exist.
            creators = node.findall("dc:creator", NS)
            for creator in creators:
                value = (creator.text or "").strip()
                if value:
                    authors.append(value)
        links = node.findall("atom:link", NS)
        all_links = self._extract_links(links, base_url)
        link_dict = self._links_to_dict(all_links)
        download_link = self._select_download_link(all_links)
        alternate_link = link_dict.get("alternate")
        thumb_link = link_dict.get("http://opds-spec.org/image/thumbnail") or link_dict.get(
            "thumbnail"
        )
        series_name = (
            node.findtext("calibre:series", default=None, namespaces=NS)
            or node.findtext("calibre_md:series", default=None, namespaces=NS)
        )
        if series_name:
            series_name = series_name.strip() or None
        series_index_raw = (
            node.findtext("calibre:series_index", default=None, namespaces=NS)
            or node.findtext("calibre_md:series_index", default=None, namespaces=NS)
        )
        series_index: Optional[float] = None
        if series_index_raw is not None:
            text = str(series_index_raw).strip()
            if text:
                try:
                    series_index = float(text)
                except ValueError:
                    # Tolerate decimal commas and surrounding noise ("No. 1,5").
                    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", "."))
                    if match:
                        try:
                            series_index = float(match.group(0))
                        except ValueError:
                            series_index = None
        # Series fallbacks: categories first, then free-text summary lines.
        if series_name is None or series_index is None:
            category_series_name, category_series_index = self._extract_series_from_categories(
                node.findall("atom:category", NS),
                authors=authors,
            )
            if series_name is None and category_series_name:
                series_name = category_series_name
            if series_index is None and category_series_index is not None:
                series_index = category_series_index
        if (series_name is None or series_index is None) and summary_text:
            text_series_name, text_series_index = self._extract_series_from_text(summary_text)
            if series_name is None and text_series_name:
                series_name = text_series_name
            if series_index is None and text_series_index is not None:
                series_index = text_series_index
        tags_value = summary_metadata.get("TAGS")
        tags = self._parse_tags(tags_value) if tags_value else []
        rating_value = summary_metadata.get("RATING")
        rating, rating_max = self._parse_rating(rating_value) if rating_value else (None, None)
        return OPDSEntry(
            id=entry_id or title,
            title=title,
            position=position_value,
            authors=authors,
            subtitle=subtitle,
            updated=updated,
            published=published,
            summary=cleaned_summary,
            download=download_link,
            alternate=alternate_link,
            thumbnail=thumb_link,
            links=all_links,
            series=series_name,
            series_index=series_index,
            tags=tags,
            rating=rating,
            rating_max=rating_max,
        )
    def _extract_series_from_categories(
        self,
        category_nodes: List[ET.Element],
        *,
        authors: Optional[List[str]] = None,
    ) -> tuple[Optional[str], Optional[float]]:
        """Derive (series name, series index) from <atom:category> elements.

        Only categories whose scheme or label/term hints at "series" are
        considered, and candidate names equal to an author name are rejected.
        Either element of the returned tuple may be None.
        """
        name: Optional[str] = None
        index: Optional[float] = None
        author_set = {str(author).strip().casefold() for author in (authors or []) if str(author).strip()}
        for category in category_nodes:
            scheme = (category.attrib.get("scheme") or "").strip().lower()
            label = (category.attrib.get("label") or "").strip()
            term = (category.attrib.get("term") or "").strip()
            values: List[str] = []
            if label:
                values.append(label)
            if term and term not in values:
                values.append(term)
            # Be conservative: category schemes are often URLs and can contain unrelated substrings.
            # Also, some catalog feeds incorrectly include author names in series-like categories.
            is_series_hint = self._is_series_scheme(scheme) or any("series" in value.lower() for value in values if value)
            if not is_series_hint:
                continue
            for value in values:
                if not value:
                    continue
                candidate_name, candidate_index = self._parse_series_value(value)
                if candidate_name and candidate_name.casefold() in author_set:
                    # Guardrail: avoid mapping the author name into series.
                    continue
                # First usable name/index wins; later categories only fill gaps.
                if candidate_name and not name:
                    name = candidate_name
                if candidate_index is not None and index is None:
                    index = candidate_index
                if name and index is not None:
                    return name, index
        return name, index
@staticmethod
def _is_series_scheme(scheme: str) -> bool:
cleaned = (scheme or "").strip().lower()
if not cleaned:
return False
if "author" in cleaned:
return False
return bool(re.search(r"(^|[/#:\-])series([/#:\-]|$)", cleaned))
    def _parse_series_value(self, value: str) -> tuple[Optional[str], Optional[float]]:
        """Split a raw series string such as "Dune [2]" into (name, index).

        Tries, in order, bracketed ("[2]"), hash ("#2") and "Book 2" style
        numbers — the exact patterns come from the module-level
        _SERIES_NUMBER_* regexes — removing the matched number from the name.
        Either element of the returned tuple may be None.
        """
        cleaned = re.sub(r"\s+", " ", value or "").strip()
        if not cleaned:
            return None, None
        # Strip whatever leading prefix _SERIES_PREFIX_RE matches
        # (presumably a "Series:"-style label — pattern defined at module level).
        cleaned = _SERIES_PREFIX_RE.sub("", cleaned)
        working = cleaned
        number: Optional[float] = None
        bracket_match = _SERIES_NUMBER_BRACKET_RE.search(working)
        if bracket_match:
            number = self._coerce_series_index(bracket_match.group(1))
            start, end = bracket_match.span()
            working = (working[:start] + working[end:]).strip()
        if number is None:
            hash_match = _SERIES_NUMBER_HASH_RE.search(working)
            if hash_match:
                number = self._coerce_series_index(hash_match.group(1))
                start, end = hash_match.span()
                working = (working[:start] + working[end:]).strip()
        if number is None:
            book_match = _SERIES_NUMBER_BOOK_RE.search(working)
            if book_match:
                number = self._coerce_series_index(book_match.group(1))
                start, end = book_match.span()
                working = (working[:start] + working[end:]).strip()
        # Tidy punctuation left behind by the removed number.
        name = working.strip(" -–—,:")
        name = re.sub(r"\s+", " ", name).strip()
        if not name:
            name = None
        return name, number
@staticmethod
def _extract_text(node: Optional[ET.Element]) -> Optional[str]:
if node is None:
return None
# Prefer itertext to capture nested XHTML content
parts = list(node.itertext())
if not parts:
return (node.text or "").strip() or None
combined = "".join(parts).strip()
return combined or None
    def _extract_series_from_text(self, text: str) -> tuple[Optional[str], Optional[float]]:
        """Scan summary text line-by-line for a series declaration.

        Each line is matched against the module-level _SERIES_LINE_TEXT_RE;
        the captured remainder is parsed via _parse_series_value. Returns the
        first line yielding a name or index, else (None, None).
        """
        for line in text.splitlines():
            match = _SERIES_LINE_TEXT_RE.match(line)
            if not match:
                continue
            candidate = match.group(1).strip()
            if not candidate:
                continue
            name, number = self._parse_series_value(candidate)
            if name or number is not None:
                return name, number
        return None, None
    def _split_summary_metadata(self, text: Optional[str]) -> tuple[Dict[str, str], Optional[str]]:
        """Split a summary into leading "KEY: value" metadata lines and body text.

        The contiguous run of metadata lines at the top (as matched by the
        module-level _SUMMARY_METADATA_LINE_RE) is collected into a dict with
        upper-cased keys — callers look up "TAGS" and "RATING". The remaining
        text is returned as the second element (None when empty).
        """
        metadata: Dict[str, str] = {}
        if text is None:
            return metadata, None
        lines = text.splitlines()
        index = 0
        total = len(lines)
        # Skip leading blank lines.
        while index < total and not lines[index].strip():
            index += 1
        # Consume the contiguous run of metadata lines; the first blank or
        # non-matching line ends the header.
        while index < total:
            stripped = lines[index].strip()
            if not stripped:
                break
            match = _SUMMARY_METADATA_LINE_RE.match(stripped)
            if not match:
                break
            key = match.group(1).strip().upper()
            value = match.group(2).strip()
            if key and value:
                metadata[key] = value
            index += 1
        remainder = "\n".join(lines[index:]).strip()
        return metadata, (remainder or None)
@staticmethod
def _parse_tags(value: str) -> List[str]:
if not value:
return []
tokens = re.split(r"[;,\n]\s*", value)
cleaned: List[str] = []
seen: set[str] = set()
for token in tokens:
entry = token.strip()
if not entry:
continue
key = entry.casefold()
if key in seen:
continue
seen.add(key)
cleaned.append(entry)
return cleaned
@staticmethod
def _parse_rating(value: str) -> tuple[Optional[float], Optional[float]]:
if not value:
return None, None
text = value.strip()
if not text:
return None, None
stars = text.count("★")
half = 0.5 if "½" in text else 0.0
if stars or half:
rating = stars + half
return (rating if rating > 0 else None, 5.0)
match = re.search(r"\d+(?:\.\d+)?", text.replace(",", "."))
if match:
try:
rating_value = float(match.group(0))
except ValueError:
return None, None
return rating_value, 5.0
return None, None
@staticmethod
def _coerce_series_index(value: str) -> Optional[float]:
text = value.strip().replace(",", ".")
if not text:
return None
try:
return float(text)
except ValueError:
return None
    def _extract_position(self, node: ET.Element) -> Optional[int]:
        """Extract an ordering hint for an entry, or None.

        Checks several opds position-like elements and finally dc:identifier,
        returning the first value that parses as a number, truncated to int.
        """
        candidates = [
            node.findtext("opds:position", default=None, namespaces=NS),
            node.findtext("opds:groupPosition", default=None, namespaces=NS),
            node.findtext("opds:order", default=None, namespaces=NS),
            node.findtext("dc:identifier", default=None, namespaces=NS),
        ]
        for value in candidates:
            if value is None:
                continue
            text = str(value).strip()
            if not text:
                continue
            try:
                # float() first so values like "3.0" still coerce to 3.
                return int(float(text))
            except (TypeError, ValueError):
                continue
        return None
def _extract_links(self, link_nodes: List[ET.Element], base_url: str) -> List[OPDSLink]:
links: List[OPDSLink] = []
for link in link_nodes:
href = link.attrib.get("href")
if not href:
continue
rel = link.attrib.get("rel")
link_type = link.attrib.get("type")
title = link.attrib.get("title")
base_for_join = base_url or self._base_url
absolute_href = urljoin(base_for_join, href)
links.append(OPDSLink(href=absolute_href, rel=rel, type=link_type, title=title))
return links
def _links_to_dict(self, links: List[OPDSLink]) -> Dict[str, OPDSLink]:
results: Dict[str, OPDSLink] = {}
for entry in links:
key = entry.rel or entry.href
if not key:
continue
# Prioritize search links with template parameters
if key == "search" and key in results:
existing = results[key]
if "{searchTerms}" in (existing.href or ""):
continue
if "{searchTerms}" in (entry.href or ""):
results[key] = entry
continue
results[key] = entry
return results
@staticmethod
def _is_supported_download(link: OPDSLink) -> bool:
mime = (link.type or "").split(";")[0].strip().lower()
if mime in _SUPPORTED_DOWNLOAD_MIME_TYPES:
return True
href = (link.href or "").strip()
if not href:
return False
parsed_path = urlparse(href).path or ""
extension = PurePosixPath(parsed_path).suffix.lower()
return extension in _SUPPORTED_DOWNLOAD_EXTENSIONS
@staticmethod
def _select_download_link(links: Mapping[str, OPDSLink] | Iterable[OPDSLink]) -> Optional[OPDSLink]:
if isinstance(links, Mapping):
iterable: List[OPDSLink] = list(links.values())
else:
iterable = list(links)
supported = [link for link in iterable if CalibreOPDSClient._is_supported_download(link)]
best: Optional[OPDSLink] = None
for link in supported:
rel = (link.rel or "").lower()
if "acquisition" not in rel:
continue
mime = (link.type or "").lower()
if mime in _EPUB_MIME_TYPES:
return link
if best is None:
best = link
if best:
return best
if supported:
return supported[0]
# No valid acquisition-style link exposed
return None
@staticmethod
def _resolve_link(links: Optional[Mapping[str, OPDSLink]], rel: str) -> Optional[OPDSLink]:
if not links:
return None
if rel in links:
return links[rel]
rel_lower = rel.lower()
for key, link in links.items():
key_lower = (key or "").strip().lower()
if key_lower == rel_lower or key_lower.endswith(rel_lower):
return link
return None
@staticmethod
def _is_navigation_link(link: OPDSLink) -> bool:
href = (link.href or "").strip()
if not href:
return False
rel = (link.rel or "").strip().lower()
link_type = (link.type or "").strip().lower()
if "acquisition" in rel:
return False
if rel == "self":
return False
if "opds-catalog" in link_type:
return True
if rel.endswith("navigation") or rel.endswith("collection"):
return True
if rel.startswith("http://opds-spec.org/sort") or rel.startswith("http://opds-spec.org/group"):
return True
return False
@staticmethod
def _has_navigation_link(entry: OPDSEntry) -> bool:
return any(CalibreOPDSClient._is_navigation_link(link) for link in entry.links)
@staticmethod
def _browse_mode_for_title(title: Optional[str]) -> str:
if not title:
return "generic"
lowered = title.lower()
if "author" in lowered:
return "author"
if "series" in lowered:
return "series"
if "title" in lowered or "book" in lowered:
return "title"
return "generic"
@staticmethod
def _strip_leading_article(text: str) -> str:
working = text.strip()
lowered = working.lower()
for article in ("the ", "a ", "an "):
if lowered.startswith(article):
return working[len(article):].strip()
return working
@staticmethod
def _alphabet_source(entry: OPDSEntry, mode: str) -> str:
if mode == "author" and entry.authors:
candidate = entry.authors[0] or ""
if "," in candidate:
return candidate.split(",", 1)[0].strip()
parts = candidate.split()
if len(parts) > 1:
return parts[-1].strip()
return candidate.strip()
if mode == "series" and entry.series:
return entry.series.strip()
if entry.title:
return entry.title.strip()
if entry.series:
return entry.series.strip()
for link in entry.links:
if link.title:
return link.title.strip()
return ""
@staticmethod
def _normalize_text(text: str) -> str:
if not text:
return ""
# Normalize unicode characters to their base form (e.g. é -> e)
normalized = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('utf-8')
return normalized.lower().strip()
@staticmethod
def _alphabet_letter_for_entry(entry: OPDSEntry, mode: str) -> Optional[str]:
source = CalibreOPDSClient._alphabet_source(entry, mode)
if not source:
return None
if mode == "title":
source = CalibreOPDSClient._strip_leading_article(source)
# Normalize to handle accents (É -> E)
normalized_source = unicodedata.normalize('NFKD', source).encode('ASCII', 'ignore').decode('utf-8')
cleaned = re.sub(r"^[^0-9A-Za-z]+", "", normalized_source)
if not cleaned:
return "#"
initial = cleaned[0]
if initial.isalpha():
return initial.upper()
if initial.isdigit():
return "#"
return "#"
@staticmethod
def _entry_matches_query(entry: OPDSEntry, tokens: List[str]) -> bool:
if not tokens:
return True
search_fragments: List[str] = []
if entry.title:
search_fragments.append(CalibreOPDSClient._normalize_text(entry.title))
if entry.series:
search_fragments.append(CalibreOPDSClient._normalize_text(entry.series))
for author in entry.authors:
cleaned = (author or "").strip()
if not cleaned:
continue
normalized_author = CalibreOPDSClient._normalize_text(cleaned)
search_fragments.append(normalized_author)
for part in re.split(r"[\s,]+", normalized_author):
part = part.strip()
if part:
search_fragments.append(part)
if not search_fragments:
return False
# Check if all tokens match at least one fragment
# Tokens are already normalized in _filter_feed_entries
return all(any(token in fragment for fragment in search_fragments) for token in tokens)
def _filter_feed_entries(self, feed: OPDSFeed, query: str) -> OPDSFeed:
normalized_query = CalibreOPDSClient._normalize_text(query)
if not normalized_query:
return feed
tokens = [token for token in re.split(r"\s+", normalized_query) if token]
if not tokens:
return feed
scored_entries = []
for entry in feed.entries:
if not self._entry_matches_query(entry, tokens):
continue
score = self._calculate_match_score(entry, tokens)
# Require a minimum score to avoid weak matches (e.g. single word in summary)
if score >= 10:
scored_entries.append((score, entry))
# Sort by score descending
scored_entries.sort(key=lambda x: x[0], reverse=True)
filtered = [e for s, e in scored_entries]
return dataclasses.replace(feed, entries=filtered)
def _estimate_letter_position(self, letter: str) -> float:
"""Estimate the relative position (0.0-1.0) of a letter in an alphabetical list."""
if letter == "#":
return 0.0
if not letter or not letter.isalpha():
return 0.0
# Approximate cumulative distribution of starting letters in English book titles
# This is a heuristic to jump closer to the target
weights = {
'A': 0.00, 'B': 0.08, 'C': 0.15, 'D': 0.22, 'E': 0.28,
'F': 0.33, 'G': 0.38, 'H': 0.43, 'I': 0.49, 'J': 0.53,
'K': 0.55, 'L': 0.58, 'M': 0.63, 'N': 0.68, 'O': 0.71,
'P': 0.75, 'Q': 0.80, 'R': 0.81, 'S': 0.85, 'T': 0.92,
'U': 0.97, 'V': 0.98, 'W': 0.99, 'X': 0.995, 'Y': 0.997, 'Z': 0.999
}
return weights.get(letter.upper(), 0.0)
def _attempt_smart_jump(self, feed: OPDSFeed, letter: str) -> Optional[OPDSFeed]:
"""
Attempt to jump to a page closer to the target letter by analyzing pagination links.
Returns a new OPDSFeed if a jump was successful, or None.
"""
first_link = self._resolve_link(feed.links, "first")
last_link = self._resolve_link(feed.links, "last")
next_link = self._resolve_link(feed.links, "next")
if not (first_link and last_link and next_link):
return None
# Try to extract offsets from URLs to determine page size and total items
# Common Calibre pattern: .../offset/0, .../offset/50
def extract_offset(href: str) -> Optional[int]:
match = re.search(r"/(\d+)/?$", href)
if match:
return int(match.group(1))
# Try query param
parsed = urlparse(href)
qs = dict(pair.split('=') for pair in parsed.query.split('&') if '=' in pair)
if 'offset' in qs:
try:
return int(qs['offset'])
except ValueError:
pass
return None
start_offset = extract_offset(first_link.href)
next_offset = extract_offset(next_link.href)
last_offset = extract_offset(last_link.href)
if start_offset is None or next_offset is None or last_offset is None:
return None
page_size = next_offset - start_offset
if page_size <= 0:
return None
# Estimate total items (last_offset is the start of the last page)
# We assume the last page is roughly half full for estimation
total_items = last_offset + (page_size // 2)
target_ratio = self._estimate_letter_position(letter)
# Aim slightly early (subtract 1-2 pages worth) to ensure we don't miss the start
target_offset = int(total_items * target_ratio)
target_offset = max(0, target_offset - (page_size * 2))
# Round to nearest page boundary
target_offset = (target_offset // page_size) * page_size
# If the jump is too small (e.g. we are already near the start), don't bother
if target_offset < (page_size * 3):
return None
# Construct the new URL
# We assume the URL structure is consistent and we can just replace the offset
# This is risky but works for standard Calibre OPDS
base_href = first_link.href
if str(start_offset) in base_href:
# Path based replacement
# Replace the last occurrence of the offset
parts = base_href.rsplit(str(start_offset), 1)
if len(parts) == 2:
new_href = f"{parts[0]}{target_offset}{parts[1]}"
try:
return self.fetch_feed(new_href)
except Exception:
return None
return None
def browse_letter(
self,
letter: str,
*,
start_href: Optional[str] = None,
max_pages: int = 40,
) -> OPDSFeed:
normalized = (letter or "").strip()
if not normalized:
return self.fetch_feed(start_href)
key = normalized.upper()
if key in {"ALL", "*"}:
return self.fetch_feed(start_href)
if key in {"0-9", "NUMERIC"}:
key = "#"
if len(key) > 1:
key = key[0]
if key != "#" and not key.isalpha():
key = "#"
base_feed = self.fetch_feed(start_href)
# Ensure we start from the beginning of the feed if possible
first_link = self._resolve_link(base_feed.links, "first") or self._resolve_link(base_feed.links, "start")
if first_link and first_link.href:
try:
# Only switch if the href is different to avoid redundant fetch
if not start_href or first_link.href != start_href:
base_feed = self.fetch_feed(first_link.href)
except CalibreOPDSError:
pass
mode = self._browse_mode_for_title(base_feed.title)
def letter_matches(entry: OPDSEntry, active_mode: str) -> bool:
letter_value = self._alphabet_letter_for_entry(entry, active_mode)
if not letter_value:
return False
if key == "#":
return letter_value == "#"
return letter_value == key
collected: List[OPDSEntry] = []
seen_ids: Set[str] = set()
letter_href: Optional[str] = None
def add_entry(entry: OPDSEntry) -> None:
entry_id = (entry.id or "").strip()
if entry_id:
if entry_id in seen_ids:
return
seen_ids.add(entry_id)
collected.append(entry)
# Check the first page for navigation links before attempting any jumps
# This handles the case where "By Title" has "A", "B", "C" folders
has_nav_links = False
for entry in base_feed.entries:
if self._has_navigation_link(entry):
has_nav_links = True
if letter_matches(entry, mode):
for link in entry.links:
if self._is_navigation_link(link):
href = (link.href or "").strip()
if href:
letter_href = href
break
if letter_href:
break
# If we didn't find a direct link, and it looks like a flat list (no nav links matching criteria),
# try to jump closer to the target letter if we are in a sorted mode
if not letter_href and mode in {"title", "author", "series"}:
jump_feed = self._attempt_smart_jump(base_feed, key)
if jump_feed:
base_feed = jump_feed
for page in self._iter_paginated_feeds(base_feed, max_pages=max_pages):
for entry in page.entries:
if not letter_matches(entry, mode):
continue
if self._has_navigation_link(entry):
if letter_href is None:
for link in entry.links:
if self._is_navigation_link(link):
href = (link.href or "").strip()
if href:
letter_href = href
break
else:
add_entry(entry)
letter_feed: Optional[OPDSFeed] = None
if letter_href:
try:
letter_feed = self.fetch_feed(letter_href)
except CalibreOPDSError:
letter_feed = None
else:
letter_mode = self._browse_mode_for_title(letter_feed.title)
for page in self._iter_paginated_feeds(letter_feed, max_pages=max_pages):
for entry in page.entries:
if not letter_matches(entry, letter_mode):
continue
if self._has_navigation_link(entry):
continue
add_entry(entry)
template = letter_feed or base_feed
if collected:
return dataclasses.replace(template, entries=collected)
return dataclasses.replace(template, entries=[])
def _resolve_search_url(self, feed: OPDSFeed, query: str) -> Optional[str]:
link = self._resolve_link(feed.links, "search")
if not link:
link = self._resolve_link(feed.links, "http://opds-spec.org/search")
if not link or not link.href:
return None
href = link.href.strip()
if "{searchTerms}" in href:
return href.replace("{searchTerms}", quote(query))
return href
def _calculate_match_score(self, entry: OPDSEntry, tokens: List[str]) -> int:
if not tokens:
return 0
score = 0
# Prepare normalized text
title = self._normalize_text(entry.title)
authors = [self._normalize_text(a) for a in entry.authors]
series = self._normalize_text(entry.series) if entry.series else ""
summary = self._normalize_text(entry.summary) if entry.summary else ""
tags = [self._normalize_text(t) for t in entry.tags]
# 1. Exact/Phrase matches
query_phrase = " ".join(tokens)
if query_phrase == title:
score += 1000
elif query_phrase in title:
score += 500
for author in authors:
if query_phrase in author:
score += 300
if query_phrase in series:
score += 200
for tag in tags:
if query_phrase == tag:
score += 100
elif query_phrase in tag:
score += 50
# 2. Token matches
# Filter out stop words unless the query is only stop words
significant_tokens = [t for t in tokens if t not in _STOP_WORDS]
if not significant_tokens:
significant_tokens = tokens
for token in significant_tokens:
token_score = 0
# Use regex for word boundary matching
# Escape token to handle special chars
token_regex = r"\b" + re.escape(token) + r"\b"
# Title: High weight
if re.search(token_regex, title):
token_score = max(token_score, 50)
elif token in title:
token_score = max(token_score, 5)
# Author: Medium-High weight
for author in authors:
if re.search(token_regex, author):
token_score = max(token_score, 40)
elif token in author:
token_score = max(token_score, 5)
# Series: Medium weight
if token in series:
if re.search(token_regex, series):
token_score = max(token_score, 30)
else:
token_score = max(token_score, 5)
# Tags: Medium weight
for tag in tags:
if re.search(token_regex, tag):
token_score = max(token_score, 30)
elif token in tag:
token_score = max(token_score, 5)
# Summary: Low weight
if token in summary:
if re.search(token_regex, summary):
# Only add if not found elsewhere? Or just add small amount?
if token_score == 0:
token_score = 15
else:
token_score += 5 # Small boost if also in description
elif token_score == 0:
token_score = 2 # Very low for substring in summary
score += token_score
return score
import gpustat
def check():
    """Return True when gpustat reports a GPU whose name looks NVIDIA-made."""
    try:
        stats = gpustat.new_query()
    except Exception:
        # No driver / no gpustat backend available: treat as "no NVIDIA GPU".
        return False
    markers = ["nvidia", "rtx", "gtx", "quadro", "tesla", "titan", "mx"]
    return any(
        marker in gpu.name.lower() for gpu in stats.gpus for marker in markers
    )
if __name__ == "__main__":
    # Debug aid: list every detected GPU name, then the NVIDIA-detection result.
    for gpu in gpustat.new_query().gpus:
        print(gpu.name)
    print(check())

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

from __future__ import annotations
import json
from dataclasses import dataclass
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
from urllib import error, parse, request
class LLMClientError(RuntimeError):
    """Raised when an LLM request fails.

    Single exception type covering transport errors, non-JSON responses and
    malformed/empty completion payloads, so callers catch one class.
    """
@dataclass(frozen=True)
class LLMConfiguration:
    """Connection settings for an OpenAI-compatible chat endpoint."""

    base_url: str
    api_key: str
    model: str
    timeout: float = 30.0

    def is_configured(self) -> bool:
        """True when both a non-blank base URL and model name are set."""
        has_url = bool(self.base_url.strip())
        has_model = bool(self.model.strip())
        return has_url and has_model
@dataclass(frozen=True)
class LLMToolCall:
    """A single tool/function invocation requested by the model."""
    # Function name as reported by the model.
    name: str
    # Argument payload as a string (JSON text when the model supplied
    # structured arguments); callers decode as needed.
    arguments: str
@dataclass(frozen=True)
class LLMCompletion:
    """Parsed result of a chat completion request."""
    # Stripped text content, or None when the model returned none.
    content: Optional[str]
    # Tool calls requested by the model (possibly empty).
    tool_calls: Tuple[LLMToolCall, ...]
# Headers sent with every request; an Authorization header is added per-call
# when an API key is configured (see _build_headers).
_DEFAULT_HEADERS = {
    "Content-Type": "application/json",
    "Accept": "application/json",
}
def _normalized_base_url(base_url: str) -> str:
trimmed = (base_url or "").strip()
if not trimmed:
raise LLMClientError("LLM base URL is required")
if not trimmed.endswith("/"):
trimmed += "/"
return trimmed
def _build_url(base_url: str, path: str) -> str:
    """Join *path* onto the base URL, avoiding a duplicated "v1" segment.

    If the base URL already ends in /v1 and the path starts with v1/, the
    path's prefix is dropped so the result does not contain /v1/v1/.
    """
    root = _normalized_base_url(base_url)
    relative = path.lstrip("/")
    root_path = parse.urlparse(root).path
    if root_path.rstrip("/").lower().endswith("/v1") and relative.startswith("v1/"):
        relative = relative[len("v1/") :]
    return parse.urljoin(root, relative)
def _build_headers(api_key: str) -> Dict[str, str]:
    """Return request headers, adding bearer auth when a real key is given.

    A key equal to "ollama" (case-insensitive) is not sent as a bearer
    token — the code treats it as a no-auth placeholder.
    """
    headers = dict(_DEFAULT_HEADERS)
    key = (api_key or "").strip()
    if key and key.lower() != "ollama":
        headers["Authorization"] = f"Bearer {key}"
    return headers
def _perform_request(
    method: str,
    url: str,
    *,
    headers: Optional[Mapping[str, str]] = None,
    payload: Optional[Mapping[str, Any]] = None,
    timeout: float = 30.0,
) -> Any:
    """Issue an HTTP request and decode its JSON body.

    Returns None for an empty response body. Raises LLMClientError for any
    transport failure or when the body is not valid JSON.
    """
    encoded: Optional[bytes] = None
    if payload is not None:
        encoded = json.dumps(payload).encode("utf-8")
    req = request.Request(
        url,
        data=encoded,
        headers=dict(headers or {}),
        method=method.upper(),
    )
    try:
        with request.urlopen(req, timeout=timeout) as response:
            raw = response.read()
    except error.HTTPError as exc:  # pragma: no cover - defensive network guard
        detail = exc.read().decode("utf-8", "ignore") if exc.fp else exc.reason
        raise LLMClientError(f"LLM request failed ({exc.code}): {detail}") from exc
    except error.URLError as exc:  # pragma: no cover - defensive network guard
        raise LLMClientError(f"LLM request failed: {exc.reason}") from exc
    except Exception as exc:  # pragma: no cover - defensive network guard
        raise LLMClientError("LLM request failed") from exc
    if not raw:
        return None
    try:
        return json.loads(raw.decode("utf-8"))
    except json.JSONDecodeError as exc:
        raise LLMClientError("LLM response was not valid JSON") from exc
def list_models(configuration: LLMConfiguration) -> List[Dict[str, str]]:
    """Fetch the model catalogue from the endpoint's /v1/models route.

    Only a base URL is required here (a model name is not needed to list
    models). Malformed catalogue entries are skipped silently.
    """
    if not configuration.is_configured() and not configuration.base_url.strip():
        raise LLMClientError("LLM configuration is incomplete")
    url = _build_url(configuration.base_url, "v1/models")
    headers = _build_headers(configuration.api_key)
    payload = _perform_request(
        "GET", url, headers=headers, timeout=configuration.timeout
    )
    if not isinstance(payload, Mapping):
        raise LLMClientError("Unexpected response when listing models")
    raw_entries = payload.get("data")
    if not isinstance(raw_entries, list):
        return []
    catalogue: List[Dict[str, str]] = []
    for item in raw_entries:
        if not isinstance(item, Mapping):
            continue
        model_id = str(item.get("id") or "").strip()
        if not model_id:
            continue
        label = str(item.get("name") or item.get("description") or model_id)
        catalogue.append({"id": model_id, "label": label})
    return catalogue
def generate_completion(
    configuration: LLMConfiguration,
    *,
    system_message: str,
    user_message: str,
    temperature: float = 0.2,
    max_tokens: Optional[int] = None,
    tools: Optional[Sequence[Mapping[str, Any]]] = None,
    tool_choice: Optional[Mapping[str, Any]] = None,
    response_format: Optional[Mapping[str, Any]] = None,
) -> LLMCompletion:
    """Run one chat completion and parse its first choice.

    Sends a system+user message pair to /v1/chat/completions, optionally
    with tool definitions, a forced tool choice and a response format.
    Returns an LLMCompletion with the stripped text content (or None) and
    any tool calls. Raises LLMClientError when the configuration is
    incomplete or the response lacks usable content.
    """
    if not configuration.is_configured():
        raise LLMClientError("LLM configuration is incomplete")
    url = _build_url(configuration.base_url, "v1/chat/completions")
    headers = _build_headers(configuration.api_key)
    payload: Dict[str, Any] = {
        "model": configuration.model,
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
        "temperature": temperature,
    }
    # Optional request fields are only included when provided.
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens
    if tools:
        payload["tools"] = list(tools)
    if tool_choice:
        payload["tool_choice"] = dict(tool_choice)
    if response_format:
        payload["response_format"] = dict(response_format)
    response = _perform_request(
        "POST", url, headers=headers, payload=payload, timeout=configuration.timeout
    )
    if not isinstance(response, Mapping):
        raise LLMClientError("Unexpected response from LLM")
    choices = response.get("choices")
    if not isinstance(choices, list) or not choices:
        raise LLMClientError("LLM response did not include choices")
    # Only the first choice is inspected.
    first = choices[0]
    if not isinstance(first, Mapping):
        raise LLMClientError("LLM response choice was invalid")
    message = first.get("message")
    content: Optional[str] = None
    tool_calls: List[LLMToolCall] = []
    if isinstance(message, Mapping):
        content = message.get("content")
        # NOTE(review): non-string content values (e.g. content-part lists)
        # pass through unchanged and may be returned as-is — confirm the
        # targeted servers only ever send plain strings here.
        if isinstance(content, str):
            stripped = content.strip()
            if stripped:
                content = stripped
            else:
                content = None
        tool_call_entries = message.get("tool_calls")
        if isinstance(tool_call_entries, list):
            for entry in tool_call_entries:
                if not isinstance(entry, Mapping):
                    continue
                fn = entry.get("function")
                if not isinstance(fn, Mapping):
                    continue
                name = str(fn.get("name") or "").strip()
                if not name:
                    continue
                args = fn.get("arguments", "")
                # Normalize arguments to a string (JSON when structured).
                if isinstance(args, (dict, list)):
                    arguments = json.dumps(args)
                else:
                    arguments = str(args)
                tool_calls.append(LLMToolCall(name=name, arguments=arguments))
    if content:
        return LLMCompletion(content=content, tool_calls=tuple(tool_calls))
    # Fallback for legacy completion-style responses that use "text".
    text = first.get("text")
    if isinstance(text, str):
        stripped = text.strip()
        if stripped:
            content = stripped
    if content or tool_calls:
        return LLMCompletion(content=content, tool_calls=tuple(tool_calls))
    raise LLMClientError("LLM response did not include text content")
"""Backwards-compatible entry point that now launches the web UI."""
from __future__ import annotations
import atexit
import os
import platform
import signal
import sys
from abogen.utils import load_config, prevent_sleep_end
from abogen.webui.app import main as _run_web_ui
# Configure Hugging Face Hub behaviour (mirrors legacy GUI defaults).
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
os.environ.setdefault("HF_HUB_ETAG_TIMEOUT", "10")
os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "10")
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
if load_config().get("disable_kokoro_internet", False):
os.environ["HF_HUB_OFFLINE"] = "1"
# Prefer faster ROCm tuning defaults when available.
os.environ.setdefault("MIOPEN_FIND_MODE", "FAST")
os.environ.setdefault("MIOPEN_CONV_PRECISE_ROCM_TUNING", "0")
# Enable MPS GPU acceleration on Apple Silicon.
if platform.system() == "Darwin" and platform.processor() == "arm":
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
atexit.register(prevent_sleep_end)
def _cleanup_sleep(signum, _frame):
    """Signal handler: restore OS sleep settings, then exit cleanly."""
    prevent_sleep_end()
    sys.exit(0)
# Also undo sleep prevention on Ctrl+C / termination signals.
signal.signal(signal.SIGINT, _cleanup_sleep)
signal.signal(signal.SIGTERM, _cleanup_sleep)
def main() -> None:
    """Launch the Flask-based web UI."""
    _run_web_ui()
# Manual execution entry point; installed scripts call main() directly.
if __name__ == "__main__":  # pragma: no cover - manual execution hook
    main()
from __future__ import annotations
import os
from dataclasses import replace
from functools import lru_cache
from typing import Any, Dict, Mapping, Optional
from abogen.kokoro_text_normalization import (
ApostropheConfig,
CONTRACTION_CATEGORY_DEFAULTS,
)
from abogen.llm_client import LLMConfiguration
from abogen.utils import load_config
# Default tool-calling prompt; {{ sentence }} is substituted at request time.
DEFAULT_LLM_PROMPT = (
    "You are assisting with audiobook preparation. Analyze the sentence and identify any apostrophes or "
    "contractions that should be expanded for clarity. Call the apply_regex_replacements tool with precise "
    "regex substitutions for only the words that need adjustment. If no changes are required, return an empty list.\n"
    "Sentence: {{ sentence }}"
)
# Old rewrite-style prompt; configs still carrying it are migrated to
# DEFAULT_LLM_PROMPT by _apply_llm_migrations.
_LEGACY_REWRITE_ONLY_PROMPT = (
    "You are assisting with audiobook preparation. Rewrite the provided sentence so apostrophes and "
    "contractions are unambiguous for text-to-speech. Respond with only the rewritten sentence.\n"
    "Sentence: {{ sentence }}\n"
    "Context: {{ paragraph }}"
)
# Canonical setting keys and their defaults; the default's type drives value
# coercion in _extract_settings (bool/float/str).
_SETTINGS_DEFAULTS: Dict[str, Any] = {
    "llm_base_url": "",
    "llm_api_key": "",
    "llm_model": "",
    "llm_timeout": 30.0,
    "llm_prompt": DEFAULT_LLM_PROMPT,
    "llm_context_mode": "sentence",
    "normalization_numbers": True,
    "normalization_numbers_year_style": "american",
    "normalization_currency": True,
    "normalization_footnotes": True,
    "normalization_titles": True,
    "normalization_terminal": True,
    "normalization_phoneme_hints": True,
    "normalization_caps_quotes": True,
    "normalization_internet_slang": False,
    "normalization_apostrophes_contractions": True,
    "normalization_apostrophes_plural_possessives": True,
    "normalization_apostrophes_sibilant_possessives": True,
    "normalization_apostrophes_decades": True,
    "normalization_apostrophes_leading_elisions": True,
    "normalization_apostrophe_mode": "spacy",
    "normalization_contraction_aux_be": True,
    "normalization_contraction_aux_have": True,
    "normalization_contraction_modal_will": True,
    "normalization_contraction_modal_would": True,
    "normalization_contraction_negation_not": True,
    "normalization_contraction_let_us": True,
}
# Maps persisted setting keys to contraction-category names used by
# ApostropheConfig.contraction_categories.
_CONTRACTION_SETTING_MAP: Dict[str, str] = {
    "normalization_contraction_aux_be": "contraction_aux_be",
    "normalization_contraction_aux_have": "contraction_aux_have",
    "normalization_contraction_modal_will": "contraction_modal_will",
    "normalization_contraction_modal_would": "contraction_modal_would",
    "normalization_contraction_negation_not": "contraction_negation_not",
    "normalization_contraction_let_us": "contraction_let_us",
}
# Environment variables that can override LLM settings (see
# _environment_defaults).
_ENVIRONMENT_KEYS: Dict[str, str] = {
    "llm_base_url": "ABOGEN_LLM_BASE_URL",
    "llm_api_key": "ABOGEN_LLM_API_KEY",
    "llm_model": "ABOGEN_LLM_MODEL",
    "llm_timeout": "ABOGEN_LLM_TIMEOUT",
    "llm_prompt": "ABOGEN_LLM_PROMPT",
    "llm_context_mode": "ABOGEN_LLM_CONTEXT_MODE",
}
# Sample sentences used to preview each normalization feature in the UI.
NORMALIZATION_SAMPLE_TEXTS: Dict[str, str] = {
    "apostrophes": "I've heard the captain'll arrive by dusk, but they'd said the same yesterday.",
    "numbers": "The ledger listed 1,204 outstanding debts totaling $57,890.",
    "titles": "Dr. Smith met Mr. O'Leary outside St. John's Church on Jan. 4th.",
    "punctuation": "Meet me at the docks tonight We'll decide then", # missing punctuation
}
@lru_cache(maxsize=1)
def _environment_defaults() -> Dict[str, Any]:
    """Read LLM setting overrides from the environment (cached per process).

    Values are coerced to the type of the corresponding built-in default;
    unset or empty variables are ignored.
    """
    overrides: Dict[str, Any] = {}
    for setting_key, env_name in _ENVIRONMENT_KEYS.items():
        fallback = _SETTINGS_DEFAULTS.get(setting_key)
        if fallback is None:
            continue
        raw = os.environ.get(env_name)
        if raw is None or raw == "":
            continue
        if isinstance(fallback, bool):
            overrides[setting_key] = _coerce_bool(raw, fallback)
        elif isinstance(fallback, float):
            overrides[setting_key] = _coerce_float(raw, float(fallback))
        else:
            overrides[setting_key] = raw
    return overrides
def environment_llm_defaults() -> Dict[str, Any]:
    """Return environment-provided LLM defaults with migrations applied."""
    snapshot = dict(_environment_defaults())
    if snapshot:
        _apply_llm_migrations(snapshot)
    return snapshot
def _coerce_bool(value: Any, default: bool) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"1", "true", "yes", "on"}:
return True
if lowered in {"0", "false", "no", "off"}:
return False
return default
def _coerce_float(value: Any, default: float) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
def _apply_llm_migrations(settings: Dict[str, Any]) -> None:
    """Upgrade legacy LLM prompt/context-mode values in place."""
    current_prompt = str(settings.get("llm_prompt") or "")
    if current_prompt.strip() == _LEGACY_REWRITE_ONLY_PROMPT.strip():
        settings["llm_prompt"] = DEFAULT_LLM_PROMPT
    mode = str(settings.get("llm_context_mode") or "").strip().lower()
    # Only "sentence" context mode is supported now.
    if mode != "sentence":
        settings["llm_context_mode"] = "sentence"
def _extract_settings(source: Mapping[str, Any]) -> Dict[str, Any]:
    """Merge *source* with env overrides and built-in defaults.

    Precedence per key: source value, then environment, then default.
    Values are coerced to the default's type; migrations run last.
    """
    env_overrides = _environment_defaults()
    result: Dict[str, Any] = {}
    for setting_key, fallback in _SETTINGS_DEFAULTS.items():
        if setting_key in source:
            raw = source.get(setting_key)
        elif setting_key in env_overrides:
            raw = env_overrides[setting_key]
        else:
            raw = fallback
        if isinstance(fallback, bool):
            result[setting_key] = _coerce_bool(raw, fallback)
        elif isinstance(fallback, float):
            result[setting_key] = _coerce_float(raw, fallback)
        elif isinstance(fallback, str):
            result[setting_key] = str(raw or "")
        else:
            result[setting_key] = raw
    _apply_llm_migrations(result)
    return result
@lru_cache(maxsize=1)
def _cached_settings() -> Dict[str, Any]:
    """Load and normalize persisted settings once per process (cached)."""
    config = load_config() or {}
    return _extract_settings(config)
def get_runtime_settings() -> Dict[str, Any]:
    """Return a mutable copy of the cached runtime settings."""
    return {**_cached_settings()}
def clear_cached_settings() -> None:
    """Invalidate the settings cache so the next read reloads the config."""
    _cached_settings.cache_clear()
def build_apostrophe_config(
    *,
    settings: Mapping[str, Any],
    base: Optional[ApostropheConfig] = None,
) -> ApostropheConfig:
    """Translate persisted settings into an ApostropheConfig.

    Starts from a copy of *base* (or a fresh config) and sets each mode
    according to the corresponding normalization_* flag.
    """
    def enabled(key: str) -> bool:
        # All normalization flags default to on.
        return bool(settings.get(key, True))

    config = replace(base or ApostropheConfig())
    config.convert_numbers = enabled("normalization_numbers")
    config.convert_currency = enabled("normalization_currency")
    config.remove_footnotes = enabled("normalization_footnotes")
    raw_year_style = settings.get("normalization_numbers_year_style", "american") or ""
    config.year_pronunciation_mode = str(raw_year_style).strip().lower()
    config.add_phoneme_hints = enabled("normalization_phoneme_hints")
    config.contraction_mode = (
        "expand" if enabled("normalization_apostrophes_contractions") else "keep"
    )
    config.plural_possessive_mode = (
        "collapse"
        if enabled("normalization_apostrophes_plural_possessives")
        else "keep"
    )
    config.sibilant_possessive_mode = (
        "mark"
        if enabled("normalization_apostrophes_sibilant_possessives")
        else "keep"
    )
    config.decades_mode = (
        "expand" if enabled("normalization_apostrophes_decades") else "keep"
    )
    config.leading_elision_mode = (
        "expand" if enabled("normalization_apostrophes_leading_elisions") else "keep"
    )
    # Ambiguous 'd (had/would) resolution only makes sense when expanding.
    config.ambiguous_past_modal_mode = (
        "contextual" if config.contraction_mode == "expand" else "keep"
    )
    categories = dict(CONTRACTION_CATEGORY_DEFAULTS)
    for setting_key, category in _CONTRACTION_SETTING_MAP.items():
        fallback = bool(_SETTINGS_DEFAULTS.get(setting_key, True))
        categories[category] = _coerce_bool(settings.get(setting_key, fallback), fallback)
    config.contraction_categories = categories
    return config
def build_llm_configuration(settings: Mapping[str, Any]) -> LLMConfiguration:
    """Construct an LLMConfiguration from persisted setting values."""
    default_timeout = float(_SETTINGS_DEFAULTS["llm_timeout"])
    return LLMConfiguration(
        base_url=str(settings.get("llm_base_url") or ""),
        api_key=str(settings.get("llm_api_key") or ""),
        model=str(settings.get("llm_model") or ""),
        timeout=_coerce_float(settings.get("llm_timeout"), default_timeout),
    )
def apply_overrides(
    base: Mapping[str, Any], overrides: Mapping[str, Any]
) -> Dict[str, Any]:
    """Overlay recognized setting keys from *overrides* onto *base*.

    Unknown keys in *overrides* are ignored; migrations run on the result.
    """
    merged: Dict[str, Any] = dict(base)
    merged.update(
        (key, value)
        for key, value in overrides.items()
        if key in _SETTINGS_DEFAULTS
    )
    _apply_llm_migrations(merged)
    return merged
"""
Pre-download dialog and worker for Abogen
This module consolidates the pre-download logic for the Kokoro voices and
model, and for the spaCy language models. The code favors clarity, avoids
duplication, and handles optional dependencies gracefully.
"""
from typing import List, Optional, Tuple
import importlib
import importlib.util
from PyQt6.QtWidgets import (
QDialog,
QVBoxLayout,
QHBoxLayout,
QLabel,
QPushButton,
QSpacerItem,
QSizePolicy,
)
from PyQt6.QtCore import QThread, pyqtSignal
from abogen.constants import COLORS, VOICES_INTERNAL
from abogen.spacy_utils import SPACY_MODELS
import abogen.hf_tracker
# Helpers
def _unique_sorted_models() -> List[str]:
    """Return a sorted list of unique spaCy model package names."""
    return sorted({name for name in SPACY_MODELS.values()})
def _is_package_installed(pkg_name: str) -> bool:
"""Return True if a package with the given name can be imported (site-packages)."""
try:
return importlib.util.find_spec(pkg_name) is not None
except Exception:
return False
# NOTE: explicit HF cache helper removed; we use try_to_load_from_cache in-scope where needed
class PreDownloadWorker(QThread):
    """Worker thread to download required models/voices.
    Emits human-readable messages via `progress`. Uses `category_done` to indicate
    a category (voices/model/spacy) finished successfully. Emits `error` on exception
    and `finished` after all work completes.
    """
    # Emit (category, status, message)
    progress = pyqtSignal(str, str, str)
    category_done = pyqtSignal(str)
    finished = pyqtSignal()
    error = pyqtSignal(str)
    def __init__(self, parent=None):
        """Prepare download bookkeeping; actual work happens in run()."""
        super().__init__(parent)
        # Cooperative cancellation flag checked between download steps.
        self._cancelled = False
        # repo and filenames used for Kokoro model
        self._repo_id = "hexgrad/Kokoro-82M"
        self._model_files = ["kokoro-v1_0.pth", "config.json"]
        # Track download success per category
        self._voices_success = False
        self._model_success = False
        self._spacy_success = False
        # Suppress HF tracker warnings during downloads
        self._original_emitter = abogen.hf_tracker.show_warning_signal_emitter
    def cancel(self) -> None:
        """Request cancellation; checked before each download step."""
        self._cancelled = True
    def run(self) -> None:
        """Download voices, model files and spaCy models in sequence.

        Emits `category_done` after each successful category, `error` on
        any unexpected exception, and `finished` when all work completed.
        """
        # Suppress HF tracker warnings during downloads
        abogen.hf_tracker.show_warning_signal_emitter = None
        try:
            self._download_kokoro_voices()
            if self._cancelled:
                return
            if self._voices_success:
                self.category_done.emit("voices")
            self._download_kokoro_model()
            if self._cancelled:
                return
            if self._model_success:
                self.category_done.emit("model")
            self._download_spacy_models()
            if self._cancelled:
                return
            if self._spacy_success:
                self.category_done.emit("spacy")
            self.finished.emit()
        except Exception as exc:  # pragma: no cover - best-effort reporting
            self.error.emit(str(exc))
        finally:
            # Restore original emitter
            abogen.hf_tracker.show_warning_signal_emitter = self._original_emitter
    # Kokoro voices
    def _download_kokoro_voices(self) -> None:
        """Download each Kokoro voice file not already in the HF cache."""
        self._voices_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "voice", "warning", "huggingface_hub not installed, skipping voices..."
            )
            self._voices_success = False
            return
        voice_list = VOICES_INTERNAL
        for idx, voice in enumerate(voice_list, start=1):
            if self._cancelled:
                self._voices_success = False
                return
            filename = f"voices/{voice}.pt"
            # Skip voices already present in the local HF cache.
            if try_to_load_from_cache(repo_id=self._repo_id, filename=filename):
                self.progress.emit(
                    "voice",
                    "installed",
                    f"{idx}/{len(voice_list)}: {voice} already present",
                )
                continue
            self.progress.emit(
                "voice", "downloading", f"{idx}/{len(voice_list)}: {voice}..."
            )
            try:
                hf_hub_download(repo_id=self._repo_id, filename=filename)
                self.progress.emit("voice", "downloaded", f"{voice} downloaded")
            except Exception as exc:
                # A single failed voice marks the category unsuccessful but
                # does not stop the remaining downloads.
                self.progress.emit(
                    "voice", "warning", f"could not download {voice}: {exc}"
                )
                self._voices_success = False
    # Kokoro model
    def _download_kokoro_model(self) -> None:
        """Download the Kokoro model weights and config if not cached."""
        self._model_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "model", "warning", "huggingface_hub not installed, skipping model..."
            )
            self._model_success = False
            return
        for fname in self._model_files:
            if self._cancelled:
                self._model_success = False
                return
            # config.json reports under its own UI category.
            category = "config" if fname == "config.json" else "model"
            if try_to_load_from_cache(repo_id=self._repo_id, filename=fname):
                self.progress.emit(
                    category, "installed", f"file {fname} already present"
                )
                continue
            self.progress.emit(category, "downloading", f"file {fname}...")
            try:
                hf_hub_download(repo_id=self._repo_id, filename=fname)
                self.progress.emit(category, "downloaded", f"file {fname} downloaded")
            except Exception as exc:
                self.progress.emit(
                    category, "warning", f"could not download file {fname}: {exc}"
                )
                self._model_success = False
    # spaCy models
    def _download_spacy_models(self) -> None:
        """Download spaCy models. Prefer missing models provided by parent.
        Parent dialog will populate _spacy_models_missing during checking.
        """
        self._spacy_success = True
        # Determine which models to process: prefer parent-provided missing list to avoid
        # re-checking everything; otherwise use the full unique list.
        parent = self.parent()
        models_to_process: List[str] = _unique_sorted_models()
        try:
            if (
                parent is not None
                and hasattr(parent, "_spacy_models_missing")
                and parent._spacy_models_missing
            ):
                models_to_process = list(dict.fromkeys(parent._spacy_models_missing))
        except Exception:
            pass
        # If spaCy is not available to run the CLI, skip gracefully
        try:
            import spacy.cli as _spacy_cli
        except Exception:
            self.progress.emit(
                "spacy", "warning", "spaCy not available, skipping spaCy models..."
            )
            self._spacy_success = False
            return
        for idx, model_name in enumerate(models_to_process, start=1):
            if self._cancelled:
                self._spacy_success = False
                return
            if _is_package_installed(model_name):
                self.progress.emit(
                    "spacy",
                    "installed",
                    f"{idx}/{len(models_to_process)}: {model_name} already installed",
                )
                continue
            self.progress.emit(
                "spacy",
                "downloading",
                f"{idx}/{len(models_to_process)}: {model_name}...",
            )
            try:
                _spacy_cli.download(model_name)
                self.progress.emit("spacy", "downloaded", f"{model_name} downloaded")
            except Exception as exc:
                self.progress.emit(
                    "spacy", "warning", f"could not download {model_name}: {exc}"
                )
                self._spacy_success = False
class PreDownloadDialog(QDialog):
"""Dialog to show and control pre-download process."""
VOICE_PREFIX = "Kokoro voices: "
MODEL_PREFIX = "Kokoro model: "
CONFIG_PREFIX = "Kokoro config: "
SPACY_PREFIX = "spaCy models: "
    def __init__(self, parent=None):
        """Build the dialog, then kick off the background status check."""
        super().__init__(parent)
        self.setWindowTitle("Pre-download Models and Voices")
        self.setMinimumWidth(500)
        self.worker: Optional[PreDownloadWorker] = None
        # Set True by any status callback reporting a missing item; gates
        # enabling of the download button.
        self.has_missing = False
        self._spacy_models_checked: List[tuple] = []
        self._spacy_models_missing: List[str] = []
        self._status_worker = None
        # Map keywords to (label, prefix) - labels filled after UI creation
        self.status_map = {
            "voice": (None, self.VOICE_PREFIX),
            "spacy": (None, self.SPACY_PREFIX),
            "model": (None, self.MODEL_PREFIX),
            "config": (None, self.CONFIG_PREFIX),
        }
        # Worker category -> status_map keys affected by that category.
        self.category_map = {
            "voices": ["voice"],
            "model": ["model", "config"],
            "spacy": ["spacy"],
        }
        self._setup_ui()
        self._start_status_check()
    def _setup_ui(self) -> None:
        """Create the description, status rows and action buttons."""
        layout = QVBoxLayout(self)
        layout.setSpacing(0)
        layout.setContentsMargins(15, 0, 15, 15)
        desc = QLabel(
            "You can pre-download all required models and voices for offline use.\n"
            "This includes Kokoro voices, Kokoro model (and config), and spaCy models."
        )
        desc.setWordWrap(True)
        layout.addWidget(desc)
        # Status rows
        status_layout = QVBoxLayout()
        status_title = QLabel("<b>Current Status:</b>")
        status_layout.addWidget(status_title)
        self.voices_status = QLabel(self.VOICE_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.voices_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.model_status = QLabel(self.MODEL_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.model_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.config_status = QLabel(self.CONFIG_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.config_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.spacy_status = QLabel(self.SPACY_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.spacy_status)
        row.addStretch()
        status_layout.addLayout(row)
        # register labels
        self.status_map["voice"] = (self.voices_status, self.VOICE_PREFIX)
        self.status_map["model"] = (self.model_status, self.MODEL_PREFIX)
        self.status_map["config"] = (self.config_status, self.CONFIG_PREFIX)
        self.status_map["spacy"] = (self.spacy_status, self.SPACY_PREFIX)
        layout.addLayout(status_layout)
        layout.addItem(
            QSpacerItem(0, 20, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Fixed)
        )
        # Buttons
        button_row = QHBoxLayout()
        button_row.setSpacing(10)
        self.download_btn = QPushButton("Download all")
        self.download_btn.setMinimumWidth(100)
        self.download_btn.setMinimumHeight(35)
        # Disabled until the status check finds something missing.
        self.download_btn.setEnabled(False)
        self.download_btn.clicked.connect(self._start_download)
        button_row.addWidget(self.download_btn)
        self.close_btn = QPushButton("Close")
        self.close_btn.setMinimumWidth(100)
        self.close_btn.setMinimumHeight(35)
        self.close_btn.clicked.connect(self._handle_close)
        button_row.addWidget(self.close_btn)
        layout.addLayout(button_row)
        self.adjustSize()
# Status checking worker
    class StatusCheckWorker(QThread):
        """Background thread that checks cache/install state per category.

        Emits one signal per category as results arrive; spaCy checks also
        emit per-model progress. Results are read from the parent dialog's
        _check_* helpers to avoid duplicating the logic here.
        """
        voices_checked = pyqtSignal(bool, list)
        model_checked = pyqtSignal(bool)
        config_checked = pyqtSignal(bool)
        spacy_model_checking = pyqtSignal(str)
        spacy_model_result = pyqtSignal(str, bool)
        spacy_checked = pyqtSignal(bool, list)
        def run(self):
            """Run all checks sequentially and emit results via signals."""
            parent = self.parent()
            if parent is None:
                return
            voices_ok, missing_voices = parent._check_kokoro_voices()
            self.voices_checked.emit(voices_ok, missing_voices)
            model_ok = parent._check_kokoro_model()
            self.model_checked.emit(model_ok)
            config_ok = parent._check_kokoro_config()
            self.config_checked.emit(config_ok)
            # Check spaCy models by package name to detect site-package installs
            unique = _unique_sorted_models()
            missing: List[str] = []
            for name in unique:
                self.spacy_model_checking.emit(name)
                ok = _is_package_installed(name)
                self.spacy_model_result.emit(name, ok)
                if not ok:
                    missing.append(name)
            # Stash the missing list on the dialog so the download worker
            # can process only what is actually needed.
            parent._spacy_models_missing = missing
            self.spacy_checked.emit(len(missing) == 0, missing)
    def _start_status_check(self) -> None:
        """Wire the status worker's signals to UI slots and start it."""
        self._status_worker = self.StatusCheckWorker(self)
        self._status_worker.voices_checked.connect(self._update_voices_status)
        self._status_worker.model_checked.connect(self._update_model_status)
        self._status_worker.config_checked.connect(self._update_config_status)
        self._status_worker.spacy_model_checking.connect(self._spacy_model_checking)
        self._status_worker.spacy_model_result.connect(self._spacy_model_result)
        self._status_worker.spacy_checked.connect(self._update_spacy_status)
        # These are initialized in __init__ to keep consistent object state
        # Set checking visual state
        for lbl in (
            self.voices_status,
            self.model_status,
            self.config_status,
            self.spacy_status,
        ):
            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
        self.spacy_status.setText(self.SPACY_PREFIX + "⏳ Checking...")
        self._status_worker.start()
# UI update callbacks
    def _spacy_model_checking(self, name: str) -> None:
        """Show which spaCy model is currently being checked."""
        self.spacy_status.setText(f"{self.SPACY_PREFIX}Checking {name}...")
def _spacy_model_result(self, name: str, ok: bool) -> None:
self._spacy_models_checked.append((name, ok))
if not ok and name not in self._spacy_models_missing:
self._spacy_models_missing.append(name)
checked = len(self._spacy_models_checked)
missing_count = len(self._spacy_models_missing)
if missing_count:
self.spacy_status.setText(
f"{self.SPACY_PREFIX}{checked} checked, {missing_count} missing..."
)
else:
self.spacy_status.setText(f"{self.SPACY_PREFIX}{checked} checked...")
    def _update_voices_status(self, ok: bool, missing: List[str]) -> None:
        """Render the voices row green when complete, red with a count otherwise."""
        if ok:
            self._set_status("voice", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "voice", f"✗ Missing {len(missing)} voices", COLORS["RED"]
                )
            else:
                self._set_status("voice", "✗ Not downloaded", COLORS["RED"])
def _update_model_status(self, ok: bool) -> None:
if ok:
self._set_status("model", "✓ Downloaded", COLORS["GREEN"])
else:
self.has_missing = True
self._set_status("model", "✗ Not downloaded", COLORS["RED"])
def _update_config_status(self, ok: bool) -> None:
if ok:
self._set_status("config", "✓ Downloaded", COLORS["GREEN"])
else:
self.has_missing = True
self._set_status("config", "✗ Not downloaded", COLORS["RED"])
    def _update_spacy_status(self, ok: bool, missing: List[str]) -> None:
        """Render the spaCy row, then enable downloads if anything is missing.

        spaCy is the last category checked, so the download button's enabled
        state is finalized here.
        """
        if ok:
            self._set_status("spacy", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "spacy", f"✗ Missing {len(missing)} model(s)", COLORS["RED"]
                )
            else:
                self._set_status("spacy", "✗ Not downloaded", COLORS["RED"])
        self.download_btn.setEnabled(self.has_missing)
def _set_status(self, key: str, text: str, color: str) -> None:
lbl, prefix = self.status_map.get(key, (None, ""))
if not lbl:
return
lbl.setText(prefix + text)
lbl.setStyleSheet(f"color: {color};")
# Helper checks
    def _check_kokoro_voices(self) -> Tuple[bool, List[str]]:
        """Return (ok, missing_list) for Kokoro voices check."""
        missing = []
        try:
            from huggingface_hub import try_to_load_from_cache
            for voice in VOICES_INTERNAL:
                # A falsy cache result means the voice file is not cached.
                if not try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename=f"voices/{voice}.pt"
                ):
                    missing.append(voice)
        except Exception:
            # If HF missing, report all as missing
            return False, list(VOICES_INTERNAL)
        return (len(missing) == 0), missing
    def _check_kokoro_model(self) -> bool:
        """Return True when the Kokoro weights file is in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="kokoro-v1_0.pth"
                )
                is not None
            )
        except Exception:
            # huggingface_hub missing or cache lookup failed.
            return False
def _check_kokoro_config(self) -> bool:
try:
from huggingface_hub import try_to_load_from_cache
return (
try_to_load_from_cache(
repo_id="hexgrad/Kokoro-82M", filename="config.json"
)
is not None
)
except Exception:
return False
def _check_spacy_models(self) -> bool:
unique = _unique_sorted_models()
missing = [m for m in unique if not _is_package_installed(m)]
self._spacy_models_missing = missing
return len(missing) == 0
# Download control
def _start_download(self) -> None:
    """Kick off the background download worker and wire up its signals."""
    self.download_btn.setEnabled(False)
    self.download_btn.setText("Downloading...")
    # Flip every status label into its "processing" state right away.
    self._on_progress("system", "starting", "Processing, please wait...")
    worker = PreDownloadWorker(self)
    worker.progress.connect(self._on_progress)
    worker.category_done.connect(self._on_category_done)
    worker.finished.connect(self._on_download_finished)
    worker.error.connect(self._on_download_error)
    self.worker = worker
    worker.start()
def _on_progress(self, category: str, status: str, message: str) -> None:
    """Map worker (category, status, message) to UI label updates.

    Status is one of: 'downloading', 'installed', 'downloaded', 'warning',
    'error', or 'starting' (the original docstring omitted 'error', which
    the code below handles).
    Category is one of: 'voice', 'model', 'spacy', 'config', or 'system'.
    """
    try:
        # If the category targets a specific label, update directly
        if category in self.status_map:
            lbl, prefix = self.status_map[category]
            if not lbl:
                return
            # Compose message and set color based on status token
            full_text = prefix + message
            if len(full_text) > 60:
                # Elide long messages; the full text stays in the tooltip.
                display_text = full_text[:57] + "..."
                lbl.setText(display_text)
                lbl.setToolTip(full_text)
            else:
                lbl.setText(full_text)
                lbl.setToolTip("")  # Clear tooltip if not needed
            if status == "downloading":
                lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
            elif status in ("installed", "downloaded"):
                lbl.setStyleSheet(f"color: {COLORS['GREEN']};")
            elif status == "warning":
                lbl.setStyleSheet(f"color: {COLORS['RED']};")
            elif status == "error":
                lbl.setStyleSheet(f"color: {COLORS['RED']};")
            return
        # System-level messages
        if category == "system":
            if status == "starting":
                # Put every label into a uniform "processing" state.
                for k in self.status_map:
                    lbl, prefix = self.status_map[k]
                    if lbl:
                        lbl.setText(prefix + "Processing, please wait...")
                        lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
            # other system statuses don't require action
            return
    except Exception:
        # Do not let UI thread crash on unexpected worker message
        pass
def _on_category_done(self, category: str) -> None:
    """Mark every status label belonging to *category* as downloaded."""
    status_keys = self.category_map.get(category, [])
    for status_key in status_keys:
        self._set_status(status_key, "✓ Downloaded", COLORS["GREEN"])
def _on_download_finished(self) -> None:
self.has_missing = False
self.download_btn.setText("Download all")
self.download_btn.setEnabled(False)
def _on_download_error(self, error_msg: str) -> None:
    """Re-enable retrying and surface *error_msg* on every status label."""
    self.download_btn.setText("Download all")
    # Allow the user to retry after a failure.
    self.download_btn.setEnabled(True)
    failure_text = f"✗ Error - {error_msg}"
    for key in self.status_map:
        self._set_status(key, failure_text, COLORS["RED"])
def _handle_close(self) -> None:
if self.worker and self.worker.isRunning():
self.worker.cancel()
self.worker.wait(2000)
self.accept()
def closeEvent(self, event) -> None:
    """Qt close hook: cancel any in-flight worker before the dialog closes."""
    active = self.worker
    if active and active.isRunning():
        active.cancel()
        # Give the thread a moment to exit cleanly.
        active.wait(2000)
    super().closeEvent(event)
from __future__ import annotations
import json
import sqlite3
import shutil
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional
from .entity_analysis import normalize_token
from .utils import get_internal_cache_path, get_user_settings_dir
# Serializes all read/write access to the JSON store across threads.
_DB_LOCK = threading.RLock()
# Bump when the on-disk JSON layout changes.
_SCHEMA_VERSION = 1
def _store_path() -> Path:
    """Return the overrides JSON path, creating its directory if needed."""
    try:
        base_dir = Path(get_user_settings_dir())
    except ModuleNotFoundError:
        # Fall back to the internal cache when the settings-dir helper
        # cannot resolve its platform dependency.
        base_dir = Path(get_internal_cache_path("pronunciations"))
    path = base_dir / "overrides.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    return path
def _migrate_legacy_sqlite(target_json_path: Path) -> None:
    """One-time migration of the legacy SQLite store into the JSON store.

    Reads every row of the old ``pronunciations.db`` ``overrides`` table,
    writes them grouped by language to *target_json_path*, and renames the
    old database to ``.db.bak``. Best effort: any failure is swallowed so
    startup never breaks on a corrupt legacy database.

    Fix over the original: the sqlite connection is now closed in a
    ``finally`` block, so it is no longer leaked when an exception fires
    between ``connect()`` and ``close()``.
    """
    try:
        base_dir = Path(get_user_settings_dir())
    except ModuleNotFoundError:
        base_dir = Path(get_internal_cache_path("pronunciations"))
    sqlite_path = base_dir / "pronunciations.db"
    if not sqlite_path.exists():
        return
    try:
        conn = sqlite3.connect(sqlite_path)
        try:
            conn.row_factory = sqlite3.Row
            # Bail out if the legacy table was never created.
            cursor = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='overrides'"
            )
            if not cursor.fetchone():
                return
            rows = conn.execute("SELECT * FROM overrides").fetchall()
        finally:
            # Always release the database handle, even on partial failures.
            conn.close()
        data: Dict[str, Any] = {"version": _SCHEMA_VERSION, "overrides": {}}
        for row in rows:
            lang_map = data["overrides"].setdefault(row["language"], {})
            lang_map[row["normalized"]] = {
                "id": str(row["id"]),
                "normalized": row["normalized"],
                "token": row["token"],
                "language": row["language"],
                "pronunciation": row["pronunciation"],
                "voice": row["voice"],
                "notes": row["notes"],
                "context": row["context"],
                "usage_count": row["usage_count"],
                "created_at": row["created_at"],
                "updated_at": row["updated_at"],
            }
        # Save to JSON
        with open(target_json_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        # Keep the old database around as a backup instead of deleting it.
        sqlite_path.rename(sqlite_path.with_suffix(".db.bak"))
    except Exception:
        # Best-effort migration: never block startup on a broken legacy DB.
        pass
def _load_db() -> Dict[str, Any]:
    """Load the overrides store, migrating legacy SQLite data on first access."""
    path = _store_path()
    if not path.exists():
        # Try to pull data over from the old SQLite store once.
        _migrate_legacy_sqlite(path)
    if not path.exists():
        return {"version": _SCHEMA_VERSION, "overrides": {}}
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError):
        # Treat an unreadable/corrupt store as empty rather than crashing.
        return {"version": _SCHEMA_VERSION, "overrides": {}}
def _save_db(data: Dict[str, Any]) -> None:
    """Atomically persist *data* to the overrides JSON file.

    Writes to a sibling ``.tmp`` file first and then swaps it into place,
    so a crash mid-write never leaves a truncated store behind.

    Fix over the original: ``Path.replace()`` (i.e. ``os.replace``) is used
    instead of ``shutil.move`` — it is atomic on POSIX and, unlike a plain
    rename, overwrites an existing destination on Windows. The temp file is
    also removed if the write fails.
    """
    path = _store_path()
    temp_path = path.with_suffix(".tmp")
    try:
        with open(temp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        temp_path.replace(path)
    except Exception:
        # Don't leave a stale temp file behind on failure.
        try:
            temp_path.unlink()
        except OSError:
            pass
        raise
def load_overrides(language: str, tokens: Iterable[str]) -> Dict[str, Dict[str, Any]]:
    """Return stored overrides for *language*, keyed by normalized token.

    Only tokens that actually have an override appear in the result.
    """
    wanted = {normalize_token(token) for token in tokens if token}
    if not wanted:
        return {}
    with _DB_LOCK:
        stored = _load_db().get("overrides", {}).get(language, {})
        return {key: stored[key] for key in wanted if key in stored}
def search_overrides(
    language: str, query: str, *, limit: int = 15
) -> List[Dict[str, Any]]:
    """Case-insensitive substring search over a language's overrides.

    Results are ordered by usage count (desc), then recency (desc), and
    capped at *limit* entries.
    """
    if not query:
        return []
    needle = query.lower()
    with _DB_LOCK:
        entries = _load_db().get("overrides", {}).get(language, {}).values()
        hits = [
            entry
            for entry in entries
            if needle in entry["normalized"] or needle in entry["token"].lower()
        ]
        hits.sort(
            key=lambda e: (e.get("usage_count", 0), e.get("updated_at", 0)),
            reverse=True,
        )
        return hits[:limit]
def save_override(
    *,
    language: str,
    token: str,
    pronunciation: Optional[str] = None,
    voice: Optional[str] = None,
    notes: Optional[str] = None,
    context: Optional[str] = None,
) -> Dict[str, Any]:
    """Create or update a pronunciation override and return its entry.

    Raises:
        ValueError: if *token* normalizes to an empty string.
    """
    normalized = normalize_token(token)
    if not normalized:
        raise ValueError("Provide a token to override")
    now = time.time()
    with _DB_LOCK:
        db = _load_db()
        lang_overrides = db.setdefault("overrides", {}).setdefault(language, {})
        entry = lang_overrides.get(normalized)
        if entry is not None:
            # Refresh the mutable fields of the existing entry in place.
            entry.update(
                token=token,
                pronunciation=pronunciation,
                voice=voice,
                notes=notes,
                context=context,
                updated_at=now,
            )
        else:
            entry = {
                "id": str(uuid.uuid4()),
                "normalized": normalized,
                "token": token,
                "language": language,
                "pronunciation": pronunciation,
                "voice": voice,
                "notes": notes,
                "context": context,
                "usage_count": 0,
                "created_at": now,
                "updated_at": now,
            }
            lang_overrides[normalized] = entry
        _save_db(db)
    return entry
def delete_override(*, language: str, token: str) -> None:
    """Remove the override for *token* in *language*, if one exists."""
    normalized = normalize_token(token)
    if not normalized:
        return
    with _DB_LOCK:
        db = _load_db()
        lang_overrides = db.get("overrides", {}).get(language, {})
        # Only touch disk when something was actually removed.
        if lang_overrides.pop(normalized, None) is not None:
            _save_db(db)
def all_overrides(language: str) -> List[Dict[str, Any]]:
    """Return every override for *language*, most recently updated first."""
    with _DB_LOCK:
        entries = _load_db().get("overrides", {}).get(language, {}).values()
        return sorted(entries, key=lambda e: e.get("updated_at", 0), reverse=True)
def increment_usage(*, language: str, token: str, amount: int = 1) -> None:
    """Bump the usage counter of an existing override by *amount*.

    Missing tokens are ignored; the store is only written when a change
    was actually made.
    """
    normalized = normalize_token(token)
    if not normalized:
        return
    with _DB_LOCK:
        db = _load_db()
        entry = db.get("overrides", {}).get(language, {}).get(normalized)
        if entry is None:
            return
        entry["usage_count"] = entry.get("usage_count", 0) + amount
        entry["updated_at"] = time.time()
        _save_db(db)
def get_override_stats(language: str) -> Dict[str, int]:
    """Return summary counts describing the override set for *language*."""
    with _DB_LOCK:
        entries = list(_load_db().get("overrides", {}).get(language, {}).values())
    total = len(entries)
    return {
        "total": total,
        # No filtering is applied here, so "filtered" mirrors "total".
        "filtered": total,
        "with_pronunciation": sum(1 for e in entries if e.get("pronunciation")),
        "with_voice": sum(1 for e in entries if e.get("voice")),
    }
"""PyQt6 Desktop GUI for abogen.
This package contains the traditional PyQt6-based desktop interface.
For the web-based interface, see abogen.webui.
"""
from __future__ import annotations
import re
import base64
from bs4 import BeautifulSoup, NavigableString
from PyQt6.QtGui import QMovie
from PyQt6.QtWidgets import (
QDialog,
QTreeWidget,
QTreeWidgetItem,
QTextEdit,
QPushButton,
QVBoxLayout,
QHBoxLayout,
QDialogButtonBox,
QSplitter,
QWidget,
QCheckBox,
QTreeWidgetItemIterator,
QLabel,
QMenu,
)
from PyQt6.QtCore import (
Qt,
QThread,
pyqtSignal,
QSize,
)
from abogen.utils import (
detect_encoding,
get_resource_path,
)
from abogen.book_parser import get_book_parser
from abogen.subtitle_utils import (
clean_text,
calculate_text_length,
)
import os
import logging
import urllib.parse
import textwrap
# Setup logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Matches any HTML/XML tag.
_HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
# Leading hyphen/en-dash/em-dash (with surrounding whitespace) at string start.
_LEADING_DASH_PATTERN = re.compile(r"^\s*[-–—]\s*")
# Same, but restricted to a plain ASCII hyphen.
_LEADING_SIMPLE_DASH_PATTERN = re.compile(r"^\s*-\s*")
class HandlerDialog(QDialog):
    """Chapter/page selection dialog for an opened book."""
    # Class variables to remember checkbox states between dialog instances
    _save_chapters_separately = False
    _merge_chapters_at_end = True
    _save_as_project = False  # New class variable for save_as_project option
    # Cache for processed book content to avoid reprocessing.
    # Key: (book_path, modification_time, file_type, replace_single_newlines)
    #   — see _preprocess_content, which includes the text-cleaning setting.
    # Value: dict with content_texts, content_lengths, processed_nav_structure,
    #   book_metadata
    _content_cache = {}
class _LoaderThread(QThread):
    """Minimal QThread that runs a callable and emits an error string on exception."""

    error = pyqtSignal(str)

    def __init__(self, target_callable):
        super().__init__()
        self._target = target_callable

    def run(self):
        try:
            self._target()
        except Exception as exc:
            # Forward the failure to the GUI thread as a plain string.
            self.error.emit(str(exc))
@classmethod
def clear_content_cache(cls, book_path=None):
    """Clear the content cache. If book_path is provided, only clear that book's cache."""
    if book_path is None:
        cls._content_cache.clear()
        logging.info("Cleared all content cache")
        return
    # Cache keys start with the book path; drop every variant for this book.
    stale_keys = [key for key in cls._content_cache if key[0] == book_path]
    for key in stale_keys:
        del cls._content_cache[key]
    if stale_keys:
        logging.info(f"Cleared content cache for {os.path.basename(book_path)}")
def __init__(self, book_path, file_type=None, checked_chapters=None, parent=None):
    """Build the dialog and start loading *book_path* in the background.

    Args:
        book_path: Path to the book file to open.
        file_type: Optional explicit type; auto-detected by the parser
            factory when None.
        checked_chapters: Optional iterable of chapter identifiers to
            pre-check in the tree.
        parent: Optional parent widget.

    Raises:
        Exception: re-raises any parser initialization failure after
            logging it.
    """
    super().__init__(parent)
    # Normalize path
    book_path = os.path.normpath(os.path.abspath(book_path))
    self.book_path = book_path
    # Initialize Parser
    try:
        # Factory handles file type detection if file_type is None
        self.parser = get_book_parser(book_path, file_type=file_type)
        # Parser loads automatically in init now
    except Exception as e:
        logging.error(f"Failed to initialize parser for {book_path}: {e}")
        raise
    # Extract book name from file path
    book_name = os.path.splitext(os.path.basename(book_path))[0]
    # Set window title based on file type and book name
    item_type = "Chapters" if self.parser.file_type in ["epub", "markdown"] else "Pages"
    self.setWindowTitle(f"Select {item_type} - {book_name}")
    self.resize(1200, 900)
    self._block_signals = False  # Flag to prevent recursive signals
    # Configure window: remove help button and allow resizing
    self.setWindowFlags(
        Qt.WindowType.Window
        | Qt.WindowType.WindowCloseButtonHint
        | Qt.WindowType.WindowMaximizeButtonHint
    )
    self.setWindowModality(Qt.WindowModality.NonModal)
    # Initialize save chapters flags from class variables
    self.save_chapters_separately = HandlerDialog._save_chapters_separately
    self.merge_chapters_at_end = HandlerDialog._merge_chapters_at_end
    self.save_as_project = HandlerDialog._save_as_project
    # Initialize metadata dict; will be populated in _preprocess_content by the background loader
    self.book_metadata = {}
    # Initialize UI elements that are used in other methods
    self.save_chapters_checkbox = None
    self.merge_chapters_checkbox = None
    # Build treeview
    self.treeWidget = QTreeWidget(self)
    self.treeWidget.setHeaderHidden(True)
    self.treeWidget.setSelectionMode(QTreeWidget.SelectionMode.ExtendedSelection)
    self.treeWidget.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
    self.treeWidget.customContextMenuRequested.connect(self.on_tree_context_menu)
    # Initialize checked_chapters set
    self.checked_chapters = set(checked_chapters) if checked_chapters else set()
    # For storing content and lengths (will be filled by background loader)
    self.content_texts = {}
    self.content_lengths = {}
    # Also maintain refs for structure
    self.processed_nav_structure = []
    # Add a placeholder "Information" item so the tree isn't empty immediately
    info_item = QTreeWidgetItem(self.treeWidget, ["Information"])
    info_item.setData(0, Qt.ItemDataRole.UserRole, "info:bookinfo")
    info_item.setFlags(info_item.flags() & ~Qt.ItemFlag.ItemIsUserCheckable)
    font = info_item.font(0)
    font.setBold(True)
    info_item.setFont(0, font)
    # Setup UI now so dialog appears immediately
    self._setup_ui()
    # Create a centered loading overlay and show it while background load runs
    self._create_loading_overlay()
    # Hide the main UI so only the overlay is visible initially
    if getattr(self, "splitter", None) is not None:
        self.splitter.setVisible(False)
    self._show_loading_overlay("Loading...")
    # Start background loading of book content so the dialog opens immediately
    self._start_background_load()
    # Hide expand/collapse decoration if there are no parent items
    has_parents = False
    for i in range(self.treeWidget.topLevelItemCount()):
        if self.treeWidget.topLevelItem(i).childCount() > 0:
            has_parents = True
            break
    self.treeWidget.setRootIsDecorated(has_parents)
def _create_loading_overlay(self):
    """Create a centered loading indicator: animated GIF left, text right.

    The indicator is inserted above the splitter in the dialog's main
    layout, so hiding the splitter leaves only the indicator visible.
    On any failure all overlay references are set to None so the
    show/hide helpers degrade to no-ops.
    """
    try:
        # Container holding gif + text; stretches center it horizontally.
        wrapper = QWidget(self)
        wrapper.setVisible(False)
        row = QHBoxLayout(wrapper)
        row.setContentsMargins(0, 8, 0, 8)
        row.setSpacing(10)
        # Left: animated GIF.
        spinner_label = QLabel(wrapper)
        spinner_label.setVisible(False)
        gif_path = get_resource_path("abogen.assets", "loading.gif")
        spinner_movie = None
        if gif_path:
            try:
                spinner_movie = QMovie(gif_path)
                # Keep the GIF small so it doesn't dominate the text.
                spinner_movie.setScaledSize(QSize(25, 25))
                spinner_label.setMovie(spinner_movie)
                spinner_label.setFixedSize(25, 25)
                spinner_label.setVisible(True)
            except Exception:
                spinner_movie = None
        # Right: status text.
        message_label = QLabel(wrapper)
        message_label.setStyleSheet("font-size: 14pt;")
        row.addStretch(1)
        row.addWidget(spinner_label, 0, Qt.AlignmentFlag.AlignVCenter)
        row.addWidget(message_label, 0, Qt.AlignmentFlag.AlignVCenter)
        row.addStretch(1)
        # Insert at the top of the main layout if present.
        try:
            main_layout = self.layout()
            if main_layout is not None:
                main_layout.insertWidget(0, wrapper)
        except Exception:
            pass
        # Keep references for the show/hide helpers.
        self._loading_container = wrapper
        self._loading_gif_label = spinner_label
        self._loading_text_label = message_label
        self._loading_movie = spinner_movie
    except Exception:
        self._loading_container = None
        self._loading_gif_label = None
        self._loading_text_label = None
        self._loading_movie = None
def _show_loading_overlay(self, text: str):
container = getattr(self, "_loading_container", None)
text_lbl = getattr(self, "_loading_text_label", None)
movie = getattr(self, "_loading_movie", None)
gif_lbl = getattr(self, "_loading_gif_label", None)
if container is None or text_lbl is None:
return
text_lbl.setText(text)
if movie is not None and gif_lbl is not None:
try:
movie.start()
gif_lbl.setVisible(True)
except Exception:
pass
container.setVisible(True)
def _hide_loading_overlay(self):
container = getattr(self, "_loading_container", None)
movie = getattr(self, "_loading_movie", None)
if container is None:
return
if movie is not None:
try:
movie.stop()
except Exception:
pass
container.setVisible(False)
def _start_background_load(self):
    """Start a QThread that runs the preprocessing in background."""
    loader = HandlerDialog._LoaderThread(self._preprocess_content)
    loader.finished.connect(self._on_load_finished)
    loader.error.connect(self._on_load_error)
    # Let Qt dispose of the thread object once it has finished.
    loader.finished.connect(loader.deleteLater)
    self._loader_thread = loader
    loader.start()
def _on_load_error(self, err_msg):
    """Show the background-load failure in the preview and restore the UI."""
    logging.error(f"Error loading book in background: {err_msg}")
    preview = getattr(self, "previewEdit", None)
    if preview is not None:
        preview.setPlainText(f"Error loading book: {err_msg}")
    splitter = getattr(self, "splitter", None)
    if splitter is not None:
        splitter.setVisible(True)
    self._hide_loading_overlay()
def _on_load_finished(self):
    """Called in the main thread when background loading finished.

    Builds the tree from the now-populated content, wires up the item
    signals (only after the tree exists, to avoid spurious callbacks
    during construction), then reveals the main UI.
    """
    # Build the tree now that content_texts/content_lengths/etc. are ready
    try:
        # Rebuild tree based on file type
        self._build_tree()
        # Run auto-check if no provided checks are relevant
        if not self._are_provided_checks_relevant():
            self._run_auto_check()
        # Connect signals (after tree exists)
        self.treeWidget.currentItemChanged.connect(self.update_preview)
        self.treeWidget.itemChanged.connect(self.handle_item_check)
        self.treeWidget.itemChanged.connect(
            lambda _: self._update_checkbox_states()
        )
        self.treeWidget.itemDoubleClicked.connect(self.handle_item_double_click)
        # Expand and select first item
        self.treeWidget.expandAll()
        if self.treeWidget.topLevelItemCount() > 0:
            self.treeWidget.setCurrentItem(self.treeWidget.topLevelItem(0))
            self.treeWidget.setFocus()
        # Update checkbox states
        self._update_checkbox_states()
        # Update preview for the current selection
        current = self.treeWidget.currentItem()
        self.update_preview(current)
    except Exception as e:
        logging.error(f"Error finalizing book load: {e}")
    # Show the main UI and hide loading text
    if getattr(self, "splitter", None) is not None:
        self.splitter.setVisible(True)
    self._hide_loading_overlay()
def _preprocess_content(self):
    """Pre-process content from the document (runs in a background thread).

    Results are memoized in HandlerDialog._content_cache keyed by
    (book_path, mtime, file_type, replace_single_newlines).

    Fix over the original: a failed parse is no longer written to the
    cache — previously the empty failure result was cached, blocking any
    retry until the file's mtime changed.
    """
    try:
        mod_time = os.path.getmtime(self.book_path)
    except OSError:
        mod_time = 0
    # Include replace_single_newlines in cache key since it affects text cleaning
    from abogen.utils import load_config
    cfg = load_config()
    replace_single_newlines = cfg.get("replace_single_newlines", True)
    cache_key = (self.book_path, mod_time, self.parser.file_type, replace_single_newlines)
    # Serve from cache when possible.
    cached = HandlerDialog._content_cache.get(cache_key)
    if cached is not None:
        self.content_texts = cached["content_texts"]
        self.content_lengths = cached["content_lengths"]
        if "processed_nav_structure" in cached:
            self.processed_nav_structure = cached["processed_nav_structure"]
        if "book_metadata" in cached:
            self.book_metadata = cached["book_metadata"]
        # Apply to parser so it stays in sync if used elsewhere
        self.parser.content_texts = self.content_texts
        self.parser.content_lengths = self.content_lengths
        self.parser.processed_nav_structure = self.processed_nav_structure
        self.parser.book_metadata = self.book_metadata
        logging.info(f"Using cached content for {os.path.basename(self.book_path)}")
        return
    # Process content if not cached
    try:
        self.parser.process_content(replace_single_newlines=replace_single_newlines)
        self.content_texts = self.parser.content_texts
        self.content_lengths = self.parser.content_lengths
        self.processed_nav_structure = self.parser.processed_nav_structure
        self.book_metadata = self.parser.get_metadata()
    except Exception as e:
        logging.error(f"Error processing content: {e}", exc_info=True)
        # Leave the dialog usable with empty content, but do NOT cache the
        # failure, so reopening the book retries the parse.
        self.content_texts = {}
        self.content_lengths = {}
        return
    # Cache the successfully processed content.
    HandlerDialog._content_cache[cache_key] = {
        "content_texts": self.content_texts,
        "content_lengths": self.content_lengths,
        "processed_nav_structure": self.processed_nav_structure,
        "book_metadata": self.book_metadata,
    }
    logging.info(f"Cached content for {os.path.basename(self.book_path)}")
def _build_tree(self):
    """Populate the tree: info row first, then nav structure (or flat list)."""
    self.treeWidget.clear()
    header = QTreeWidgetItem(self.treeWidget, ["Information"])
    header.setData(0, Qt.ItemDataRole.UserRole, "info:bookinfo")
    header.setFlags(header.flags() & ~Qt.ItemFlag.ItemIsUserCheckable)
    bold_font = header.font(0)
    bold_font.setBold(True)
    header.setFont(0, bold_font)
    if self.processed_nav_structure:
        self._build_tree_from_nav(self.processed_nav_structure, self.treeWidget)
    else:
        # Rare fallback: no structure found, list every content entry flat.
        for chapter_id in self.content_lengths:
            row = QTreeWidgetItem(self.treeWidget, [chapter_id])
            row.setData(0, Qt.ItemDataRole.UserRole, chapter_id)
            row.setFlags(row.flags() | Qt.ItemFlag.ItemIsUserCheckable)
            if self.content_texts.get(chapter_id):
                state = (
                    Qt.CheckState.Checked
                    if chapter_id in self.checked_chapters
                    else Qt.CheckState.Unchecked
                )
                row.setCheckState(0, state)
    # Show expand/collapse decorations only when a parent item exists.
    probe = QTreeWidgetItemIterator(
        self.treeWidget, QTreeWidgetItemIterator.IteratorFlag.HasChildren
    )
    self.treeWidget.setRootIsDecorated(bool(probe.value()))
def _update_checkbox_states(self):
    """Update the checkbox states based on the current checked chapters."""
    # NOTE(review): dead code — this definition is shadowed by the later
    # _update_checkbox_states defined further down in this class, and it
    # calls _update_item_checkbox_state, which is not defined anywhere in
    # this file. Kept byte-identical; candidate for removal.
    for i in range(self.treeWidget.topLevelItemCount()):
        item = self.treeWidget.topLevelItem(i)
        self._update_item_checkbox_state(item)
def _build_tree_from_nav(self, nav_nodes, parent_item, seen_content_hashes=None):
    """Recursively add *nav_nodes* under *parent_item*.

    Nodes repeating earlier content are labeled "(Duplicate)" and made
    uncheckable; *seen_content_hashes* carries content hashes across the
    recursion so duplicates are detected tree-wide.
    """
    if seen_content_hashes is None:
        seen_content_hashes = set()
    for node in nav_nodes:
        title = node.get("title", "Unknown")
        src = node.get("src")
        children = node.get("children", [])
        item = QTreeWidgetItem(parent_item, [title])
        item.setData(0, Qt.ItemDataRole.UserRole, src)
        is_empty = False
        is_duplicate = False
        if src and src in self.content_texts:
            if not self.content_texts[src].strip():
                is_empty = True
            else:
                digest = hash(self.content_texts[src])
                if digest in seen_content_hashes:
                    is_duplicate = True
                else:
                    seen_content_hashes.add(digest)
        if src and not is_empty and not is_duplicate:
            item.setFlags(item.flags() | Qt.ItemFlag.ItemIsUserCheckable)
            state = (
                Qt.CheckState.Checked
                if src in self.checked_chapters
                else Qt.CheckState.Unchecked
            )
            item.setCheckState(0, state)
        elif is_duplicate:
            # Mark as duplicate and remove checkbox
            item.setText(0, f"{title} (Duplicate)")
            item.setFlags(item.flags() & ~Qt.ItemFlag.ItemIsUserCheckable)
        elif children:
            # Containers without own content stay checkable for group toggles.
            item.setFlags(item.flags() | Qt.ItemFlag.ItemIsUserCheckable)
            item.setCheckState(0, Qt.CheckState.Unchecked)
        else:
            item.setFlags(item.flags() & ~Qt.ItemFlag.ItemIsUserCheckable)
        if children:
            self._build_tree_from_nav(children, item, seen_content_hashes)
def _are_provided_checks_relevant(self):
    """True when any previously checked chapter id exists in the new tree."""
    if not self.checked_chapters:
        return False
    walker = QTreeWidgetItemIterator(self.treeWidget)
    while walker.value():
        node = walker.value()
        if node.flags() & Qt.ItemFlag.ItemIsUserCheckable:
            identifier = node.data(0, Qt.ItemDataRole.UserRole)
            # One overlap is enough; no need to collect all identifiers.
            if identifier and identifier in self.checked_chapters:
                return True
        walker += 1
    return False
def _setup_ui(self):
    """Build the dialog layout.

    Left column: selection buttons, chapter tree, output-option
    checkboxes, OK/Cancel. Right column: read-only preview with an
    explanatory note. The two sides sit in a horizontal splitter.
    """
    # Right side: preview pane.
    self.previewEdit = QTextEdit(self)
    self.previewEdit.setReadOnly(True)
    self.previewEdit.setMinimumWidth(300)
    self.previewEdit.setStyleSheet("QTextEdit { border: none; }")
    self.previewInfoLabel = QLabel(
        '*Note: You can modify the content later using the "Edit" button in the input box or by accessing the temporary files directory through settings (if not saved in a project folder).',
        self,
    )
    self.previewInfoLabel.setWordWrap(True)
    self.previewInfoLabel.setStyleSheet(
        "QLabel { color: #666; font-style: italic; }"
    )
    previewLayout = QVBoxLayout()
    previewLayout.setContentsMargins(0, 0, 0, 0)
    previewLayout.addWidget(self.previewEdit, 1)
    previewLayout.addWidget(self.previewInfoLabel, 0)
    rightWidget = QWidget()
    rightWidget.setLayout(previewLayout)
    # Dialog accept/reject buttons.
    buttons = QDialogButtonBox(
        QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel,
        self,
    )
    buttons.accepted.connect(self.accept)
    buttons.rejected.connect(self.reject)
    # Selection helpers; wording adapts to chapters vs pages.
    item_type = "chapters" if self.parser.file_type in ["epub", "markdown"] else "pages"
    self.auto_select_btn = QPushButton(f"Auto-select {item_type}", self)
    self.auto_select_btn.clicked.connect(self.auto_select_chapters)
    self.auto_select_btn.setToolTip(f"Automatically select main {item_type}")
    buttons_layout = QVBoxLayout()
    buttons_layout.setContentsMargins(0, 0, 0, 0)
    buttons_layout.setSpacing(10)
    auto_select_layout = QHBoxLayout()
    auto_select_layout.addWidget(self.auto_select_btn)
    buttons_layout.addLayout(auto_select_layout)
    select_layout = QHBoxLayout()
    self.select_all_btn = QPushButton("Select all", self)
    self.select_all_btn.clicked.connect(self.select_all_chapters)
    self.deselect_all_btn = QPushButton("Clear all", self)
    self.deselect_all_btn.clicked.connect(self.deselect_all_chapters)
    select_layout.addWidget(self.select_all_btn)
    select_layout.addWidget(self.deselect_all_btn)
    buttons_layout.addLayout(select_layout)
    parent_layout = QHBoxLayout()
    self.select_parents_btn = QPushButton("Select parents", self)
    self.select_parents_btn.clicked.connect(self.select_parent_chapters)
    self.deselect_parents_btn = QPushButton("Unselect parents", self)
    self.deselect_parents_btn.clicked.connect(self.deselect_parent_chapters)
    parent_layout.addWidget(self.select_parents_btn)
    parent_layout.addWidget(self.deselect_parents_btn)
    buttons_layout.addLayout(parent_layout)
    expand_layout = QHBoxLayout()
    self.expand_all_btn = QPushButton("Expand All", self)
    self.expand_all_btn.clicked.connect(self.treeWidget.expandAll)
    self.collapse_all_btn = QPushButton("Collapse All", self)
    self.collapse_all_btn.clicked.connect(self.treeWidget.collapseAll)
    expand_layout.addWidget(self.expand_all_btn)
    expand_layout.addWidget(self.collapse_all_btn)
    buttons_layout.addLayout(expand_layout)
    # Left side: buttons, tree, then output-option checkboxes.
    leftLayout = QVBoxLayout()
    leftLayout.setContentsMargins(0, 0, 5, 0)
    leftLayout.addLayout(buttons_layout)
    leftLayout.addWidget(self.treeWidget)
    checkbox_text = (
        "Save each chapter separately"
        if self.parser.file_type in ["epub", "markdown"]
        else "Save each page separately"
    )
    self.save_chapters_checkbox = QCheckBox(checkbox_text, self)
    self.save_chapters_checkbox.setChecked(self.save_chapters_separately)
    self.save_chapters_checkbox.stateChanged.connect(self.on_save_chapters_changed)
    leftLayout.addWidget(self.save_chapters_checkbox)
    self.merge_chapters_checkbox = QCheckBox(
        "Create a merged version at the end", self
    )
    self.merge_chapters_checkbox.setChecked(self.merge_chapters_at_end)
    self.merge_chapters_checkbox.stateChanged.connect(
        self.on_merge_chapters_changed
    )
    leftLayout.addWidget(self.merge_chapters_checkbox)
    self.save_as_project_checkbox = QCheckBox(
        "Save in a project folder with metadata", self
    )
    self.save_as_project_checkbox.setToolTip(
        "Save the converted item in a project folder with metadata files. "
        "(Useful if you want to work with converted items in the future.)"
    )
    self.save_as_project_checkbox.setChecked(self.save_as_project)
    self.save_as_project_checkbox.stateChanged.connect(
        self.on_save_as_project_changed
    )
    leftLayout.addWidget(self.save_as_project_checkbox)
    leftLayout.addWidget(buttons)
    leftWidget = QWidget()
    leftWidget.setLayout(leftLayout)
    # Resizable split between selection column and preview.
    self.splitter = QSplitter(Qt.Orientation.Horizontal)
    self.splitter.addWidget(leftWidget)
    self.splitter.addWidget(rightWidget)
    self.splitter.setSizes([280, 420])
    mainLayout = QVBoxLayout(self)
    mainLayout.addWidget(self.splitter)
    self.setLayout(mainLayout)
def _update_checkbox_states(self):
    """Enable/disable the "save separately" and "merge" checkboxes.

    "Save each chapter/page separately" only makes sense with at least
    two checked chapters (EPUB/Markdown) or two checked page groups
    (PDF); the merge checkbox additionally requires "save separately"
    to be both enabled and checked.
    """
    if (
        not hasattr(self, "save_chapters_checkbox")
        or not self.save_chapters_checkbox
    ):
        return
    # PDFs without bookmarks have no chapter structure to split on.
    if (
        self.parser.file_type == "pdf"
        and hasattr(self, "has_pdf_bookmarks")
        and not self.has_pdf_bookmarks
    ):
        self.save_chapters_checkbox.setEnabled(False)
        self.merge_chapters_checkbox.setEnabled(False)
        return
    checked_count = 0
    if self.parser.file_type in ["epub", "markdown"]:
        # Count checked items; two is enough to decide, so stop early.
        iterator = QTreeWidgetItemIterator(self.treeWidget)
        while iterator.value():
            item = iterator.value()
            if (
                item.flags() & Qt.ItemFlag.ItemIsUserCheckable
                and item.checkState(0) == Qt.CheckState.Checked
            ):
                checked_count += 1
                if checked_count >= 2:
                    break
            iterator += 1
    else:
        # For PDFs, count distinct parent groups rather than single pages.
        parent_groups = set()
        iterator = QTreeWidgetItemIterator(self.treeWidget)
        while iterator.value():
            item = iterator.value()
            if (
                item.flags() & Qt.ItemFlag.ItemIsUserCheckable
                and item.checkState(0) == Qt.CheckState.Checked
            ):
                parent = item.parent()
                if parent and parent != self.treeWidget.invisibleRootItem():
                    parent_groups.add(id(parent))
                else:
                    parent_groups.add(id(item))
            iterator += 1
        checked_count = len(parent_groups)
    min_groups_required = 2
    self.save_chapters_checkbox.setEnabled(checked_count >= min_groups_required)
    self.merge_chapters_checkbox.setEnabled(
        self.save_chapters_checkbox.isEnabled()
        and self.save_chapters_checkbox.isChecked()
    )
def select_all_chapters(self):
    """Check every checkable item in the tree, then resync checked ids."""
    self._block_signals = True
    walker = QTreeWidgetItemIterator(self.treeWidget)
    while walker.value():
        node = walker.value()
        if node.flags() & Qt.ItemFlag.ItemIsUserCheckable:
            node.setCheckState(0, Qt.CheckState.Checked)
        walker += 1
    self._block_signals = False
    self._update_checked_set_from_tree()
def deselect_all_chapters(self):
    """Uncheck every checkable item in the tree, then resync checked ids."""
    self._block_signals = True
    walker = QTreeWidgetItemIterator(self.treeWidget)
    while walker.value():
        node = walker.value()
        if node.flags() & Qt.ItemFlag.ItemIsUserCheckable:
            node.setCheckState(0, Qt.CheckState.Unchecked)
        walker += 1
    self._block_signals = False
    self._update_checked_set_from_tree()
def select_parent_chapters(self):
    """Check every checkable item that has children, then resync checked ids."""
    self._block_signals = True
    walker = QTreeWidgetItemIterator(self.treeWidget)
    while walker.value():
        node = walker.value()
        is_parent = node.childCount() > 0
        if is_parent and node.flags() & Qt.ItemFlag.ItemIsUserCheckable:
            node.setCheckState(0, Qt.CheckState.Checked)
        walker += 1
    self._block_signals = False
    self._update_checked_set_from_tree()
def deselect_parent_chapters(self):
    """Uncheck every checkable item that has children, then resync checked ids."""
    self._block_signals = True
    walker = QTreeWidgetItemIterator(self.treeWidget)
    while walker.value():
        node = walker.value()
        is_parent = node.childCount() > 0
        if is_parent and node.flags() & Qt.ItemFlag.ItemIsUserCheckable:
            node.setCheckState(0, Qt.CheckState.Unchecked)
        walker += 1
    self._block_signals = False
    self._update_checked_set_from_tree()
def auto_select_chapters(self):
    """Slot for the Auto-select button; delegates to the auto-check logic."""
    self._run_auto_check()
def _run_auto_check(self):
    """Run the file-type-specific auto-selection with signals suppressed."""
    self._block_signals = True
    dispatch = {
        "epub": self._run_epub_auto_check,
        "markdown": self._run_markdown_auto_check,
    }
    # Anything that is not EPUB/Markdown is handled as PDF-style pages.
    dispatch.get(self.parser.file_type, self._run_pdf_auto_check)()
    self._block_signals = False
    self._update_checked_set_from_tree()
def _run_epub_auto_check(self):
    """Check EPUB items with substantial content (>1000 chars) or children.

    For each checked parent, children with any content (or children of
    their own) get checked too; everything else is unchecked.
    """
    walker = QTreeWidgetItemIterator(self.treeWidget)
    while walker.value():
        node = walker.value()
        if not (node.flags() & Qt.ItemFlag.ItemIsUserCheckable):
            walker += 1
            continue
        src = node.data(0, Qt.ItemDataRole.UserRole)
        significant = bool(src) and self.content_lengths.get(src, 0) > 1000
        has_children = node.childCount() > 0
        if significant or has_children:
            node.setCheckState(0, Qt.CheckState.Checked)
            if has_children:
                for idx in range(node.childCount()):
                    child = node.child(idx)
                    if not (child.flags() & Qt.ItemFlag.ItemIsUserCheckable):
                        continue
                    child_src = child.data(0, Qt.ItemDataRole.UserRole)
                    child_has_content = bool(child_src) and (
                        self.content_lengths.get(child_src, 0) > 0
                    )
                    if child_has_content or child.childCount() > 0:
                        child.setCheckState(0, Qt.CheckState.Checked)
        else:
            node.setCheckState(0, Qt.CheckState.Unchecked)
        walker += 1
def _run_markdown_auto_check(self):
"""Auto-select markdown chapters with significant content"""
iterator = QTreeWidgetItemIterator(self.treeWidget)
while iterator.value():
item = iterator.value()
if not (item.flags() & Qt.ItemFlag.ItemIsUserCheckable):
iterator += 1
continue
identifier = item.data(0, Qt.ItemDataRole.UserRole)
# Select chapters with content > 500 characters or parent items
has_significant_content = (
identifier and self.content_lengths.get(identifier, 0) > 500
)
is_parent = item.childCount() > 0
if has_significant_content or is_parent:
item.setCheckState(0, Qt.CheckState.Checked)
# Also check children if this is a parent
if is_parent:
for i in range(item.childCount()):
child = item.child(i)
if child.flags() & Qt.ItemFlag.ItemIsUserCheckable:
child_identifier = child.data(0, Qt.ItemDataRole.UserRole)
child_has_content = (
child_identifier
and self.content_lengths.get(child_identifier, 0) > 0
)
child_is_parent = child.childCount() > 0
if child_has_content or child_is_parent:
child.setCheckState(0, Qt.CheckState.Checked)
else:
item.setCheckState(0, Qt.CheckState.Unchecked)
iterator += 1
def _run_pdf_auto_check(self):
iterator = QTreeWidgetItemIterator(self.treeWidget)
while iterator.value():
item = iterator.value()
if not (item.flags() & Qt.ItemFlag.ItemIsUserCheckable):
iterator += 1
continue
identifier = item.data(0, Qt.ItemDataRole.UserRole)
if not identifier:
iterator += 1
continue
# Logic: Check item if it has content (already handled by ItemIsUserCheckable flag really)
# But duplicate logic from previous implementation:
item.setCheckState(0, Qt.CheckState.Checked)
iterator += 1
def _update_checked_set_from_tree(self):
self.checked_chapters.clear()
iterator = QTreeWidgetItemIterator(self.treeWidget)
while iterator.value():
item = iterator.value()
if item.checkState(0) == Qt.CheckState.Checked:
identifier = item.data(0, Qt.ItemDataRole.UserRole)
if identifier:
self.checked_chapters.add(identifier)
iterator += 1
if hasattr(self, "save_chapters_checkbox") and self.save_chapters_checkbox:
self._update_checkbox_states()
def handle_item_check(self, item):
if self._block_signals:
return
self._block_signals = True
if item.flags() & Qt.ItemFlag.ItemIsUserCheckable:
for i in range(item.childCount()):
child = item.child(i)
if child.flags() & Qt.ItemFlag.ItemIsUserCheckable:
child.setCheckState(0, item.checkState(0))
self._block_signals = False
self._update_checked_set_from_tree()
def handle_item_double_click(self, item, column=0):
if item.flags() & Qt.ItemFlag.ItemIsUserCheckable and item.childCount() == 0:
rect = self.treeWidget.visualItemRect(item)
checkbox_width = 20
mouse_pos = self.treeWidget.mapFromGlobal(self.treeWidget.cursor().pos())
if mouse_pos.x() > rect.x() + checkbox_width:
new_state = (
Qt.CheckState.Unchecked
if item.checkState(0) == Qt.CheckState.Checked
else Qt.CheckState.Checked
)
item.setCheckState(0, new_state)
def update_preview(self, current):
if not current:
self.previewEdit.clear()
return
identifier = current.data(0, Qt.ItemDataRole.UserRole)
if identifier == "info:bookinfo":
self._display_book_info()
return
text = None
if self.parser.file_type == "epub":
text = self.content_texts.get(identifier)
else:
text = self.content_texts.get(identifier)
if text is None:
title = current.text(0)
self.previewEdit.setPlainText(
f"{title}\n\n(No content available for this item)"
)
elif not text.strip():
title = current.text(0)
self.previewEdit.setPlainText(f"{title}\n\n(This item is empty)")
else:
# Apply clean_text to preview so replace_single_newlines setting is respected
cleaned_text = clean_text(text)
self.previewEdit.setPlainText(cleaned_text)
def _display_book_info(self):
self.previewEdit.clear()
html_content = "<html><body style='font-family: Arial, sans-serif;'>"
cover_image = self.book_metadata.get("cover_image")
if cover_image:
try:
image_data = base64.b64encode(cover_image).decode("utf-8")
image_type = "jpeg"
if cover_image.startswith(b"\x89PNG"):
image_type = "png"
elif cover_image.startswith(b"GIF"):
image_type = "gif"
html_content += (
f"<div style='text-align: center; margin-bottom: 20px;'>"
)
html_content += (
f"<img src='data:image/{image_type};base64,{image_data}' "
)
html_content += "width='300' style='object-fit: contain;' /></div>"
except Exception as e:
html_content += f"<p>Error displaying cover image: {str(e)}</p>"
title = self.book_metadata.get("title")
if title:
html_content += (
f"<h2 style='text-align: center;'>{title}</h2>"
)
authors = self.book_metadata.get("authors")
if authors:
authors_text = ", ".join(authors)
html_content += f"<p style='text-align: center; font-style: italic;'>By {authors_text}</p>"
publisher = self.book_metadata.get("publisher")
pub_year = self.book_metadata.get("publication_year")
if publisher or pub_year:
pub_info = []
if publisher:
pub_info.append(f"Published by {publisher}")
if pub_year:
pub_info.append(f"Year: {pub_year}")
html_content += f"<p style='text-align: center;'>{' | '.join(pub_info)}</p>"
html_content += "<hr/>"
description = self.book_metadata.get("description")
if description:
# Use pre-compiled pattern for better performance
desc = _HTML_TAG_PATTERN.sub("", description)
html_content += f"<h3>Description:</h3><p>{desc}</p>"
if self.parser.file_type == "pdf":
# Access pdf_doc from parser if available
pdf_doc = getattr(self.parser, "pdf_doc", None)
page_count = len(pdf_doc) if pdf_doc else 0
html_content += f"<p>File type: PDF<br>Page count: {page_count}</p>"
html_content += "</body></html>"
self.previewEdit.setHtml(html_content)
    def _extract_book_metadata(self):
        """Collect book metadata for the currently open document.

        Returns a dict with keys title, authors, description, cover_image,
        publisher and publication_year; unavailable fields stay None (authors
        defaults to []). Sources by file type: EPUB Dublin Core metadata,
        markdown YAML frontmatter / first H1 heading, or the PDF document info
        dictionary (first page rendered as the cover).
        """
        metadata = {
            "title": None,
            "authors": [],
            "description": None,
            "cover_image": None,
            "publisher": None,
            "publication_year": None,
        }
        if self.parser.file_type == "epub":
            # Each DC field is read independently so one malformed entry
            # doesn't prevent the others from being extracted.
            try:
                title_items = self.book.get_metadata("DC", "title")
                if title_items and len(title_items) > 0:
                    metadata["title"] = title_items[0][0]
            except Exception as e:
                logging.warning(f"Error extracting title metadata: {e}")
            try:
                author_items = self.book.get_metadata("DC", "creator")
                if author_items:
                    metadata["authors"] = [
                        author[0] for author in author_items if len(author) > 0
                    ]
            except Exception as e:
                logging.warning(f"Error extracting author metadata: {e}")
            try:
                desc_items = self.book.get_metadata("DC", "description")
                if desc_items and len(desc_items) > 0:
                    metadata["description"] = desc_items[0][0]
            except Exception as e:
                logging.warning(f"Error extracting description metadata: {e}")
            try:
                publisher_items = self.book.get_metadata("DC", "publisher")
                if publisher_items and len(publisher_items) > 0:
                    metadata["publisher"] = publisher_items[0][0]
            except Exception as e:
                logging.warning(f"Error extracting publisher metadata: {e}")
            # Try to extract publication year
            try:
                date_items = self.book.get_metadata("DC", "date")
                if date_items and len(date_items) > 0:
                    date_str = date_items[0][0]
                    # Try to extract just the year from the date string
                    year_match = re.search(r"\b(19|20)\d{2}\b", date_str)
                    if year_match:
                        metadata["publication_year"] = year_match.group(0)
                    else:
                        # No 4-digit year found; keep the raw date string.
                        metadata["publication_year"] = date_str
            except Exception as e:
                logging.warning(f"Error extracting publication date metadata: {e}")
            # Prefer the item explicitly marked as cover in the EPUB manifest.
            for item in self.book.get_items_of_type(ebooklib.ITEM_COVER):
                metadata["cover_image"] = item.get_content()
                break
            # Fallback: any image whose filename mentions "cover".
            if not metadata["cover_image"]:
                for item in self.book.get_items_of_type(ebooklib.ITEM_IMAGE):
                    if "cover" in item.get_name().lower():
                        metadata["cover_image"] = item.get_content()
                        break
        elif self.parser.file_type == "markdown":
            # Extract metadata from markdown frontmatter or first heading
            if self.markdown_text:
                # Try to extract YAML frontmatter
                frontmatter_match = re.match(
                    r"^---\s*\n(.*?)\n---\s*\n", self.markdown_text, re.DOTALL
                )
                if frontmatter_match:
                    try:
                        frontmatter = frontmatter_match.group(1)
                        # Simple YAML-like parsing for common fields
                        title_match = re.search(
                            r"^title:\s*(.+)$",
                            frontmatter,
                            re.MULTILINE | re.IGNORECASE,
                        )
                        if title_match:
                            # strip("\"'") drops surrounding quote characters.
                            metadata["title"] = (
                                title_match.group(1).strip().strip("\"'")
                            )
                        author_match = re.search(
                            r"^author:\s*(.+)$",
                            frontmatter,
                            re.MULTILINE | re.IGNORECASE,
                        )
                        if author_match:
                            metadata["authors"] = [
                                author_match.group(1).strip().strip("\"'")
                            ]
                        desc_match = re.search(
                            r"^description:\s*(.+)$",
                            frontmatter,
                            re.MULTILINE | re.IGNORECASE,
                        )
                        if desc_match:
                            metadata["description"] = (
                                desc_match.group(1).strip().strip("\"'")
                            )
                        date_match = re.search(
                            r"^date:\s*(.+)$", frontmatter, re.MULTILINE | re.IGNORECASE
                        )
                        if date_match:
                            date_str = date_match.group(1).strip().strip("\"'")
                            year_match = re.search(r"\b(19|20)\d{2}\b", date_str)
                            if year_match:
                                metadata["publication_year"] = year_match.group(0)
                    except Exception as e:
                        logging.warning(f"Error parsing markdown frontmatter: {e}")
                # Fallback: use first H1 header as title if no frontmatter title
                if not metadata["title"] and self.markdown_toc:
                    # Find the first level 1 header
                    first_h1 = next(
                        (h for h in self.markdown_toc if h["level"] == 1), None
                    )
                    if first_h1:
                        metadata["title"] = first_h1["name"]
        else:
            # PDF: read the document info dictionary.
            pdf_info = self.pdf_doc.metadata
            if pdf_info:
                metadata["title"] = pdf_info.get("title", None)
                author = pdf_info.get("author", None)
                if author:
                    metadata["authors"] = [author]
                metadata["description"] = pdf_info.get("subject", None)
                keywords = pdf_info.get("keywords", None)
                if keywords:
                    # Append keywords to the description rather than losing them.
                    if metadata["description"]:
                        metadata["description"] += f"\n\nKeywords: {keywords}"
                    else:
                        metadata["description"] = f"Keywords: {keywords}"
                metadata["publisher"] = pdf_info.get("creator", None)
                # Try to extract publication date from PDF metadata
                # (PDF dates are formatted like "D:YYYYMMDD...").
                if "creationDate" in pdf_info:
                    date_str = pdf_info["creationDate"]
                    year_match = re.search(r"D:(\d{4})", date_str)
                    if year_match:
                        metadata["publication_year"] = year_match.group(1)
                elif "modDate" in pdf_info:
                    date_str = pdf_info["modDate"]
                    year_match = re.search(r"D:(\d{4})", date_str)
                    if year_match:
                        metadata["publication_year"] = year_match.group(1)
            if len(self.pdf_doc) > 0:
                # Render page 1 at 2x scale as a stand-in cover image.
                try:
                    pix = self.pdf_doc[0].get_pixmap(matrix=fitz.Matrix(2, 2))
                    metadata["cover_image"] = pix.tobytes("png")
                except Exception:
                    pass
        return metadata
def get_selected_text(self):
# If a background loader thread is running, wait for it to finish to
# preserve compatibility with callers that expect content to be ready
# when they create a HandlerDialog and immediately request selected text.
try:
if (
hasattr(self, "_loader_thread")
and getattr(self, "_loader_thread") is not None
):
# Wait for thread to finish (blocks until done)
if self._loader_thread.isRunning():
self._loader_thread.wait()
except Exception:
pass
if self.parser.file_type == "epub":
return self._get_epub_selected_text()
elif self.parser.file_type == "markdown":
return self._get_markdown_selected_text()
else:
return self._get_pdf_selected_text()
def _format_metadata_tags(self):
"""Format metadata tags for insertion at the beginning of the text"""
import datetime
from abogen.utils import get_user_cache_path
metadata = self.book_metadata
filename = os.path.splitext(os.path.basename(self.book_path))[0]
current_year = str(datetime.datetime.now().year)
# Get values with fallbacks
title = metadata.get("title") or filename
authors = metadata.get("authors") or ["Unknown"]
authors_text = ", ".join(authors)
album_artist = authors_text or "Unknown"
year = (
metadata.get("publication_year") or current_year
) # Use publication year if available
# Count chapters/pages
total_chapters = len(self.checked_chapters)
chapter_text = (
f"{total_chapters} {'Chapters' if self.parser.file_type == 'epub' else 'Pages'}"
)
# Handle cover image
cover_tag = ""
if metadata.get("cover_image"):
try:
import uuid
cache_dir = get_user_cache_path()
cover_path = os.path.join(cache_dir, f"cover_{uuid.uuid4()}.jpg")
cover_path = os.path.normpath(cover_path)
with open(cover_path, "wb") as f:
f.write(metadata["cover_image"])
cover_tag = f"<<METADATA_COVER_PATH:{cover_path}>>"
except Exception as e:
logging.warning(f"Failed to save cover image: {e}")
# Format metadata tags
metadata_tags = [
f"<<METADATA_TITLE:{title}>>",
f"<<METADATA_ARTIST:{authors_text}>>",
f"<<METADATA_ALBUM:{title} ({chapter_text})>>",
f"<<METADATA_YEAR:{year}>>",
f"<<METADATA_ALBUM_ARTIST:{album_artist}>>",
f"<<METADATA_COMPOSER:Narrator>>",
f"<<METADATA_GENRE:Audiobook>>",
]
if cover_tag:
metadata_tags.append(cover_tag)
return "\n".join(metadata_tags)
def _get_markdown_selected_text(self):
"""Get selected text from markdown chapters"""
all_checked_identifiers = set()
chapter_texts = []
# Add metadata tags at the beginning
metadata_tags = self._format_metadata_tags()
item_order_counter = 0
ordered_checked_items = []
iterator = QTreeWidgetItemIterator(self.treeWidget)
while iterator.value():
item = iterator.value()
item_order_counter += 1
if item.checkState(0) == Qt.CheckState.Checked:
identifier = item.data(0, Qt.ItemDataRole.UserRole)
if identifier and identifier != "info:bookinfo":
all_checked_identifiers.add(identifier)
ordered_checked_items.append((item_order_counter, item, identifier))
iterator += 1
ordered_checked_items.sort(key=lambda x: x[0])
for order, item, identifier in ordered_checked_items:
text = self.content_texts.get(identifier)
if text and text.strip():
title = item.text(0)
# Remove leading dashes from title using pre-compiled pattern
title = _LEADING_DASH_PATTERN.sub("", title).strip()
marker = f"<<CHAPTER_MARKER:{title}>>"
chapter_texts.append(marker + "\n" + text)
full_text = metadata_tags + "\n\n" + "\n\n".join(chapter_texts)
return full_text, all_checked_identifiers
def _get_epub_selected_text(self):
all_checked_identifiers = set()
chapter_texts = []
# Add metadata tags at the beginning
metadata_tags = self._format_metadata_tags()
item_order_counter = 0
ordered_checked_items = []
iterator = QTreeWidgetItemIterator(self.treeWidget)
while iterator.value():
item = iterator.value()
item_order_counter += 1
if item.checkState(0) == Qt.CheckState.Checked:
identifier = item.data(0, Qt.ItemDataRole.UserRole)
if identifier and identifier != "info:bookinfo":
all_checked_identifiers.add(identifier)
ordered_checked_items.append((item_order_counter, item, identifier))
iterator += 1
ordered_checked_items.sort(key=lambda x: x[0])
for order, item, identifier in ordered_checked_items:
text = self.content_texts.get(identifier)
if text and text.strip():
title = item.text(0)
# Use pre-compiled pattern for better performance
title = _LEADING_DASH_PATTERN.sub("", title).strip()
marker = f"<<CHAPTER_MARKER:{title}>>"
chapter_texts.append(marker + "\n" + text)
full_text = metadata_tags + "\n\n" + "\n\n".join(chapter_texts)
return full_text, all_checked_identifiers
    def _get_pdf_selected_text(self):
        """Assemble the export text for checked PDF pages/sections.

        Returns (full_text, checked_identifiers). Two strategies:
        - PDFs without bookmarks: concatenate checked pages in page order,
          without chapter markers.
        - PDFs with bookmarks: walk the tree, merging a checked parent with its
          checked children into one <<CHAPTER_MARKER>> section; children of an
          unchecked parent are emitted under the parent's title. The
          included_text_ids set prevents any page's text appearing twice.
        """
        all_checked_identifiers = set()
        included_text_ids = set()
        section_titles = []
        all_content = []
        # Add metadata tags at the beginning
        metadata_tags = self._format_metadata_tags()
        pdf_has_no_bookmarks = (
            hasattr(self, "has_pdf_bookmarks") and not self.has_pdf_bookmarks
        )
        # First pass: record every checked identifier regardless of strategy.
        iterator = QTreeWidgetItemIterator(self.treeWidget)
        while iterator.value():
            item = iterator.value()
            if item.checkState(0) == Qt.CheckState.Checked:
                identifier = item.data(0, Qt.ItemDataRole.UserRole)
                if identifier:
                    all_checked_identifiers.add(identifier)
            iterator += 1
        if pdf_has_no_bookmarks:
            # No bookmark tree: sort "page_<n>" identifiers numerically and
            # join the page texts directly.
            sorted_page_ids = sorted(
                [id for id in all_checked_identifiers if id.startswith("page_")],
                key=lambda x: int(x.split("_")[1]) if x.split("_")[1].isdigit() else 0,
            )
            for page_id in sorted_page_ids:
                if page_id not in included_text_ids:
                    text = self.content_texts.get(page_id, "")
                    if text:
                        all_content.append(text)
                        included_text_ids.add(page_id)
            return (
                metadata_tags + "\n\n" + "\n\n".join(all_content),
                all_checked_identifiers,
            )
        # Second pass (bookmarked PDFs): build marker-delimited sections.
        iterator = QTreeWidgetItemIterator(self.treeWidget)
        while iterator.value():
            item = iterator.value()
            if item.childCount() > 0:
                parent_checked = item.checkState(0) == Qt.CheckState.Checked
                parent_id = item.data(0, Qt.ItemDataRole.UserRole)
                parent_title = item.text(0)
                checked_children = []
                for i in range(item.childCount()):
                    child = item.child(i)
                    child_id = child.data(0, Qt.ItemDataRole.UserRole)
                    if (
                        child.checkState(0) == Qt.CheckState.Checked
                        and child_id
                        and child_id not in included_text_ids
                    ):
                        checked_children.append((child, child_id))
                if parent_checked and parent_id and parent_id not in included_text_ids:
                    # Checked parent: its text and all checked children merge
                    # into a single section under the parent's title.
                    combined_text = self.content_texts.get(parent_id, "")
                    for child, child_id in checked_children:
                        child_text = self.content_texts.get(child_id, "")
                        if child_text:
                            combined_text += "\n\n" + child_text
                            included_text_ids.add(child_id)
                    if combined_text.strip():
                        # Use pre-compiled pattern for better performance
                        title = _LEADING_SIMPLE_DASH_PATTERN.sub(
                            "", parent_title
                        ).strip()
                        marker = f"<<CHAPTER_MARKER:{title}>>"
                        section_titles.append((title, marker + "\n" + combined_text))
                    included_text_ids.add(parent_id)
                elif not parent_checked and checked_children:
                    # Unchecked parent with checked children: only the first
                    # child's text carries the chapter marker.
                    # Use pre-compiled pattern for better performance
                    title = _LEADING_SIMPLE_DASH_PATTERN.sub("", parent_title).strip()
                    marker = f"<<CHAPTER_MARKER:{title}>>"
                    for idx, (child, child_id) in enumerate(checked_children):
                        text = self.content_texts.get(child_id, "")
                        if text:
                            if idx == 0:
                                section_titles.append((title, marker + "\n" + text))
                            else:
                                section_titles.append((title, text))
                            included_text_ids.add(child_id)
            elif item.flags() & Qt.ItemFlag.ItemIsUserCheckable:
                # Leaf item: emit its own section when checked and not yet used.
                identifier = item.data(0, Qt.ItemDataRole.UserRole)
                if (
                    identifier
                    and identifier not in included_text_ids
                    and item.checkState(0) == Qt.CheckState.Checked
                ):
                    text = self.content_texts.get(identifier, "")
                    if text:
                        title = item.text(0)
                        # Use pre-compiled pattern for better performance
                        title = _LEADING_SIMPLE_DASH_PATTERN.sub("", title).strip()
                        marker = f"<<CHAPTER_MARKER:{title}>>"
                        section_titles.append((title, marker + "\n" + text))
                        included_text_ids.add(identifier)
            iterator += 1
        return (
            metadata_tags + "\n\n" + "\n\n".join([t[1] for t in section_titles]),
            all_checked_identifiers,
        )
def on_save_chapters_changed(self, state):
self.save_chapters_separately = bool(state)
self.merge_chapters_checkbox.setEnabled(self.save_chapters_separately)
HandlerDialog._save_chapters_separately = self.save_chapters_separately
def on_merge_chapters_changed(self, state):
self.merge_chapters_at_end = bool(state)
HandlerDialog._merge_chapters_at_end = self.merge_chapters_at_end
def on_save_as_project_changed(self, state):
self.save_as_project = bool(state)
HandlerDialog._save_as_project = self.save_as_project
def get_save_chapters_separately(self):
return (
self.save_chapters_separately
if self.save_chapters_checkbox.isEnabled()
else False
)
def get_merge_chapters_at_end(self):
return self.merge_chapters_at_end
def get_save_as_project(self):
return self.save_as_project
def check_selected_items(self):
self.set_selected_items_checked(True)
def uncheck_selected_items(self):
self.set_selected_items_checked(False)
def set_selected_items_checked(self, state: bool):
print(f"Checking selected items: {state}")
self.treeWidget.blockSignals(True)
for item in self.treeWidget.selectedItems():
if item.flags() & Qt.ItemFlag.ItemIsUserCheckable:
item.setCheckState(
0, Qt.CheckState.Checked if state else Qt.CheckState.Unchecked
)
self.treeWidget.blockSignals(False)
self._update_checked_set_from_tree()
def on_tree_context_menu(self, pos):
item = self.treeWidget.itemAt(pos)
# multi-select context menu
if self.treeWidget.selectedItems() and len(self.treeWidget.selectedItems()) > 1:
menu = QMenu(self)
action = menu.addAction("Select")
action.triggered.connect(self.check_selected_items)
action = menu.addAction("Clear")
action.triggered.connect(self.uncheck_selected_items)
menu.exec(self.treeWidget.mapToGlobal(pos))
return
if (
not item
or item.childCount() == 0
or not (item.flags() & Qt.ItemFlag.ItemIsUserCheckable)
):
return
menu = QMenu(self)
checked = item.checkState(0) == Qt.CheckState.Checked
text = "Unselect only this" if checked else "Select only this"
action = menu.addAction(text)
def do_toggle():
self.treeWidget.blockSignals(True)
new_state = Qt.CheckState.Unchecked if checked else Qt.CheckState.Checked
item.setCheckState(0, new_state)
self.treeWidget.blockSignals(False)
self._update_checked_set_from_tree()
action.triggered.connect(do_toggle)
menu.exec(self.treeWidget.mapToGlobal(pos))

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

import os
import sys
import platform
import atexit
import signal
from abogen.utils import get_resource_path, load_config, prevent_sleep_end
# Fix PyTorch DLL loading issue ([WinError 1114]) on Windows before importing PyQt6
if platform.system() == "Windows":
    import ctypes
    from importlib.util import find_spec
    try:
        # Locate torch's bundled c10.dll next to the package and pre-load it,
        # so the later (indirect) torch import doesn't fail with WinError 1114.
        if (
            (spec := find_spec("torch"))
            and spec.origin
            and os.path.exists(
                dll_path := os.path.join(os.path.dirname(spec.origin), "lib", "c10.dll")
            )
        ):
            ctypes.CDLL(os.path.normpath(dll_path))
    except Exception:
        # Best-effort: torch may be absent or laid out differently.
        pass
# Qt platform plugin detection (fixes #59)
try:
    from PyQt6.QtCore import QLibraryInfo
    # Get the path to the plugins directory
    plugins = QLibraryInfo.path(QLibraryInfo.LibraryPath.PluginsPath)
    # Normalize path to use the OS-native separators and absolute path
    platform_dir = os.path.normpath(os.path.join(plugins, "platforms"))
    # Ensure we work with an absolute path for clarity
    platform_dir = os.path.abspath(platform_dir)
    if os.path.isdir(platform_dir):
        # Point Qt at its own platform plugins explicitly so startup doesn't
        # fail with "could not find the Qt platform plugin".
        os.environ["QT_QPA_PLATFORM_PLUGIN_PATH"] = platform_dir
        print("QT_QPA_PLATFORM_PLUGIN_PATH set to:", platform_dir)
    else:
        print("PyQt6 platform plugins not found at", platform_dir)
except ImportError:
    print("PyQt6 not installed.")
# Pre-load "libxcb-cursor" on Linux (fixes #101)
# Qt's xcb platform plugin needs libxcb-cursor; prefer the system copy and
# fall back to the architecture-specific library bundled with the package.
if platform.system() == "Linux":
    arch = platform.machine().lower()
    lib_filename = {"x86_64": "libxcb-cursor-amd64.so.0", "amd64": "libxcb-cursor-amd64.so.0", "aarch64": "libxcb-cursor-arm64.so.0", "arm64": "libxcb-cursor-arm64.so.0"}.get(arch)
    if lib_filename:
        import ctypes
        try:
            # Try to load the system libxcb-cursor.so.0 first
            ctypes.CDLL('libxcb-cursor.so.0', mode=ctypes.RTLD_GLOBAL)
        except OSError:
            # System lib not available, load the bundled version
            lib_path = get_resource_path('abogen.libs', lib_filename)
            if lib_path:
                try:
                    ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
                except OSError:
                    # If it fails (e.g. wrong glibc version on very old systems),
                    # we simply ignore it and hope the system has the library.
                    pass
# Set application ID for Windows taskbar icon
# Without an explicit AppUserModelID, Windows groups the window under the
# Python interpreter's taskbar icon instead of the app's own icon.
if platform.system() == "Windows":
    try:
        from abogen.constants import PROGRAM_NAME, VERSION
        import ctypes
        app_id = f"{PROGRAM_NAME}.{VERSION}"
        ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID(app_id)
    except Exception as e:
        print("Warning: failed to set AppUserModelID:", e)
from PyQt6.QtWidgets import QApplication
from PyQt6.QtGui import QIcon
from PyQt6.QtCore import (
QLibraryInfo,
qInstallMessageHandler,
QtMsgType,
)
# Add the directory to Python path
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
# Set Hugging Face Hub environment variables
# These must be set before any huggingface_hub import picks them up.
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"  # Disable Hugging Face telemetry
os.environ["HF_HUB_ETAG_TIMEOUT"] = "10"  # Metadata request timeout (seconds)
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "10"  # File download timeout (seconds)
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"  # Disable symlinks warning
if load_config().get("disable_kokoro_internet", False):
    print("INFO: Kokoro's internet access is disabled.")
    os.environ["HF_HUB_OFFLINE"] = "1"  # Disable Hugging Face Hub internet access
from abogen.pyqt.gui import abogen
from abogen.constants import PROGRAM_NAME, VERSION
# Set environment variables for AMD ROCm
os.environ["MIOPEN_FIND_MODE"] = "FAST"
os.environ["MIOPEN_CONV_PRECISE_ROCM_TUNING"] = "0"
# Reset sleep states
# prevent_sleep_end re-enables system sleep that the app may have inhibited.
atexit.register(prevent_sleep_end)
# Also handle signals (Ctrl+C, kill, etc.)
def _cleanup_sleep(signum, frame):
    # Re-enable sleep before terminating on SIGINT/SIGTERM.
    prevent_sleep_end()
    sys.exit(0)
signal.signal(signal.SIGINT, _cleanup_sleep)
signal.signal(signal.SIGTERM, _cleanup_sleep)
# Ensure sys.stdout and sys.stderr are valid in GUI mode
# (they are None when launched without a console, e.g. pythonw on Windows).
if sys.stdout is None:
    sys.stdout = open(os.devnull, "w")
if sys.stderr is None:
    sys.stderr = open(os.devnull, "w")
# Enable MPS GPU acceleration on Mac Apple Silicon
if platform.system() == "Darwin" and platform.processor() == "arm":
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
# Custom message handler to filter out specific Qt warnings
def qt_message_handler(mode, context, message):
    """Drop known-noisy Wayland messages; print everything else by severity."""
    suppressed_fragments = (
        "Wayland does not support QWindow::requestActivate()",
        "setGrabPopup called with a parent, QtWaylandClient",
    )
    if any(fragment in message for fragment in suppressed_fragments):
        return
    # In PyQt6 the message type arrives as a QtMsgType enum member.
    if mode == QtMsgType.QtWarningMsg:
        print(f"Qt Warning: {message}")
    elif mode == QtMsgType.QtCriticalMsg:
        print(f"Qt Critical: {message}")
    elif mode == QtMsgType.QtFatalMsg:
        print(f"Qt Fatal: {message}")
    elif mode == QtMsgType.QtInfoMsg:
        print(f"Qt Info: {message}")
# Install the custom message handler
qInstallMessageHandler(qt_message_handler)
# Handle Wayland on Linux GNOME
# Only force the wayland backend when running a GNOME Wayland session and the
# user hasn't already chosen a platform via QT_QPA_PLATFORM.
if platform.system() == "Linux":
    xdg_session = os.environ.get("XDG_SESSION_TYPE", "").lower()
    desktop = os.environ.get("XDG_CURRENT_DESKTOP", "").lower()
    if (
        "gnome" in desktop
        and xdg_session == "wayland"
        and "QT_QPA_PLATFORM" not in os.environ
    ):
        os.environ["QT_QPA_PLATFORM"] = "wayland"
def main():
    """Console entry point: create the QApplication and launch the main window."""
    app = QApplication(sys.argv)
    # Window icon is resolved from the packaged assets.
    icon_path = get_resource_path("abogen.assets", "icon.ico")
    if icon_path:
        app.setWindowIcon(QIcon(icon_path))
    if platform.system() == "Linux":
        # Match the .desktop entry so docks/taskbars group the window correctly;
        # older Qt bindings may lack this method.
        try:
            app.setDesktopFileName("abogen")
        except AttributeError:
            pass
    window = abogen()
    window.show()
    sys.exit(app.exec())
if __name__ == "__main__":
    main()
"""
Pre-download dialog and worker for Abogen
This module consolidates pre-download logic for Kokoro voices and model
and spaCy language models. The code favors clarity, avoids duplication,
and handles optional dependencies gracefully.
"""
from typing import List, Optional, Tuple
import importlib
import importlib.util
from PyQt6.QtWidgets import (
QDialog,
QVBoxLayout,
QHBoxLayout,
QLabel,
QPushButton,
QSpacerItem,
QSizePolicy,
)
from PyQt6.QtCore import QThread, pyqtSignal
from abogen.constants import COLORS, VOICES_INTERNAL
from abogen.spacy_utils import SPACY_MODELS
import abogen.hf_tracker
# Helpers
def _unique_sorted_models() -> List[str]:
    """Return every distinct spaCy model package name, sorted alphabetically."""
    return sorted({model for model in SPACY_MODELS.values()})
def _is_package_installed(pkg_name: str) -> bool:
"""Return True if a package with the given name can be imported (site-packages)."""
try:
return importlib.util.find_spec(pkg_name) is not None
except Exception:
return False
# NOTE: explicit HF cache helper removed; we use try_to_load_from_cache in-scope where needed
class PreDownloadWorker(QThread):
    """Worker thread to download required models/voices.
    Emits human-readable messages via `progress`. Uses `category_done` to indicate
    a category (voices/model/spacy) finished successfully. Emits `error` on exception
    and `finished` after all work completes.
    """
    # Emit (category, status, message)
    progress = pyqtSignal(str, str, str)
    category_done = pyqtSignal(str)
    finished = pyqtSignal()
    error = pyqtSignal(str)
    def __init__(self, parent=None):
        """Initialize the worker; does not start any download until run()."""
        super().__init__(parent)
        # Cooperative cancellation flag, checked between each download.
        self._cancelled = False
        # repo and filenames used for Kokoro model
        self._repo_id = "hexgrad/Kokoro-82M"
        self._model_files = ["kokoro-v1_0.pth", "config.json"]
        # Track download success per category
        self._voices_success = False
        self._model_success = False
        self._spacy_success = False
        # Suppress HF tracker warnings during downloads
        self._original_emitter = abogen.hf_tracker.show_warning_signal_emitter
    def cancel(self) -> None:
        """Request cancellation; honored at the next per-item checkpoint."""
        self._cancelled = True
    def run(self) -> None:
        """Download voices, model files and spaCy models sequentially.

        Each category emits `category_done` only when it completed without a
        warning; the HF tracker's warning emitter is disabled for the duration
        and always restored in the finally block.
        """
        # Suppress HF tracker warnings during downloads
        abogen.hf_tracker.show_warning_signal_emitter = None
        try:
            self._download_kokoro_voices()
            if self._cancelled:
                return
            if self._voices_success:
                self.category_done.emit("voices")
            self._download_kokoro_model()
            if self._cancelled:
                return
            if self._model_success:
                self.category_done.emit("model")
            self._download_spacy_models()
            if self._cancelled:
                return
            if self._spacy_success:
                self.category_done.emit("spacy")
            self.finished.emit()
        except Exception as exc:  # pragma: no cover - best-effort reporting
            self.error.emit(str(exc))
        finally:
            # Restore original emitter
            abogen.hf_tracker.show_warning_signal_emitter = self._original_emitter
    # Kokoro voices
    def _download_kokoro_voices(self) -> None:
        """Fetch every Kokoro voice file, skipping ones already cached.

        Sets _voices_success False on cancellation, a missing huggingface_hub,
        or any individual download failure (remaining voices still attempted).
        """
        self._voices_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "voice", "warning", "huggingface_hub not installed, skipping voices..."
            )
            self._voices_success = False
            return
        voice_list = VOICES_INTERNAL
        for idx, voice in enumerate(voice_list, start=1):
            if self._cancelled:
                self._voices_success = False
                return
            filename = f"voices/{voice}.pt"
            # Cache hit: report as installed without touching the network.
            if try_to_load_from_cache(repo_id=self._repo_id, filename=filename):
                self.progress.emit(
                    "voice",
                    "installed",
                    f"{idx}/{len(voice_list)}: {voice} already present",
                )
                continue
            self.progress.emit(
                "voice", "downloading", f"{idx}/{len(voice_list)}: {voice}..."
            )
            try:
                hf_hub_download(repo_id=self._repo_id, filename=filename)
                self.progress.emit("voice", "downloaded", f"{voice} downloaded")
            except Exception as exc:
                self.progress.emit(
                    "voice", "warning", f"could not download {voice}: {exc}"
                )
                self._voices_success = False
    # Kokoro model
    def _download_kokoro_model(self) -> None:
        """Fetch the Kokoro model weights and config, skipping cached files.

        Progress for config.json is reported under the "config" category,
        the weights under "model".
        """
        self._model_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "model", "warning", "huggingface_hub not installed, skipping model..."
            )
            self._model_success = False
            return
        for fname in self._model_files:
            if self._cancelled:
                self._model_success = False
                return
            category = "config" if fname == "config.json" else "model"
            if try_to_load_from_cache(repo_id=self._repo_id, filename=fname):
                self.progress.emit(
                    category, "installed", f"file {fname} already present"
                )
                continue
            self.progress.emit(category, "downloading", f"file {fname}...")
            try:
                hf_hub_download(repo_id=self._repo_id, filename=fname)
                self.progress.emit(category, "downloaded", f"file {fname} downloaded")
            except Exception as exc:
                self.progress.emit(
                    category, "warning", f"could not download file {fname}: {exc}"
                )
                self._model_success = False
    # spaCy models
    def _download_spacy_models(self) -> None:
        """Download spaCy models. Prefer missing models provided by parent.
        Parent dialog will populate _spacy_models_missing during checking.
        """
        self._spacy_success = True
        # Determine which models to process: prefer parent-provided missing list to avoid
        # re-checking everything; otherwise use the full unique list.
        parent = self.parent()
        models_to_process: List[str] = _unique_sorted_models()
        try:
            if (
                parent is not None
                and hasattr(parent, "_spacy_models_missing")
                and parent._spacy_models_missing
            ):
                # dict.fromkeys de-duplicates while keeping the parent's order.
                models_to_process = list(dict.fromkeys(parent._spacy_models_missing))
        except Exception:
            pass
        # If spaCy is not available to run the CLI, skip gracefully
        try:
            import spacy.cli as _spacy_cli
        except Exception:
            self.progress.emit(
                "spacy", "warning", "spaCy not available, skipping spaCy models..."
            )
            self._spacy_success = False
            return
        for idx, model_name in enumerate(models_to_process, start=1):
            if self._cancelled:
                self._spacy_success = False
                return
            if _is_package_installed(model_name):
                self.progress.emit(
                    "spacy",
                    "installed",
                    f"{idx}/{len(models_to_process)}: {model_name} already installed",
                )
                continue
            self.progress.emit(
                "spacy",
                "downloading",
                f"{idx}/{len(models_to_process)}: {model_name}...",
            )
            try:
                _spacy_cli.download(model_name)
                self.progress.emit("spacy", "downloaded", f"{model_name} downloaded")
            except Exception as exc:
                self.progress.emit(
                    "spacy", "warning", f"could not download {model_name}: {exc}"
                )
                self._spacy_success = False
class PreDownloadDialog(QDialog):
    """Dialog to show and control pre-download process.

    Runs a background StatusCheckWorker to determine whether Kokoro voices,
    the Kokoro model/config, and the spaCy models are already cached or
    installed, then lets the user download anything missing via a
    PreDownloadWorker. Each asset gets its own status row whose text and
    color are driven by worker signals.
    """
    # Label prefixes for each status row; worker callbacks rebuild the full
    # label text as prefix + message.
    VOICE_PREFIX = "Kokoro voices: "
    MODEL_PREFIX = "Kokoro model: "
    CONFIG_PREFIX = "Kokoro config: "
    SPACY_PREFIX = "spaCy models: "
    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Pre-download Models and Voices")
        self.setMinimumWidth(500)
        # Download worker; created lazily in _start_download.
        self.worker: Optional[PreDownloadWorker] = None
        # True once any status check reports a missing asset; gates the
        # "Download all" button.
        self.has_missing = False
        # (model_name, installed_ok) tuples accumulated during the check.
        self._spacy_models_checked: List[tuple] = []
        # Model names found missing; also read by PreDownloadWorker.
        self._spacy_models_missing: List[str] = []
        self._status_worker = None
        # Map keywords to (label, prefix) - labels filled after UI creation
        self.status_map = {
            "voice": (None, self.VOICE_PREFIX),
            "spacy": (None, self.SPACY_PREFIX),
            "model": (None, self.MODEL_PREFIX),
            "config": (None, self.CONFIG_PREFIX),
        }
        # Maps a worker "category done" name to the status rows it completes.
        self.category_map = {
            "voices": ["voice"],
            "model": ["model", "config"],
            "spacy": ["spacy"],
        }
        self._setup_ui()
        self._start_status_check()
    def _setup_ui(self) -> None:
        """Build the static layout: description, four status rows, buttons."""
        layout = QVBoxLayout(self)
        layout.setSpacing(0)
        layout.setContentsMargins(15, 0, 15, 15)
        desc = QLabel(
            "You can pre-download all required models and voices for offline use.\n"
            "This includes Kokoro voices, Kokoro model (and config), and spaCy models."
        )
        desc.setWordWrap(True)
        layout.addWidget(desc)
        # Status rows
        status_layout = QVBoxLayout()
        status_title = QLabel("<b>Current Status:</b>")
        status_layout.addWidget(status_title)
        self.voices_status = QLabel(self.VOICE_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.voices_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.model_status = QLabel(self.MODEL_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.model_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.config_status = QLabel(self.CONFIG_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.config_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.spacy_status = QLabel(self.SPACY_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.spacy_status)
        row.addStretch()
        status_layout.addLayout(row)
        # register labels
        self.status_map["voice"] = (self.voices_status, self.VOICE_PREFIX)
        self.status_map["model"] = (self.model_status, self.MODEL_PREFIX)
        self.status_map["config"] = (self.config_status, self.CONFIG_PREFIX)
        self.status_map["spacy"] = (self.spacy_status, self.SPACY_PREFIX)
        layout.addLayout(status_layout)
        layout.addItem(
            QSpacerItem(0, 20, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Fixed)
        )
        # Buttons
        button_row = QHBoxLayout()
        button_row.setSpacing(10)
        self.download_btn = QPushButton("Download all")
        self.download_btn.setMinimumWidth(100)
        self.download_btn.setMinimumHeight(35)
        # Disabled until the status check reports something missing.
        self.download_btn.setEnabled(False)
        self.download_btn.clicked.connect(self._start_download)
        button_row.addWidget(self.download_btn)
        self.close_btn = QPushButton("Close")
        self.close_btn.setMinimumWidth(100)
        self.close_btn.setMinimumHeight(35)
        self.close_btn.clicked.connect(self._handle_close)
        button_row.addWidget(self.close_btn)
        layout.addLayout(button_row)
        self.adjustSize()
    # Status checking worker
    class StatusCheckWorker(QThread):
        """Background thread that probes caches/installs and emits results.

        Each *_checked signal maps to one UI update callback on the dialog;
        the per-model spaCy signals drive the live "checking/missing" text.
        """
        voices_checked = pyqtSignal(bool, list)
        model_checked = pyqtSignal(bool)
        config_checked = pyqtSignal(bool)
        spacy_model_checking = pyqtSignal(str)
        spacy_model_result = pyqtSignal(str, bool)
        spacy_checked = pyqtSignal(bool, list)
        def run(self):
            # The dialog is the QThread parent; all _check_* helpers live there.
            parent = self.parent()
            if parent is None:
                return
            voices_ok, missing_voices = parent._check_kokoro_voices()
            self.voices_checked.emit(voices_ok, missing_voices)
            model_ok = parent._check_kokoro_model()
            self.model_checked.emit(model_ok)
            config_ok = parent._check_kokoro_config()
            self.config_checked.emit(config_ok)
            # Check spaCy models by package name to detect site-package installs
            unique = _unique_sorted_models()
            missing: List[str] = []
            for name in unique:
                self.spacy_model_checking.emit(name)
                ok = _is_package_installed(name)
                self.spacy_model_result.emit(name, ok)
                if not ok:
                    missing.append(name)
            # Shared with PreDownloadWorker so it only downloads what's absent.
            parent._spacy_models_missing = missing
            self.spacy_checked.emit(len(missing) == 0, missing)
    def _start_status_check(self) -> None:
        """Wire up and launch the StatusCheckWorker, resetting row visuals."""
        self._status_worker = self.StatusCheckWorker(self)
        self._status_worker.voices_checked.connect(self._update_voices_status)
        self._status_worker.model_checked.connect(self._update_model_status)
        self._status_worker.config_checked.connect(self._update_config_status)
        self._status_worker.spacy_model_checking.connect(self._spacy_model_checking)
        self._status_worker.spacy_model_result.connect(self._spacy_model_result)
        self._status_worker.spacy_checked.connect(self._update_spacy_status)
        # These are initialized in __init__ to keep consistent object state
        # Set checking visual state
        for lbl in (
            self.voices_status,
            self.model_status,
            self.config_status,
            self.spacy_status,
        ):
            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
        self.spacy_status.setText(self.SPACY_PREFIX + "⏳ Checking...")
        self._status_worker.start()
    # UI update callbacks
    def _spacy_model_checking(self, name: str) -> None:
        """Show which spaCy model is currently being probed."""
        self.spacy_status.setText(f"{self.SPACY_PREFIX}Checking {name}...")
    def _spacy_model_result(self, name: str, ok: bool) -> None:
        """Record one spaCy probe result and refresh the running tally."""
        self._spacy_models_checked.append((name, ok))
        if not ok and name not in self._spacy_models_missing:
            self._spacy_models_missing.append(name)
        checked = len(self._spacy_models_checked)
        missing_count = len(self._spacy_models_missing)
        if missing_count:
            self.spacy_status.setText(
                f"{self.SPACY_PREFIX}{checked} checked, {missing_count} missing..."
            )
        else:
            self.spacy_status.setText(f"{self.SPACY_PREFIX}{checked} checked...")
    def _update_voices_status(self, ok: bool, missing: List[str]) -> None:
        """Final verdict for the voices row; flags has_missing if not ok."""
        if ok:
            self._set_status("voice", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "voice", f"✗ Missing {len(missing)} voices", COLORS["RED"]
                )
            else:
                self._set_status("voice", "✗ Not downloaded", COLORS["RED"])
    def _update_model_status(self, ok: bool) -> None:
        """Final verdict for the Kokoro model row."""
        if ok:
            self._set_status("model", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("model", "✗ Not downloaded", COLORS["RED"])
    def _update_config_status(self, ok: bool) -> None:
        """Final verdict for the Kokoro config row."""
        if ok:
            self._set_status("config", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("config", "✗ Not downloaded", COLORS["RED"])
    def _update_spacy_status(self, ok: bool, missing: List[str]) -> None:
        """Final verdict for the spaCy row; also arms the download button.

        This is the last signal the StatusCheckWorker emits, so enabling the
        button here reflects the accumulated has_missing of all four checks.
        """
        if ok:
            self._set_status("spacy", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "spacy", f"✗ Missing {len(missing)} model(s)", COLORS["RED"]
                )
            else:
                self._set_status("spacy", "✗ Not downloaded", COLORS["RED"])
        self.download_btn.setEnabled(self.has_missing)
    def _set_status(self, key: str, text: str, color: str) -> None:
        """Set one status row's text (prefix + text) and color by map key."""
        lbl, prefix = self.status_map.get(key, (None, ""))
        if not lbl:
            return
        lbl.setText(prefix + text)
        lbl.setStyleSheet(f"color: {color};")
    # Helper checks
    def _check_kokoro_voices(self) -> Tuple[bool, List[str]]:
        """Return (ok, missing_list) for Kokoro voices check."""
        missing = []
        try:
            from huggingface_hub import try_to_load_from_cache
            for voice in VOICES_INTERNAL:
                # try_to_load_from_cache returns a falsy value when the file
                # is not in the local HF cache.
                if not try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename=f"voices/{voice}.pt"
                ):
                    missing.append(voice)
        except Exception:
            # If HF missing, report all as missing
            return False, list(VOICES_INTERNAL)
        return (len(missing) == 0), missing
    def _check_kokoro_model(self) -> bool:
        """True if the Kokoro model weights are already in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="kokoro-v1_0.pth"
                )
                is not None
            )
        except Exception:
            return False
    def _check_kokoro_config(self) -> bool:
        """True if the Kokoro config.json is already in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="config.json"
                )
                is not None
            )
        except Exception:
            return False
    def _check_spacy_models(self) -> bool:
        """Synchronous spaCy check; also refreshes _spacy_models_missing."""
        unique = _unique_sorted_models()
        missing = [m for m in unique if not _is_package_installed(m)]
        self._spacy_models_missing = missing
        return len(missing) == 0
    # Download control
    def _start_download(self) -> None:
        """Kick off the PreDownloadWorker and switch rows to 'processing'."""
        self.download_btn.setEnabled(False)
        self.download_btn.setText("Downloading...")
        # mark the start of downloads; this triggers the labels
        self._on_progress("system", "starting", "Processing, please wait...")
        self.worker = PreDownloadWorker(self)
        self.worker.progress.connect(self._on_progress)
        self.worker.category_done.connect(self._on_category_done)
        self.worker.finished.connect(self._on_download_finished)
        self.worker.error.connect(self._on_download_error)
        self.worker.start()
    def _on_progress(self, category: str, status: str, message: str) -> None:
        """Map worker (category, status, message) to UI label updates.
        Status is one of: 'downloading', 'installed', 'downloaded', 'warning', 'starting'.
        Category is one of: 'voice', 'model', 'spacy', 'config', or 'system'.
        """
        try:
            # If the category targets a specific label, update directly
            if category in self.status_map:
                lbl, prefix = self.status_map[category]
                if not lbl:
                    return
                # Compose message and set color based on status token
                full_text = prefix + message
                # Long messages are truncated for display; the full text
                # remains available via the tooltip.
                if len(full_text) > 60:
                    display_text = full_text[:57] + "..."
                    lbl.setText(display_text)
                    lbl.setToolTip(full_text)
                else:
                    lbl.setText(full_text)
                    lbl.setToolTip("")  # Clear tooltip if not needed
                if status == "downloading":
                    lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                elif status in ("installed", "downloaded"):
                    lbl.setStyleSheet(f"color: {COLORS['GREEN']};")
                elif status == "warning":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                elif status == "error":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                return
            # System-level messages
            if category == "system":
                if status == "starting":
                    for k in self.status_map:
                        lbl, prefix = self.status_map[k]
                        if lbl:
                            lbl.setText(prefix + "Processing, please wait...")
                            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                # other system statuses don't require action
                return
        except Exception:
            # Do not let UI thread crash on unexpected worker message
            pass
    def _on_category_done(self, category: str) -> None:
        """Mark every row mapped to *category* as fully downloaded."""
        for key in self.category_map.get(category, []):
            self._set_status(key, "✓ Downloaded", COLORS["GREEN"])
    def _on_download_finished(self) -> None:
        """All downloads completed: nothing missing, keep button disabled."""
        self.has_missing = False
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(False)
    def _on_download_error(self, error_msg: str) -> None:
        """Surface a fatal worker error on every row and allow a retry."""
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(True)
        for key in self.status_map:
            self._set_status(key, f"✗ Error - {error_msg}", COLORS["RED"])
    def _handle_close(self) -> None:
        """Close button: cancel any running download (bounded wait) and accept."""
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            self.worker.wait(2000)
        self.accept()
    def closeEvent(self, event) -> None:
        # Same cancellation path for window-manager close (title-bar X).
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            self.worker.wait(2000)
        super().closeEvent(event)
# Queue Manager dialog: a simple window showing the list of queued items
# (no checkboxes), with a button to remove selected items from the queue
# and a button to clear the entire queue.
from PyQt6.QtWidgets import (
QDialog,
QVBoxLayout,
QHBoxLayout,
QDialogButtonBox,
QPushButton,
QListWidget,
QListWidgetItem,
QFileIconProvider,
QLabel,
QWidget,
QSizePolicy,
QAbstractItemView,
QCheckBox,
)
from PyQt6.QtCore import QFileInfo, Qt
from abogen.constants import COLORS
from copy import deepcopy
from PyQt6.QtGui import QFontMetrics
from abogen.utils import load_config, save_config
# Define attributes that are safe to override with global settings.
# When the "Override item settings with current selection" checkbox in
# QueueManager is enabled, each queued item's saved value for these fields
# is replaced (for display/processing) by the value currently selected in
# the main window; all other item attributes keep their saved state.
OVERRIDE_FIELDS = [
    "lang_code",
    "speed",
    "voice",
    "save_option",
    "output_folder",
    "subtitle_mode",
    "output_format",
    "replace_single_newlines",
    "use_silent_gaps",
    "subtitle_speed_method",
]
class ElidedLabel(QLabel):
    """QLabel that elides its plain text with "..." when too narrow.

    The full, un-elided string is kept in ``_full_text`` (readable via
    :meth:`fullText`); what QLabel actually paints is the elided variant,
    recomputed on every resize and on every text change.
    """

    def __init__(self, text):
        super().__init__(text)
        self._full_text = text
        self.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred)
        self.setTextFormat(Qt.TextFormat.PlainText)

    def setText(self, text):
        """Replace the label text and re-apply elision immediately.

        Fix: previously the new text was painted un-elided until the next
        resize event, because elision only happened in resizeEvent.
        """
        self._full_text = text
        self._apply_elision()

    def _apply_elision(self):
        # Elide against the current width; called from both setText and
        # resizeEvent so the display always matches the available space.
        metrics = QFontMetrics(self.font())
        elided = metrics.elidedText(
            self._full_text, Qt.TextElideMode.ElideRight, self.width()
        )
        super().setText(elided)

    def resizeEvent(self, event):
        """Re-elide for the new width, then run the default handling."""
        self._apply_elision()
        super().resizeEvent(event)

    def fullText(self):
        """Return the full (un-elided) text."""
        return self._full_text
class QueueListItemWidget(QWidget):
    """One row of the queue list: elided file name on the left, a dimmed
    character count pinned to the right."""

    def __init__(self, file_name, char_count):
        super().__init__()
        import os

        row = QHBoxLayout()
        row.setContentsMargins(12, 0, 6, 0)
        row.setSpacing(0)
        # Show only the base name; the full path lives in the item tooltip.
        title = ElidedLabel(os.path.basename(file_name))
        count = QLabel(f"Chars: {char_count}")
        count.setStyleSheet(f"color: {COLORS['LIGHT_DISABLED']};")
        count.setAlignment(Qt.AlignmentFlag.AlignRight | Qt.AlignmentFlag.AlignVCenter)
        count.setSizePolicy(QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Preferred)
        # Name stretches; the count keeps its minimum width.
        row.addWidget(title, 1)
        row.addWidget(count, 0)
        self.setLayout(row)
class DroppableQueueListWidget(QListWidget):
    """Queue list that accepts drag-and-dropped text/subtitle files.

    While a supported file hovers over the list a dashed-border overlay is
    shown; dropped paths are forwarded to the parent dialog's
    ``add_files_from_paths``. The extension filter previously appeared three
    times (dragEnter/dragMove/drop); it is now a single shared helper.
    """

    # File extensions accepted via drag-and-drop (plain text + subtitles).
    SUPPORTED_EXTENSIONS = (".txt", ".srt", ".ass", ".vtt")

    def __init__(self, parent_dialog):
        super().__init__()
        self.parent_dialog = parent_dialog
        self.setAcceptDrops(True)
        # Overlay for drag hover
        self.drag_overlay = QLabel("", self)
        self.drag_overlay.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.drag_overlay.setStyleSheet(
            f"border:2px dashed {COLORS['BLUE_BORDER_HOVER']}; border-radius:5px; padding:20px; background:{COLORS['BLUE_BG_HOVER']};"
        )
        self.drag_overlay.setVisible(False)
        # Let mouse events pass through to the list beneath the overlay.
        self.drag_overlay.setAttribute(
            Qt.WidgetAttribute.WA_TransparentForMouseEvents, True
        )

    @classmethod
    def _is_supported_url(cls, url):
        """True if *url* is a local file with a supported extension."""
        return url.isLocalFile() and url.toLocalFile().lower().endswith(
            cls.SUPPORTED_EXTENSIONS
        )

    def dragEnterEvent(self, event):
        """Accept the drag (and show the overlay) if any URL is supported."""
        if event.mimeData().hasUrls():
            if any(self._is_supported_url(u) for u in event.mimeData().urls()):
                self.drag_overlay.resize(self.size())
                self.drag_overlay.setVisible(True)
                event.acceptProposedAction()
                return
        self.drag_overlay.setVisible(False)
        event.ignore()

    def dragMoveEvent(self, event):
        """Keep accepting while at least one supported file is dragged."""
        if event.mimeData().hasUrls() and any(
            self._is_supported_url(u) for u in event.mimeData().urls()
        ):
            event.acceptProposedAction()
        else:
            event.ignore()

    def dragLeaveEvent(self, event):
        self.drag_overlay.setVisible(False)
        event.accept()

    def dropEvent(self, event):
        """Hide the overlay and queue any supported dropped files."""
        self.drag_overlay.setVisible(False)
        if not event.mimeData().hasUrls():
            event.ignore()
            return
        file_paths = [
            u.toLocalFile()
            for u in event.mimeData().urls()
            if self._is_supported_url(u)
        ]
        if file_paths:
            self.parent_dialog.add_files_from_paths(file_paths)
            event.acceptProposedAction()
        else:
            event.ignore()

    def resizeEvent(self, event):
        """Keep the overlay covering the whole viewport on resize."""
        super().resizeEvent(event)
        if hasattr(self, "drag_overlay"):
            self.drag_overlay.resize(self.size())
class QueueManager(QDialog):
    def __init__(self, parent, queue: list, title="Queue Manager", size=(600, 700)):
        """Build the queue manager dialog around an existing *queue* list.

        The dialog mutates *queue* in place; a deep copy of the original is
        kept in ``_original_queue``. *parent* is the main abogen window and
        is only used to read current settings (this QDialog is parentless).
        """
        super().__init__()
        self.queue = queue
        self._original_queue = deepcopy(
            queue
        )  # Store a deep copy of the original queue
        self.parent = parent
        self.config = load_config()  # Load config for persistence
        layout = QVBoxLayout()
        layout.setContentsMargins(15, 15, 15, 15)  # set main layout margins
        layout.setSpacing(12)  # set spacing between widgets in main layout
        # list of queued items
        self.listwidget = DroppableQueueListWidget(self)
        self.listwidget.setSelectionMode(
            QAbstractItemView.SelectionMode.ExtendedSelection
        )
        self.listwidget.setAlternatingRowColors(True)
        self.listwidget.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
        self.listwidget.customContextMenuRequested.connect(self.show_context_menu)
        # Add informative instructions at the top
        instructions = QLabel(
            "<h2>How Queue Works?</h2>"
            "You can add text and subtitle files (.txt, .srt, .ass, .vtt) directly using the '<b>Add files</b>' button below. "
            "To add PDF, EPUB or markdown files, use the input box in the main window and click the <b>'Add to Queue'</b> button. "
            "By default, each file in the queue keeps the configuration settings active when they were added. "
            "Enabling the <b>'Override item settings with current selection'</b> option below will force all items to use the configuration currently selected in the main window. "
            "You can view each file's configuration by hovering over them."
        )
        instructions.setAlignment(Qt.AlignmentFlag.AlignLeft)
        instructions.setWordWrap(True)
        layout.addWidget(instructions)
        # Override Checkbox
        self.override_chk = QCheckBox("Override item settings with current selection")
        self.override_chk.setToolTip(
            "If checked, all items in the queue will be processed using the \n"
            "settings currently selected in the main window, ignoring their saved state."
        )
        # Load saved state (default to False)
        self.override_chk.setChecked(self.config.get("queue_override_settings", False))
        # Trigger process_queue to update tooltips immediately when toggled
        self.override_chk.stateChanged.connect(self.process_queue)
        self.override_chk.setStyleSheet("margin-bottom: 8px;")
        layout.addWidget(self.override_chk)
        # Overlay label for empty queue
        self.empty_overlay = QLabel(
            "Drag and drop your text or subtitle files here or use the 'Add files' button.",
            self.listwidget,
        )
        self.empty_overlay.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.empty_overlay.setStyleSheet(
            f"color: {COLORS['LIGHT_DISABLED']}; background: transparent; padding: 20px;"
        )
        self.empty_overlay.setWordWrap(True)
        # Click-through so the overlay never blocks drops on the list.
        self.empty_overlay.setAttribute(
            Qt.WidgetAttribute.WA_TransparentForMouseEvents, True
        )
        self.empty_overlay.hide()
        # add queue items to the list
        self.process_queue()
        button_row = QHBoxLayout()
        button_row.setContentsMargins(0, 0, 0, 0)  # optional: no margins for button row
        button_row.setSpacing(7)  # set spacing between buttons
        # Add files button
        add_files_button = QPushButton("Add files")
        add_files_button.setFixedHeight(40)
        add_files_button.clicked.connect(self.add_more_files)
        button_row.addWidget(add_files_button)
        # Remove button
        self.remove_button = QPushButton("Remove selected")
        self.remove_button.setFixedHeight(40)
        self.remove_button.clicked.connect(self.remove_item)
        button_row.addWidget(self.remove_button)
        # Clear button
        self.clear_button = QPushButton("Clear Queue")
        self.clear_button.setFixedHeight(40)
        self.clear_button.clicked.connect(self.clear_queue)
        button_row.addWidget(self.clear_button)
        layout.addLayout(button_row)
        layout.addWidget(self.listwidget)
        # Connect selection change to update button state
        self.listwidget.currentItemChanged.connect(self.update_button_states)
        self.listwidget.itemSelectionChanged.connect(self.update_button_states)
        buttons = QDialogButtonBox(
            QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel,
            self,
        )
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        layout.addWidget(buttons)
        self.setLayout(layout)
        self.setWindowTitle(title)
        self.resize(*size)
        self.update_button_states()
    def process_queue(self):
        """Rebuild the list widget from ``self.queue``.

        For each item a row widget and an HTML tooltip describing its
        effective configuration are created. When the override checkbox is
        checked, values for OVERRIDE_FIELDS come from the main window's
        current selection instead of the item's saved attributes.
        """
        import os
        self.listwidget.clear()
        if not self.queue:
            self.empty_overlay.show()
            self.update_button_states()
            return
        else:
            self.empty_overlay.hide()
        # Get current global settings and checkbox state for overrides
        current_global_settings = self.get_current_attributes()
        is_override_active = self.override_chk.isChecked()
        icon_provider = QFileIconProvider()
        for item in self.queue:
            # Dynamic Attribute Retrieval Helper
            def get_val(attr, default=""):
                # If override is ON and attr is overrideable, use global setting
                if is_override_active and attr in OVERRIDE_FIELDS:
                    return current_global_settings.get(attr, default)
                # Otherwise return the item's saved attribute
                return getattr(item, attr, default)
            # Determine display file path (prefer save_base_path for original file)
            display_file_path = getattr(item, "save_base_path", None) or item.file_name
            processing_file_path = item.file_name
            # Normalize paths for consistent display (fixes Windows path separator issues)
            display_file_path = (
                os.path.normpath(display_file_path)
                if display_file_path
                else display_file_path
            )
            processing_file_path = (
                os.path.normpath(processing_file_path)
                if processing_file_path
                else processing_file_path
            )
            # Only show the file name, not the full path
            # NOTE(review): display_name is computed but never used below —
            # QueueListItemWidget does its own os.path.basename internally.
            display_name = display_file_path
            if os.path.sep in display_file_path:
                display_name = os.path.basename(display_file_path)
            # Get icon for the display file
            icon = icon_provider.icon(QFileInfo(display_file_path))
            list_item = QListWidgetItem()
            # Tooltip Generation
            tooltip = ""
            # If override is active, add the warning header on its own line
            if is_override_active:
                tooltip += "<b style='color: #ff9900;'>(Global Override Active)</b><br>"
            output_folder = get_val("output_folder")
            # For plain .txt inputs we don't need to show a separate processing file
            show_processing = True
            try:
                if isinstance(
                    display_file_path, str
                ) and display_file_path.lower().endswith(".txt"):
                    show_processing = False
            except Exception:
                show_processing = True
            tooltip += f"<b>Input File:</b> {display_file_path}<br>"
            if (
                show_processing
                and processing_file_path
                and processing_file_path != display_file_path
            ):
                tooltip += f"<b>Processing File:</b> {processing_file_path}<br>"
            tooltip += (
                f"<b>Language:</b> {get_val('lang_code')}<br>"
                f"<b>Speed:</b> {get_val('speed')}<br>"
                f"<b>Voice:</b> {get_val('voice')}<br>"
                f"<b>Save Option:</b> {get_val('save_option')}<br>"
            )
            if output_folder not in (None, "", "None"):
                tooltip += f"<b>Output Folder:</b> {output_folder}<br>"
            tooltip += (
                f"<b>Subtitle Mode:</b> {get_val('subtitle_mode')}<br>"
                f"<b>Output Format:</b> {get_val('output_format')}<br>"
                f"<b>Characters:</b> {getattr(item, 'total_char_count', '')}<br>"
                f"<b>Replace Single Newlines:</b> {get_val('replace_single_newlines', True)}<br>"
                f"<b>Use Silent Gaps:</b> {get_val('use_silent_gaps', False)}<br>"
                f"<b>Speed Method:</b> {get_val('subtitle_speed_method', 'tts')}"
            )
            # Add book handler options if present (Preserve logic: specific to file structure)
            save_chapters_separately = getattr(item, "save_chapters_separately", None)
            merge_chapters_at_end = getattr(item, "merge_chapters_at_end", None)
            if save_chapters_separately is not None:
                tooltip += f"<br><b>Save chapters separately:</b> {'Yes' if save_chapters_separately else 'No'}"
                # Only show merge option if saving chapters separately
                if save_chapters_separately and merge_chapters_at_end is not None:
                    tooltip += f"<br><b>Merge chapters at the end:</b> {'Yes' if merge_chapters_at_end else 'No'}"
            list_item.setToolTip(tooltip)
            list_item.setIcon(icon)
            # Store both paths for context menu
            list_item.setData(
                Qt.ItemDataRole.UserRole,
                {
                    "display_path": display_file_path,
                    "processing_path": processing_file_path,
                },
            )
            # Use custom widget for display
            char_count = getattr(item, "total_char_count", 0)
            widget = QueueListItemWidget(display_file_path, char_count)
            self.listwidget.addItem(list_item)
            self.listwidget.setItemWidget(list_item, widget)
        self.update_button_states()
def remove_item(self):
items = self.listwidget.selectedItems()
if not items:
return
from PyQt6.QtWidgets import QMessageBox
# Remove by index to ensure correct mapping
rows = sorted([self.listwidget.row(item) for item in items], reverse=True)
# Warn user if removing multiple files
if len(rows) > 1:
reply = QMessageBox.question(
self,
"Confirm Remove",
f"Are you sure you want to remove {len(rows)} selected items from the queue?",
QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
QMessageBox.StandardButton.No,
)
if reply != QMessageBox.StandardButton.Yes:
return
for row in rows:
if 0 <= row < len(self.queue):
del self.queue[row]
self.process_queue()
self.update_button_states()
def clear_queue(self):
from PyQt6.QtWidgets import QMessageBox
if len(self.queue) > 1:
reply = QMessageBox.question(
self,
"Confirm Clear Queue",
f"Are you sure you want to clear {len(self.queue)} items from the queue?",
QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
QMessageBox.StandardButton.No,
)
if reply != QMessageBox.StandardButton.Yes:
return
self.queue.clear()
self.listwidget.clear()
self.empty_overlay.resize(
self.listwidget.size()
) # Ensure overlay is sized correctly
self.empty_overlay.show() # Show the overlay when queue is empty
self.update_button_states()
    def get_queue(self):
        """Return the (possibly edited, shared) queue list backing this dialog."""
        return self.queue
    def get_current_attributes(self):
        """Snapshot the main window's currently-selected settings as a dict.

        Used for the global-override display in process_queue and as the
        saved configuration for items created by add_files_from_paths.
        Returns mostly-empty placeholder values when no parent is available.
        """
        # Fetch current attribute values from the parent abogen GUI
        attrs = {}
        parent = self.parent
        if parent is not None:
            # lang_code: use parent's get_voice_formula and get_selected_lang
            if hasattr(parent, "get_voice_formula") and hasattr(
                parent, "get_selected_lang"
            ):
                voice_formula = parent.get_voice_formula()
                attrs["lang_code"] = parent.get_selected_lang(voice_formula)
                attrs["voice"] = voice_formula
            else:
                attrs["lang_code"] = getattr(parent, "selected_lang", "")
                attrs["voice"] = getattr(parent, "selected_voice", "")
            # speed (slider stores an int percentage; convert to a factor)
            if hasattr(parent, "speed_slider"):
                attrs["speed"] = parent.speed_slider.value() / 100.0
            else:
                attrs["speed"] = getattr(parent, "speed", 1.0)
            # save_option
            attrs["save_option"] = getattr(parent, "save_option", "")
            # output_folder
            attrs["output_folder"] = getattr(parent, "selected_output_folder", "")
            # subtitle_mode
            if hasattr(parent, "get_actual_subtitle_mode"):
                attrs["subtitle_mode"] = parent.get_actual_subtitle_mode()
            else:
                attrs["subtitle_mode"] = getattr(parent, "subtitle_mode", "")
            # output_format
            attrs["output_format"] = getattr(parent, "selected_format", "")
            # total_char_count
            attrs["total_char_count"] = getattr(parent, "char_count", "")
            # replace_single_newlines
            attrs["replace_single_newlines"] = getattr(
                parent, "replace_single_newlines", True
            )
            # use_silent_gaps
            attrs["use_silent_gaps"] = getattr(parent, "use_silent_gaps", False)
            # subtitle_speed_method
            attrs["subtitle_speed_method"] = getattr(
                parent, "subtitle_speed_method", "tts"
            )
            # book handler options
            attrs["save_chapters_separately"] = getattr(
                parent, "save_chapters_separately", None
            )
            attrs["merge_chapters_at_end"] = getattr(
                parent, "merge_chapters_at_end", None
            )
        else:
            # fallback: empty values
            # NOTE(review): this fallback omits "use_silent_gaps" and
            # "subtitle_speed_method" entirely, and sets
            # "replace_single_newlines" to "" rather than its usual default
            # of True — confirm whether the parent-less path is ever hit.
            attrs = {
                k: ""
                for k in [
                    "lang_code",
                    "speed",
                    "voice",
                    "save_option",
                    "output_folder",
                    "subtitle_mode",
                    "output_format",
                    "total_char_count",
                    "replace_single_newlines",
                ]
            }
            attrs["save_chapters_separately"] = None
            attrs["merge_chapters_at_end"] = None
        return attrs
def add_files_from_paths(self, file_paths):
from abogen.subtitle_utils import calculate_text_length
from PyQt6.QtWidgets import QMessageBox
import os
current_attrs = self.get_current_attributes()
duplicates = []
for file_path in file_paths:
class QueueItem:
pass
item = QueueItem()
item.file_name = file_path
item.save_base_path = (
file_path # For .txt files, processing and save paths are the same
)
for attr, value in current_attrs.items():
setattr(item, attr, value)
# Override subtitle_mode to "Disabled" for subtitle files
if file_path.lower().endswith((".srt", ".ass", ".vtt")):
item.subtitle_mode = "Disabled"
# Read file content and calculate total_char_count using calculate_text_length
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
file_content = f.read()
item.total_char_count = calculate_text_length(file_content)
except Exception:
item.total_char_count = 0
# Prevent adding duplicate items to the queue (check all attributes)
is_duplicate = False
for queued_item in self.queue:
if (
getattr(queued_item, "file_name", None)
== getattr(item, "file_name", None)
and getattr(queued_item, "lang_code", None)
== getattr(item, "lang_code", None)
and getattr(queued_item, "speed", None)
== getattr(item, "speed", None)
and getattr(queued_item, "voice", None)
== getattr(item, "voice", None)
and getattr(queued_item, "save_option", None)
== getattr(item, "save_option", None)
and getattr(queued_item, "output_folder", None)
== getattr(item, "output_folder", None)
and getattr(queued_item, "subtitle_mode", None)
== getattr(item, "subtitle_mode", None)
and getattr(queued_item, "output_format", None)
== getattr(item, "output_format", None)
and getattr(queued_item, "total_char_count", None)
== getattr(item, "total_char_count", None)
and getattr(queued_item, "replace_single_newlines", True)
== getattr(item, "replace_single_newlines", True)
and getattr(queued_item, "use_silent_gaps", False)
== getattr(item, "use_silent_gaps", False)
and getattr(queued_item, "subtitle_speed_method", "tts")
== getattr(item, "subtitle_speed_method", "tts")
and getattr(queued_item, "save_base_path", None)
== getattr(item, "save_base_path", None)
and getattr(queued_item, "save_chapters_separately", None)
== getattr(item, "save_chapters_separately", None)
and getattr(queued_item, "merge_chapters_at_end", None)
== getattr(item, "merge_chapters_at_end", None)
):
is_duplicate = True
break
if is_duplicate:
duplicates.append(os.path.basename(file_path))
continue
self.queue.append(item)
if duplicates:
QMessageBox.warning(
self,
"Duplicate Item(s)",
f"Skipping {len(duplicates)} file(s) with the same attributes, already in the queue.",
)
self.process_queue()
self.update_button_states()
def add_more_files(self):
from PyQt6.QtWidgets import QFileDialog
# Allow .txt, .srt, .ass, and .vtt files
files, _ = QFileDialog.getOpenFileNames(
self,
"Select text or subtitle files",
"",
"Supported Files (*.txt *.srt *.ass *.vtt)",
)
if not files:
return
self.add_files_from_paths(files)
def resizeEvent(self, event):
super().resizeEvent(event)
if hasattr(self, "empty_overlay"):
self.empty_overlay.resize(self.listwidget.size())
def update_button_states(self):
# Enable Remove if at least one item is selected, else disable
if hasattr(self, "remove_button"):
selected_count = len(self.listwidget.selectedItems())
self.remove_button.setEnabled(selected_count > 0)
if selected_count > 1:
self.remove_button.setText(f"Remove selected ({selected_count})")
else:
self.remove_button.setText("Remove selected")
# Disable Clear if queue is empty
if hasattr(self, "clear_button"):
self.clear_button.setEnabled(bool(self.queue))
def show_context_menu(self, pos):
from PyQt6.QtWidgets import QMenu
from PyQt6.QtGui import QAction, QDesktopServices
from PyQt6.QtCore import QUrl
import os
global_pos = self.listwidget.viewport().mapToGlobal(pos)
selected_items = self.listwidget.selectedItems()
menu = QMenu(self)
if len(selected_items) == 1:
# Add Remove action
remove_action = QAction("Remove this item", self)
remove_action.triggered.connect(self.remove_item)
menu.addAction(remove_action)
# Get paths for determining if it's a document input
item = selected_items[0]
paths = item.data(Qt.ItemDataRole.UserRole)
if isinstance(paths, dict):
display_path = paths.get("display_path", "")
processing_path = paths.get("processing_path", "")
else:
display_path = paths
processing_path = paths
doc_exts = (".md", ".markdown", ".pdf", ".epub")
is_document_input = (
isinstance(display_path, str)
and display_path.lower().endswith(doc_exts)
) or (
isinstance(processing_path, str)
and processing_path.lower().endswith(doc_exts)
)
# Add Open file action(s)
def open_file_by_path(path_label: str):
from PyQt6.QtWidgets import QMessageBox
p = display_path if path_label == "display" else processing_path
if not p:
QMessageBox.warning(
self, "File Not Found", "Path is not available."
)
return
# Find the queue item and resolve the target path
target_path = None
for q in self.queue:
if (
getattr(q, "save_base_path", None) == display_path
or q.file_name == display_path
):
if path_label == "display":
target_path = (
getattr(q, "save_base_path", None) or q.file_name
)
else:
target_path = q.file_name
break
if (
getattr(q, "save_base_path", None) == processing_path
or q.file_name == processing_path
):
if path_label == "display":
target_path = (
getattr(q, "save_base_path", None) or q.file_name
)
else:
target_path = q.file_name
break
# Fallback to the raw path if resolution failed
if not target_path:
target_path = p
if not os.path.exists(target_path):
QMessageBox.warning(
self, "File Not Found", f"The file does not exist."
)
return
QDesktopServices.openUrl(QUrl.fromLocalFile(target_path))
if is_document_input:
# For documents, show two open options
open_processed_action = QAction("Open processed file", self)
open_processed_action.triggered.connect(
lambda: open_file_by_path("processing")
)
menu.addAction(open_processed_action)
open_input_action = QAction("Open input file", self)
open_input_action.triggered.connect(
lambda: open_file_by_path("display")
)
menu.addAction(open_input_action)
else:
# For plain text files, show single open option
open_file_action = QAction("Open file", self)
open_file_action.triggered.connect(lambda: open_file_by_path("display"))
menu.addAction(open_file_action)
# Add Go to folder action
# If the queued item represents a converted document (markdown, pdf, epub)
# show two actions: Go to processed file (the cached .txt) and Go to input file (original source)
from PyQt6.QtWidgets import QMessageBox
def open_folder_for(path_label: str):
# path_label should be either 'display' or 'processing'
p = display_path if path_label == "display" else processing_path
if not p:
QMessageBox.warning(
self, "File Not Found", "Path is not available."
)
return
# If the stored path is the display path (original) but the actual file may be
# stored on the queue object differently, try to resolve via the queue entry.
target_path = None
for q in self.queue:
if (
getattr(q, "save_base_path", None) == display_path
or q.file_name == display_path
):
if path_label == "display":
target_path = (
getattr(q, "save_base_path", None) or q.file_name
)
else:
target_path = q.file_name
break
if (
getattr(q, "save_base_path", None) == processing_path
or q.file_name == processing_path
):
if path_label == "display":
target_path = (
getattr(q, "save_base_path", None) or q.file_name
)
else:
target_path = q.file_name
break
# Fallback to the raw path if resolution failed
if not target_path:
target_path = p
if not os.path.exists(target_path):
QMessageBox.warning(
self,
"File Not Found",
f"The file does not exist: {target_path}",
)
return
folder = os.path.dirname(target_path)
if os.path.exists(folder):
QDesktopServices.openUrl(QUrl.fromLocalFile(folder))
if is_document_input:
processed_action = QAction("Go to processed file", self)
processed_action.triggered.connect(
lambda: open_folder_for("processing")
)
menu.addAction(processed_action)
input_action = QAction("Go to input file", self)
input_action.triggered.connect(lambda: open_folder_for("display"))
menu.addAction(input_action)
else:
# Default behavior for non-document inputs: single "Go to folder" action
go_to_folder_action = QAction("Go to folder", self)
def go_to_folder():
item = selected_items[0]
paths = item.data(Qt.ItemDataRole.UserRole)
if isinstance(paths, dict):
file_path = paths.get(
"display_path", paths.get("processing_path", "")
)
else:
file_path = paths # Fallback for old format
# Find the queue item
for q in self.queue:
if (
getattr(q, "save_base_path", None) == file_path
or q.file_name == file_path
):
target_path = (
getattr(q, "save_base_path", None) or q.file_name
)
if not os.path.exists(target_path):
QMessageBox.warning(
self, "File Not Found", f"The file does not exist."
)
return
folder = os.path.dirname(target_path)
if os.path.exists(folder):
QDesktopServices.openUrl(QUrl.fromLocalFile(folder))
break
go_to_folder_action.triggered.connect(go_to_folder)
menu.addAction(go_to_folder_action)
elif len(selected_items) > 1:
remove_action = QAction(f"Remove selected ({len(selected_items)})", self)
remove_action.triggered.connect(self.remove_item)
menu.addAction(remove_action)
# Always add Clear Queue
clear_action = QAction("Clear Queue", self)
clear_action.triggered.connect(self.clear_queue)
menu.addAction(clear_action)
menu.exec(global_pos)
def accept(self):
# Save the override state to config so it persists globally
self.config["queue_override_settings"] = self.override_chk.isChecked()
save_config(self.config)
super().accept()
def reject(self):
# Cancel: restore original queue
from PyQt6.QtWidgets import QMessageBox
# Warn if user changed a lot (e.g., more than 1 items difference)
original_count = len(self._original_queue)
current_count = len(self.queue)
if abs(original_count - current_count) > 1:
reply = QMessageBox.question(
self,
"Confirm Cancel",
f"Are you sure you want to cancel and discard all changes?",
QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
QMessageBox.StandardButton.No,
)
if reply != QMessageBox.StandardButton.Yes:
return
self.queue.clear()
self.queue.extend(deepcopy(self._original_queue))
super().reject()
def keyPressEvent(self, event):
from PyQt6.QtCore import Qt
if event.key() == Qt.Key.Key_Delete:
self.remove_item()
else:
super().keyPressEvent(event)
# represents a queued item - book, chapters, voice, etc.
from dataclasses import dataclass
@dataclass
class QueuedItem:
    """One queued conversion job and its per-item synthesis settings."""
    file_name: str  # path/name of the queued input file
    lang_code: str  # language code for this item
    speed: float  # speech speed setting
    voice: str  # TTS voice identifier
    save_option: str  # how/where the output should be saved
    output_folder: str  # destination folder for the output
    subtitle_mode: str  # subtitle generation mode
    output_format: str  # output audio format
    total_char_count: int  # total character count of the item's text
    replace_single_newlines: bool = True  # replace single newlines in the text
    use_silent_gaps: bool = False  # use silent gaps during synthesis
    subtitle_speed_method: str = "tts"  # timing method for subtitles
    # The fields below default to None, i.e. "not set for this item";
    # annotations use string-form unions since None is a valid value.
    save_base_path: "str | None" = None
    save_chapters_separately: "bool | None" = None
    merge_chapters_at_end: "bool | None" = None

Sorry, the diff of this file is too big to display

"""Backwards-compatible re-export of the PyQt queue manager.
The actual implementation lives in abogen.pyqt.queue_manager_gui.
"""
from __future__ import annotations
from abogen.pyqt.queue_manager_gui import * # noqa: F401, F403
from abogen.pyqt.queue_manager_gui import QueueManager
__all__ = ["QueueManager"]
# represents a queued item - book, chapters, voice, etc.
from dataclasses import dataclass
@dataclass
class QueuedItem:
    """One queued conversion job and its per-item synthesis settings."""
    file_name: str  # path/name of the queued input file
    lang_code: str  # language code for this item
    speed: float  # speech speed setting
    voice: str  # TTS voice identifier
    save_option: str  # how/where the output should be saved
    output_folder: str  # destination folder for the output
    subtitle_mode: str  # subtitle generation mode
    output_format: str  # output audio format
    total_char_count: int  # total character count of the item's text
    replace_single_newlines: bool = True  # replace single newlines in the text
    use_silent_gaps: bool = False  # use silent gaps during synthesis
    subtitle_speed_method: str = "tts"  # timing method for subtitles
    # The fields below default to None, i.e. "not set for this item";
    # annotations use string-form unions since None is a valid value.
    save_base_path: "str | None" = None
    save_chapters_separately: "bool | None" = None
    merge_chapters_at_end: "bool | None" = None
from __future__ import annotations
import os
import logging
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Dict, Optional, Tuple
try: # pragma: no cover - optional dependency
import spacy
except Exception: # pragma: no cover - spaCy unavailable at runtime
spacy = None
# Lazy spaCy type hints to avoid a hard dependency at import time.
Language = Any # type: ignore[assignment]
Token = Any # type: ignore[assignment]
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class ContractionResolution:
    """Immutable record describing one resolved contraction in a text."""
    start: int  # character offset where the contraction begins
    end: int  # character offset just past the contraction
    surface: str  # the literal source text, e.g. "he's"
    expansion: str  # the expanded form, e.g. "he is"
    category: str  # classification tag for the resolution
    lemma: str  # lemma hint that backed the decision
    @property
    def span(self) -> Tuple[int, int]:
        """The (start, end) character span of the contraction."""
        return (self.start, self.end)
_DEFAULT_MODEL = os.environ.get("ABOGEN_SPACY_MODEL", "en_core_web_sm")
@lru_cache(maxsize=1)
def _load_spacy_model(model: str = _DEFAULT_MODEL) -> Optional[Language]:
    """Load and memoize a spaCy pipeline; None when spaCy is unavailable
    or the model cannot be loaded."""
    if spacy is None:
        logger.debug("spaCy is not installed; skipping contraction disambiguation")
        return None
    try:
        return spacy.load(model)
    except Exception as exc:  # pragma: no cover - depends on environment
        logger.warning("Failed to load spaCy model '%s': %s", model, exc)
        return None
def resolve_ambiguous_contractions(
    text: str, *, model: Optional[str] = None
) -> Dict[Tuple[int, int], ContractionResolution]:
    """Use spaCy to disambiguate ambiguous contractions in *text*.
    Returns a mapping from (start, end) spans to their resolved expansion.
    Only ambiguous `'s` and `'d` contractions are considered.
    """
    if not text:
        return {}
    nlp = _load_spacy_model(model or _DEFAULT_MODEL)
    if nlp is None:
        return {}
    # Dispatch on the literal token text; anything else is ignored.
    handlers = {"'s": _resolve_apostrophe_s, "'d": _resolve_apostrophe_d}
    resolutions: Dict[Tuple[int, int], ContractionResolution] = {}
    for token in nlp(text):
        handler = handlers.get(token.text)
        if handler is None:
            continue
        resolved = handler(token)
        if resolved is not None:
            # First resolution for a span wins.
            resolutions.setdefault(resolved.span, resolved)
    return resolutions
def _resolution(
    prev: Token, token: Token, expansion_word: str, category: str, lemma_hint: str
) -> Optional[ContractionResolution]:
    """Build a ContractionResolution for the pair (prev, token), or None
    when the two tokens are not directly adjacent in the source text."""
    if prev is None or token is None:
        return None
    # Reject anything with whitespace or punctuation between the tokens.
    if prev.idx + len(prev.text) != token.idx:
        return None
    span_start = prev.idx
    span_end = token.idx + len(token.text)
    literal = token.doc.text[span_start:span_end]
    return ContractionResolution(
        start=span_start,
        end=span_end,
        surface=literal,
        expansion=_assemble_expansion(prev.text, literal, expansion_word),
        category=category,
        lemma=lemma_hint,
    )
def _assemble_expansion(base_text: str, surface_text: str, expansion_word: str) -> str:
"""Combine *base_text* with *expansion_word*, preserving coarse casing."""
if not expansion_word:
return base_text
if surface_text.isupper() and expansion_word.isalpha():
adjusted = expansion_word.upper()
elif len(surface_text) > 2 and surface_text[:-2].istitle() and expansion_word:
# Surface like "It's" -> keep appended word lowercase
adjusted = expansion_word.lower()
else:
adjusted = expansion_word
return f"{base_text} {adjusted}".strip()
def _resolve_apostrophe_s(token: Token) -> Optional[ContractionResolution]:
    """Decide whether a `'s` token expands to "is", "has", or "us".

    Returns None for possessive `'s` or when the token has no predecessor
    (no expansion should be applied). Branch order matters: each rule
    overrides everything below it.
    """
    prev = token.nbor(-1) if token.i > 0 else None
    if prev is None:
        return None
    # Possessive marker e.g., dog's
    if token.tag_ == "POS" or token.lemma_ == "'s":
        return None
    prev_lower = prev.lemma_.lower()
    # NOTE(review): `surface` is computed but never used below.
    surface = token.doc.text[prev.idx : token.idx + len(token.text)]
    # "let's" -> "let us"
    if prev_lower == "let":
        return _resolution(prev, token, "us", "contraction_let_us", "us")
    # Special check for 's been -> has been, overriding lemma
    next_content = _next_content_token(token)
    if next_content and next_content.text.lower() == "been":
        return _resolution(prev, token, "has", "contraction_aux_have", "have")
    lemma = token.lemma_.lower()
    if not lemma:
        # spaCy gave no lemma: fall back to contextual heuristics (be wins ties).
        lemma = "be" if _favors_be(token) else "have" if _favors_have(token) else "be"
    if lemma == "be":
        return _resolution(prev, token, "is", "contraction_aux_be", "be")
    if lemma == "have":
        return _resolution(prev, token, "has", "contraction_aux_have", "have")
    # Unrecognized lemma: consult the surrounding context directly.
    if _favors_have(token):
        return _resolution(prev, token, "has", "contraction_aux_have", "have")
    if _favors_be(token):
        return _resolution(prev, token, "is", "contraction_aux_be", "be")
    # Default to copula expansion.
    return _resolution(prev, token, "is", "contraction_aux_be", lemma or "be")
def _resolve_apostrophe_d(token: Token) -> Optional[ContractionResolution]:
    """Decide whether a `'d` token expands to "had" or "would".

    Returns None when the token has no predecessor or looks like a
    participle mis-tag. Branch order matters: explicit context checks for
    "had" run before the modal ("would") defaults.
    """
    prev = token.nbor(-1) if token.i > 0 else None
    if prev is None:
        return None
    if token.morph.get("VerbForm") == ["Part"]:
        # spaCy sometimes tags possessives oddly; guard anyway
        return None
    lemma = token.lemma_.lower()
    tense = set(token.morph.get("Tense"))
    next_content = _next_content_token(token)
    prefers_had = _context_prefers_had(token)
    # Syntactic context (head/next token) explicitly favors "had".
    if prefers_had:
        return _resolution(prev, token, "had", "contraction_aux_have", "have")
    # Past-tense "have"/"had" lemma -> "had".
    if "Past" in tense and lemma in {"have", "had"}:
        return _resolution(prev, token, "had", "contraction_aux_have", "have")
    if next_content is not None:
        next_tag = next_content.tag_
        next_lemma = next_content.lemma_.lower()
    else:
        next_tag = ""
        next_lemma = ""
    # Base-form verb right after ("he'd go") -> modal "would".
    if next_tag == "VB":
        return _resolution(
            prev, token, "would", "contraction_modal_would", lemma or "will"
        )
    if token.tag_ == "MD" or lemma in {"will", "would", "shall"}:
        return _resolution(
            prev, token, "would", "contraction_modal_would", lemma or "will"
        )
    # Perfect/past continuation ("he'd been/gone ...") -> "had".
    if next_lemma in {"been", "gone", "had", "better"} or next_tag in {"VBN", "VBD"}:
        return _resolution(prev, token, "had", "contraction_aux_have", "have")
    if lemma in {"have", "had"}:
        return _resolution(prev, token, "had", "contraction_aux_have", lemma)
    # Default to the modal expansion.
    return _resolution(prev, token, "would", "contraction_modal_would", lemma or "will")
def _next_content_token(token: Token) -> Optional[Token]:
doc = token.doc
for candidate in doc[token.i + 1 :]:
if candidate.is_space:
continue
if candidate.is_punct and candidate.text not in {"-"}:
break
if candidate.text in {"'", ""}:
continue
return candidate
return None
def _favors_have(token: "Token") -> bool:
    """True when the following context suggests auxiliary "have"
    (a past participle or been/gone/had)."""
    nxt = _next_content_token(token)
    if nxt is None:
        return False
    return nxt.tag_ in {"VBN"} or nxt.lemma_.lower() in {"been", "gone", "had"}
def _favors_be(token: "Token") -> bool:
    """True when the following context suggests copula "be"; also True
    when there is no following content token (the permissive default)."""
    nxt = _next_content_token(token)
    return nxt is None or nxt.tag_ in {"VBG", "JJ", "RB", "DT", "IN"}
def _context_prefers_had(token: "Token") -> bool:
    """True when the syntactic context indicates `'d` expands to "had"."""
    head = token.head
    # A governing verb to the right that is past/participial (or one of the
    # telltale lemmas) points at the perfect auxiliary.
    if head is not None and head.i > token.i:
        head_lemma = head.lemma_.lower()
        if head.tag_ in {"VBN", "VBD"} or head_lemma in {
            "gone",
            "been",
            "had",
            "better",
        }:
            return True
    nxt = _next_content_token(token)
    if nxt is None:
        return False
    # Otherwise look one content token ahead.
    return (
        nxt.tag_ in {"VBN", "VBD"}
        or nxt.lemma_.lower() in {"been", "gone", "had", "better"}
    )
"""
Lazy-loaded spaCy utilities for sentence segmentation.
"""
# Cached spaCy module and models (lazy loaded)
_spacy = None  # the imported spacy module object, or None until first use
_nlp_cache = {}  # lang_code -> loaded spaCy pipeline
# Language code to spaCy model mapping
SPACY_MODELS = {
    "a": "en_core_web_sm",  # American English
    "b": "en_core_web_sm",  # British English
    "e": "es_core_news_sm",  # Spanish
    "f": "fr_core_news_sm",  # French
    "i": "it_core_news_sm",  # Italian
    "p": "pt_core_news_sm",  # Brazilian Portuguese
    "z": "zh_core_web_sm",  # Mandarin Chinese
    "j": "ja_core_news_sm",  # Japanese
    "h": "xx_sent_ud_sm",  # Hindi (multi-language model)
}
def _load_spacy():
    """Lazily import and cache the spaCy module; None when unavailable."""
    global _spacy
    if _spacy is not None:
        return _spacy
    try:
        import spacy
    except ImportError:
        return None
    _spacy = spacy
    return _spacy
def get_spacy_model(lang_code, log_callback=None):
    """
    Get or load a spaCy model for the given language code.
    Downloads the model automatically if not available.
    Args:
        lang_code: Language code (a, b, e, f, etc.)
        log_callback: Optional function to log messages
    Returns:
        Loaded spaCy model or None if unavailable
    """
    def log(msg, is_error=False):
        # Prefer GUI log callback when provided to avoid spamming stdout.
        # The callback receives a (message, color) tuple.
        if log_callback:
            color = "red" if is_error else "grey"
            try:
                log_callback((msg, color))
            except Exception:
                # Fallback to printing if callback misbehaves
                print(msg)
        else:
            print(msg)
    # Check if model is cached
    if lang_code in _nlp_cache:
        return _nlp_cache[lang_code]
    # Check if language is supported
    model_name = SPACY_MODELS.get(lang_code)
    if not model_name:
        log(f"\nspaCy: No model mapping for language '{lang_code}'...")
        return None
    # Lazy load spaCy
    spacy = _load_spacy()
    if spacy is None:
        log("\nspaCy: Module not installed, falling back to default segmentation...")
        return None
    # Try to load the model
    try:
        log(f"\nLoading spaCy model '{model_name}'...")
        # NOTE(review): the comment below appears truncated; presumably the
        # parser is kept (not disabled) to improve
        # sentence segmentation involving parentheses, quotes, and complex structure.
        # We only disable heavier components we don't need like NER.
        nlp = spacy.load(
            model_name,
            disable=["ner", "tagger", "lemmatizer", "attribute_ruler"],
        )
        # Ensure a sentence segmentation strategy is in place
        # The parser provides sents, but if it's missing (unlikely for core models), fallback to sentencizer
        if "parser" not in nlp.pipe_names and "sentencizer" not in nlp.pipe_names:
            nlp.add_pipe("sentencizer")
        _nlp_cache[lang_code] = nlp
        return nlp
    except OSError:
        # Model not found, attempt download
        log(f"\nspaCy: Downloading model '{model_name}'...")
        try:
            from spacy.cli import download
            download(model_name)
            # Retry loading with the same fix
            nlp = spacy.load(
                model_name,
                disable=["ner", "tagger", "lemmatizer", "attribute_ruler"],
            )
            if "parser" not in nlp.pipe_names and "sentencizer" not in nlp.pipe_names:
                nlp.add_pipe("sentencizer")
            _nlp_cache[lang_code] = nlp
            log(f"spaCy model '{model_name}' downloaded and loaded")
            return nlp
        except Exception as e:
            # Download or retry failed: report and fall back to None.
            log(
                f"\nspaCy: Failed to download model '{model_name}': {e}...",
                is_error=True,
            )
            return None
    except Exception as e:
        # Any other load failure (bad install, incompatible model, ...).
        log(f"\nspaCy: Error loading model '{model_name}': {e}...", is_error=True)
        return None
def segment_sentences(text, lang_code, log_callback=None):
    """
    Segment text into sentences using spaCy.
    Args:
        text: Text to segment
        lang_code: Language code
        log_callback: Optional function to log messages
    Returns:
        List of sentence strings, or None if spaCy unavailable
    """
    nlp = get_spacy_model(lang_code, log_callback)
    if nlp is None:
        return None
    # Raise nlp.max_length when needed so very long texts do not raise.
    try:
        length = len(text or "")
        if length and hasattr(nlp, "max_length") and length > nlp.max_length:
            nlp.max_length = length + 1000  # small headroom beyond the text
    except Exception:
        pass
    # Process text and collect non-empty, stripped sentences.
    doc = nlp(text)
    sentences = []
    for sent in doc.sents:
        stripped = sent.text.strip()
        if stripped:
            sentences.append(stripped)
    return sentences
def is_spacy_available():
    """Check if spaCy can be imported."""
    module = _load_spacy()
    return module is not None
def clear_cache():
    """Clear the model cache to free memory."""
    # `.clear()` mutates the existing dict in place; the name is never
    # rebound, so the previous `global` declaration was unnecessary.
    _nlp_cache.clear()
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
import unicodedata
# Verbs that commonly attribute dialogue ('"..." said X' / 'X said "..."').
_DIALOGUE_VERBS = (
    "said",
    "asked",
    "replied",
    "whispered",
    "shouted",
    "cried",
    "muttered",
    "answered",
    "hissed",
    "called",
    "added",
    "continued",
    "insisted",
    "remarked",
    "yelled",
    "breathed",
    "murmured",
    "exclaimed",
    "explained",
    "noted",
)
# Non-capturing alternation of the dialogue verbs above.
_VERB_PATTERN = "(?:" + "|".join(_DIALOGUE_VERBS) + ")"
# A capitalized word (including common accented capitals) made of word
# characters, apostrophes, and hyphens; a name is one or more fragments.
_NAME_FRAGMENT = r"[A-ZÀ-ÖØ-Þ][\w'’\-]*"
_NAME_PATTERN = rf"{_NAME_FRAGMENT}(?:\s+{_NAME_FRAGMENT})*"
# Script-style "Name: spoken text" attribution at the start of a chunk.
_COLON_PATTERN = re.compile(rf"^\s*({_NAME_PATTERN})\s*:\s*(.+)$")
_NAME_BEFORE_VERB = re.compile(rf"({_NAME_PATTERN})\s+{_VERB_PATTERN}\b", re.IGNORECASE)
_VERB_BEFORE_NAME = re.compile(rf"{_VERB_PATTERN}\s+({_NAME_PATTERN})", re.IGNORECASE)
_PRONOUN_PATTERN = re.compile(r"\b(?:he|she|they)\b", re.IGNORECASE)
# Straight or curly double-quoted span; group 1 is the quote's content.
_QUOTE_PATTERN = re.compile(r'["“”]([^"“”\\]*(?:\\.[^"“”\\]*)*)["”]')
_MALE_PRONOUN_PATTERN = re.compile(r"\b(?:he|him|his|himself)\b", re.IGNORECASE)
_FEMALE_PRONOUN_PATTERN = re.compile(r"\b(?:she|her|hers|herself)\b", re.IGNORECASE)
# Pronoun forms — presumably used to reject pronouns as speaker labels
# elsewhere in this module (usage not visible here; verify downstream).
_PRONOUN_LABELS = {
    "he",
    "she",
    "they",
    "them",
    "theirs",
    "their",
    "themselves",
    "him",
    "his",
    "himself",
    "her",
    "hers",
    "herself",
    "we",
    "us",
    "our",
    "ours",
    "ourselves",
    "i",
    "me",
    "my",
    "mine",
    "myself",
    "you",
    "your",
    "yours",
    "yourself",
    "yourselves",
}
# Ordering of confidence levels for "keep the best seen" comparisons.
_CONFIDENCE_RANK = {"low": 1, "medium": 2, "high": 3}
# Honorifics/titles used as gender hints near a speaker's name.
_FEMALE_TITLE_HINTS = (
    "madame",
    "mme",
    "madam",
    "mrs",
    "miss",
    "ms",
    "lady",
    "countess",
    "baroness",
    "princess",
    "queen",
    "mademoiselle",
)
_MALE_TITLE_HINTS = (
    "monsieur",
    "m.",
    "mr",
    "sir",
    "lord",
    "count",
    "baron",
    "prince",
    "king",
    "abbé",
    "abbe",
)
# Per-token vote weights used by the gender scoring in _count_gender_votes.
_MALE_TOKEN_WEIGHTS = {
    "he": 1.0,
    "him": 0.6,
    "his": 0.75,
    "himself": 1.0,
}
_FEMALE_TOKEN_WEIGHTS = {
    "she": 1.0,
    "her": 0.4,
    "hers": 0.75,
    "herself": 1.0,
}
# Connective/adverb words stripped from the edges of candidate labels.
_STOP_LABELS = {
    "and",
    "but",
    "then",
    "though",
    "meanwhile",
    "therefore",
    "after",
    "before",
    "when",
    "while",
    "because",
    "as",
    "yet",
    "nor",
    "so",
    "thus",
    "suddenly",
    "eventually",
    "finally",
    "until",
    "unless",
}
@dataclass(slots=True)
class SpeakerGuess:
    """Accumulated evidence for one inferred speaker.

    Counts attributed chunks, tracks the best confidence seen, keeps up to
    three sample excerpts, and accumulates pronoun-based gender votes.
    """
    speaker_id: str  # stable slug identifying this speaker
    label: str  # display label, e.g. "Anna"
    count: int = 0  # number of chunks attributed to this speaker
    confidence: str = "low"  # best confidence observed so far
    sample_quotes: List[Dict[str, str]] = field(default_factory=list)
    suppressed: bool = False  # True once folded back into the narrator
    # `gender` is only recomputed while it is unknown/male/female; any other
    # value is left untouched. `detected_gender` always tracks the votes.
    gender: str = "unknown"
    detected_gender: str = "unknown"
    male_votes: int = 0
    female_votes: int = 0
    def register_occurrence(
        self,
        confidence: str,
        text: str,
        quote: Optional[str],
        male_votes: int,
        female_votes: int,
        sample_excerpt: Optional[str] = None,
    ) -> None:
        """Fold one attributed chunk into this guess's statistics."""
        self.count += 1
        # Keep the highest confidence level observed so far.
        if _CONFIDENCE_RANK.get(confidence, 0) > _CONFIDENCE_RANK.get(
            self.confidence, 0
        ):
            self.confidence = confidence
        # Prefer the caller-supplied excerpt; otherwise build one locally.
        excerpt = (
            sample_excerpt
            if sample_excerpt is not None
            else _build_excerpt(text, quote)
        )
        gender_hint = _format_gender_hint(male_votes, female_votes)
        if excerpt:
            payload = {"excerpt": excerpt, "gender_hint": gender_hint}
            # Deduplicate and cap the stored samples at three.
            if payload not in self.sample_quotes:
                self.sample_quotes.append(payload)
                if len(self.sample_quotes) > 3:
                    self.sample_quotes = self.sample_quotes[:3]
        if male_votes:
            self.male_votes += male_votes
        if female_votes:
            self.female_votes += female_votes
        self.detected_gender = _derive_gender(
            self.male_votes, self.female_votes, self.detected_gender
        )
        if self.gender in {"unknown", "male", "female"}:
            self.gender = _derive_gender(
                self.male_votes, self.female_votes, self.gender
            )
    def as_dict(self) -> Dict[str, Any]:
        """Plain-dict snapshot for serialization (samples are copied)."""
        return {
            "id": self.speaker_id,
            "label": self.label,
            "count": self.count,
            "confidence": self.confidence,
            "sample_quotes": [dict(sample) for sample in self.sample_quotes],
            "suppressed": self.suppressed,
            "gender": self.gender,
            "detected_gender": self.detected_gender,
        }
@dataclass(slots=True)
class SpeakerAnalysis:
assignments: Dict[str, str]
speakers: Dict[str, SpeakerGuess]
suppressed: List[str]
narrator: str = "narrator"
version: str = "1.0"
stats: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
return {
"version": self.version,
"narrator": self.narrator,
"assignments": dict(self.assignments),
"speakers": {
speaker_id: guess.as_dict()
for speaker_id, guess in self.speakers.items()
},
"suppressed": list(self.suppressed),
"stats": dict(self.stats),
}
def analyze_speakers(
    chapters: Sequence[Dict[str, Any]] | Iterable[Dict[str, Any]],
    chunks: Sequence[Dict[str, Any]] | Iterable[Dict[str, Any]],
    *,
    threshold: int = 3,
    max_speakers: int = 8,
) -> SpeakerAnalysis:
    """Attribute each chunk to a speaker and summarize the evidence.

    Chunks are walked in (chapter_index, chunk_index) order; each one is
    assigned either an explicitly named speaker, the last explicit speaker
    (for pronoun continuations), or the narrator. Speakers seen fewer than
    *threshold* times, or beyond the *max_speakers* most frequent, are
    suppressed and reassigned to the narrator.

    NOTE(review): *chapters* is accepted but not referenced by this
    implementation.
    """
    narrator_id = "narrator"
    speaker_guesses: Dict[str, SpeakerGuess] = {
        narrator_id: SpeakerGuess(
            speaker_id=narrator_id, label="Narrator", confidence="low"
        )
    }
    # Maps a normalized display label to its canonical record id.
    label_index: Dict[str, str] = {"Narrator": narrator_id}
    assignments: Dict[str, str] = {}
    suppressed: List[str] = []
    # Copy each chunk dict and sort into reading order.
    ordered_chunks = sorted(
        (dict(chunk) for chunk in chunks),
        key=lambda entry: (
            _safe_int(entry.get("chapter_index")),
            _safe_int(entry.get("chunk_index")),
        ),
    )
    last_explicit: Optional[str] = None
    explicit_assignments = 0
    unique_speakers: set[str] = set()
    for index, chunk in enumerate(ordered_chunks):
        chunk_id = str(chunk.get("id") or "")
        text = _get_chunk_text(chunk)
        speaker_id, confidence, quote = _infer_chunk_speaker(text, last_explicit)
        if speaker_id is None:
            # No attribution found: continue the last explicit speaker if
            # any, otherwise fall back to the narrator.
            speaker_id = last_explicit or narrator_id
            confidence = "medium" if last_explicit else "low"
            quote = quote or _extract_quote(text)
        if speaker_id != narrator_id:
            last_explicit = speaker_id
            explicit_assignments += 1
        if speaker_id in speaker_guesses:
            record_id = speaker_id
            guess = speaker_guesses[record_id]
            label = guess.label
        else:
            # First sighting under this raw name: normalize the label and
            # create (or reuse) a canonical record for it.
            label = _normalize_label(speaker_id)
            record_id = label_index.get(label)
            if record_id is None:
                record_id = _dedupe_slug(_slugify(label), speaker_guesses)
                label_index[label] = record_id
                speaker_guesses[record_id] = SpeakerGuess(
                    speaker_id=record_id, label=label
                )
            guess = speaker_guesses[record_id]
        assignments[chunk_id] = record_id
        unique_speakers.add(record_id)
        # Keep last_explicit pointing at the canonical record id rather
        # than the raw inferred name.
        if (
            record_id != narrator_id
            and record_id != speaker_id
            and speaker_id == last_explicit
        ):
            last_explicit = record_id
        sample_excerpt = None
        if record_id != narrator_id:
            sample_excerpt = _select_sample_excerpt(
                ordered_chunks, index, guess.label, quote, confidence
            )
        male_votes, female_votes = _count_gender_votes(text, guess.label)
        guess.register_occurrence(
            confidence, text, quote, male_votes, female_votes, sample_excerpt
        )
    active_speakers = [sid for sid in speaker_guesses if sid != narrator_id]
    # Apply minimum occurrence threshold.
    for speaker_id in list(active_speakers):
        guess = speaker_guesses[speaker_id]
        if guess.count < max(1, threshold):
            guess.suppressed = True
            suppressed.append(speaker_id)
            _reassign(assignments, speaker_id, narrator_id)
            active_speakers.remove(speaker_id)
    # Apply maximum active speaker cap.
    if max_speakers and len(active_speakers) > max_speakers:
        # Keep the most frequent speakers; ties break alphabetically.
        active_speakers.sort(key=lambda sid: (-speaker_guesses[sid].count, sid))
        for speaker_id in active_speakers[max_speakers:]:
            guess = speaker_guesses[speaker_id]
            guess.suppressed = True
            suppressed.append(speaker_id)
            _reassign(assignments, speaker_id, narrator_id)
        active_speakers = active_speakers[:max_speakers]
    # Recompute the narrator's count after all reassignments.
    narrator_guess = speaker_guesses[narrator_id]
    narrator_guess.count = sum(
        1 for value in assignments.values() if value == narrator_id
    )
    narrator_guess.confidence = "low"
    stats = {
        "total_chunks": len(ordered_chunks),
        "explicit_chunks": explicit_assignments,
        "active_speakers": len(active_speakers),
        "unique_speakers": len(unique_speakers),
        "suppressed": len(suppressed),
    }
    return SpeakerAnalysis(
        assignments=assignments,
        speakers=speaker_guesses,
        suppressed=suppressed,
        narrator=narrator_id,
        stats=stats,
    )
def _infer_chunk_speaker(
    text: str, last_explicit: Optional[str]
) -> Tuple[Optional[str], str, Optional[str]]:
    """Guess the speaker of one chunk.

    Returns (speaker_or_None, confidence, quote_or_None). Heuristics are
    tried in order: "Name: text" colon attribution (high confidence), a
    name next to a quote (high), a pronoun near a quote continuing the
    last explicit speaker (medium), otherwise no attribution (low).
    """
    normalized = text.strip()
    if not normalized:
        return None, "low", None
    colon_match = _COLON_PATTERN.match(normalized)
    if colon_match:
        raw_label = colon_match.group(1)
        cleaned = _normalize_candidate_name(raw_label)
        if cleaned is None:
            # Label rejected (e.g. stop word); still surface the quote text.
            return None, "low", colon_match.group(2).strip()
        quote = colon_match.group(2).strip()
        return cleaned, "high", quote
    quote = _extract_quote(normalized)
    if not quote:
        return None, "low", None
    before, after = _split_around_quote(normalized, quote)
    candidate = _match_name_near_quote(before, after)
    if candidate:
        cleaned = _normalize_candidate_name(candidate)
        if cleaned:
            return cleaned, "high", quote
    if last_explicit:
        # A nearby pronoun suggests the previous speaker is still talking.
        pronoun_after = _PRONOUN_PATTERN.search(after)
        pronoun_before = _PRONOUN_PATTERN.search(before)
        if pronoun_after or pronoun_before:
            return last_explicit, "medium", quote
    return None, "low", quote
def _split_around_quote(text: str, quote: str) -> Tuple[str, str]:
quote_index = text.find(quote)
if quote_index == -1:
return text, ""
before = text[:quote_index]
after = text[quote_index + len(quote) :]
return before, after
def _match_name_near_quote(before: str, after: str) -> Optional[str]:
    """Find a speaker name attributed near a quote.

    Tries, in order: "Name said" just before the quote, then
    "Name, said" and "said Name" just after it (windows of 120 chars).
    """
    trailing = before[-120:]
    leading = after[:120]
    attempts = (
        (_NAME_BEFORE_VERB, trailing),
        (
            re.compile(
                rf"({_NAME_PATTERN})\s*,?\s*{_VERB_PATTERN}", flags=re.IGNORECASE
            ),
            leading,
        ),
        (_VERB_BEFORE_NAME, leading),
    )
    for pattern, window in attempts:
        match = pattern.search(window)
        if match and _looks_like_name(match.group(1)):
            return match.group(1)
    return None
def _looks_like_name(value: str) -> bool:
    """True when *value* normalizes to one or more capitalized words."""
    normalized = _normalize_candidate_name(value)
    if not normalized:
        return False
    parts = normalized.split()
    return bool(parts) and all(part and part[0].isupper() for part in parts)
def _extract_quote(text: str) -> Optional[str]:
    """Return the first quoted span in *text* (quote marks included)."""
    match = _QUOTE_PATTERN.search(text)
    return match.group(0) if match else None
def _slugify(label: str) -> str:
slug = re.sub(r"[^a-z0-9]+", "_", label.lower()).strip("_")
return slug or "speaker"
def _dedupe_slug(slug: str, existing: Dict[str, SpeakerGuess]) -> str:
candidate = slug
index = 2
while candidate in existing:
candidate = f"{slug}_{index}"
index += 1
return candidate
def _normalize_label(label: str) -> str:
words = re.split(r"\s+", label.strip())
return " ".join(word.capitalize() for word in words if word)
def _safe_int(value: Any, default: int = 0) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def _reassign(assignments: Dict[str, str], old: str, new: str) -> None:
for key, value in list(assignments.items()):
if value == old:
assignments[key] = new
def _strip_diacritics(value: str) -> str:
normalized = unicodedata.normalize("NFKD", value)
return "".join(char for char in normalized if not unicodedata.combining(char))
def _count_gender_votes(text: str, label: Optional[str]) -> Tuple[int, int]:
    """Score male/female pronoun evidence for *label* inside *text*.

    Returns (male_votes, female_votes) as rounded integers. Pronouns are
    weighted by how close they are to an occurrence of the label
    ("windows") and down-weighted when they appear inside quoted speech.
    When the label never occurs, the whole text is used as one window and
    all scores are degraded by 0.25.
    """
    if not text:
        return 0, 0
    search_text = text
    windows: List[Tuple[int, int]] = []
    degrade_factor = 1.0
    if label:
        pattern = re.compile(re.escape(label), re.IGNORECASE)
        matches = list(pattern.finditer(search_text))
        if not matches:
            # Retry with diacritics stripped from both label and text.
            alt_label = _strip_diacritics(label)
            if alt_label and alt_label != label:
                ascii_text = _strip_diacritics(search_text)
                pattern_alt = re.compile(re.escape(alt_label), re.IGNORECASE)
                windows = [match.span() for match in pattern_alt.finditer(ascii_text)]
                # Map spans back roughly using proportional index
                if windows:
                    mapped: List[Tuple[int, int]] = []
                    for start, end in windows:
                        start_idx = min(
                            len(search_text) - 1,
                            int(start * len(search_text) / max(len(ascii_text), 1)),
                        )
                        end_idx = min(
                            len(search_text),
                            int(end * len(search_text) / max(len(ascii_text), 1)),
                        )
                        mapped.append((start_idx, end_idx))
                    windows = mapped
        else:
            windows = [match.span() for match in matches]
    if not windows:
        # No label occurrence: use the full text but weaken the signal.
        windows = [(0, len(search_text))]
        degrade_factor = 0.25
    radius = 60
    # Collect the content spans of quoted speech for down-weighting.
    quote_spans: List[Tuple[int, int, str]] = []
    for match in _QUOTE_PATTERN.finditer(search_text):
        try:
            content_start, content_end = match.span(1)
        except IndexError:
            content_start, content_end = match.span()
        if content_start < content_end:
            quote_spans.append(
                (content_start, content_end, search_text[content_start:content_end])
            )
    normalized_label = _normalize_candidate_name(label) if label else None
    normalized_label_lower = normalized_label.lower() if normalized_label else None
    def _window_weight(position: int) -> float:
        # 1.0 after/inside a label occurrence, 0.2 just before it,
        # 0.0 when outside every window (plus radius).
        for start, end in windows:
            if position < start - radius or position > end + radius:
                continue
            if position >= end:
                return 1.0
            if position <= start:
                return 0.2
            return 1.0
        return 0.0
    def _quote_weight(position: int) -> float:
        # Down-weight pronouns inside quoted speech; 1.0 outside quotes.
        for start, end, content in quote_spans:
            if position < start or position >= end:
                continue
            local_index = position - start
            prefix = content[:local_index]
            tail = prefix[-80:]
            name_matches = list(re.finditer(_NAME_PATTERN, tail))
            if name_matches:
                # A name precedes the pronoun within the quote: count more
                # when it is the label itself, almost nothing otherwise.
                last_name = _normalize_candidate_name(name_matches[-1].group(0))
                if (
                    normalized_label_lower
                    and last_name
                    and last_name.lower() == normalized_label_lower
                ):
                    return 0.6
                return 0.05
            if re.search(r"[.!?]\s", prefix):
                return 0.2
            if prefix.strip():
                return 0.15
            return 0.1
        return 1.0
    male_score = 0.0
    for match in _MALE_PRONOUN_PATTERN.finditer(search_text):
        base_weight = _window_weight(match.start())
        if not base_weight:
            continue
        quote_modifier = _quote_weight(match.start())
        weight = base_weight * quote_modifier
        if not weight:
            continue
        token = match.group(0).lower()
        male_score += _MALE_TOKEN_WEIGHTS.get(token, 0.6) * weight
    female_score = 0.0
    for match in _FEMALE_PRONOUN_PATTERN.finditer(search_text):
        base_weight = _window_weight(match.start())
        if not base_weight:
            continue
        quote_modifier = _quote_weight(match.start())
        weight = base_weight * quote_modifier
        if not weight:
            continue
        # Female pronouns outside quotes get a minimum weight floor.
        if quote_modifier >= 0.95:
            weight = max(weight, 0.4)
        token = match.group(0).lower()
        female_score += _FEMALE_TOKEN_WEIGHTS.get(token, 0.4) * weight
    # Honorifics within 40 chars of a label occurrence add a strong bonus.
    for start, end in windows:
        span_start = max(0, start - 40)
        span_end = min(len(search_text), end + 40)
        span_text = search_text[span_start:span_end].lower()
        if any(title in span_text for title in _FEMALE_TITLE_HINTS):
            female_score += 2.5
        if any(title in span_text for title in _MALE_TITLE_HINTS):
            male_score += 2.5
    male_votes = int(round(male_score * degrade_factor))
    female_votes = int(round(female_score * degrade_factor))
    return male_votes, female_votes
def _derive_gender(male_votes: int, female_votes: int, current: str) -> str:
if male_votes == 0 and female_votes == 0:
return current if current != "unknown" else "unknown"
male_threshold = max(2, female_votes + 1)
female_threshold = max(2, male_votes + 1)
if male_votes >= male_threshold:
return "male"
if female_votes >= female_threshold:
return "female"
if current in {"male", "female"}:
return current
return "unknown"
def _get_chunk_text(chunk: Dict[str, Any]) -> str:
if not isinstance(chunk, dict):
return ""
value = chunk.get("normalized_text") or chunk.get("text") or ""
return str(value)
def _trim_paragraph(paragraph: str, limit: int = 600) -> str:
normalized = (paragraph or "").strip()
if not normalized:
return ""
if len(normalized) <= limit:
return normalized
return normalized[: limit - 1].rstrip() + "…"
def _compose_context_excerpt(before: str, current: str, after: str) -> str:
    """Join the trimmed, non-empty paragraphs with blank-line separators."""
    trimmed = (_trim_paragraph(part) for part in (before, current, after))
    return "\n\n".join(part for part in trimmed if part)
def _contains_dialogue_attribution(label: str, text: str, quote: Optional[str]) -> bool:
    """True when *text* attributes speech to *label*, via "Label said",
    "said Label", or a leading "Label:" form; also checked in the windows
    before/after *quote* when provided."""
    if not label or not text:
        return False
    escaped = re.escape(label)
    direct = re.compile(
        rf"\b{escaped}\b\s+(?:{_VERB_PATTERN})\b", re.IGNORECASE
    )
    reverse = re.compile(
        rf"(?:{_VERB_PATTERN})\s+\b{escaped}\b", re.IGNORECASE
    )
    if re.compile(rf"^\s*{escaped}\s*:\s*", re.IGNORECASE).search(text):
        return True
    if direct.search(text) or reverse.search(text):
        return True
    if not quote:
        return False
    before, after = _split_around_quote(text, quote)
    return bool(direct.search(before) or reverse.search(after))
def _select_sample_excerpt(
    chunks: Sequence[Dict[str, Any]],
    index: int,
    label: str,
    quote: Optional[str],
    confidence: str,
) -> Optional[str]:
    """Pick a context excerpt around chunk *index* for a high-confidence label."""
    if confidence != "high" or not label:
        return None
    if not 0 <= index < len(chunks):
        return None
    current = _get_chunk_text(chunks[index])
    if not current:
        return None
    if not _contains_dialogue_attribution(label, current, quote):
        return None
    # Include the neighbouring chunks when they exist.
    before = _get_chunk_text(chunks[index - 1]) if index > 0 else ""
    after = _get_chunk_text(chunks[index + 1]) if index + 1 < len(chunks) else ""
    return _compose_context_excerpt(before, current, after) or None
def _build_excerpt(text: str, quote: Optional[str]) -> str:
normalized = (text or "").strip()
if not normalized:
return ""
if quote:
location = normalized.find(quote)
if location != -1:
start = max(0, location - 120)
end = min(len(normalized), location + len(quote) + 120)
snippet = normalized[start:end].strip()
if start > 0:
snippet = "…" + snippet
if end < len(normalized):
snippet = snippet + "…"
return snippet
if len(normalized) > 240:
return normalized[:240].rstrip() + "…"
return normalized
def _format_gender_hint(male_votes: int, female_votes: int) -> str:
if male_votes and female_votes:
return "Context mentions both male and female pronouns."
if male_votes:
if male_votes >= 3:
return "Multiple male pronouns detected nearby."
return "Some male pronouns detected in the surrounding text."
if female_votes:
if female_votes >= 3:
return "Multiple female pronouns detected nearby."
return "Some female pronouns detected in the surrounding text."
return "No clear pronoun signal detected."
def _normalize_candidate_name(raw: str) -> Optional[str]:
    """Clean a raw speaker-name candidate; None when nothing usable remains."""
    if not raw:
        return None
    stripped = raw.strip().strip("\"“”'’.,:;!")
    stripped = re.sub(r"\s+", " ", stripped).strip()
    if not stripped:
        return None
    words: List[str] = []
    for word in stripped.split():
        # Drop stop-words only while no real word has been kept yet.
        if word and (words or word.lower() not in _STOP_LABELS):
            words.append(word)
    # Also drop any trailing stop-words.
    while words and words[-1].lower() in _STOP_LABELS:
        words.pop()
    if not words or all(word.lower() in _STOP_LABELS for word in words):
        return None
    # Keep only the leading run of capitalised words.
    leading: List[str] = []
    for word in words:
        if not (word and word[0].isupper()):
            break
        leading.append(word)
    if not leading:
        return None
    candidate = " ".join(leading)
    lowered = candidate.lower()
    if lowered in _PRONOUN_LABELS or lowered in _STOP_LABELS:
        return None
    return candidate
from __future__ import annotations
import json
import os
from typing import Any, Dict, List, Optional
from abogen.constants import LANGUAGE_DESCRIPTIONS
from abogen.utils import get_user_config_path
# Top-level key wrapping all speaker configurations in the JSON file on disk.
_CONFIG_WRAPPER_KEY = "abogen_speaker_configs"
def _config_path() -> str:
    """Return the path of speaker_configs.json, creating its directory."""
    base_dir = os.path.dirname(get_user_config_path())
    os.makedirs(base_dir, exist_ok=True)
    return os.path.join(base_dir, "speaker_configs.json")
def load_configs() -> Dict[str, Dict[str, Any]]:
    """Read all saved speaker configurations, sanitized; {} on any error."""
    path = _config_path()
    if not os.path.exists(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except Exception:
        # Corrupt or unreadable file: behave as if nothing was saved.
        return {}
    if isinstance(payload, dict):
        # Unwrap the storage envelope when present.
        payload = payload.get(_CONFIG_WRAPPER_KEY, payload)
    if not isinstance(payload, dict):
        return {}
    return {
        name: _sanitize_config(entry)
        for name, entry in payload.items()
        if isinstance(name, str) and isinstance(entry, dict)
    }
def save_configs(configs: Dict[str, Dict[str, Any]]) -> None:
    """Persist *configs* (sanitized, blank names dropped) under the wrapper key."""
    path = _config_path()
    cleaned = {
        name: _sanitize_config(entry)
        for name, entry in configs.items()
        if isinstance(name, str) and name.strip()
    }
    with open(path, "w", encoding="utf-8") as handle:
        json.dump({_CONFIG_WRAPPER_KEY: cleaned}, handle, indent=2, sort_keys=True)
def get_config(name: str) -> Optional[Dict[str, Any]]:
    """Return a copy of the named configuration, or None when missing."""
    key = (name or "").strip()
    if not key:
        return None
    entry = load_configs().get(key)
    if not isinstance(entry, dict):
        return None
    return dict(entry)
def upsert_config(name: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Create or replace the named configuration and return its stored form.

    Raises:
        ValueError: when *name* is blank.
    """
    key = (name or "").strip()
    if not key:
        raise ValueError("Configuration name is required")
    configs = load_configs()
    configs[key] = _sanitize_config(payload or {})
    save_configs(configs)
    return configs[key]
def delete_config(name: str) -> None:
    """Remove the configuration saved under *name*; no-op when absent or blank."""
    key = (name or "").strip()
    if not key:
        return
    configs = load_configs()
    if key not in configs:
        return
    configs.pop(key)
    save_configs(configs)
def _sanitize_config(entry: Dict[str, Any]) -> Dict[str, Any]:
    """Coerce a raw config mapping into the canonical stored shape."""
    language = str(entry.get("language") or "a").strip() or "a"
    raw_speakers = entry.get("speakers")
    speakers: Dict[str, Any] = {}
    if isinstance(raw_speakers, dict):
        for speaker_id, payload in raw_speakers.items():
            if isinstance(speaker_id, str) and isinstance(payload, dict):
                # Re-key on the sanitized record's own id.
                record = _sanitize_speaker({"id": speaker_id, **payload})
                speakers[record["id"]] = record
    raw_langs = entry.get("languages") or entry.get("allowed_languages") or []
    languages = [
        code.lower()
        for code in (raw_langs if isinstance(raw_langs, list) else [])
        if isinstance(code, str) and code
    ]
    default_voice = entry.get("default_voice")
    notes = entry.get("notes")
    return {
        "language": language.lower(),
        "languages": languages,
        "default_voice": default_voice if isinstance(default_voice, str) else "",
        "speakers": speakers,
        "version": int(entry.get("version", 1)),
        "notes": notes if isinstance(notes, str) else "",
    }
def slugify_label(label: str) -> str:
    """Turn a display label into a lowercase underscore slug ("speaker" fallback)."""
    text = (label or "").strip().lower()
    if not text:
        return "speaker"
    # Non-alphanumeric runs collapse to single underscores, edges trimmed.
    pieces = "".join(ch if ch.isalnum() else "_" for ch in text).split("_")
    slug = "_".join(piece for piece in pieces if piece)
    return slug or "speaker"
def _sanitize_speaker(entry: Dict[str, Any]) -> Dict[str, Any]:
    """Coerce one raw speaker mapping into the canonical stored record."""
    def _text(value: Any) -> str:
        # Only plain strings survive; anything else is stored as "".
        return value if isinstance(value, str) else ""
    label = (entry.get("label") or entry.get("name") or "").strip()
    gender = (entry.get("gender") or "unknown").strip().lower()
    if gender not in {"male", "female", "unknown"}:
        gender = "unknown"
    voice = entry.get("voice")
    voice_profile = entry.get("voice_profile")
    voice_formula = entry.get("voice_formula")
    raw_langs = entry.get("languages") or []
    if not isinstance(raw_langs, list):
        raw_langs = []
    languages = [code.lower() for code in raw_langs if isinstance(code, str) and code]
    # Formula wins over plain voice when no explicit resolved value exists.
    resolved_voice = entry.get("resolved_voice") or voice_formula or voice
    resolved_label = label or entry.get("id") or ""
    raw_id = entry.get("id")
    slug = raw_id if isinstance(raw_id, str) else slugify_label(resolved_label)
    return {
        "id": slug,
        "label": resolved_label,
        "gender": gender,
        "voice": _text(voice),
        "voice_profile": _text(voice_profile),
        "voice_formula": _text(voice_formula),
        "resolved_voice": _text(resolved_voice),
        "languages": languages,
    }
def list_configs() -> List[Dict[str, Any]]:
    """Return all configurations as name-annotated dicts, sorted by name."""
    configs = load_configs()
    return [{"name": name, **configs[name]} for name in sorted(configs)]
def describe_language(code: str) -> str:
    """Human-readable description for a language code ("a" when blank)."""
    normalized = (code or "a").lower()
    return LANGUAGE_DESCRIPTIONS.get(normalized, normalized.upper())
import re
import platform
from abogen.utils import detect_encoding, load_config
from abogen.constants import SAMPLE_VOICE_TEXTS
# Pre-compile frequently used regex patterns for better performance
# --- abogen inline markup tags embedded in extracted text ---
_METADATA_TAG_PATTERN = re.compile(r"<<METADATA_[^:]+:[^>]*>>")
# Runs of whitespace excluding newlines (collapses intra-line spacing).
_WHITESPACE_PATTERN = re.compile(r"[^\S\n]+")
_MULTIPLE_NEWLINES_PATTERN = re.compile(r"\n{3,}")
# A lone newline with no blank line on either side.
_SINGLE_NEWLINE_PATTERN = re.compile(r"(?<!\n)\n(?!\n)")
_CHAPTER_MARKER_PATTERN = re.compile(r"<<CHAPTER_MARKER:[^>]*>>")
# --- subtitle cleanup: HTML-ish tags, VTT voice spans, ASS overrides ---
_HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
_VOICE_TAG_PATTERN = re.compile(r"{[^}]+}")
_ASS_STYLING_PATTERN = re.compile(r"\{[^}]+\}")
_ASS_NEWLINE_N_PATTERN = re.compile(r"\\N")
_ASS_NEWLINE_LOWER_N_PATTERN = re.compile(r"\\n")
# Capturing variant of the chapter marker (extracts the title).
_CHAPTER_MARKER_SEARCH_PATTERN = re.compile(r"<<CHAPTER_MARKER:(.*?)>>")
# --- WebVTT structure: header, STYLE/NOTE blocks, cue separators, timings ---
_WEBVTT_HEADER_PATTERN = re.compile(r"^WEBVTT.*?\n", re.MULTILINE)
_VTT_STYLE_PATTERN = re.compile(r"STYLE\s*\n.*?(?=\n\n|$)", re.DOTALL)
_VTT_NOTE_PATTERN = re.compile(r"NOTE\s*\n.*?(?=\n\n|$)", re.DOTALL)
_DOUBLE_NEWLINE_SPLIT_PATTERN = re.compile(r"\n\s*\n")
_VTT_TIMESTAMP_PATTERN = re.compile(r"([\d:.]+)\s*-->\s*([\d:.]+)")
# A line consisting solely of HH:MM:SS with an optional .ms/,ms fraction.
_TIMESTAMP_ONLY_PATTERN = re.compile(r"^(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3})?)$")
# --- per-OS filename sanitization character classes ---
_WINDOWS_ILLEGAL_CHARS_PATTERN = re.compile(r'[<>:"/\\|?*]')
_CONTROL_CHARS_PATTERN = re.compile(r"[\x00-\x1f]")
_LINUX_CONTROL_CHARS_PATTERN = re.compile(
    r"[\x01-\x1f]"
)  # Linux: exclude \x00 for separate handling
_MACOS_ILLEGAL_CHARS_PATTERN = re.compile(r"[:]")
_LINUX_ILLEGAL_CHARS_PATTERN = re.compile(r"[/\x00]")
def clean_subtitle_text(text):
    """Strip abogen metadata tags and chapter markers from subtitle text."""
    without_meta = _METADATA_TAG_PATTERN.sub("", text)
    without_markers = _CHAPTER_MARKER_PATTERN.sub("", without_meta)
    return without_markers.strip()
def calculate_text_length(text):
    """Count speakable characters: markup tags, newlines and edge spaces ignored."""
    stripped = _CHAPTER_MARKER_PATTERN.sub("", text)
    stripped = _METADATA_TAG_PATTERN.sub("", stripped)
    stripped = stripped.replace("\n", "").strip()
    return len(stripped)
def clean_text(text, *args, **kwargs):
    """Normalize whitespace and paragraph breaks; extra args are ignored.

    Honours the "replace_single_newlines" user setting: when enabled,
    lone newlines become spaces while blank-line paragraph breaks survive.
    """
    text = _METADATA_TAG_PATTERN.sub("", text)
    cfg = load_config()
    collapse_singles = cfg.get("replace_single_newlines", True)
    # Per line: squash runs of non-newline whitespace and trim the edges.
    text = "\n".join(
        _WHITESPACE_PATTERN.sub(" ", line).strip() for line in text.splitlines()
    )
    # Paragraph breaks become exactly one blank line; trim the whole text.
    text = _MULTIPLE_NEWLINES_PATTERN.sub("\n\n", text).strip()
    if collapse_singles:
        text = _SINGLE_NEWLINE_PATTERN.sub(" ", text)
    return text
def parse_srt_file(file_path):
    """
    Parse an SRT subtitle file and return a list of subtitle entries.
    Args:
        file_path: Path to the SRT file
    Returns:
        List of tuples: [(start_time_seconds, end_time_seconds, text), ...]
    """
    def to_seconds(stamp):
        # "HH:MM:SS,mmm" -> float seconds
        hours, minutes, rest = stamp.split(":")
        seconds, millis = rest.split(",")
        return int(hours) * 3600 + int(minutes) * 60 + int(seconds) + int(millis) / 1000.0
    encoding = detect_encoding(file_path)
    with open(file_path, "r", encoding=encoding, errors="replace") as f:
        content = f.read()
    entries = []
    # Blocks are separated by blank lines: index line, timing line, text lines.
    for block in re.split(r"\n\s*\n", content.strip()):
        lines = block.strip().split("\n")
        if len(lines) < 3:
            continue
        try:
            match = re.match(
                r"(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})",
                lines[1],
            )
            if not match:
                continue
            start_sec = to_seconds(match.group(1))
            end_sec = to_seconds(match.group(2))
            # Strip styling tags and abogen markers; drop empty cues.
            body = _HTML_TAG_PATTERN.sub("", "\n".join(lines[2:]))
            body = clean_subtitle_text(body)
            if body:
                entries.append((start_sec, end_sec, body))
        except (ValueError, IndexError):
            continue
    return entries
def parse_vtt_file(file_path):
    """
    Parse a VTT (WebVTT) subtitle file and return a list of subtitle entries.

    Timestamps may be HH:MM:SS.mmm or MM:SS.mmm. The seconds field is now
    parsed with float(), so cues whose timestamp has no fractional part (or
    fewer than three fraction digits) are kept instead of being silently
    dropped, and ".5" is read as 500 ms rather than 5 ms.
    Args:
        file_path: Path to the VTT file
    Returns:
        List of tuples: [(start_time_seconds, end_time_seconds, text), ...]
    """
    def time_to_seconds(t):
        """Convert HH:MM:SS[.mmm] or MM:SS[.mmm] to seconds as float."""
        parts = t.split(":")
        if len(parts) == 3:  # HH:MM:SS[.mmm]
            h, m, s = parts
            return int(h) * 3600 + int(m) * 60 + float(s)
        if len(parts) == 2:  # MM:SS[.mmm]
            m, s = parts
            return int(m) * 60 + float(s)
        return 0
    encoding = detect_encoding(file_path)
    with open(file_path, "r", encoding=encoding, errors="replace") as f:
        content = f.read()
    # Remove WEBVTT header and any STYLE/NOTE blocks using pre-compiled patterns.
    content = _WEBVTT_HEADER_PATTERN.sub("", content)
    content = _VTT_STYLE_PATTERN.sub("", content)
    content = _VTT_NOTE_PATTERN.sub("", content)
    # Cues are separated by blank lines.
    blocks = _DOUBLE_NEWLINE_SPLIT_PATTERN.split(content.strip())
    subtitles = []
    for block in blocks:
        if not block.strip():
            continue
        lines = block.strip().split("\n")
        if len(lines) < 2:
            continue
        # A cue may start with an optional identifier line before the timing line.
        if "-->" in lines[0]:
            timestamp_line = lines[0]
            text_start_idx = 1
        elif "-->" in lines[1]:
            timestamp_line = lines[1]
            text_start_idx = 2
        else:
            continue
        try:
            match = _VTT_TIMESTAMP_PATTERN.match(timestamp_line)
            if not match:
                continue
            start_sec = time_to_seconds(match.group(1))
            end_sec = time_to_seconds(match.group(2))
            text = "\n".join(lines[text_start_idx:])
            # Strip HTML-ish styling tags and {voice}-style spans.
            text = _HTML_TAG_PATTERN.sub("", text)
            text = _VOICE_TAG_PATTERN.sub("", text)
            # Remove chapter markers and metadata tags; drop empty cues.
            text = clean_subtitle_text(text)
            if text:
                subtitles.append((start_sec, end_sec, text))
        except (ValueError, IndexError, AttributeError):
            continue
    return subtitles
def detect_timestamps_in_text(file_path):
    """Heuristically detect timestamp-only lines (HH:MM:SS[,ms]) in a text file."""
    try:
        encoding = detect_encoding(file_path)
        with open(file_path, "r", encoding=encoding, errors="replace") as f:
            # Sample the first 50 raw lines, keeping only the non-empty ones.
            sample = [line.strip() for line in f.readlines()[:50] if line.strip()]
        hits = sum(1 for line in sample if _TIMESTAMP_ONLY_PATTERN.match(line))
        # Need at least two timestamp-only lines making up >5% of the sample.
        return hits >= 2 and hits / max(len(sample), 1) > 0.05
    except Exception:
        return False
def parse_timestamp_text_file(file_path):
    """Parse text file with timestamps. Returns list of (start_time, end_time, text) tuples.
    Supports HH:MM:SS or HH:MM:SS,ms format. Returns time in seconds as float.

    Each timestamp-only line starts a new entry whose text is everything up
    to the next timestamp. The final entry's end_time is None (open-ended).
    """
    encoding = detect_encoding(file_path)
    with open(file_path, "r", encoding=encoding, errors="replace") as f:
        content = f.read()
    # Split by timestamp pattern (supports HH:MM:SS or HH:MM:SS,ms)
    pattern = r"^(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3})?)$"
    lines = content.split("\n")
    def parse_time(time_str):
        """Convert HH:MM:SS or HH:MM:SS,ms to seconds as float."""
        time_str = time_str.replace(",", ".")
        parts = time_str.split(":")
        return float(int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2]))
    # Accumulator state machine: collect text lines under the most recent
    # timestamp; text seen before any timestamp is kept separately.
    entries = []
    current_time = None
    current_text = []
    pre_timestamp_text = []  # Text before first timestamp
    for line in lines:
        match = re.match(pattern, line.strip())
        if match:
            # Save previous entry
            if current_time is not None and current_text:
                text = "\n".join(current_text).strip()
                if text:
                    entries.append((current_time, text))
            elif current_time is None and pre_timestamp_text:
                # First timestamp found, save pre-timestamp text with time 0
                text = "\n".join(pre_timestamp_text).strip()
                if text:
                    entries.append((0.0, text))
                pre_timestamp_text = []
            # Start new entry
            time_str = match.group(1)
            current_time = parse_time(time_str)
            current_text = []
        elif current_time is not None:
            current_text.append(line)
        else:
            # Text before first timestamp
            pre_timestamp_text.append(line)
    # Save last entry
    if current_time is not None and current_text:
        text = "\n".join(current_text).strip()
        if text:
            entries.append((current_time, text))
    elif not entries and pre_timestamp_text:
        # No timestamps found at all, treat entire file as starting at 0
        text = "\n".join(pre_timestamp_text).strip()
        if text:
            entries.append((0.0, text))
    # Convert to subtitle format with end times
    subtitles = []
    for i, (start_time, text) in enumerate(entries):
        # Each entry ends where the next one starts; the last is open-ended.
        end_time = entries[i + 1][0] if i + 1 < len(entries) else None
        # Remove chapter markers and metadata tags
        text = clean_subtitle_text(text)
        if text:  # Only add non-empty entries
            subtitles.append((start_time, end_time, text))
    return subtitles
def parse_ass_file(file_path):
    """
    Parse an ASS/SSA subtitle file and return a list of subtitle entries.
    Args:
        file_path: Path to the ASS/SSA file
    Returns:
        List of tuples: [(start_time_seconds, end_time_seconds, text), ...]
    """
    encoding = detect_encoding(file_path)
    with open(file_path, "r", encoding=encoding, errors="replace") as f:
        lines = f.readlines()
    subtitles = []
    in_events = False
    # Maps lower-cased column names from the [Events] Format: line to their
    # position; used to locate Start/End/Text in each Dialogue line.
    format_indices = {}
    for line in lines:
        line = line.strip()
        if line.startswith("[Events]"):
            in_events = True
            continue
        if line.startswith("[") and in_events:
            # New section, stop processing
            break
        if in_events and line.startswith("Format:"):
            # Parse format line to know column positions
            parts = line.split(":", 1)[1].strip().split(",")
            for i, part in enumerate(parts):
                format_indices[part.strip().lower()] = i
            continue
        if in_events and (line.startswith("Dialogue:") or line.startswith("Comment:")):
            if line.startswith("Comment:"):
                continue  # Skip comments
            # Limit the split so commas inside the Text column are preserved.
            parts = line.split(":", 1)[1].strip().split(",", len(format_indices) - 1)
            if (
                "start" in format_indices
                and "end" in format_indices
                and "text" in format_indices
            ):
                start_str = parts[format_indices["start"]].strip()
                end_str = parts[format_indices["end"]].strip()
                text = parts[format_indices["text"]].strip()
                # Convert timestamp to seconds (ASS format: H:MM:SS.CS where CS is centiseconds)
                def ass_time_to_seconds(t):
                    parts = t.split(":")
                    if len(parts) == 3:
                        h, m, s = parts
                        s_parts = s.split(".")
                        seconds = float(s_parts[0])
                        centiseconds = float(s_parts[1]) if len(s_parts) > 1 else 0
                        return (
                            int(h) * 3600 + int(m) * 60 + seconds + centiseconds / 100.0
                        )
                    return 0
                start_sec = ass_time_to_seconds(start_str)
                end_sec = ass_time_to_seconds(end_str)
                # Clean text of ASS styling tags using pre-compiled patterns
                text = _ASS_STYLING_PATTERN.sub("", text)  # Remove {tags}
                text = _ASS_NEWLINE_N_PATTERN.sub("\n", text)  # Convert \N to newline
                text = _ASS_NEWLINE_LOWER_N_PATTERN.sub(
                    "\n", text
                )  # Convert \n to newline
                # Remove chapter markers and metadata tags
                text = clean_subtitle_text(text)
                if text:  # Only add non-empty subtitles
                    subtitles.append((start_sec, end_sec, text))
    return subtitles
def get_sample_voice_text(lang_code):
    """Return the sample sentence for *lang_code*, defaulting to language "a"."""
    default_text = SAMPLE_VOICE_TEXTS["a"]
    return SAMPLE_VOICE_TEXTS.get(lang_code, default_text)
def sanitize_name_for_os(name, is_folder=True):
    """
    Sanitize a filename or folder name based on the operating system.
    Args:
        name: The name to sanitize
        is_folder: Whether this is a folder name (default: True)
    Returns:
        Sanitized name safe for the current OS
    """
    if not name:
        return "audiobook"
    system = platform.system()
    if system == "Windows":
        # Replace < > : " / \ | ? * and control chars; trim trailing ". ".
        cleaned = _WINDOWS_ILLEGAL_CHARS_PATTERN.sub("_", name)
        cleaned = _CONTROL_CHARS_PATTERN.sub("_", cleaned)
        cleaned = cleaned.rstrip(". ")
        # Reserved device names (CON, PRN, AUX, NUL, COM1-9, LPT1-9) get a
        # leading underscore, with or without an extension.
        reserved = {"CON", "PRN", "AUX", "NUL"}
        reserved.update(f"COM{i}" for i in range(1, 10))
        reserved.update(f"LPT{i}" for i in range(1, 10))
        upper = cleaned.upper()
        if upper in reserved or upper.split(".")[0] in reserved:
            cleaned = f"_{cleaned}"
    elif system == "Darwin":  # macOS
        # Colon is illegal (the system maps it to /); strip control chars too.
        cleaned = _MACOS_ILLEGAL_CHARS_PATTERN.sub("_", name)
        cleaned = _CONTROL_CHARS_PATTERN.sub("_", cleaned)
        # A leading dot would create a hidden folder.
        if is_folder and cleaned.startswith("."):
            cleaned = "_" + cleaned[1:]
    else:  # Linux and others
        # Only "/" and NUL are truly illegal; strip remaining controls for safety.
        cleaned = _LINUX_ILLEGAL_CHARS_PATTERN.sub("_", name)
        cleaned = _LINUX_CONTROL_CHARS_PATTERN.sub("_", cleaned)
        if is_folder and cleaned.startswith("."):
            cleaned = "_" + cleaned[1:]
    if not cleaned or not cleaned.strip():
        cleaned = "audiobook"
    # Common cross-filesystem name length limit.
    if len(cleaned) > 255:
        cleaned = cleaned[:255].rstrip(". ")
    return cleaned
from __future__ import annotations
import datetime
import logging
import mimetypes
import re
import textwrap
import urllib.parse
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, cast
import ebooklib # type: ignore[import]
import fitz # type: ignore[import]
import markdown # type: ignore[import]
from bs4 import BeautifulSoup, NavigableString # type: ignore[import]
from ebooklib import epub # type: ignore[import]
from .utils import calculate_text_length, clean_text, detect_encoding
logger = logging.getLogger(__name__)
# Inline tags injected into extracted text: <<METADATA_KEY:value>> and
# <<CHAPTER_MARKER:title>>.
METADATA_PATTERN = re.compile(r"<<METADATA_([A-Z_]+):(.*?)>>", re.DOTALL)
CHAPTER_PATTERN = re.compile(r"<<CHAPTER_MARKER:(.*?)>>", re.IGNORECASE)
# Maps upper-cased raw tag names to canonical metadata dict keys
# (both ALBUM_ARTIST spellings collapse to "album_artist").
METADATA_KEY_MAP: Dict[str, str] = {
    "TITLE": "title",
    "ARTIST": "artist",
    "ALBUM": "album",
    "YEAR": "year",
    "ALBUM_ARTIST": "album_artist",
    "ALBUMARTIST": "album_artist",
    "COMPOSER": "composer",
    "GENRE": "genre",
    "DATE": "date",
    "PUBLISHER": "publisher",
    "COMMENT": "comment",
    "LANGUAGE": "language",
}
@dataclass
class ExtractedChapter:
    """One chapter of extracted text with its display title."""
    title: str
    text: str
    @property
    def characters(self) -> int:
        """Character count of ``text`` as computed by ``calculate_text_length``."""
        return calculate_text_length(self.text)
@dataclass
class ExtractionResult:
    """Everything pulled from one source document."""
    chapters: List[ExtractedChapter]
    metadata: Dict[str, str] = field(default_factory=dict)
    # Raw cover bytes when the source provides one; presumably cover_mime
    # is its matching MIME type — neither is set in this chunk.
    cover_image: Optional[bytes] = None
    cover_mime: Optional[str] = None
    @property
    def combined_text(self) -> str:
        """All chapter texts joined by blank lines."""
        return "\n\n".join(chapter.text for chapter in self.chapters)
    @property
    def total_characters(self) -> int:
        """Sum of per-chapter character counts."""
        return sum(chapter.characters for chapter in self.chapters)
@dataclass
class MetadataSource:
    """Raw bibliographic metadata gathered from a source document."""
    title: Optional[str] = None
    authors: List[str] = field(default_factory=list)
    description: Optional[str] = None
    publisher: Optional[str] = None
    publication_year: Optional[str] = None  # kept as a string, e.g. "2021"
    language: Optional[str] = None
    series: Optional[str] = None
    series_index: Optional[str] = None
@dataclass
class NavEntry:
    """One navigation (TOC) entry collected while reading an EPUB."""
    # NOTE(review): field semantics are defined by EpubExtractor's nav
    # processing (not fully visible here) — src/doc_href look like hrefs and
    # position/doc_order like ordering keys; confirm against that code.
    src: str
    title: str
    doc_href: str
    position: int
    doc_order: int
def extract_from_path(path: Path) -> ExtractionResult:
    """Dispatch extraction on the file extension.

    Raises:
        ValueError: when the extension is not supported.
    """
    handlers = {
        ".txt": _extract_plaintext,
        ".pdf": _extract_pdf,
        ".md": _extract_markdown,
        ".markdown": _extract_markdown,
        ".epub": _extract_epub,
    }
    suffix = path.suffix.lower()
    handler = handlers.get(suffix)
    if handler is None:
        raise ValueError(f"Unsupported input type: {suffix}")
    return handler(path)
def _extract_plaintext(path: Path) -> ExtractionResult:
    """Extract a .txt file, honouring any embedded abogen tags."""
    raw = path.read_text(encoding=detect_encoding(str(path)), errors="replace")
    return _extract_from_string(raw, default_title=path.stem)
def _extract_from_string(raw: str, default_title: str) -> ExtractionResult:
    """Build an ExtractionResult from raw text carrying abogen inline tags."""
    raw_metadata, body = _strip_metadata(raw)
    chapters = _split_chapters(body, default_title)
    tags = _normalize_metadata_keys(raw_metadata)
    artist = tags.get("artist")
    authors = (
        [name.strip() for name in artist.split(",") if name.strip()]
        if artist
        else []
    )
    source = MetadataSource(
        title=tags.get("title") or default_title,
        authors=authors,
        publication_year=tags.get("year"),
    )
    metadata = _build_metadata_payload(source, len(chapters), "text", default_title)
    # Embedded tags win over the generated defaults.
    metadata.update(tags)
    if not chapters:
        chapters = [ExtractedChapter(title=default_title, text="")]
    return ExtractionResult(chapters=chapters, metadata=metadata)
def _strip_metadata(content: str) -> Tuple[Dict[str, str], str]:
    """Remove <<METADATA_*>> tags from *content*; return (tags, remaining text)."""
    collected: Dict[str, str] = {}
    def _capture(match: re.Match) -> str:
        # Record each non-empty tag and delete it from the text.
        key = match.group(1).strip().upper()
        value = match.group(2).strip()
        if value:
            collected[key] = value
        return ""
    remaining = METADATA_PATTERN.sub(_capture, content)
    return collected, remaining
def _split_chapters(content: str, default_title: str) -> List[ExtractedChapter]:
    """Cut *content* at <<CHAPTER_MARKER:...>> tags into titled chapters."""
    markers = list(CHAPTER_PATTERN.finditer(content))
    if not markers:
        return [ExtractedChapter(title=default_title, text=clean_text(content))]
    chapters: List[ExtractedChapter] = []
    title = default_title
    cursor = 0
    for marker in markers:
        segment = content[cursor : marker.start()]
        if segment.strip():
            chapters.append(ExtractedChapter(title=title, text=clean_text(segment)))
        # A marker names the chapter that FOLLOWS it.
        title = marker.group(1).strip() or default_title
        cursor = marker.end()
    trailing = content[cursor:]
    if trailing.strip():
        chapters.append(ExtractedChapter(title=title, text=clean_text(trailing)))
    return chapters
def _normalize_metadata_keys(metadata: Dict[str, str]) -> Dict[str, str]:
    """Map raw tag names to canonical lower-case keys, dropping empty values."""
    return {
        METADATA_KEY_MAP.get(key.upper(), key.lower()): value
        for key, value in metadata.items()
        if value
    }
def _build_metadata_payload(
    metadata_source: MetadataSource,
    chapter_count: int,
    file_type: str,
    default_title: str,
) -> Dict[str, str]:
    """Assemble the normalized metadata dict shared by every extractor.

    Note: ``file_type`` is currently unused (a dead "Chapters"/"Pages" label
    derived from it was removed); the parameter is kept for interface
    stability with existing callers.
    """
    title = (metadata_source.title or "").strip() or default_title
    authors = [author for author in metadata_source.authors if author.strip()]
    if not authors:
        authors = ["Unknown"]
    authors_text = ", ".join(authors)
    # At least one chapter is always reported.
    chapter_count = max(chapter_count, 1)
    payload = {
        "TITLE": title,
        "ARTIST": authors_text,
        "ALBUM": title,
        # Fall back to the current year when the source gives none.
        "YEAR": metadata_source.publication_year or str(datetime.datetime.now().year),
        "ALBUM_ARTIST": authors_text,
        "COMPOSER": authors_text,
        "GENRE": "Audiobook",
        "CHAPTER_COUNT": str(chapter_count),
    }
    if metadata_source.publisher:
        payload["PUBLISHER"] = metadata_source.publisher
    if metadata_source.description:
        payload["COMMENT"] = metadata_source.description
    if metadata_source.language:
        payload["LANGUAGE"] = metadata_source.language
    normalized = _normalize_metadata_keys(payload)
    # Ensure chapter_count survives normalization even if upstream metadata provided it
    normalized.setdefault("chapter_count", str(chapter_count))
    return normalized
def _extract_pdf(path: Path) -> ExtractionResult:
    """Extract one chapter per non-empty PDF page via PyMuPDF."""
    chapters: List[ExtractedChapter] = []
    with fitz.open(str(path)) as document:
        metadata_source = _collect_pdf_metadata(document)
        for index, page in enumerate(cast(Iterable[fitz.Page], document)):
            text = _clean_pdf_text(cast(Any, page).get_text())
            if text:
                chapters.append(ExtractedChapter(title=f"Page {index + 1}", text=text))
    if not chapters:
        # Wholly empty document: keep a single empty placeholder chapter.
        chapters.append(ExtractedChapter(title=path.stem, text=""))
    metadata = _build_metadata_payload(metadata_source, len(chapters), "pdf", path.stem)
    return ExtractionResult(chapters=chapters, metadata=metadata)
def _collect_pdf_metadata(document: fitz.Document) -> MetadataSource:
    """Pull title/author/description/publisher/year from the PDF info dict."""
    result = MetadataSource()
    info = document.metadata or {}
    if info.get("title"):
        result.title = info["title"]
    if info.get("author"):
        result.authors = [info["author"]]
    if info.get("subject"):
        result.description = info["subject"]
    if info.get("keywords"):
        keywords = info["keywords"]
        # Append keywords to any subject-derived description.
        if result.description:
            result.description = f"{result.description}\n\nKeywords: {keywords}"
        else:
            result.description = f"Keywords: {keywords}"
    if info.get("creator"):
        result.publisher = info["creator"]
    # PDF dates look like "D:YYYYMMDD..."; take the first year found.
    for key in ("creationDate", "modDate"):
        value = info.get(key)
        if not value:
            continue
        match = re.search(r"D:(\d{4})", value)
        if match:
            result.publication_year = match.group(1)
            break
    return result
def _clean_pdf_text(text: str) -> str:
    """Normalize extracted PDF page text and strip page-number artifacts."""
    cleaned = clean_text(text)
    # Bracketed footnote/reference markers such as "[12]".
    cleaned = re.sub(r"\[\s*\d+\s*\]", "", cleaned)
    # Lines that are nothing but a page number.
    cleaned = re.sub(r"^\s*\d+\s*$", "", cleaned, flags=re.MULTILINE)
    # Trailing page numbers at the end of a line.
    cleaned = re.sub(r"\s+\d+\s*$", "", cleaned, flags=re.MULTILINE)
    # Dash-decorated trailing page numbers such as " - 12 -".
    cleaned = re.sub(r"\s+[-–—]\s*\d+\s*[-–—]?\s*$", "", cleaned, flags=re.MULTILINE)
    return cleaned.strip()
def _extract_markdown(path: Path) -> ExtractionResult:
    """Extract a Markdown file, splitting chapters on its headers."""
    raw = path.read_text(encoding=detect_encoding(str(path)), errors="replace")
    metadata_source, chapters = _parse_markdown(raw, path.stem)
    if not chapters:
        # No headers found: the whole document becomes a single chapter.
        fallback_title = metadata_source.title or path.stem
        chapters = [ExtractedChapter(title=fallback_title, text=clean_text(raw))]
    metadata = _build_metadata_payload(
        metadata_source, len(chapters), "markdown", path.stem
    )
    return ExtractionResult(chapters=chapters, metadata=metadata)
def _parse_markdown(
    raw: str, default_title: str
) -> Tuple[MetadataSource, List[ExtractedChapter]]:
    """Split Markdown into per-header chapters using the "toc" extension.

    Returns metadata gleaned from YAML-style frontmatter (falling back to
    the first H1 for the title) plus one chapter per header found.
    """
    metadata = MetadataSource()
    text = textwrap.dedent(raw)
    # Optional frontmatter delimited by "---" lines at the very top.
    frontmatter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", text, re.DOTALL)
    if frontmatter_match:
        frontmatter = frontmatter_match.group(1)
        _parse_markdown_frontmatter(frontmatter, metadata)
        text_body = text[frontmatter_match.end() :]
    else:
        text_body = text
    md = markdown.Markdown(extensions=["toc", "fenced_code"])
    html = md.convert(text_body)
    # toc_tokens is the nested header tree produced by the "toc" extension.
    toc_tokens = getattr(md, "toc_tokens", None) or []
    if not toc_tokens:
        # No headers at all: the whole document becomes one chapter.
        cleaned = clean_text(text_body)
        title = metadata.title or default_title
        chapters = [ExtractedChapter(title=title, text=cleaned)] if cleaned else []
        return metadata, chapters
    headers: List[dict] = []
    def _flatten_tokens(tokens):
        # Depth-first flatten of the nested header tree.
        for token in tokens:
            headers.append(token)
            if token.get("children"):
                _flatten_tokens(token["children"])
    _flatten_tokens(toc_tokens)
    # Locate each header's opening tag in the rendered HTML via its id attr,
    # so chapters can be sliced out of the HTML by character offsets.
    header_positions: List[Tuple[str, int, str]] = []
    for header in headers:
        header_id = header.get("id")
        if not header_id:
            continue
        id_pattern = f'id="{header_id}"'
        pos = html.find(id_pattern)
        if pos == -1:
            continue
        tag_start = html.rfind("<", 0, pos)
        name = str(header.get("name", header_id))
        header_positions.append((header_id, tag_start, name))
    header_positions.sort(key=lambda item: item[1])
    chapters: List[ExtractedChapter] = []
    # Each chapter spans from its header tag to the next header tag (or EOF).
    for index, (header_id, start, name) in enumerate(header_positions):
        end = (
            header_positions[index + 1][1]
            if index + 1 < len(header_positions)
            else len(html)
        )
        section_html = html[start:end]
        section_soup = BeautifulSoup(section_html, "html.parser")
        header_tag = section_soup.find(attrs={"id": header_id})
        if header_tag:
            # Drop the header element itself from the chapter body.
            header_tag.decompose()
        section_text = clean_text(section_soup.get_text()).strip()
        if not section_text:
            continue
        chapters.append(ExtractedChapter(title=name.strip(), text=section_text))
    if not metadata.title:
        # Fall back to the first level-1 header as the document title.
        first_h1 = next(
            (
                header
                for header in headers
                if header.get("level") == 1 and header.get("name")
            ),
            None,
        )
        if first_h1:
            metadata.title = str(first_h1["name"])
    return metadata, chapters
def _parse_markdown_frontmatter(frontmatter: str, metadata: MetadataSource) -> None:
title_match = re.search(
r"^title:\s*(.+)$", frontmatter, re.MULTILINE | re.IGNORECASE
)
if title_match:
metadata.title = title_match.group(1).strip().strip("\"'")
author_match = re.search(
r"^author:\s*(.+)$", frontmatter, re.MULTILINE | re.IGNORECASE
)
if author_match:
metadata.authors = [author_match.group(1).strip().strip("\"'")]
desc_match = re.search(
r"^description:\s*(.+)$", frontmatter, re.MULTILINE | re.IGNORECASE
)
if desc_match:
metadata.description = desc_match.group(1).strip().strip("\"'")
date_match = re.search(r"^date:\s*(.+)$", frontmatter, re.MULTILINE | re.IGNORECASE)
if date_match:
date_str = date_match.group(1).strip().strip("\"'")
year_match = re.search(r"\b(19|20)\d{2}\b", date_str)
if year_match:
metadata.publication_year = year_match.group(0)
def _extract_epub(path: Path) -> ExtractionResult:
    """Extract chapters, metadata, and cover art from the EPUB at *path*."""
    extractor = EpubExtractor(path)
    return extractor.extract()
class EpubExtractor:
    """Extracts chapters, metadata, and cover art from a single EPUB file.

    Chapter boundaries come from the EPUB navigation document (NAV HTML or
    NCX) when one can be parsed; otherwise the extractor falls back to one
    chapter per spine document.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        self.book = epub.read_epub(str(path))
        # Cache of document href -> decoded HTML, filled lazily for the
        # documents that navigation/slicing actually needs.
        self.doc_content: Dict[str, str] = {}
        # Document hrefs in spine (reading) order.
        self.spine_docs: List[str] = []

    def extract(self) -> ExtractionResult:
        """Run the full extraction and return chapters, metadata, and cover."""
        metadata_source = self._collect_metadata()
        try:
            chapters = self._process_nav()
        except Exception as exc:
            # Any navigation failure degrades gracefully to spine order.
            logger.warning(
                "EPUB navigation processing failed for %s: %s. Falling back to spine order.",
                self.path.name,
                exc,
                exc_info=True,
            )
            chapters = self._process_spine_fallback()
        if not chapters:
            # Guarantee at least one (possibly empty) chapter.
            chapters = [ExtractedChapter(title=self.path.stem, text="")]
        metadata = _build_metadata_payload(
            metadata_source, len(chapters), "epub", self.path.stem
        )
        metadata.setdefault("chapter_count", str(len(chapters)))
        if metadata_source.series:
            series_text = str(metadata_source.series).strip()
            if series_text:
                # Publish series info under several key aliases for consumers.
                metadata.setdefault("series", series_text)
                metadata.setdefault("series_name", series_text)
                metadata.setdefault("seriesname", series_text)
        if metadata_source.series_index:
            idx_text = str(metadata_source.series_index).strip()
            if idx_text:
                metadata.setdefault("series_index", idx_text)
                metadata.setdefault("series_sequence", idx_text)
                metadata.setdefault("book_number", idx_text)
        cover_image, cover_mime = self._extract_cover()
        return ExtractionResult(
            chapters=chapters,
            metadata=metadata,
            cover_image=cover_image,
            cover_mime=cover_mime,
        )

    def _collect_metadata(self) -> MetadataSource:
        """Gather Dublin Core and series metadata, tolerating missing fields."""
        metadata = MetadataSource()
        try:
            title_items = self.book.get_metadata("DC", "title")
            if title_items:
                metadata.title = title_items[0][0]
        except Exception as exc:
            logger.debug("Failed to extract EPUB title metadata: %s", exc)
        try:
            author_items = self.book.get_metadata("DC", "creator")
            if author_items:
                metadata.authors = [
                    author[0] for author in author_items if author and author[0]
                ]
        except Exception as exc:
            logger.debug("Failed to extract EPUB author metadata: %s", exc)
        try:
            desc_items = self.book.get_metadata("DC", "description")
            if desc_items:
                metadata.description = desc_items[0][0]
        except Exception as exc:
            logger.debug("Failed to extract EPUB description metadata: %s", exc)
        try:
            publisher_items = self.book.get_metadata("DC", "publisher")
            if publisher_items:
                metadata.publisher = publisher_items[0][0]
        except Exception as exc:
            logger.debug("Failed to extract EPUB publisher metadata: %s", exc)
        try:
            date_items = self.book.get_metadata("DC", "date")
            if date_items:
                date_str = date_items[0][0]
                # Prefer a bare 4-digit year; fall back to the raw date string.
                year_match = re.search(r"\b(19|20)\d{2}\b", date_str)
                metadata.publication_year = (
                    year_match.group(0) if year_match else date_str
                )
        except Exception as exc:
            logger.debug("Failed to extract EPUB publication year metadata: %s", exc)
        try:
            language_items = self.book.get_metadata("DC", "language")
            if language_items:
                metadata.language = language_items[0][0]
        except Exception as exc:
            logger.debug("Failed to extract EPUB language metadata: %s", exc)
        # Series metadata (best-effort). Common sources:
        # - Calibre embeds OPF meta tags: <meta name="calibre:series" content="..." />
        # - EPUB3 collections via: <meta property="belongs-to-collection">...</meta>
        try:
            meta_items = self.book.get_metadata("OPF", "meta")
        except Exception as exc:
            logger.debug("Failed to extract EPUB OPF meta tags: %s", exc)
            meta_items = []
        series_name: Optional[str] = None
        series_index: Optional[str] = None
        for value, attrs in meta_items or []:
            attrs_dict = attrs or {}
            name = str(attrs_dict.get("name") or "").strip().casefold()
            prop = str(attrs_dict.get("property") or "").strip().casefold()
            content = attrs_dict.get("content")
            # Calibre stores the value in the content attribute; EPUB3
            # collection entries carry it as the element text instead.
            candidate = content if content is not None else value
            candidate_text = str(candidate or "").strip()
            if not candidate_text:
                continue
            # First match wins for both the series name and its index.
            if name in {"calibre:series", "series"} and series_name is None:
                series_name = candidate_text
                continue
            if (
                name
                in {
                    "calibre:series_index",
                    "calibre:seriesindex",
                    "series_index",
                    "seriesindex",
                }
                and series_index is None
            ):
                series_index = candidate_text
                continue
            if prop.endswith("belongs-to-collection") and series_name is None:
                series_name = candidate_text
                continue
        metadata.series = series_name
        metadata.series_index = series_index
        return metadata

    def _extract_cover(self) -> Tuple[Optional[bytes], Optional[str]]:
        """Return (image bytes, MIME type) for the cover, or (None, None)."""
        # Preferred: an item explicitly marked as the cover.
        try:
            for item in self.book.get_items_of_type(ebooklib.ITEM_COVER):
                data = item.get_content()
                if data:
                    media_type = getattr(item, "media_type", None)
                    return data, media_type
        except Exception as exc:
            logger.debug("Failed to read dedicated EPUB cover image: %s", exc)
        # Fallback: any image whose filename hints at a cover.
        try:
            for item in self.book.get_items_of_type(ebooklib.ITEM_IMAGE):
                name = item.get_name().lower()
                if "cover" not in name and "front" not in name:
                    continue
                data = item.get_content()
                if not data:
                    continue
                media_type = getattr(item, "media_type", None)
                if not media_type:
                    media_type = mimetypes.guess_type(name)[0]
                return data, media_type
        except Exception as exc:
            logger.debug("Failed to locate fallback EPUB cover image: %s", exc)
        return None, None

    def _process_nav(self) -> List[ExtractedChapter]:
        """Build chapters from the navigation document; raises on any gap."""
        nav_item, nav_type = self._find_navigation_item()
        if not nav_item or not nav_type:
            raise ValueError("No navigation document found")
        # NCX is XML; NAV documents are (X)HTML.
        parser_type = "html.parser" if nav_type == "html" else "xml"
        nav_content = nav_item.get_content().decode("utf-8", errors="ignore")
        nav_soup = BeautifulSoup(nav_content, parser_type)
        self.spine_docs = self._build_spine_docs()
        doc_order = {href: index for index, href in enumerate(self.spine_docs)}
        # Some navs percent-encode hrefs; keep a decoded lookup as well.
        doc_order_decoded = {
            urllib.parse.unquote(href): index for href, index in doc_order.items()
        }
        nav_targets = self._collect_nav_targets(nav_soup, nav_type)
        self._cache_relevant_documents(doc_order, nav_targets)
        ordered_entries: List[NavEntry] = []
        if nav_type == "ncx":
            nav_map = nav_soup.find("navMap")
            if not nav_map:
                raise ValueError("NCX navigation missing <navMap>")
            for nav_point in nav_map.find_all("navPoint", recursive=False):
                self._parse_ncx_navpoint(
                    nav_point, ordered_entries, doc_order, doc_order_decoded
                )
        else:
            toc_nav = nav_soup.find("nav", attrs={"epub:type": "toc"})
            if toc_nav is None:
                # Fall back to the first <nav> that contains an ordered list.
                for nav in nav_soup.find_all("nav"):
                    if nav.find("ol"):
                        toc_nav = nav
                        break
            if toc_nav is None:
                raise ValueError("NAV HTML missing TOC structure")
            top_ol = toc_nav.find("ol", recursive=False)
            if top_ol is None:
                raise ValueError("TOC navigation missing <ol>")
            for li in top_ol.find_all("li", recursive=False):
                self._parse_html_nav_li(
                    li, ordered_entries, doc_order, doc_order_decoded
                )
        if not ordered_entries:
            raise ValueError("No navigation entries found")
        # Order by position in the book, not by TOC nesting order.
        ordered_entries.sort(key=lambda entry: (entry.doc_order, entry.position))
        chapters = self._slice_entries(ordered_entries)
        self._append_prefix_content(ordered_entries, chapters)
        return chapters

    def _process_spine_fallback(self) -> List[ExtractedChapter]:
        """Build one chapter per spine document when navigation is unusable."""
        chapters: List[ExtractedChapter] = []
        self.spine_docs = self._build_spine_docs()
        self.doc_content = {}
        for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            href = item.get_name()
            if href not in self.spine_docs:
                continue
            try:
                html_content = item.get_content().decode("utf-8", errors="ignore")
            except Exception as exc:
                logger.error("Error decoding EPUB document %s: %s", href, exc)
                html_content = ""
            self.doc_content[href] = html_content
        for index, doc_href in enumerate(self.spine_docs):
            html_content = self.doc_content.get(doc_href, "")
            if not html_content:
                continue
            text = self._html_to_text(html_content)
            if not text:
                continue
            title = self._resolve_document_title(
                html_content, fallback=f"Untitled Chapter {index + 1}"
            )
            chapters.append(ExtractedChapter(title=title, text=text))
        return chapters

    def _find_navigation_item(self) -> Tuple[Optional[epub.EpubItem], Optional[str]]:
        """Locate the navigation document.

        Returns (item, "html"|"ncx"), preferring NAV HTML over NCX, or
        (None, None) if nothing usable exists.
        """
        nav_item: Optional[epub.EpubItem] = None
        nav_type: Optional[str] = None
        nav_items = list(self.book.get_items_of_type(ebooklib.ITEM_NAVIGATION))
        if nav_items:
            # Best case: an HTML nav whose filename mentions "nav".
            preferred = next(
                (
                    item
                    for item in nav_items
                    if "nav" in item.get_name().lower()
                    and item.get_name().lower().endswith((".xhtml", ".html"))
                ),
                None,
            )
            if preferred:
                nav_item = preferred
                nav_type = "html"
            else:
                # Otherwise any (X)HTML navigation item.
                html_nav = next(
                    (
                        item
                        for item in nav_items
                        if item.get_name().lower().endswith((".xhtml", ".html"))
                    ),
                    None,
                )
                if html_nav:
                    nav_item = html_nav
                    nav_type = "html"
            if not nav_item and nav_items:
                # Accept an NCX navigation item if that is all we have.
                ncx_candidate = next(
                    (
                        item
                        for item in nav_items
                        if item.get_name().lower().endswith(".ncx")
                    ),
                    None,
                )
                if ncx_candidate:
                    nav_item = ncx_candidate
                    nav_type = "ncx"
        if not nav_item:
            # Some ebooklib versions expose a dedicated NCX item type.
            ncx_constant = getattr(epub, "ITEM_NCX", None)
            if ncx_constant is not None:
                ncx_items = list(self.book.get_items_of_type(ncx_constant))
                if ncx_items:
                    nav_item = ncx_items[0]
                    nav_type = "ncx"
        if not nav_item:
            # Last resort: scan documents for an inline <nav epub:type="toc">.
            for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                try:
                    html_content = item.get_content().decode("utf-8", errors="ignore")
                except Exception:
                    continue
                if "<nav" in html_content and 'epub:type="toc"' in html_content:
                    soup = BeautifulSoup(html_content, "html.parser")
                    if soup.find("nav", attrs={"epub:type": "toc"}):
                        nav_item = item
                        nav_type = "html"
                        break
        return nav_item, nav_type

    def _build_spine_docs(self) -> List[str]:
        """Return document hrefs in spine order, skipping unresolved ids."""
        docs: List[str] = []
        for spine_entry in self.book.spine:
            item_id = spine_entry[0]
            item = self.book.get_item_with_id(item_id)
            if item:
                docs.append(item.get_name())
        return docs

    def _collect_nav_targets(self, nav_soup: BeautifulSoup, nav_type: str) -> List[str]:
        """List document hrefs referenced by the nav (fragments stripped)."""
        targets: List[str] = []
        if nav_type == "ncx":
            for content_node in nav_soup.find_all("content"):
                src = content_node.get("src")
                if src:
                    src_value = str(src)
                    targets.append(src_value.split("#", 1)[0])
        else:
            for link in nav_soup.find_all("a"):
                href = link.get("href")
                if href:
                    href_value = str(href)
                    targets.append(href_value.split("#", 1)[0])
        return targets

    def _cache_relevant_documents(
        self, doc_order: Dict[str, int], nav_targets: List[str]
    ) -> None:
        """Decode and cache every document the spine or nav references."""
        needed: set[str] = set(doc_order.keys())
        for target in nav_targets:
            # Track both encoded and decoded forms of each target href.
            needed.add(target)
            needed.add(urllib.parse.unquote(target))
        self.doc_content = {}
        for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            href = item.get_name()
            if href not in needed and urllib.parse.unquote(href) not in needed:
                continue
            try:
                html_content = item.get_content().decode("utf-8", errors="ignore")
            except Exception as exc:
                logger.error("Error decoding EPUB document %s: %s", href, exc)
                html_content = ""
            self.doc_content[href] = html_content

    def _parse_ncx_navpoint(
        self,
        nav_point,
        ordered_entries: List[NavEntry],
        doc_order: Dict[str, int],
        doc_order_decoded: Dict[str, int],
    ) -> None:
        """Recursively convert an NCX <navPoint> subtree into NavEntry items."""
        nav_label = nav_point.find("navLabel")
        content = nav_point.find("content")
        title = (
            nav_label.find("text").get_text(strip=True)
            if nav_label and nav_label.find("text")
            else "Untitled Section"
        )
        src = content.get("src") if content and content.has_attr("src") else None
        if src:
            base_href, fragment = src.split("#", 1) if "#" in src else (src, None)
            doc_key, doc_idx = self._find_doc_key(
                base_href, doc_order, doc_order_decoded
            )
            if doc_key is not None and doc_idx is not None:
                position = self._find_position_robust(doc_key, fragment)
                ordered_entries.append(
                    NavEntry(
                        src=src,
                        title=title,
                        doc_href=doc_key,
                        position=position,
                        doc_order=doc_idx,
                    )
                )
            else:
                logger.warning(
                    "Navigation entry '%s' points to '%s', which is not in the spine.",
                    title,
                    base_href,
                )
        for child_navpoint in nav_point.find_all("navPoint", recursive=False):
            self._parse_ncx_navpoint(
                child_navpoint, ordered_entries, doc_order, doc_order_decoded
            )

    def _parse_html_nav_li(
        self,
        li_element,
        ordered_entries: List[NavEntry],
        doc_order: Dict[str, int],
        doc_order_decoded: Dict[str, int],
    ) -> None:
        """Recursively convert a NAV HTML <li> subtree into NavEntry items."""
        link = li_element.find("a", recursive=False)
        span_text = li_element.find("span", recursive=False)
        title = "Untitled Section"
        if link and link.has_attr("href"):
            src = link["href"]
            title = link.get_text(strip=True) or title
        else:
            # Headings without links still contribute a title for children.
            src = None
            if span_text:
                title = span_text.get_text(strip=True) or title
            else:
                text = "".join(t for t in li_element.stripped_strings)
                if text:
                    title = text
        title = title.strip() or "Untitled Section"
        if src:
            base_href, fragment = src.split("#", 1) if "#" in src else (src, None)
            doc_key, doc_idx = self._find_doc_key(
                base_href, doc_order, doc_order_decoded
            )
            if doc_key is not None and doc_idx is not None:
                position = self._find_position_robust(doc_key, fragment)
                ordered_entries.append(
                    NavEntry(
                        src=src,
                        title=title,
                        doc_href=doc_key,
                        position=position,
                        doc_order=doc_idx,
                    )
                )
            else:
                logger.warning(
                    "Navigation entry '%s' points to '%s', which is not in the spine.",
                    title,
                    base_href,
                )
        for child_ol in li_element.find_all("ol", recursive=False):
            for child_li in child_ol.find_all("li", recursive=False):
                self._parse_html_nav_li(
                    child_li, ordered_entries, doc_order, doc_order_decoded
                )

    def _find_doc_key(
        self,
        base_href: str,
        doc_order: Dict[str, int],
        doc_order_decoded: Dict[str, int],
    ) -> Tuple[Optional[str], Optional[int]]:
        """Resolve a nav href to a spine key and index, or (None, None).

        Matches the raw href, its percent-decoded form, and finally any
        spine entry with the same basename (case-insensitive).
        """
        candidates = {base_href, urllib.parse.unquote(base_href)}
        base_name = urllib.parse.unquote(base_href).split("/")[-1].lower()
        for key in list(doc_order.keys()) + list(doc_order_decoded.keys()):
            if key.split("/")[-1].lower() == base_name:
                candidates.add(key)
        for candidate in candidates:
            if candidate in doc_order:
                return candidate, doc_order[candidate]
            if candidate in doc_order_decoded:
                return candidate, doc_order_decoded[candidate]
        return None, None

    def _find_position_robust(self, doc_href: str, fragment_id: Optional[str]) -> int:
        """Find the character offset of *fragment_id* inside a cached document.

        Tries BeautifulSoup first, then regex, then plain substring search;
        returns 0 (start of document) when the anchor cannot be located.
        """
        if doc_href not in self.doc_content:
            logger.warning("Document '%s' not found in cached EPUB content.", doc_href)
            return 0
        html_content = self.doc_content[doc_href]
        if not fragment_id:
            return 0
        try:
            temp_soup = BeautifulSoup(f"<div>{html_content}</div>", "html.parser")
            target_element = temp_soup.find(id=fragment_id)
            if target_element:
                # Locate the serialized element back in the raw HTML; cap the
                # needle at 200 chars to keep the search cheap.
                tag_str = str(target_element)
                pos = html_content.find(tag_str[: min(len(tag_str), 200)])
                if pos != -1:
                    return pos
        except Exception:
            logger.debug(
                "BeautifulSoup failed to locate id '%s' in %s", fragment_id, doc_href
            )
        # Regex fallback: any tag with a matching id= or name= attribute.
        safe_fragment_id = re.escape(fragment_id)
        id_name_pattern = re.compile(
            f"<[^>]+(?:id|name)\\s*=\\s*[\"']{safe_fragment_id}[\"']",
            re.IGNORECASE,
        )
        match = id_name_pattern.search(html_content)
        if match:
            return match.start()
        # Plain substring fallback; back up to the enclosing tag's "<".
        id_pos = html_content.find(f'id="{fragment_id}"')
        name_pos = html_content.find(f'name="{fragment_id}"')
        candidates = [pos for pos in (id_pos, name_pos) if pos != -1]
        if candidates:
            pos = min(candidates)
            tag_start = html_content.rfind("<", 0, pos)
            return tag_start if tag_start != -1 else pos
        logger.warning(
            "Anchor '%s' not found in %s. Defaulting to start.", fragment_id, doc_href
        )
        return 0

    def _slice_entries(self, ordered_entries: List[NavEntry]) -> List[ExtractedChapter]:
        """Turn sorted nav entries into chapters by slicing between anchors."""
        chapters: List[ExtractedChapter] = []
        for index, entry in enumerate(ordered_entries):
            next_entry = (
                ordered_entries[index + 1] if index + 1 < len(ordered_entries) else None
            )
            slice_html = self._slice_entry(entry, next_entry)
            text = self._html_to_text(slice_html)
            if not text:
                continue
            title = entry.title or "Untitled Section"
            chapters.append(ExtractedChapter(title=title, text=text))
        return chapters

    def _slice_entry(
        self,
        current_entry: NavEntry,
        next_entry: Optional[NavEntry],
    ) -> str:
        """Return the HTML between *current_entry* and *next_entry*.

        A chapter may span multiple spine documents; intermediate documents
        are concatenated in full.
        """
        current_doc = current_entry.doc_href
        current_pos = current_entry.position
        current_html = self.doc_content.get(current_doc, "")
        if not current_html:
            return ""
        if next_entry and next_entry.doc_href == current_doc:
            # Both anchors in the same document: a simple substring.
            return current_html[current_pos : next_entry.position]
        slice_html = current_html[current_pos:]
        if next_entry:
            docs_between = self._docs_between(current_doc, next_entry.doc_href)
            for doc_href in docs_between:
                slice_html += self.doc_content.get(doc_href, "")
            next_doc_html = self.doc_content.get(next_entry.doc_href, "")
            slice_html += next_doc_html[: next_entry.position]
        else:
            # Last entry: take everything to the end of the spine.
            for doc_href in self._docs_between(current_doc, None):
                slice_html += self.doc_content.get(doc_href, "")
        if not slice_html.strip():
            logger.warning(
                "No content found for navigation source '%s'. Using full document fallback.",
                current_entry.src,
            )
            return current_html
        return slice_html

    def _docs_between(self, current_doc: str, next_doc: Optional[str]) -> List[str]:
        """Return spine documents strictly between two documents.

        When *next_doc* precedes *current_doc* in the spine (an out-of-order
        TOC), the range wraps: tail of the spine plus the head up to next_doc.
        """
        docs: List[str] = []
        try:
            current_idx = self.spine_docs.index(current_doc)
        except ValueError:
            return docs
        if next_doc is None:
            docs.extend(self.spine_docs[current_idx + 1 :])
            return docs
        try:
            next_idx = self.spine_docs.index(next_doc)
        except ValueError:
            return docs
        if current_idx < next_idx:
            docs.extend(self.spine_docs[current_idx + 1 : next_idx])
        elif current_idx > next_idx:
            docs.extend(self.spine_docs[current_idx + 1 :])
            docs.extend(self.spine_docs[:next_idx])
        return docs

    def _append_prefix_content(
        self,
        ordered_entries: List[NavEntry],
        chapters: List[ExtractedChapter],
    ) -> None:
        """Prepend an "Introduction" chapter for content before the first anchor."""
        if not ordered_entries:
            return
        first_entry = ordered_entries[0]
        first_doc = first_entry.doc_href
        first_pos = first_entry.position
        if first_pos <= 0:
            # First anchor is at the start of its document: nothing precedes it.
            return
        prefix_html = ""
        try:
            first_idx = self.spine_docs.index(first_doc)
        except ValueError:
            first_idx = -1
        if first_idx > 0:
            for doc_href in self.spine_docs[:first_idx]:
                prefix_html += self.doc_content.get(doc_href, "")
        prefix_html += self.doc_content.get(first_doc, "")[:first_pos]
        prefix_text = self._html_to_text(prefix_html)
        # Avoid duplicating the first chapter if the prefix matches it exactly.
        if prefix_text and (not chapters or prefix_text != chapters[0].text):
            chapters.insert(0, ExtractedChapter(title="Introduction", text=prefix_text))

    def _html_to_text(self, html: str) -> str:
        """Convert HTML to cleaned plain text suitable for narration."""
        if not html:
            return ""
        soup = BeautifulSoup(html, "html.parser")
        # Force paragraph breaks where block elements end.
        for tag in soup.find_all(["p", "div"]):
            tag.append("\n\n")
        # Number ordered-list items, honoring the list's start attribute.
        for ol in soup.find_all("ol"):
            start_attr = ol.get("start")
            try:
                start = int(str(start_attr)) if start_attr is not None else 1
            except (TypeError, ValueError):
                start = 1
            for idx, li in enumerate(ol.find_all("li", recursive=False)):
                number_text = f"{start + idx}) "
                existing = li.string
                if isinstance(existing, NavigableString):
                    existing.replace_with(NavigableString(number_text + str(existing)))
                else:
                    li.insert(0, NavigableString(number_text))
        # Drop footnote markers and similar super/subscript noise.
        for tag in soup.find_all(["sup", "sub"]):
            tag.decompose()
        text = clean_text(soup.get_text())
        return text.strip()

    def _resolve_document_title(self, html_content: str, fallback: str) -> str:
        """Pick a chapter title from <title> or the first heading, else *fallback*."""
        soup = BeautifulSoup(html_content, "html.parser")
        if soup.title and soup.title.string:
            return soup.title.string.strip()
        for heading_tag in ("h1", "h2", "h3"):
            heading = soup.find(heading_tag)
            if heading and heading.get_text(strip=True):
                return heading.get_text(strip=True)
        return fallback
from __future__ import annotations
import ast
from dataclasses import dataclass
import logging
import math
import re
from typing import Any, Iterable, Iterator, Optional
import numpy as np
logger = logging.getLogger(__name__)
DEFAULT_SUPERTONIC_VOICES = ("M1", "M2", "M3", "M4", "M5", "F1", "F2", "F3", "F4", "F5")
@dataclass
class SupertonicSegment:
    """One synthesized chunk: the source text and its audio samples."""

    # Text that was actually fed to the synthesizer for this segment.
    graphemes: str
    # Audio for the segment; produced as flat float32 by the pipeline.
    audio: np.ndarray
def _ensure_float32_mono(wav: Any) -> np.ndarray:
arr = np.asarray(wav, dtype="float32")
if arr.ndim == 2:
# (n, 1) or (1, n) or (n, channels)
if arr.shape[0] == 1 and arr.shape[1] > 1:
arr = arr.reshape(-1)
else:
arr = arr[:, 0]
return arr.reshape(-1)
def _resample_linear(audio: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
if src_rate == dst_rate:
return audio
if audio.size == 0:
return audio
ratio = dst_rate / float(src_rate)
new_len = int(round(audio.size * ratio))
if new_len <= 1:
return np.zeros(0, dtype="float32")
x_old = np.linspace(0.0, 1.0, num=audio.size, endpoint=False)
x_new = np.linspace(0.0, 1.0, num=new_len, endpoint=False)
return np.interp(x_new, x_old, audio).astype("float32", copy=False)
def _split_text(
text: str, *, split_pattern: Optional[str], max_chunk_length: int
) -> list[str]:
stripped = (text or "").strip()
if not stripped:
return []
parts: list[str]
if split_pattern:
try:
parts = [p.strip() for p in re.split(split_pattern, stripped) if p.strip()]
except re.error:
parts = [stripped]
else:
parts = [stripped]
# Enforce max length by hard-splitting long parts.
result: list[str] = []
for part in parts:
if len(part) <= max_chunk_length:
result.append(part)
continue
start = 0
while start < len(part):
end = min(len(part), start + max_chunk_length)
# Try to split at whitespace.
if end < len(part):
ws = part.rfind(" ", start, end)
if ws > start + 40:
end = ws
chunk = part[start:end].strip()
if chunk:
result.append(chunk)
start = end
return result
_UNSUPPORTED_CHARS_RE = re.compile(
r"unsupported character\(s\):\s*(\[[^\]]*\])", re.IGNORECASE
)
def _parse_unsupported_characters(error: BaseException) -> list[str]:
"""Best-effort extraction of unsupported characters from SuperTonic errors."""
message = " ".join(
str(part) for part in getattr(error, "args", ()) if part is not None
) or str(error)
match = _UNSUPPORTED_CHARS_RE.search(message)
if not match:
return []
raw = match.group(1)
try:
value = ast.literal_eval(raw)
except Exception:
return []
if isinstance(value, (list, tuple)):
out: list[str] = []
for item in value:
if item is None:
continue
s = str(item)
if s:
out.append(s)
return out
if isinstance(value, str) and value:
return [value]
return []
def _remove_unsupported_characters(text: str, unsupported: Iterable[str]) -> str:
result = text
for item in unsupported:
if not item:
continue
result = result.replace(item, "")
return result
def _configure_supertonic_gpu() -> None:
    """Patch supertonic's config to enable GPU acceleration if available."""
    try:
        import onnxruntime as ort

        available_providers = ort.get_available_providers()
        # Prefer CUDA when present, but never TensorRT: onnxruntime may list
        # TensorrtExecutionProvider as available even though it fails at
        # runtime when the TensorRT libraries (libnvinfer.so) are missing.
        selected = []
        if "CUDAExecutionProvider" in available_providers:
            selected.append("CUDAExecutionProvider")
        selected.append("CPUExecutionProvider")
        # Both modules must be patched before TTS is imported: the loader
        # copies the value out of config at module load time.
        import supertonic.config as supertonic_config
        import supertonic.loader as supertonic_loader

        supertonic_config.DEFAULT_ONNX_PROVIDERS = selected
        supertonic_loader.DEFAULT_ONNX_PROVIDERS = selected
        logger.info("Supertonic ONNX providers configured: %s", selected)
    except Exception as exc:
        logger.warning("Could not configure supertonic GPU providers: %s", exc)
class SupertonicPipeline:
    """Minimal adapter that mimics Kokoro's pipeline iteration interface."""

    def __init__(
        self,
        *,
        sample_rate: int,
        auto_download: bool = True,
        total_steps: int = 5,
        max_chunk_length: int = 300,
    ) -> None:
        """Create the adapter and eagerly load the SuperTonic TTS engine.

        Raises RuntimeError when the `supertonic` package is not importable.
        """
        # Output sample rate; synthesized audio is resampled to this rate.
        self.sample_rate = int(sample_rate)
        # Default step count used when __call__ receives total_steps=None.
        self.total_steps = int(total_steps)
        # Upper bound on characters per synthesized chunk.
        self.max_chunk_length = int(max_chunk_length)
        # Configure GPU providers before importing TTS
        _configure_supertonic_gpu()
        try:
            from supertonic import TTS  # type: ignore[import-not-found]
        except Exception as exc:  # pragma: no cover
            raise RuntimeError(
                "Supertonic is not installed. Install it with `pip install supertonic`."
            ) from exc
        self._tts = TTS(auto_download=auto_download)

    def __call__(
        self,
        text: str,
        *,
        voice: str,
        speed: float,
        split_pattern: Optional[str] = None,
        total_steps: Optional[int] = None,
    ) -> Iterator[SupertonicSegment]:
        """Yield one SupertonicSegment per synthesized chunk of *text*.

        Chunks that SuperTonic rejects for unsupported characters are
        sanitized and retried (up to 3 attempts); a chunk that becomes
        empty after sanitization is dropped.
        """
        # Blank/missing voice falls back to the "M1" default.
        voice_name = (voice or "").strip() or "M1"
        steps = int(total_steps) if total_steps is not None else self.total_steps
        # Clamp steps and speed to the ranges this adapter accepts.
        steps = max(2, min(15, steps))
        speed_value = float(speed) if speed is not None else 1.0
        speed_value = max(0.7, min(2.0, speed_value))
        style = self._tts.get_voice_style(voice_name=voice_name)
        chunks = _split_text(
            text, split_pattern=split_pattern, max_chunk_length=self.max_chunk_length
        )
        for chunk in chunks:
            chunk_to_speak = chunk
            removed: set[str] = set()
            last_exc: Exception | None = None
            # SuperTonic can raise ValueError for unsupported characters; strip and retry.
            for attempt in range(3):
                try:
                    wav, duration = self._tts.synthesize(
                        text=chunk_to_speak,
                        voice_style=style,
                        total_steps=steps,
                        speed=speed_value,
                        max_chunk_length=self.max_chunk_length,
                        silence_duration=0.0,
                        verbose=False,
                    )
                    break
                except ValueError as exc:
                    last_exc = exc
                    unsupported = _parse_unsupported_characters(exc)
                    if not unsupported:
                        # Not the unsupported-characters error: propagate.
                        raise
                    removed.update(unsupported)
                    sanitized = _remove_unsupported_characters(
                        chunk_to_speak, unsupported
                    ).strip()
                    # If we didn't change anything, don't loop forever.
                    if sanitized == chunk_to_speak.strip():
                        raise
                    chunk_to_speak = sanitized
                    if not chunk_to_speak:
                        logger.warning(
                            "SuperTonic: dropped a chunk after removing unsupported characters: %s",
                            sorted(removed),
                        )
                        break
                    if attempt == 0:
                        logger.warning(
                            "SuperTonic: removed unsupported characters %s and retried.",
                            sorted(removed),
                        )
            else:
                # Exhausted retries.
                assert last_exc is not None
                raise last_exc
            if not chunk_to_speak:
                # Chunk vanished during sanitization; nothing to yield.
                continue
            audio = _ensure_float32_mono(wav)
            # If duration is present, infer the source sample rate and resample if needed.
            src_rate = self.sample_rate
            try:
                dur = float(duration)
                if dur > 0 and audio.size > 0:
                    inferred = int(round(audio.size / dur))
                    # Only trust rates in a plausible audio range.
                    if 8000 <= inferred <= 96000:
                        src_rate = inferred
            except Exception:
                pass
            if src_rate != self.sample_rate:
                audio = _resample_linear(audio, src_rate, self.sample_rate)
            yield SupertonicSegment(graphemes=chunk_to_speak, audio=audio)
import json
import logging
import os
import platform
import re
import shutil
import subprocess
import sys
import warnings
from threading import Thread
from typing import Dict, Optional
from functools import lru_cache
from dotenv import load_dotenv, find_dotenv
def _load_environment() -> None:
    """Load a .env file, honoring an explicit ABOGEN_ENV_FILE override."""
    override_file = os.environ.get("ABOGEN_ENV_FILE")
    if override_file:
        # Explicit file wins; existing environment variables still take
        # precedence over file values (override=False).
        load_dotenv(override_file, override=False)
        return
    # Otherwise search upward from the current working directory.
    discovered = find_dotenv(usecwd=True)
    if discovered:
        load_dotenv(discovered, override=False)
# Load .env configuration before anything else reads os.environ, then
# silence warnings for the rest of the process.
_load_environment()
warnings.filterwarnings("ignore")
def detect_encoding(file_path):
    """Guess the text encoding of *file_path*, defaulting to "utf-8".

    Tries charset_normalizer first, then chardet; both detectors are
    optional dependencies and are skipped when not installed.
    """
    try:
        import chardet  # type: ignore[import-not-found]
    except ImportError:  # pragma: no cover - optional dependency
        chardet = None  # type: ignore[assignment]
    try:
        import charset_normalizer  # type: ignore[import-not-found]
    except ImportError:  # pragma: no cover - optional dependency
        charset_normalizer = None  # type: ignore[assignment]
    with open(file_path, "rb") as f:
        raw_data = f.read()
    detected = None
    # charset_normalizer is preferred; chardet is the fallback detector.
    for detector in (charset_normalizer, chardet):
        if detector is None:
            continue
        try:
            guess = detector.detect(raw_data)["encoding"]
        except Exception:
            continue
        if guess is not None:
            detected = guess
            break
    return (detected or "utf-8").lower()
def get_resource_path(package, resource):
    """Get the path to a resource file, with fallback to the local file system.

    Args:
        package (str): Package name containing the resource (e.g., 'abogen.assets')
        resource (str): Resource filename (e.g., 'icon.ico')

    Returns:
        str: Path to the resource file, or None if not found
    """
    from importlib import resources

    # First choice: ask importlib.resources for the packaged file.
    try:
        with resources.path(package, resource) as resource_path:
            if os.path.exists(resource_path):
                return str(resource_path)
    except (ImportError, FileNotFoundError):
        pass
    # Second choice: resolve relative to this file using the dotted package
    # path with the top-level package name dropped.
    here = os.path.dirname(os.path.abspath(__file__))
    package_parts = package.split(".")
    relative_candidate = os.path.join(here, *package_parts[1:], resource)
    if os.path.exists(relative_candidate):
        return relative_candidate
    # Last resort: a sibling subdirectory named after the package tail
    # (e.g. 'assets' for 'abogen.assets').
    try:
        tail = package.split(".")[-1] if "." in package else package
        local_candidate = os.path.join(here, tail, resource)
        if os.path.exists(local_candidate):
            return local_candidate
    except Exception:
        pass
    return None
def get_version():
    """Return the current version of the application, or "Unknown"."""
    try:
        version_file = get_resource_path("/", "VERSION")
        if not version_file:
            raise FileNotFoundError("VERSION resource missing")
        with open(version_file, "r") as fh:
            contents = fh.read()
        return contents.strip()
    except Exception:
        # Any failure (missing resource, unreadable file) reads as Unknown.
        return "Unknown"
# Define config path
def ensure_directory(path):
    """Create *path* (expanding ~) if needed and return its absolute form."""
    expanded = os.path.expanduser(str(path))
    resolved = os.path.abspath(expanded)
    os.makedirs(resolved, exist_ok=True)
    return resolved
@lru_cache(maxsize=1)
def get_user_settings_dir():
    """Resolve (and create) the directory that holds user settings.

    Precedence: ABOGEN_SETTINGS_DIR, then <ABOGEN_DATA>/settings, then
    /data/settings, then a pre-existing legacy ~/.config/abogen directory
    (non-Windows), and finally the platformdirs default. Cached per process.
    """
    explicit = os.environ.get("ABOGEN_SETTINGS_DIR")
    if explicit:
        return ensure_directory(explicit)
    candidates = []
    data_root = os.environ.get("ABOGEN_DATA") or os.environ.get("ABOGEN_DATA_DIR")
    if data_root:
        candidates.append(os.path.join(data_root, "settings"))
    if os.path.isdir("/data"):
        candidates.append(os.path.join("/data", "settings"))
    for candidate in candidates:
        try:
            return ensure_directory(candidate)
        except OSError:
            # Unwritable mount: move on to the next candidate.
            continue
    from platformdirs import user_config_dir

    if platform.system() != "Windows":
        # Prefer a pre-existing legacy location so old installs keep working.
        legacy_dir = os.path.join(os.path.expanduser("~"), ".config", "abogen")
        if os.path.exists(legacy_dir):
            return ensure_directory(legacy_dir)
    config_dir = user_config_dir(
        "abogen", appauthor=False, roaming=True, ensure_exists=True
    )
    return ensure_directory(config_dir)
def get_user_config_path():
    """Return the absolute path of the persisted config.json file."""
    settings_dir = get_user_settings_dir()
    return os.path.join(settings_dir, "config.json")
# Define cache path
@lru_cache(maxsize=1)
def get_user_cache_root():
    """Resolve (and create) the cache root, configuring related env vars.

    Precedence: ABOGEN_TEMP_DIR, then the platformdirs default, then
    <ABOGEN_DATA>/cache, /data/cache, /tmp fallbacks. As a side effect,
    HOME, XDG_CACHE_HOME, HF_HOME, and related HuggingFace cache variables
    are set so downstream libraries cache under writable locations.
    Cached per process via lru_cache.
    """
    logger = logging.getLogger(__name__)

    def _try_paths(*paths):
        # Return the first candidate that can be created; re-raise the last
        # OSError if every non-empty candidate fails.
        last_error = None
        for candidate in paths:
            if not candidate:
                continue
            try:
                return ensure_directory(candidate)
            except OSError as exc:
                last_error = exc
                logger.debug("Unable to use cache directory %s: %s", candidate, exc)
        if last_error is not None:
            raise last_error

    def _configure_cache_env(root: Optional[str]) -> None:
        # Point HOME / XDG_CACHE_HOME / HF caches at writable directories,
        # preferring the chosen cache root when the env vars are unset.
        temp_root = None
        if root:
            try:
                temp_root = ensure_directory(root)
            except OSError:
                temp_root = None
        home_dir = os.environ.get("HOME")
        if not home_dir:
            home_dir = ensure_directory(os.path.join("/tmp", "abogen-home"))
            os.environ["HOME"] = home_dir
        else:
            home_dir = ensure_directory(home_dir)
        cache_base = os.environ.get("XDG_CACHE_HOME")
        if cache_base:
            cache_base = ensure_directory(cache_base)
        elif temp_root:
            cache_base = temp_root
            os.environ["XDG_CACHE_HOME"] = cache_base
        else:
            cache_base = ensure_directory(os.path.join(home_dir, ".cache"))
            os.environ["XDG_CACHE_HOME"] = cache_base
        hf_cache = os.environ.get("HF_HOME")
        if hf_cache:
            hf_cache = ensure_directory(hf_cache)
        elif temp_root:
            hf_cache = ensure_directory(os.path.join(temp_root, "huggingface"))
            os.environ["HF_HOME"] = hf_cache
        else:
            hf_cache = ensure_directory(os.path.join(cache_base, "huggingface"))
            os.environ["HF_HOME"] = hf_cache
        for env_var in ("HUGGINGFACE_HUB_CACHE", "TRANSFORMERS_CACHE"):
            os.environ.setdefault(env_var, hf_cache)
        os.environ.setdefault("ABOGEN_INTERNAL_CACHE_ROOT", cache_base)

    cache_root: Optional[str] = None
    override = os.environ.get("ABOGEN_TEMP_DIR")
    if override:
        try:
            cache_root = ensure_directory(override)
        except OSError as exc:
            logger.warning("ABOGEN_TEMP_DIR=%s is not writable: %s", override, exc)
    if cache_root is None:
        from platformdirs import user_cache_dir

        default_cache = user_cache_dir("abogen", appauthor=False, opinion=True)
        data_root = os.environ.get("ABOGEN_DATA") or os.environ.get("ABOGEN_DATA_DIR")
        fallback_paths = [
            default_cache,
            os.path.join(data_root, "cache") if data_root else None,
            "/data/cache",
            "/tmp/abogen-cache",
        ]
        try:
            cache_root = _try_paths(*fallback_paths)
        except OSError:
            # Final safety net – attempt a tmp directory unique to this process.
            tmp_candidate = os.path.join("/tmp", f"abogen-cache-{os.getpid()}")
            logger.warning("Falling back to temp cache directory %s", tmp_candidate)
            cache_root = ensure_directory(tmp_candidate)
    if cache_root is None:
        raise RuntimeError("Unable to determine cache directory")
    _configure_cache_env(cache_root)
    return cache_root
def get_internal_cache_root():
    """Return abogen's internal cache directory, creating it if necessary.

    Resolution order: ABOGEN_INTERNAL_CACHE_ROOT, then XDG_CACHE_HOME, then
    ``<home>/.cache`` (falling back to /tmp/abogen-home when HOME is unset).
    """
    configured = os.environ.get("ABOGEN_INTERNAL_CACHE_ROOT") or os.environ.get(
        "XDG_CACHE_HOME"
    )
    if configured:
        return ensure_directory(configured)
    home = os.environ.get("HOME") or os.path.join("/tmp", "abogen-home")
    home = ensure_directory(home)
    return ensure_directory(os.path.join(home, ".cache"))
def get_internal_cache_path(folder=None):
    """Return the internal cache root, or a named subdirectory of it."""
    root = get_internal_cache_root()
    return ensure_directory(os.path.join(root, folder)) if folder else root
def get_user_cache_path(folder=None):
    """Return the user cache root, or a named subdirectory of it."""
    root = get_user_cache_root()
    return ensure_directory(os.path.join(root, folder)) if folder else root
@lru_cache(maxsize=1)
def get_user_output_root():
    """Return (and memoize) the root directory for user-facing output files.

    ABOGEN_OUTPUT_DIR / ABOGEN_OUTPUT_ROOT environment overrides take
    precedence; otherwise ``<user cache>/outputs`` is used.
    """
    configured = os.environ.get("ABOGEN_OUTPUT_DIR") or os.environ.get(
        "ABOGEN_OUTPUT_ROOT"
    )
    if configured:
        return ensure_directory(configured)
    return ensure_directory(os.path.join(get_user_cache_root(), "outputs"))
def get_user_output_path(folder=None):
    """Return the user output root, or a named subdirectory of it."""
    root = get_user_output_root()
    return ensure_directory(os.path.join(root, folder)) if folder else root
# Handles for the spawned keep-awake helper processes ("caffeinate" on macOS,
# "systemd-inhibit ... sleep infinity" on Linux); see prevent_sleep_start/_end.
# Windows uses SetThreadExecutionState instead, so it needs no entry here.
_sleep_procs: Dict[str, Optional[subprocess.Popen[str]]] = {
    "Darwin": None,
    "Linux": None,
}  # Store sleep prevention processes
def clean_text(text, *args, **kwargs):
    """Normalize whitespace in *text* for TTS processing.

    Collapses horizontal whitespace within each line, trims line edges,
    reduces runs of 3+ newlines to a paragraph break (exactly two), and —
    when the "replace_single_newlines" config flag is enabled — joins lone
    newlines into spaces while preserving paragraph breaks.
    """
    config = load_config()
    join_single_newlines = config.get("replace_single_newlines", False)
    # Per line: squash non-newline whitespace to single spaces and strip edges.
    normalized_lines = (
        re.sub(r"[^\S\n]+", " ", raw_line).strip() for raw_line in text.splitlines()
    )
    cleaned = "\n".join(normalized_lines)
    # Standardize paragraph breaks and trim overall whitespace.
    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned).strip()
    if join_single_newlines:
        # Only isolated newlines are replaced; double newlines survive.
        cleaned = re.sub(r"(?<!\n)\n(?!\n)", " ", cleaned)
    return cleaned
# Encoding used for text-mode subprocess I/O; mirrors the OS filesystem encoding.
default_encoding = sys.getfilesystemencoding()
def create_process(cmd, stdin=None, text=True, capture_output=False):
    """Spawn *cmd* with hidden-window/Popen plumbing and live output streaming.

    Args:
        cmd: list of arguments (preferred) or a shell string (warned against).
        stdin: optional stdin handle forwarded to Popen.
        text: True for text-mode pipes (decoded with the filesystem encoding),
            False for raw binary pipes.
        capture_output: when False, a daemon thread echoes the child's merged
            stdout/stderr to this process's stdout in real time; when True the
            caller is expected to consume ``proc.stdout`` itself.

    Returns:
        The started subprocess.Popen object (stderr merged into stdout).
    """
    import logging
    logger = logging.getLogger(__name__)
    # Configure root logger to output to console if not already configured
    root = logging.getLogger()
    if not root.handlers:
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter("%(message)s")
        handler.setFormatter(formatter)
        root.addHandler(handler)
        root.setLevel(logging.INFO)
    # Determine shell usage: use shell only for string commands
    use_shell = isinstance(cmd, str)
    if use_shell:
        logger.warning(
            "Security Warning: create_process called with string command. Prefer using a list of arguments to avoid shell injection risks."
        )
    kwargs = {
        "shell": use_shell,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.STDOUT,
        "bufsize": 1,  # Line buffered
    }
    if text:
        # Configure for text I/O
        kwargs["text"] = True
        kwargs["encoding"] = default_encoding
        kwargs["errors"] = "replace"
    else:
        # Configure for binary I/O
        kwargs["text"] = False
        # For binary mode, 'encoding' and 'errors' arguments must not be passed to Popen
        kwargs["bufsize"] = 0  # Use unbuffered mode for binary data
    if stdin is not None:
        kwargs["stdin"] = stdin
    if platform.system() == "Windows":
        # Keep the child from flashing a console window on Windows.
        startupinfo = subprocess.STARTUPINFO()  # type: ignore[attr-defined]
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW  # type: ignore[attr-defined]
        startupinfo.wShowWindow = subprocess.SW_HIDE  # type: ignore[attr-defined]
        kwargs.update(
            {
                "startupinfo": startupinfo,
                "creationflags": subprocess.CREATE_NO_WINDOW,  # type: ignore[attr-defined]
            }
        )
    # Print the command being executed
    print(f"Executing: {cmd if isinstance(cmd, str) else ' '.join(cmd)}")
    proc = subprocess.Popen(cmd, **kwargs)
    # Stream output to console in real-time if not capturing
    if proc.stdout and not capture_output:
        def _stream_output(stream):
            # Reads one char/byte at a time so progress output appears as soon
            # as the child writes it (no line buffering delay).
            if text:
                # For text mode, read character by character for real-time output
                while True:
                    char = stream.read(1)
                    if not char:
                        break
                    # Direct write to stdout for immediate feedback
                    sys.stdout.write(char)
                    sys.stdout.flush()
            else:
                # For binary mode, read small chunks
                while True:
                    chunk = stream.read(1)  # Read byte by byte for real-time output
                    if not chunk:
                        break
                    try:
                        # Try to decode binary data for display
                        sys.stdout.write(
                            chunk.decode(default_encoding, errors="replace")
                        )
                        sys.stdout.flush()
                    except Exception:
                        pass
            stream.close()
        # Start a daemon thread to handle output streaming
        Thread(target=_stream_output, args=(proc.stdout,), daemon=True).start()
    return proc
def load_config():
    """Read the user configuration JSON, returning {} on any failure."""
    path = get_user_config_path()
    try:
        with open(path, "r", encoding="utf-8") as handle:
            data = json.load(handle)
    except Exception:
        return {}
    return data
def save_config(config):
    """Write *config* to the user configuration JSON; failures are ignored."""
    path = get_user_config_path()
    try:
        with open(path, "w", encoding="utf-8") as handle:
            json.dump(config, handle, indent=2)
    except Exception:
        # Best-effort persistence: an unwritable config dir must not crash.
        pass
def calculate_text_length(text):
    """Count the characters that will actually be spoken.

    Chapter markers (``<<CHAPTER_MARKER:...>>``), metadata directives
    (``<<METADATA_NAME:value>>``), newlines, and leading/trailing whitespace
    are all excluded from the count.
    """
    spoken = re.sub(r"<<CHAPTER_MARKER:.*?>>", "", text)
    spoken = re.sub(r"<<METADATA_[^:]+:[^>]*>>", "", spoken)
    spoken = spoken.replace("\n", "").strip()
    return len(spoken)
def get_gpu_acceleration(enabled):
    """Report whether GPU acceleration will be used.

    Returns (message, use_gpu): *use_gpu* is True only when *enabled* is
    truthy and either a CUDA device or Apple-Silicon MPS is available.
    """
    try:
        import torch  # type: ignore[import-not-found]
        from torch.cuda import is_available as cuda_available  # type: ignore[import-not-found]
        if not enabled:
            return "GPU available but using CPU.", False
        # Apple Silicon exposes its GPU through the MPS backend.
        if platform.system() == "Darwin" and platform.processor() == "arm":
            if torch.backends.mps.is_available():
                return "MPS GPU available and enabled.", True
            return "MPS GPU not available on Apple Silicon. Using CPU.", False
        if cuda_available():
            return "CUDA GPU available and enabled.", True
        # CUDA unusable: collect a short diagnostic for the message.
        try:
            device_total = torch.cuda.device_count()
            if device_total > 0:
                diagnostic = torch.cuda.get_device_name(0)
            else:
                diagnostic = "No devices found"
        except Exception as probe_error:
            diagnostic = str(probe_error)
        return f"CUDA GPU is not available. Using CPU. ({diagnostic})", False
    except Exception as e:
        return f"Error checking GPU: {e}", False
def prevent_sleep_start():
    """Stop the OS from sleeping while a long-running job is active.

    Windows sets execution-state flags; macOS spawns ``caffeinate``; Linux
    spawns ``systemd-inhibit`` (skipped with a notice when unavailable).
    Spawned helpers are stored in ``_sleep_procs`` for prevent_sleep_end().
    """
    from abogen.constants import PROGRAM_NAME
    system = platform.system()
    if system == "Windows":
        import ctypes
        # Flags: ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED.
        ctypes.windll.kernel32.SetThreadExecutionState(  # type: ignore[attr-defined]
            0x80000000 | 0x00000001 | 0x00000040
        )
    elif system == "Darwin":
        _sleep_procs["Darwin"] = create_process(["caffeinate"])
    elif system == "Linux":
        # Add program name and reason for inhibition
        program_name = PROGRAM_NAME
        reason = "Prevent sleep during abogen process"
        # Only attempt to use systemd-inhibit if it's available on the system.
        if shutil.which("systemd-inhibit"):
            # "sleep infinity" keeps the inhibitor alive until terminated.
            _sleep_procs["Linux"] = create_process(
                [
                    "systemd-inhibit",
                    f"--who={program_name}",
                    f"--why={reason}",
                    "--what=sleep",
                    "--mode=block",
                    "sleep",
                    "infinity",
                ]
            )
        else:
            # Non-systemd distro or systemd tools not installed: skip inhibition rather than crash
            print(
                "systemd-inhibit not found: skipping sleep inhibition on this Linux system."
            )
def prevent_sleep_end():
    """Re-allow system sleep after prevent_sleep_start().

    Windows resets execution-state flags; macOS/Linux terminate the helper
    process recorded in ``_sleep_procs`` (best-effort, then clear the slot).
    """
    system = platform.system()
    if system == "Windows":
        import ctypes
        # ES_CONTINUOUS alone clears the previously set requirements.
        ctypes.windll.kernel32.SetThreadExecutionState(0x80000000)  # type: ignore[attr-defined]
    elif system in ("Darwin", "Linux"):
        proc = _sleep_procs.get(system)
        if proc:
            try:
                proc.terminate()
            except Exception:
                pass
            finally:
                _sleep_procs[system] = None
def load_numpy_kpipeline():
    """Lazily import numpy and Kokoro's KPipeline; return both."""
    from kokoro import KPipeline  # type: ignore[import-not-found]
    import numpy as np

    return np, KPipeline
class LoadPipelineThread(Thread):
    """Background thread that loads numpy + KPipeline and reports via callback.

    The callback receives (numpy_module, kpipeline_class, error_message);
    on failure the first two are None and the last carries the error text.
    """

    def __init__(self, callback):
        super().__init__()
        self.callback = callback

    def run(self):
        try:
            np_mod, pipeline_cls = load_numpy_kpipeline()
        except Exception as exc:
            self.callback(None, None, str(exc))
        else:
            self.callback(np_mod, pipeline_cls, None)
from __future__ import annotations
import os
import threading
from typing import Callable, Dict, Iterable, Optional, Set, Tuple
# huggingface_hub is an optional dependency: degrade to None sentinels when it
# is missing so callers can raise a clear RuntimeError at use time instead.
try: # pragma: no cover - optional dependency guard
    from huggingface_hub import hf_hub_download # type: ignore
    from huggingface_hub.utils import LocalEntryNotFoundError # type: ignore
except Exception: # pragma: no cover - import fallback
    hf_hub_download = None # type: ignore[assignment]
    LocalEntryNotFoundError = None # type: ignore[assignment]
# Provide a stand-in exception type so `except LocalEntryNotFoundError` below
# stays valid even without huggingface_hub installed.
if LocalEntryNotFoundError is None: # pragma: no cover - fallback for tests
    class LocalEntryNotFoundError(Exception):
        pass
from abogen.constants import VOICES_INTERNAL
# Voice ids confirmed present in the local cache during this process; guarded
# by _CACHE_LOCK because downloads may run from multiple threads.
_CACHE_LOCK = threading.Lock()
_CACHED_VOICES: Set[str] = set()
# One-shot guard so bootstrap_voice_cache() only does work once per process.
_BOOTSTRAP_LOCK = threading.Lock()
_BOOTSTRAPPED = False
def _normalize_targets(voices: Optional[Iterable[str]]) -> Set[str]:
    """Return the subset of *voices* that are known Kokoro voice ids.

    A falsy *voices* argument selects every internal voice. Entries are
    stringified and stripped; blanks and unknown ids are dropped.
    """
    if not voices:
        return set(VOICES_INTERNAL)
    selected: Set[str] = set()
    for candidate in voices:
        if not candidate:
            continue
        cleaned = str(candidate).strip()
        if cleaned and cleaned in VOICES_INTERNAL:
            selected.add(cleaned)
    return selected
def ensure_voice_assets(
    voices: Optional[Iterable[str]] = None,
    *,
    repo_id: str = "hexgrad/Kokoro-82M",
    cache_dir: Optional[str] = None,
    on_progress: Optional[Callable[[str], None]] = None,
) -> Tuple[Set[str], Dict[str, str]]:
    """Ensure Kokoro voice weight files are present locally.

    Args:
        voices: voice ids to check; None/empty means all internal voices.
        repo_id: Hugging Face repo the .pt files are fetched from.
        cache_dir: explicit HF cache dir; falls back to the
            ABOGEN_VOICE_CACHE_DIR environment variable when unset.
        on_progress: optional callback invoked with a status string per voice.

    Returns a tuple of (downloaded voices, errors) where errors maps the
    voice id to the underlying exception message.
    """
    if hf_hub_download is None:
        raise RuntimeError("huggingface_hub is required to cache voices")
    effective_cache_dir = cache_dir
    if effective_cache_dir is None:
        env_cache_dir = os.environ.get("ABOGEN_VOICE_CACHE_DIR", "").strip()
        effective_cache_dir = env_cache_dir or None
    targets = _normalize_targets(voices)
    if not targets:
        return set(), {}
    # Snapshot of voices not yet confirmed cached in this process.
    with _CACHE_LOCK:
        missing = [voice for voice in targets if voice not in _CACHED_VOICES]
    downloaded: Set[str] = set()
    errors: Dict[str, str] = {}
    for voice_id in missing:
        if on_progress:
            on_progress(f"Fetching voice asset '{voice_id}'")
        try:
            downloaded_flag = _ensure_single_voice_asset(
                voice_id,
                repo_id=repo_id,
                cache_dir=effective_cache_dir,
            )
        except Exception as exc:  # pragma: no cover - network variance
            errors[voice_id] = str(exc)
            continue
        if downloaded_flag:
            downloaded.add(voice_id)
        # Record success (whether freshly downloaded or already on disk).
        with _CACHE_LOCK:
            _CACHED_VOICES.add(voice_id)
    return downloaded, errors
def bootstrap_voice_cache(
    voices: Optional[Iterable[str]] = None,
    *,
    repo_id: str = "hexgrad/Kokoro-82M",
    cache_dir: Optional[str] = None,
    on_progress: Optional[Callable[[str], None]] = None,
) -> Tuple[Set[str], Dict[str, str]]:
    """Ensure voices are cached once per process.

    Subsequent calls are no-ops and return empty structures.
    """
    global _BOOTSTRAPPED
    # NOTE: the lock is held for the full download pass, so concurrent callers
    # block until the first bootstrap finishes instead of downloading twice.
    with _BOOTSTRAP_LOCK:
        if _BOOTSTRAPPED:
            return set(), {}
        downloaded, errors = ensure_voice_assets(
            voices,
            repo_id=repo_id,
            cache_dir=cache_dir,
            on_progress=on_progress,
        )
        # Marked done even when some voices failed; failures are surfaced in
        # *errors* and can be retried via ensure_voice_assets directly.
        _BOOTSTRAPPED = True
        return downloaded, errors
def _ensure_single_voice_asset(
    voice_id: str,
    *,
    repo_id: str,
    cache_dir: Optional[str],
) -> bool:
    """Make sure one ``voices/<id>.pt`` file exists in the local HF cache.

    Returns True when the file had to be downloaded, False when it was
    already cached. Raises if huggingface_hub is missing or the fetch fails.
    """
    if hf_hub_download is None:
        raise RuntimeError("huggingface_hub is required to cache voices")
    filename = f"voices/{voice_id}.pt"
    common_kwargs = {
        "repo_id": repo_id,
        "filename": filename,
    }
    if cache_dir is not None:
        common_kwargs["cache_dir"] = cache_dir
    try:
        # Probe the local cache first without touching the network.
        hf_hub_download(local_files_only=True, **common_kwargs)
        return False
    except LocalEntryNotFoundError:
        pass
    # Not cached: fetch it. NOTE(review): resume_download is deprecated in
    # newer huggingface_hub releases (resuming is the default there) — confirm
    # the pinned hub version still accepts this keyword.
    hf_hub_download(resume_download=True, **common_kwargs)
    return True
"""Backwards-compatible re-export of the PyQt voice formula dialog.
The actual implementation lives in abogen.pyqt.voice_formula_gui.
"""
from __future__ import annotations
from abogen.pyqt.voice_formula_gui import * # noqa: F401, F403
from abogen.pyqt.voice_formula_gui import VoiceFormulaDialog
__all__ = ["VoiceFormulaDialog"]
import re
from typing import List, Tuple
from abogen.constants import VOICES_INTERNAL
# Calls parsing and loads the voice to gpu or cpu
def get_new_voice(pipeline, formula, use_gpu):
    """Build a blended voice tensor from *formula*.

    The tensor is always placed on the CPU: moving it to "cuda" previously
    triggered "split_with_sizes(): argument 'split_sizes' (position 2)"
    errors, so GPU placement is disabled for now (*use_gpu* is ignored).
    """
    try:
        blended = parse_voice_formula(pipeline, formula)
        return blended.to("cpu")
    except Exception as e:
        raise ValueError(f"Failed to create voice: {str(e)}")
def parse_formula_terms(formula: str) -> List[Tuple[str, float]]:
    """Parse "voice*weight+voice*weight" into (voice, weight) pairs.

    Raises ValueError for an empty formula, a malformed component, an
    unknown voice id, a non-numeric weight, or a non-positive weight.
    """
    if not formula or not formula.strip():
        raise ValueError("Empty voice formula")
    parsed: List[Tuple[str, float]] = []
    for chunk in formula.split("+"):
        component = chunk.strip()
        if not component:
            continue
        if "*" not in component:
            raise ValueError("Each component must be in the form voice*weight")
        name, weight_text = component.split("*", 1)
        name = name.strip()
        if name not in VOICES_INTERNAL:
            raise ValueError(f"Unknown voice: {name}")
        try:
            value = float(weight_text.strip())
        except ValueError as exc:
            raise ValueError(f"Invalid weight for {name}") from exc
        if value <= 0:
            raise ValueError(f"Weight for {name} must be positive")
        parsed.append((name, value))
    if not parsed:
        raise ValueError("Voice weights must sum to a positive value")
    return parsed
def parse_voice_formula(pipeline, formula):
    """Blend single-voice tensors according to *formula*.

    Weights are normalized so they sum to 1 before the tensors are combined.
    """
    terms = parse_formula_terms(formula)
    total = sum(weight for _, weight in terms)
    if total <= 0:
        raise ValueError("Voice weights must sum to a positive value")
    blended = None
    for voice_name, weight in terms:
        share = weight / total
        tensor = pipeline.load_single_voice(voice_name)
        blended = share * tensor if blended is None else blended + share * tensor
    if blended is None:
        raise ValueError("Voice formula produced no components")
    return blended
def calculate_sum_from_formula(formula):
    """Sum every "* weight" factor found in a voice formula string."""
    return sum(float(weight) for weight in re.findall(r"\* *([\d.]+)", formula))
def extract_voice_ids(formula: str) -> List[str]:
    """Return just the voice ids from *formula*, in order of appearance."""
    return [voice_id for voice_id, _weight in parse_formula_terms(formula)]
import json
import os
from typing import Any, Dict, Iterable, List, Tuple
from abogen.constants import VOICES_INTERNAL
from abogen.tts_supertonic import DEFAULT_SUPERTONIC_VOICES
from abogen.utils import get_user_config_path
def _get_profiles_path():
    """Return the voice_profiles.json path next to the user config file."""
    config_dir = os.path.dirname(get_user_config_path())
    return os.path.join(config_dir, "voice_profiles.json")
def load_profiles():
    """Load all voice profiles from JSON file, returning {} on any problem."""
    path = _get_profiles_path()
    if not os.path.exists(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except Exception:
        return {}
    if isinstance(payload, dict):
        # Canonical layout wraps profiles under "abogen_voice_profiles";
        # older files stored the profiles dict directly.
        return payload.get("abogen_voice_profiles", payload)
    return {}
def save_profiles(profiles):
    """Save all voice profiles to the JSON file (canonical wrapped layout)."""
    path = _get_profiles_path()
    os.makedirs(os.path.dirname(path), exist_ok=True)
    payload = {"abogen_voice_profiles": profiles}
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)
def delete_profile(name):
    """Remove a profile by name; unknown names are a silent no-op."""
    profiles = load_profiles()
    if name not in profiles:
        return
    del profiles[name]
    save_profiles(profiles)
def duplicate_profile(src, dest):
    """Duplicate an existing profile under a new name.

    No-op when *src* does not exist or *dest* is falsy. The entry is
    deep-copied so later in-memory edits to one profile cannot leak into
    the other (the original stored a shared reference to the same dict).
    """
    import copy

    profiles = load_profiles()
    if src in profiles and dest:
        profiles[dest] = copy.deepcopy(profiles[src])
        save_profiles(profiles)
def export_profiles(export_path):
    """Export all profiles to specified JSON file."""
    payload = {"abogen_voice_profiles": load_profiles()}
    with open(export_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)
def serialize_profiles() -> Dict[str, Dict[str, Iterable[Tuple[str, float]]]]:
    """Return profiles in canonical dictionary form (alias of load_profiles)."""
    return load_profiles()
def _normalize_supertonic_voice(value: Any) -> str:
    """Uppercase and validate a Supertonic voice name, defaulting to "M1"."""
    candidate = str(value or "").strip().upper()
    if candidate in DEFAULT_SUPERTONIC_VOICES:
        return candidate
    return "M1"
def _coerce_supertonic_steps(value: Any) -> int:
try:
steps = int(value)
except (TypeError, ValueError):
return 5
return max(2, min(15, steps))
def _coerce_supertonic_speed(value: Any) -> float:
try:
speed = float(value)
except (TypeError, ValueError):
return 1.0
return max(0.7, min(2.0, speed))
def normalize_profile_entry(entry: Any) -> Dict[str, Any]:
    """Normalize a stored profile entry.

    Backwards compatible with both layouts:
    - legacy Kokoro-only entries: {language, voices}
    - newer entries that carry an explicit provider

    Returns {} when the entry is not usable.
    """
    if not isinstance(entry, dict):
        return {}
    provider = str(entry.get("provider") or "kokoro").strip().lower()
    if provider not in {"kokoro", "supertonic"}:
        provider = "kokoro"
    language = str(entry.get("language") or "a").strip().lower() or "a"
    if provider == "supertonic":
        # Several historical key names are accepted for each field.
        voice_raw = entry.get("voice") or entry.get("voice_name") or entry.get("name")
        steps_raw = (
            entry.get("total_steps")
            or entry.get("supertonic_total_steps")
            or entry.get("quality")
        )
        speed_raw = entry.get("speed") or entry.get("supertonic_speed")
        return {
            "provider": "supertonic",
            "language": language,
            "voice": _normalize_supertonic_voice(voice_raw),
            "total_steps": _coerce_supertonic_steps(steps_raw),
            "speed": _coerce_supertonic_speed(speed_raw),
        }
    voices = _normalize_voice_entries(entry.get("voices", []))
    if not voices:
        return {}
    return {"provider": "kokoro", "language": language, "voices": voices}
def _normalize_voice_entries(entries: Iterable) -> List[Tuple[str, float]]:
    """Filter arbitrary voice/weight payloads down to valid (id, weight) pairs.

    Accepts dict items ({"id"/"voice": ..., "weight": ...}) or sequences of
    at least two elements. Unknown voices, missing or non-numeric weights,
    and non-positive weights are silently dropped.
    """
    result: List[Tuple[str, float]] = []
    for raw in entries or []:
        if isinstance(raw, dict):
            voice_id = raw.get("id") or raw.get("voice")
            raw_weight = raw.get("weight")
        elif isinstance(raw, (list, tuple)) and len(raw) >= 2:
            voice_id, raw_weight = raw[0], raw[1]
        else:
            continue
        if voice_id not in VOICES_INTERNAL or raw_weight is None:
            continue
        try:
            weight = float(raw_weight)
        except (TypeError, ValueError):
            continue
        if weight > 0:
            result.append((voice_id, weight))
    return result
def normalize_voice_entries(entries: Iterable) -> List[Tuple[str, float]]:
    """Public helper to normalize voice-weight pairs from arbitrary payloads."""
    return _normalize_voice_entries(entries)
def save_profile(name: str, *, language: str, voices: Iterable) -> None:
    """Persist a single Kokoro profile after validating its data.

    Raises ValueError for a blank name or when no valid voice/weight pair
    survives normalization. A falsy language defaults to "a".
    """
    cleaned_name = (name or "").strip()
    if not cleaned_name:
        raise ValueError("Profile name is required")
    entries = _normalize_voice_entries(voices)
    if not entries:
        raise ValueError("At least one voice with a weight above zero is required")
    profiles = load_profiles()
    profiles[cleaned_name] = {
        "provider": "kokoro",
        "language": language or "a",
        "voices": entries,
    }
    save_profiles(profiles)
def remove_profile(name: str) -> None:
    """Alias for delete_profile, kept for API symmetry."""
    delete_profile(name)
def import_profiles_data(data: Dict, *, replace_existing: bool = False) -> List[str]:
    """Merge profiles from a dictionary structure and persist them.

    Returns the list of profile names that were added or updated.
    """
    if not isinstance(data, dict):
        raise ValueError("Invalid profile payload")
    # Unwrap the canonical "abogen_voice_profiles" wrapper when present.
    payload = data.get("abogen_voice_profiles", data)
    if not isinstance(payload, dict):
        raise ValueError("Invalid profile payload")
    existing = load_profiles()
    changed: List[str] = []
    for profile_name, raw_entry in payload.items():
        entry = normalize_profile_entry(raw_entry)
        if not entry:
            continue
        if profile_name in existing and not replace_existing:
            # Skip duplicates unless explicit replacement is requested.
            continue
        existing[profile_name] = entry
        changed.append(profile_name)
    if changed:
        save_profiles(existing)
    return changed
def export_profiles_payload(names: Iterable[str] | None = None) -> Dict[str, Dict]:
    """Return profiles limited to the provided names for download/export."""
    profiles = load_profiles()
    if names is None:
        selected = profiles
    else:
        selected = {key: profiles[key] for key in names if key in profiles}
    return {"abogen_voice_profiles": selected}
__all__ = ["create_app"]
def __getattr__(name: str):
if name == "create_app":
from .app import create_app
return create_app
raise AttributeError(name)
from __future__ import annotations
import atexit
import logging
import os
from pathlib import Path
from typing import Any, Optional
from flask import Flask
from abogen.utils import get_user_cache_path, get_user_output_path, get_user_settings_dir
from .conversion_runner import run_conversion_job
from .service import build_service
class _SuppressSuccessfulAccessFilter(logging.Filter):
"""Filter out successful (HTTP 200) werkzeug access logs."""
def filter(self, record: logging.LogRecord) -> bool: # pragma: no cover - small utility
try:
message = record.getMessage()
except Exception: # pragma: no cover - defensive
return True
# Werkzeug access logs include the status code near the end, e.g.
# "GET /path HTTP/1.1" 200 -
# Treat any 2xx response as success to suppress.
return " 200 " not in message and " 201 " not in message and " 204 " not in message
# Module-level guard so the werkzeug access-log filter is attached only once
# even when create_app() is called multiple times.
_access_log_filter_attached = False
def _default_dirs() -> tuple[Path, Path]:
    """Resolve and create the (uploads, outputs) directories.

    ABOGEN_UPLOAD_ROOT / ABOGEN_OUTPUT_ROOT environment overrides win;
    otherwise the standard user cache/output locations are used.
    """
    upload_env = os.environ.get("ABOGEN_UPLOAD_ROOT")
    output_env = os.environ.get("ABOGEN_OUTPUT_ROOT")
    if upload_env:
        uploads = Path(os.path.expanduser(upload_env)).resolve()
    else:
        uploads = Path(get_user_cache_path("web/uploads"))
    if output_env:
        outputs = Path(os.path.expanduser(output_env)).resolve()
    else:
        outputs = Path(get_user_output_path("web"))
    for directory in (uploads, outputs):
        directory.mkdir(parents=True, exist_ok=True)
    return uploads, outputs
def _get_secret_key() -> str:
env_key = os.environ.get("ABOGEN_SECRET_KEY")
if env_key:
return env_key
try:
settings_dir = Path(get_user_settings_dir())
settings_dir.mkdir(parents=True, exist_ok=True)
secret_file = settings_dir / ".secret_key"
if secret_file.exists():
return secret_file.read_text(encoding="utf-8").strip()
key = os.urandom(24).hex()
secret_file.write_text(key, encoding="utf-8")
return key
except Exception:
# Fallback if we can't write to settings dir
return os.urandom(24).hex()
def create_app(config: Optional[dict[str, Any]] = None) -> Flask:
    """Application factory for the abogen web UI.

    Builds the Flask app, wires the conversion service into
    ``app.extensions["conversion_service"]``, registers all blueprints, and
    installs a one-time werkzeug log filter that hides 2xx access lines.

    Args:
        config: optional overrides merged over the default Flask config.
    """
    uploads_dir, outputs_dir = _default_dirs()
    app = Flask(
        __name__,
        static_folder="static",
        template_folder="templates",
    )
    base_config = {
        "SECRET_KEY": _get_secret_key(),
        "UPLOAD_FOLDER": str(uploads_dir),
        "OUTPUT_FOLDER": str(outputs_dir),
        "MAX_CONTENT_LENGTH": 1024 * 1024 * 400, # 400 MB uploads
    }
    if config:
        base_config.update(config)
    app.config.update(base_config)
    service = build_service(
        runner=run_conversion_job,
        output_root=Path(app.config["OUTPUT_FOLDER"]),
        uploads_root=Path(app.config["UPLOAD_FOLDER"]),
    )
    app.extensions["conversion_service"] = service
    # Imported lazily — presumably to avoid a circular import with the routes
    # package; confirm before moving to module level.
    from abogen.webui.routes import (
        main_bp,
        jobs_bp,
        settings_bp,
        voices_bp,
        entities_bp,
        books_bp,
        api_bp,
    )
    app.register_blueprint(main_bp)
    app.register_blueprint(jobs_bp, url_prefix="/jobs")
    app.register_blueprint(settings_bp, url_prefix="/settings")
    app.register_blueprint(voices_bp, url_prefix="/voices")
    app.register_blueprint(entities_bp, url_prefix="/overrides")
    app.register_blueprint(books_bp, url_prefix="/find-books")
    app.register_blueprint(api_bp, url_prefix="/api")
    # Stop the background conversion service when the interpreter exits.
    atexit.register(service.shutdown)
    global _access_log_filter_attached
    if not _access_log_filter_attached:
        logging.getLogger("werkzeug").addFilter(_SuppressSuccessfulAccessFilter())
        _access_log_filter_attached = True
    return app
def main() -> None:
    """Run the web server using ABOGEN_HOST/ABOGEN_PORT/ABOGEN_DEBUG env vars."""
    server = create_app()
    server.run(
        host=os.environ.get("ABOGEN_HOST", "0.0.0.0"),
        port=int(os.environ.get("ABOGEN_PORT", "8808")),
        debug=os.environ.get("ABOGEN_DEBUG", "false").lower() == "true",
    )
if __name__ == "__main__": # pragma: no cover
main()

Sorry, the diff of this file is too big to display

from __future__ import annotations
import json
import re
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
import numpy as np
from abogen.debug_tts_samples import MARKER_PREFIX, MARKER_SUFFIX, build_debug_epub, iter_expected_codes
from abogen.kokoro_text_normalization import normalize_for_pipeline
from abogen.normalization_settings import build_apostrophe_config
from abogen.text_extractor import extract_from_path
from abogen.voice_cache import ensure_voice_assets
from abogen.webui.conversion_runner import SAMPLE_RATE, SPLIT_PATTERN, _select_device, _to_float32, _resolve_voice, _spec_to_voice_ids
from abogen.utils import load_numpy_kpipeline
# Matches debug sample markers <PREFIX>CODE<SUFFIX>, capturing the
# uppercase/digit/underscore sample code as group "code".
_MARKER_RE = re.compile(re.escape(MARKER_PREFIX) + r"(?P<code>[A-Z0-9_]+)" + re.escape(MARKER_SUFFIX))
@dataclass(frozen=True)
class DebugWavArtifact:
    """One WAV file produced by a debug TTS run (see run_debug_tts_wavs)."""

    # Human-readable label used in the manifest ("Overall" or the case code).
    label: str
    # File name relative to the run directory.
    filename: str
    # Sample code for per-case artifacts; None for the combined overall.wav.
    code: Optional[str] = None
    # Exact snippet text that was synthesized; None for overall.wav.
    text: Optional[str] = None
def _resolve_voice_setting(value: str) -> tuple[str, Optional[str], Optional[str]]:
    """Resolve settings voice strings into a pipeline-ready voice spec.

    Supports "profile:<name>" by converting it into a concrete voice formula.
    Returns (resolved_voice_spec, profile_name, profile_language).
    """
    # Imported lazily; the routes package pulls in the whole web stack.
    from abogen.webui.routes.utils.voice import resolve_voice_setting

    return resolve_voice_setting(value)
def _load_pipeline(language: str, use_gpu: bool) -> Any:
    """Create a Kokoro KPipeline on CPU, or on the selected device if *use_gpu*."""
    device = _select_device() if use_gpu else "cpu"
    _np, KPipeline = load_numpy_kpipeline()
    return KPipeline(lang_code=language, repo_id="hexgrad/Kokoro-82M", device=device)
def _extract_cases_from_text(text: str) -> List[Tuple[str, str]]:
    """Split marker-delimited debug text into (code, snippet) pairs.

    Each snippet is the text between one marker and the next (or the end of
    the input), with all whitespace collapsed to single spaces.
    """
    source = str(text or "")
    hits = list(_MARKER_RE.finditer(source))
    pairs: List[Tuple[str, str]] = []
    for position, hit in enumerate(hits):
        begin = hit.end()
        if position + 1 < len(hits):
            finish = hits[position + 1].start()
        else:
            finish = len(source)
        # Keep it small and predictable: collapse whitespace.
        snippet = " ".join(source[begin:finish].strip().split())
        pairs.append((hit.group("code"), snippet))
    return pairs
def _spoken_id(code: str) -> str:
# Make IDs pronounceable and stable (avoid reading as a word).
out: List[str] = []
for ch in str(code or ""):
if ch == "_":
out.append(" ")
elif ch.isalnum():
out.append(ch)
else:
out.append(" ")
# Add spaces between alnum to encourage letter-by-letter reading.
spaced = " ".join("".join(out).split())
return spaced
def run_debug_tts_wavs(
    *,
    output_root: Path,
    settings: Mapping[str, Any],
    epub_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """Generate WAV artifacts for the debug EPUB samples.

    Writes, under ``<output_root>/debug/<run_id>/``:
    - overall.wav: concatenation of all samples
    - case_<CODE>.wav: each sample rendered separately
    - manifest.json: metadata + file list

    Returns the manifest dictionary that was written to disk. Raises
    RuntimeError when the EPUB is missing any expected sample code.
    """
    output_root = Path(output_root)
    output_root.mkdir(parents=True, exist_ok=True)
    # Each invocation gets its own directory keyed by a fresh run id.
    run_id = uuid.uuid4().hex
    run_dir = output_root / "debug" / run_id
    run_dir.mkdir(parents=True, exist_ok=True)
    if epub_path is None:
        epub_path = run_dir / "abogen_debug_samples.epub"
        build_debug_epub(epub_path)
    else:
        epub_path = Path(epub_path)
    extraction = extract_from_path(epub_path)
    combined_text = extraction.combined_text or "\n\n".join((c.text or "") for c in extraction.chapters)
    cases = _extract_cases_from_text(combined_text)
    # Prefer the canonical sample catalog for text (EPUB extraction may include headings).
    try:
        from abogen.debug_tts_samples import DEBUG_TTS_SAMPLES
        sample_text_by_code = {sample.code: sample.text for sample in DEBUG_TTS_SAMPLES}
    except Exception:
        sample_text_by_code = {}
    # Fail fast when the EPUB lacks any expected sample code.
    expected = list(iter_expected_codes())
    found_codes = {code for code, _ in cases}
    missing = [code for code in expected if code not in found_codes]
    if missing:
        raise RuntimeError(f"Debug EPUB missing expected codes: {', '.join(missing)}")
    language = str(settings.get("language") or "a").strip() or "a"
    # Kokoro's KPipeline expects short language codes like "a" (American English),
    # but older settings may store ISO-like values such as "en".
    language_aliases = {
        "en": "a",
        "en-us": "a",
        "en_us": "a",
        "en-gb": "b",
        "en_gb": "b",
        "es": "e",
        "es-es": "e",
        "fr": "f",
        "fr-fr": "f",
        "hi": "h",
        "it": "i",
        "pt": "p",
        "pt-br": "p",
        "ja": "j",
        "jp": "j",
        "zh": "z",
        "zh-cn": "z",
    }
    language = language_aliases.get(language.lower(), language)
    voice_spec = str(settings.get("default_voice") or "").strip()
    use_gpu = bool(settings.get("use_gpu", False))
    speed = float(settings.get("default_speed", 1.0) or 1.0)
    # Settings may store "profile:<name>" which is not a Kokoro voice ID.
    # Resolve it to a concrete voice formula (e.g. "af_heart*0.5+...") so Kokoro
    # doesn't attempt to download a non-existent "voices/profile:<name>.pt".
    try:
        resolved_voice, _profile_name, profile_language = _resolve_voice_setting(voice_spec)
        if resolved_voice:
            voice_spec = resolved_voice
        if profile_language:
            language = str(profile_language).strip() or language
    except Exception:
        # Voice profile resolution is best-effort; fall back to raw voice_spec.
        pass
    # Best-effort voice caching (only for known Kokoro internal voices).
    voice_ids = _spec_to_voice_ids(voice_spec)
    if voice_ids:
        try:
            ensure_voice_assets(voice_ids)
        except Exception:
            # Network / optional dependency variance; debug runner can still proceed.
            pass
    pipeline = _load_pipeline(language, use_gpu)
    voice_choice = _resolve_voice(pipeline, voice_spec, use_gpu)
    apostrophe_config = build_apostrophe_config(settings=settings)
    normalization_settings = dict(settings)
    artifacts: List[DebugWavArtifact] = []
    overall_path = run_dir / "overall.wav"
    overall_audio: List[np.ndarray] = []
    def synth(text: str, *, apply_normalization: bool = True) -> np.ndarray:
        # Render one snippet to a float32 waveform; sample IDs are spoken
        # verbatim (apply_normalization=False), case text is normalized first.
        normalized = (
            normalize_for_pipeline(
                text,
                config=apostrophe_config,
                settings=normalization_settings,
            )
            if apply_normalization
            else str(text or "")
        )
        parts: List[np.ndarray] = []
        for segment in pipeline(
            normalized,
            voice=voice_choice,
            speed=speed,
            split_pattern=SPLIT_PATTERN,
        ):
            audio = _to_float32(getattr(segment, "audio", None))
            if audio.size:
                parts.append(audio)
        if not parts:
            return np.zeros(0, dtype="float32")
        return np.concatenate(parts).astype("float32", copy=False)
    # Silence inserted between a case's spoken ID and its text, and between cases.
    pause_1s = np.zeros(int(1.0 * SAMPLE_RATE), dtype="float32")
    between_cases = np.zeros(int(0.35 * SAMPLE_RATE), dtype="float32")
    # Per sample
    for code, snippet in cases:
        snippet = sample_text_by_code.get(code, snippet)
        if not snippet:
            continue
        id_audio = synth(_spoken_id(code), apply_normalization=False)
        text_audio = synth(snippet, apply_normalization=True)
        audio = np.concatenate([id_audio, pause_1s, text_audio]).astype("float32", copy=False)
        filename = f"case_{code}.wav"
        path = run_dir / filename
        # Write float32 PCM WAV.
        import soundfile as sf
        sf.write(path, audio, SAMPLE_RATE, subtype="FLOAT")
        artifacts.append(DebugWavArtifact(label=f"{code}", filename=filename, code=code, text=snippet))
        overall_audio.append(audio)
        overall_audio.append(between_cases)
    # Overall
    if overall_audio:
        combined = np.concatenate(overall_audio).astype("float32", copy=False)
    else:
        combined = np.zeros(0, dtype="float32")
    import soundfile as sf
    sf.write(overall_path, combined, SAMPLE_RATE, subtype="FLOAT")
    artifacts.insert(0, DebugWavArtifact(label="Overall", filename="overall.wav", code=None, text=None))
    manifest = {
        "run_id": run_id,
        "epub": str(epub_path),
        "artifacts": [artifact.__dict__ for artifact in artifacts],
        "sample_rate": SAMPLE_RATE,
    }
    (run_dir / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    return manifest
# CUDA 12.6 runtime base image (cuDNN included) on Ubuntu 22.04.
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
# Put the venv first on PATH so plain `pip`/`python` use it.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    VIRTUAL_ENV=/opt/venv \
    PATH=/opt/venv/bin:$PATH
# Build-time knobs: PyTorch wheel index + optional version pin, and whether
# to install GPU-enabled ONNX Runtime.
ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu126
ARG TORCH_VERSION=
ARG USE_GPU=true
# System deps: Python + audio/video tooling (ffmpeg, libsndfile) and GL libs.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3 \
    python3-venv \
    python3-pip \
    ffmpeg \
    libsndfile1 \
    libgl1 \
    libglib2.0-0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
RUN python3 -m venv "$VIRTUAL_ENV"
WORKDIR /app
COPY pyproject.toml README.md ./
COPY abogen ./abogen
# Install torch (pinned when TORCH_VERSION is given), then the project itself,
# a spaCy English model wheel, and mutagen for audio tagging.
RUN pip install --upgrade pip \
    && if [ -n "$TORCH_VERSION" ]; then \
    pip install torch=="$TORCH_VERSION" torchvision=="$TORCH_VERSION" torchaudio=="$TORCH_VERSION" --index-url "$TORCH_INDEX_URL"; \
    else \
    pip install torch torchvision torchaudio --index-url "$TORCH_INDEX_URL"; \
    fi \
    && pip install --no-cache-dir . \
    https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl \
    && pip install --no-cache-dir "mutagen>=1.47.0"
# Install onnxruntime-gpu for CUDA acceleration (supertonic uses ONNX Runtime)
# Set USE_GPU=false to skip this for CPU-only deployments
RUN if [ "$USE_GPU" = "true" ]; then \
    pip install --no-cache-dir onnxruntime-gpu; \
    fi
ENV ABOGEN_HOST=0.0.0.0 \
    ABOGEN_PORT=8808
EXPOSE 8808
VOLUME ["/data"]
# All mutable state (uploads, outputs, caches, HF models) lives under /data.
ENV ABOGEN_UPLOAD_ROOT=/data/uploads \
    ABOGEN_OUTPUT_ROOT=/data/outputs \
    ABOGEN_TEMP_DIR=/data/cache \
    ABOGEN_VOICE_CACHE_DIR=/data/voice-cache \
    HF_HOME=/data/huggingface \
    HUGGINGFACE_HUB_CACHE=/data/huggingface/hub
# Copy and setup entrypoint script
COPY abogen/webui/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Create non-root user and setup permissions
RUN useradd -m -u 1000 abogen \
    && mkdir -p /data/uploads /data/outputs /data/cache /data/voice-cache /data/huggingface \
    && chown -R abogen:abogen /data /app
USER abogen
ENTRYPOINT ["/entrypoint.sh"]
CMD ["abogen-web"]
#!/bin/bash
# Entrypoint script for abogen container
# Performs CUDA diagnostics and starts the web server
set -e
echo "=== Abogen Container Starting ==="
# Check CUDA availability
if command -v nvidia-smi &> /dev/null; then
    echo "NVIDIA Driver detected:"
    nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free --format=csv,noheader 2>/dev/null || echo " (nvidia-smi query failed)"
    # Check PyTorch CUDA support
    python3 -c "
import torch
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'CUDA version (PyTorch): {torch.version.cuda}')
    print(f'GPU count: {torch.cuda.device_count()}')
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(f'  GPU {i}: {props.name} ({props.total_memory // 1024**2} MB)')
else:
    print('WARNING: PyTorch cannot access CUDA. Running on CPU.')
" 2>&1 || echo "PyTorch CUDA check failed"
else
    echo "No NVIDIA driver detected. Running on CPU."
fi
echo "================================="
echo ""
# Start the application
# "$@" is the image CMD (default: abogen-web); exec replaces this shell so the
# server receives signals (SIGTERM) directly.
exec "$@"
from abogen.webui.routes.main import main_bp
from abogen.webui.routes.jobs import jobs_bp
from abogen.webui.routes.settings import settings_bp
from abogen.webui.routes.voices import voices_bp
from abogen.webui.routes.entities import entities_bp
from abogen.webui.routes.books import books_bp
from abogen.webui.routes.api import api_bp
# Public blueprint registry re-exported by abogen.webui.routes.
__all__ = [
    "main_bp",
    "jobs_bp",
    "settings_bp",
    "voices_bp",
    "entities_bp",
    "books_bp",
    "api_bp",
]
from typing import Any, Dict, Mapping, List, Optional
import base64
import uuid
from pathlib import Path
from flask import Blueprint, request, jsonify, send_file, url_for, current_app
from flask.typing import ResponseReturnValue
from abogen.webui.routes.utils.settings import (
load_settings,
load_integration_settings,
coerce_float,
coerce_bool,
audiobookshelf_settings_from_payload,
calibre_settings_from_payload,
)
from abogen.voice_profiles import (
load_profiles,
save_profiles,
delete_profile,
duplicate_profile,
serialize_profiles,
import_profiles_data,
export_profiles_payload,
normalize_profile_entry,
)
from abogen.webui.routes.utils.common import split_profile_spec
from abogen.webui.routes.utils.preview import synthesize_preview, generate_preview_audio
from abogen.webui.routes.utils.voice import formula_from_profile
from abogen.normalization_settings import (
build_llm_configuration,
build_apostrophe_config,
apply_overrides,
)
from abogen.llm_client import list_models, LLMClientError
from abogen.kokoro_text_normalization import normalize_for_pipeline
from abogen.integrations.audiobookshelf import AudiobookshelfClient, AudiobookshelfConfig
from abogen.integrations.calibre_opds import (
CalibreOPDSClient,
CalibreOPDSError,
)
from abogen.webui.routes.utils.service import get_service
from abogen.webui.routes.utils.form import build_pending_job_from_extraction
from abogen.text_extractor import extract_from_path
from werkzeug.utils import secure_filename
# Blueprint carrying all JSON/API endpoints for the web UI.
api_bp = Blueprint("api", __name__)
# --- Voice Profile Routes ---
@api_bp.get("/voice-profiles")
def api_get_voice_profiles() -> ResponseReturnValue:
    """Return all stored voice profiles as JSON."""
    return jsonify(load_profiles())
@api_bp.post("/voice-profiles")
def api_save_voice_profile() -> ResponseReturnValue:
    """Create or update a voice profile.

    Accepts either an explicit ``profile`` object, or the flat Speaker
    Studio payload format from which a profile entry is assembled.
    Supplying ``originalName`` renames an existing profile.
    """
    payload = request.get_json(force=True, silent=True) or {}
    name = str(payload.get("name") or "").strip()
    original_name = str(payload.get("originalName") or "").strip() or None
    profile = payload.get("profile")
    if profile is None:
        # Assemble a profile entry from the flat Speaker Studio fields.
        provider = str(payload.get("provider") or "kokoro").strip().lower()
        if provider not in {"kokoro", "supertonic"}:
            provider = "kokoro"
        language = str(payload.get("language") or "a").strip().lower() or "a"
        if provider == "supertonic":
            profile = {
                "provider": "supertonic",
                "language": language,
                "voice": payload.get("voice"),
                "total_steps": payload.get("total_steps") or payload.get("supertonic_total_steps"),
                "speed": payload.get("speed") or payload.get("supertonic_speed"),
            }
        else:
            profile = {
                "provider": "kokoro",
                "language": language,
                "voices": payload.get("voices") or [],
            }
    if not name or not profile:
        return jsonify({"error": "Name and profile are required"}), 400
    normalized = normalize_profile_entry(profile)
    if not normalized:
        return jsonify({"error": "Invalid profile payload"}), 400
    profiles = load_profiles()
    # Renaming: drop the entry stored under the previous name.
    if original_name and original_name != name and original_name in profiles:
        del profiles[original_name]
    profiles[name] = normalized
    save_profiles(profiles)
    return jsonify({"success": True, "profile": name, "profiles": serialize_profiles()})
@api_bp.delete("/voice-profiles/<path:name>")
def api_delete_voice_profile(name: str) -> ResponseReturnValue:
    """Delete the named voice profile and return the remaining profiles."""
    delete_profile(name)
    return jsonify({"success": True, "profiles": serialize_profiles()})
@api_bp.post("/voice-profiles/<path:name>/duplicate")
def api_duplicate_voice_profile(name: str) -> ResponseReturnValue:
    """Copy an existing profile under a new name supplied in the JSON body."""
    payload = request.get_json(force=True, silent=True) or {}
    target = str(payload.get("name") or "").strip()
    if not target:
        return jsonify({"error": "Name is required"}), 400
    duplicate_profile(name, target)
    return jsonify({"success": True, "profile": target, "profiles": serialize_profiles()})
@api_bp.post("/voice-profiles/import")
def api_import_voice_profiles() -> ResponseReturnValue:
    """Import profiles from a JSON payload, optionally replacing existing ones."""
    payload = request.get_json(force=True, silent=True) or {}
    data = payload.get("data")
    replace = bool(payload.get("replace_existing"))
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid profile payload"}), 400
    try:
        count = import_profiles_data(data, replace_existing=replace)
    except Exception as exc:
        # Import errors are surfaced verbatim as a client error.
        return jsonify({"error": str(exc)}), 400
    return jsonify({"success": True, "imported": count, "profiles": serialize_profiles()})
@api_bp.get("/voice-profiles/export")
def api_export_voice_profiles() -> ResponseReturnValue:
    """Download voice profiles as a JSON attachment.

    An optional comma-separated ``names`` query argument restricts the
    export to those profiles.
    """
    import io
    import json
    raw_names = request.args.get("names")
    names = None
    if raw_names:
        names = [part.strip() for part in raw_names.split(",") if part.strip()]
    body = json.dumps(export_profiles_payload(names), indent=2).encode("utf-8")
    attachment = "voice_profiles_export.json" if names else "voice_profiles.json"
    return send_file(
        io.BytesIO(body),
        mimetype="application/json",
        as_attachment=True,
        download_name=attachment,
    )
@api_bp.post("/voice-profiles/preview")
def api_voice_profiles_preview() -> ResponseReturnValue:
    """Generate a short audio preview for a voice profile or formula.

    The payload may identify the voice in several ways, resolved in this
    order: a direct Supertonic voice, a saved profile name, a raw Kokoro
    formula string, or a list of Kokoro voices to mix.  Returns the
    synthesized preview response, or a JSON error with 400/404/500.
    """
    payload = request.get_json(force=True, silent=True) or {}
    text = str(payload.get("text") or "").strip() or "Hello world"
    language = str(payload.get("language") or "a").strip().lower() or "a"
    speed = coerce_float(payload.get("speed"), 1.0)
    max_seconds = coerce_float(payload.get("max_seconds"), 8.0)
    settings = load_settings()
    use_gpu = settings.get("use_gpu", False)
    # Accept a direct formula string or a full profile entry.
    formula = str(payload.get("formula") or "").strip()
    profile_name = str(payload.get("profile") or "").strip()
    provider = str(payload.get("tts_provider") or payload.get("provider") or "").strip().lower() or None
    supertonic_total_steps = int(payload.get("supertonic_total_steps") or payload.get("total_steps") or settings.get("supertonic_total_steps") or 5)
    voice_spec = ""
    resolved_provider = provider or "kokoro"
    profiles = load_profiles()
    if resolved_provider == "supertonic" and not profile_name:
        # Direct Supertonic preview; voice falls back to "M1".
        voice_spec = str(payload.get("voice") or payload.get("supertonic_voice") or "M1").strip() or "M1"
        # Allow per-speaker overrides via payload.
        supertonic_total_steps = int(payload.get("supertonic_total_steps") or payload.get("total_steps") or supertonic_total_steps)
        speed = coerce_float(payload.get("supertonic_speed") or payload.get("speed"), speed)
    elif profile_name:
        # Saved profile: its normalized entry decides provider and parameters.
        entry = profiles.get(profile_name)
        normalized_entry = normalize_profile_entry(entry)
        if not normalized_entry:
            return jsonify({"error": "Unknown profile"}), 404
        resolved_provider = str(normalized_entry.get("provider") or "kokoro")
        if resolved_provider == "supertonic":
            voice_spec = str(normalized_entry.get("voice") or "M1")
            supertonic_total_steps = int(normalized_entry.get("total_steps") or supertonic_total_steps)
            speed = float(normalized_entry.get("speed") or speed)
        else:
            voice_spec = formula_from_profile(normalized_entry) or ""
            language = str(normalized_entry.get("language") or language)
    elif formula:
        # Raw Kokoro formula string passed straight through.
        voice_spec = formula
        resolved_provider = "kokoro"
    else:
        # Raw voices payload -> Kokoro mix.
        voices = payload.get("voices") or []
        pseudo = {"provider": "kokoro", "language": language, "voices": voices}
        normalized_entry = normalize_profile_entry(pseudo)
        voice_spec = formula_from_profile(normalized_entry) or ""
        resolved_provider = "kokoro"
    if not voice_spec:
        return jsonify({"error": "Unable to resolve preview voice"}), 400
    try:
        return synthesize_preview(
            text=text,
            voice_spec=voice_spec,
            language=language,
            speed=speed,
            use_gpu=use_gpu,
            tts_provider=resolved_provider,
            supertonic_total_steps=supertonic_total_steps,
            max_seconds=max_seconds,
        )
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
@api_bp.post("/speaker-preview")
def api_speaker_preview() -> ResponseReturnValue:
    """Preview a single speaker's voice, optionally in the context of a
    pending job so that its manual/pronunciation overrides apply.

    NOTE(review): the provider heuristic below (a bare "M1".."F5" spec
    implies Supertonic) should be confirmed against the UI payloads.
    """
    payload = request.get_json(force=True, silent=True) or {}
    pending_id = str(payload.get("pending_id") or "").strip()
    text = payload.get("text", "Hello world")
    voice = payload.get("voice", "af_heart")
    language = payload.get("language", "a")
    speed_value = payload.get("speed")
    speed = coerce_float(speed_value, 1.0)
    tts_provider = str(payload.get("tts_provider") or "").strip().lower()
    supertonic_total_steps = int(payload.get("supertonic_total_steps") or 5)
    settings = load_settings()
    use_gpu = settings.get("use_gpu", False)
    # The voice field may embed a profile reference; split it apart.
    base_spec, speaker_name = split_profile_spec(voice)
    resolved_provider = tts_provider if tts_provider in {"kokoro", "supertonic"} else ""
    if speaker_name:
        # A named profile wins over the raw spec for provider and voice.
        entry = normalize_profile_entry(load_profiles().get(speaker_name))
        if entry:
            resolved_provider = str(entry.get("provider") or resolved_provider or "")
            if resolved_provider == "supertonic":
                voice = str(entry.get("voice") or "M1")
                supertonic_total_steps = int(entry.get("total_steps") or supertonic_total_steps)
                if speed_value is None:
                    # Profile speed applies only when the caller sent none.
                    speed = coerce_float(entry.get("speed"), speed)
            elif resolved_provider == "kokoro":
                voice = formula_from_profile(entry) or (base_spec or voice)
    if not resolved_provider:
        resolved_provider = "supertonic" if str(base_spec or "").strip() in {"M1","M2","M3","M4","M5","F1","F2","F3","F4","F5"} else "kokoro"
    pronunciation_overrides = None
    manual_overrides = None
    speakers = None
    if pending_id:
        # Pull override context from the referenced pending job, if any.
        try:
            pending = get_service().get_pending_job(pending_id)
        except Exception:
            pending = None
        if pending is not None:
            manual_overrides = getattr(pending, "manual_overrides", None)
            pronunciation_overrides = getattr(pending, "pronunciation_overrides", None)
            speakers = getattr(pending, "speakers", None)
    try:
        return synthesize_preview(
            text=text,
            voice_spec=voice,
            language=language,
            speed=speed,
            use_gpu=use_gpu,
            tts_provider=resolved_provider,
            supertonic_total_steps=supertonic_total_steps or int(settings.get("supertonic_total_steps") or 5),
            pronunciation_overrides=pronunciation_overrides,
            manual_overrides=manual_overrides,
            speakers=speakers,
        )
    except Exception as e:
        return jsonify({"error": str(e)}), 500
# --- Integration Routes ---
def _opds_metadata_overrides(metadata_payload: Mapping[str, Any]) -> Dict[str, Any]:
metadata_overrides: Dict[str, Any] = {}
def _stringify_metadata_value(value: Any) -> str:
if value is None:
return ""
if isinstance(value, (list, tuple, set)):
parts = [str(item).strip() for item in value if item is not None]
parts = [part for part in parts if part]
return ", ".join(parts)
return str(value).strip()
raw_series = metadata_payload.get("series") or metadata_payload.get("series_name")
series_name = str(raw_series or "").strip()
if series_name:
metadata_overrides["series"] = series_name
metadata_overrides.setdefault("series_name", series_name)
series_index_value = (
metadata_payload.get("series_index")
or metadata_payload.get("series_position")
or metadata_payload.get("series_sequence")
or metadata_payload.get("book_number")
)
if series_index_value is not None:
series_index_text = str(series_index_value).strip()
if series_index_text:
metadata_overrides.setdefault("series_index", series_index_text)
metadata_overrides.setdefault("series_position", series_index_text)
metadata_overrides.setdefault("series_sequence", series_index_text)
metadata_overrides.setdefault("book_number", series_index_text)
tags_value = metadata_payload.get("tags") or metadata_payload.get("keywords")
if tags_value:
tags_text = _stringify_metadata_value(tags_value)
if tags_text:
metadata_overrides.setdefault("tags", tags_text)
metadata_overrides.setdefault("keywords", tags_text)
metadata_overrides.setdefault("genre", tags_text)
description_value = metadata_payload.get("description") or metadata_payload.get("summary")
if description_value:
description_text = _stringify_metadata_value(description_value)
if description_text:
metadata_overrides.setdefault("description", description_text)
metadata_overrides.setdefault("summary", description_text)
subtitle_value = (
metadata_payload.get("subtitle")
or metadata_payload.get("sub_title")
or metadata_payload.get("calibre_subtitle")
)
if subtitle_value:
subtitle_text = _stringify_metadata_value(subtitle_value)
if subtitle_text:
metadata_overrides.setdefault("subtitle", subtitle_text)
publisher_value = metadata_payload.get("publisher")
if publisher_value:
publisher_text = _stringify_metadata_value(publisher_value)
if publisher_text:
metadata_overrides.setdefault("publisher", publisher_text)
# Author mapping: Abogen templates look for either 'authors' or 'author'.
authors_value = (
metadata_payload.get("authors")
or metadata_payload.get("author")
or metadata_payload.get("creator")
or metadata_payload.get("dc_creator")
)
if authors_value:
authors_text = _stringify_metadata_value(authors_value)
if authors_text:
metadata_overrides.setdefault("authors", authors_text)
metadata_overrides.setdefault("author", authors_text)
return metadata_overrides
@api_bp.get("/integrations/calibre-opds/feed")
def api_calibre_opds_feed() -> ResponseReturnValue:
    """Proxy a Calibre OPDS feed request using stored integration settings.

    Supports plain feed navigation (``href``), full-text search (``q``)
    and first-letter browsing (``letter``).
    """
    calibre = load_integration_settings().get("calibre_opds", {})
    if not calibre.get("base_url"):
        return jsonify({"error": "Calibre OPDS base URL is not configured."}), 400
    try:
        client = CalibreOPDSClient(
            base_url=calibre.get("base_url") or "",
            username=calibre.get("username"),
            password=calibre.get("password"),
            verify=bool(calibre.get("verify_ssl", True)),
        )
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400
    href = request.args.get("href", type=str)
    query = request.args.get("q", type=str)
    letter = request.args.get("letter", type=str)
    try:
        # Letter browsing wins over search, which wins over plain navigation.
        if letter:
            feed = client.browse_letter(letter, start_href=href)
        elif query:
            feed = client.search(query, start_href=href)
        else:
            feed = client.fetch_feed(href)
    except CalibreOPDSError as exc:
        # Upstream server problem -> Bad Gateway.
        return jsonify({"error": str(exc)}), 502
    except Exception as exc:
        return jsonify({"error": f"Unexpected error: {str(exc)}"}), 500
    return jsonify({
        "feed": feed.to_dict(),
        "href": href or "",
        "query": query or "",
    })
@api_bp.post("/integrations/audiobookshelf/folders")
def api_abs_folders() -> ResponseReturnValue:
    """List Audiobookshelf library folders for the requested server."""
    payload = request.get_json(force=True, silent=True) or {}
    # Helper resolves saved tokens when the payload sets use_saved_token.
    resolved = audiobookshelf_settings_from_payload(payload)
    base_url = resolved.get("base_url")
    api_token = resolved.get("api_token")
    library_id = resolved.get("library_id")
    if not (base_url and api_token):
        return jsonify({"error": "Base URL and API token are required"}), 400
    if not library_id:
        return jsonify({"error": "Library ID is required to list folders"}), 400
    try:
        client = AudiobookshelfClient(
            AudiobookshelfConfig(base_url=base_url, api_token=api_token, library_id=library_id)
        )
        return jsonify({"folders": client.list_folders()})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 400
@api_bp.post("/integrations/audiobookshelf/test")
def api_abs_test() -> ResponseReturnValue:
    """Verify Audiobookshelf connectivity by listing libraries."""
    payload = request.get_json(force=True, silent=True) or {}
    # Helper resolves saved tokens when the payload sets use_saved_token.
    resolved = audiobookshelf_settings_from_payload(payload)
    base_url = resolved.get("base_url")
    api_token = resolved.get("api_token")
    if not (base_url and api_token):
        return jsonify({"error": "Base URL and API token are required"}), 400
    try:
        client = AudiobookshelfClient(AudiobookshelfConfig(base_url=base_url, api_token=api_token))
        # Fetching the library list is a sufficient round-trip check.
        client.get_libraries()
        return jsonify({"success": True, "message": "Connection successful."})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 400
@api_bp.post("/integrations/calibre-opds/test")
def api_calibre_opds_test() -> ResponseReturnValue:
    """Verify Calibre OPDS connectivity by fetching the root feed."""
    payload = request.get_json(force=True, silent=True) or {}
    # Helper resolves saved passwords when the payload sets use_saved_password.
    resolved = calibre_settings_from_payload(payload)
    base_url = resolved.get("base_url")
    if not base_url:
        return jsonify({"error": "Base URL is required"}), 400
    try:
        client = CalibreOPDSClient(
            base_url=base_url,
            username=resolved.get("username"),
            password=resolved.get("password"),
            verify=resolved.get("verify_ssl", False),
            timeout=10.0,
        )
        client.fetch_feed()
        return jsonify({"success": True, "message": "Connection successful."})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 400
@api_bp.post("/integrations/calibre-opds/import")
def api_calibre_opds_import() -> ResponseReturnValue:
    """Download a book from the configured Calibre OPDS server and stage it
    as a pending conversion job.

    Expects a JSON body with ``href`` (the OPDS acquisition link) and an
    optional ``metadata`` mapping used to seed metadata overrides.
    Returns the pending job id plus a wizard redirect URL, or a JSON error.
    """
    if not request.is_json:
        return jsonify({"error": "Expected JSON payload."}), 400
    data = request.get_json(force=True, silent=True) or {}
    href = str(data.get("href") or "").strip()
    if not href:
        return jsonify({"error": "Download URL (href) is required."}), 400
    metadata_payload = data.get("metadata") if isinstance(data, Mapping) else None
    metadata_overrides: Dict[str, Any] = {}
    if isinstance(metadata_payload, Mapping):
        metadata_overrides = _opds_metadata_overrides(metadata_payload)
    settings = load_settings()
    integrations = load_integration_settings()
    calibre_settings = integrations.get("calibre_opds", {})
    try:
        client = CalibreOPDSClient(
            base_url=calibre_settings.get("base_url") or "",
            username=calibre_settings.get("username"),
            password=calibre_settings.get("password"),
            verify=bool(calibre_settings.get("verify_ssl", True)),
        )
        temp_dir = Path(current_app.config.get("UPLOAD_FOLDER", "uploads"))
        # parents=True so a missing intermediate directory doesn't fail.
        temp_dir.mkdir(parents=True, exist_ok=True)
        resource = client.download(href)
        filename = resource.filename
        content = resource.content
        if not filename:
            # Fall back to a generic EPUB name when the server gives none.
            filename = f"{uuid.uuid4().hex}.epub"
        # BUG FIX: the stored name previously discarded the download's real
        # filename (and therefore its extension), which extract_from_path
        # relies on to pick a parser. Keep a sanitized copy of it.
        file_path = temp_dir / f"{uuid.uuid4().hex}_{secure_filename(filename)}"
        file_path.write_bytes(content)
        extraction = extract_from_path(file_path)
        if metadata_overrides:
            extraction.metadata.update(metadata_overrides)
        result = build_pending_job_from_extraction(
            stored_path=file_path,
            original_name=filename,
            extraction=extraction,
            form={},
            settings=settings,
            profiles=serialize_profiles(),
            metadata_overrides=metadata_overrides,
        )
        get_service().store_pending_job(result.pending)
        return jsonify({
            "success": True,
            "status": "imported",
            "pending_id": result.pending.id,
            "redirect_url": url_for("main.wizard_step", step="book", pending_id=result.pending.id)
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
# --- LLM Routes ---
@api_bp.post("/llm/models")
def api_llm_models() -> ResponseReturnValue:
    """List models from the configured (or payload-overridden) LLM endpoint.

    The JSON body may override ``base_url``/``api_key``/``timeout``; anything
    not supplied falls back to the saved settings.
    """
    # silent=True: with force=True + silent=False, malformed JSON raised a
    # BadRequest and the `or {}` fallback was dead code. Matching the other
    # JSON routes lets bad payloads fall through to our own 400 below.
    payload = request.get_json(force=True, silent=True) or {}
    current_settings = load_settings()
    base_url = str(payload.get("base_url") or payload.get("llm_base_url") or current_settings.get("llm_base_url") or "").strip()
    if not base_url:
        return jsonify({"error": "LLM base URL is required."}), 400
    api_key = str(payload.get("api_key") or payload.get("llm_api_key") or current_settings.get("llm_api_key") or "")
    timeout = coerce_float(payload.get("timeout"), current_settings.get("llm_timeout", 30.0))
    overrides = {
        "llm_base_url": base_url,
        "llm_api_key": api_key,
        "llm_timeout": timeout,
    }
    merged = apply_overrides(current_settings, overrides)
    configuration = build_llm_configuration(merged)
    try:
        models = list_models(configuration)
    except LLMClientError as exc:
        return jsonify({"error": str(exc)}), 400
    return jsonify({"models": models})
@api_bp.post("/llm/preview")
def api_llm_preview() -> ResponseReturnValue:
    """Preview LLM-backed text normalization on a sample text.

    Overrides from the JSON body (base URL, API key, model, prompt, context
    mode, timeout) are layered over the saved settings; apostrophe mode is
    forced to "llm" so the preview exercises the LLM path.
    """
    # silent=True: malformed JSON falls through to {} instead of raising a
    # BadRequest — consistent with the blueprint's other JSON routes and
    # what the `or {}` fallback expects.
    payload = request.get_json(force=True, silent=True) or {}
    sample_text = str(payload.get("text") or "").strip()
    if not sample_text:
        return jsonify({"error": "Text is required."}), 400
    base_settings = load_settings()
    overrides: Dict[str, Any] = {
        "llm_base_url": str(
            payload.get("base_url")
            or payload.get("llm_base_url")
            or base_settings.get("llm_base_url")
            or ""
        ).strip(),
        "llm_api_key": str(
            payload.get("api_key")
            or payload.get("llm_api_key")
            or base_settings.get("llm_api_key")
            or ""
        ),
        "llm_model": str(
            payload.get("model")
            or payload.get("llm_model")
            or base_settings.get("llm_model")
            or ""
        ),
        "llm_prompt": payload.get("prompt") or payload.get("llm_prompt") or base_settings.get("llm_prompt"),
        "llm_context_mode": payload.get("context_mode") or base_settings.get("llm_context_mode"),
        "llm_timeout": coerce_float(payload.get("timeout"), base_settings.get("llm_timeout", 30.0)),
        # Force the LLM apostrophe path so the preview reflects LLM output.
        "normalization_apostrophe_mode": "llm",
    }
    merged = apply_overrides(base_settings, overrides)
    if not merged.get("llm_base_url"):
        return jsonify({"error": "LLM base URL is required."}), 400
    if not merged.get("llm_model"):
        return jsonify({"error": "Select an LLM model before previewing."}), 400
    apostrophe_config = build_apostrophe_config(settings=merged)
    try:
        normalized_text = normalize_for_pipeline(sample_text, config=apostrophe_config, settings=merged)
    except LLMClientError as exc:
        return jsonify({"error": str(exc)}), 400
    context = {
        "text": sample_text,
        "normalized_text": normalized_text,
    }
    return jsonify(context)
# --- Normalization Routes ---
@api_bp.post("/normalization/preview")
def api_normalization_preview() -> ResponseReturnValue:
    """Preview text normalization using the currently saved settings."""
    # silent=True for consistency with this blueprint's other JSON routes:
    # malformed JSON becomes {} and fails the sample-text check below
    # instead of raising a framework BadRequest.
    payload = request.get_json(force=True, silent=True) or {}
    sample_text = str(payload.get("text") or "").strip()
    if not sample_text:
        return jsonify({"error": "Sample text is required."}), 400
    base_settings = load_settings()
    apostrophe_config = build_apostrophe_config(settings=base_settings)
    try:
        normalized_text = normalize_for_pipeline(sample_text, config=apostrophe_config, settings=base_settings)
    except Exception as exc:
        return jsonify({"error": str(exc)}), 400
    return jsonify({
        "text": sample_text,
        "normalized_text": normalized_text,
    })
@api_bp.post("/entity-pronunciation/preview")
def api_entity_pronunciation_preview() -> ResponseReturnValue:
    """Synthesize a short audio preview for an entity token (or its
    pronunciation override) and return it as base64-encoded audio."""
    payload = request.get_json(force=True, silent=True) or {}
    # Coerce through str(... or "") so JSON nulls don't crash .strip() —
    # consistent with the other routes in this blueprint.
    token = str(payload.get("token") or "").strip()
    pronunciation = str(payload.get("pronunciation") or "").strip()
    voice = str(payload.get("voice") or "").strip()
    language = str(payload.get("language") or "a").strip()
    if not token and not pronunciation:
        return jsonify({"error": "Token or pronunciation required"}), 400
    # The override, when present, wins over the raw token.
    text_to_speak = pronunciation or token
    # Load settings once; previously loaded twice (default voice + GPU flag).
    settings = load_settings()
    if not voice:
        voice = settings.get("default_voice", "af_heart")
    try:
        use_gpu = coerce_bool(settings.get("use_gpu"), False)
        audio_bytes = generate_preview_audio(
            text=text_to_speak,
            voice_spec=voice,
            language=language,
            speed=1.0,
            use_gpu=use_gpu,
        )
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
        return jsonify({"audio_base64": audio_base64})
    except Exception as e:
        return jsonify({"error": str(e)}), 400
from typing import Any, Dict
from flask import Blueprint, render_template
from flask.typing import ResponseReturnValue
from abogen.webui.routes.utils.settings import (
load_settings,
load_integration_settings,
)
from abogen.webui.routes.utils.voice import template_options
# Blueprint for the "Find Books" pages.
books_bp = Blueprint("books", __name__)
def _calibre_integration_enabled(integrations: Dict[str, Any]) -> bool:
calibre = integrations.get("calibre_opds", {})
return bool(calibre.get("enabled") and calibre.get("base_url"))
@books_bp.get("/")
def find_books_page() -> ResponseReturnValue:
    """Render the Find Books page with integration availability flags."""
    integrations = load_integration_settings()
    return render_template(
        "find_books.html",
        integrations=integrations,
        opds_available=_calibre_integration_enabled(integrations),
        options=template_options(),
        settings=load_settings(),
    )
@books_bp.get("/search")
def search_books() -> ResponseReturnValue:
    """Alias route: /search renders the same Find Books page."""
    return find_books_page()
from typing import Mapping
from flask import Blueprint, request, jsonify, abort, render_template, redirect, url_for
from flask.typing import ResponseReturnValue
from abogen.webui.routes.utils.service import require_pending_job, get_service
from abogen.webui.routes.utils.entity import (
refresh_entity_summary,
pending_entities_payload,
upsert_manual_override,
delete_manual_override,
search_manual_override_candidates,
)
from abogen.webui.routes.utils.settings import coerce_int, load_settings
from abogen.webui.routes.utils.voice import template_options
from abogen.pronunciation_store import (
delete_override as delete_pronunciation_override,
save_override as save_pronunciation_override,
get_override_stats,
all_overrides,
)
# Blueprint for entity and pronunciation-override management endpoints.
entities_bp = Blueprint("entities", __name__)
@entities_bp.post("/analyze")
def analyze_entities() -> ResponseReturnValue:
    """Re-run entity analysis for a pending job.

    The ``pending_id`` is accepted from either the form body or the query
    string; the refreshed summary is persisted and returned as JSON.
    """
    pending_id = request.form.get("pending_id") or request.args.get("pending_id")
    if not pending_id:
        abort(400, "Pending ID required")
    pending = require_pending_job(pending_id)
    refresh_entity_summary(pending, pending.chapters)
    get_service().store_pending_job(pending)
    return jsonify(pending_entities_payload(pending))
@entities_bp.get("/pending/<pending_id>")
def get_entities(pending_id: str) -> ResponseReturnValue:
    """Return the entity summary for a pending job.

    Recomputes the summary when a refresh is explicitly requested, the
    caller's cache key is stale, or no summary exists yet.
    """
    pending = require_pending_job(pending_id)
    wants_refresh = (request.args.get("refresh") or "").strip().lower() in {"1", "true", "yes", "force"}
    expected_cache = (request.args.get("cache_key") or "").strip()
    if expected_cache and expected_cache != (pending.entity_cache_key or ""):
        # Stale client cache key forces a recompute.
        wants_refresh = True
    if wants_refresh or not pending.entity_summary:
        refresh_entity_summary(pending, pending.chapters)
        get_service().store_pending_job(pending)
    return jsonify(pending_entities_payload(pending))
@entities_bp.post("/pending/<pending_id>/refresh")
def refresh_entities(pending_id: str) -> ResponseReturnValue:
    """Force a fresh entity analysis for the pending job and persist it."""
    pending = require_pending_job(pending_id)
    refresh_entity_summary(pending, pending.chapters)
    get_service().store_pending_job(pending)
    return jsonify(pending_entities_payload(pending))
@entities_bp.get("/pending/<pending_id>/overrides")
def list_manual_overrides(pending_id: str) -> ResponseReturnValue:
    """Return all override collections attached to a pending job."""
    pending = require_pending_job(pending_id)
    response = {
        "overrides": pending.manual_overrides or [],
        "pronunciation_overrides": pending.pronunciation_overrides or [],
        # heteronym_overrides is optional on older pending-job objects.
        "heteronym_overrides": getattr(pending, "heteronym_overrides", None) or [],
        "language": pending.language or "en",
    }
    return jsonify(response)
@entities_bp.post("/pending/<pending_id>/overrides")
def upsert_override(pending_id: str) -> ResponseReturnValue:
    """Create or update a manual override on a pending job from a JSON body."""
    pending = require_pending_job(pending_id)
    body = request.get_json(silent=True) or {}
    if not isinstance(body, Mapping):
        abort(400, "Invalid override payload")
    try:
        override = upsert_manual_override(pending, body)
    except ValueError as exc:
        # Validation errors become client errors with the message attached.
        abort(400, str(exc))
    get_service().store_pending_job(pending)
    return jsonify({"override": override, **pending_entities_payload(pending)})
@entities_bp.delete("/pending/<pending_id>/overrides/<override_id>")
def delete_override(pending_id: str, override_id: str) -> ResponseReturnValue:
    """Remove a manual override from a pending job; 404 for unknown ids."""
    pending = require_pending_job(pending_id)
    if not delete_manual_override(pending, override_id):
        abort(404)
    get_service().store_pending_job(pending)
    return jsonify({"deleted": True, **pending_entities_payload(pending)})
@entities_bp.get("/pending/<pending_id>/overrides/search")
def search_candidates(pending_id: str) -> ResponseReturnValue:
    """Search a pending job's entities for manual-override candidates."""
    pending = require_pending_job(pending_id)
    query = (request.args.get("q") or request.args.get("query") or "").strip()
    raw_limit = request.args.get("limit")
    # Default to 15 results; clamp caller-supplied limits to 1..50.
    limit_value = 15 if raw_limit is None else coerce_int(raw_limit, 15, minimum=1, maximum=50)
    results = search_manual_override_candidates(pending, query, limit=limit_value)
    return jsonify({"query": query, "limit": limit_value, "results": results})
@entities_bp.post("/overrides")
def upsert_global_override() -> ResponseReturnValue:
    """Save or delete a global pronunciation override from a form post,
    then redirect back to the entities page for the same language."""
    form = request.form
    action = form.get("action", "save")
    lang = form.get("lang", "en")
    token = form.get("token", "").strip()
    if action == "delete":
        if token:
            delete_pronunciation_override(token=token, language=lang)
    elif token:
        save_pronunciation_override(
            token=token,
            pronunciation=form.get("pronunciation", "").strip(),
            voice=form.get("voice", "").strip() or None,
            language=lang,
        )
    return redirect(url_for("entities.entities_page", lang=lang))
@entities_bp.get("/")
def entities_page() -> str:
    """Render the global pronunciation-override management page, applying
    optional voice/pronunciation filters from the query string."""
    settings = load_settings()
    lang = request.args.get("lang") or settings.get("language", "en")
    voice_filter = request.args.get("voice", "")
    pronunciation_filter = request.args.get("pronunciation", "")
    options = template_options()
    stats = get_override_stats(lang)
    entries = all_overrides(lang)
    # Narrow by voice assignment, then by pronunciation presence.
    if voice_filter == "assigned":
        entries = [entry for entry in entries if entry.get("voice")]
    elif voice_filter == "unassigned":
        entries = [entry for entry in entries if not entry.get("voice")]
    if pronunciation_filter == "defined":
        entries = [entry for entry in entries if entry.get("pronunciation")]
    elif pronunciation_filter == "undefined":
        entries = [entry for entry in entries if not entry.get("pronunciation")]
    voice_filter_options = [
        {"value": "", "label": "All voices"},
        {"value": "assigned", "label": "Assigned"},
        {"value": "unassigned", "label": "Unassigned"},
    ]
    pronunciation_filter_options = [
        {"value": "", "label": "All pronunciations"},
        {"value": "defined", "label": "Defined"},
        {"value": "undefined", "label": "Undefined"},
    ]
    return render_template(
        "entities.html",
        language=lang,
        language_label=options["languages"].get(lang, lang),
        options=options,
        languages=options["languages"].items(),
        stats=stats,
        overrides=entries,
        voice_filter=voice_filter,
        pronunciation_filter=pronunciation_filter,
        voice_filter_options=voice_filter_options,
        pronunciation_filter_options=pronunciation_filter_options,
    )
import json
import logging
from pathlib import Path
from typing import Any, Dict, Optional
from flask import Blueprint, Response, abort, redirect, render_template, request, url_for, send_file
from flask.typing import ResponseReturnValue
from abogen.webui.service import (
JobStatus,
load_audiobookshelf_chapters,
build_audiobookshelf_metadata,
)
from abogen.webui.routes.utils.service import get_service
from abogen.webui.routes.utils.form import render_jobs_panel
from abogen.webui.routes.utils.voice import template_options
from abogen.webui.routes.utils.epub import (
job_download_flags,
locate_job_epub,
locate_job_audio,
)
from abogen.webui.routes.utils.settings import (
stored_integration_config,
build_audiobookshelf_config,
coerce_bool,
)
from abogen.webui.routes.utils.common import existing_paths
from abogen.integrations.audiobookshelf import AudiobookshelfClient, AudiobookshelfUploadError
# Module-level logger, named after the module for log filtering.
logger = logging.getLogger(__name__)
# Blueprint for job lifecycle pages and actions.
jobs_bp = Blueprint("jobs", __name__)
@jobs_bp.get("/<job_id>")
def job_detail(job_id: str) -> ResponseReturnValue:
    """Render the detail page for a job.

    Unknown ids get a friendly page with HTTP 200 on purpose, so stale
    browser tabs don't surface a confusing 404.
    """
    job = get_service().get_job(job_id)
    if not job:
        return render_template("job_not_found.html"), 200
    return render_template(
        "job_detail.html",
        job=job,
        options=template_options(),
        JobStatus=JobStatus,
        downloads=job_download_flags(job),
    )
@jobs_bp.post("/<job_id>/pause")
def pause_job(job_id: str) -> ResponseReturnValue:
    """Pause the job; HTMX callers receive the refreshed jobs panel."""
    get_service().pause(job_id)
    htmx = request.headers.get("HX-Request")
    return render_jobs_panel() if htmx else redirect(url_for("jobs.job_detail", job_id=job_id))
@jobs_bp.post("/<job_id>/resume")
def resume_job(job_id: str) -> ResponseReturnValue:
    """Resume a paused job; HTMX callers receive the refreshed jobs panel."""
    get_service().resume(job_id)
    htmx = request.headers.get("HX-Request")
    return render_jobs_panel() if htmx else redirect(url_for("jobs.job_detail", job_id=job_id))
@jobs_bp.post("/<job_id>/cancel")
def cancel_job(job_id: str) -> ResponseReturnValue:
    """Cancel the job; HTMX callers receive the refreshed jobs panel."""
    get_service().cancel(job_id)
    htmx = request.headers.get("HX-Request")
    return render_jobs_panel() if htmx else redirect(url_for("jobs.job_detail", job_id=job_id))
@jobs_bp.post("/<job_id>/delete")
def delete_job(job_id: str) -> ResponseReturnValue:
    """Delete the job; non-HTMX callers are sent back to the dashboard."""
    get_service().delete(job_id)
    htmx = request.headers.get("HX-Request")
    return render_jobs_panel() if htmx else redirect(url_for("main.index"))
@jobs_bp.post("/<job_id>/retry")
def retry_job(job_id: str) -> ResponseReturnValue:
    """Requeue a job via the service's retry; redirect to the clone when one was made."""
    replacement = get_service().retry(job_id)
    if request.headers.get("HX-Request"):
        return render_jobs_panel()
    target = replacement.id if replacement else job_id
    return redirect(url_for("jobs.job_detail", job_id=target))
@jobs_bp.post("/<job_id>/audiobookshelf")
def send_job_to_audiobookshelf(job_id: str) -> ResponseReturnValue:
    """Manually push a completed job's audio (plus optional cover, chapters and
    subtitles) to the configured Audiobookshelf server.

    Every early exit logs the reason onto the job and calls the service's
    private ``_persist_state`` hook so the log line is saved. When the library
    already holds an item with the same title, the client must confirm an
    overwrite first (HTMX clients are prompted via an ``HX-Trigger`` event).
    """
    service = get_service()
    job = service.get_job(job_id)
    if job is None:
        abort(404)
    def _panel_response() -> ResponseReturnValue:
        # HTMX callers swap the jobs panel; plain requests go back to the detail page.
        if request.headers.get("HX-Request"):
            return render_jobs_panel()
        return redirect(url_for("jobs.job_detail", job_id=job.id))
    if job.status != JobStatus.COMPLETED:
        return _panel_response()
    settings = stored_integration_config("audiobookshelf")
    if not settings or not coerce_bool(settings.get("enabled"), False):
        job.add_log("Audiobookshelf upload skipped: integration is disabled.", level="warning")
        service._persist_state()
        return _panel_response()
    config = build_audiobookshelf_config(settings)
    if config is None:
        job.add_log(
            "Audiobookshelf upload skipped: configure base URL, API token, and library ID first.",
            level="warning",
        )
        service._persist_state()
        return _panel_response()
    if not config.folder_id:
        job.add_log(
            "Audiobookshelf upload skipped: enter the folder name or ID in the Audiobookshelf settings.",
            level="warning",
        )
        service._persist_state()
        return _panel_response()
    audio_path = locate_job_audio(job)
    if not audio_path or not audio_path.exists():
        job.add_log("Audiobookshelf upload skipped: audio output not found.", level="warning")
        service._persist_state()
        return _panel_response()
    # Optional cover image; tolerate plain-string paths stored on older jobs.
    cover_path = None
    if config.send_cover and job.cover_image_path:
        cover_candidate = job.cover_image_path
        if not isinstance(cover_candidate, Path):
            cover_candidate = Path(str(cover_candidate))
        if cover_candidate.exists():
            cover_path = cover_candidate
    subtitles = existing_paths(job.result.subtitle_paths) if config.send_subtitles else None
    chapters = load_audiobookshelf_chapters(job) if config.send_chapters else None
    metadata = build_audiobookshelf_metadata(job)
    display_title = metadata.get("title") or audio_path.stem
    # Overwrite may be confirmed via form field or query string.
    overwrite_requested = request.form.get("overwrite") == "true" or request.args.get("overwrite") == "true"
    try:
        client = AudiobookshelfClient(config)
    except ValueError as exc:
        job.add_log(f"Audiobookshelf configuration error: {exc}", level="error")
        service._persist_state()
        return _panel_response()
    try:
        existing_items = client.find_existing_items(display_title, folder_id=config.folder_id)
    except AudiobookshelfUploadError as exc:
        job.add_log(f"Audiobookshelf lookup failed: {exc}", level="error")
        service._persist_state()
        return _panel_response()
    if existing_items and not overwrite_requested:
        job.add_log(
            f"Audiobookshelf already contains '{display_title}'. Awaiting overwrite confirmation.",
            level="warning",
        )
        service._persist_state()
        if request.headers.get("HX-Request"):
            # Ask the frontend to show an overwrite prompt via an HX-Trigger event.
            detail = {
                "jobId": job.id,
                "title": display_title,
                "url": url_for("jobs.send_job_to_audiobookshelf", job_id=job.id),
                "target": request.headers.get("HX-Target") or "#jobs-panel",
                "message": f'Audiobookshelf already contains "{display_title}". Overwrite?',
            }
            headers = {"HX-Trigger": json.dumps({"audiobookshelf-overwrite-prompt": detail})}
            return Response("", status=204, headers=headers)
        return _panel_response()
    if existing_items and overwrite_requested:
        # Confirmed overwrite: remove the duplicates before uploading.
        try:
            client.delete_items(existing_items)
        except AudiobookshelfUploadError as exc:
            job.add_log(f"Audiobookshelf overwrite aborted: {exc}", level="error")
            service._persist_state()
            return _panel_response()
        else:
            job.add_log(
                f"Removed {len(existing_items)} existing Audiobookshelf item(s) prior to overwrite.",
                level="info",
            )
    job.add_log("Audiobookshelf upload triggered manually.", level="info")
    try:
        client.upload_audiobook(
            audio_path,
            metadata=metadata,
            cover_path=cover_path,
            chapters=chapters,
            subtitles=subtitles,
        )
    except AudiobookshelfUploadError as exc:
        job.add_log(f"Audiobookshelf upload failed: {exc}", level="error")
    except Exception as exc:
        job.add_log(f"Audiobookshelf integration error: {exc}", level="error")
    else:
        job.add_log("Audiobookshelf upload queued.", level="success")
    finally:
        service._persist_state()
    return _panel_response()
@jobs_bp.post("/clear-finished")
def clear_finished_jobs() -> ResponseReturnValue:
    """Clear finished jobs from the queue via the service."""
    get_service().clear_finished()
    htmx = request.headers.get("HX-Request")
    return render_jobs_panel() if htmx else redirect(url_for("main.index", _anchor="queue"))
@jobs_bp.get("/<job_id>/epub")
def job_epub(job_id: str) -> ResponseReturnValue:
    """Serve the generated EPUB of a completed job as an attachment."""
    job = get_service().get_job(job_id)
    if job is None or job.status != JobStatus.COMPLETED:
        abort(404)
    book_path = locate_job_epub(job)
    if not book_path:
        abort(404)
    return send_file(
        book_path,
        mimetype="application/epub+zip",
        as_attachment=True,
        download_name=book_path.name,
    )
@jobs_bp.get("/<job_id>/download/<file_type>")
def download_file(job_id: str, file_type: str) -> ResponseReturnValue:
    """Download a generated artifact for a completed job.

    Supported ``file_type`` values:
      * ``audio`` — the rendered audiobook file.
      * ``epub``  — the generated EPUB (same file the dedicated ``job_epub``
        route serves); added so this endpoint covers both artifacts.
    A missing job, an incomplete job, or an unknown type yields 404.
    """
    job = get_service().get_job(job_id)
    if not job or job.status != JobStatus.COMPLETED:
        abort(404)
    if file_type == "audio":
        path = locate_job_audio(job)
        if not path or not path.exists():
            abort(404)
        return send_file(
            path,
            as_attachment=True,
            download_name=path.name,
        )
    if file_type == "epub":
        epub_path = locate_job_epub(job)
        if not epub_path:
            abort(404)
        return send_file(
            epub_path,
            as_attachment=True,
            download_name=epub_path.name,
            mimetype="application/epub+zip",
        )
    # Other file types (subtitles, etc.) are not exposed here.
    abort(404)
@jobs_bp.get("/<job_id>/logs")
def job_logs(job_id: str) -> ResponseReturnValue:
    """Full log page for a job; unknown ids get a friendly 200 placeholder."""
    found = get_service().get_job(job_id)
    if not found:
        # Stale browser tabs keep requesting old ids; a 200 avoids 404 log spam.
        return render_template("job_logs_missing.html"), 200
    return render_template("job_logs_static.html", job=found)
@jobs_bp.get("/<job_id>/logs/partial")
def job_logs_partial(job_id: str) -> ResponseReturnValue:
    """HTMX log fragment; unknown ids get a non-polling fragment so retries stop."""
    found = get_service().get_job(job_id)
    if not found:
        return render_template("partials/logs_section_missing.html"), 200
    return render_template("partials/logs_section.html", job=found)
@jobs_bp.get("/<job_id>/logs/stream")
def stream_logs(job_id: str) -> ResponseReturnValue:
    """Stream a job's log entries to the client as Server-Sent Events.

    Emits one ``data:`` line per log record, polling every 0.5s, and closes
    the stream once the job reaches a terminal state (any remaining records
    are flushed first).
    """
    job = get_service().get_job(job_id)
    if not job:
        abort(404)
    def generate():
        # Hoisted out of the polling loop: previously `import time` re-ran on
        # every 0.5s iteration.
        import time
        last_index = 0
        while True:
            current_logs = job.logs
            if len(current_logs) > last_index:
                for log in current_logs[last_index:]:
                    yield f"data: {json.dumps({'timestamp': log.timestamp, 'level': log.level, 'message': log.message})}\n\n"
                last_index = len(current_logs)
            if job.status in {JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED}:
                break
            time.sleep(0.5)
    return Response(generate(), mimetype="text/event-stream")
@jobs_bp.get("/<job_id>/reader")
def job_reader(job_id: str) -> ResponseReturnValue:
    """Render the embedded reader view for a job; 404 when the job is unknown."""
    found = get_service().get_job(job_id)
    if not found:
        abort(404)
    return render_template("reader_embed.html", job=found)
@jobs_bp.get("/queue")
def queue_page() -> str:
    """Standalone queue page embedding the rendered jobs panel."""
    panel = render_jobs_panel()
    return render_template("queue.html", jobs_panel=panel)
@jobs_bp.get("/partial")
def jobs_partial() -> str:
    """HTMX endpoint that returns only the jobs panel fragment."""
    panel = render_jobs_panel()
    return panel
import logging
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional, cast
from flask import Blueprint, redirect, render_template, request, url_for, jsonify, current_app
from werkzeug.utils import secure_filename
from abogen.webui.service import PendingJob, JobStatus
from abogen.webui.routes.utils.service import get_service, remove_pending_job, submit_job
from abogen.webui.routes.utils.settings import load_settings
from abogen.webui.routes.utils.voice import template_options
from abogen.webui.routes.utils.form import (
normalize_wizard_step,
wants_wizard_json,
render_wizard_partial,
wizard_json_response,
build_pending_job_from_extraction,
apply_book_step_form,
apply_prepare_form,
render_jobs_panel,
)
from abogen.text_extractor import extract_from_path
from abogen.voice_profiles import serialize_profiles
# Module-level logger for the main blueprint.
logger = logging.getLogger(__name__)
# Blueprint for the dashboard landing page and the multi-step creation wizard.
main_bp = Blueprint("main", __name__)
@main_bp.app_template_filter("datetimeformat")
def datetimeformat(value: float, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    """Jinja filter: format a Unix timestamp; falsy values render as an em dash."""
    if not value:
        return "—"
    from datetime import datetime
    moment = datetime.fromtimestamp(value)
    return moment.strftime(fmt)
@main_bp.app_template_filter("durationformat")
def durationformat(value: Optional[float]) -> str:
    """Jinja filter: render a duration in seconds as '5s', '3m 20s' or '2h 5m'."""
    if value is None:
        return ""
    total = int(value)
    if total < 60:
        return f"{total}s"
    minutes, secs = divmod(total, 60)
    if minutes < 60:
        return f"{minutes}m {secs}s"
    hours, minutes = divmod(minutes, 60)
    return f"{hours}h {minutes}m"
@main_bp.route("/")
def index():
    """Dashboard landing page.

    When a ``pending_id`` query parameter references a live pending job, jump
    back into the wizard at the furthest step reached; otherwise render the
    dashboard with queue statistics.
    """
    service = get_service()
    pending_id = request.args.get("pending_id")
    pending = service.get_pending_job(pending_id) if pending_id else None
    if pending:
        step_names = ["book", "chapters", "entities"]
        reached = getattr(pending, "wizard_max_step_index", 0)
        step_name = step_names[min(reached, len(step_names) - 1)]
        return redirect(url_for("main.wizard_step", step=step_name, pending_id=pending.id))
    jobs = service.list_jobs()
    by_status = {
        "completed": JobStatus.COMPLETED,
        "running": JobStatus.RUNNING,
        "pending": JobStatus.PENDING,
        "failed": JobStatus.FAILED,
    }
    stats = {"total": len(jobs)}
    for label, status in by_status.items():
        stats[label] = sum(1 for j in jobs if j.status == status)
    return render_template(
        "index.html",
        options=template_options(),
        settings=load_settings(),
        jobs_panel=render_jobs_panel(),
        stats=stats,
    )
@main_bp.route("/wizard")
def wizard_start():
    """Entry point that forwards to the requested (or default 'book') wizard step."""
    step = request.args.get("step", "book")
    pending_id = request.args.get("pending_id")
    if pending_id:
        return redirect(url_for("main.wizard_step", step=step, pending_id=pending_id))
    return redirect(url_for("main.wizard_step", step=step))
@main_bp.route("/wizard/<step>")
def wizard_step(step: str):
    """Render (or JSON-serve) a wizard step, normalizing invalid step names first."""
    pending_id = request.args.get("pending_id")
    pending = get_service().get_pending_job(pending_id) if pending_id else None
    canonical = normalize_wizard_step(step, pending)
    if canonical != step:
        # Redirect so the URL always shows the canonical step name.
        return redirect(url_for("main.wizard_step", step=canonical, pending_id=pending_id))
    if wants_wizard_json():
        return wizard_json_response(pending, canonical)
    return render_template(
        "index.html",
        options=template_options(),
        settings=load_settings(),
        jobs_panel=render_jobs_panel(),
        wizard_mode=True,
        wizard_step=canonical,
        wizard_partial=render_wizard_partial(pending, canonical),
    )
@main_bp.route("/wizard/upload", methods=["POST"])
def wizard_upload():
    """Handle the wizard 'book' step submission.

    Two cases:
      1. An existing pending job is updated without a new file: apply the
         form and advance to the 'chapters' step.
      2. A new file is uploaded (possibly replacing the one on an existing
         job): store it under the upload folder, extract its text, and create
         a fresh pending job.
    """
    pending_id = request.form.get("pending_id")
    pending = get_service().get_pending_job(pending_id) if pending_id else None
    file = request.files.get("file") or request.files.get("source_file")
    settings = load_settings()
    profiles = serialize_profiles()
    # Case 1: Updating existing job without new file
    if pending and (not file or not file.filename):
        try:
            apply_book_step_form(pending, request.form, settings=settings, profiles=profiles)
            get_service().store_pending_job(pending)
            if wants_wizard_json():
                return wizard_json_response(pending, "chapters")
            return redirect(url_for("main.wizard_step", step="chapters", pending_id=pending.id))
        except Exception as e:
            logger.exception("Error updating job settings")
            error_msg = f"Failed to update settings: {str(e)}"
            if wants_wizard_json():
                return wizard_json_response(pending, "book", error=error_msg, status=500)
            return render_template(
                "index.html",
                options=template_options(),
                settings=settings,
                jobs_panel=render_jobs_panel(),
                wizard_mode=True,
                wizard_step="book",
                wizard_partial=render_wizard_partial(pending, "book", error=error_msg),
            )
    # Case 2: New file upload (or replacing file on existing job)
    if not file or not file.filename:
        if wants_wizard_json():
            return wizard_json_response(None, "book", error="No file selected", status=400)
        return redirect(url_for("main.wizard_step", step="book"))
    filename = secure_filename(file.filename)
    temp_dir = Path(current_app.config.get("UPLOAD_FOLDER", "uploads"))
    temp_dir.mkdir(exist_ok=True)
    # BUG FIX: the stored name previously contained the literal text
    # "(unknown)" instead of the sanitized upload name, discarding the
    # original filename and extension that extraction relies on.
    file_path = temp_dir / f"{uuid.uuid4().hex}_{filename}"
    file.save(file_path)
    try:
        extraction = extract_from_path(file_path)
        result = build_pending_job_from_extraction(
            stored_path=file_path,
            original_name=filename,
            extraction=extraction,
            form=request.form,
            settings=settings,
            profiles=profiles,
        )
        # A replacement upload starts a fresh pending job from the new
        # extraction; the frontend follows the redirect to pick up the new id.
        get_service().store_pending_job(result.pending)
        if wants_wizard_json():
            return wizard_json_response(result.pending, "chapters")
        return redirect(url_for("main.wizard_step", step="chapters", pending_id=result.pending.id))
    except Exception as e:
        logger.exception("Error processing upload")
        # Best-effort cleanup of the stored temp file on failure.
        if file_path.exists():
            try:
                file_path.unlink()
            except OSError:
                pass
        error_msg = f"Failed to process file: {str(e)}"
        if wants_wizard_json():
            return wizard_json_response(None, "book", error=error_msg, status=500)
        return render_template(
            "index.html",
            options=template_options(),
            settings=settings,
            jobs_panel=render_jobs_panel(),
            wizard_mode=True,
            wizard_step="book",
            wizard_partial=render_wizard_partial(None, "book", error=error_msg),
        )
@main_bp.route("/wizard/text", methods=["POST"])
def wizard_text():
    """Create a pending job from raw pasted text instead of an uploaded file."""
    text = request.form.get("text", "").strip()
    title = request.form.get("title", "").strip() or "Pasted Text"
    if not text:
        if wants_wizard_json():
            return wizard_json_response(None, "book", error="No text provided", status=400)
        return redirect(url_for("main.wizard_step", step="book"))
    upload_dir = Path(current_app.config.get("UPLOAD_FOLDER", "uploads"))
    upload_dir.mkdir(exist_ok=True)
    stored_path = upload_dir / f"{uuid.uuid4().hex}.txt"
    stored_path.write_text(text, encoding="utf-8")
    settings = load_settings()
    profiles = serialize_profiles()
    try:
        extraction = extract_from_path(stored_path)
        # Text extraction rarely yields a title, so force the user-supplied one.
        extraction.metadata["title"] = title
        result = build_pending_job_from_extraction(
            stored_path=stored_path,
            original_name=f"{title}.txt",
            extraction=extraction,
            form=request.form,
            settings=settings,
            profiles=profiles,
        )
        get_service().store_pending_job(result.pending)
        if wants_wizard_json():
            return wizard_json_response(result.pending, "chapters")
        return redirect(url_for("main.wizard_step", step="chapters", pending_id=result.pending.id))
    except Exception as e:
        logger.exception("Error processing text")
        # Best-effort cleanup of the temp file on failure.
        if stored_path.exists():
            try:
                stored_path.unlink()
            except OSError:
                pass
        error_msg = f"Failed to process text: {str(e)}"
        if wants_wizard_json():
            return wizard_json_response(None, "book", error=error_msg, status=500)
        return render_template(
            "index.html",
            options=template_options(),
            settings=settings,
            jobs_panel=render_jobs_panel(),
            wizard_mode=True,
            wizard_step="book",
            wizard_partial=render_wizard_partial(None, "book", error=error_msg),
        )
@main_bp.route("/wizard/update", methods=["POST"])
def wizard_update():
    """Persist the submitted wizard step and advance.

    The form carries ``step`` (the step being submitted: 'book', 'chapters'
    or 'entities') and an optional ``next_step`` that overrides the default
    progression. JSON-capable clients receive a wizard JSON payload; others
    get a redirect (success) or a full page render (validation errors).
    """
    pending_id = request.values.get("pending_id")
    if not pending_id:
        if wants_wizard_json():
            return wizard_json_response(None, "book", error="Missing job ID", status=400)
        return redirect(url_for("main.wizard_step", step="book"))
    pending = get_service().get_pending_job(pending_id)
    if not pending:
        if wants_wizard_json():
            return wizard_json_response(None, "book", error="Job expired or not found", status=404)
        return redirect(url_for("main.wizard_step", step="book"))
    current_step = request.form.get("step", "book")
    next_step = request.form.get("next_step")
    settings = load_settings()
    profiles = serialize_profiles()
    try:
        if current_step == "book":
            apply_book_step_form(pending, request.form, settings=settings, profiles=profiles)
            target_step = next_step or "chapters"
        elif current_step == "chapters":
            # This step involves re-analyzing chunks if needed
            (
                chunk_level,
                overrides,
                enabled_overrides,
                errors,
                selected_total,
                selected_config,
                apply_config_requested,
                persist_config_requested,
            ) = apply_prepare_form(pending, request.form)
            if errors:
                # Validation failed: re-render the same step with the messages.
                if wants_wizard_json():
                    return wizard_json_response(pending, current_step, error="\n".join(errors), status=400)
                return render_template(
                    "index.html",
                    options=template_options(),
                    settings=settings,
                    jobs_panel=render_jobs_panel(),
                    wizard_mode=True,
                    wizard_step=current_step,
                    wizard_partial=render_wizard_partial(pending, current_step, error="\n".join(errors)),
                )
            target_step = next_step or "entities"
        elif current_step == "entities":
            # Just saving entity overrides
            apply_prepare_form(pending, request.form)
            target_step = next_step or "entities"  # Stay or finish
        else:
            # Unknown step name: fall back to the start of the wizard.
            target_step = "book"
        get_service().store_pending_job(pending)
        if wants_wizard_json():
            return wizard_json_response(pending, target_step)
        return redirect(url_for("main.wizard_step", step=target_step, pending_id=pending.id))
    except Exception as e:
        logger.exception(f"Error updating wizard step {current_step}")
        error_msg = f"Update failed: {str(e)}"
        if wants_wizard_json():
            return wizard_json_response(pending, current_step, error=error_msg, status=500)
        return render_template(
            "index.html",
            options=template_options(),
            settings=settings,
            jobs_panel=render_jobs_panel(),
            wizard_mode=True,
            wizard_step=current_step,
            wizard_partial=render_wizard_partial(pending, current_step, error=error_msg),
        )
@main_bp.route("/wizard/cancel", methods=["POST"])
def wizard_cancel():
    """Abandon the wizard, discarding the pending job when one is referenced."""
    pending_id = request.values.get("pending_id")
    if pending_id:
        remove_pending_job(pending_id)
    if wants_wizard_json():
        payload = {"status": "cancelled", "redirect_url": url_for("main.index")}
        return jsonify(payload)
    return redirect(url_for("main.index"))
@main_bp.route("/wizard/finish", methods=["POST"])
def wizard_finish():
    """Apply the final form values to the pending job and submit it to the queue."""
    pending_id = request.values.get("pending_id")
    if not pending_id:
        if wants_wizard_json():
            return jsonify({"error": "Missing job ID"}), 400
        return redirect(url_for("main.index"))
    pending = get_service().get_pending_job(pending_id)
    if not pending:
        if wants_wizard_json():
            return jsonify({"error": "Job not found"}), 404
        return redirect(url_for("main.index"))
    # Fold in the last form submission, then hand the job to the queue.
    apply_prepare_form(pending, request.form)
    job_id = submit_job(pending)
    if wants_wizard_json():
        payload = {
            "status": "submitted",
            "job_id": job_id,
            "redirect_url": url_for("main.index"),
            "jobs_panel": render_jobs_panel(),
        }
        return jsonify(payload)
    return redirect(url_for("main.index"))
from pathlib import Path
from collections.abc import Mapping
from typing import Any
from flask import Blueprint, current_app, render_template, request, redirect, url_for, flash, send_file, abort
from flask.typing import ResponseReturnValue
from abogen.webui.routes.utils.settings import (
load_settings,
load_integration_settings,
save_settings,
stored_integration_config,
coerce_bool,
coerce_int,
SAVE_MODE_LABELS,
llm_ready,
_NORMALIZATION_BOOLEAN_KEYS,
_NORMALIZATION_STRING_KEYS,
_DEFAULT_ANALYSIS_THRESHOLD,
)
from abogen.webui.routes.utils.voice import template_options
from abogen.webui.debug_tts_runner import run_debug_tts_wavs
from abogen.debug_tts_samples import DEBUG_TTS_SAMPLES
from abogen.utils import get_user_output_path, load_config
# Blueprint grouping the settings page, the update endpoint, and debug-TTS routes.
settings_bp = Blueprint("settings", __name__)
# Example sentences rendered beside each text-normalization toggle so users can
# preview what each option changes.
_NORMALIZATION_SAMPLES = {
    "apostrophes": "It's a beautiful day, isn't it? 'Yes,' she said, 'it is.'",
    "currency": "The price is $10.50, but it was £8.00 yesterday.",
    "dates": "On 2023-01-01, we celebrated the new year.",
    "numbers": "There are 123 apples and 456 oranges.",
    "abbreviations": "Dr. Smith lives on Elm St. near the U.S. border.",
}
@settings_bp.post("/update")
def update_settings() -> ResponseReturnValue:
    """Persist the settings form.

    Reads every field from ``request.form``, coercing and clamping values,
    merges the previously stored integration configs (so saving unrelated
    settings cannot wipe credentials), then saves everything and redirects
    back to the settings page with a flash message.
    """
    current = load_settings()
    form = request.form
    # General settings
    current["language"] = (form.get("language") or "en").strip()
    current["default_speaker"] = (form.get("default_speaker") or "").strip()
    current["default_voice"] = (form.get("default_voice") or "").strip()
    # Clamp SuperTonic knobs to their supported ranges; ignore unparsable input.
    try:
        current["supertonic_total_steps"] = max(2, min(15, int(form.get("supertonic_total_steps", current.get("supertonic_total_steps", 5)))))
    except (TypeError, ValueError):
        pass
    try:
        current["supertonic_speed"] = max(0.7, min(2.0, float(form.get("supertonic_speed", current.get("supertonic_speed", 1.0)))))
    except (TypeError, ValueError):
        pass
    current["output_format"] = (form.get("output_format") or "mp3").strip()
    current["subtitle_mode"] = (form.get("subtitle_mode") or "Disabled").strip()
    current["subtitle_format"] = (form.get("subtitle_format") or "srt").strip()
    current["save_mode"] = (form.get("save_mode") or "save_next_to_input").strip()
    current["replace_single_newlines"] = coerce_bool(form.get("replace_single_newlines"), False)
    current["use_gpu"] = coerce_bool(form.get("use_gpu"), False)
    current["save_chapters_separately"] = coerce_bool(form.get("save_chapters_separately"), False)
    current["merge_chapters_at_end"] = coerce_bool(form.get("merge_chapters_at_end"), True)
    current["save_as_project"] = coerce_bool(form.get("save_as_project"), False)
    current["separate_chapters_format"] = (form.get("separate_chapters_format") or "wav").strip()
    # Timing values: ignore unparsable input, clamp negatives to zero.
    try:
        current["silence_between_chapters"] = max(0.0, float(form.get("silence_between_chapters", 2.0)))
    except ValueError:
        pass
    try:
        current["chapter_intro_delay"] = max(0.0, float(form.get("chapter_intro_delay", 0.5)))
    except ValueError:
        pass
    current["read_title_intro"] = coerce_bool(form.get("read_title_intro"), False)
    current["read_closing_outro"] = coerce_bool(form.get("read_closing_outro"), True)
    current["normalize_chapter_opening_caps"] = coerce_bool(form.get("normalize_chapter_opening_caps"), True)
    current["auto_prefix_chapter_titles"] = coerce_bool(form.get("auto_prefix_chapter_titles"), True)
    try:
        current["max_subtitle_words"] = max(1, int(form.get("max_subtitle_words", 50)))
    except ValueError:
        pass
    current["chunk_level"] = (form.get("chunk_level") or "paragraph").strip()
    current["generate_epub3"] = coerce_bool(form.get("generate_epub3"), False)
    current["speaker_analysis_threshold"] = coerce_int(
        form.get("speaker_analysis_threshold"),
        _DEFAULT_ANALYSIS_THRESHOLD,
        minimum=1,
        maximum=25,
    )
    def _extract_checkbox(name: str, default: bool) -> bool:
        # Checkboxes: a submitted value wins, a present-but-valueless field
        # means "explicitly unchecked", and an absent field keeps the default.
        values = form.getlist(name) if hasattr(form, "getlist") else []
        if values:
            return coerce_bool(values[-1], default)
        if hasattr(form, "__contains__") and name in form:
            return False
        return default
    # Normalization settings
    for key in _NORMALIZATION_BOOLEAN_KEYS:
        current[key] = _extract_checkbox(key, bool(current.get(key, True)))
    for key in _NORMALIZATION_STRING_KEYS:
        if hasattr(form, "__contains__") and key in form:
            current[key] = (form.get(key) or "").strip()
    # Integrations
    # `load_settings()` returns only the general settings subset and intentionally
    # does not include stored integrations. Seed them from the stored config so
    # saving unrelated settings cannot wipe credentials/tokens.
    current_integrations: dict[str, dict[str, Any]] = {}
    cfg = load_config() or {}
    stored_integrations = cfg.get("integrations")
    if isinstance(stored_integrations, Mapping):
        for name, payload in stored_integrations.items():
            if isinstance(name, str) and isinstance(payload, Mapping):
                current_integrations[name] = dict(payload)
    # Ensure known integrations are loaded even if the config is still in legacy format.
    for name in ("audiobookshelf", "calibre_opds"):
        stored = stored_integration_config(name)
        if stored and name not in current_integrations:
            current_integrations[name] = dict(stored)
    current["integrations"] = current_integrations
    # Audiobookshelf
    abs_enabled = coerce_bool(form.get("audiobookshelf_enabled"), False)
    abs_url = (form.get("audiobookshelf_base_url") or "").strip()
    abs_token = (form.get("audiobookshelf_api_token") or "").strip()
    abs_library = (form.get("audiobookshelf_library_id") or "").strip()
    abs_folder = (form.get("audiobookshelf_folder_id") or "").strip()
    abs_verify = coerce_bool(form.get("audiobookshelf_verify_ssl"), True)
    abs_auto_send = coerce_bool(form.get("audiobookshelf_auto_send"), False)
    abs_cover = coerce_bool(form.get("audiobookshelf_send_cover"), True)
    abs_chapters = coerce_bool(form.get("audiobookshelf_send_chapters"), True)
    abs_subtitles = coerce_bool(form.get("audiobookshelf_send_subtitles"), False)
    try:
        abs_timeout = max(1.0, float(form.get("audiobookshelf_timeout", 30.0)))
    except ValueError:
        abs_timeout = 30.0
    # Preserve existing token if not provided and not cleared
    if not abs_token and not coerce_bool(form.get("audiobookshelf_api_token_clear"), False):
        existing_abs = current["integrations"].get("audiobookshelf", {})
        abs_token = existing_abs.get("api_token", "")
    current["integrations"]["audiobookshelf"] = {
        "enabled": abs_enabled,
        "base_url": abs_url,
        "api_token": abs_token,
        "library_id": abs_library,
        "folder_id": abs_folder,
        "verify_ssl": abs_verify,
        "auto_send": abs_auto_send,
        "send_cover": abs_cover,
        "send_chapters": abs_chapters,
        "send_subtitles": abs_subtitles,
        "timeout": abs_timeout,
    }
    # Calibre OPDS
    calibre_enabled = coerce_bool(form.get("calibre_opds_enabled"), False)
    calibre_url = (form.get("calibre_opds_base_url") or "").strip()
    calibre_user = (form.get("calibre_opds_username") or "").strip()
    calibre_pass = (form.get("calibre_opds_password") or "").strip()
    calibre_verify = coerce_bool(form.get("calibre_opds_verify_ssl"), True)
    # Preserve existing password if not provided and not cleared
    if not calibre_pass and not coerce_bool(form.get("calibre_opds_password_clear"), False):
        existing_calibre = current["integrations"].get("calibre_opds", {})
        calibre_pass = existing_calibre.get("password", "")
    current["integrations"]["calibre_opds"] = {
        "enabled": calibre_enabled,
        "base_url": calibre_url,
        "username": calibre_user,
        "password": calibre_pass,
        "verify_ssl": calibre_verify,
    }
    save_settings(current)
    flash("Settings updated successfully.", "success")
    return redirect(url_for("settings.settings_page"))
@settings_bp.route("/", methods=["GET", "POST"])
def settings_page() -> str | ResponseReturnValue:
    """Settings page; POST submissions are delegated to update_settings()."""
    if request.method == "POST":
        return update_settings()
    debug_manifest = None
    debug_run_id = (request.args.get("debug_run_id") or "").strip()
    if debug_run_id:
        import json
        output_root = Path(current_app.config.get("OUTPUT_FOLDER") or get_user_output_path("web"))
        manifest_path = output_root / "debug" / debug_run_id / "manifest.json"
        if manifest_path.exists():
            try:
                debug_manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
            except Exception:
                # Unreadable/corrupt manifest: show the page without debug info.
                debug_manifest = None
    save_locations = [
        {"value": key, "label": label} for key, label in SAVE_MODE_LABELS.items()
    ]
    default_output_dir = str(Path(get_user_output_path()).resolve())
    return render_template(
        "settings.html",
        settings=load_settings(),
        integrations=load_integration_settings(),
        options=template_options(),
        normalization_samples=_NORMALIZATION_SAMPLES,
        save_locations=save_locations,
        default_output_dir=default_output_dir,
        llm_ready=llm_ready(load_settings()),
        debug_samples=DEBUG_TTS_SAMPLES,
        debug_manifest=debug_manifest,
    )
@settings_bp.post("/debug/run")
def run_debug_wavs() -> ResponseReturnValue:
    """Kick off a debug TTS render and redirect to the resulting run page."""
    settings = load_settings()
    output_root = Path(current_app.config.get("OUTPUT_FOLDER") or get_user_output_path("web"))
    try:
        manifest = run_debug_tts_wavs(output_root=output_root, settings=settings)
    except Exception as exc:
        flash(f"Debug WAV generation failed: {exc}", "error")
        return redirect(url_for("settings.settings_page", _anchor="debug"))
    flash("Debug WAV generation completed.", "success")
    run_id = str(manifest.get("run_id") or "")
    return redirect(url_for("settings.debug_wavs_page", run_id=run_id))
@settings_bp.get("/debug/<run_id>")
def debug_wavs_page(run_id: str) -> ResponseReturnValue:
    """List the artifacts produced by one debug TTS run."""
    import json
    safe_run = (run_id or "").strip()
    if not safe_run:
        abort(404)
    root = Path(current_app.config.get("OUTPUT_FOLDER") or get_user_output_path("web"))
    run_dir = (root / "debug" / safe_run).resolve()
    manifest_path = run_dir / "manifest.json"
    if not manifest_path.exists():
        abort(404)
    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    except Exception:
        abort(404)
    artifacts = manifest.get("artifacts") or []
    # Precompute a download URL for each listed artifact.
    for item in artifacts:
        artifact_name = str(item.get("filename") or "")
        item["url"] = url_for("settings.download_debug_wav", run_id=safe_run, filename=artifact_name)
    return render_template(
        "debug_wavs.html",
        run_id=safe_run,
        artifacts=artifacts,
    )
@settings_bp.get("/debug/<run_id>/<filename>")
def download_debug_wav(run_id: str, filename: str) -> ResponseReturnValue:
    """Serve a single debug artifact: a ``.wav`` sample or ``manifest.json``.

    Defends against path traversal in three layers: the filename must not
    contain path separators, only .wav/manifest.json names are allowed, and
    the resolved path must sit inside the run's own debug directory.
    """
    safe_run = (run_id or "").strip()
    safe_name = (filename or "").strip()
    # Reject empty components and any embedded path separators outright.
    if not safe_run or not safe_name or "/" in safe_name or "\\" in safe_name:
        abort(404)
    is_wav = safe_name.lower().endswith(".wav")
    if not is_wav and safe_name != "manifest.json":
        abort(404)
    root = Path(current_app.config.get("OUTPUT_FOLDER") or get_user_output_path("web"))
    path = (root / "debug" / safe_run / safe_name).resolve()
    if not path.exists() or not path.is_file():
        abort(404)
    # Ensure path is within root/debug/run_id (resolve() defeats ".." tricks).
    expected_dir = (root / "debug" / safe_run).resolve()
    if expected_dir not in path.parents:
        abort(404)
    wants_download = str(request.args.get("download") or "").strip().lower() in {"1", "true", "yes"}
    mimetype = "audio/wav" if is_wav else "application/json"
    # Inline playback should work for WAVs; allow explicit downloads via ?download=1.
    return send_file(
        path,
        mimetype=mimetype,
        as_attachment=wants_download,
        download_name=path.name,
    )
from typing import Any, Optional, Tuple, Iterable, List
from pathlib import Path
def split_profile_spec(value: Any) -> Tuple[str, Optional[str]]:
    """Split a voice specification into ``(voice, profile_name)``.

    A spec prefixed with ``profile:`` or ``speaker:`` (case-insensitive)
    selects a named profile and yields an empty voice string; anything else
    is treated as a plain voice identifier with no profile name.
    """
    text = str(value or "").strip()
    if not text:
        return "", None
    if text.lower().startswith(("profile:", "speaker:")):
        _, _, remainder = text.partition(":")
        name = remainder.strip()
        return "", name if name else None
    return text, None
def split_speaker_spec(value: Any) -> Tuple[str, Optional[str]]:
    """Alias for :func:`split_profile_spec`: 'speaker:' is the preferred prefix,
    with legacy 'profile:' specs still accepted."""
    result = split_profile_spec(value)
    return result
def existing_paths(paths: Optional[Iterable[Path]]) -> List[Path]:
    """Filter *paths* down to those that exist on disk; None/empty yields []."""
    if not paths:
        return []
    found: List[Path] = []
    for candidate in paths:
        if candidate.exists():
            found.append(candidate)
    return found
import time
import uuid
from typing import Any, Dict, Iterable, List, Mapping, Optional
from abogen.webui.service import PendingJob
from abogen.entity_analysis import (
extract_entities,
merge_override,
normalize_token as normalize_entity_token,
normalize_manual_override_token,
search_tokens as search_entity_tokens,
)
from abogen.pronunciation_store import (
delete_override as delete_pronunciation_override,
load_overrides as load_pronunciation_overrides,
save_override as save_pronunciation_override,
search_overrides as search_pronunciation_overrides,
)
from abogen.webui.routes.utils.settings import load_settings
from abogen.heteronym_overrides import extract_heteronym_overrides
def collect_pronunciation_overrides(pending: PendingJob) -> List[Dict[str, Any]]:
    """Build the flat list of pronunciation overrides for *pending*.

    Merges three sources keyed by normalized token, with later sources
    replacing earlier ones on collision: (1) per-entity overrides embedded in
    the entity summary, (2) speaker payloads that carry a pronunciation,
    (3) manual overrides. Entries missing a token or pronunciation are
    skipped.

    Returns a list of dicts with ``token``/``normalized``/``pronunciation``/
    ``voice``/``notes``/``context``/``source``/``language`` keys; optional
    fields are ``None`` when blank.
    """
    language = pending.language or "en"
    # Keyed by normalized token so later sources overwrite earlier ones.
    collected: Dict[str, Dict[str, Any]] = {}
    summary = pending.entity_summary or {}
    # Source 1: overrides stored on entity-summary entries.
    for group in ("people", "entities"):
        entries = summary.get(group)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, Mapping):
                continue
            override_payload = entry.get("override")
            if not isinstance(override_payload, Mapping):
                continue
            token_value = str(entry.get("label") or override_payload.get("token") or "").strip()
            pronunciation_value = str(override_payload.get("pronunciation") or "").strip()
            if not token_value or not pronunciation_value:
                continue
            normalized = normalize_entity_token(entry.get("normalized") or token_value)
            if not normalized:
                continue
            collected[normalized] = {
                "token": token_value,
                "normalized": normalized,
                "pronunciation": pronunciation_value,
                "voice": str(override_payload.get("voice") or "").strip() or None,
                "notes": str(override_payload.get("notes") or "").strip() or None,
                "context": str(override_payload.get("context") or "").strip() or None,
                "source": f"{group}-override",
                "language": language,
            }
    # Source 2: speakers that have an explicit pronunciation set.
    if isinstance(pending.speakers, Mapping):
        for speaker_payload in pending.speakers.values():
            if not isinstance(speaker_payload, Mapping):
                continue
            token_value = str(speaker_payload.get("label") or "").strip()
            pronunciation_value = str(speaker_payload.get("pronunciation") or "").strip()
            if not token_value or not pronunciation_value:
                continue
            normalized = normalize_entity_token(token_value)
            if not normalized:
                continue
            collected[normalized] = {
                "token": token_value,
                "normalized": normalized,
                "pronunciation": pronunciation_value,
                # Prefer the resolved voice, then the configured voice, then
                # the job-level default voice.
                "voice": str(
                    speaker_payload.get("resolved_voice")
                    or speaker_payload.get("voice")
                    or pending.voice
                ).strip()
                or None,
                "notes": None,
                "context": None,
                "source": "speaker",
                "language": language,
            }
    # Source 3: manual overrides win over everything above.
    for manual_entry in pending.manual_overrides or []:
        if not isinstance(manual_entry, Mapping):
            continue
        token_value = str(manual_entry.get("token") or "").strip()
        pronunciation_value = str(manual_entry.get("pronunciation") or "").strip()
        if not token_value or not pronunciation_value:
            continue
        normalized = manual_entry.get("normalized") or normalize_manual_override_token(token_value)
        if not normalized:
            continue
        collected[normalized] = {
            "token": token_value,
            "normalized": normalized,
            "pronunciation": pronunciation_value,
            "voice": str(manual_entry.get("voice") or "").strip() or None,
            "notes": str(manual_entry.get("notes") or "").strip() or None,
            "context": str(manual_entry.get("context") or "").strip() or None,
            "source": str(manual_entry.get("source") or "manual"),
            "language": language,
        }
    return list(collected.values())
def sync_pronunciation_overrides(pending: PendingJob) -> None:
    """Recompute ``pending.pronunciation_overrides`` and mirror manual
    overrides back onto the entity-summary entries in place.

    When no overrides are collected, only the recompute happens and the
    summary is left untouched.
    """
    pending.pronunciation_overrides = collect_pronunciation_overrides(pending)
    if not pending.pronunciation_overrides:
        return
    summary = pending.entity_summary or {}
    # Index manual overrides (that carry a pronunciation) by normalized token.
    manual_map: Dict[str, Mapping[str, Any]] = {}
    for override in pending.manual_overrides or []:
        if not isinstance(override, Mapping):
            continue
        normalized = override.get("normalized") or normalize_entity_token(override.get("token") or "")
        pronunciation_value = str(override.get("pronunciation") or "").strip()
        if not normalized or not pronunciation_value:
            continue
        manual_map[normalized] = override
    # Attach the matching manual override payload to each summary entry.
    for group in ("people", "entities"):
        entries = summary.get(group)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            normalized = normalize_entity_token(entry.get("normalized") or entry.get("label") or "")
            manual_override = manual_map.get(normalized)
            if manual_override:
                entry["override"] = {
                    "token": manual_override.get("token"),
                    "pronunciation": manual_override.get("pronunciation"),
                    "voice": manual_override.get("voice"),
                    "notes": manual_override.get("notes"),
                    "context": manual_override.get("context"),
                    "source": manual_override.get("source"),
                }
def refresh_entity_summary(pending: PendingJob, chapters: Iterable[Mapping[str, Any]]) -> None:
    """Rebuild entity and heteronym analysis for *pending* from *chapters*.

    Heteronym overrides are always recomputed (prior selections are passed
    through as ``existing``); the entity summary is only rebuilt when entity
    recognition is enabled in settings. Stored pronunciation overrides are
    merged into the rebuilt summary, and derived override state is synced.
    """
    settings = load_settings()
    language = pending.language or "en"
    chapter_list: List[Mapping[str, Any]] = [chapter for chapter in chapters if isinstance(chapter, Mapping)]
    if not chapter_list:
        # Nothing to analyse: clear the summary but keep existing overrides.
        pending.entity_summary = {}
        pending.entity_cache_key = ""
        pending.pronunciation_overrides = pending.pronunciation_overrides or []
        pending.heteronym_overrides = pending.heteronym_overrides or []
        return
    # Analyse only the chapters the user enabled; fall back to all of them.
    enabled_only = [chapter for chapter in chapter_list if chapter.get("enabled")]
    target_chapters = enabled_only or chapter_list
    # Always compute heteronym overrides (English only). Preserve any prior selections.
    try:
        pending.heteronym_overrides = extract_heteronym_overrides(
            target_chapters,
            language=language,
            existing=getattr(pending, "heteronym_overrides", None),
        )
    except Exception:
        # Best-effort: keep whatever heteronym overrides we already had.
        pending.heteronym_overrides = getattr(pending, "heteronym_overrides", []) or []
    if not bool(settings.get("enable_entity_recognition", True)):
        pending.entity_summary = {}
        pending.entity_cache_key = ""
        pending.pronunciation_overrides = pending.pronunciation_overrides or []
        return
    result = extract_entities(target_chapters, language=language)
    summary = dict(result.summary)
    # Collect summary tokens so persisted overrides can be looked up in bulk.
    tokens: List[str] = []
    for group in ("people", "entities"):
        entries = summary.get(group)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, Mapping):
                continue
            token_value = str(entry.get("normalized") or entry.get("label") or "").strip()
            if token_value:
                tokens.append(token_value)
    overrides_from_store = load_pronunciation_overrides(language=language, tokens=tokens)
    merged_summary = merge_override(summary, overrides_from_store)
    if result.errors:
        merged_summary["errors"] = list(result.errors)
    merged_summary["cache_key"] = result.cache_key
    pending.entity_summary = merged_summary
    pending.entity_cache_key = result.cache_key
    sync_pronunciation_overrides(pending)
def find_manual_override(pending: PendingJob, identifier: str) -> Optional[Dict[str, Any]]:
    """Return the first manual override whose ``id`` or ``normalized`` value
    equals *identifier*, or ``None`` when nothing matches."""
    for candidate in pending.manual_overrides or []:
        if not isinstance(candidate, dict):
            continue
        if identifier in (candidate.get("id"), candidate.get("normalized")):
            return candidate
    return None
def upsert_manual_override(pending: PendingJob, payload: Mapping[str, Any]) -> Dict[str, Any]:
    """Create or update a manual pronunciation override from *payload*.

    An existing entry is matched by explicit ``id`` first, then by the
    normalized token; otherwise a new entry is appended. The override is also
    persisted to the pronunciation store and the derived
    ``pending.pronunciation_overrides`` list is refreshed.

    Returns a shallow copy of the stored entry. Raises ``ValueError`` when
    the token is missing or normalizes to nothing.
    """
    token_value = str(payload.get("token") or "").strip()
    if not token_value:
        raise ValueError("Token is required")
    pronunciation_value = str(payload.get("pronunciation") or "").strip()
    voice_value = str(payload.get("voice") or "").strip()
    notes_value = str(payload.get("notes") or "").strip()
    context_value = str(payload.get("context") or "").strip()
    normalized = payload.get("normalized") or normalize_manual_override_token(token_value)
    if not normalized:
        raise ValueError("Token is required")
    # Prefer a match on the caller-supplied id; fall back to the token.
    existing = find_manual_override(pending, payload.get("id", "")) or find_manual_override(pending, normalized)
    timestamp = time.time()
    language = pending.language or "en"
    if existing:
        # Update in place; created_at/language/source are preserved.
        existing.update(
            {
                "token": token_value,
                "normalized": normalized,
                "pronunciation": pronunciation_value,
                "voice": voice_value,
                "notes": notes_value,
                "context": context_value,
                "updated_at": timestamp,
            }
        )
        manual_entry = existing
    else:
        manual_entry = {
            "id": payload.get("id") or uuid.uuid4().hex,
            "token": token_value,
            "normalized": normalized,
            "pronunciation": pronunciation_value,
            "voice": voice_value,
            "notes": notes_value,
            "context": context_value,
            "language": language,
            "source": payload.get("source") or "manual",
            "created_at": timestamp,
            "updated_at": timestamp,
        }
        if isinstance(pending.manual_overrides, list):
            pending.manual_overrides.append(manual_entry)
        else:
            # manual_overrides was None/invalid; start a fresh list.
            pending.manual_overrides = [manual_entry]
    # Persist to the shared store; blank optional fields are stored as None.
    save_pronunciation_override(
        language=language,
        token=token_value,
        pronunciation=pronunciation_value or None,
        voice=voice_value or None,
        notes=notes_value or None,
        context=context_value or None,
    )
    sync_pronunciation_overrides(pending)
    return dict(manual_entry)
def delete_manual_override(pending: PendingJob, override_id: str) -> bool:
    """Remove the manual override with *override_id* from *pending*.

    Also removes the matching entry from the persistent pronunciation store
    and refreshes the derived override list. Returns ``True`` when an entry
    was removed, ``False`` for a blank id or no match.
    """
    if not override_id:
        return False
    entries = pending.manual_overrides or []
    for position, candidate in enumerate(entries):
        if not isinstance(candidate, dict) or candidate.get("id") != override_id:
            continue
        delete_pronunciation_override(
            language=pending.language or "en",
            token=candidate.get("token") or "",
        )
        entries.pop(position)
        pending.manual_overrides = entries
        sync_pronunciation_overrides(pending)
        return True
    return False
def search_manual_override_candidates(pending: PendingJob, query: str, *, limit: int = 15) -> List[Dict[str, Any]]:
    """Suggest tokens the user might want to add a manual override for.

    Combines three candidate sources, first registration winning per
    normalized token: entity-summary search hits, persisted override history,
    and the job's own manual overrides. Results are ordered by descending
    count then token, and truncated to *limit* when it is truthy.
    """
    normalized_query = (query or "").strip()
    summary_index = (pending.entity_summary or {}).get("index", {})
    matches = search_entity_tokens(summary_index, normalized_query, limit=limit)
    registry: Dict[str, Dict[str, Any]] = {}
    # Entity-summary hits seed the registry.
    for entry in matches:
        normalized = normalize_entity_token(entry.get("normalized") or entry.get("token") or "")
        if not normalized:
            continue
        registry.setdefault(
            normalized,
            {
                "token": entry.get("token"),
                "normalized": normalized,
                "category": entry.get("category") or "entity",
                "count": entry.get("count", 0),
                "samples": entry.get("samples", []),
                "source": "entity",
            },
        )
    language = pending.language or "en"
    # Previously saved overrides from the persistent store.
    store_matches = search_pronunciation_overrides(language=language, query=normalized_query, limit=limit)
    for entry in store_matches:
        normalized = entry.get("normalized")
        if not normalized:
            continue
        registry.setdefault(
            normalized,
            {
                "token": entry.get("token"),
                "normalized": normalized,
                "category": "history",
                "count": entry.get("usage_count", 0),
                "samples": [entry.get("context")] if entry.get("context") else [],
                "source": "history",
                "pronunciation": entry.get("pronunciation"),
                "voice": entry.get("voice"),
            },
        )
    # Manual overrides already attached to this job.
    for entry in pending.manual_overrides or []:
        if not isinstance(entry, Mapping):
            continue
        normalized = entry.get("normalized")
        if not normalized:
            continue
        registry.setdefault(
            normalized,
            {
                "token": entry.get("token"),
                "normalized": normalized,
                "category": "manual",
                "count": 0,
                "samples": [entry.get("context")] if entry.get("context") else [],
                "source": "manual",
                "pronunciation": entry.get("pronunciation"),
                "voice": entry.get("voice"),
            },
        )
    # Highest usage count first; alphabetical token as tie-breaker.
    ordered = sorted(registry.values(), key=lambda item: (-int(item.get("count") or 0), item.get("token") or ""))
    if limit:
        return ordered[:limit]
    return ordered
def pending_entities_payload(pending: PendingJob) -> Dict[str, Any]:
    """Assemble the entity-review payload for *pending*.

    Bundles the entity summary, the three override collections, the cache
    key, the effective language, and whether entity recognition is enabled
    in settings.
    """
    recognition_enabled = bool(load_settings().get("enable_entity_recognition", True))
    payload: Dict[str, Any] = {
        "summary": pending.entity_summary or {},
        "manual_overrides": pending.manual_overrides or [],
        "pronunciation_overrides": pending.pronunciation_overrides or [],
        "heteronym_overrides": getattr(pending, "heteronym_overrides", None) or [],
        "cache_key": pending.entity_cache_key,
        "language": pending.language or "en",
        "recognition_enabled": recognition_enabled,
    }
    return payload
import json
import math
import posixpath
import zipfile
from html.parser import HTMLParser
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple
from xml.etree import ElementTree as ET
from abogen.webui.service import Job, JobStatus
def _coerce_path(value: Any) -> Optional[Path]:
if isinstance(value, Path):
return value
if isinstance(value, str):
candidate = Path(value)
return candidate
return None
def normalize_epub_path(base_dir: str, href: str) -> str:
    """Normalize an intra-EPUB href relative to *base_dir*.

    Strips fragments/query strings, converts backslashes, treats absolute
    hrefs as archive-rooted, collapses ``.``/``..`` segments, and removes
    consecutive case-insensitive duplicate segments (a common authoring
    mistake where the base directory is repeated inside the href). Returns
    ``""`` for empty results or for paths that would escape the archive root.
    """
    if not href:
        return ""
    # Drop the fragment and query portions, then normalise separators.
    sanitized = href.split("#", 1)[0].split("?", 1)[0].strip()
    sanitized = sanitized.replace("\\", "/")
    if not sanitized:
        return ""
    if sanitized.startswith("/"):
        # Absolute href: interpret relative to the archive root.
        sanitized = sanitized[1:]
        base_dir = ""
    normalized_base = base_dir.strip("/")
    sanitized_lower = sanitized.lower()
    if normalized_base:
        base_lower = normalized_base.lower()
        prefix = base_lower + "/"
        if sanitized_lower.startswith(prefix):
            remainder = sanitized[len(prefix):]
            # Only strip when the base prefix is doubled (e.g. "OEBPS/OEBPS/x").
            if remainder.lower().startswith(prefix):
                sanitized = remainder
                sanitized_lower = sanitized.lower()
                base_dir = ""
        elif sanitized_lower == base_lower:
            # The href is the base directory itself; don't prefix it again.
            base_dir = ""
    base = base_dir.strip("/")
    combined = posixpath.join(base, sanitized) if base else sanitized
    normalized = posixpath.normpath(combined)
    if normalized in {"", "."}:
        return ""
    normalized = normalized.replace("\\", "/")
    segments = [segment for segment in normalized.split("/") if segment and segment != "."]
    if not segments:
        return ""
    # Collapse consecutive segments that match case-insensitively.
    deduped: List[str] = []
    last_lower: Optional[str] = None
    for segment in segments:
        segment_lower = segment.lower()
        if last_lower == segment_lower:
            continue
        deduped.append(segment)
        last_lower = segment_lower
    normalized = "/".join(deduped)
    # Reject anything that would climb above the archive root.
    if normalized.startswith("../") or normalized == "..":
        return ""
    return normalized
def decode_text(payload: bytes) -> str:
    """Decode *payload*, trying UTF-8, UTF-16, then Windows-1252 in order.

    If none decode cleanly, fall back to UTF-8 with undecodable bytes
    silently dropped so the caller always gets a string.
    """
    candidate_codecs = ("utf-8", "utf-16", "windows-1252")
    for codec in candidate_codecs:
        try:
            return payload.decode(codec)
        except UnicodeDecodeError:
            pass
    # Nothing matched cleanly; salvage whatever UTF-8 can recover.
    return payload.decode("utf-8", "ignore")
def coerce_positive_time(value: Any) -> Optional[float]:
    """Coerce *value* into a finite, non-negative float.

    Returns ``None`` for non-numeric input, NaN/inf, and negative values.
    """
    try:
        seconds = float(value)
    except (TypeError, ValueError):
        return None
    return seconds if math.isfinite(seconds) and seconds >= 0 else None
def load_job_metadata(job: Job) -> Dict[str, Any]:
    """Load the JSON metadata artifact recorded on *job*'s result.

    Returns ``{}`` when the job has no result, the artifacts mapping is
    absent, the ``metadata`` reference is missing or not a path/string, the
    file does not exist, or it cannot be read/parsed as UTF-8 JSON.
    """
    artifacts = getattr(getattr(job, "result", None), "artifacts", None)
    if not isinstance(artifacts, Mapping):
        return {}
    reference = artifacts.get("metadata")
    if isinstance(reference, str):
        reference = Path(reference)
    if not isinstance(reference, Path):
        return {}
    if not reference.exists():
        return {}
    try:
        return json.loads(reference.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
        return {}
def resolve_book_title(job: Job, *metadata_sources: Mapping[str, Any]) -> str:
    """Pick a human-readable book title.

    Scans each metadata mapping in order for the first non-blank string under
    a known title key; otherwise falls back to the stem of the job's original
    filename, then the raw filename (possibly ``""``).
    """
    title_keys = ("title", "book_title", "name", "album", "album_title")
    for source in metadata_sources:
        if not isinstance(source, Mapping):
            continue
        for key in title_keys:
            raw = source.get(key)
            if isinstance(raw, str) and raw.strip():
                return raw.strip()
    filename = job.original_filename or ""
    if filename:
        return Path(filename).stem or filename
    return filename
class _NavMapParser(HTMLParser):
    """HTML parser that collects ``href -> label`` links from an EPUB3 nav doc.

    Only anchors inside a ``<nav epub:type="toc">`` (or ``role="doc-toc"``)
    subtree are collected; the first label seen for a given href wins.
    """

    def __init__(self, base_dir: str) -> None:
        super().__init__()
        # Directory of the nav document; anchor hrefs are normalized against it.
        self._base_dir = base_dir
        # True while parsing inside the toc <nav> subtree.
        self._in_nav = False
        # Nesting depth of <nav> elements, so nested navs don't close the toc.
        self._nav_depth = 0
        # Normalized href of the <a> currently being captured, if any.
        self._current_href: Optional[str] = None
        # Text chunks accumulated for the current anchor.
        self._buffer: List[str] = []
        # Result: normalized href -> first label seen.
        self.links: Dict[str, str] = {}

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        tag_lower = tag.lower()
        if tag_lower == "nav":
            attributes = dict(attrs)
            # Accept both the epub:type attribute and an ARIA doc-toc role.
            nav_type = (attributes.get("epub:type") or attributes.get("type") or "").strip().lower()
            nav_role = (attributes.get("role") or "").strip().lower()
            type_tokens = {token.strip() for token in nav_type.split() if token}
            role_tokens = {token.strip() for token in nav_role.split() if token}
            if "toc" in type_tokens or "doc-toc" in role_tokens:
                self._in_nav = True
                self._nav_depth = 1
                return
            if self._in_nav:
                # Nested non-toc <nav>: bump depth so its end tag is matched.
                self._nav_depth += 1
            return
        if not self._in_nav:
            return
        if tag_lower == "a":
            attributes = dict(attrs)
            href = attributes.get("href") or ""
            normalized = normalize_epub_path(self._base_dir, href)
            if normalized:
                # Start capturing label text for this anchor.
                self._current_href = normalized
                self._buffer = []

    def handle_endtag(self, tag: str) -> None:
        tag_lower = tag.lower()
        if tag_lower == "nav" and self._in_nav:
            self._nav_depth -= 1
            if self._nav_depth <= 0:
                self._in_nav = False
            return
        if not self._in_nav:
            return
        if tag_lower == "a" and self._current_href:
            text = "".join(self._buffer).strip()
            if text:
                # setdefault keeps the first label seen for a repeated href.
                self.links.setdefault(self._current_href, text)
            self._current_href = None
            self._buffer = []

    def handle_data(self, data: str) -> None:
        # Accumulate text only while inside the toc nav and an open anchor.
        if self._in_nav and self._current_href and data:
            self._buffer.append(data)
def parse_nav_document(payload: bytes, base_dir: str) -> Dict[str, str]:
    """Extract toc links (``href -> label``) from an EPUB3 nav document."""
    nav_parser = _NavMapParser(base_dir)
    nav_parser.feed(decode_text(payload))
    nav_parser.close()
    return nav_parser.links
def parse_ncx_document(payload: bytes, base_dir: str) -> Dict[str, str]:
    """Parse an NCX table of contents into a ``href -> label`` mapping.

    Malformed XML yields ``{}``. Hrefs are normalized relative to *base_dir*;
    entries without a usable label fall back to the file name, then to a
    positional ``Section N`` placeholder. The first label for a href wins.
    """
    try:
        document = ET.fromstring(payload)
    except ET.ParseError:
        return {}
    toc: Dict[str, str] = {}
    for point in document.findall(".//{*}navPoint"):
        content_el = point.find(".//{*}content")
        if content_el is None:
            continue
        href = normalize_epub_path(base_dir, content_el.attrib.get("src", ""))
        if not href:
            continue
        text_el = point.find(".//{*}text")
        label = ""
        if text_el is not None and text_el.text:
            label = text_el.text.strip()
        if not label:
            label = posixpath.basename(href) or f"Section {len(toc) + 1}"
        toc.setdefault(href, label)
    return toc
def extract_epub_chapters(epub_path: Path) -> List[Dict[str, str]]:
    """List the chapters of the EPUB at *epub_path* as href/title dicts.

    Reads the OPF via ``META-INF/container.xml``, walks the spine in reading
    order, and resolves titles from the EPUB3 nav document or the legacy NCX.
    Falls back to nav-only entries when the spine yields nothing. Any archive
    or XML failure returns an empty list rather than raising.
    """
    chapters: List[Dict[str, str]] = []
    if not epub_path or not epub_path.exists():
        return chapters
    try:
        with zipfile.ZipFile(epub_path, "r") as archive:
            # Locate the OPF package document via the container manifest.
            container_bytes = archive.read("META-INF/container.xml")
            container_root = ET.fromstring(container_bytes)
            rootfile = container_root.find(".//{*}rootfile")
            if rootfile is None:
                return chapters
            opf_path = (rootfile.attrib.get("full-path") or "").strip()
            if not opf_path:
                return chapters
            opf_dir = posixpath.dirname(opf_path)
            opf_bytes = archive.read(opf_path)
            opf_root = ET.fromstring(opf_bytes)
            # Manifest: item id -> normalized href plus properties/media type.
            manifest: Dict[str, Dict[str, str]] = {}
            for item in opf_root.findall(".//{*}manifest/{*}item"):
                item_id = item.attrib.get("id")
                href = item.attrib.get("href")
                if not item_id or not href:
                    continue
                manifest[item_id] = {
                    "href": normalize_epub_path(opf_dir, href),
                    "properties": item.attrib.get("properties", ""),
                    "media_type": item.attrib.get("media-type", ""),
                }
            # Spine: ordered, de-duplicated content hrefs.
            spine_hrefs: List[str] = []
            nav_id: Optional[str] = None
            spine = opf_root.find(".//{*}spine")
            if spine is not None:
                nav_id = spine.attrib.get("toc")
                for itemref in spine.findall(".//{*}itemref"):
                    idref = itemref.attrib.get("idref")
                    if not idref:
                        continue
                    entry = manifest.get(idref)
                    if not entry:
                        continue
                    href = entry["href"]
                    if href and href not in spine_hrefs:
                        spine_hrefs.append(href)
            # Find the toc document: EPUB3 nav item first, then spine's toc id.
            nav_href: Optional[str] = None
            for entry in manifest.values():
                properties = entry.get("properties") or ""
                if "nav" in {token.strip() for token in properties.split() if token}:
                    nav_href = entry["href"]
                    break
            if not nav_href and nav_id:
                toc_entry = manifest.get(nav_id)
                if toc_entry:
                    nav_href = toc_entry["href"]
            # Resolve chapter titles from the toc document, if readable.
            nav_titles: Dict[str, str] = {}
            if nav_href:
                nav_base = posixpath.dirname(nav_href)
                try:
                    nav_bytes = archive.read(nav_href)
                except KeyError:
                    nav_bytes = None
                if nav_bytes is not None:
                    if nav_href.lower().endswith(".ncx"):
                        nav_titles = parse_ncx_document(nav_bytes, nav_base)
                    else:
                        nav_titles = parse_nav_document(nav_bytes, nav_base)
            # Second chance: the NCX referenced by the spine's toc attribute.
            if not nav_titles and nav_id and nav_id in manifest:
                toc_entry = manifest[nav_id]
                nav_base = posixpath.dirname(toc_entry["href"])
                try:
                    nav_bytes = archive.read(toc_entry["href"])
                except KeyError:
                    nav_bytes = None
                if nav_bytes is not None:
                    nav_titles = parse_ncx_document(nav_bytes, nav_base)
            # Emit spine entries with the best available title.
            for index, href in enumerate(spine_hrefs, start=1):
                normalized = href
                if not normalized:
                    continue
                title = (
                    nav_titles.get(normalized)
                    or nav_titles.get(normalized.split("#", 1)[0])
                    or posixpath.basename(normalized)
                    or f"Chapter {index}"
                )
                chapters.append({"href": normalized, "title": title})
            # No spine entries: fall back to the toc links themselves.
            if not chapters and nav_titles:
                for index, (href, title) in enumerate(nav_titles.items(), start=1):
                    normalized = href
                    if not normalized:
                        continue
                    label = title or posixpath.basename(normalized) or f"Chapter {index}"
                    chapters.append({"href": normalized, "title": label})
            return chapters
    except (FileNotFoundError, zipfile.BadZipFile, KeyError, ET.ParseError, UnicodeDecodeError):
        return []
    return chapters
def read_epub_bytes(epub_path: Path, raw_href: str) -> bytes:
    """Read a single resource out of the EPUB archive at *epub_path*.

    *raw_href* is sanitized with :func:`normalize_epub_path`; raises
    ``ValueError`` when it normalizes to nothing, and lets zipfile errors
    (e.g. ``KeyError`` for a missing member) propagate.
    """
    member = normalize_epub_path("", raw_href)
    if not member:
        raise ValueError("Invalid resource path")
    with zipfile.ZipFile(epub_path, "r") as archive:
        payload = archive.read(member)
    return payload
def iter_job_result_paths(job: Job) -> List[Path]:
    """Collect the distinct result files recorded on *job*.

    Scans the result's artifacts mapping and its ``audio_path``/``epub_path``
    attributes, keeping only existing regular files. Entries are de-duplicated
    by resolved path while preserving first-seen order (the original,
    unresolved path objects are returned).
    """
    result = getattr(job, "result", None)
    if result is None:
        return []
    resolved_seen: Set[Path] = set()
    collected: List[Path] = []

    def _remember(candidate: Optional[Path]) -> None:
        # Record candidate unless resolution fails or it was already seen.
        if not candidate:
            return
        try:
            resolved = candidate.resolve()
        except OSError:
            return
        if resolved in resolved_seen:
            return
        resolved_seen.add(resolved)
        collected.append(candidate)

    artifacts = getattr(result, "artifacts", None)
    if isinstance(artifacts, Mapping):
        for value in artifacts.values():
            candidate = _coerce_path(value)
            if candidate and candidate.exists() and candidate.is_file():
                _remember(candidate)
    for attr in ("audio_path", "epub_path"):
        candidate = _coerce_path(getattr(result, attr, None))
        if candidate and candidate.exists() and candidate.is_file():
            _remember(candidate)
    return collected
def iter_job_artifact_dirs(job: Job) -> List[Path]:
    """List the artifact values on *job*'s result that are existing directories."""
    result = getattr(job, "result", None)
    if result is None:
        return []
    artifacts = getattr(result, "artifacts", None)
    if not isinstance(artifacts, Mapping):
        return []
    found: List[Path] = []
    for raw_value in artifacts.values():
        path = _coerce_path(raw_value)
        if path is not None and path.exists() and path.is_dir():
            found.append(path)
    return found
def normalize_suffixes(suffixes: Iterable[str]) -> List[str]:
    """Lower-case, trim, and dot-prefix file suffixes, dropping blanks.

    Input order (and any duplicates) is preserved; e.g. ``"MP3"`` becomes
    ``".mp3"`` while ``".wav"`` passes through unchanged.
    """
    trimmed = (raw.lower().strip() for raw in suffixes if raw)
    result: List[str] = []
    for candidate in trimmed:
        if not candidate:
            continue
        if candidate.startswith("."):
            result.append(candidate)
        else:
            result.append(f".{candidate.lstrip('.')}")
    return result
def find_job_file(job: Job, suffixes: Iterable[str]) -> Optional[Path]:
    """Locate the first job output matching any of *suffixes*.

    Suffix order expresses preference: direct result files are scanned first
    (all files per suffix), then each artifact directory is searched
    recursively. Returns ``None`` when nothing matches or no usable suffix
    was supplied.
    """
    wanted = normalize_suffixes(suffixes)
    if not wanted:
        return None
    result_files = iter_job_result_paths(job)
    for suffix in wanted:
        for file_path in result_files:
            if file_path.suffix.lower() == suffix:
                return file_path
    artifact_dirs = iter_job_artifact_dirs(job)
    for suffix in wanted:
        glob_pattern = f"*{suffix}"
        for directory in artifact_dirs:
            hit: Optional[Path] = None
            try:
                for candidate in directory.rglob(glob_pattern):
                    if candidate.is_file():
                        hit = candidate
                        break
            except OSError:
                hit = None
            if hit:
                return hit
    return None
def locate_job_epub(job: Job) -> Optional[Path]:
    """Return the generated EPUB artifact for *job*, or ``None`` when absent.

    Fix: the previous ``if path: return path`` / ``return None`` branch was
    redundant — ``find_job_file`` already returns ``None`` on a miss, so its
    result can be forwarded directly.
    """
    return find_job_file(job, [".epub"])
def locate_job_m4b(job: Job) -> Optional[Path]:
    """Return the job's ``.m4b`` audiobook artifact, if one was produced."""
    return find_job_file(job, (".m4b",))
def locate_job_audio(job: Job, preferred_suffixes: Optional[Iterable[str]] = None) -> Optional[Path]:
    """Find the best audio artifact for *job*.

    Caller-preferred suffixes are tried first, then a built-in preference
    order; as a last resort the first result file of any type is returned
    (``None`` when the job produced nothing).
    """
    search_order: List[str] = list(preferred_suffixes) if preferred_suffixes else []
    search_order += [".m4b", ".mp3", ".flac", ".opus", ".ogg", ".m4a", ".wav"]
    match = find_job_file(job, search_order)
    if match is not None:
        return match
    fallback_files = iter_job_result_paths(job)
    if fallback_files:
        return fallback_files[0]
    return None
def job_download_flags(job: Job) -> Dict[str, bool]:
    """Report which downloadable outputs currently exist for *job*.

    Jobs that are not completed always report every artifact unavailable;
    completed jobs are probed for each artifact kind on demand.
    """
    if job.status != JobStatus.COMPLETED:
        return {"audio": False, "m4b": False, "epub3": False}
    flags: Dict[str, bool] = {}
    flags["audio"] = locate_job_audio(job) is not None
    flags["m4b"] = locate_job_m4b(job) is not None
    flags["epub3"] = locate_job_epub(job) is not None
    return flags
import re
import time
import uuid
from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, cast
from flask import request, render_template, jsonify
from flask.typing import ResponseReturnValue
from abogen.webui.service import PendingJob, JobStatus
from abogen.webui.routes.utils.service import get_service
from abogen.webui.routes.utils.settings import (
load_settings,
coerce_bool,
coerce_int,
_CHUNK_LEVEL_VALUES,
_DEFAULT_ANALYSIS_THRESHOLD,
_NORMALIZATION_BOOLEAN_KEYS,
_NORMALIZATION_STRING_KEYS,
SAVE_MODE_LABELS,
audiobookshelf_manual_available,
)
from abogen.webui.routes.utils.voice import (
parse_voice_formula,
formula_from_profile,
resolve_voice_setting,
resolve_voice_choice,
prepare_speaker_metadata,
template_options,
)
from abogen.webui.routes.utils.entity import sync_pronunciation_overrides
from abogen.webui.routes.utils.epub import job_download_flags
from abogen.webui.routes.utils.common import split_profile_spec
from abogen.utils import calculate_text_length
from abogen.voice_profiles import serialize_profiles, normalize_profile_entry
from abogen.chunking import ChunkLevel, build_chunks_for_chapters
from abogen.constants import VOICES_INTERNAL
from abogen.speaker_configs import get_config
from abogen.kokoro_text_normalization import normalize_roman_numeral_titles
from dataclasses import dataclass
from pathlib import Path
import mimetypes
@dataclass
class PendingBuildResult:
    """Bundle returned after applying wizard form input to a pending job.

    Carries the mutated job plus the speaker-config selections derived from
    the form (names inferred from usage in the form handlers — confirm
    against the call sites that construct this).
    """

    pending: PendingJob
    selected_speaker_config: Optional[str]
    config_languages: List[str]
    speaker_config_payload: Optional[Dict[str, Any]]
# Ordered wizard step ids, used to drive the prepare-flow progression.
_WIZARD_STEP_ORDER = ["book", "chapters", "entities"]
# Per-step display metadata: 1-based index, heading, and helper text.
_WIZARD_STEP_META = {
    "book": {
        "index": 1,
        "title": "Book parameters",
        "hint": "Choose your source file or paste text, then set the defaults used for chapter analysis and speaker casting.",
    },
    "chapters": {
        "index": 2,
        "title": "Select chapters",
        "hint": "Choose which chapters to convert. We'll analyse entities automatically when you continue.",
    },
    "entities": {
        "index": 3,
        "title": "Review entities",
        "hint": "Assign pronunciations, voices, and manual overrides before queueing the conversion.",
    },
}
# (pattern, weight) pairs: chapter titles matching these look like
# front/back matter and raise the supplement score.
_SUPPLEMENT_TITLE_PATTERNS: List[tuple[re.Pattern[str], float]] = [
    (re.compile(r"\btitle\s+page\b"), 3.0),
    (re.compile(r"\bcopyright\b"), 2.4),
    (re.compile(r"\btable\s+of\s+contents\b"), 2.8),
    (re.compile(r"\bcontents\b"), 2.0),
    (re.compile(r"\backnowledg(e)?ments?\b"), 2.0),
    (re.compile(r"\bdedication\b"), 2.0),
    (re.compile(r"\babout\s+the\s+author(s)?\b"), 2.4),
    (re.compile(r"\balso\s+by\b"), 2.0),
    (re.compile(r"\bpraise\s+for\b"), 2.0),
    (re.compile(r"\bcolophon\b"), 2.2),
    (re.compile(r"\bpublication\s+data\b"), 2.2),
    (re.compile(r"\btranscriber'?s?\s+note\b"), 2.2),
    (re.compile(r"\bglossary\b"), 2.0),
    (re.compile(r"\bindex\b"), 2.0),
    (re.compile(r"\bbibliograph(y|ies)\b"), 2.0),
    (re.compile(r"\breferences\b"), 1.8),
    (re.compile(r"\bappendix\b"), 1.9),
]
# Titles matching these look like real story content; each hit lowers the
# supplement score (see supplement_score).
_CONTENT_TITLE_PATTERNS: List[re.Pattern[str]] = [
    re.compile(r"\bchapter\b"),
    re.compile(r"\bbook\b"),
    re.compile(r"\bpart\b"),
    re.compile(r"\bsection\b"),
    re.compile(r"\bscene\b"),
    re.compile(r"\bprologue\b"),
    re.compile(r"\bepilogue\b"),
    re.compile(r"\bintroduction\b"),
    re.compile(r"\bstory\b"),
]
# (keyword, weight) pairs searched in the chapter body text; boilerplate
# phrases raise the supplement score.
_SUPPLEMENT_TEXT_KEYWORDS: List[tuple[str, float]] = [
    ("copyright", 1.2),
    ("all rights reserved", 1.1),
    ("isbn", 0.9),
    ("library of congress", 1.0),
    ("table of contents", 1.0),
    ("dedicated to", 0.8),
    ("acknowledg", 0.8),
    ("printed in", 0.6),
    ("permission", 0.6),
    ("publisher", 0.5),
    ("praise for", 0.9),
    ("also by", 0.9),
    ("glossary", 0.8),
    ("index", 0.8),
    ("newsletter", 3.2),
    ("mailing list", 2.6),
    ("sign-up", 2.2),
]
def supplement_score(title: str, text: str, index: int) -> float:
    """Heuristic likelihood that a chapter is supplementary material.

    Higher scores mean "more likely front/back matter": supplement-style
    titles and boilerplate keywords add weight, content-style titles subtract
    2.0 each, short bodies add a length bonus, and a positive score on the
    very first chapter gets a small extra nudge.
    """
    title_lc = (title or "").lower()
    body = (text or "").strip()
    score = 0.0
    # Title signals for supplementary sections.
    for title_rx, weight in _SUPPLEMENT_TITLE_PATTERNS:
        if title_rx.search(title_lc):
            score += weight
    # Title signals for real story content pull the score down.
    for title_rx in _CONTENT_TITLE_PATTERNS:
        if title_rx.search(title_lc):
            score -= 2.0
    # Shorter bodies are more likely to be supplementary.
    body_len = len(body)
    if body_len <= 150:
        score += 0.9
    elif body_len <= 400:
        score += 0.6
    elif body_len <= 800:
        score += 0.35
    # Boilerplate phrases inside the body text.
    body_lc = body.lower()
    for keyword, weight in _SUPPLEMENT_TEXT_KEYWORDS:
        if keyword in body_lc:
            score += weight
    # The opening chapter is slightly more likely to be front matter.
    if index == 0 and score > 0:
        score += 0.25
    return score
def should_preselect_chapter(
    title: str,
    text: str,
    index: int,
    total_count: int,
) -> bool:
    """Decide whether a chapter starts out checked in the chapter picker.

    Single-chapter books are always selected; otherwise anything scoring
    below the supplement threshold (1.9) is treated as real content.
    """
    if total_count <= 1:
        return True
    return supplement_score(title, text, index) < 1.9
def ensure_at_least_one_chapter_enabled(chapters: List[Dict[str, Any]]) -> None:
    """Guarantee that at least one chapter dict has a truthy ``enabled`` flag.

    When none are enabled, the chapter with the most characters is switched
    on in place (ties resolve to the earliest chapter). Empty input is a
    no-op.

    Fix: ``chapter.get("characters", 0)`` still yields ``None`` (or another
    non-numeric value) when the key is present but unset, which made the
    ``max()`` comparison raise ``TypeError``; counts are now coerced
    defensively to ``int`` with a fallback of 0.
    """
    if not chapters:
        return
    if any(chapter.get("enabled") for chapter in chapters):
        return

    def _character_count(chapter: Dict[str, Any]) -> int:
        # Treat missing/None/non-numeric counts as zero instead of crashing.
        try:
            return int(chapter.get("characters") or 0)
        except (TypeError, ValueError):
            return 0

    best_index = max(range(len(chapters)), key=lambda idx: _character_count(chapters[idx]))
    chapters[best_index]["enabled"] = True
def apply_prepare_form(
    pending: PendingJob, form: Mapping[str, Any]
) -> tuple[
    ChunkLevel,
    List[Dict[str, Any]],
    List[Dict[str, Any]],
    List[str],
    int,
    str,
    bool,
    bool,
]:
    """Apply the chapter-preparation form submission to *pending* in place.

    Mutates chunking, speaker, intro/outro toggle, chapter, and heteronym
    state from the submitted form (a plain mapping or a werkzeug MultiDict).
    Validation problems are accumulated as user-facing messages rather than
    raised.

    Returns a tuple of:
    ``(chunk_level, chapter_overrides, enabled_overrides, errors,
    selected_character_total, selected_speaker_config,
    apply_config_requested, persist_config_requested)``.
    """
    # --- Chunk level: fall back to the pending value, then "paragraph". ---
    raw_chunk_level = (form.get("chunk_level") or pending.chunk_level or "paragraph").strip().lower()
    if raw_chunk_level not in _CHUNK_LEVEL_VALUES:
        raw_chunk_level = pending.chunk_level if pending.chunk_level in _CHUNK_LEVEL_VALUES else "paragraph"
    pending.chunk_level = raw_chunk_level
    chunk_level_literal = cast(ChunkLevel, pending.chunk_level)
    pending.speaker_mode = "single"
    pending.generate_epub3 = coerce_bool(form.get("generate_epub3"), False)
    # --- Speaker analysis threshold (clamped to 1..25). ---
    threshold_default = getattr(pending, "speaker_analysis_threshold", _DEFAULT_ANALYSIS_THRESHOLD)
    raw_threshold = form.get("speaker_analysis_threshold")
    if raw_threshold is not None:
        pending.speaker_analysis_threshold = coerce_int(
            raw_threshold,
            threshold_default,
            minimum=1,
            maximum=25,
        )
    else:
        pending.speaker_analysis_threshold = threshold_default
    # --- Ensure a narrator speaker exists and tracks the job voice. ---
    if not pending.speakers:
        narrator: Dict[str, Any] = {
            "id": "narrator",
            "label": "Narrator",
            "voice": pending.voice,
        }
        if pending.voice_profile:
            narrator["voice_profile"] = pending.voice_profile
        pending.speakers = {"narrator": narrator}
    else:
        existing_narrator = pending.speakers.get("narrator")
        if isinstance(existing_narrator, dict):
            existing_narrator.setdefault("id", "narrator")
            existing_narrator["label"] = existing_narrator.get("label", "Narrator")
            existing_narrator["voice"] = pending.voice
            if pending.voice_profile:
                existing_narrator["voice_profile"] = pending.voice_profile
            pending.speakers["narrator"] = existing_narrator
    # --- Speaker-config selection flags from the form. ---
    selected_config = (form.get("applied_speaker_config") or "").strip()
    apply_config_requested = str(form.get("apply_speaker_config", "")).strip() in {"1", "true", "on"}
    persist_config_requested = str(form.get("save_speaker_config", "")).strip() in {"1", "true", "on"}
    pending.applied_speaker_config = selected_config or None
    errors: List[str] = []
    # --- Per-speaker fields: pronunciation, voice/formula, languages. ---
    if isinstance(pending.speakers, dict):
        for speaker_id, payload in list(pending.speakers.items()):
            if not isinstance(payload, dict):
                continue
            field_key = f"speaker-{speaker_id}-pronunciation"
            raw_value = form.get(field_key, "")
            pronunciation = raw_value.strip()
            if pronunciation:
                payload["pronunciation"] = pronunciation
            else:
                payload.pop("pronunciation", None)
            voice_value = (form.get(f"speaker-{speaker_id}-voice") or "").strip()
            formula_key = f"speaker-{speaker_id}-formula"
            formula_value = (form.get(formula_key) or "").strip()
            has_formula = False
            if formula_value:
                # A custom mix formula overrides the plain voice selection.
                try:
                    parse_voice_formula(formula_value)
                except ValueError as exc:
                    label = payload.get("label") or speaker_id.replace("_", " ").title()
                    errors.append(f"Invalid custom mix for {label}: {exc}")
                else:
                    payload["voice_formula"] = formula_value
                    payload["resolved_voice"] = formula_value
                    payload.pop("voice_profile", None)
                    has_formula = True
            else:
                payload.pop("voice_formula", None)
            # "__custom_mix" is the sentinel for "use the formula field".
            if voice_value == "__custom_mix":
                voice_value = ""
            if voice_value:
                payload["voice"] = voice_value
                if not has_formula:
                    payload["resolved_voice"] = voice_value
            else:
                payload.pop("voice", None)
                if not has_formula:
                    payload.pop("resolved_voice", None)
            # Languages arrive via getlist on MultiDicts, CSV otherwise.
            lang_key = f"speaker-{speaker_id}-languages"
            languages: List[str] = []
            getter = getattr(form, "getlist", None)
            if callable(getter):
                values = cast(Iterable[str], getter(lang_key))
                languages = [code.strip() for code in values if code]
            else:
                raw_langs = form.get(lang_key)
                if isinstance(raw_langs, str):
                    languages = [item.strip() for item in raw_langs.split(",") if item.strip()]
            payload["config_languages"] = languages
    profiles = serialize_profiles()
    # --- Chapter intro delay (seconds, clamped to >= 0). ---
    raw_delay = form.get("chapter_intro_delay")
    if raw_delay is not None:
        raw_normalized = raw_delay.strip()
        if raw_normalized:
            try:
                pending.chapter_intro_delay = max(0.0, float(raw_normalized))
            except ValueError:
                errors.append("Enter a valid number for the chapter intro delay.")
        else:
            pending.chapter_intro_delay = 0.0
    # --- Checkbox: read the chapter title as an intro. The last submitted
    # value wins; a form that contains the key with no values means "off". ---
    intro_values: List[str] = []
    getter = getattr(form, "getlist", None)
    if callable(getter):
        raw_intro_values = getter("read_title_intro")
        if raw_intro_values:
            intro_values = list(cast(Iterable[str], raw_intro_values))
    else:
        raw_intro = form.get("read_title_intro")
        if raw_intro is not None:
            intro_values = [raw_intro]
    if intro_values:
        pending.read_title_intro = coerce_bool(intro_values[-1], pending.read_title_intro)
    elif hasattr(form, "__contains__") and "read_title_intro" in form:
        pending.read_title_intro = False
    # --- Checkbox: read a closing outro (same submission pattern). ---
    outro_values: List[str] = []
    if callable(getter):
        raw_outro_values = getter("read_closing_outro")
        if raw_outro_values:
            outro_values = list(cast(Iterable[str], raw_outro_values))
    else:
        raw_outro = form.get("read_closing_outro")
        if raw_outro is not None:
            outro_values = [raw_outro]
    if outro_values:
        pending.read_closing_outro = coerce_bool(
            outro_values[-1], getattr(pending, "read_closing_outro", True)
        )
    elif hasattr(form, "__contains__") and "read_closing_outro" in form:
        pending.read_closing_outro = False
    # --- Checkbox: normalize all-caps chapter openings (same pattern). ---
    caps_values: List[str] = []
    if callable(getter):
        raw_caps_values = getter("normalize_chapter_opening_caps")
        if raw_caps_values:
            caps_values = list(cast(Iterable[str], raw_caps_values))
    else:
        raw_caps = form.get("normalize_chapter_opening_caps")
        if raw_caps is not None:
            caps_values = [raw_caps]
    if caps_values:
        pending.normalize_chapter_opening_caps = coerce_bool(
            caps_values[-1], getattr(pending, "normalize_chapter_opening_caps", True)
        )
    elif hasattr(form, "__contains__") and "normalize_chapter_opening_caps" in form:
        pending.normalize_chapter_opening_caps = False
    # --- Per-chapter fields: enabled flag, title, voice selection. ---
    overrides: List[Dict[str, Any]] = []
    selected_total = 0
    for index, chapter in enumerate(pending.chapters):
        enabled = form.get(f"chapter-{index}-enabled") == "on"
        title_input = (form.get(f"chapter-{index}-title") or "").strip()
        title = title_input or chapter.get("title") or f"Chapter {index + 1}"
        voice_selection = form.get(f"chapter-{index}-voice", "__default")
        formula_input = (form.get(f"chapter-{index}-formula") or "").strip()
        entry: Dict[str, Any] = {
            "id": chapter.get("id") or f"{index:04d}",
            "index": index,
            "order": index,
            "source_title": chapter.get("title") or title,
            "title": title,
            "text": chapter.get("text", ""),
            "enabled": enabled,
        }
        entry["characters"] = calculate_text_length(entry["text"])
        if enabled:
            # Voice selection values are "voice:<id>", "profile:<name>",
            # "formula", or "__default" (leave job-level voice in place).
            if voice_selection.startswith("voice:"):
                entry["voice"] = voice_selection.split(":", 1)[1]
                entry["resolved_voice"] = entry["voice"]
            elif voice_selection.startswith("profile:"):
                profile_name = voice_selection.split(":", 1)[1]
                entry["voice_profile"] = profile_name
                profile_entry = profiles.get(profile_name) or {}
                formula_value = formula_from_profile(profile_entry)
                if formula_value:
                    entry["voice_formula"] = formula_value
                    entry["resolved_voice"] = formula_value
                else:
                    errors.append(f"Profile '{profile_name}' has no configured voices.")
            elif voice_selection == "formula":
                if not formula_input:
                    errors.append(f"Provide a custom formula for chapter {index + 1}.")
                else:
                    try:
                        parse_voice_formula(formula_input)
                    except ValueError as exc:
                        errors.append(str(exc))
                    else:
                        entry["voice_formula"] = formula_input
                        entry["resolved_voice"] = formula_input
            # Only enabled chapters contribute to the selected total.
            selected_total += entry["characters"]
        overrides.append(entry)
        pending.chapters[index] = dict(entry)
    enabled_overrides = [entry for entry in overrides if entry.get("enabled")]
    # --- Heteronym choices: accept only keys listed in the entry options. ---
    heteronym_entries = getattr(pending, "heteronym_overrides", None)
    if isinstance(heteronym_entries, list) and heteronym_entries:
        for entry in heteronym_entries:
            if not isinstance(entry, dict):
                continue
            entry_id = str(entry.get("entry_id") or entry.get("id") or "").strip()
            if not entry_id:
                continue
            raw_choice = form.get(f"heteronym-{entry_id}-choice")
            if raw_choice is None:
                continue
            choice = str(raw_choice).strip()
            if not choice:
                continue
            options = entry.get("options")
            if isinstance(options, list) and options:
                allowed = {
                    str(opt.get("key")).strip()
                    for opt in options
                    if isinstance(opt, dict) and str(opt.get("key") or "").strip()
                }
                if allowed and choice not in allowed:
                    continue
            entry["choice"] = choice
    sync_pronunciation_overrides(pending)
    return (
        chunk_level_literal,
        overrides,
        enabled_overrides,
        errors,
        selected_total,
        selected_config,
        apply_config_requested,
        persist_config_requested,
    )
def apply_book_step_form(
    pending: PendingJob,
    form: Mapping[str, Any],
    *,
    settings: Mapping[str, Any],
    profiles: Mapping[str, Any],
) -> None:
    """Apply the "book" wizard step form fields onto a pending job.

    Mutates ``pending`` in place: language, subtitle mode, chunking level,
    analysis threshold, checkbox toggles, normalization overrides, narrator
    voice/profile selection and metadata tags. Missing form fields fall back
    to the pending job's current values, then to the global ``settings``.

    Args:
        pending: Pending job to update in place.
        form: Submitted form mapping (may be a Flask MultiDict with getlist).
        settings: Global application settings used for defaults.
        profiles: Saved voice profiles keyed by name.
    """

    def _extract_checkbox(name: str, default: bool) -> bool:
        # HTML checkbox semantics: the field may be multi-valued (hidden
        # "off" input plus the checkbox itself) — the last value wins. A
        # field present with no usable value means explicitly unchecked;
        # a field absent entirely keeps the default.
        values: List[str] = []
        getter = getattr(form, "getlist", None)
        if callable(getter):
            raw_values = getter(name)
            if raw_values:
                values = list(cast(Iterable[str], raw_values))
        else:
            raw_flag = form.get(name)
            if raw_flag is not None:
                values = [raw_flag]
        if values:
            return coerce_bool(values[-1], default)
        if hasattr(form, "__contains__") and name in form:
            return False
        return default

    language_fallback = pending.language or settings.get("language", "en")
    raw_language = (form.get("language") or language_fallback or "en").strip()
    if raw_language:
        pending.language = raw_language
    subtitle_mode = (form.get("subtitle_mode") or pending.subtitle_mode or "Disabled").strip()
    if subtitle_mode:
        pending.subtitle_mode = subtitle_mode
    pending.generate_epub3 = coerce_bool(form.get("generate_epub3"), bool(pending.generate_epub3))
    chunk_level_default = str(settings.get("chunk_level", "paragraph")).strip().lower()
    raw_chunk_level = (form.get("chunk_level") or pending.chunk_level or chunk_level_default).strip().lower()
    if raw_chunk_level not in _CHUNK_LEVEL_VALUES:
        raw_chunk_level = chunk_level_default if chunk_level_default in _CHUNK_LEVEL_VALUES else (pending.chunk_level or "paragraph")
    pending.chunk_level = raw_chunk_level
    threshold_default = pending.speaker_analysis_threshold or settings.get("speaker_analysis_threshold", _DEFAULT_ANALYSIS_THRESHOLD)
    raw_threshold = form.get("speaker_analysis_threshold")
    if raw_threshold is not None:
        pending.speaker_analysis_threshold = coerce_int(
            raw_threshold,
            threshold_default,
            minimum=1,
            maximum=25,
        )
    raw_delay = form.get("chapter_intro_delay")
    if raw_delay is not None:
        try:
            pending.chapter_intro_delay = max(0.0, float(str(raw_delay).strip() or 0.0))
        except ValueError:
            # Non-numeric input keeps the previous delay.
            pass
    # Checkbox toggles: all three share the "last value wins / present means
    # unchecked / absent means default" semantics implemented by the helper.
    intro_default = pending.read_title_intro if isinstance(pending.read_title_intro, bool) else bool(settings.get("read_title_intro", False))
    pending.read_title_intro = _extract_checkbox("read_title_intro", intro_default)
    outro_default = (
        pending.read_closing_outro
        if isinstance(getattr(pending, "read_closing_outro", None), bool)
        else bool(settings.get("read_closing_outro", True))
    )
    pending.read_closing_outro = _extract_checkbox("read_closing_outro", outro_default)
    caps_default = (
        pending.normalize_chapter_opening_caps
        if isinstance(getattr(pending, "normalize_chapter_opening_caps", None), bool)
        else bool(settings.get("normalize_chapter_opening_caps", True))
    )
    pending.normalize_chapter_opening_caps = _extract_checkbox("normalize_chapter_opening_caps", caps_default)
    overrides_existing = getattr(pending, "normalization_overrides", None)
    overrides: Dict[str, Any] = dict(overrides_existing or {})
    for key in _NORMALIZATION_BOOLEAN_KEYS:
        default_toggle = overrides.get(key, bool(settings.get(key, True)))
        overrides[key] = _extract_checkbox(key, default_toggle)
    for key in _NORMALIZATION_STRING_KEYS:
        default_val = overrides.get(key, str(settings.get(key, "")))
        val = form.get(key)
        if val is not None:
            overrides[key] = str(val)
        else:
            overrides[key] = default_val
    pending.normalization_overrides = overrides
    speed_value = form.get("speed")
    if speed_value is not None:
        try:
            pending.speed = float(speed_value)
        except ValueError:
            pass
    # NOTE: Do not auto-set a global TTS provider at the book level based on the
    # narrator defaults. Provider is resolved per-speaker/per-chunk from the voice
    # spec (e.g. "speaker:Name" for saved speakers, or a Kokoro mix formula).
    # This enables mixed-provider conversions (e.g. narrator=SuperTonic, characters=Kokoro).
    provider_value = str(form.get("tts_provider") or "").strip().lower()
    if provider_value in {"kokoro", "supertonic"}:
        pending.tts_provider = provider_value
    # Determine the base speaker selection (saved speaker ref or raw voice).
    narrator_voice_raw = (
        form.get("voice")
        or pending.voice
        or settings.get("default_speaker")
        or settings.get("default_voice")
        or ""
    ).strip()
    profiles_map = dict(profiles) if isinstance(profiles, Mapping) else dict(profiles or {})
    base_spec, _selected_speaker_name = split_profile_spec(narrator_voice_raw)
    profile_selection = (form.get("voice_profile") or pending.voice_profile or "__standard").strip()
    custom_formula_raw = (form.get("voice_formula") or "").strip()
    narrator_voice_raw = (base_spec or narrator_voice_raw or settings.get("default_voice") or "").strip()
    resolved_default_voice, inferred_profile, _ = resolve_voice_setting(
        narrator_voice_raw,
        profiles=profiles_map,
    )
    if profile_selection in {"__standard", "", None} and inferred_profile:
        profile_selection = inferred_profile
    if profile_selection == "__formula":
        profile_name = ""
        custom_formula = custom_formula_raw
    elif profile_selection in {"__standard", "", None}:
        profile_name = ""
        custom_formula = ""
    else:
        profile_name = profile_selection
        custom_formula = ""
    base_voice_spec = resolved_default_voice or narrator_voice_raw
    if not base_voice_spec and VOICES_INTERNAL:
        base_voice_spec = VOICES_INTERNAL[0]
    voice_choice, resolved_language, selected_profile = resolve_voice_choice(
        pending.language,
        base_voice_spec,
        profile_name,
        custom_formula,
        profiles_map,
    )
    if resolved_language:
        pending.language = resolved_language
    if profile_selection == "__formula" and custom_formula_raw:
        pending.voice = custom_formula_raw
        pending.voice_profile = None
    elif profile_selection not in {"__standard", "", None, "__formula"}:
        pending.voice_profile = selected_profile or profile_selection
        pending.voice = voice_choice
    else:
        pending.voice_profile = None
        fallback_voice = base_voice_spec or narrator_voice_raw
        pending.voice = voice_choice or fallback_voice
    pending.applied_speaker_config = (form.get("speaker_config") or "").strip() or None
    # Metadata updates
    if "meta_title" in form:
        pending.metadata_tags["title"] = str(form.get("meta_title", "")).strip()
    if "meta_subtitle" in form:
        pending.metadata_tags["subtitle"] = str(form.get("meta_subtitle", "")).strip()
    if "meta_author" in form:
        authors = str(form.get("meta_author", "")).strip()
        pending.metadata_tags["authors"] = authors
        pending.metadata_tags["author"] = authors
    if "meta_series" in form:
        series = str(form.get("meta_series", "")).strip()
        # Mirror the value under every alias downstream consumers may read.
        pending.metadata_tags["series"] = series
        pending.metadata_tags["series_name"] = series
        pending.metadata_tags["seriesname"] = series
        pending.metadata_tags["series_title"] = series
        pending.metadata_tags["seriestitle"] = series
        # If user manually edits series, update opds_series too so it persists
        if "opds_series" in pending.metadata_tags:
            pending.metadata_tags["opds_series"] = series
    if "meta_series_index" in form:
        idx = str(form.get("meta_series_index", "")).strip()
        pending.metadata_tags["series_index"] = idx
        pending.metadata_tags["series_sequence"] = idx
    if "meta_publisher" in form:
        pending.metadata_tags["publisher"] = str(form.get("meta_publisher", "")).strip()
    if "meta_description" in form:
        desc = str(form.get("meta_description", "")).strip()
        pending.metadata_tags["description"] = desc
        pending.metadata_tags["summary"] = desc
    if coerce_bool(form.get("remove_cover"), False):
        pending.cover_image_path = None
        pending.cover_image_mime = None
def persist_cover_image(extraction_result: Any, stored_path: Path) -> tuple[Optional[Path], Optional[str]]:
    """Write the extracted cover image next to the stored book file.

    The file is named ``<stem>_cover<ext>``; a numeric suffix is appended
    when a file with that name already exists. Returns ``(path, mime)`` on
    success, or ``(None, None)`` when there are no cover bytes or the file
    cannot be written.
    """
    image_data = getattr(extraction_result, "cover_image", None)
    if not image_data:
        return None, None
    mime_type = getattr(extraction_result, "cover_mime", None)
    # Unknown/missing MIME types fall back to a .png suffix.
    suffix = mimetypes.guess_extension(mime_type or "") or ".png"
    stem = Path(stored_path).stem or "cover"
    parent = stored_path.parent
    target = parent / f"{stem}_cover{suffix}"
    attempt = 0
    while target.exists():
        attempt += 1
        target = parent / f"{stem}_cover_{attempt}{suffix}"
    try:
        target.write_bytes(image_data)
    except OSError:
        # Best-effort: an unwritable destination simply means "no cover".
        return None, None
    return target, mime_type
def build_pending_job_from_extraction(
    *,
    stored_path: Path,
    original_name: str,
    extraction: Any,
    form: Mapping[str, Any],
    settings: Mapping[str, Any],
    profiles: Mapping[str, Any],
    metadata_overrides: Optional[Mapping[str, Any]] = None,
) -> PendingBuildResult:
    """Assemble a new PendingJob from a freshly extracted book.

    Combines the extraction output (chapters, metadata, cover), the upload
    form, global settings and saved voice profiles into a PendingJob plus
    the speaker-config context used by the wizard.

    Args:
        stored_path: Where the uploaded source file was saved.
        original_name: Original upload filename (used as fallback title).
        extraction: Extraction result; ``chapters``, ``metadata``,
            ``cover_image``/``cover_mime``, ``total_characters`` and
            ``combined_text`` attributes are read when present.
        form: Submitted upload form values.
        settings: Global application settings supplying defaults.
        profiles: Saved voice profiles keyed by name.
        metadata_overrides: Optional extra metadata; only fills keys that
            are missing or empty in the extracted metadata.

    Returns:
        PendingBuildResult wrapping the new pending job, the selected
        speaker config name, its payload, and the config languages.
    """
    profiles_map = dict(profiles)
    # Persist the cover beside the stored file (best effort; may be None).
    cover_path, cover_mime = persist_cover_image(extraction, stored_path)
    if getattr(extraction, "chapters", None):
        # Normalize roman-numeral chapter titles in place when any changed.
        original_titles = [chapter.title for chapter in extraction.chapters]
        normalized_titles = normalize_roman_numeral_titles(original_titles)
        if normalized_titles != original_titles:
            for chapter, new_title in zip(extraction.chapters, normalized_titles):
                chapter.title = new_title
    metadata_tags = dict(getattr(extraction, "metadata", {}) or {})
    if metadata_overrides:
        # Case-insensitive merge: overrides only fill blank/missing keys,
        # never replace non-empty extracted values.
        normalized_keys = {str(existing_key).casefold(): str(existing_key) for existing_key in metadata_tags.keys()}
        for key, value in metadata_overrides.items():
            if value is None:
                continue
            key_text = str(key or "").strip()
            if not key_text:
                continue
            value_text = str(value).strip()
            if not value_text:
                continue
            lookup = key_text.casefold()
            existing_key = normalized_keys.get(lookup)
            if existing_key:
                existing_value = str(metadata_tags.get(existing_key) or "").strip()
                if existing_value:
                    continue
                target_key = existing_key
            else:
                target_key = key_text
                normalized_keys[lookup] = target_key
            metadata_tags[target_key] = value_text
    total_chars = getattr(extraction, "total_characters", None) or calculate_text_length(
        getattr(extraction, "combined_text", "")
    )
    chapters_source = getattr(extraction, "chapters", []) or []
    total_chapter_count = len(chapters_source)
    chapters_payload: List[Dict[str, Any]] = []
    for index, chapter in enumerate(chapters_source):
        enabled = should_preselect_chapter(chapter.title, chapter.text, index, total_chapter_count)
        chapters_payload.append(
            {
                "id": f"{index:04d}",
                "index": index,
                "title": chapter.title,
                "text": chapter.text,
                "characters": calculate_text_length(chapter.text),
                "enabled": enabled,
            }
        )
    if not chapters_payload:
        # Chapterless sources get a single placeholder chapter named after
        # the upload so the wizard always has something to show.
        chapters_payload.append(
            {
                "id": "0000",
                "index": 0,
                "title": original_name,
                "text": "",
                "characters": 0,
                "enabled": True,
            }
        )
    ensure_at_least_one_chapter_enabled(chapters_payload)
    # "a" is the fallback language code here — TODO confirm it maps to
    # Kokoro's auto/American-English default.
    language = str(form.get("language") or "a").strip() or "a"
    profiles_map = dict(profiles) if isinstance(profiles, Mapping) else dict(profiles or {})
    default_voice_setting = settings.get("default_voice") or ""
    resolved_default_voice, inferred_profile, inferred_language = resolve_voice_setting(
        default_voice_setting,
        profiles=profiles_map,
    )
    base_voice_input = str(form.get("voice") or "").strip()
    profile_selection = (form.get("voice_profile") or "__standard").strip()
    custom_formula_raw = str(form.get("voice_formula") or "").strip()
    if profile_selection in {"__standard", ""} and inferred_profile:
        profile_selection = inferred_profile
    base_voice = base_voice_input or resolved_default_voice or str(default_voice_setting).strip()
    if not base_voice and VOICES_INTERNAL:
        base_voice = VOICES_INTERNAL[0]
    selected_speaker_config = (form.get("speaker_config") or "").strip()
    speaker_config_payload = get_config(selected_speaker_config) if selected_speaker_config else None
    # Translate the profile selector sentinel values ("__formula",
    # "__standard") into the (profile_name, custom_formula) pair expected
    # by resolve_voice_choice.
    if profile_selection == "__formula":
        profile_name = ""
        custom_formula = custom_formula_raw
    elif profile_selection in {"__standard", ""}:
        profile_name = ""
        custom_formula = ""
    else:
        profile_name = profile_selection
        custom_formula = ""
    voice, language, selected_profile = resolve_voice_choice(
        language,
        base_voice,
        profile_name,
        custom_formula,
        profiles_map,
    )
    try:
        speed = float(form.get("speed", 1.0))
    except (TypeError, ValueError):
        speed = 1.0
    subtitle_mode = str(form.get("subtitle_mode") or "Disabled")
    # Output/saving behavior comes entirely from persisted settings.
    output_format = settings["output_format"]
    subtitle_format = settings["subtitle_format"]
    save_mode_key = settings["save_mode"]
    save_mode = SAVE_MODE_LABELS.get(save_mode_key, SAVE_MODE_LABELS["save_next_to_input"])
    replace_single_newlines = settings["replace_single_newlines"]
    use_gpu = settings["use_gpu"]
    save_chapters_separately = settings["save_chapters_separately"]
    merge_chapters_at_end = settings["merge_chapters_at_end"] or not save_chapters_separately
    save_as_project = settings["save_as_project"]
    separate_chapters_format = settings["separate_chapters_format"]
    silence_between_chapters = settings["silence_between_chapters"]
    chapter_intro_delay = settings["chapter_intro_delay"]
    read_title_intro = settings["read_title_intro"]
    read_closing_outro = settings.get("read_closing_outro", True)
    normalize_chapter_opening_caps = settings["normalize_chapter_opening_caps"]
    max_subtitle_words = settings["max_subtitle_words"]
    auto_prefix_chapter_titles = settings["auto_prefix_chapter_titles"]
    chunk_level_default = str(settings.get("chunk_level", "paragraph")).strip().lower()
    raw_chunk_level = str(form.get("chunk_level") or chunk_level_default).strip().lower()
    if raw_chunk_level not in _CHUNK_LEVEL_VALUES:
        raw_chunk_level = chunk_level_default if chunk_level_default in _CHUNK_LEVEL_VALUES else "paragraph"
    chunk_level_value = raw_chunk_level
    chunk_level_literal = cast(ChunkLevel, chunk_level_value)
    speaker_mode_value = "single"
    generate_epub3_default = bool(settings.get("generate_epub3", False))
    generate_epub3 = coerce_bool(form.get("generate_epub3"), generate_epub3_default)
    selected_chapter_sources = [entry for entry in chapters_payload if entry.get("enabled")]
    raw_chunks = build_chunks_for_chapters(selected_chapter_sources, level=chunk_level_literal)
    # Speaker analysis always runs over sentence-level chunks regardless of
    # the chosen conversion chunk level.
    analysis_chunks = build_chunks_for_chapters(selected_chapter_sources, level="sentence")
    analysis_threshold = coerce_int(
        settings.get("speaker_analysis_threshold"),
        _DEFAULT_ANALYSIS_THRESHOLD,
        minimum=1,
        maximum=25,
    )
    initial_analysis = False
    (
        processed_chunks,
        speakers,
        analysis_payload,
        config_languages,
        _,
    ) = prepare_speaker_metadata(
        chapters=selected_chapter_sources,
        chunks=raw_chunks,
        analysis_chunks=analysis_chunks,
        voice=voice,
        voice_profile=selected_profile or None,
        threshold=analysis_threshold,
        run_analysis=initial_analysis,
        speaker_config=speaker_config_payload,
        apply_config=bool(speaker_config_payload),
    )
    def _extract_checkbox(name: str, default: bool) -> bool:
        # Checkbox semantics for the upload form: last submitted value wins;
        # absent fields keep the settings default. (Unlike the book-step
        # variant, a present-but-empty field also keeps the default here.)
        values: List[str] = []
        getter = getattr(form, "getlist", None)
        if callable(getter):
            raw_values = getter(name)
            if raw_values:
                values = list(cast(Iterable[str], raw_values))
        else:
            raw_flag = form.get(name)
            if raw_flag is not None:
                values = [raw_flag]
        if values:
            return coerce_bool(values[-1], default)
        return default
    normalization_overrides = {}
    for key in _NORMALIZATION_BOOLEAN_KEYS:
        default_val = bool(settings.get(key, True))
        normalization_overrides[key] = _extract_checkbox(key, default_val)
    for key in _NORMALIZATION_STRING_KEYS:
        default_val = str(settings.get(key, ""))
        val = form.get(key)
        if val is not None:
            normalization_overrides[key] = str(val)
        else:
            normalization_overrides[key] = default_val
    pending = PendingJob(
        id=uuid.uuid4().hex,
        original_filename=original_name,
        stored_path=stored_path,
        language=language,
        voice=voice,
        speed=speed,
        use_gpu=use_gpu,
        subtitle_mode=subtitle_mode,
        output_format=output_format,
        save_mode=save_mode,
        output_folder=None,
        replace_single_newlines=replace_single_newlines,
        subtitle_format=subtitle_format,
        total_characters=total_chars,
        save_chapters_separately=save_chapters_separately,
        merge_chapters_at_end=merge_chapters_at_end,
        separate_chapters_format=separate_chapters_format,
        silence_between_chapters=silence_between_chapters,
        save_as_project=save_as_project,
        voice_profile=selected_profile or None,
        max_subtitle_words=max_subtitle_words,
        metadata_tags=metadata_tags,
        chapters=chapters_payload,
        normalization_overrides=normalization_overrides,
        created_at=time.time(),
        cover_image_path=cover_path,
        cover_image_mime=cover_mime,
        chapter_intro_delay=chapter_intro_delay,
        read_title_intro=bool(read_title_intro),
        read_closing_outro=bool(read_closing_outro),
        normalize_chapter_opening_caps=bool(normalize_chapter_opening_caps),
        auto_prefix_chapter_titles=bool(auto_prefix_chapter_titles),
        chunk_level=chunk_level_value,
        speaker_mode=speaker_mode_value,
        generate_epub3=generate_epub3,
        chunks=processed_chunks,
        speakers=speakers,
        speaker_analysis=analysis_payload,
        speaker_analysis_threshold=analysis_threshold,
        analysis_requested=initial_analysis,
    )
    return PendingBuildResult(
        pending=pending,
        selected_speaker_config=selected_speaker_config or None,
        config_languages=list(config_languages or []),
        speaker_config_payload=speaker_config_payload,
    )
def render_jobs_panel() -> str:
    """Render the jobs panel: the active queue plus recent finished jobs."""
    all_jobs = get_service().list_jobs()
    live_states = {JobStatus.PENDING, JobStatus.RUNNING, JobStatus.PAUSED}
    running: list = []
    done: list = []
    for job in all_jobs:
        (running if job.status in live_states else done).append(job)
    # Queue order first (unqueued jobs sink to position 10_000), newest
    # creation time breaks ties.
    running = sorted(running, key=lambda job: ((job.queue_position or 10_000), -job.created_at))
    flags = {job.id: job_download_flags(job) for job in all_jobs}
    return render_template(
        "partials/jobs.html",
        active_jobs=running,
        finished_jobs=done[:5],
        total_finished=len(done),
        JobStatus=JobStatus,
        download_flags=flags,
        audiobookshelf_manual_available=audiobookshelf_manual_available(),
    )
def normalize_wizard_step(step: Optional[str], pending: Optional[PendingJob] = None) -> str:
    """Map a raw step name onto a canonical wizard step slug.

    Unknown, blank, or legacy step names ("upload", "settings") fall back
    to "book" when there is no pending job yet, otherwise to "chapters".
    The legacy "speakers" step maps onto "entities".
    """
    fallback = "book" if pending is None else "chapters"
    if not step:
        return fallback
    slug = step.strip().lower()
    if slug in {"", "upload", "settings"}:
        return fallback
    if slug == "speakers":
        return "entities"
    return slug if slug in _WIZARD_STEP_ORDER else fallback
def wants_wizard_json() -> bool:
    """Return True when the current request wants a JSON wizard response.

    Checks, in order: an explicit ``?format=json`` query parameter, a JSON
    Accept header, an AJAX/fetch ``X-Requested-With`` marker, and the
    wizard's own ``X-Abogen-Wizard: json`` header.
    """
    if request.args.get("format", "").strip().lower() == "json":
        return True
    if "application/json" in (request.headers.get("Accept") or "").lower():
        return True
    if (request.headers.get("X-Requested-With") or "").lower() in {"xmlhttprequest", "fetch"}:
        return True
    return (request.headers.get("X-Abogen-Wizard") or "").lower() == "json"
def render_wizard_partial(
    pending: Optional[PendingJob],
    step: str,
    *,
    error: Optional[str] = None,
    notice: Optional[str] = None,
) -> str:
    """Render the HTML fragment for a single wizard step.

    ``step`` must already be canonical (see normalize_wizard_step); an
    unknown step raises KeyError.
    """
    template_by_step = {
        "book": "partials/new_job_step_book.html",
        "chapters": "partials/new_job_step_chapters.html",
        "entities": "partials/new_job_step_entities.html",
    }
    return render_template(
        template_by_step[step],
        pending=pending,
        readonly=False,
        options=template_options(),
        settings=load_settings(),
        error=error,
        notice=notice,
    )
def wizard_step_payload(
    pending: Optional[PendingJob],
    step: str,
    html: str,
    *,
    error: Optional[str] = None,
    notice: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the JSON payload describing one wizard step for the client.

    Side effect: when the current step advances past the pending job's
    recorded furthest step, the new high-water mark is written back to the
    job and persisted via the conversion service.
    """
    meta = _WIZARD_STEP_META.get(step, {})
    try:
        current_index = _WIZARD_STEP_ORDER.index(step)
    except ValueError:
        current_index = 0
    last_index = len(_WIZARD_STEP_ORDER) - 1
    if pending is None:
        furthest = min(current_index, last_index)
    else:
        recorded = int(getattr(pending, "wizard_max_step_index", -1))
        if recorded < 0:
            recorded = -1
        furthest = min(max(current_index, recorded), last_index)
        if recorded != furthest:
            # Persist so the step navigation survives page reloads.
            pending.wizard_max_step_index = furthest
            get_service().store_pending_job(pending)
    completed = list(_WIZARD_STEP_ORDER[: furthest + 1])
    return {
        "step": step,
        "step_index": int(meta.get("index", current_index + 1)),
        "total_steps": len(_WIZARD_STEP_ORDER),
        "title": meta.get("title", ""),
        "hint": meta.get("hint", ""),
        "html": html,
        "completed_steps": completed,
        "pending_id": pending.id if pending else "",
        "filename": pending.original_filename if pending and pending.original_filename else "",
        "error": error or "",
        "notice": notice or "",
    }
def wizard_json_response(
    pending: Optional[PendingJob],
    step: str,
    *,
    error: Optional[str] = None,
    notice: Optional[str] = None,
    status: int = 200,
) -> ResponseReturnValue:
    """Render a wizard step and wrap it in the JSON envelope the UI expects."""
    fragment = render_wizard_partial(pending, step, error=error, notice=notice)
    body = wizard_step_payload(pending, step, fragment, error=error, notice=notice)
    return jsonify(body), status
import io
import threading
from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
import numpy as np
import soundfile as sf
from flask import current_app, send_file
from flask.typing import ResponseReturnValue
# Regex used to split preview text into per-line TTS segments.
SPLIT_PATTERN = r"\n+"
# Preview audio sample rate in Hz; both providers render at this rate here.
SAMPLE_RATE = 24000
# Cache of preview pipelines keyed by (language, device); guarded by the lock below.
_preview_pipelines: Dict[Tuple[str, str], Any] = {}
_preview_pipeline_lock = threading.Lock()
def _select_device() -> str:
import platform
system = platform.system()
if system == "Darwin" and platform.processor() == "arm":
return "mps"
return "cuda"
def _to_float32(audio_segment) -> np.ndarray:
if audio_segment is None:
return np.zeros(0, dtype="float32")
tensor = audio_segment
if hasattr(tensor, "detach"):
tensor = tensor.detach()
if hasattr(tensor, "cpu"):
try:
tensor = tensor.cpu()
except Exception:
pass
if hasattr(tensor, "numpy"):
return np.asarray(tensor.numpy(), dtype="float32").reshape(-1)
return np.asarray(tensor, dtype="float32").reshape(-1)
def get_preview_pipeline(language: str, device: str) -> Any:
    """Return a cached Kokoro preview pipeline for ``(language, device)``.

    The cache lookup and pipeline construction both happen while holding
    the module lock, so concurrent preview requests never build duplicate
    pipelines for the same key.
    """
    key = (language, device)
    with _preview_pipeline_lock:
        pipeline = _preview_pipelines.get(key)
        if pipeline is not None:
            return pipeline
        # Imported lazily: loading the Kokoro pipeline pulls in heavy ML deps.
        from abogen.utils import load_numpy_kpipeline
        _, KPipeline = load_numpy_kpipeline()
        pipeline = KPipeline(lang_code=language, repo_id="hexgrad/Kokoro-82M", device=device)
        _preview_pipelines[key] = pipeline
        return pipeline
def generate_preview_audio(
    text: str,
    voice_spec: str,
    language: str,
    speed: float,
    use_gpu: bool,
    tts_provider: str = "kokoro",
    supertonic_total_steps: int = 5,
    max_seconds: float = 8.0,
    pronunciation_overrides: Optional[Iterable[Mapping[str, Any]]] = None,
    manual_overrides: Optional[Iterable[Mapping[str, Any]]] = None,
    speakers: Optional[Mapping[str, Any]] = None,
) -> bytes:
    """Synthesize a short WAV preview of ``text`` and return the raw bytes.

    Supports both the Kokoro and SuperTonic providers, applies any
    pronunciation/manual overrides before normalization, and truncates the
    result to ``max_seconds`` of audio.

    Raises:
        ValueError: when ``text`` is blank.
        RuntimeError: when no pipeline is available or no audio is produced.
    """
    if not text.strip():
        raise ValueError("Preview text is required")
    provider = (tts_provider or "kokoro").strip().lower()
    # Apply pronunciation/manual overrides first so tokens like `Unfu*k` still match
    # before any downstream normalization potentially strips punctuation.
    source_text = text
    if pronunciation_overrides or manual_overrides or speakers:
        try:
            from abogen.webui import conversion_runner as runner
            # Minimal stand-in exposing the attributes the runner's
            # override helpers read off a real job.
            class _PreviewJob:
                def __init__(self):
                    self.language = language
                    self.voice = voice_spec
                    self.speakers = speakers
                    self.manual_overrides = list(manual_overrides or [])
                    self.pronunciation_overrides = list(pronunciation_overrides or [])
            job = _PreviewJob()
            merged = runner._merge_pronunciation_overrides(job)
            rules = runner._compile_pronunciation_rules(merged)
            source_text = runner._apply_pronunciation_rules(source_text, rules)
        except Exception:
            # Best-effort: a broken override set should not block the preview.
            current_app.logger.exception("Preview override application failed; using raw text")
            source_text = text
    normalized_text = source_text
    if provider != "supertonic":
        # Kokoro gets the project's text normalization; SuperTonic handles
        # raw text itself.
        try:
            from abogen.kokoro_text_normalization import normalize_for_pipeline
            normalized_text = normalize_for_pipeline(source_text)
        except Exception:
            current_app.logger.exception("Preview normalization failed; using raw text")
            normalized_text = source_text
    if provider == "supertonic":
        from abogen.tts_supertonic import SupertonicPipeline
        pipeline = SupertonicPipeline(sample_rate=SAMPLE_RATE, auto_download=True, total_steps=supertonic_total_steps)
        segments = pipeline(
            normalized_text,
            voice=voice_spec,
            speed=speed,
            split_pattern=SPLIT_PATTERN,
            total_steps=supertonic_total_steps,
        )
    else:
        device = "cpu"
        if use_gpu:
            # Fall back to CPU when device detection fails for any reason.
            try:
                device = _select_device()
            except Exception:
                device = "cpu"
                use_gpu = False
        pipeline = get_preview_pipeline(language, device)
        if pipeline is None:
            raise RuntimeError("Preview pipeline is unavailable")
        voice_choice: Any = voice_spec
        if voice_spec and "*" in voice_spec:
            # "*" marks a Kokoro voice-mix formula rather than a plain voice id.
            from abogen.voice_formulas import get_new_voice
            voice_choice = get_new_voice(pipeline, voice_spec, use_gpu)
        segments = pipeline(
            normalized_text,
            voice=voice_choice,
            speed=speed,
            split_pattern=SPLIT_PATTERN,
        )
    # Collect audio up to the max_seconds cap, truncating the last chunk.
    audio_chunks: List[np.ndarray] = []
    accumulated = 0
    max_samples = int(max(1.0, max_seconds) * SAMPLE_RATE)
    for segment in segments:
        graphemes = getattr(segment, "graphemes", "").strip()
        if not graphemes:
            continue
        audio = _to_float32(getattr(segment, "audio", None))
        if audio.size == 0:
            continue
        remaining = max_samples - accumulated
        if remaining <= 0:
            break
        if audio.shape[0] > remaining:
            audio = audio[:remaining]
        audio_chunks.append(audio)
        accumulated += audio.shape[0]
        if accumulated >= max_samples:
            break
    if not audio_chunks:
        raise RuntimeError("Preview could not be generated")
    audio_data = np.concatenate(audio_chunks)
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, SAMPLE_RATE, format="WAV")
    return buffer.getvalue()
def synthesize_preview(
    text: str,
    voice_spec: str,
    language: str,
    speed: float,
    use_gpu: bool,
    tts_provider: str = "kokoro",
    supertonic_total_steps: int = 5,
    max_seconds: float = 8.0,
    pronunciation_overrides: Optional[Iterable[Mapping[str, Any]]] = None,
    manual_overrides: Optional[Iterable[Mapping[str, Any]]] = None,
    speakers: Optional[Mapping[str, Any]] = None,
) -> ResponseReturnValue:
    """Generate a short WAV preview and return it as an HTTP response.

    Thin Flask wrapper around generate_preview_audio(); synthesis errors
    propagate to the caller unchanged. (The previous
    ``except Exception as e: raise e`` handled nothing and only added a
    traceback frame, so it was removed.)

    Returns:
        A non-cacheable ``audio/wav`` response streamed from memory.
    """
    audio_bytes = generate_preview_audio(
        text=text,
        voice_spec=voice_spec,
        language=language,
        speed=speed,
        use_gpu=use_gpu,
        tts_provider=tts_provider,
        supertonic_total_steps=supertonic_total_steps,
        max_seconds=max_seconds,
        pronunciation_overrides=pronunciation_overrides,
        manual_overrides=manual_overrides,
        speakers=speakers,
    )
    buffer = io.BytesIO(audio_bytes)
    response = send_file(
        buffer,
        mimetype="audio/wav",
        as_attachment=False,
        download_name="speaker_preview.wav",
    )
    # Previews are one-shot; never let browsers cache them.
    response.headers["Cache-Control"] = "no-store"
    return response
from typing import cast
from flask import current_app, abort
from abogen.webui.service import ConversionService, PendingJob
def get_service() -> ConversionService:
    """Fetch the app-wide ConversionService registered on the Flask app."""
    service = current_app.extensions["conversion_service"]
    return cast(ConversionService, service)
def require_pending_job(pending_id: str) -> PendingJob:
    """Look up a pending job by id, aborting with HTTP 404 when missing."""
    found = get_service().get_pending_job(pending_id)
    if not found:
        abort(404)
    return cast(PendingJob, found)
def remove_pending_job(pending_id: str) -> None:
    """Drop a pending job from the conversion service's pending store.

    NOTE(review): assumes pop_pending_job tolerates unknown ids — confirm.
    """
    get_service().pop_pending_job(pending_id)
def submit_job(pending: PendingJob) -> str:
    """Promote a pending job into the real conversion queue.

    Removes the pending entry from the service's pending store, forwards
    every field onto ``service.enqueue`` and returns the new job id.
    Optional fields that older PendingJob payloads may lack are read via
    ``getattr`` with a safe default.
    """
    service = get_service()
    # The pending record is consumed here; after enqueue only the real job exists.
    service.pop_pending_job(pending.id)
    job = service.enqueue(
        original_filename=pending.original_filename,
        stored_path=pending.stored_path,
        language=pending.language,
        tts_provider=getattr(pending, "tts_provider", "kokoro"),
        voice=pending.voice,
        speed=pending.speed,
        supertonic_total_steps=getattr(pending, "supertonic_total_steps", 5),
        use_gpu=pending.use_gpu,
        subtitle_mode=pending.subtitle_mode,
        output_format=pending.output_format,
        save_mode=pending.save_mode,
        output_folder=pending.output_folder,
        replace_single_newlines=pending.replace_single_newlines,
        subtitle_format=pending.subtitle_format,
        total_characters=pending.total_characters,
        chapters=pending.chapters,
        save_chapters_separately=pending.save_chapters_separately,
        merge_chapters_at_end=pending.merge_chapters_at_end,
        separate_chapters_format=pending.separate_chapters_format,
        silence_between_chapters=pending.silence_between_chapters,
        save_as_project=pending.save_as_project,
        voice_profile=pending.voice_profile,
        max_subtitle_words=pending.max_subtitle_words,
        metadata_tags=pending.metadata_tags,
        cover_image_path=pending.cover_image_path,
        cover_image_mime=pending.cover_image_mime,
        chapter_intro_delay=pending.chapter_intro_delay,
        read_title_intro=pending.read_title_intro,
        read_closing_outro=pending.read_closing_outro,
        auto_prefix_chapter_titles=pending.auto_prefix_chapter_titles,
        normalize_chapter_opening_caps=pending.normalize_chapter_opening_caps,
        chunk_level=pending.chunk_level,
        chunks=pending.chunks,
        speakers=pending.speakers,
        speaker_mode=pending.speaker_mode,
        generate_epub3=pending.generate_epub3,
        speaker_analysis=pending.speaker_analysis,
        speaker_analysis_threshold=pending.speaker_analysis_threshold,
        analysis_requested=pending.analysis_requested,
        entity_summary=getattr(pending, "entity_summary", None),
        manual_overrides=getattr(pending, "manual_overrides", None),
        pronunciation_overrides=getattr(pending, "pronunciation_overrides", None),
        heteronym_overrides=getattr(pending, "heteronym_overrides", None),
        normalization_overrides=pending.normalization_overrides,
    )
    return job.id
import os
import re
from typing import Any, Dict, Mapping, Optional
from abogen.constants import (
LANGUAGE_DESCRIPTIONS,
SUBTITLE_FORMATS,
SUPPORTED_SOUND_FORMATS,
VOICES_INTERNAL,
)
from abogen.normalization_settings import (
DEFAULT_LLM_PROMPT,
environment_llm_defaults,
)
from abogen.utils import load_config, save_config
from abogen.integrations.calibre_opds import CalibreOPDSClient
from abogen.integrations.audiobookshelf import AudiobookshelfConfig
from abogen.webui.routes.utils.common import split_profile_spec
# Canonical save-mode keys mapped to their human-readable UI labels.
SAVE_MODE_LABELS = {
    "save_next_to_input": "Save next to input file",
    "save_to_desktop": "Save to Desktop",
    "choose_output_folder": "Choose output folder",
    "default_output": "Use default save location",
}
# Reverse lookup: legacy configs stored the label text rather than the key.
LEGACY_SAVE_MODE_MAP = {label: key for key, label in SAVE_MODE_LABELS.items()}
# Chunking granularity choices offered in the UI, plus the set of valid values.
_CHUNK_LEVEL_OPTIONS = [
    {"value": "paragraph", "label": "Paragraphs"},
    {"value": "sentence", "label": "Sentences"},
]
_CHUNK_LEVEL_VALUES = {option["value"] for option in _CHUNK_LEVEL_OPTIONS}
# Default for the speaker_analysis_threshold setting.
# NOTE(review): exact semantics (minimum mention count?) inferred from the name — confirm.
_DEFAULT_ANALYSIS_THRESHOLD = 3
# How apostrophe normalization is performed (off / built-in spaCy / LLM-assisted).
_APOSTROPHE_MODE_OPTIONS = [
    {"value": "off", "label": "Off"},
    {"value": "spacy", "label": "spaCy (built-in)"},
    {"value": "llm", "label": "LLM assisted"},
]
# Normalization-specific boolean keys (a subset of BOOLEAN_SETTINGS below).
_NORMALIZATION_BOOLEAN_KEYS = {
    "normalization_numbers",
    "normalization_titles",
    "normalization_terminal",
    "normalization_phoneme_hints",
    "normalization_caps_quotes",
    "normalization_currency",
    "normalization_footnotes",
    "normalization_internet_slang",
    "normalization_apostrophes_contractions",
    "normalization_apostrophes_plural_possessives",
    "normalization_apostrophes_sibilant_possessives",
    "normalization_apostrophes_decades",
    "normalization_apostrophes_leading_elisions",
    "normalization_contraction_aux_be",
    "normalization_contraction_aux_have",
    "normalization_contraction_modal_will",
    "normalization_contraction_modal_would",
    "normalization_contraction_negation_not",
    "normalization_contraction_let_us",
}
# Normalization keys whose values are constrained strings (validated elsewhere).
_NORMALIZATION_STRING_KEYS = {
    "normalization_numbers_year_style",
    "normalization_apostrophe_mode",
}
# Every setting coerced with coerce_bool by normalize_setting_value.
BOOLEAN_SETTINGS = {
    "replace_single_newlines",
    "use_gpu",
    "save_chapters_separately",
    "merge_chapters_at_end",
    "save_as_project",
    "generate_epub3",
    "enable_entity_recognition",
    "read_title_intro",
    "read_closing_outro",
    "auto_prefix_chapter_titles",
    "normalize_chapter_opening_caps",
    "normalization_numbers",
    "normalization_titles",
    "normalization_terminal",
    "normalization_phoneme_hints",
    "normalization_caps_quotes",
    "normalization_currency",
    "normalization_footnotes",
    "normalization_internet_slang",
    "normalization_apostrophes_contractions",
    "normalization_apostrophes_plural_possessives",
    "normalization_apostrophes_sibilant_possessives",
    "normalization_apostrophes_decades",
    "normalization_apostrophes_leading_elisions",
    "normalization_contraction_aux_be",
    "normalization_contraction_aux_have",
    "normalization_contraction_modal_will",
    "normalization_contraction_modal_would",
    "normalization_contraction_negation_not",
    "normalization_contraction_let_us",
}
# Settings coerced with coerce_float / coerce_int respectively.
FLOAT_SETTINGS = {"silence_between_chapters", "chapter_intro_delay", "llm_timeout"}
INT_SETTINGS = {"max_subtitle_words", "speaker_analysis_threshold"}
# Grouped checkbox metadata used to render the normalization settings form.
_NORMALIZATION_GROUPS = [
    {
        "label": "General Rules",
        "options": [
            {"key": "normalization_numbers", "label": "Convert grouped numbers to words"},
            {"key": "normalization_currency", "label": "Convert currency symbols ($10 → ten dollars)"},
            {"key": "normalization_titles", "label": "Expand titles and suffixes (Dr., St., Jr., …)"},
            {"key": "normalization_internet_slang", "label": "Expand internet slang (pls → please)"},
            {"key": "normalization_footnotes", "label": "Remove footnote indicators ([1], [2])"},
            {"key": "normalization_terminal", "label": "Ensure sentences end with terminal punctuation"},
            {"key": "normalization_caps_quotes", "label": "Convert ALL CAPS dialogue inside quotes"},
        ]
    },
    {
        "label": "Apostrophes & Contractions",
        "options": [
            {"key": "normalization_apostrophes_contractions", "label": "Expand contractions (it's → it is)"},
            {"key": "normalization_apostrophes_plural_possessives", "label": "Collapse plural possessives (dogs' → dogs)"},
            {"key": "normalization_apostrophes_sibilant_possessives", "label": "Mark sibilant possessives (boss's → boss + IZ marker)"},
            {"key": "normalization_apostrophes_decades", "label": "Expand decades ('90s → 1990s)"},
            {"key": "normalization_apostrophes_leading_elisions", "label": "Expand leading elisions ('tis → it is)"},
            {"key": "normalization_phoneme_hints", "label": "Add phoneme hints for possessives"},
            {"key": "normalization_contraction_aux_be", "label": "Expand auxiliary 'be' (I'm → I am)"},
            {"key": "normalization_contraction_aux_have", "label": "Expand auxiliary 'have' (I've → I have)"},
            {"key": "normalization_contraction_modal_will", "label": "Expand modal 'will' (I'll → I will)"},
            {"key": "normalization_contraction_modal_would", "label": "Expand modal 'would' (I'd → I would)"},
            {"key": "normalization_contraction_negation_not", "label": "Expand negation 'not' (don't → do not)"},
            {"key": "normalization_contraction_let_us", "label": "Expand 'let's' → let us"},
        ]
    }
]
def integration_defaults() -> Dict[str, Dict[str, Any]]:
    """Return the default configuration mapping for every supported integration."""
    calibre_defaults: Dict[str, Any] = {
        "enabled": False,
        "base_url": "",
        "username": "",
        "password": "",
        "verify_ssl": True,
    }
    audiobookshelf_defaults: Dict[str, Any] = {
        "enabled": False,
        "base_url": "",
        "api_token": "",
        "library_id": "",
        "collection_id": "",
        "folder_id": "",
        "verify_ssl": True,
        "send_cover": True,
        "send_chapters": True,
        "send_subtitles": False,
        "auto_send": False,
        "timeout": 30.0,
    }
    return {
        "calibre_opds": calibre_defaults,
        "audiobookshelf": audiobookshelf_defaults,
    }
def has_output_override() -> bool:
    """True when an output-directory override env var is set to a non-empty value."""
    for env_name in ("ABOGEN_OUTPUT_DIR", "ABOGEN_OUTPUT_ROOT"):
        if os.environ.get(env_name):
            return True
    return False
def settings_defaults() -> Dict[str, Any]:
    """Return the default value for every known user-facing setting.

    LLM-related defaults are seeded from environment variables via
    environment_llm_defaults(); save_mode defaults to the managed output
    location when an ABOGEN_OUTPUT_* override is present (has_output_override).
    """
    llm_env_defaults = environment_llm_defaults()
    return {
        # Output / subtitle formats and save location.
        "output_format": "wav",
        "subtitle_format": "srt",
        "save_mode": "default_output" if has_output_override() else "save_next_to_input",
        "default_speaker": "",
        "default_voice": VOICES_INTERNAL[0] if VOICES_INTERNAL else "",
        "supertonic_total_steps": 5,
        "supertonic_speed": 1.0,
        "replace_single_newlines": False,
        "use_gpu": True,
        "save_chapters_separately": False,
        "merge_chapters_at_end": True,
        "save_as_project": False,
        "separate_chapters_format": "wav",
        "silence_between_chapters": 2.0,
        "chapter_intro_delay": 0.5,
        "read_title_intro": False,
        "read_closing_outro": True,
        "normalize_chapter_opening_caps": True,
        "max_subtitle_words": 50,
        "chunk_level": "paragraph",
        "enable_entity_recognition": True,
        "generate_epub3": False,
        "auto_prefix_chapter_titles": True,
        "speaker_analysis_threshold": _DEFAULT_ANALYSIS_THRESHOLD,
        "speaker_pronunciation_sentence": "This is {{name}} speaking.",
        "speaker_random_languages": [],
        # LLM settings fall back to environment-provided values.
        "llm_base_url": llm_env_defaults.get("llm_base_url", ""),
        "llm_api_key": llm_env_defaults.get("llm_api_key", ""),
        "llm_model": llm_env_defaults.get("llm_model", ""),
        "llm_timeout": llm_env_defaults.get("llm_timeout", 30.0),
        "llm_prompt": llm_env_defaults.get("llm_prompt", DEFAULT_LLM_PROMPT),
        "llm_context_mode": llm_env_defaults.get("llm_context_mode", "sentence"),
        # Text normalization toggles (see _NORMALIZATION_GROUPS for UI labels).
        "normalization_numbers": True,
        "normalization_currency": True,
        "normalization_footnotes": True,
        "normalization_titles": True,
        "normalization_terminal": True,
        "normalization_phoneme_hints": True,
        "normalization_caps_quotes": True,
        "normalization_internet_slang": False,
        "normalization_apostrophes_contractions": True,
        "normalization_apostrophes_plural_possessives": True,
        "normalization_apostrophes_sibilant_possessives": True,
        "normalization_apostrophes_decades": True,
        "normalization_apostrophes_leading_elisions": True,
        "normalization_apostrophe_mode": "spacy",
        "normalization_numbers_year_style": "american",
        "normalization_contraction_aux_be": True,
        "normalization_contraction_aux_have": True,
        "normalization_contraction_modal_will": True,
        "normalization_contraction_modal_would": True,
        "normalization_contraction_negation_not": True,
        "normalization_contraction_let_us": True,
    }
def llm_ready(settings: Mapping[str, Any]) -> bool:
    """True when an LLM base URL is configured (non-blank after stripping)."""
    configured_url = settings.get("llm_base_url") or ""
    return bool(str(configured_url).strip())
# Matches template placeholders of the form {{ token }} (word characters only).
_PROMPT_TOKEN_RE = re.compile(r"{{\s*([a-zA-Z0-9_]+)\s*}}")
def render_prompt_template(template: str, context: Mapping[str, str]) -> str:
    """Substitute ``{{token}}`` placeholders in *template* with *context* values.

    Tokens missing from *context* become the empty string; an empty or falsy
    template yields "".
    """
    if not template:
        return ""
    return _PROMPT_TOKEN_RE.sub(lambda match: context.get(match.group(1), ""), template)
def coerce_bool(value: Any, default: bool) -> bool:
    """Coerce *value* to bool.

    Strings are truthy only for "true"/"1"/"yes"/"on" (case-insensitive);
    None maps to *default*; anything else uses Python truthiness.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        return value.lower() in {"true", "1", "yes", "on"}
    return default if value is None else bool(value)
def coerce_float(value: Any, default: float) -> float:
    """Parse *value* as a non-negative float, returning *default* on failure."""
    try:
        parsed = float(value)
    except (TypeError, ValueError):
        return default
    return max(0.0, parsed)
def coerce_int(value: Any, default: int, *, minimum: int = 1, maximum: int = 200) -> int:
    """Parse *value* as an int clamped to [minimum, maximum]; *default* on parse failure."""
    try:
        numeric = int(value)
    except (TypeError, ValueError):
        return default
    return max(minimum, min(numeric, maximum))
def normalize_save_mode(value: Any, default: str) -> str:
    """Map a stored save-mode value (canonical key or legacy label) to a key.

    Non-string or unrecognized values fall back to *default*.
    """
    if not isinstance(value, str):
        return default
    if value in SAVE_MODE_LABELS:
        return value
    return LEGACY_SAVE_MODE_MAP.get(value, default)
def normalize_setting_value(key: str, value: Any, defaults: Dict[str, Any]) -> Any:
    """Coerce the raw config value for *key* into its canonical form.

    Invalid or missing values fall back to ``defaults[key]``; keys without a
    dedicated rule pass through unchanged (None maps to the default).
    """
    if key in BOOLEAN_SETTINGS:
        return coerce_bool(value, defaults[key])
    if key in FLOAT_SETTINGS:
        return coerce_float(value, defaults[key])
    if key in INT_SETTINGS:
        return coerce_int(value, defaults[key])
    if key == "save_mode":
        return normalize_save_mode(value, defaults[key])
    if key == "output_format":
        return value if value in SUPPORTED_SOUND_FORMATS else defaults[key]
    if key == "subtitle_format":
        # SUBTITLE_FORMATS entries are sequences whose first item is the format id.
        valid = {item[0] for item in SUBTITLE_FORMATS}
        return value if value in valid else defaults[key]
    if key == "separate_chapters_format":
        if isinstance(value, str):
            normalized = value.lower()
            if normalized in {"wav", "flac", "mp3", "opus"}:
                return normalized
        return defaults[key]
    if key == "default_voice":
        if isinstance(value, str):
            text = value.strip()
            if not text:
                return defaults[key]
            # split_profile_spec separates "speaker:<name>" profile specs from plain voice ids.
            spec, profile_name = split_profile_spec(text)
            if profile_name:
                return f"speaker:{profile_name}"
            return spec
        return defaults[key]
    if key == "default_speaker":
        # Same handling as default_voice, but an empty/invalid value yields "".
        if isinstance(value, str):
            text = value.strip()
            if not text:
                return ""
            spec, profile_name = split_profile_spec(text)
            if profile_name:
                return f"speaker:{profile_name}"
            return spec
        return ""
    if key == "chunk_level":
        if isinstance(value, str) and value in _CHUNK_LEVEL_VALUES:
            return value
        return defaults[key]
    if key == "normalization_apostrophe_mode":
        if isinstance(value, str):
            normalized_mode = value.strip().lower()
            if normalized_mode in {"off", "spacy", "llm"}:
                return normalized_mode
        return defaults[key]
    if key == "normalization_numbers_year_style":
        if isinstance(value, str):
            normalized_style = value.strip().lower()
            if normalized_style in {"american", "off"}:
                return normalized_style
        return defaults[key]
    if key == "llm_context_mode":
        # Only "sentence" is currently accepted as a context mode.
        if isinstance(value, str):
            normalized_scope = value.strip().lower()
            if normalized_scope == "sentence":
                return normalized_scope
        return defaults[key]
    if key == "llm_prompt":
        candidate = str(value or "").strip()
        return candidate if candidate else defaults[key]
    if key in {"llm_base_url", "llm_api_key", "llm_model"}:
        return str(value or "").strip()
    if key == "speaker_random_languages":
        # Accept either a collection of codes or a comma-separated string;
        # keep only codes known to LANGUAGE_DESCRIPTIONS.
        if isinstance(value, (list, tuple, set)):
            return [code for code in value if isinstance(code, str) and code in LANGUAGE_DESCRIPTIONS]
        if isinstance(value, str):
            parts = [item.strip().lower() for item in value.split(",") if item.strip()]
            return [code for code in parts if code in LANGUAGE_DESCRIPTIONS]
        return defaults.get(key, [])
    if key == "supertonic_total_steps":
        # Clamp to the supported 2..15 step range.
        try:
            steps = int(value)
        except (TypeError, ValueError):
            return defaults.get(key, 5)
        return max(2, min(15, steps))
    if key == "supertonic_speed":
        # Clamp to the supported 0.7..2.0 speed range.
        try:
            speed = float(value)
        except (TypeError, ValueError):
            return defaults.get(key, 1.0)
        return max(0.7, min(2.0, speed))
    return value if value is not None else defaults.get(key)
def load_settings() -> Dict[str, Any]:
    """Load persisted settings, normalizing every known key against its default."""
    defaults = settings_defaults()
    stored = load_config() or {}
    return {
        key: normalize_setting_value(key, stored.get(key, default_value), defaults)
        for key, default_value in defaults.items()
    }
def load_integration_settings() -> Dict[str, Dict[str, Any]]:
    """Load integration configs, merging stored values over their defaults.

    Each stored field is coerced by the type of its default so bad values
    degrade safely. Calibre OPDS additionally falls back to the
    CALIBRE_SERVER_HOST / OPDS_USERNAME / OPDS_PASSWORD environment variables
    and is auto-enabled when configured only via the environment.
    """
    defaults = integration_defaults()
    cfg = load_config() or {}
    # Integrations are stored under the "integrations" key in the config
    stored_integrations = cfg.get("integrations", {})
    if not isinstance(stored_integrations, Mapping):
        stored_integrations = {}
    integrations: Dict[str, Dict[str, Any]] = {}
    for key, default in defaults.items():
        stored = stored_integrations.get(key)
        merged: Dict[str, Any] = dict(default)
        if isinstance(stored, Mapping):
            for field, default_value in default.items():
                value = stored.get(field, default_value)
                # Coerce by the default's type: bool, float, int, else string.
                if isinstance(default_value, bool):
                    merged[field] = coerce_bool(value, default_value)
                elif isinstance(default_value, float):
                    try:
                        merged[field] = float(value)
                    except (TypeError, ValueError):
                        merged[field] = default_value
                elif isinstance(default_value, int):
                    try:
                        merged[field] = int(value)
                    except (TypeError, ValueError):
                        merged[field] = default_value
                else:
                    merged[field] = str(value or "")
        if key == "calibre_opds":
            # Presence flag lets templates indicate a saved secret without echoing it.
            merged["has_password"] = bool(isinstance(stored, Mapping) and stored.get("password"))
            # Do not clear the password here, let the template decide whether to show it or not
            # merged["password"] = ""
        elif key == "audiobookshelf":
            merged["has_api_token"] = bool(isinstance(stored, Mapping) and stored.get("api_token"))
            # Do not clear the token here
            # merged["api_token"] = ""
        integrations[key] = merged
    # Environment variable fallbacks for Calibre OPDS
    calibre = integrations["calibre_opds"]
    if not calibre.get("base_url"):
        calibre["base_url"] = os.environ.get("CALIBRE_SERVER_HOST", "")
    if not calibre.get("username"):
        calibre["username"] = os.environ.get("OPDS_USERNAME", "")
    if not calibre.get("password"):
        calibre["password"] = os.environ.get("OPDS_PASSWORD", "")
    # If we have a password (from storage or env), mark it as present for the UI
    if calibre.get("password"):
        calibre["has_password"] = True
    # Auto-enable if configured via env but not explicitly disabled in config
    stored_calibre = stored_integrations.get("calibre_opds")
    if stored_calibre is None and calibre.get("base_url"):
        calibre["enabled"] = True
    return integrations
def stored_integration_config(name: str) -> Dict[str, Any]:
    """Return the stored config dict for integration *name*, or {} if absent.

    Looks under the "integrations" mapping first (current layout), then falls
    back to a top-level entry (legacy layout).
    """
    cfg = load_config() or {}
    candidates = []
    integrations = cfg.get("integrations")
    if isinstance(integrations, Mapping):
        candidates.append(integrations.get(name))
    candidates.append(cfg.get(name))
    for entry in candidates:
        if isinstance(entry, Mapping):
            return dict(entry)
    return {}
def calibre_settings_from_payload(payload: Mapping[str, Any]) -> Dict[str, Any]:
    """Build a Calibre OPDS settings dict from a submitted form/API payload.

    Values resolve in order: payload short key, payload prefixed key, stored
    config, then default. The saved password is reused only when the caller
    asks for it (use_saved_password) and has not requested a clear.
    NOTE(review): the chains use ``or``, so a falsy payload value (False, "")
    cannot override a stored truthy value — confirm this matches the intended
    form semantics.
    """
    defaults = integration_defaults()["calibre_opds"]
    stored = stored_integration_config("calibre_opds")
    base_url = str(
        payload.get("base_url")
        or payload.get("calibre_opds_base_url")
        or stored.get("base_url")
        or ""
    ).strip()
    username = str(
        payload.get("username")
        or payload.get("calibre_opds_username")
        or stored.get("username")
        or ""
    ).strip()
    password_input = str(
        payload.get("password")
        or payload.get("calibre_opds_password")
        or ""
    ).strip()
    use_saved_password = coerce_bool(
        payload.get("use_saved_password")
        or payload.get("calibre_opds_use_saved_password"),
        False,
    )
    clear_saved_password = coerce_bool(
        payload.get("clear_saved_password")
        or payload.get("calibre_opds_password_clear"),
        False,
    )
    # New input wins; otherwise reuse the saved secret only on explicit request.
    password = ""
    if password_input:
        password = password_input
    elif use_saved_password and not clear_saved_password:
        password = str(stored.get("password") or "")
    verify_ssl = coerce_bool(
        payload.get("verify_ssl")
        or payload.get("calibre_opds_verify_ssl"),
        defaults["verify_ssl"],
    )
    enabled = coerce_bool(
        payload.get("enabled")
        or payload.get("calibre_opds_enabled"),
        coerce_bool(stored.get("enabled"), False),
    )
    return {
        "enabled": enabled,
        "base_url": base_url,
        "username": username,
        "password": password,
        "verify_ssl": verify_ssl,
    }
def audiobookshelf_settings_from_payload(payload: Mapping[str, Any]) -> Dict[str, Any]:
    """Build an Audiobookshelf settings dict from a submitted form/API payload.

    Text fields resolve in order: payload short key, payload prefixed key,
    stored config, then "". The saved API token is reused only when
    use_saved_token is set and no clear was requested.
    NOTE(review): the chains use ``or``, so a falsy payload value cannot
    override a stored truthy value — confirm this matches the form semantics.
    """
    defaults = integration_defaults()["audiobookshelf"]
    stored = stored_integration_config("audiobookshelf")
    base_url = str(
        payload.get("base_url")
        or payload.get("audiobookshelf_base_url")
        or stored.get("base_url")
        or ""
    ).strip()
    library_id = str(
        payload.get("library_id")
        or payload.get("audiobookshelf_library_id")
        or stored.get("library_id")
        or ""
    ).strip()
    collection_id = str(
        payload.get("collection_id")
        or payload.get("audiobookshelf_collection_id")
        or stored.get("collection_id")
        or ""
    ).strip()
    folder_id = str(
        payload.get("folder_id")
        or payload.get("audiobookshelf_folder_id")
        or stored.get("folder_id")
        or ""
    ).strip()
    token_input = str(
        payload.get("api_token")
        or payload.get("audiobookshelf_api_token")
        or ""
    ).strip()
    use_saved_token = coerce_bool(
        payload.get("use_saved_token")
        or payload.get("audiobookshelf_use_saved_token"),
        False,
    )
    clear_saved_token = coerce_bool(
        payload.get("clear_saved_token")
        or payload.get("audiobookshelf_api_token_clear"),
        False,
    )
    # New token wins; otherwise reuse the saved one only on explicit request.
    if token_input:
        api_token = token_input
    elif use_saved_token and not clear_saved_token:
        api_token = str(stored.get("api_token") or "")
    else:
        api_token = ""
    verify_ssl = coerce_bool(
        payload.get("verify_ssl")
        or payload.get("audiobookshelf_verify_ssl"),
        defaults["verify_ssl"],
    )
    send_cover = coerce_bool(
        payload.get("send_cover")
        or payload.get("audiobookshelf_send_cover"),
        defaults["send_cover"],
    )
    send_chapters = coerce_bool(
        payload.get("send_chapters")
        or payload.get("audiobookshelf_send_chapters"),
        defaults["send_chapters"],
    )
    send_subtitles = coerce_bool(
        payload.get("send_subtitles")
        or payload.get("audiobookshelf_send_subtitles"),
        defaults["send_subtitles"],
    )
    auto_send = coerce_bool(
        payload.get("auto_send")
        or payload.get("audiobookshelf_auto_send"),
        defaults["auto_send"],
    )
    timeout_raw = (
        payload.get("timeout")
        or payload.get("audiobookshelf_timeout")
        or stored.get("timeout")
        or defaults["timeout"]
    )
    try:
        timeout = float(timeout_raw)
    except (TypeError, ValueError):
        timeout = defaults["timeout"]
    enabled = coerce_bool(
        payload.get("enabled")
        or payload.get("audiobookshelf_enabled"),
        coerce_bool(stored.get("enabled"), False),
    )
    return {
        "enabled": enabled,
        "base_url": base_url,
        "library_id": library_id,
        "collection_id": collection_id,
        "folder_id": folder_id,
        "api_token": api_token,
        "verify_ssl": verify_ssl,
        "send_cover": send_cover,
        "send_chapters": send_chapters,
        "send_subtitles": send_subtitles,
        "auto_send": auto_send,
        "timeout": timeout,
    }
def build_audiobookshelf_config(settings: Mapping[str, Any]) -> Optional[AudiobookshelfConfig]:
    """Build an AudiobookshelfConfig from *settings*, or None when incomplete.

    Requires a base URL, API token and library id; optional ids map to None
    when blank.
    NOTE(review): the 3600.0 timeout fallback here differs from the 30.0
    default in integration_defaults() — presumably to allow long uploads;
    confirm this is intentional.
    """
    base_url = str(settings.get("base_url") or "").strip()
    api_token = str(settings.get("api_token") or "").strip()
    library_id = str(settings.get("library_id") or "").strip()
    if not base_url or not api_token or not library_id:
        return None
    try:
        timeout = float(settings.get("timeout", 3600.0))
    except (TypeError, ValueError):
        timeout = 3600.0
    collection_id = str(settings.get("collection_id") or "").strip() or None
    folder_id = str(settings.get("folder_id") or "").strip() or None
    return AudiobookshelfConfig(
        base_url=base_url,
        api_token=api_token,
        library_id=library_id,
        collection_id=collection_id,
        folder_id=folder_id,
        verify_ssl=coerce_bool(settings.get("verify_ssl"), True),
        send_cover=coerce_bool(settings.get("send_cover"), True),
        send_chapters=coerce_bool(settings.get("send_chapters"), True),
        send_subtitles=coerce_bool(settings.get("send_subtitles"), False),
        timeout=timeout,
    )
def calibre_integration_enabled(
    integrations: Optional[Mapping[str, Any]] = None,
) -> bool:
    """True when the Calibre OPDS integration is enabled and has a base URL.

    Loads integration settings when *integrations* is not supplied.
    """
    source = load_integration_settings() if integrations is None else integrations
    if not isinstance(source, Mapping):
        return False
    payload = source.get("calibre_opds")
    if not isinstance(payload, Mapping):
        return False
    has_url = bool(str(payload.get("base_url") or "").strip())
    return has_url and coerce_bool(payload.get("enabled"), False)
def audiobookshelf_manual_available() -> bool:
    """True when a stored Audiobookshelf config exists and is marked enabled."""
    stored = stored_integration_config("audiobookshelf")
    return bool(stored) and coerce_bool(stored.get("enabled"), False)
def build_calibre_client(settings: Mapping[str, Any]) -> CalibreOPDSClient:
    """Construct a CalibreOPDSClient from a settings mapping.

    Blank username/password become None; timeout falls back to 15 seconds.

    Raises:
        ValueError: if no base URL is configured.
    """
    base_url = str(settings.get("base_url") or "").strip()
    if not base_url:
        raise ValueError("Calibre OPDS base URL is required")
    try:
        timeout = float(settings.get("timeout", 15.0))
    except (TypeError, ValueError):
        timeout = 15.0
    return CalibreOPDSClient(
        base_url,
        username=str(settings.get("username") or "").strip() or None,
        password=str(settings.get("password") or "").strip() or None,
        timeout=timeout,
        verify=coerce_bool(settings.get("verify_ssl"), True),
    )
def apply_integration_form(cfg: Dict[str, Any], form: Mapping[str, Any]) -> None:
    """Update *cfg* in place with integration values from a submitted form.

    Rewrites the "calibre_opds" and "audiobookshelf" entries. Secrets keep
    their stored value unless a new one is supplied or the matching *_clear
    flag is set; checkbox flags default to False when absent from the form.
    NOTE(review): text fields use ``or`` fallbacks, so an empty form field
    keeps the current stored value rather than clearing it — confirm intended.
    """
    defaults = integration_defaults()
    current_calibre = dict(cfg.get("calibre_opds") or {})
    calibre_enabled = coerce_bool(form.get("calibre_opds_enabled"), False)
    calibre_base = str(form.get("calibre_opds_base_url") or current_calibre.get("base_url") or "").strip()
    calibre_username = str(form.get("calibre_opds_username") or current_calibre.get("username") or "").strip()
    calibre_password_input = str(form.get("calibre_opds_password") or "")
    calibre_clear = coerce_bool(form.get("calibre_opds_password_clear"), False)
    # New password wins; explicit clear empties it; otherwise keep the stored one.
    if calibre_password_input:
        calibre_password = calibre_password_input
    elif calibre_clear:
        calibre_password = ""
    else:
        calibre_password = str(current_calibre.get("password") or "")
    calibre_verify = coerce_bool(form.get("calibre_opds_verify_ssl"), defaults["calibre_opds"]["verify_ssl"])
    cfg["calibre_opds"] = {
        "enabled": calibre_enabled,
        "base_url": calibre_base,
        "username": calibre_username,
        "password": calibre_password,
        "verify_ssl": calibre_verify,
    }
    current_abs = dict(cfg.get("audiobookshelf") or {})
    abs_enabled = coerce_bool(form.get("audiobookshelf_enabled"), False)
    abs_base = str(form.get("audiobookshelf_base_url") or current_abs.get("base_url") or "").strip()
    abs_library = str(form.get("audiobookshelf_library_id") or current_abs.get("library_id") or "").strip()
    abs_collection = str(form.get("audiobookshelf_collection_id") or current_abs.get("collection_id") or "").strip()
    abs_folder = str(form.get("audiobookshelf_folder_id") or current_abs.get("folder_id") or "").strip()
    abs_token_input = str(form.get("audiobookshelf_api_token") or "")
    abs_token_clear = coerce_bool(form.get("audiobookshelf_api_token_clear"), False)
    # Same precedence as the password handling above.
    if abs_token_input:
        abs_token = abs_token_input
    elif abs_token_clear:
        abs_token = ""
    else:
        abs_token = str(current_abs.get("api_token") or "")
    abs_verify = coerce_bool(form.get("audiobookshelf_verify_ssl"), defaults["audiobookshelf"]["verify_ssl"])
    abs_send_cover = coerce_bool(form.get("audiobookshelf_send_cover"), defaults["audiobookshelf"]["send_cover"])
    abs_send_chapters = coerce_bool(form.get("audiobookshelf_send_chapters"), defaults["audiobookshelf"]["send_chapters"])
    abs_send_subtitles = coerce_bool(form.get("audiobookshelf_send_subtitles"), defaults["audiobookshelf"]["send_subtitles"])
    abs_auto_send = coerce_bool(form.get("audiobookshelf_auto_send"), defaults["audiobookshelf"]["auto_send"])
    timeout_raw = form.get("audiobookshelf_timeout", current_abs.get("timeout", defaults["audiobookshelf"]["timeout"]))
    try:
        abs_timeout = float(timeout_raw)
    except (TypeError, ValueError):
        abs_timeout = defaults["audiobookshelf"]["timeout"]
    cfg["audiobookshelf"] = {
        "enabled": abs_enabled,
        "base_url": abs_base,
        "api_token": abs_token,
        "library_id": abs_library,
        "collection_id": abs_collection,
        "folder_id": abs_folder,
        "verify_ssl": abs_verify,
        "send_cover": abs_send_cover,
        "send_chapters": abs_send_chapters,
        "send_subtitles": abs_send_subtitles,
        "auto_send": abs_auto_send,
        "timeout": abs_timeout,
    }
def save_settings(settings: Dict[str, Any]) -> None:
    """Persist *settings* to the abogen config file via save_config."""
    save_config(settings)
import threading
from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, cast
import numpy as np
from abogen.speaker_configs import slugify_label
from abogen.speaker_analysis import analyze_speakers
from abogen.webui.routes.utils.settings import load_settings, settings_defaults, _DEFAULT_ANALYSIS_THRESHOLD, _CHUNK_LEVEL_OPTIONS, _APOSTROPHE_MODE_OPTIONS, _NORMALIZATION_GROUPS
from abogen.webui.routes.utils.common import split_profile_spec
from abogen.voice_profiles import (
load_profiles,
serialize_profiles,
)
from abogen.voice_formulas import get_new_voice, parse_formula_terms
from abogen.constants import (
LANGUAGE_DESCRIPTIONS,
SUBTITLE_FORMATS,
SUPPORTED_SOUND_FORMATS,
SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION,
SAMPLE_VOICE_TEXTS,
VOICES_INTERNAL,
)
from abogen.speaker_configs import list_configs
from abogen.utils import load_numpy_kpipeline
from abogen.webui.conversion_runner import _select_device, _to_float32, SAMPLE_RATE, SPLIT_PATTERN
# Guards lazy creation/access of the cached preview pipelines across threads.
_preview_pipeline_lock = threading.RLock()
# Cache of preview pipelines keyed by a (str, str) tuple.
# NOTE(review): key semantics (presumably language/device) are not visible here — confirm.
_preview_pipelines: Dict[Tuple[str, str], Any] = {}
def build_narrator_roster(
    voice: str,
    voice_profile: Optional[str],
    existing: Optional[Mapping[str, Any]] = None,
) -> Dict[str, Any]:
    """Create a roster containing only the narrator entry.

    Starts from *voice*/*voice_profile*, then overlays any non-empty fields
    from an existing narrator entry so prior user edits survive a rebuild.
    """
    narrator: Dict[str, Any] = {
        "id": "narrator",
        "label": "Narrator",
        "voice": voice,
    }
    if voice_profile:
        narrator["voice_profile"] = voice_profile
    previous = existing.get("narrator") if isinstance(existing, Mapping) else None
    if isinstance(previous, Mapping):
        for field in ("label", "voice", "voice_profile", "voice_formula", "pronunciation"):
            candidate = previous.get(field)
            # Only non-empty values override the freshly built entry.
            if candidate is not None and candidate != "":
                narrator[field] = candidate
    return {"narrator": narrator}
def build_speaker_roster(
    analysis: Dict[str, Any],
    base_voice: str,
    voice_profile: Optional[str],
    existing: Optional[Mapping[str, Any]] = None,
    order: Optional[Iterable[str]] = None,
) -> Dict[str, Any]:
    """Build a roster of narrator plus detected speakers from analysis output.

    Speakers flagged "suppressed" are omitted. Voice assignments, sample
    quotes and detected gender from *existing* roster entries are carried
    over so user edits survive re-analysis. *order*, when given, restricts
    and orders the emitted speaker ids.
    """
    roster = build_narrator_roster(base_voice, voice_profile, existing)
    existing_map: Dict[str, Any] = dict(existing) if isinstance(existing, Mapping) else {}
    speakers = analysis.get("speakers", {}) if isinstance(analysis, dict) else {}
    ordered_ids: Iterable[str]
    if order is not None:
        ordered_ids = [sid for sid in order if sid in speakers]
    else:
        ordered_ids = speakers.keys()
    for speaker_id in ordered_ids:
        payload = speakers.get(speaker_id, {})
        # The narrator entry is managed by build_narrator_roster.
        if speaker_id == "narrator":
            continue
        if isinstance(payload, Mapping) and payload.get("suppressed"):
            continue
        previous = existing_map.get(speaker_id)
        roster[speaker_id] = {
            "id": speaker_id,
            "label": payload.get("label") or speaker_id.replace("_", " ").title(),
            "analysis_confidence": payload.get("confidence"),
            "analysis_count": payload.get("count"),
            "gender": payload.get("gender", "unknown"),
        }
        detected_gender = payload.get("detected_gender")
        if detected_gender:
            roster[speaker_id]["detected_gender"] = detected_gender
        samples = payload.get("sample_quotes")
        if isinstance(samples, list):
            roster[speaker_id]["sample_quotes"] = samples
        if isinstance(previous, Mapping):
            # Preserve user-assigned voice fields from the previous roster entry.
            for key in ("voice", "voice_profile", "voice_formula", "resolved_voice", "pronunciation"):
                value = previous.get(key)
                if value is not None and value != "":
                    roster[speaker_id][key] = value
            # Fall back to previous samples/gender when the fresh analysis lacks them.
            if "sample_quotes" not in roster[speaker_id]:
                prev_samples = previous.get("sample_quotes")
                if isinstance(prev_samples, list):
                    roster[speaker_id]["sample_quotes"] = prev_samples
            if "detected_gender" not in roster[speaker_id]:
                prev_detected = previous.get("detected_gender")
                if isinstance(prev_detected, str) and prev_detected:
                    roster[speaker_id]["detected_gender"] = prev_detected
    return roster
def match_configured_speaker(
    config_speakers: Mapping[str, Any],
    roster_id: str,
    roster_label: str,
) -> Optional[Mapping[str, Any]]:
    """Find the configured speaker record matching a roster speaker.

    Tries, in order: an exact id lookup, a slugified-label lookup, then a
    case-insensitive label comparison against every configured record.
    Returns None when nothing matches.
    """
    if not config_speakers:
        return None
    direct = config_speakers.get(roster_id)
    if direct:
        return cast(Mapping[str, Any], direct)
    slug = slugify_label(roster_label)
    if slug != roster_id and slug in config_speakers:
        return cast(Mapping[str, Any], config_speakers[slug])
    wanted = roster_label.strip().lower()
    for record in config_speakers.values():
        if isinstance(record, Mapping) and str(record.get("label", "")).strip().lower() == wanted:
            return record
    return None
def apply_speaker_config_to_roster(
    roster: Mapping[str, Any],
    config: Optional[Mapping[str, Any]],
    *,
    persist_changes: bool = False,
    fallback_languages: Optional[Iterable[str]] = None,
) -> Tuple[Dict[str, Any], List[str], Optional[Dict[str, Any]]]:
    """Overlay voices from a saved speaker config onto an analysis roster.

    Args:
        roster: Roster keyed by speaker id ("narrator" plus detected speakers);
            non-mapping entries are dropped.
        config: Saved speaker configuration containing a "speakers" mapping,
            or None/empty to apply nothing.
        persist_changes: When True, also build a refreshed config payload
            reflecting the speaker records written back.
        fallback_languages: Languages used when the config declares none.

    Returns:
        ``(updated_roster, allowed_languages, new_config)`` — *new_config* is
        the refreshed payload when *persist_changes* is True and at least one
        speaker record changed, else None.
    """
    def _clean_languages(codes: Optional[Iterable[str]]) -> List[str]:
        # Keep only non-empty string language codes.
        return [code for code in (codes or []) if isinstance(code, str) and code]

    if not isinstance(roster, Mapping):
        return {}, _clean_languages(fallback_languages), None
    updated_roster: Dict[str, Any] = {
        key: dict(value) for key, value in roster.items() if isinstance(value, Mapping)
    }
    if not config:
        return updated_roster, _clean_languages(fallback_languages), None
    speakers_map = config.get("speakers")
    if not isinstance(speakers_map, Mapping):
        return updated_roster, _clean_languages(fallback_languages), None
    config_languages = config.get("languages")
    allowed_languages = _clean_languages(config_languages) if isinstance(config_languages, list) else []
    if not allowed_languages and fallback_languages:
        allowed_languages = _clean_languages(fallback_languages)
    default_voice = config.get("default_voice") if isinstance(config.get("default_voice"), str) else ""
    config_changed = False
    new_config_payload: Dict[str, Any] = {
        "language": config.get("language", "a"),
        "languages": allowed_languages,
        "default_voice": default_voice,
        "speakers": dict(speakers_map),
        "version": config.get("version", 1),
        "notes": config.get("notes", ""),
    }
    speakers_payload = new_config_payload["speakers"]
    for speaker_id, roster_entry in updated_roster.items():
        if speaker_id == "narrator":
            continue
        label = str(roster_entry.get("label") or speaker_id)
        config_entry = match_configured_speaker(speakers_map, speaker_id, label)
        if config_entry is None:
            continue
        voice_id = str(config_entry.get("voice") or "").strip()
        voice_profile = str(config_entry.get("voice_profile") or "").strip()
        voice_formula = str(config_entry.get("voice_formula") or "").strip()
        resolved_voice = str(config_entry.get("resolved_voice") or "").strip()
        languages = config_entry.get("languages") if isinstance(config_entry.get("languages"), list) else []
        # Most specific assignment wins: resolved voice, then formula, then plain id.
        chosen_voice = resolved_voice or voice_formula or voice_id or roster_entry.get("voice")
        usable_languages = languages or allowed_languages
        if chosen_voice:
            roster_entry["resolved_voice"] = chosen_voice
            roster_entry["voice"] = (
                chosen_voice
                if not voice_profile and not voice_formula
                else roster_entry.get("voice", chosen_voice)
            )
        if voice_profile:
            roster_entry["voice_profile"] = voice_profile
        if voice_formula:
            roster_entry["voice_formula"] = voice_formula
            roster_entry["resolved_voice"] = voice_formula
        if not voice_formula and not voice_profile and resolved_voice:
            roster_entry["resolved_voice"] = resolved_voice
        roster_entry["config_languages"] = usable_languages or []
        # Persist updates back to the config payload if required.
        if persist_changes:
            slug = config_entry.get("id") or slugify_label(label)
            record = {
                "id": slug,
                "label": label,
                "gender": config_entry.get("gender", "unknown"),
                "voice": voice_id,
                "voice_profile": voice_profile,
                "voice_formula": voice_formula,
                "resolved_voice": roster_entry.get("resolved_voice", resolved_voice or voice_id),
                "languages": usable_languages,
            }
            # BUGFIX: config_changed was never set before, so the refreshed
            # payload was never returned; flag it when a record actually differs.
            if speakers_payload.get(slug) != record:
                config_changed = True
            speakers_payload[slug] = record
    new_config = new_config_payload if (persist_changes and config_changed) else None
    return updated_roster, allowed_languages, new_config
def filter_voice_catalog(
    catalog: Iterable[Mapping[str, Any]],
    *,
    gender: str,
    allowed_languages: Optional[Iterable[str]] = None,
) -> List[str]:
    """Return voice ids from *catalog* preferring gender/language matches.

    Selection order:
      1. voices matching both the allowed languages and the gender code;
      2. if none, language matches of any gender;
      3. if still none, every voice in the catalog regardless of filters.
    Duplicate ids are suppressed, preserving first-seen order.

    Parameters:
        catalog: iterable of voice entries (dicts with "id", "language",
            "gender_code"); non-mapping items are ignored.
        gender: "male", "female" or anything else (treated as no filter).
        allowed_languages: language codes to restrict to; empty/None means
            no language restriction.
    """
    # Fix: *catalog* may be a one-shot generator, but we need up to two
    # passes (the filtered pass and the unrestricted fallback). Materialize
    # it once so the fallback does not silently see an empty iterable.
    entries: List[Mapping[str, Any]] = [e for e in catalog if isinstance(e, Mapping)]
    allowed_set = {code.lower() for code in (allowed_languages or []) if isinstance(code, str) and code}
    gender_normalized = (gender or "unknown").lower()
    gender_code = ""
    if gender_normalized == "male":
        gender_code = "m"
    elif gender_normalized == "female":
        gender_code = "f"
    matches: List[str] = []
    seen: set[str] = set()

    def _consider(entry: Mapping[str, Any]) -> None:
        # Append the entry's id once; silently skip malformed/duplicate ids.
        voice_id = entry.get("id")
        if not isinstance(voice_id, str) or not voice_id:
            return
        if voice_id in seen:
            return
        seen.add(voice_id)
        matches.append(voice_id)

    primary: List[Mapping[str, Any]] = []
    fallback: List[Mapping[str, Any]] = []
    for entry in entries:
        voice_lang = str(entry.get("language", "")).lower()
        voice_gender_code = str(entry.get("gender_code", "")).lower()
        if allowed_set and voice_lang not in allowed_set:
            continue
        if gender_code and voice_gender_code != gender_code:
            # Right language, wrong gender: keep as a second-tier candidate.
            fallback.append(entry)
            continue
        primary.append(entry)
    for entry in primary:
        _consider(entry)
    if not matches:
        for entry in fallback:
            _consider(entry)
    if not matches:
        # Last resort: ignore both filters entirely.
        for entry in entries:
            _consider(entry)
    return matches
def build_voice_catalog() -> List[Dict[str, str]]:
    """Describe every internal voice id as a catalog entry dict.

    The id prefix before the first underscore encodes the language (first
    character) and, when present, the gender code (second character) —
    e.g. an ``af_*`` id parses as language "a", gender code "f".
    """
    gender_labels = {"f": "Female", "m": "Male"}
    entries: List[Dict[str, str]] = []
    for voice_id in VOICES_INTERNAL:
        prefix, _, remainder = voice_id.partition("_")
        lang = prefix[0] if prefix else "a"
        gcode = prefix[1] if len(prefix) > 1 else ""
        entries.append(
            {
                "id": voice_id,
                "language": lang,
                "language_label": LANGUAGE_DESCRIPTIONS.get(lang, lang.upper()),
                "gender": gender_labels.get(gcode, "Unknown"),
                "gender_code": gcode,
                "display_name": remainder.replace("_", " ").title() if remainder else voice_id,
            }
        )
    return entries
def inject_recommended_voices(
    roster: Mapping[str, Any],
    *,
    fallback_languages: Optional[Iterable[str]] = None,
) -> None:
    """Attach a ``recommended_voices`` id list to each roster entry in place.

    Each speaker's configured languages (``config_languages``) restrict the
    recommendation; *fallback_languages* is used when none are configured.
    """
    catalog = build_voice_catalog()
    defaults = [code for code in (fallback_languages or []) if isinstance(code, str) and code]
    for entry in roster.values():
        if not isinstance(entry, dict):
            continue
        configured = entry.get("config_languages")
        language_pool = configured if isinstance(configured, list) and configured else defaults
        entry["recommended_voices"] = filter_voice_catalog(
            catalog,
            gender=str(entry.get("gender", "unknown")),
            allowed_languages=language_pool,
        )
def extract_speaker_config_form(form: Mapping[str, Any]) -> Tuple[str, Dict[str, Any], List[str]]:
    """Parse a submitted speaker-config form into ``(name, payload, errors)``.

    Accepts either a multidict with ``getlist`` (Werkzeug forms) or a plain
    mapping whose multi-value fields are comma-separated strings. Speaker
    rows are keyed ``speaker-<row>-<field>``; rows without a label are
    dropped. Validation problems are returned as human-readable strings.
    """
    getlist = getattr(form, "getlist", None)

    def _values(field: str) -> List[str]:
        # Multidicts expose getlist(); plain dicts fall back to splitting
        # a comma-separated string.
        if callable(getlist):
            return [str(item).strip() for item in cast(Iterable[Any], getlist(field)) if item]
        raw = form.get(field)
        if isinstance(raw, str):
            return [piece.strip() for piece in raw.split(",") if piece.strip()]
        return []

    name = (form.get("config_name") or "").strip()
    language = str(form.get("config_language") or "a").strip() or "a"
    default_voice = (form.get("config_default_voice") or "").strip()
    notes = (form.get("config_notes") or "").strip()
    try:
        # Clamp the version into a sane range; any parse failure means 1.
        version = max(1, min(int(form.get("config_version") or 1), 9999))
    except (TypeError, ValueError):
        version = 1

    speakers: Dict[str, Dict[str, Any]] = {}
    for row_key in _values("speaker_rows"):

        def _row(suffix: str) -> str:
            return (form.get(f"speaker-{row_key}-{suffix}") or "").strip()

        label = _row("label")
        if not label:
            continue
        gender_raw = (_row("gender") or "unknown").lower()
        gender = gender_raw if gender_raw in {"male", "female", "unknown"} else "unknown"
        voice = _row("voice")
        profile = _row("profile")
        formula = _row("formula")
        speaker_id = _row("id") or slugify_label(label)
        speakers[speaker_id] = {
            "id": speaker_id,
            "label": label,
            "gender": gender,
            "voice": voice,
            "voice_profile": profile,
            "voice_formula": formula,
            "resolved_voice": formula or voice,
            "languages": [],
        }

    payload: Dict[str, Any] = {
        "language": language,
        "languages": [],
        "default_voice": default_voice,
        "speakers": speakers,
        "notes": notes,
        "version": version,
    }
    errors: List[str] = []
    if not name:
        errors.append("Configuration name is required.")
    if not speakers:
        errors.append("Add at least one speaker to the configuration.")
    return name, payload, errors
def prepare_speaker_metadata(
    *,
    chapters: List[Dict[str, Any]],
    chunks: List[Dict[str, Any]],
    analysis_chunks: Optional[List[Dict[str, Any]]] = None,
    voice: str,
    voice_profile: Optional[str],
    threshold: int,
    existing_roster: Optional[Mapping[str, Any]] = None,
    run_analysis: bool = True,
    speaker_config: Optional[Mapping[str, Any]] = None,
    apply_config: bool = False,
    persist_config: bool = False,
) -> tuple[List[Dict[str, Any]], Dict[str, Any], Dict[str, Any], List[str], Optional[Dict[str, Any]]]:
    """Annotate text chunks with speaker assignments and build the roster.

    Returns a 5-tuple of (annotated chunk list, speaker roster, analysis
    payload, applied language codes, updated speaker config or None).

    When *run_analysis* is False, every chunk is attributed to a synthetic
    "narrator" speaker and a minimal analysis payload is synthesized.
    Otherwise ``analyze_speakers`` drives the attribution, optionally
    overlaid with a saved *speaker_config* when *apply_config* is set;
    *persist_config* additionally asks for an updated config payload.
    """
    # Work on copies so the caller's chunk dicts are never mutated.
    chunk_list = [dict(chunk) for chunk in chunks]
    analysis_source = [dict(chunk) for chunk in (analysis_chunks or chunks)]
    threshold_value = max(1, int(threshold))
    analysis_enabled = run_analysis
    settings_state = load_settings()
    # Global preference for languages that random voice picks may use.
    global_random_languages = [
        code
        for code in settings_state.get("speaker_random_languages", [])
        if isinstance(code, str) and code
    ]
    if not analysis_enabled:
        # Fast path: no analysis — everything is spoken by the narrator,
        # and the payload mirrors the shape analyze_speakers would emit.
        for chunk in chunk_list:
            chunk["speaker_id"] = "narrator"
            chunk["speaker_label"] = "Narrator"
        analysis_payload = {
            "version": "1.0",
            "narrator": "narrator",
            "assignments": {str(chunk.get("id")): "narrator" for chunk in chunk_list},
            "speakers": {
                "narrator": {
                    "id": "narrator",
                    "label": "Narrator",
                    "count": len(chunk_list),
                    "confidence": "low",
                    "sample_quotes": [],
                    "suppressed": False,
                }
            },
            "suppressed": [],
            "stats": {
                "total_chunks": len(chunk_list),
                "explicit_chunks": 0,
                "active_speakers": 0,
                "unique_speakers": 1,
                "suppressed": 0,
            },
        }
        roster = build_narrator_roster(voice, voice_profile, existing_roster)
        # Carry the narrator's pronunciation (if any) into the payload.
        narrator_pron = roster["narrator"].get("pronunciation")
        if narrator_pron:
            analysis_payload["speakers"]["narrator"]["pronunciation"] = narrator_pron
        return chunk_list, roster, analysis_payload, [], None
    analysis_result = analyze_speakers(
        chapters,
        analysis_source,
        threshold=threshold_value,
        max_speakers=0,
    )
    analysis_payload = analysis_result.to_dict()
    speakers_payload = analysis_payload.get("speakers", {})
    # Non-narrator, non-suppressed speakers ordered by descending chunk count.
    ordered_ids = [
        sid
        for sid, meta in sorted(
            (
                (sid, meta)
                for sid, meta in speakers_payload.items()
                if sid != "narrator" and isinstance(meta, Mapping) and not meta.get("suppressed")
            ),
            key=lambda item: item[1].get("count", 0),
            reverse=True,
        )
    ]
    analysis_payload["ordered_speakers"] = ordered_ids
    assignments = analysis_payload.get("assignments", {})
    suppressed_ids = analysis_payload.get("suppressed", [])
    suppressed_details: List[Dict[str, Any]] = []
    speakers_payload = analysis_payload.get("speakers", {})
    # Expand suppressed speaker ids into display-friendly detail records,
    # falling back to a title-cased id when no metadata is available.
    if isinstance(suppressed_ids, Iterable):
        for suppressed_id in suppressed_ids:
            speaker_meta = speakers_payload.get(suppressed_id) if isinstance(speakers_payload, dict) else None
            if isinstance(speaker_meta, dict):
                suppressed_details.append(
                    {
                        "id": suppressed_id,
                        "label": speaker_meta.get("label")
                        or str(suppressed_id).replace("_", " ").title(),
                        "pronunciation": speaker_meta.get("pronunciation"),
                    }
                )
            else:
                suppressed_details.append(
                    {
                        "id": suppressed_id,
                        "label": str(suppressed_id).replace("_", " ").title(),
                        "pronunciation": None,
                    }
                )
    analysis_payload["suppressed_details"] = suppressed_details
    roster = build_speaker_roster(
        analysis_payload,
        voice,
        voice_profile,
        existing=existing_roster,
        order=analysis_payload.get("ordered_speakers"),
    )
    applied_languages: List[str] = []
    updated_config: Optional[Dict[str, Any]] = None
    if apply_config and speaker_config:
        roster, applied_languages, updated_config = apply_speaker_config_to_roster(
            roster,
            speaker_config,
            persist_changes=persist_config,
            fallback_languages=global_random_languages,
        )
        # Mirror the config-resolved voice fields back onto the analysis
        # payload so both views of a speaker stay in sync.
        speakers_payload = analysis_payload.get("speakers")
        if isinstance(speakers_payload, dict):
            for roster_id, roster_payload in roster.items():
                speaker_meta = speakers_payload.get(roster_id)
                if isinstance(speaker_meta, dict):
                    for key in ("voice", "voice_profile", "voice_formula", "resolved_voice"):
                        value = roster_payload.get(key)
                        if value:
                            speaker_meta[key] = value
    # Language precedence: config-applied > payload-provided > global setting.
    effective_languages: List[str] = []
    if applied_languages:
        effective_languages = applied_languages
    elif isinstance(analysis_payload.get("config_languages"), list):
        effective_languages = [
            code for code in analysis_payload.get("config_languages", []) if isinstance(code, str) and code
        ]
    elif global_random_languages:
        effective_languages = list(global_random_languages)
    if effective_languages:
        analysis_payload["config_languages"] = effective_languages
    # Copy roster pronunciations into the analysis payload's speaker records.
    speakers_payload = analysis_payload.get("speakers")
    if isinstance(speakers_payload, dict):
        for roster_id, roster_payload in roster.items():
            if roster_id in speakers_payload and isinstance(roster_payload, dict):
                pronunciation_value = roster_payload.get("pronunciation")
                if pronunciation_value:
                    speakers_payload[roster_id]["pronunciation"] = pronunciation_value
    fallback_languages = effective_languages or []
    inject_recommended_voices(roster, fallback_languages=fallback_languages)
    # Finally, stamp every chunk with its assigned speaker id and label;
    # unassigned chunks default to the narrator.
    for chunk in chunk_list:
        chunk_id = str(chunk.get("id"))
        speaker_id = assignments.get(chunk_id, "narrator")
        chunk["speaker_id"] = speaker_id
        speaker_meta = roster.get(speaker_id)
        chunk["speaker_label"] = speaker_meta.get("label") if isinstance(speaker_meta, dict) else speaker_id
    return chunk_list, roster, analysis_payload, applied_languages, updated_config
def formula_from_profile(entry: Dict[str, Any]) -> Optional[str]:
    """Render a profile's weighted voice list as a mix-formula string.

    Weights are normalized to sum to 1 and formatted with at most four
    decimal places; zero-weight voices are dropped. Returns ``None`` when
    the entry has no voices or the weights do not sum to a positive value.
    """
    pairs = entry.get("voices") or []
    if not pairs:
        return None
    total = sum(weight for _, weight in pairs)
    if total <= 0:
        return None

    def _weight_text(raw: float) -> str:
        share = raw / total if total else 0.0
        return (f"{share:.4f}").rstrip("0").rstrip(".") or "0"

    terms = [f"{voice}*{_weight_text(weight)}" for voice, weight in pairs if weight > 0]
    return "+".join(terms) if terms else None
def template_options() -> Dict[str, Any]:
    """Assemble the option payload shared by the voice/speaker templates.

    Bundles language and voice catalogs, serialized voice profiles, saved
    speaker configs, and the relevant persisted settings into one dict.
    """
    settings = load_settings()
    profiles = serialize_profiles()
    profiles_sorted = sorted(profiles.items())
    profile_options = []
    for profile_name, raw_entry in profiles_sorted:
        entry = raw_entry or {}
        profile_options.append(
            {
                "name": profile_name,
                "language": entry.get("language", ""),
                "provider": str(entry.get("provider") or "kokoro").strip().lower(),
                "formula": formula_from_profile(entry) or "",
                "voice": entry.get("voice", ""),
                "total_steps": entry.get("total_steps"),
                "speed": entry.get("speed"),
            }
        )
    catalog = build_voice_catalog()
    return {
        "languages": LANGUAGE_DESCRIPTIONS,
        "voices": VOICES_INTERNAL,
        "subtitle_formats": SUBTITLE_FORMATS,
        "supported_langs_for_subs": SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION,
        "output_formats": SUPPORTED_SOUND_FORMATS,
        "voice_profiles": profiles_sorted,
        "voice_profile_options": profile_options,
        "separate_formats": ["wav", "flac", "mp3", "opus"],
        "voice_catalog": catalog,
        "voice_catalog_map": {item["id"]: item for item in catalog},
        "sample_voice_texts": SAMPLE_VOICE_TEXTS,
        "voice_profiles_data": profiles,
        "speaker_configs": list_configs(),
        "chunk_levels": _CHUNK_LEVEL_OPTIONS,
        "speaker_analysis_threshold": settings.get(
            "speaker_analysis_threshold", _DEFAULT_ANALYSIS_THRESHOLD
        ),
        "speaker_pronunciation_sentence": settings.get(
            "speaker_pronunciation_sentence", settings_defaults()["speaker_pronunciation_sentence"]
        ),
        "apostrophe_modes": _APOSTROPHE_MODE_OPTIONS,
        "normalization_groups": _NORMALIZATION_GROUPS,
    }
def resolve_profile_voice(
    profile_name: Optional[str],
    *,
    profiles: Optional[Mapping[str, Any]] = None,
) -> tuple[str, Optional[str]]:
    """Resolve a named voice profile to its ``(formula, language)`` pair.

    Returns ``("", None)`` when the name is empty or unknown. The profile
    store is loaded on demand unless a *profiles* mapping is supplied.
    """
    if not profile_name:
        return "", None
    store = profiles if isinstance(profiles, Mapping) else load_profiles()
    entry = store.get(profile_name) if isinstance(store, Mapping) else None
    if not isinstance(entry, Mapping):
        return "", None
    formula = formula_from_profile(dict(entry)) or ""
    raw_language = entry.get("language")
    language: Optional[str] = None
    if isinstance(raw_language, str):
        # Empty-after-strip languages collapse to None.
        language = raw_language.strip().lower() or None
    return formula, language
def resolve_voice_setting(
    value: Any,
    *,
    profiles: Optional[Mapping[str, Any]] = None,
) -> tuple[str, Optional[str], Optional[str]]:
    """Split a stored voice setting into ``(voice spec, profile name, language)``.

    Plain specs pass through unchanged; profile-style specs are resolved to
    the profile's mix formula and language.
    """
    spec, profile = split_profile_spec(value)
    if not profile:
        return spec, None, None
    formula, language = resolve_profile_voice(profile, profiles=profiles)
    return formula or "", profile, language
def resolve_voice_choice(
    language: str,
    base_voice: str,
    profile_name: str,
    custom_formula: str,
    profiles: Dict[str, Any],
) -> tuple[str, str, Optional[str]]:
    """Pick the effective voice spec, language and profile for synthesis.

    Precedence: an explicit *custom_formula* overrides any profile choice;
    a named profile overrides *base_voice*. SuperTonic profiles resolve to
    a ``speaker:<name>`` reference so the provider can be chosen per
    speaker (allowing mixed-provider casting); Kokoro-style profiles
    resolve to their mix formula.
    """
    voice_spec = base_voice
    language_out = language
    chosen_profile: Optional[str] = None
    if profile_name:
        from abogen.voice_profiles import normalize_profile_entry

        entry = normalize_profile_entry(profiles.get(profile_name))
        provider = str((entry or {}).get("provider") or "").strip().lower()
        if provider == "supertonic":
            # Discrete voice id + settings: return a speaker reference so
            # downstream code resolves the provider per speaker.
            voice_spec = f"speaker:{profile_name}"
            chosen_profile = profile_name
            profile_language = (entry or {}).get("language")
            if profile_language:
                language_out = str(profile_language)
        else:
            mix = formula_from_profile(entry or {}) if entry else None
            if mix:
                voice_spec = mix
                chosen_profile = profile_name
                profile_language = (entry or {}).get("language")
                if profile_language:
                    language_out = profile_language
    if custom_formula:
        # A hand-written formula always wins and detaches the profile link.
        voice_spec = custom_formula
        chosen_profile = None
    return voice_spec, language_out, chosen_profile
def parse_voice_formula(formula: str) -> List[tuple[str, float]]:
    """Parse a mix formula into (voice, weight) pairs, validating the weights.

    Raises ValueError when the parsed weights do not sum to a positive value.
    """
    pairs = parse_formula_terms(formula)
    if sum(weight for _, weight in pairs) <= 0:
        raise ValueError("Voice weights must sum to a positive value")
    return pairs
def sanitize_voice_entries(entries: Iterable[Any]) -> List[Dict[str, Any]]:
    """Normalize raw voice-entry payloads into ``{"voice", "weight"}`` dicts.

    Dict entries need an ``id`` (or ``voice``) and are skipped when their
    ``enabled`` flag is falsy; 2+-item sequences are treated as
    ``(voice, weight)`` pairs. Anything else is dropped.
    """
    result: List[Dict[str, Any]] = []
    for item in entries or []:
        if isinstance(item, dict):
            identifier = item.get("id") or item.get("voice")
            if identifier and item.get("enabled", True):
                result.append({"voice": identifier, "weight": item.get("weight")})
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            result.append({"voice": item[0], "weight": item[1]})
    return result
def pairs_to_formula(pairs: Iterable[Tuple[str, float]]) -> Optional[str]:
    """Render (voice, weight) pairs as a normalized mix-formula string.

    Non-positive weights are dropped; remaining weights are normalized to
    sum to 1 and formatted with at most four decimal places. Returns
    ``None`` when nothing usable remains.
    """
    positive = [(voice, float(weight)) for voice, weight in pairs if float(weight) > 0]
    if not positive:
        return None
    total = sum(weight for _, weight in positive)
    if total <= 0:
        return None

    def _share_text(value: float) -> str:
        fraction = value / total if total else 0.0
        return (f"{fraction:.4f}").rstrip("0").rstrip(".") or "0"

    return "+".join(f"{voice}*{_share_text(weight)}" for voice, weight in positive)
def profiles_payload() -> Dict[str, Any]:
    """Wrap the serialized voice profiles for a JSON response body."""
    serialized = serialize_profiles()
    return {"profiles": serialized}
def get_preview_pipeline(language: str, device: str):
    """Return a cached Kokoro preview pipeline for ``(language, device)``.

    Pipelines are created lazily and memoized under a lock so concurrent
    preview requests share a single instance per language/device pair.
    """
    cache_key = (language, device)
    with _preview_pipeline_lock:
        cached = _preview_pipelines.get(cache_key)
        if cached is None:
            _, KPipeline = load_numpy_kpipeline()
            cached = KPipeline(lang_code=language, repo_id="hexgrad/Kokoro-82M", device=device)
            _preview_pipelines[cache_key] = cached
        return cached
def synthesize_audio_from_normalized(
    *,
    normalized_text: str,
    voice_spec: str,
    language: str,
    speed: float,
    use_gpu: bool,
    max_seconds: float,
) -> np.ndarray:
    """Synthesize preview audio for *normalized_text*, capped at *max_seconds*.

    Raises ValueError for empty text and RuntimeError when no pipeline is
    available or no audio could be produced. GPU selection failures fall
    back to CPU silently.
    """
    if not normalized_text.strip():
        raise ValueError("Preview text is required")
    device = "cpu"
    if use_gpu:
        try:
            device = _select_device()
        except Exception:
            # GPU probing failed — degrade gracefully to CPU.
            device = "cpu"
            use_gpu = False
    pipeline = get_preview_pipeline(language, device)
    if pipeline is None:
        raise RuntimeError("Preview pipeline is unavailable")
    voice_choice: Any = voice_spec
    # A "*" marks a weighted mix formula; presumably get_new_voice blends it
    # into a single voice object — TODO confirm against its definition.
    if voice_spec and "*" in voice_spec:
        voice_choice = get_new_voice(pipeline, voice_spec, use_gpu)
    segments = pipeline(
        normalized_text,
        voice=voice_choice,
        speed=speed,
        split_pattern=SPLIT_PATTERN,
    )
    audio_chunks: List[np.ndarray] = []
    accumulated = 0  # samples collected so far
    # Enforce at least a one-second cap; SAMPLE_RATE converts seconds -> samples.
    max_samples = int(max(1.0, max_seconds) * SAMPLE_RATE)
    for segment in segments:
        graphemes = getattr(segment, "graphemes", "").strip()
        if not graphemes:
            # Skip segments with no text content (e.g. pure whitespace).
            continue
        audio = _to_float32(getattr(segment, "audio", None))
        if audio.size == 0:
            continue
        remaining = max_samples - accumulated
        if remaining <= 0:
            break
        if audio.shape[0] > remaining:
            # Trim the final segment so the preview never exceeds the cap.
            audio = audio[:remaining]
        audio_chunks.append(audio)
        accumulated += audio.shape[0]
        if accumulated >= max_samples:
            break
    if not audio_chunks:
        raise RuntimeError("Preview could not be generated")
    return np.concatenate(audio_chunks)
from typing import Any, Dict, List, Optional
from flask import Blueprint, render_template, request, jsonify, abort, flash, redirect, url_for
from flask.typing import ResponseReturnValue
from abogen.webui.routes.utils.voice import (
template_options,
resolve_voice_setting,
resolve_voice_choice,
parse_voice_formula,
)
from abogen.webui.routes.utils.settings import load_settings, coerce_bool
from abogen.webui.routes.utils.preview import synthesize_preview
from abogen.speaker_configs import (
list_configs,
get_config,
load_configs,
save_configs,
delete_config,
)
from abogen.constants import VOICES_INTERNAL
# Blueprint holding the voice-profile and speaker-config routes below.
voices_bp = Blueprint("voices", __name__)
@voices_bp.get("/")
def voice_profiles() -> ResponseReturnValue:
return render_template("voices.html", options=template_options())
@voices_bp.post("/test")
def test_voice() -> ResponseReturnValue:
text = (request.form.get("text") or "").strip()
voice = (request.form.get("voice") or "").strip()
speed = float(request.form.get("speed", 1.0))
# This seems to be the form-based preview
settings = load_settings()
use_gpu = coerce_bool(settings.get("use_gpu"), True)
try:
return synthesize_preview(
text=text,
voice_spec=voice,
language="a", # Default language
speed=speed,
use_gpu=use_gpu,
)
except Exception as e:
abort(400, str(e))
@voices_bp.get("/configs")
def speaker_configs() -> ResponseReturnValue:
return jsonify({"configs": list_configs()})
@voices_bp.post("/configs/save")
def save_speaker_config() -> ResponseReturnValue:
payload = request.get_json(force=True)
name = (payload.get("name") or "").strip()
config = payload.get("config")
if not name:
abort(400, "Config name is required")
if not config:
abort(400, "Config data is required")
configs = load_configs()
configs[name] = config
save_configs(configs)
return jsonify({"status": "saved", "configs": list_configs()})
@voices_bp.post("/configs/delete")
def delete_speaker_config() -> ResponseReturnValue:
payload = request.get_json(force=True)
name = (payload.get("name") or "").strip()
if not name:
abort(400, "Config name is required")
delete_config(name)
return jsonify({"status": "deleted", "configs": list_configs()})
@voices_bp.route("/presets", methods=["GET", "POST"])
def speaker_configs_page() -> ResponseReturnValue:
configs = load_configs()
editing_name = request.args.get("config")
message = None
error = None
if request.method == "POST":
try:
name = request.form.get("config_name", "").strip()
if not name:
raise ValueError("Preset name is required")
language = request.form.get("config_language", "en")
speakers = []
row_keys = request.form.getlist("speaker_rows")
for key in row_keys:
s_id = request.form.get(f"speaker-{key}-id", key)
label = request.form.get(f"speaker-{key}-label", "")
gender = request.form.get(f"speaker-{key}-gender", "unknown")
voice = request.form.get(f"speaker-{key}-voice", "")
if label:
speakers.append({
"id": s_id,
"label": label,
"gender": gender,
"voice": voice or None
})
config = {
"name": name,
"language": language,
"speakers": speakers,
"version": 1
}
configs[name] = config
save_configs(configs)
message = f"Preset '{name}' saved."
editing_name = name
except Exception as e:
error = str(e)
editing = configs.get(editing_name, {}) if editing_name else {}
return render_template(
"speakers.html",
options=template_options(),
configs=configs.values(),
editing_name=editing_name,
editing=editing,
message=message,
error=error
)
@voices_bp.post("/presets/<name>/delete")
def delete_speaker_config_named(name: str) -> ResponseReturnValue:
delete_config(name)
return redirect(url_for("voices.speaker_configs_page"))

Sorry, the diff of this file is too big to display

#!/usr/bin/env python3
"""Build PyPI package (wheel and sdist) to `dist` folder for abogen."""
import subprocess
import os
import shutil
import tempfile
def main():
    """Build the abogen wheel and sdist into ``dist/`` via ``python -m build``."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "dist")
    print("🔧 abogen PyPI Package Builder")
    print("=" * 40)
    print(f"📁 Script directory: {script_dir}")
    print(f"📦 Output directory: {output_dir}")
    # Try to print package version if present
    version = None
    version_file = os.path.join(script_dir, "abogen", "VERSION")
    if os.path.isfile(version_file):
        try:
            with open(version_file, "r", encoding="utf-8") as vf:
                version = vf.read().strip()
        except Exception:
            # Version display is informational only; ignore read failures.
            version = None
    if version:
        print(f"🔖 Package version: {version}")
    # Check if build module is installed, install if not
    # Temporarily remove script_dir from sys.path to avoid importing local build.py
    import sys
    original_path = sys.path[:]
    try:
        sys.path = [p for p in sys.path if os.path.abspath(p) != script_dir]
        import build  # imported only to probe availability; name intentionally unused
    except ImportError:
        print("📦 Installing build module...")
        subprocess.run([sys.executable, "-m", "pip", "install", "build"], check=True)
    finally:
        # Always restore the original import path, even if pip install fails.
        sys.path = original_path
    # Create output directory
    print(f"📂 Preparing output directory: {output_dir}")
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    print("🏗️ Building PyPI package...")
    print(" Using temporary directory to avoid module conflicts...")
    # Run from temp directory to avoid local build.py shadowing the build module
    with tempfile.TemporaryDirectory() as tmpdir:
        print(f" Temp directory: {tmpdir}")
        print(" Running: python -m build -o <output_dir> <source_dir>")
        result = subprocess.run(
            [sys.executable, "-m", "build", "-o", output_dir, script_dir],
            check=False,
            cwd=tmpdir,
        )
    print("\n" + "=" * 40)
    if result.returncode == 0:
        print("✅ Build successful!")
        print(f"📦 Files created in {output_dir}:")
        files = os.listdir(output_dir)
        if files:
            for f in files:
                file_path = os.path.join(output_dir, f)
                size = os.path.getsize(file_path)
                print(f" 📄 {f} ({size:,} bytes)")
        else:
            print(" (No files found)")
        print("\n🚀 Ready for upload with:\n")
        print(" - To test on Test PyPI:")
        print(f" python -m twine upload --repository testpypi {output_dir}/*")
        print("\n - To upload to PyPI (when ready):")
        print(f" python -m twine upload {output_dir}/*")
    else:
        print("❌ Build failed!")
        print(f" Exit code: {result.returncode}")
        # Propagate the builder's exit code so CI fails loudly.
        sys.exit(result.returncode)
if __name__ == "__main__":
main()
# 1.3.0
- Special thanks to [@jeremiahsb](https://github.com/jeremiahsb) for his [massive contribution](https://github.com/denizsafak/abogen/pull/120) (>55k lines!) that brought the Web UI, EPUB 3 pipeline, and core architectural improvements to life.
- Added an EPUB 3 packaging pipeline that builds media-overlay EPUBs from generated audio and chunk metadata.
- Persisted chunk timing metadata in job artifacts and exercised the exporter with automated tests.
- Added Flask-based Web UI (`abogen-web`) for Docker and headless server deployments.
- Reorganized codebase to support both PyQt6 desktop GUI and Web UI from a shared core.
- Added Supertonic TTS engine support with GPU acceleration.
- Added entity analysis and pronunciation override system for proper nouns.
- Added speaker/role assignment for multi-voice "theatrical" audiobooks.
- Added Calibre OPDS and Audiobookshelf integration.
# 1.2.5
- Added new option: `Override item settings with current selection` in the queue manager. When enabled, all items in the queue will be processed using the current global settings selected in the main GUI, overriding their individual settings. When disabled, each item will retain its own specific settings.
- Fixed `Error "Could not load the Qt platform plugin "xcb"` error that occurred in some Linux distributions due to missing `libxcb-cursor0` library by conditionally loading the bundled library when the system version is unavailable, issue mentioned by @bmcgonag in #101.
- Fixed the `No module named pip` error that occurred for users who installed Abogen via the [**uv**](https://github.com/astral-sh/uv) installer.
- Fixed defaults for `replace_single_newlines` not being applied correctly in some cases.
- Fixed `Save chapters separately for queued epubs is ignored`, issue mentioned by @dymas-cz in #109.
- Fixed incorrect sentence segmentation when using spaCy, where text would erroneously split after opening parentheses.
- Improvements in code and documentation.
# 1.2.4
- **Subtitle generation is now available for all languages!** Abogen now supports subtitle generation for non-English languages using audio duration-based timing. Available modes include `Line`, `Sentence`, and `Sentence + Comma`. (Note: Word-level subtitle modes remain English-only due to Kokoro's timestamp token limitations.)
- New option: **"Use spaCy for sentence segmentation"** You can now use [spaCy](https://spacy.io/) to automatically detect sentence boundaries and produce cleaner, more readable subtitles. Quick summary:
- **What it does:** Splits text into natural sentences so subtitle entries read better and align more naturally with speech.
- **Why this helps:** The previous punctuation-based splitting could break sentences incorrectly at common abbreviations (e.g. "Mr.", "Dr.", "Prof.") or initials, producing wrong subtitle breaks. spaCy avoids those false splits by using linguistic rules to detect real sentence boundaries.
- **For Non-English:** spaCy runs **before** audio generation to create better sentence chunks for TTS.
- **For English:** spaCy runs **during** subtitle generation to find accurate sentence breaks after TTS.
- **Note:** spaCy segmentation is only applied when subtitle mode is `Sentence` or `Sentence + Comma`. When turned off, it falls back to simple punctuation-based splitting.
- New option: **Pre-download models and voices for offline use** You can now pre-download all required Kokoro models, voices, and spaCy language models using this option in the settings menu. Allowing you to use Abogen completely offline without any internet connection.
- Added support for `.` separator in timestamps (e.g. `HH:MM:SS.ms`) for timestamp-based text files.
- Optimized regex compilation and eliminated busy-wait loops.
- Possibly fixed `Silent truncation of long paragraphs` issue mentioned in [#91](https://github.com/denizsafak/abogen/issues/91) by [@xklzlxr](https://github.com/xklzlxr)
- Fixed unused regex patterns and variable naming conventions.
- Improvements in code and documentation.
# 1.2.3
- Same as 1.2.2, re-released to fix an issue with subtitle timing when using timestamp-based text files.
# 1.2.2
- **You can now voice your subtitle files!** Simply add `.srt`, `.ass` or `.vtt` files to generate timed audio. Alternatively, add a text file with timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format to generate audio that matches the timestamps. See [here](https://github.com/denizsafak/abogen?tab=readme-ov-file#about-timestamp-based-text-files) for detailed instructions.
- New option: **"Use silent gaps between subtitles"**: Prevents unnecessary audio speed-up by letting speech continue into the silent gaps between subtitles.
- New option: **"Subtitle speed adjustment method"**: Choose how to speed up audio when needed:
- **TTS Regeneration (better quality):** Re-generates the audio at a faster speed for more natural sound.
- **FFmpeg Time-stretch (better speed):** Quickly speeds up the generated audio.
- Added support for embedding cover images in M4B files. Abogen now automatically extracts cover images from EPUB and PDF files. You can also manually specify a cover image using the `<<METADATA_COVER_PATH:path>>` tag in your text file. (To prevent MPV from showing the cover image, you can add `audio-display=no` to your MPV config file.)
- Fixed `[WinError 1114] A dynamic link library (DLL) initialization routine failed` error on Windows, pre-loading PyTorch DLLs before initializing PyQt6 to avoid DLL initialization errors, mentioned in #98 by @ephr0n.
- Potential fix for `CUDA GPU is not available` issue, by ensuring PyTorch is installed correctly with CUDA support on Windows using the installer script.
- Improvements in code and documentation.
# 1.2.1
- Upgraded Abogen's interface from PyQt5 to PyQt6 for better compatibility and long-term support.
- Added tooltip indicators in queue manager to display book handler options (`Save chapters separately` and `Merge chapters at the end`) for queued items.
- Added `Open processed file` and `Open input file` options for items in the queue manager, instead of just `Open file` option.
- Added loading gif animation to book handler window.
- Fixed light theme slider colors in voice mixer for better visibility (for non-Windows users).
- Fixed subtitle word-count splitting logic for more accurate segmentation.
- Improvements in code and documentation.
# 1.2.0
- Added `Line` option to subtitle generation modes, allowing subtitles to be generated based on line breaks in the text, by @mleg in #94.
- Added a loading indicator to the book handler window for better user experience during book preprocessing.
- Fixed `cannot access local variable 'is_narrow'` error when subtitle format `SRT` was selected, mentioned by @Kinasa0096 in #88.
- Fixed folder and filename sanitization to properly handle OS-specific illegal characters (Windows, Linux, macOS), ensuring compatibility across all platforms when creating chapter folders and files.
- Fixed `/` and `\` path display by normalizing paths.
- Fixed book reprocessing issue where books were being processed every time the chapters window was opened, improving performance when reopening the same book.
- Fixed taskbar icon not appearing correctly in Windows.
- Fixed "Go to folder" button not opening the chapter output directory when only separate chapters were generated.
- Improvements in code and documentation.
# 1.1.9
- Fixed the issue where spaces were deleted before punctuation marks while generating subtitles.
- Fixed markdown TOC generation breaks when "Replace single newlines" is enabled.
- Improvements in code and documentation.
# 1.1.8
- Added `.md` (Markdown) file extension support by @brianxiadong in PR #75
- Added new option `Configure silence between chapters` that lets you configure the silence between chapters, mentioned by @lfperez1982 in #79
- Better indicators and options while displaying and managing the input and processing files.
- Improved the markdown logic to better handle various markdown structures and cases.
- Fixed subtitle splitting before commas by combining punctuation with preceding words.
- Fixed save options not working correctly in queue mode, mentioned by @jborza in #78
- Fixed `No Qt platform plugin could be initialized` error, mentioned by @sunrainxyz in #59
- Fixed ordered list numbers not being included in EPUB content conversion. The numbers are now properly included in the converted content, mentioned by @jefro108 in #47
- Potentially fixed subtitle generation getting stuck at 9:59:59, mentioned by @bolaykim in #73
- Improvements in code and documentation.
# 1.1.7
- Added MPS GPU acceleration support for Silicon Mac, mentioned in https://github.com/denizsafak/abogen/issues/32#issuecomment-3155902040 by @jefro108. **Please read the [Mac](https://github.com/denizsafak/abogen?tab=readme-ov-file#mac) section in the documentation again, as it requires additional configuration.**
- Added word-by-word karaoke highlighting feature by @robmckinnon in PR #65
- Fixed sleep inhibition error occurring on some Linux systems that do not use systemd, mentioned in #67 by @hendrack
- Improvements in code and documentation.
# 1.1.6
- Improved EPUB chapter detection: Now reliably detects chapters from NAV HTML (TOC) files, even in non-standard EPUBs, fixes the issue mentioned by @jefro108 in #33
- Fixed SRT subtitle numbering issue, mentioned by @page-muncher in #41
- Fixed missing chapter contents issue in some EPUB files.
- Windows installer script now prompts the user to install the CUDA version of PyTorch even if no NVIDIA GPU is detected.
- Abogen now includes Mandarin Chinese (misaki[zh]) by default; manual installation is no longer required.
# 1.1.5
- Changed the temporary directory path to user's cache directory, which is more appropriate for storing cache files and avoids issues with unintended cleanup.
- Fixed the issue where extra metadata information was not being saved to M4B files when they have no chapters, ensuring that all metadata is correctly written to the output file.
- Fixed sleep prevention process not ending if program exited using Ctrl+C or kill.
- Improved automatic filename suffixing to better prevent overwriting files with the same name, even if they have different extensions.
- Improvements in code and documentation.
# 1.1.4
- Fixed extra metadata information not being saved to M4B files, ensuring that all metadata is correctly written to the output file.
- Reformatted the code using Black for better readability and consistency.
# 1.1.3
- `M4B (with chapters)` generation is faster now, as it directly generates `m4b` files instead of converting from `wav`, which significantly reduces processing time, fixes the issue mentioned by @Milor123 in #39
- Better sleep state handling for Linux.
- The app window now tries to fit the screen if its height would exceed the available display area.
- Fixed issue where the app would not restart properly on Windows.
- Fixed last sentence/subtitle entry timing in generated subtitles, the end time of the final subtitle entry now correctly matches the end of the audio chunk, preventing zero or invalid timings at the end.
# v1.1.2
- Now you can play the audio files while they are processing.
- Audio and subtitle files are now written directly to disk during generation, which significantly reduces memory usage.
- Added a better logic for detecting chapters from the epub, mentioned by @jefro108 in #33
- Added a new option: `Reset to default settings`, allowing users to reset all settings to their default values.
- Added a new option: `Disable Kokoro's internet access`. This lets you prevent Kokoro from downloading models or voices from HuggingFace Hub, which can help avoid long waiting times if your computer is offline.
- HuggingFace Hub telemetry is now disabled by default for improved privacy. (HuggingFace Hub is used by Kokoro to download its models)
- Potential fix for #37 and #38, where the program was becoming slow while processing large files.
- Fixed `Open folder` and `Open file` buttons in the queue manager GUI.
- Improvements in code structure.
# v1.1.1
- Fixed adding wrong file in queue for EPUB and PDF files, ensuring the correct file is added to the queue.
- Reformatted the code using Black.
# v1.1.0
- Added queue system for processing multiple items, allowing users to add multiple files and process them in a queue, mentioned by @jborza in #30 (Special thanks to @jborza for implementing this feature in PR #35)
- Added a feature that allows selecting multiple items in book handler (in right click menu) by @jborza in #31, that fixes #28
- Added dark theme support, allowing users to switch between light and dark themes in the settings.
- Added auto-accept system to the chapter options dialog in conversion process, allowing the dialog to auto-accept after a certain time if no action is taken.
- Added new option: `Configure max lines in log window` that allows configuring the maximum number of lines to display in the log window.
- Improvements in documentation and code.
# v1.0.9
- Added chunking/segmenting system that fixes memory outage issues when processing large audio files.
- Added new option: `Subtitle format`, allowing users to choose between `srt` , `ass (wide)`, `ass (narrow)`, and `ass (centered wide)` and `ass (centered narrow)`
- Improved chapter filename generation with smart word-boundary truncation at 80 characters, preventing mid-word cuts in filenames.
- `Composer` and `Genre` metadata fields for M4B files are now editable from the text editor.
- Improvements in documentation and code.
# v1.0.8
- Added support for AMD GPUs in Linux (Special thanks to @hg000125 for his contribution in #23)
- Added voice preview caching system that stores generated previews in the cache folder, mentioned by @jborza in #22
- Added extra metadata support for chaptered M4B files, ensuring better compatibility with audiobook players.
- Added new option: `Separate chapters audio format`, allowing to choose between `wav`, `mp4`, `flac` and `opus` formats for chaptered audio files.
- Added a download tracker that displays informative messages while downloading Kokoro models or voices from HuggingFace.
- Skipping PyTorch CUDA installation if GPU is not NVIDIA in WINDOWS_INSTALL.bat script, preventing unnecessary installation of PyTorch.
- Removed `abogen_` prefix that was being added to converted books in temp directory.
- Fixed voice preview player keeps playing silently at the background after preview ends.
- Fixed not writing separate chapters audio when output is OPUS.
- Improved input box background color handling, fixed display issues in Linux.
- Updated profile and voice mixer icons, better visibility and aesthetics in voice mixer.
- Better sleep state handling for Linux.
- Improvements in documentation and code.
# v1.0.7
- Improved chaptered audio generation by outputting directly as `m4b` instead of converting from `wav`.
- Ignore chapter markers and single newlines when calculating text length, improving the accuracy of the text length calculation.
- Prevent cancellation if process is at 99%, ensuring the process is not interrupted at the last moment.
- Improved process handling for subprocess calls, ensuring better management of subprocesses.
- Improved PDF handling, ignoring empty pages/chapters and better chapter handling.
- Added `Save in a project folder with metadata` option in the book handler, allowing users to save the converted items in a project folder with available metadata files. Useful if you want to work with the converted files in the future, issue mentioned by @Darthagnon in #15
- Added `Go to folder` button in input box, allowing users to open the folder containing the converted file.
- Added `.opus` as output format for generated audio files, which is a more efficient format for audio files.
- Added `Create desktop shortcut and install` option to Linux version, allowing users to create a shortcut and install the application.
- Added "Playing..." indicator for "Preview" button in the voice mixer.
# v1.0.6
- Added `Insert chapter marker` button in text editor to insert chapter markers at the current cursor position.
- Added `Preview` button in voice mixer to preview the voice mix with the selected settings.
- Fixed `f-string: unmatched '['` error in Voice preview, mentioned in #14
- Fixed the issue with the content before first chapter not being included in the output.
- Fixed m4b chapter generation opens CMD window in Windows.
# v1.0.5
- Added new output format: `m4b`, enabling chapter metadata in audiobooks. Special thanks to @jborza for implementing this feature in PR #10.
- Better approach for determining the correct configuration folder for Linux and MacOS, using platformdirs. (Fixes Docker issue #12)
- Improvements in documentation and code.
# v1.0.4
- Merge pull request [#7](https://github.com/denizsafak/abogen/pull/7) by @jborza that improves voice preview and documentation.
- Fixed the issue when a voice is selected, the voice mixer tries to pre-select that voice and ignores existing profiles.
- Fixed the error while renaming the default "New profile" in the voice mixer.
- Fixed subtitle_combo enabling/disabling when a voice in the voice mixer is selected.
- Prevented using special characters in the profile name to avoid conflicts.
- Improved invalid profile handling in the voice mixer.
# v1.0.3
- Added voice mixing, allowing multiple voices to be combined into a single "Mixed Voice", a feature mentioned by @PulsarFTW in #1. Special thanks to @jborza for making this possible through his contributions in #5.
- Added profile system to voice mixer, allowing users to create and manage multiple voice profiles.
- Improvements in the voice mixer, mostly for organizing controls and enhancing user experience.
- Added icons for flags and genders in the GUI, making it easier to identify different options.
- Improved the content and chapter extraction process for EPUB files, ensuring better handling of various structures.
- Switched to platformdirs for determining the correct desktop path, instead of using old methods.
- Fixed preview voices was not using GPU acceleration, which was causing performance issues.
- Improvements in code and documentation.
# v1.0.2
- Enhanced EPUB handling by treating all items in chapter list (including anchors) as chapters, improving navigation and organization for poorly structured books, mentioned by @Darthagnon in #4
- Fixed the issue with some chapters in EPUB files had missing content.
- Fixed the issue with some EPUB files only having one chapter caused the program to ignore the entire book.
- Fixed "'utf-8' codec can't decode byte" error, mentioned by @nigelp in #3
- Added "Replace single newlines with spaces" option in the menu. This can be useful for texts that have imaginary line breaks.
- Improvements in code and documentation.
# v1.0.1
- Added abogen-cli command for better troubleshooting and error handling.
- Switched from setuptools to hatchling for packaging.
- Added classifiers to the package metadata.
- Fixed "No module named 'docopt'" and "setuptools.build_meta" import errors while using .bat installer in Windows, mentioned by @nigelp in #2
- Improvements in code and documentation.
# Docker Compose for Abogen Web UI (Flask-based interface)
#
# This configuration runs the web-based Flask UI for Abogen.
# For the Qt desktop UI, see the upstream project's docker configuration.
#
# Usage:
# docker compose -f docker-compose.webui.yml up --build
#
# Or set as default:
# docker compose up --build
#
services:
abogen-webui:
build:
context: .
dockerfile: abogen/webui/Dockerfile
args:
TORCH_INDEX_URL: ${TORCH_INDEX_URL:-https://download.pytorch.org/whl/cu124}
TORCH_VERSION: ${TORCH_VERSION:-}
image: abogen-webui:latest
user: "${ABOGEN_UID:-1000}:${ABOGEN_GID:-1000}"
ports:
- "${ABOGEN_PORT:-8808}:8808"
volumes:
- ${ABOGEN_DATA:-./data}:/data
- ${ABOGEN_SETTINGS_DIR:-./config}:/config
- ${ABOGEN_OUTPUT_DIR:-./storage/output}:/data/outputs
- ${ABOGEN_TEMP_DIR:-./storage/tmp}:/data/cache
environment:
ABOGEN_HOST: 0.0.0.0
ABOGEN_PORT: 8808
ABOGEN_SETTINGS_DIR: "/config"
ABOGEN_UPLOAD_ROOT: /data/uploads
ABOGEN_OUTPUT_DIR: "/data/outputs"
ABOGEN_OUTPUT_ROOT: "/data/outputs"
ABOGEN_TEMP_DIR: "/data/cache"
ABOGEN_VOICE_CACHE_DIR: "/data/voice-cache"
HF_HOME: "/data/huggingface"
HUGGINGFACE_HUB_CACHE: "/data/huggingface/hub"
HOME: "/tmp/abogen-home"
# --- GPU support -----------------------------------------------------
# These settings assume the NVIDIA Container Toolkit is installed.
# Leave them in place for GPU acceleration; comment out the `devices`
# reservation below if you are deploying to a CPU-only host (the CPU and
# memory limits can stay).
deploy:
resources:
limits:
cpus: '4.0'
memory: 8G
reservations:
devices:
- capabilities: [gpu]
# driver: nvidia
# count: all
# Runtime flag is only honored by legacy docker-compose (v1) CLI.
# Uncomment if you're still using it:
# runtime: nvidia
restart: unless-stopped
# Docker Compose for Abogen
#
# This configuration runs the Flask-based Web UI for Abogen.
# The Web UI provides a browser-based interface for audiobook generation.
#
# Usage:
# docker compose up --build
#
# Access the web interface at http://localhost:8808
#
# Network modes:
# - Set ABOGEN_NETWORK_MODE=host in .env to use host networking
# (required for accessing LAN resources like Calibre OPDS)
# - Leave unset or use "bridge" for isolated container networking
#
services:
abogen:
build:
context: .
dockerfile: abogen/webui/Dockerfile
args:
TORCH_INDEX_URL: ${TORCH_INDEX_URL:-https://download.pytorch.org/whl/cu126}
TORCH_VERSION: ${TORCH_VERSION:-}
USE_GPU: ${USE_GPU:-true}
image: abogen:latest
user: "${ABOGEN_UID:-1000}:${ABOGEN_GID:-1000}"
network_mode: ${ABOGEN_NETWORK_MODE:-bridge}
ports:
- "${ABOGEN_PORT:-8808}:8808"
volumes:
- ${ABOGEN_DATA:-./data}:/data
- ${ABOGEN_SETTINGS_DIR:-./config}:/config
- ${ABOGEN_OUTPUT_DIR:-./storage/output}:/data/outputs
- ${ABOGEN_TEMP_DIR:-./storage/tmp}:/data/cache
environment:
ABOGEN_HOST: 0.0.0.0
ABOGEN_PORT: 8808
ABOGEN_SETTINGS_DIR: "/config"
ABOGEN_UPLOAD_ROOT: /data/uploads
ABOGEN_OUTPUT_DIR: "/data/outputs"
ABOGEN_OUTPUT_ROOT: "/data/outputs"
ABOGEN_TEMP_DIR: "/data/cache"
ABOGEN_VOICE_CACHE_DIR: "/data/voice-cache"
HF_HOME: "/data/huggingface"
HUGGINGFACE_HUB_CACHE: "/data/huggingface/hub"
HOME: "/tmp/abogen-home"
# --- GPU support -----------------------------------------------------
# These settings assume the NVIDIA Container Toolkit is installed.
# Leave them in place for GPU acceleration; comment out the `devices`
# reservation below if you are deploying to a CPU-only host (the CPU and
# memory limits can stay).
deploy:
resources:
limits:
cpus: '4.0'
memory: 8G
reservations:
devices:
- capabilities: [gpu]
# driver: nvidia
# count: all
# Runtime flag is only honored by legacy docker-compose (v1) CLI.
# Uncomment if you're still using it:
# runtime: nvidia
restart: unless-stopped
# Entities Step Overhaul Plan
## Requirements Recap
- Integrate part-of-speech (POS) tagging to detect proper nouns with better precision.
- Rename Step 3 of the wizard from **Speakers** to **Entities** everywhere (routes, templates, copy, JS).
- Introduce a sub-navigation immediately below the step indicators with three tabs: **People**, **Entities**, **Manual Overrides**.
- Populate tabs with appropriate data:
- **People**: characters with dialogue/speech evidence.
- **Entities**: non-person proper nouns (organizations, places, artefacts, etc.).
- **Manual Overrides**: user-added entries with search-driven selection, pronunciation editing, and voice assignment tools.
- Allow manual overrides to:
- Search for tokens present in the uploaded manuscript/EPUB.
- Configure pronunciations and pick a voice (defaulting to narrator voice).
- Trigger previews using the same audio preview logic as other steps.
- Provide voice selection dropdowns (with auto-generate, browse, clear, etc.) for People and Manual Override rows.
- Tighten extraction logic so only proper nouns surface (no "The", "That", etc.).
- Normalise detected names by removing titles ("Mr.", "Dr.") and possessives ("Bob's" -> "Bob").
- Retain expandable sample paragraphs for context ("Preview full text" pattern) in the People tab and wherever excerpts appear.
- Persist pronunciation overrides in a shared store so recurring entities automatically preload past settings.
- Apply pronunciation overrides to every preview request and final conversion so TTS always respects user inputs.
- Add a help page documenting phonetic spelling techniques (inspired by the CMU guide) and surface it via a contextual tooltip/icon inside Step 3.
## Additional Considerations & Assumptions
- POS tagging scope is English-only for the initial release; spaCy will process the manuscript once and cache results so repeated visits to Step 3 reuse the parsed doc.
- spaCy core is MIT-licensed while the bundled `en_core_web_sm` model is CC BY-SA 3.0; we must include attribution and ensure redistribution remains compliant with the share-alike terms when packaging the model.
- spaCy may surface unusual proper nouns (e.g., fantasy names); users can leave them unchanged or override as desired.
- Manual overrides should persist with the pending job so that they can influence subsequent steps and final conversion. We likely need to extend pending job JSON storage and final job payloads.
- People tab currently depends on `pending.speakers` generated in `speaker_analysis.py`. Re-architecting should avoid breaking existing downstream behaviour (e.g., queueing with selected voices).
- Entities tab is new; we need to decide what metadata to display (count, first occurrence, sample sentences) and how it affects conversion (e.g., optional pronunciations, tags?). For now, assume read-only insights with optional pronunciation overrides similar to People.
- Voice preview/generation flows already live in `prepare.js`; ensure refactors keep a single source of truth to avoid duplication.
## Linguistic & Data Strategy
1. **POS Tagging Research & Adoption**
- Leverage **spaCy** (>=3.5) for tokenisation, POS tagging, and named entity recognition (NER). It offers:
- Accurate POS tags for proper nouns (`PROPN`).
- Entity type labels (`PERSON`, `ORG`, `GPE`, etc.) that can help route to People vs Entities.
- Add `spacy` to dependencies and document model installation (`en_core_web_sm` minimum). Provide fallbacks:
- If model missing, prompt friendly error and skip advanced detection rather than failing job.
- Future extension: allow language-specific models per job language (English default, warn otherwise).
2. **Proper Noun Filtering Logic**
- Process each chapter/chunk through spaCy pipeline.
- For each token / entity:
- Keep tokens tagged `PROPN` or NER labelled as proper nouns.
- Discard stopwords and determiners even if mislabelled (helps avoid "The", "That").
- Normalise by removing leading titles (`Mr.`, `Dr.`, `Lady`, etc.) and trailing possessives (`'s`, `’s`).
- Merge contiguous proper nouns into multi-word names (spaCy entity spans help).
- Build frequency map; attach contextual snippets (e.g., surrounding sentence) for each.
- Classify as Person vs Entity:
- If entity label `PERSON` or strongly associated with dialogue attribution (existing heuristics), treat as **Person**.
- Otherwise, map to **Entity**; optionally infer subtypes (Org, Place) for later enhancements.
3. **Integration with Existing Speaker Analysis**
- Reuse dialogue-based detection (`speaker_analysis.py`) for People to keep gender heuristics and sample quotes.
- Align IDs: ensure People tab entries map to existing speaker IDs so voice selections propagate to final job.
- Entities tab can draw from new data structure, decoupled from `speaker_analysis` but referencing chapter/chunk indices.
4. **Manual Overrides Workflow**
- Backend:
- Maintain `pending.manual_overrides` list containing `token`, `normalised_label`, `pronunciation`, `voice`, `notes`, `context`, while syncing to a persistent overrides table (e.g., SQLite) keyed by normalised token + language so history is reused across projects. Manual entries do not require spaCy detection—users can add arbitrary tokens.
- On load, hydrate the pending list with any matching historical overrides before rendering Step 3.
- Provide API endpoints:
1. `GET` suggestions for a search query (scan processed tokens + raw text indexes).
2. `POST` create/update override entries.
3. `DELETE` override.
- Frontend:
- Search input with debounced calls to suggestion endpoint; results list to choose target word/phrase.
- Once selected, show pronunciation input, voice picker (reusing component from People), preview buttons.
- Allow manual entry of custom tokens when no suggestion matches (spaCy not required).
- Persist changes via AJAX (same pattern as existing speaker updates if possible) or within form submission when continuing.
## Implementation Plan
1. **Backend Enhancements**
- Add spaCy dependency and lazy-load model in `speaker_analysis.py` or a new `entity_analysis.py` module.
- Cache parsed spaCy documents per pending job (disk-backed or memoized) so repeated analysis reuses existing results without reprocessing the manuscript.
- Implement `extract_entities(chapters, language, config)` returning structure:
```python
{
"people": [
{"id": "speaker_1", "label": "Bob", "count": 12, "samples": [...], ...}
],
"entities": [
{"id": "entity_1", "label": "Starfleet", "kind": "ORG", "count": 5, "snippets": [...]}
],
"index": {...} # for search/autocomplete
}
```
- Enhance normalisation function to strip titles/possessives and collapse whitespace/diacritics consistently.
- Integrate entity output into pending job serialization so Step 3 view can render tabs without recomputation.
- Update job finalisation logic to include manual overrides and entity-derived metadata (for future TTS improvements).
- Introduce a persistent pronunciation overrides repository (SQLite via SQLAlchemy layer) shared across jobs/instances, with migrations and CRUD helpers.
- Apply pronunciation overrides to preview/conversion pipelines by substituting text prior to TTS synthesis (covering narrator defaults, People tab assignments, Entities tab items, and manual overrides on every TTS run).
2. **Template & UI Updates**
- Rename Step 3 to **Entities** in all templates (`prepare_speakers.html`, upload modal partial, step indicator macros).
- Refactor `prepare_speakers.html` to:
- Wrap content in tabbed interface (likely `<div role="tablist">` + panels).
- Tab panels:
1. **People**: existing speaker list; adjust headings and copy.
2. **Entities**: new list/grid showing non-person entities with counts and sample context; include optional pronunciation/voice controls if relevant.
3. **Manual Overrides**: search box, selected override editing form, table of current overrides.
- Ensure sample paragraphs remain behind a collapsible disclosure control (link + `<details>` as today).
- Place a help icon near pronunciation inputs; focusing/hovering reveals tooltip text summarising phonetic spelling tips and links to the full guide.
- Update CSS to style tabs consistent with modal aesthetic, including tooltip styling for the help icon.
- Add a dedicated phonetic spelling help page (e.g., `phonetic-pronunciation.html`) sourced from the CMU reference with attribution, linked from the tooltip and main help menu.
3. **Frontend Logic (`prepare.js`)**
- Introduce tab controller managing focus and ARIA attributes.
- Wire People tab voice dropdowns to existing preview logic; extend to manual overrides entries.
- Implement search suggestions for manual overrides (debounce, fetch, render list, handle selection).
- Ensure previews use existing `data-role="speaker-preview"` pipeline; extend dataset attributes as needed.
- Persist override edits either via hidden inputs or asynchronous saves; align with form submission semantics.
4. **APIs & Routing**
- Add Flask routes under `routes.py` or `web/service.py` for:
- `/pending/<id>/entities` (fetch processed entity data if not already included in template context).
- `/pending/<id>/overrides` (CRUD operations for manual overrides).
- Ensure permissions and CSRF tokens align with existing patterns.
5. **Data Persistence**
- Expand pending job model (likely stored in `queue_manager_gui.py` / `queued_item.py`) to keep:
- `entity_summary` snapshot (people/entities lists).
- `manual_overrides` list with user edits.
- Cached spaCy doc metadata (hash of source + serialized parse) to avoid reprocessing unchanged texts.
- Introduce persistent `pronunciation_overrides` table (SQLite) keyed by normalised token + language, storing pronunciation, preferred voice, notes, and usage metadata for reuse across projects.
- On finalise, merge overrides into job metadata so downstream conversion can honour pronunciations/voices and sync any changes back to the shared table.
6. **Testing Strategy**
- Unit tests for new normalisation and POS filtering functions (ensure "The", "That" excluded; "Bob's" normalised).
- Integration tests to confirm People tab still flows, manual overrides persist, Entities tab populates expected data.
- Add regression tests ensuring Step 3 rename does not break existing forms (e.g., `test_prepare_form.py`).
- Consider snapshot tests for API JSON structures.
- Add automated checks that pronunciation overrides apply to preview playback and conversion payloads for People and Entities entries alike.
7. **Documentation & Ops**
- Update README / docs with new Step 3 name and manual override instructions.
- Provide guidance for installing spaCy model (e.g., `python -m spacy download en_core_web_sm`).
- Document spaCy/model licensing obligations (MIT for core, CC BY-SA for small model) and add attribution in app credits/help page.
- Publish phonetic spelling help page content and link it from the tooltip/icon in Step 3 and support docs.
## Open Questions / Follow-Ups
- Should the Entities tab allow voice assignments that influence TTS, or is it informational only? Resolved: yes — it should include voice assignments that influence TTS.
- Manual override search scope: entire text vs detected proper nouns? Current plan searches raw text and entity index.
- Performance: confirm caching strategy (e.g., store spaCy Doc pickles vs. rebuilding from serialized spans) to balance speed and storage.
## Next Steps
1. Validate spaCy dependency choice and licensing obligations (MIT core, CC BY-SA model) with stakeholders.
2. Finalise data contracts for entities, overrides, and the persistent pronunciation history schema.
3. Implement backend entity extraction, cached spaCy parsing, override hydration, and the TTS substitution pipeline.
4. Refactor frontend Step 3 UI with tabs, help icon/tooltip, and updated voice controls.
5. Build manual override search/edit UX wired to the shared overrides store and preview flow.
6. Update documentation (including phonetic guide) and expand automated tests.
# EPUB 3 Upgrade Plan
## Overview
Elevate Abogen to produce rich EPUB 3 packages with synchronized narration, configurable TTS chunking, and groundwork for multi-speaker voice assignment. This document records the objectives, architectural adjustments, data model changes, UI flows, and implementation phases required to deliver the upgrade.
## Goals
- Generate EPUB 3 output that preserves source metadata and embeds audio narration via media overlays.
- Allow users to choose the chunking granularity (paragraph vs. sentence) used for TTS synthesis and media-overlay alignment.
- Introduce speaker assignments for every chunk, starting with a single narrator but paving the way for multi-speaker control.
- Prototype practical, lightweight strategies for detecting likely speakers and estimating their dialogue frequency.
## Non-goals / Out-of-scope
- Full multi-speaker editing UI (beyond gating the option).
- Automatic voice-casting or LLM-based dialogue attribution.
- Desktop GUI resurrection (web UI remains primary).
## Current Architecture Snapshot
| Area | Notes |
| --- | --- |
| Text ingestion | `abogen/text_extractor.py` outputs `ExtractionResult` with chapter-level text. |
| Job prep UI | `web/routes.py` builds `PendingJob` objects and renders chapter selection. |
| Audio pipeline | `web/conversion_runner.py` creates per-job audio artifacts; chunking is effectively paragraph-level. |
| Metadata | `ExtractionResult.metadata` feeds into FFmpeg metadata and output tagging, but not yet into EPUB packaging. |
## Feature 1 – EPUB 3 Output with Narration
### Requirements
- Preserve original EPUB metadata (Dublin Core entries, TOC, cover art).
- Package synthesized audio and SMIL media overlays aligned to chosen chunk granularity.
- Provide EPUB as an additional selectable output alongside current audio/subtitle formats.
### Proposed Components
1. **`abogen/epub3/exporter.py`** (new module)
- Responsibilities: build XHTML spine with IDs, generate overlay SMIL files, write OPF manifest/spine, assemble zip package.
- Status: **Implemented** — `build_epub3_package` emits EPUB 3 archives with media overlays driven by chunk metadata.
- Dependencies: reuse `ebooklib` for reading source metadata; use `zipfile` for packaging; optional `lxml` for DOM manipulation.
2. **`EPUB3PackageBuilder` class**
- Inputs: extraction payload, chunk collection (with IDs, speaker mapping, timing metadata), audio asset paths, source metadata.
- Outputs: path to generated EPUB.
3. **Metadata preservation**
- Copy from source `ExtractionResult.metadata` and EPUB navigation if available.
- Ensure custom fields (e.g., chapter count) survive.
4. **Media overlay generation**
- Create one SMIL per content doc or per chapter, depending on chunk count.
- `<par>` nodes reference chunk IDs and audio clip times.
5. **Configuration surface**
- Add “EPUB 3 (audio + text)” to output format selector (or a dedicated toggle under project settings).
### Data Flow
```
extract_from_path -> Chapter payload
|-> chunker (sentence/paragraph)
|-> chunk IDs + audio segments (timestamps from runner)
Conversion runner -> audio files + timing index
EPUB3PackageBuilder -> manifest, spine, SMIL, zip
```
### Open Questions
- Should we embed audio inside the EPUB or link externally? (Plan: embed to comply with spec.)
- How to handle very large audio assets? Consider splitting per chapter to keep file sizes manageable.
## Feature 2 – Configurable Chunking
### Requirements
- Users select chunking level (paragraph or sentence) before audio generation.
- Pipeline produces stable, unique IDs for each chunk regardless of level.
- Provide chunk metadata (text, speaker, offsets) to both TTS and EPUB exporter.
### Proposed Architecture
1. **Chunk Model**
```python
@dataclass
class Chunk:
id: str
chapter_index: int
order: int
level: Literal["paragraph", "sentence"]
text: str
speaker_id: str
approx_characters: int
```
2. **Chunker Service (`abogen/chunking.py`)**
- Accepts chapter text and desired level.
- Uses spaCy (already bundled via `en-core-web-sm`) for sentence segmentation; fallback to regex when model unavailable.
- Emits `Chunk` objects with deterministic IDs (e.g., `chap{chapter_index:04d}_para{paragraph_idx:03d}_sent{sentence_idx:03d}`).
3. **Integration points**
- `web/routes.py` -> apply chunker when building `PendingJob` instead of storing raw paragraphs only.
- `PendingJob` / `Job` dataclasses -> include `chunks` list and `chunk_level` enum.
- `conversion_runner` -> iterate over `chunks` when synthesizing audio, producing per-chunk audio and capturing actual duration for overlay.
4. **Settings persistence**
- Extend config with `chunking_level` default; expose in UI (radio buttons or select).
### Testing
- Unit tests for chunk splitting across languages, punctuation, abbreviations.
- Property-based tests ensuring concatenated chunks reproduce original text (except whitespace normalization).
## Feature 3 – Speaker Assignment Foundations
### Requirements
- Every chunk must carry a `speaker_id` (default `narrator`).
- UI offers new option: “Single Speaker” (proceeds) vs. “Multi-Speaker (Coming Soon)” (blocks and shows message).
- Data model anticipates future multi-speaker support.
### Implementation Outline
1. **Data Model Changes**
- `Chunk.speaker_id` default `"narrator"`.
- `PendingJob` & `Job` store `speakers` metadata (dictionary of speaker descriptors).
- `JobResult` optionally includes `chunk_speakers.json` artifact for downstream use.
2. **UI Adjustments**
- On upload form (`index.html` / JS), add selector for speaker mode.
- If “Multi-Speaker” is chosen, display a tooltip/modal — “Coming soon; please choose Single Speaker to continue.” — and disable submission.
- In `prepare_job.html`, display speaker info column (read-only for now).
3. **Serialization**
- Update JSON API routes to include speaker data.
- Update queue/job detail templates to show chunk level & speaker summary.
### Testing
- Add web route tests ensuring multi-speaker path blocks progression.
- Verify job persistence includes `speaker_id` fields.
## Feature 4 – Speaker Detection Strategies
### Objectives
Build groundwork for lightweight, deterministic speaker inference to inform future multi-speaker mode.
### User Stories
1. **As a producer**, I can run an automated analysis on a book to see the list of likely speakers and how often they talk, so I can decide where multiple voices make sense.
- _Acceptance_: System outputs a JSON report containing speaker IDs/names, occurrence counts, representative excerpts, and confidence tier. Report stored with job artifacts and downloadable from job detail page.
2. **As a producer**, I can set a minimum occurrence threshold so that infrequent speakers automatically fall back to the narrator voice.
- _Acceptance_: Analysis respects configurable threshold; speakers below it are tagged as `default_narrator` in the report.
3. **As a developer/operator**, I can trigger the analysis via CLI or background task without blocking the main conversion pipeline.
- _Acceptance_: Command `abogen analyze-speakers <input>` (or background queue hook) runs in isolation, returns exit code 0 on success, emits metrics/logs for CI.
### Strategy Ideas
1. **Quotation-bound heuristic**
- Split paragraphs on dialogue quotes.
- Use verb cues ("said", "asked") to associate names preceding/following quotes.
2. **Name detection via NER**
- Use spaCy’s entity recognition to spot `PERSON` entities inside dialogue spans.
- Maintain frequency counts per name.
3. **Speaker dictionary**
- Pre-build mapping of common narrator cues ("he said", "Mary replied") to propagate speaker assignment across adjacent sentences.
4. **Pronoun fallback with gender hints**
- Map pronouns to most recent speaker mention; degrade gracefully when ambiguous.
5. **Thresholding mechanism**
- After counting occurrences, expose a threshold slider (future UI) to decide when to allocate unique voices vs. default narrator.
6. **Diagnostics**
- Provide summary report: top N speaker candidates, counts, unresolved dialogue segments.
### Implementation Staging
1. **Phase 1 – Analysis Engine (Backend)**
- Build `speaker_analysis.py` module implementing heuristics, returning structured results.
- Add CLI entry point `abogen-speaker-analyze` for standalone runs.
- Persist analysis artifacts (`speakers.json`, `speaker_excerpts.csv`) alongside job data when invoked post-extraction.
- Tests: unit tests for heuristic functions; snapshot tests for sample novels.
2. **Phase 2 – Configuration & Thresholding**
- Extend settings UI with optional “speaker analysis threshold” control (numeric).
- Update analysis module to accept threshold; mark low-frequency speakers as narrator.
- Emit summary digest (top speakers, narrator fallback count) in job logs.
3. **Phase 3 – UI Surfacing**
- Display analysis summary on job detail page (charts/table).
- Offer download link for raw JSON/CSV artifacts.
- Provide warning banner when analysis confidence is low (e.g., high unmatched dialogue percentage).
4. **Phase 4 – Integration Hooks**
- Wire analysis output into chunk speaker assignments (without yet enabling multi-speaker playback).
- Store mapping in `Job.speakers` metadata for future voice routing.
### Technical Notes
- Reuse spaCy `en_core_web_sm` for entity recognition; allow pluggable models per language.
- Maintain rolling context window to resolve pronouns (e.g., last two named speakers).
- Provide instrumentation (timings, counts) to assess heuristic accuracy on sample corpora.
- Design analysis output schema versioning (`speaker_analysis_version`) to support iterative improvements.
## UI & Configuration Updates
| Screen | Update |
| --- | --- |
| Upload form (`index.html`) | Add chunking level selector and speaker mode buttons. |
| Prepare job (`prepare_job.html`) | Display chunk level, IDs, speaker column; allow future editing hooks. |
| Settings modal | Persist defaults for chunking level and speaker mode. |
## Data Model Checklist
- [x] Update `PendingJob` and `Job` dataclasses with `chunk_level`, `chunks`, `speakers` metadata.
- [x] Ensure serialization persists these fields in queue state file.
- [x] Persist chunk timing metadata from TTS (start/end timestamps).
## Testing Strategy
- Unit tests for chunker and speaker heuristics.
- Integration tests: enqueue job with sentence-level chunking, assert chunk IDs and speaker metadata.
- Regression tests: ensure existing paragraph-level jobs still succeed.
- Acceptance tests for EPUB exporter: validate manifest, spine, and SMIL structure against schema (use `epubcheck` in CI if feasible).
## Migration & Compat
- Bump state version in `ConversionService` when augmenting job schema; include migration logic for legacy queues.
- Provide CLI flag to reprocess older jobs without speaker metadata.
- Document new dependencies (e.g., `lxml`, optional spaCy models for languages beyond English).
## Implementation Phases
1. **Foundation** – Introduce chunk model, chunker service, speaker defaults.
2. **Pipeline integration** – Update job lifecycle and TTS runner to work with chunks.
3. **EPUB exporter** – Build packaging module, connect to pipeline.
4. **UI polish** – Expose settings, guard multi-speaker path, surface diagnostics.
5. **Speaker analysis tool** – Prototype heuristics and reporting.
## Open Questions
- How to handle non-EPUB inputs (PDF/TXT) when exporting EPUB 3? (Possible: generate synthetic XHTML with normalized chapters.)
- Storage impact of embedding per-chunk audio – do we need compression or streaming strategies?
- Internationalization: sentence segmentation quality varies; need language-specific models.
## Next Steps
- Review plan with stakeholders for scope confirmation.
- Break down Phase 1 into actionable tickets (chunker, data model migration, UI toggle).
- Estimate resource requirements for EPUB packaging and testing (including epubcheck integration).
"""Test package initialization.
Provides lightweight fallbacks for optional dependencies so unit tests can run
without the full runtime stack.
"""
from __future__ import annotations
import sys
from types import ModuleType
def _soundfile_write_stub(
file_obj, data, samplerate, format="WAV", **_kwargs
): # pragma: no cover - stub
"""Minimal stand-in for soundfile.write used in tests.
The real library streams waveform data to disk. Our tests don't exercise
audio synthesis, so it's safe to accept the call and write nothing.
"""
if hasattr(file_obj, "write"):
try:
file_obj.write(b"")
except Exception:
# Ignore errors from exotic buffers; the real implementation would
# write binary samples, so a no-op keeps behavior predictable.
pass
# Prefer the real soundfile package when it is installed; only when the
# import fails do we register a write-only stub so modules importing
# `soundfile` at load time don't crash in minimal test environments.
try:
    import soundfile
except ImportError:
    if "soundfile" not in sys.modules:  # pragma: no cover - import guard
        stub = ModuleType("soundfile")
        # Only `write` is stubbed; tests that need other soundfile APIs must
        # install the real package.
        stub.write = _soundfile_write_stub  # type: ignore[attr-defined]
        sys.modules["soundfile"] = stub
def _static_ffmpeg_add_paths_stub(*_args, **_kwargs) -> None: # pragma: no cover - stub
"""Placeholder for static_ffmpeg.add_paths used in tests."""
if "static_ffmpeg" not in sys.modules: # pragma: no cover - import guard
ffmpeg_module = ModuleType("static_ffmpeg")
ffmpeg_module.add_paths = _static_ffmpeg_add_paths_stub # type: ignore[attr-defined]
ffmpeg_run = ModuleType("static_ffmpeg.run")
ffmpeg_run.LOCK_FILE = "" # type: ignore[attr-defined]
ffmpeg_module.run = ffmpeg_run # type: ignore[attr-defined]
sys.modules["static_ffmpeg"] = ffmpeg_module
sys.modules["static_ffmpeg.run"] = ffmpeg_run
import importlib
import sys
import os
import pytest
# Ensure real optional dependencies are imported before tests that install stubs
# so that available packages (like ebooklib, bs4, numpy) aren't replaced with dummy modules.
for module_name in ("ebooklib", "bs4", "numpy"):
    if module_name not in sys.modules:
        try:
            importlib.import_module(module_name)
        except Exception:
            # On environments without the optional dependency, downstream tests
            # will install lightweight stubs as needed. Broad `except` is
            # intentional: any import-time failure means "treat as missing".
            pass
@pytest.fixture(autouse=True, scope="session")
def _isolate_settings_dir(tmp_path_factory: pytest.TempPathFactory):
    """Redirect abogen settings to a throwaway directory for the whole session.

    Points ABOGEN_SETTINGS_DIR at a fresh temp dir and clears any settings
    caches populated before the env var changed, so tests never read or write
    the developer's real configuration.
    """
    scratch_dir = tmp_path_factory.mktemp("abogen-settings")
    os.environ["ABOGEN_SETTINGS_DIR"] = str(scratch_dir)

    def _best_effort(reset):
        # Cache invalidation is optional: the modules may be absent in minimal
        # environments, so any failure here is deliberately swallowed.
        try:
            reset()
        except Exception:
            pass

    def _reset_settings_dir_cache():
        from abogen.utils import get_user_settings_dir

        get_user_settings_dir.cache_clear()

    def _reset_normalization_cache():
        from abogen.normalization_settings import clear_cached_settings

        clear_cached_settings()

    _best_effort(_reset_settings_dir_cache)
    _best_effort(_reset_normalization_cache)
    yield

Sorry, the diff of this file is not supported yet

from __future__ import annotations
import json
from abogen.integrations.audiobookshelf import (
AudiobookshelfClient,
AudiobookshelfConfig,
)
def test_upload_fields_include_series_sequence(tmp_path):
    """Series name and sequence from metadata must surface in the upload fields."""
    book_file = tmp_path / "book.mp3"
    book_file.write_bytes(b"audio")
    client = AudiobookshelfClient(
        AudiobookshelfConfig(
            base_url="https://example.test",
            api_token="token",
            library_id="library-id",
            folder_id="folder-id",
        )
    )
    # Pre-seed the folder lookup so no HTTP round-trip is attempted.
    client._folder_cache = ("folder-id", "Folder", "Library")
    fields = client._build_upload_fields(
        book_file,
        {
            "title": "Example Title",
            "seriesName": "Example Saga",
            "seriesSequence": "7",
        },
        chapters=None,
    )
    assert fields["series"] == "Example Saga"
    assert fields["seriesSequence"] == "7"
    assert "metadata" in fields
    assert json.loads(fields["metadata"])["seriesSequence"] == "7"
def test_upload_fields_normalize_alternate_sequence_keys(tmp_path):
    """A `series_index` value like "Book 3" is normalized to a bare sequence."""
    book_file = tmp_path / "book.mp3"
    book_file.write_bytes(b"audio")
    client = AudiobookshelfClient(
        AudiobookshelfConfig(
            base_url="https://example.test",
            api_token="token",
            library_id="library-id",
            folder_id="folder-id",
        )
    )
    # Pre-seed the folder lookup so no HTTP round-trip is attempted.
    client._folder_cache = ("folder-id", "Folder", "Library")
    fields = client._build_upload_fields(
        book_file,
        {
            "title": "Example Title",
            "seriesName": "Example Saga",
            "series_index": "Book 3",
        },
        chapters=None,
    )
    assert fields["series"] == "Example Saga"
    assert fields["seriesSequence"] == "3"
def test_upload_fields_preserve_decimal_sequence(tmp_path):
    """Fractional series positions (e.g. novella 0.5) survive untouched."""
    book_file = tmp_path / "book.mp3"
    book_file.write_bytes(b"audio")
    client = AudiobookshelfClient(
        AudiobookshelfConfig(
            base_url="https://example.test",
            api_token="token",
            library_id="library-id",
            folder_id="folder-id",
        )
    )
    # Pre-seed the folder lookup so no HTTP round-trip is attempted.
    client._folder_cache = ("folder-id", "Folder", "Library")
    fields = client._build_upload_fields(
        book_file,
        {
            "title": "Example Title",
            "seriesName": "Example Saga",
            "seriesSequence": "0.5",
        },
        chapters=None,
    )
    assert fields["seriesSequence"] == "0.5"
import unittest
import os
import sys
import shutil
import time
from PyQt6.QtWidgets import QApplication
# Ensure we can import the module
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.pyqt.book_handler import HandlerDialog
from ebooklib import epub
# A QApplication must exist before any QWidget/QDialog is constructed.
# Reuse an existing instance if another test module (or the pytest session)
# already created one: constructing a second QApplication raises RuntimeError.
app = QApplication.instance() or QApplication(sys.argv)
class TestBookHandlerRegression(unittest.TestCase):
    """Regression test for HandlerDialog's threaded EPUB content loading."""

    def setUp(self):
        """Create a scratch directory containing a one-chapter sample EPUB."""
        self.test_dir = "tests/test_data_handler"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.sample_epub_path = os.path.join(self.test_dir, "test_book.epub")
        self._create_sample_epub()

    def tearDown(self):
        """Drop HandlerDialog's shared content cache and remove scratch files."""
        HandlerDialog.clear_content_cache()
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_sample_epub(self):
        """Write a minimal valid EPUB (nav + one XHTML chapter) to disk."""
        book = epub.EpubBook()
        book.set_identifier("id123456")
        book.set_title("Sample Book")
        book.set_language("en")
        c1 = epub.EpubHtml(title="Intro", file_name="intro.xhtml", lang="en")
        c1.content = "<h1>Introduction</h1><p>Welcome to the book.</p>"
        book.add_item(c1)
        book.spine = ["nav", c1]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(self.sample_epub_path, book)

    def test_handler_initialization(self):
        """Test that HandlerDialog processes the book correctly."""
        # HandlerDialog starts processing in a background thread in __init__
        # We assume headless environment, so we won't show it.
        # But we need to wait for the thread to finish.
        dialog = HandlerDialog(self.sample_epub_path)
        # Wait for thread to finish
        # The dialog emits no signal publicly, but we can check internal state or thread
        start_time = time.time()
        # Poll for up to 5 seconds; processEvents() lets the loader thread's
        # finished signal reach the main-thread slot that fills content_texts.
        while time.time() - start_time < 5:
            # HandlerDialog logic:
            # _loader_thread.finished connect to _on_load_finished
            # _on_load_finished populates content_texts and content_lengths
            # We can check if content_texts is populated
            if dialog.content_texts:
                break
            app.processEvents()  # Process Qt events to let thread signals propagate
            time.sleep(0.1)
        self.assertTrue(
            len(dialog.content_texts) > 0,
            "HandlerDialog failed to process content in time",
        )
        # Validate content similar to what we expect
        # intro.xhtml should be there
        found_intro = False
        for key, text in dialog.content_texts.items():
            if "Welcome to the book" in text:
                found_intro = True
                break
        self.assertTrue(found_intro)
        # Cleanup
        dialog.close()
if __name__ == "__main__":
unittest.main()
import unittest
import os
import sys
import shutil
import fitz # PyMuPDF
from ebooklib import epub
# Ensure we can import the module
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser, PdfParser, EpubParser, MarkdownParser
class TestBookParser(unittest.TestCase):
    """End-to-end checks for the parser factory and the PDF/EPUB/Markdown parsers."""

    def setUp(self):
        """Create scratch PDF, EPUB and Markdown fixtures for each test."""
        self.test_dir = "tests/test_data"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.sample_pdf_path = os.path.join(self.test_dir, "test_book.pdf")
        self.sample_epub_path = os.path.join(self.test_dir, "test_book.epub")
        self.sample_md_path = os.path.join(self.test_dir, "test_book.md")
        self._create_sample_pdf()
        self._create_sample_epub()
        self._create_sample_md()

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_sample_pdf(self):
        """Write a two-page PDF containing text plus noise the parser should strip."""
        doc = fitz.open()
        # Page 1
        page1 = doc.new_page()
        page1.insert_text((50, 50), "Page 1 content")
        # Add pattern to be cleaned
        page1.insert_text((50, 100), "[12]")
        page1.insert_text((50, 200), "1")  # Page number at bottom
        # Page 2
        page2 = doc.new_page()
        page2.insert_text((50, 50), "Page 2 content")
        doc.save(self.sample_pdf_path)
        doc.close()

    def _create_sample_epub(self):
        """Write a minimal EPUB with metadata, two chapters, and nav documents."""
        book = epub.EpubBook()
        book.set_identifier("id123456")
        book.set_title("Sample Book")
        book.set_language("en")
        book.add_author("Test Author")
        c1 = epub.EpubHtml(title="Intro", file_name="intro.xhtml", lang="en")
        c1.content = "<h1>Introduction</h1><p>Welcome to the book.</p>"
        c2 = epub.EpubHtml(title="Chapter 1", file_name="chap1.xhtml", lang="en")
        c2.content = "<h1>Chapter 1</h1><ol><li>Item One</li><li>Item Two</li></ol>"
        book.add_item(c1)
        book.add_item(c2)
        # Basic spine and nav
        book.spine = ["nav", c1, c2]
        # Add NCX and NAV for compatibility
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(self.sample_epub_path, book)

    def _create_sample_md(self):
        """Write a Markdown file with two level-1 headings (chapter boundaries)."""
        content = "# Chapter 1\nSome text.\n# Chapter 2\nMore text."
        with open(self.sample_md_path, "w") as f:
            f.write(content)

    def test_factory_returns_correct_class(self):
        """Test that get_book_parser returns the correct subclass based on extension."""
        parser_pdf = get_book_parser(self.sample_pdf_path)
        self.assertIsInstance(parser_pdf, PdfParser)
        parser_md = get_book_parser(self.sample_md_path)
        self.assertIsInstance(parser_md, MarkdownParser)
        parser_epub = get_book_parser(self.sample_epub_path)
        self.assertIsInstance(parser_epub, EpubParser)

    def test_factory_explicit_type(self):
        """Test that explicit file type argument overrides extension."""
        # 1. Copy sample epub to something.pdf
        wrong_ext_path = os.path.join(self.test_dir, "actually_epub.pdf")
        shutil.copy(self.sample_epub_path, wrong_ext_path)
        # 2. Open it telling parser it IS epub
        parser = get_book_parser(wrong_ext_path, file_type="epub")
        self.assertIsInstance(parser, EpubParser)
        # Should load successfully
        parser.load()
        self.assertTrue(parser.book is not None)

    def test_pdf_parser_content(self):
        """Test PdfParser content extraction."""
        parser = get_book_parser(self.sample_pdf_path)
        try:
            parser.process_content()
            self.assertIn("page_1", parser.content_texts)
            self.assertIn("page_2", parser.content_texts)
            text1 = parser.content_texts["page_1"]
            self.assertIn("Page 1 content", text1)
            # Bracketed reference markers like "[12]" should be cleaned out.
            self.assertNotIn("[12]", text1)
        finally:
            parser.close()

    def test_markdown_parser_content(self):
        """Test MarkdownParser splitting logic."""
        parser = get_book_parser(self.sample_md_path)
        try:
            parser.process_content()
            # Should have Chapter 1 and Chapter 2 keys (actual keys depend on ID generation)
            # Markdown extensions might slugify IDs: "chapter-1"
            self.assertIn("chapter-1", parser.content_texts)
            self.assertIn("chapter-2", parser.content_texts)
            self.assertIn("Some text", parser.content_texts["chapter-1"])
        finally:
            parser.close()

    def test_epub_parser_content(self):
        """Test EpubParser processing."""
        parser = get_book_parser(self.sample_epub_path)
        parser.process_content()
        self.assertIn("intro.xhtml", parser.content_texts)
        self.assertIn("chap1.xhtml", parser.content_texts)
        self.assertIn("Welcome to the book", parser.content_texts["intro.xhtml"])

    def test_epub_metadata_extraction(self):
        """Test metadata extraction in EpubParser."""
        parser = get_book_parser(self.sample_epub_path)
        # Processing content triggers metadata extraction in current implementation
        parser.process_content()
        metadata = parser.get_metadata()
        self.assertEqual(metadata.get("title"), "Sample Book")
        self.assertEqual(metadata.get("author"), "Test Author")

    def test_ordered_list_handling(self):
        """Test <ol> handling in EpubParser."""
        parser = get_book_parser(self.sample_epub_path)
        parser.process_content()
        text = parser.content_texts.get("chap1.xhtml", "")
        self.assertIn("1) Item One", text)
        self.assertIn("2) Item Two", text)

    def test_find_position_robust_logic(self):
        """Unit test for _find_position_robust on EpubParser."""
        parser = EpubParser(self.sample_epub_path)  # Instantiate directly
        html = '<html><body><p>Start</p><h1 id="target">Heading</h1><p>End</p></body></html>'
        parser.doc_content["dummy.html"] = html
        # Test finding ID
        pos = parser._find_position_robust("dummy.html", "target")
        self.assertGreater(pos, 0)
        self.assertTrue(html[pos:].startswith('<h1 id="target"'))
        # Test missing ID
        pos_missing = parser._find_position_robust("dummy.html", "missing")
        self.assertEqual(pos_missing, 0)

    def test_get_chapters(self):
        """Test get_chapters returns correct list for different parsers."""
        # PDF
        parser_pdf = get_book_parser(self.sample_pdf_path)
        parser_pdf.process_content()
        chapters = parser_pdf.get_chapters()
        self.assertEqual(len(chapters), 2)
        self.assertEqual(chapters[0], ("page_1", "Page 1 - Page 1 content"))
        # MD
        parser_md = get_book_parser(self.sample_md_path)
        parser_md.process_content()  # Must process to get structure
        chapters_md = parser_md.get_chapters()
        # Expecting chapter-1, chapter-2
        ids = [c[0] for c in chapters_md]
        self.assertIn("chapter-1", ids)

    def test_get_formatted_text(self):
        """Test formatting of full text via BaseBookParser method."""
        parser = get_book_parser(self.sample_md_path)
        parser.process_content()
        text = parser.get_formatted_text()
        self.assertIn("<<CHAPTER_MARKER:Chapter 1>>", text)
        self.assertIn("Some text", text)

    def test_file_type_property(self):
        """Test that file_type property returns correct string for each parser."""
        pdf_parser = PdfParser(self.sample_pdf_path)
        self.assertEqual(pdf_parser.file_type, "pdf")
        epub_parser = EpubParser(self.sample_epub_path)
        self.assertEqual(epub_parser.file_type, "epub")
        md_parser = MarkdownParser(self.sample_md_path)
        self.assertEqual(md_parser.file_type, "markdown")
if __name__ == "__main__":
unittest.main()
from pathlib import Path
from werkzeug.datastructures import MultiDict
from abogen.webui.routes.utils.form import apply_book_step_form
from abogen.webui.service import PendingJob
def _make_pending_job() -> PendingJob:
    """Build a minimal kokoro-backed PendingJob for form-handling tests."""
    job_kwargs = dict(
        id="pending",
        original_filename="example.epub",
        stored_path=Path("example.epub"),
        language="a",
        voice="af_nova",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="none",
        output_format="mp3",
        save_mode="save_next_to_input",
        output_folder=None,
        replace_single_newlines=False,
        subtitle_format="srt",
        total_characters=0,
        save_chapters_separately=False,
        merge_chapters_at_end=True,
        separate_chapters_format="wav",
        silence_between_chapters=2.0,
        save_as_project=False,
        voice_profile=None,
        max_subtitle_words=50,
        metadata_tags={},
        chapters=[],
        normalization_overrides={},
        created_at=0.0,
        read_title_intro=False,
        normalize_chapter_opening_caps=True,
    )
    job = PendingJob(**job_kwargs)
    # Provider is set post-construction, mirroring how the web layer does it.
    job.tts_provider = "kokoro"
    return job
def test_book_step_supertonic_profile_becomes_speaker_reference() -> None:
    """Choosing a supertonic profile stores a speaker reference, not a raw voice."""
    pending = _make_pending_job()
    profile_name = "Female HQ"
    settings = {
        "language": "a",
        "chunk_level": "paragraph",
        "speaker_analysis_threshold": 3,
        "default_voice": "af_nova",
        "default_speaker": "",
        "default_speed": 1.0,
        "read_title_intro": False,
        "read_closing_outro": True,
        "normalize_chapter_opening_caps": True,
    }
    profiles = {
        profile_name: {
            "provider": "supertonic",
            "voice": "F3",
            "speed": 1.0,
            "total_steps": 5,
            "language": "a",
        }
    }
    form = MultiDict(
        {"language": "a", "voice_profile": profile_name, "speed": "1.0"}
    )
    apply_book_step_form(pending, form, settings=settings, profiles=profiles)
    # Voice is stored as a speaker reference so provider can be resolved per-speaker.
    assert pending.voice == "speaker:Female HQ"
    assert pending.voice_profile == "Female HQ"
    # Book-level provider should not be overridden by narrator defaults.
    assert pending.tts_provider == "kokoro"
from abogen.integrations.calibre_opds import (
CalibreOPDSClient,
OPDSEntry,
OPDSFeed,
OPDSLink,
feed_to_dict,
)
def test_calibre_opds_feed_exposes_series_metadata() -> None:
    """calibre:series/series_index elements surface on entries and dicts."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\"
xmlns:dc=\"http://purl.org/dc/terms/\"
xmlns:calibre=\"http://calibre.kovidgoyal.net/2009/catalog\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>book-1</id>
<title>Sample Book</title>
<calibre:series>The Expanse</calibre:series>
<calibre:series_index>4</calibre:series_index>
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.epub\"
type=\"application/epub+zip\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    assert parsed.entries, "Expected at least one entry in parsed feed"
    first = parsed.entries[0]
    assert first.series == "The Expanse"
    assert first.series_index == 4.0
    as_dict = feed_to_dict(parsed)
    entry_dict = as_dict["entries"][0]
    assert entry_dict["series"] == "The Expanse"
    assert entry_dict["series_index"] == 4.0
def test_calibre_opds_feed_exposes_subtitle_metadata() -> None:
    """A calibre metadata subtitle element surfaces on the entry and its dict."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\"
xmlns:calibre_md=\"http://calibre.kovidgoyal.net/2009/metadata\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>book-1</id>
<title>Sample Book</title>
<calibre_md:subtitle>A Novel</calibre_md:subtitle>
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.epub\"
type=\"application/epub+zip\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    assert parsed.entries
    assert parsed.entries[0].subtitle == "A Novel"
    assert feed_to_dict(parsed)["entries"][0]["subtitle"] == "A Novel"
def test_calibre_opds_feed_extracts_series_from_categories() -> None:
    """Series name and index parse out of a calibre series category term."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\"
xmlns:dc=\"http://purl.org/dc/terms/\"
xmlns:calibre=\"http://calibre.kovidgoyal.net/2009/catalog\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>book-2</id>
<title>Network Effect</title>
<category
scheme=\"http://calibre.kovidgoyal.net/2009/series\"
term=\"The Murderbot Diaries #5\"
label=\"The Murderbot Diaries [5]\" />
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/network-effect.epub\"
type=\"application/epub+zip\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    assert parsed.entries, "Expected at least one entry in parsed feed"
    first = parsed.entries[0]
    assert first.series == "The Murderbot Diaries"
    assert first.series_index == 5.0
def test_calibre_opds_does_not_map_author_into_series_from_categories() -> None:
    """A series category that merely echoes the author must not become a series."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\"
xmlns:dc=\"http://purl.org/dc/terms/\"
xmlns:calibre=\"http://calibre.kovidgoyal.net/2009/catalog\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>book-author-series-bug</id>
<title>Sample Book</title>
<author>
<name>Alexandre Dumas</name>
</author>
<category
scheme=\"http://calibre.kovidgoyal.net/2009/series\"
term=\"Books: Alexandre Dumas\"
label=\"Books: Alexandre Dumas\" />
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.epub\"
type=\"application/epub+zip\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    assert parsed.entries
    first = parsed.entries[0]
    assert first.authors == ["Alexandre Dumas"]
    assert first.series is None
    assert first.series_index is None
def test_calibre_opds_extracts_tags_and_rating_from_summary() -> None:
    """RATING/TAGS/SERIES prefix lines in the summary are parsed into fields."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\"
xmlns:dc=\"http://purl.org/dc/terms/\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>book-3</id>
<title>Summary Sample</title>
<dc:date>2024-01-15T00:00:00+00:00</dc:date>
<summary type=\"text\">RATING: ★★★½
TAGS: Science Fiction; Adventure
SERIES: Saga [3]
This is the detailed summary text.</summary>
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.epub\"
type=\"application/epub+zip\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    first = parsed.entries[0]
    assert first.series == "Saga"
    assert first.series_index == 3.0
    assert first.tags == ["Science Fiction", "Adventure"]
    assert first.rating == 3.5
    assert first.rating_max == 5.0
    assert first.summary == "This is the detailed summary text."
    assert first.published == "2024-01-15T00:00:00+00:00"
def test_calibre_opds_relative_urls_keep_catalog_prefix() -> None:
    """Relative hrefs resolve under the catalog prefix; absolute paths do not."""
    opds = CalibreOPDSClient("http://example.com/opds/")
    cases = [
        ("search", "http://example.com/opds/search"),
        ("books/sample.epub", "http://example.com/opds/books/sample.epub"),
        ("/cover/1", "http://example.com/cover/1"),
        ("?page=2", "http://example.com/opds?page=2"),
    ]
    for raw, resolved in cases:
        assert opds._make_url(raw) == resolved
def test_calibre_opds_base_url_without_trailing_slash() -> None:
    """Ensure the client works with base URLs that don't have trailing slashes."""
    opds = CalibreOPDSClient("http://example.com/api/v1/opds")
    # Base URL should be stored without trailing slash.
    assert opds._base_url == "http://example.com/api/v1/opds"
    # Relative paths should resolve as siblings to the base URL.
    cases = [
        ("catalog", "http://example.com/api/v1/opds/catalog"),
        ("search?q=test", "http://example.com/api/v1/opds/search?q=test"),
        ("/api/v1/opds/books", "http://example.com/api/v1/opds/books"),
        ("?page=2", "http://example.com/api/v1/opds?page=2"),
    ]
    for raw, resolved in cases:
        assert opds._make_url(raw) == resolved
def test_calibre_opds_filters_out_unsupported_formats() -> None:
    """Audio-only entries are dropped; pdf/epub and navigation entries remain."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>audio-book</id>
<title>Unsupported Audio</title>
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.mp3\"
type=\"audio/mpeg\" />
</entry>
<entry>
<id>pdf-book</id>
<title>Allowed PDF</title>
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.pdf\"
type=\"application/pdf\" />
</entry>
<entry>
<id>epub-book</id>
<title>Allowed EPUB</title>
<link rel=\"http://opds-spec.org/acquisition\"
href=\"books/sample.epub\" />
</entry>
<entry>
<id>nav-author</id>
<title>Authors (A)</title>
<link rel=\"http://opds-spec.org/subsection\"
href=\"/opds/authors/a\"
type=\"application/atom+xml;profile=opds-catalog\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    assert {item.id for item in parsed.entries} == {"pdf-book", "epub-book", "nav-author"}
    for item in parsed.entries:
        if item.id.startswith("nav-"):
            # Navigation entries have no download target but keep their links.
            assert item.download is None
            assert item.links, "Expected navigation entry to preserve links"
        else:
            assert item.download is not None
            assert item.download.href.endswith((".pdf", ".epub"))
def test_calibre_opds_navigation_entries_without_download_are_preserved() -> None:
    """Pure navigation entries survive parsing with links but no download."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    payload = """<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<feed xmlns=\"http://www.w3.org/2005/Atom\">
<id>catalog</id>
<title>Example Catalog</title>
<entry>
<id>nav-series</id>
<title>Series</title>
<link rel=\"http://opds-spec.org/subsection\"
href=\"/opds/series\"
type=\"application/atom+xml;profile=opds-catalog\" />
</entry>
</feed>
"""
    parsed = opds._parse_feed(payload, base_url="http://example.com/catalog")
    assert [item.id for item in parsed.entries] == ["nav-series"]
    nav_entry = parsed.entries[0]
    assert nav_entry.download is None
    assert any(link.href.endswith("/opds/series") for link in nav_entry.links)
def test_calibre_opds_search_filters_by_title_and_author() -> None:
    """Local filtering matches on title, author, and series text."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    catalog = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[
            OPDSEntry(id="1", title="The Long Journey", authors=["Alice Smith"]),
            OPDSEntry(id="2", title="Hidden Worlds", authors=["Bob Johnson"]),
            OPDSEntry(
                id="3",
                title="Side Stories",
                authors=["Cara Nguyen"],
                series="Journey Tales",
            ),
        ],
    )
    def matching_ids(query):
        return [item.id for item in opds._filter_feed_entries(catalog, query).entries]
    assert matching_ids("journey alice") == ["1"]
    assert matching_ids("bob") == ["2"]
    assert matching_ids("journey tales") == ["3"]
    assert matching_ids("missing") == []
def test_calibre_opds_local_search_follows_next(monkeypatch) -> None:
    """_local_search walks rel=next links until a match is found."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    first = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[OPDSEntry(id="1", title="Unrelated", authors=["Alice Smith"])],
        links={"next": OPDSLink(href="http://example.com/catalog?page=2", rel="next")},
    )
    second = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[
            OPDSEntry(id="2", title="The Journey Continues", authors=["Bob Johnson"])
        ],
        links={},
    )
    def stub_fetch(href=None, params=None):
        # Only the pagination URL yields page two; everything else loops back.
        return second if href == "http://example.com/catalog?page=2" else first
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds._local_search("journey", seed_feed=first)
    assert [item.id for item in found.entries] == ["2"]
def test_calibre_opds_local_search_traverses_navigation(monkeypatch) -> None:
    """_local_search descends into navigation subsections to find matches."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    catalog_root = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[
            OPDSEntry(
                id="nav-authors",
                title="Browse Authors",
                links=[
                    OPDSLink(
                        href="http://example.com/catalog/authors",
                        rel="http://opds-spec.org/navigation",
                        type="application/atom+xml;profile=opds-catalog",
                    )
                ],
            )
        ],
        links={},
    )
    by_author = OPDSFeed(
        id="authors",
        title="Authors",
        entries=[
            OPDSEntry(
                id="book-42",
                title="The Count of Monte Cristo",
                authors=["Alexandre Dumas"],
            )
        ],
        links={},
    )
    def stub_fetch(href=None, params=None):
        if href == "http://example.com/catalog/authors":
            return by_author
        return catalog_root
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds._local_search("monte cristo", seed_feed=catalog_root)
    assert [item.id for item in found.entries] == ["book-42"]
def test_calibre_opds_search_falls_back_to_local_search(monkeypatch) -> None:
    """search() keeps paging locally when the search endpoint itself misses."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    search_results = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[OPDSEntry(id="1", title="Unrelated", authors=["Alice Smith"])],
        links={"next": OPDSLink(href="http://example.com/catalog?page=2", rel="next")},
    )
    overflow_page = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[OPDSEntry(id="2", title="Journey in Space", authors=["Cara Nguyen"])],
        links={},
    )
    def stub_fetch(path=None, params=None):
        routes = {
            "search": search_results,
            "http://example.com/catalog?page=2": overflow_page,
        }
        return routes.get(path, search_results)
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds.search("journey")
    assert [item.id for item in found.entries] == ["2"]
def test_calibre_opds_search_collects_next_page_results(monkeypatch) -> None:
    """search() accumulates matches from the first page and rel=next pages."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    opening_page = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[OPDSEntry(id="1", title="Ryan's Adventure")],
        links={"next": OPDSLink(href="http://example.com/catalog?page=2", rel="next")},
    )
    closing_page = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[OPDSEntry(id="2", title="Return of Ryan")],
        links={},
    )
    def stub_fetch(path=None, params=None):
        routes = {
            "search": opening_page,
            "http://example.com/catalog?page=2": closing_page,
        }
        # Any other request (including the bare root fetch) gets page one.
        return routes.get(path, opening_page)
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds.search("ryan")
    assert [item.id for item in found.entries] == ["1", "2"]
def test_calibre_opds_search_supplements_with_local_navigation(monkeypatch) -> None:
    """search() augments direct hits with matches found via navigation links."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    direct_hits = OPDSFeed(
        id="catalog",
        title="Catalog",
        entries=[
            OPDSEntry(id="book-1", title="Ryan's First Mission"),
            OPDSEntry(
                id="nav-authors",
                title="Browse Authors",
                links=[
                    OPDSLink(
                        href="http://example.com/catalog/authors",
                        rel="http://opds-spec.org/navigation",
                        type="application/atom+xml;profile=opds-catalog",
                    )
                ],
            ),
        ],
        links={},
    )
    by_author = OPDSFeed(
        id="authors",
        title="Authors",
        entries=[OPDSEntry(id="book-2", title="Chronicles of Ryan")],
        links={},
    )
    def stub_fetch(path=None, params=None):
        routes = {
            "search": direct_hits,
            "http://example.com/catalog/authors": by_author,
        }
        return routes.get(path, direct_hits)
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds.search("ryan")
    assert [item.id for item in found.entries] == ["book-1", "book-2"]
def test_calibre_opds_browse_letter_traverses_next(monkeypatch) -> None:
    """browse_letter pages through the root feed to reach the letter's section."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    root_page = OPDSFeed(
        id="catalog",
        title="Browse Authors",
        entries=[
            OPDSEntry(
                id="nav-a",
                title="A",
                links=[
                    OPDSLink(
                        href="http://example.com/catalog/authors/a",
                        rel="http://opds-spec.org/navigation",
                        type="application/atom+xml;profile=opds-catalog",
                    )
                ],
            )
        ],
        links={"next": OPDSLink(href="http://example.com/catalog?page=2", rel="next")},
    )
    root_page_two = OPDSFeed(
        id="catalog",
        title="Browse Authors",
        entries=[
            OPDSEntry(
                id="nav-c",
                title="C",
                links=[
                    OPDSLink(
                        href="http://example.com/catalog/authors/c",
                        rel="http://opds-spec.org/navigation",
                        type="application/atom+xml;profile=opds-catalog",
                    )
                ],
            )
        ],
        links={},
    )
    c_section = OPDSFeed(
        id="authors-c",
        title="Authors starting with C",
        entries=[OPDSEntry(id="author-1", title="Clarke, Arthur C.")],
        links={},
    )
    def stub_fetch(href=None, params=None):
        if not href:
            return root_page
        routes = {
            "http://example.com/catalog?page=2": root_page_two,
            "http://example.com/catalog/authors/c": c_section,
        }
        return routes.get(href, root_page)
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds.browse_letter("C")
    assert [item.id for item in found.entries] == ["author-1"]
def test_calibre_opds_browse_letter_filters_when_missing_navigation(
    monkeypatch,
) -> None:
    """Without letter navigation, entries are filtered by their first letter."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    all_titles = OPDSFeed(
        id="catalog",
        title="Browse Titles",
        entries=[
            OPDSEntry(id="book-1", title="The Moon is a Harsh Mistress"),
            OPDSEntry(id="book-2", title="Another Story"),
        ],
        links={},
    )
    monkeypatch.setattr(
        opds, "fetch_feed", lambda href=None, params=None: all_titles
    )
    found = opds.browse_letter("M")
    assert [item.id for item in found.entries] == ["book-1"]
def test_calibre_opds_browse_letter_collects_paginated_entries(monkeypatch) -> None:
    """Letter filtering accumulates matches across rel=next pages."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    opening_page = OPDSFeed(
        id="catalog",
        title="Browse Titles",
        entries=[
            OPDSEntry(id="book-1", title="Ryan's First Adventure"),
            OPDSEntry(id="book-2", title="Another Tale"),
        ],
        links={"next": OPDSLink(href="http://example.com/catalog?page=2", rel="next")},
    )
    closing_page = OPDSFeed(
        id="catalog",
        title="Browse Titles",
        entries=[OPDSEntry(id="book-3", title="Return of Ryan")],
        links={},
    )
    def stub_fetch(href=None, params=None):
        if href == "http://example.com/catalog?page=2":
            return closing_page
        return opening_page
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds.browse_letter("R")
    assert [item.id for item in found.entries] == ["book-1", "book-3"]
def test_calibre_opds_browse_letter_collects_paginated_navigation(monkeypatch) -> None:
    """A letter's own section is followed through its rel=next pages."""
    opds = CalibreOPDSClient("http://example.com/catalog")
    root_page = OPDSFeed(
        id="catalog",
        title="Browse Authors",
        entries=[
            OPDSEntry(
                id="nav-a",
                title="A",
                links=[
                    OPDSLink(
                        href="http://example.com/catalog/authors/a",
                        rel="http://opds-spec.org/navigation",
                        type="application/atom+xml;profile=opds-catalog",
                    )
                ],
            ),
            OPDSEntry(
                id="nav-r",
                title="R",
                links=[
                    OPDSLink(
                        href="http://example.com/catalog/authors/r",
                        rel="http://opds-spec.org/navigation",
                        type="application/atom+xml;profile=opds-catalog",
                    )
                ],
            ),
        ],
        links={},
    )
    r_section = OPDSFeed(
        id="authors-r",
        title="Authors — R",
        entries=[
            OPDSEntry(id="author-1", title="Ryan, Alice"),
        ],
        links={
            "next": OPDSLink(
                href="http://example.com/catalog/authors/r?page=2", rel="next"
            )
        },
    )
    r_section_two = OPDSFeed(
        id="authors-r",
        title="Authors — R",
        entries=[OPDSEntry(id="author-2", title="Ryan, Bob")],
        links={},
    )
    def stub_fetch(href=None, params=None):
        if not href:
            return root_page
        routes = {
            "http://example.com/catalog/authors/r": r_section,
            "http://example.com/catalog/authors/r?page=2": r_section_two,
        }
        return routes.get(href, root_page)
    monkeypatch.setattr(opds, "fetch_feed", stub_fetch)
    found = opds.browse_letter("R")
    assert [item.id for item in found.entries] == ["author-1", "author-2"]
from __future__ import annotations
import sys
import types
def _install_dependency_stubs() -> None:
    """Register lightweight stand-ins for optional third-party modules.

    Each stub is installed only when the real package is absent, so the
    project modules under test can be imported without heavy dependencies.
    """
    modules = sys.modules
    if "ebooklib" not in modules:
        ebooklib_stub = types.ModuleType("ebooklib")
        epub_stub = types.ModuleType("ebooklib.epub")
        ebooklib_stub.epub = epub_stub
        modules["ebooklib"] = ebooklib_stub
        modules["ebooklib.epub"] = epub_stub
    if "dotenv" not in modules:
        dotenv_stub = types.ModuleType("dotenv")
        def _noop(*_, **__):
            return None
        dotenv_stub.load_dotenv = _noop
        dotenv_stub.find_dotenv = lambda *_, **__: ""
        modules["dotenv"] = dotenv_stub
    if "numpy" not in modules:
        numpy_stub = types.ModuleType("numpy")
        class _DummyArray(list):
            pass
        def _zeros(shape, dtype=None):
            # Flat zero buffer: the int itself, or the product of the dims.
            size = 1
            if isinstance(shape, int):
                size = shape
            elif shape:
                size = 1
                for dimension in shape:
                    size *= int(dimension)
            return [0.0] * size
        numpy_stub.ndarray = _DummyArray
        numpy_stub.zeros = _zeros
        numpy_stub.float32 = "float32"
        numpy_stub.array = lambda data, dtype=None: data
        numpy_stub.asarray = lambda data, dtype=None: data
        numpy_stub.concatenate = (
            lambda seq, axis=0: sum((list(item) for item in seq), [])
        )
        modules["numpy"] = numpy_stub
    if "soundfile" not in modules:
        soundfile_stub = types.ModuleType("soundfile")
        class _DummySoundFile:
            def __init__(self, *_, **__):
                pass
            def write(self, *_args, **_kwargs):
                return None
            def close(self):
                return None
        soundfile_stub.SoundFile = _DummySoundFile
        soundfile_stub.write = lambda *_args, **_kwargs: None
        modules["soundfile"] = soundfile_stub
    if "fitz" not in modules:
        modules["fitz"] = types.ModuleType("fitz")
    if "markdown" not in modules:
        markdown_stub = types.ModuleType("markdown")
        class _DummyMarkdown:
            def __init__(self, *_, **__):
                pass
            def convert(self, text: str) -> str:
                return text
        markdown_stub.Markdown = _DummyMarkdown
        modules["markdown"] = markdown_stub
    if "bs4" not in modules:
        bs4_stub = types.ModuleType("bs4")
        class _DummySoup:
            def __init__(self, *_, **__):
                pass
            def select(self, *_, **__):
                return []
            def find_all(self, *_, **__):
                return []
        bs4_stub.BeautifulSoup = _DummySoup
        bs4_stub.NavigableString = str
        modules["bs4"] = bs4_stub
_install_dependency_stubs()
from abogen.text_extractor import ExtractedChapter
from abogen.webui.conversion_runner import _apply_chapter_overrides, _merge_metadata
def _sample_chapters() -> list[ExtractedChapter]:
    """Three-chapter fixture with distinct titles and bodies."""
    seeds = [
        ("Chapter 1", "Original one"),
        ("Chapter 2", "Original two"),
        ("Chapter 3", "Original three"),
    ]
    return [ExtractedChapter(title=name, text=body) for name, body in seeds]
def test_apply_chapter_overrides_with_custom_text() -> None:
    """Custom title/text overrides replace chapter content and record lengths."""
    override_payload = [
        {"index": 0, "enabled": True, "title": "Intro", "text": "Hello world"},
        {"index": 1, "enabled": False},
    ]
    chosen, meta, notes = _apply_chapter_overrides(
        _sample_chapters(), override_payload
    )
    assert len(chosen) == 1
    assert chosen[0].title == "Intro"
    assert chosen[0].text == "Hello world"
    # The override dict is back-filled with the character count of the text.
    assert override_payload[0]["characters"] == len("Hello world")
    assert meta == {}
    assert notes == []
def test_apply_chapter_overrides_uses_original_content_when_text_missing() -> None:
    """Enabled overrides without text fall back to the extracted chapter body."""
    override_payload = [{"index": 1, "enabled": True}]
    chosen, meta, notes = _apply_chapter_overrides(
        _sample_chapters(), override_payload
    )
    assert len(chosen) == 1
    assert chosen[0].title == "Chapter 2"
    assert chosen[0].text == "Original two"
    # The override dict gets the resolved text and its length written back.
    assert override_payload[0]["text"] == "Original two"
    assert override_payload[0]["characters"] == len("Original two")
    assert meta == {}
    assert notes == []
def test_apply_chapter_overrides_collects_metadata_updates() -> None:
    """Metadata on an override is collected and its values stringified."""
    override_payload = [
        {
            "index": 2,
            "enabled": True,
            "metadata": {"artist": "Test Author", "year": 2024},
        }
    ]
    chosen, meta, notes = _apply_chapter_overrides(
        _sample_chapters(), override_payload
    )
    assert len(chosen) == 1
    assert meta == {"artist": "Test Author", "year": "2024"}
    assert notes == []
def test_apply_chapter_overrides_reports_diagnostics_for_invalid_payload() -> None:
    """An override missing its chapter index is skipped and reported."""
    chosen, meta, notes = _apply_chapter_overrides(
        _sample_chapters(), [{"enabled": True, "title": "Missing"}]
    )
    assert chosen == []
    assert meta == {}
    assert notes and "Skipped chapter override" in notes[0]
def test_merge_metadata_prefers_overrides_and_drops_none_values() -> None:
    """Override values win, None values are dropped, extracted keys survive."""
    combined = _merge_metadata(
        {"title": "Original", "artist": "Someone"},
        {"artist": "Another", "genre": "Fiction", "year": None},
    )
    assert combined["title"] == "Original"
    assert combined["artist"] == "Another"
    assert combined["genre"] == "Fiction"
    assert "year" not in combined
from __future__ import annotations
from types import SimpleNamespace
from abogen.chunking import chunk_text
from abogen.webui.conversion_runner import _chunk_voice_spec, _group_chunks_by_chapter
def test_group_chunks_by_chapter_orders_and_groups() -> None:
    """Chunks group by chapter and sort numerically even with string indices."""
    raw_chunks = [
        {"chapter_index": "0", "chunk_index": "5", "text": "tail"},
        {"chapter_index": 0, "chunk_index": 1, "text": "body"},
        {"chapter_index": 1, "chunk_index": 0, "text": "next"},
    ]
    by_chapter = _group_chunks_by_chapter(raw_chunks)
    assert [chunk["text"] for chunk in by_chapter[0]] == ["body", "tail"]
    assert by_chapter[1][0]["text"] == "next"
def test_chunk_voice_spec_prefers_chunk_overrides() -> None:
    """A chunk-level voice override beats speaker and fallback voices."""
    fake_job = SimpleNamespace(voice="base_voice", speakers={})
    overriding_chunk = {"voice": "override_voice", "speaker_id": "narrator"}
    assert _chunk_voice_spec(fake_job, overriding_chunk, "fallback") == "override_voice"
def test_chunk_voice_spec_falls_back_to_speaker_voice() -> None:
    """Without a chunk override, the speaker's configured voice is used."""
    fake_job = SimpleNamespace(
        voice="base_voice", speakers={"narrator": {"voice": "speaker_voice"}}
    )
    spec = _chunk_voice_spec(fake_job, {"speaker_id": "narrator"}, "fallback")
    assert spec == "speaker_voice"
def test_chunk_voice_spec_uses_fallback_when_no_overrides() -> None:
    """An unknown speaker with no chunk override yields the fallback voice."""
    fake_job = SimpleNamespace(voice="base_voice", speakers={})
    spec = _chunk_voice_spec(fake_job, {"speaker_id": "unknown"}, "fallback")
    assert spec == "fallback"
def test_chunk_text_merges_title_abbreviations() -> None:
    """Sentence chunking keeps 'Dr.'/'Mr.' together and normalizes a copy."""
    sample = "Dr. Watson met Mr. Holmes at 5 p.m."
    pieces = chunk_text(
        chapter_index=0,
        chapter_title="Chapter 1",
        text=sample,
        level="sentence",
    )
    assert len(pieces) == 1
    only = pieces[0]
    normalized = str(only.get("normalized_text") or "")
    assert normalized
    assert str(only["text"]).startswith("Dr.")
    assert "Doctor" in normalized
    assert str(only.get("display_text") or "").startswith("Dr.")
    assert str(only.get("original_text") or "").startswith("Dr.")
def test_chunk_text_display_preserves_whitespace() -> None:
    """display_text/original_text retain the source's spacing and blank lines."""
    sample = "Line one with double spaces.\nSecond line\n\nThird paragraph."
    pieces = chunk_text(
        chapter_index=0,
        chapter_title="Chapter 1",
        text=sample,
        level="paragraph",
    )
    assert len(pieces) == 2
    opening_display = str(pieces[0].get("display_text") or "")
    assert " with " in opening_display
    assert opening_display.endswith("\n\n")
    assert str(pieces[1].get("display_text") or "") == "Third paragraph."
    assert str(pieces[0].get("original_text") or "").endswith("\n\n")
from abogen.webui.conversion_runner import _chunk_text_for_tts
def test_chunk_text_for_tts_prefers_text_over_normalized_text():
    """Raw chunk text wins over a pre-normalized variant."""
    chunk = {
        # Simulate a pre-normalized chunk that lost the asterisk.
        "normalized_text": "Unfuk",
        # Raw chunk should preserve censored token for manual overrides.
        "text": "Unfu*k",
    }
    assert _chunk_text_for_tts(chunk) == "Unfu*k"
def test_chunk_text_for_tts_falls_back_to_original_text_then_normalized_text():
    """original_text outranks normalized_text; normalized is the last resort."""
    with_original = {
        "original_text": "Hello * world",
        "normalized_text": "Hello world",
    }
    assert _chunk_text_for_tts(with_original) == "Hello * world"
    normalized_only = {"normalized_text": "Only normalized"}
    assert _chunk_text_for_tts(normalized_only) == "Only normalized"
import sys
import types
# Install minimal stand-ins for heavy optional dependencies BEFORE importing
# abogen.webui.conversion_runner below, so the import succeeds without them.
# Each guard only fires when the real package is absent from sys.modules.
if "soundfile" not in sys.modules:
    soundfile_stub = types.ModuleType("soundfile")
    class _SoundFileStub: # pragma: no cover - placeholder to satisfy imports
        def __init__(self, *args: object, **kwargs: object) -> None:
            raise RuntimeError("soundfile is not installed in the test environment")
    soundfile_stub.SoundFile = _SoundFileStub # type: ignore[attr-defined]
    sys.modules["soundfile"] = soundfile_stub
if "static_ffmpeg" not in sys.modules:
    sys.modules["static_ffmpeg"] = types.ModuleType("static_ffmpeg")
if "ebooklib" not in sys.modules:
    ebooklib_stub = types.ModuleType("ebooklib")
    ebooklib_epub_stub = types.ModuleType("ebooklib.epub")
    ebooklib_stub.epub = ebooklib_epub_stub # type: ignore[attr-defined]
    sys.modules["ebooklib"] = ebooklib_stub
    sys.modules["ebooklib.epub"] = ebooklib_epub_stub
if "fitz" not in sys.modules:
    sys.modules["fitz"] = types.ModuleType("fitz")
if "markdown" not in sys.modules:
    markdown_stub = types.ModuleType("markdown")
    # Identity converter: enough for code paths that only pass text through.
    class _MarkdownStub:
        def __init__(self, *args: object, **kwargs: object) -> None:
            self.toc_tokens = []
        def convert(self, text: str) -> str:
            return text
    markdown_stub.Markdown = _MarkdownStub # type: ignore[attr-defined]
    sys.modules["markdown"] = markdown_stub
if "bs4" not in sys.modules:
    bs4_stub = types.ModuleType("bs4")
    # Inert soup: find() misses, get_text() returns the empty string.
    class _BeautifulSoupStub:
        def __init__(self, *args: object, **kwargs: object) -> None:
            self._text = ""
        def find(self, *args: object, **kwargs: object) -> None:
            return None
        def get_text(self) -> str:
            return self._text
        def decompose(self) -> None: # pragma: no cover - compatibility shim
            return None
    class _NavigableStringStub(str):
        pass
    bs4_stub.BeautifulSoup = _BeautifulSoupStub # type: ignore[attr-defined]
    bs4_stub.NavigableString = _NavigableStringStub # type: ignore[attr-defined]
    sys.modules["bs4"] = bs4_stub
from abogen.webui.conversion_runner import (
_format_spoken_chapter_title,
_headings_equivalent,
_normalize_chapter_opening_caps,
_strip_duplicate_heading_line,
)
def test_format_spoken_chapter_title_adds_prefix() -> None:
    """Bare numbered titles gain a spoken 'Chapter N.' prefix."""
    spoken = _format_spoken_chapter_title("1: A Tale", 1, True)
    assert spoken == "Chapter 1. A Tale"
def test_format_spoken_chapter_title_respects_existing_prefix() -> None:
    """Titles that already say 'Chapter N' are left untouched."""
    unchanged = _format_spoken_chapter_title("Chapter 2: Story", 2, True)
    assert unchanged == "Chapter 2: Story"
def test_format_spoken_chapter_title_handles_empty_title() -> None:
    """An empty title becomes just the spoken chapter number."""
    spoken = _format_spoken_chapter_title("", 4, True)
    assert spoken == "Chapter 4"
def test_format_spoken_chapter_title_trims_delimiters() -> None:
    """Leading number-and-dash delimiters are rewritten as 'Chapter N.'."""
    spoken = _format_spoken_chapter_title("7 - Into the Wild", 7, True)
    assert spoken == "Chapter 7. Into the Wild"
def test_headings_equivalent_ignores_case_and_prefix() -> None:
    """Heading comparison tolerates a 'Chapter' prefix."""
    equivalent = _headings_equivalent("1: The House", "Chapter 1: The House")
    assert equivalent
def test_strip_duplicate_heading_line_removes_first_match() -> None:
    """A body whose first line repeats the heading loses that line."""
    stripped, was_removed = _strip_duplicate_heading_line(
        "Chapter 3: Intro\nBody text", "Chapter 3: Intro"
    )
    assert was_removed is True
    assert stripped.strip() == "Body text"
def test_normalize_chapter_opening_caps_basic_title() -> None:
    """An all-caps title is rewritten to title case and flagged as changed."""
    result, did_change = _normalize_chapter_opening_caps("ALL CAPS TITLE")
    assert result == "All Caps Title"
    assert did_change is True
def test_normalize_chapter_opening_caps_respects_acronyms() -> None:
    """Known acronyms such as NASA keep their capitalization."""
    result, did_change = _normalize_chapter_opening_caps("NASA MISSION LOG")
    assert result == "NASA Mission Log"
    assert did_change is True
def test_normalize_chapter_opening_caps_handles_roman_numerals() -> None:
    """Roman numerals stay upper-case while the rest is title-cased."""
    result, did_change = _normalize_chapter_opening_caps("IV. THE RETURN")
    assert result == "IV. The Return"
    assert did_change is True
def test_normalize_chapter_opening_caps_keeps_mixed_case() -> None:
    """Already mixed-case titles pass through unchanged."""
    result, did_change = _normalize_chapter_opening_caps("Already Mixed Case")
    assert result == "Already Mixed Case"
    assert did_change is False
import sys
import types
# Same dependency-stub preamble as the other conversion_runner test modules:
# install stand-ins BEFORE the abogen import below so it resolves without the
# heavy optional packages. Guards fire only when the real module is absent.
if "soundfile" not in sys.modules:
    soundfile_stub = types.ModuleType("soundfile")
    class _SoundFileStub: # pragma: no cover - placeholder to satisfy imports
        def __init__(self, *args: object, **kwargs: object) -> None:
            raise RuntimeError("soundfile is not installed in the test environment")
    soundfile_stub.SoundFile = _SoundFileStub # type: ignore[attr-defined]
    sys.modules["soundfile"] = soundfile_stub
if "static_ffmpeg" not in sys.modules:
    sys.modules["static_ffmpeg"] = types.ModuleType("static_ffmpeg")
if "ebooklib" not in sys.modules:
    ebooklib_stub = types.ModuleType("ebooklib")
    ebooklib_epub_stub = types.ModuleType("ebooklib.epub")
    ebooklib_stub.epub = ebooklib_epub_stub # type: ignore[attr-defined]
    sys.modules["ebooklib"] = ebooklib_stub
    sys.modules["ebooklib.epub"] = ebooklib_epub_stub
if "fitz" not in sys.modules:
    sys.modules["fitz"] = types.ModuleType("fitz")
if "markdown" not in sys.modules:
    markdown_stub = types.ModuleType("markdown")
    # Identity converter: sufficient for pass-through text handling.
    class _MarkdownStub:
        def __init__(self, *args: object, **kwargs: object) -> None:
            self.toc_tokens = []
        def convert(self, text: str) -> str:
            return text
    markdown_stub.Markdown = _MarkdownStub # type: ignore[attr-defined]
    sys.modules["markdown"] = markdown_stub
if "bs4" not in sys.modules:
    bs4_stub = types.ModuleType("bs4")
    # Inert soup: find() misses, get_text() returns the empty string.
    class _BeautifulSoupStub:
        def __init__(self, *args: object, **kwargs: object) -> None:
            self._text = ""
        def find(self, *args: object, **kwargs: object) -> None:
            return None
        def get_text(self) -> str:
            return self._text
        def decompose(self) -> None: # pragma: no cover - compatibility shim
            return None
    class _NavigableStringStub(str):
        pass
    bs4_stub.BeautifulSoup = _BeautifulSoupStub # type: ignore[attr-defined]
    bs4_stub.NavigableString = _NavigableStringStub # type: ignore[attr-defined]
    sys.modules["bs4"] = bs4_stub
from abogen.webui.conversion_runner import _build_outro_text, _build_title_intro_text
def test_title_intro_includes_series_sentence() -> None:
    """The spoken intro leads with the series sentence, then title and author."""
    book_meta = {
        "title": "Galactic Chronicles",
        "author": "Jane Doe",
        "series": "Chronicles",
        "series_index": "2",
    }
    spoken = _build_title_intro_text(book_meta, "chronicles.mp3")
    assert spoken.startswith("Book 2 of the Chronicles.")
    assert "Galactic Chronicles." in spoken
    assert "By Jane Doe." in spoken
def test_series_sentence_skips_duplicate_article() -> None:
    """Series names starting with 'The' do not get a second article."""
    book_meta = {
        "title": "Iron Council",
        "authors": "China Miéville",
        "series": "The Bas-Lag",
        "series_index": "3",
    }
    spoken = _build_title_intro_text(book_meta, "iron_council.mp3")
    assert "Book 3 of The Bas-Lag." in spoken
    assert "of the The" not in spoken
def test_outro_appends_series_information() -> None:
    """The spoken outro ends with the series sentence after title and author."""
    book_meta = {
        "title": "Abaddon's Gate",
        "authors": "James S. A. Corey",
        "series": "The Expanse",
        "series_index": "3",
    }
    spoken = _build_outro_text(book_meta, "abaddon.mp3")
    assert spoken.startswith("The end of Abaddon's Gate from James S. A. Corey.")
    assert spoken.endswith("Book 3 of The Expanse.")
def test_series_number_preserves_decimal_positions() -> None:
    """Fractional series indexes such as 2.5 are spoken verbatim."""
    book_info = dict(
        title="Interlude",
        author="Alex Writer",
        series="Chronicles",
        series_index="2.5",
    )
    spoken = _build_title_intro_text(book_info, "interlude.mp3")
    assert "Book 2.5 of the Chronicles." in spoken
from types import SimpleNamespace
from typing import cast
from abogen.constants import VOICES_INTERNAL
from abogen.webui.conversion_runner import (
_chapter_voice_spec,
_chunk_voice_spec,
_collect_required_voice_ids,
)
from abogen.webui.service import Job
def _sample_job(formula: str) -> Job:
    """Build a minimal Job stand-in whose narrator resolves to *formula*."""
    stand_in = SimpleNamespace(
        voice="__custom_mix",
        speakers={"narrator": {"resolved_voice": formula}},
        chapters=[],
        chunks=[{}],
    )
    return cast(Job, stand_in)
def test_chapter_voice_spec_uses_resolved_formula():
    """The chapter-level voice spec is the speaker's resolved mix formula."""
    mix = "af_nova*0.7+am_liam*0.3"
    assert _chapter_voice_spec(_sample_job(mix), None) == mix
def test_chunk_voice_fallback_uses_resolved_formula():
    """With no chunk-specific voice, the resolved formula is the fallback."""
    mix = "af_nova*0.7+am_liam*0.3"
    assert _chunk_voice_spec(_sample_job(mix), {}, "") == mix
def test_voice_collection_includes_formula_components():
    """Required voices include the formula components plus every internal voice."""
    mix = "af_nova*0.7+am_liam*0.3"
    required = _collect_required_voice_ids(_sample_job(mix))
    assert {"af_nova", "am_liam"} <= required
    assert required.issuperset(VOICES_INTERNAL)
import pytest
from abogen.kokoro_text_normalization import (
_normalize_grouped_numbers,
ApostropheConfig,
)
@pytest.fixture
def cfg():
    """Apostrophe/number config with American-style year pronunciation."""
    return ApostropheConfig(convert_numbers=True, year_pronunciation_mode="american")
def normalize(text, config):
    """Shorthand for the private grouped-number normalizer under test."""
    return _normalize_grouped_numbers(text, config)
class TestDateNormalization:
    """Year vs. plain-number detection in ``_normalize_grouped_numbers``.

    Four-digit numbers default to year-style speech ("nineteen ninety")
    unless an "address" cue appears within the context window; BC/BCE
    markers force year style even near an address cue.
    """

    def test_standard_years(self, cfg):
        # 1990 -> nineteen hundred ninety
        assert "nineteen hundred ninety" in normalize("In 1990, the web was born.", cfg)
        # 1066 -> ten sixty-six
        assert "ten sixty-six" in normalize("The battle was in 1066.", cfg)
        # 2023 -> twenty twenty-three
        assert "twenty twenty-three" in normalize("It is currently 2023.", cfg)
        # 1905 -> nineteen hundred oh five
        assert "nineteen hundred oh five" in normalize(
            "In 1905, Einstein published.", cfg
        )

    def test_future_years(self, cfg):
        # 3400 -> thirty-four hundred
        assert "thirty-four hundred" in normalize("In the year 3400, we fly.", cfg)
        # 2500 -> twenty-five hundred
        assert "twenty-five hundred" in normalize("The year 2500 is far off.", cfg)

    def test_years_with_markers(self, cfg):
        # 1021 BC -> ten twenty-one
        assert "ten twenty-one" in normalize("It happened in 1021 BC.", cfg)
        # 4000 BCE -> forty hundred (or four thousand?)
        # _format_year_like logic:
        # if value % 1000 == 0: return "X thousand"
        # 4000 -> four thousand.
        # Let's check 4001 -> forty oh one
        assert "forty oh one" in normalize("Ancient times 4001 BCE.", cfg)

    def test_addresses_explicit(self, cfg):
        # "address" keyword present -> should NOT be year
        # 1925 -> one thousand nine hundred twenty-five (default num2words)
        # or "one nine two five" if num2words isn't doing year stuff.
        # num2words(1925) -> "one thousand, nine hundred and twenty-five"
        res = normalize("My address is 1925 Main St.", cfg)
        assert "nineteen twenty-five" not in res
        assert "one thousand" in res or "nineteen hundred" in res
        res = normalize("Please send it to the address: 3400 North Blvd.", cfg)
        assert "thirty-four hundred" not in res  # Should not be year style
        assert "three thousand" in res or "thirty-four hundred" in res
        # NOTE(review): "thirty-four hundred" is the year reading; the cardinal
        # reading from num2words is "three thousand, four hundred".
        assert "three thousand" in res

    def test_address_with_year_marker_edge_case(self, cfg):
        # "address" is present, BUT "BC" is also present. Should be year.
        res = normalize("The address was found in 1021 BC ruins.", cfg)
        assert "ten twenty-one" in res

    def test_ambiguous_numbers(self, cfg):
        # Just a number, no "address", no markers. Should default to year if 4 digits 1000-9999
        assert "nineteen hundred fifty" in normalize("I have 1950 apples.", cfg)
        # This is a known limitation/feature: it aggressively identifies years.

    def test_specific_user_examples(self, cfg):
        # 1021
        assert "ten twenty-one" in normalize("1021", cfg)
        # 1925
        assert "nineteen hundred" in normalize("1925", cfg)
        # 3400
        assert "thirty-four hundred" in normalize("3400", cfg)

    def test_martin_ford_jobless_future_context(self, cfg):
        # Simulating a title or sentence from the book
        # "The Rise of the Robots: Technology and the Threat of a Jobless Future"
        # Maybe it mentions a year like 2015 (pub date) or a future date.
        # "In 2015, Martin Ford wrote..."
        assert "twenty fifteen" in normalize("In 2015, Martin Ford wrote...", cfg)
        # "By 2100, robots will..."
        assert "twenty-one hundred" in normalize("By 2100, robots will...", cfg)

    def test_address_context_window(self, cfg):
        # "address" is far away (> 60 chars). Should be year.
        padding = "x" * 70
        text = f"address {padding} 1999"
        assert "nineteen hundred ninety-nine" in normalize(text, cfg)
        # "address" is close (< 60 chars). Should be number.
        padding = "x" * 10
        text = f"address {padding} 1999"
        res = normalize(text, cfg)
        assert "nineteen hundred ninety-nine" not in res
        assert "one thousand" in res

    def test_2000s(self, cfg):
        # 2000-2009 are usually "two thousand X"
        assert "two thousand one" in normalize("2001", cfg)
        assert "two thousand nine" in normalize("2009", cfg)
        # 2010 -> twenty ten
        assert "twenty ten" in normalize("2010", cfg)

    def test_addresses_plural(self, cfg):
        # "addresses" plural -> should also trigger non-year mode?
        # Currently the code only looks for "address".
        # "The addresses are 1925 and 1926."
        # If it fails to detect "addresses", it will say "nineteen twenty-five".
        # If we want it to be "one thousand...", we need to update the regex.
        res = normalize("The addresses are 1925 and 1926.", cfg)
        # Expectation: should probably be numbers, not years.
        assert "nineteen twenty-five" not in res
import json
from pathlib import Path
import numpy as np
import pytest
from abogen.debug_tts_samples import (
DEBUG_TTS_SAMPLES,
MARKER_PREFIX,
MARKER_SUFFIX,
iter_expected_codes,
)
from abogen.kokoro_text_normalization import HAS_NUM2WORDS, normalize_for_pipeline
from abogen.normalization_settings import build_apostrophe_config
from abogen.text_extractor import extract_from_path
from abogen.webui.app import create_app
def test_debug_epub_contains_all_codes():
    """Every expected debug marker must appear in the extracted fixture text."""
    fixture = Path("tests/fixtures/abogen_debug_tts_samples.epub")
    assert fixture.exists()
    result = extract_from_path(fixture)
    haystack = result.combined_text or "\n\n".join(
        (chapter.text or "") for chapter in result.chapters
    )
    for expected in iter_expected_codes():
        assert f"{MARKER_PREFIX}{expected}{MARKER_SUFFIX}" in haystack
def test_debug_samples_normalize_smoke():
    """Normalizing every debug sample with web-UI defaults behaves as expected."""
    # Use the same defaults as the web UI.
    from abogen.webui.routes.utils.settings import settings_defaults

    defaults = settings_defaults()
    apostrophe_cfg = build_apostrophe_config(settings=defaults)
    runtime_settings = dict(defaults)
    by_code = {}
    for sample in DEBUG_TTS_SAMPLES:
        by_code[sample.code] = normalize_for_pipeline(
            sample.text, config=apostrophe_cfg, settings=runtime_settings
        )
    # Contractions should expand under defaults.
    assert "it is" in by_code["APOS_001"].lower()
    # Titles should expand.
    assert "doctor" in by_code["TITLE_001"].lower()
    # Footnotes should be removed.
    assert "[1]" not in by_code["FOOT_001"]
    # Terminal punctuation should be added.
    assert by_code["PUNC_001"].strip()[-1] in {".", "!", "?"}
    if HAS_NUM2WORDS:
        # Currency and numbers should expand to words when num2words is available.
        assert "dollar" in by_code["CUR_001"].lower()
        assert "thousand" in by_code["NUM_001"].lower()
def test_settings_debug_route_writes_manifest(tmp_path, monkeypatch):
    """POSTing the debug-run route redirects and writes a manifest with WAVs."""
    # Avoid pulling Kokoro models in tests: stub the pipeline.
    from abogen.webui import debug_tts_runner as runner

    class _Seg:
        # Mimics a pipeline segment: only .audio is consumed downstream.
        def __init__(self, audio):
            self.audio = audio

    class DummyPipeline:
        def __call__(self, text, **kwargs):
            # 100ms of audio per call, deterministic.
            audio = np.zeros(int(0.1 * runner.SAMPLE_RATE), dtype="float32")
            audio[::100] = 0.1
            yield _Seg(audio)

    monkeypatch.setattr(
        runner, "_load_pipeline", lambda language, use_gpu: DummyPipeline()
    )
    app = create_app(
        {
            "TESTING": True,
            "SECRET_KEY": "test",
            "OUTPUT_FOLDER": str(tmp_path),
            "UPLOAD_FOLDER": str(tmp_path / "uploads"),
        }
    )
    with app.test_client() as client:
        resp = client.post("/settings/debug/run")
        # The route responds with a redirect to the run's detail page.
        assert resp.status_code in {302, 303}
        location = resp.headers.get("Location", "")
        assert "/settings/debug/" in location
    # Extract run id from /settings/debug/<run_id>
    run_id = (
        location.rsplit("/settings/debug/", 1)[1].split("?", 1)[0].split("#", 1)[0]
    )
    manifest_path = tmp_path / "debug" / run_id / "manifest.json"
    assert manifest_path.exists()
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    filenames = {item["filename"] for item in manifest.get("artifacts", [])}
    # Expect the combined output plus at least one per-case WAV.
    assert "overall.wav" in filenames
    assert any(
        name.startswith("case_") and name.endswith(".wav") for name in filenames
    )
def test_debug_samples_have_minimum_per_category():
    """Each sample category must contain at least five cases."""
    minimums = {
        "APOS": 5,
        "POS": 5,
        "NUM": 5,
        "YEAR": 5,
        "DATE": 5,
        "CUR": 5,
        "TITLE": 5,
        "PUNC": 5,
        "QUOTE": 5,
        "FOOT": 5,
    }
    tally = dict.fromkeys(minimums, 0)
    for sample in DEBUG_TTS_SAMPLES:
        category = sample.code.split("_", 1)[0]
        if category in tally:
            tally[category] += 1
    for category, floor in minimums.items():
        assert tally[category] >= floor
def test_debug_runner_resolves_profile_voice_before_pipeline(tmp_path, monkeypatch):
    """`profile:*` voice settings must be resolved before hitting the pipeline."""
    from abogen.webui import debug_tts_runner as runner

    # Stub voice setting resolution so we don't depend on the user's profile file.
    monkeypatch.setattr(
        runner, "_resolve_voice_setting", lambda value: ("af_heart", "AM HQ Alt", None)
    )
    # Records the voice= kwarg of every pipeline invocation for inspection.
    calls = []

    class _Seg:
        def __init__(self, audio):
            self.audio = audio

    class DummyPipeline:
        def __call__(self, text, **kwargs):
            calls.append(kwargs.get("voice"))
            audio = np.zeros(int(0.05 * runner.SAMPLE_RATE), dtype="float32")
            yield _Seg(audio)

    monkeypatch.setattr(
        runner, "_load_pipeline", lambda language, use_gpu: DummyPipeline()
    )
    settings = {
        "language": "en",
        "default_voice": "profile:AM HQ Alt",
        "use_gpu": False,
        "default_speed": 1.0,
    }
    manifest = runner.run_debug_tts_wavs(output_root=tmp_path, settings=settings)
    assert manifest.get("run_id")
    assert calls
    # Must not pass through the profile:* string.
    assert all(
        isinstance(v, str) and not v.lower().startswith("profile:") for v in calls
    )
import unittest
import os
import shutil
import sys
from ebooklib import epub
# Ensure import path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser
class TestEpubContentSlicing(unittest.TestCase):
    """
    Tests for the complex content slicing logic in _execute_nav_parsing_logic.
    This covers scenarios where multiple chapters/sections are contained within
    a single physical HTML file, separated by anchors (fragments).
    """

    def setUp(self):
        # Fresh scratch directory per test; removed again in tearDown.
        self.test_dir = "tests/test_data_slicing"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.epub_path = os.path.join(self.test_dir, "slicing_test.epub")

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_single_file_multiple_chapters(self):
        """
        Test splitting one XHTML file into two chapters using an anchor.
        """
        book = epub.EpubBook()
        book.set_identifier("slice123")
        book.set_title("Slicing Test Book")
        # Create a single content file with two sections
        content_html = """
        <html>
        <body>
        <h1 id="chap1">Chapter 1</h1>
        <p>Text for chapter 1.</p>
        <hr/>
        <h1 id="chap2">Chapter 2</h1>
        <p>Text for chapter 2.</p>
        </body>
        </html>
        """
        c1 = epub.EpubHtml(title="Full Content", file_name="content.xhtml", lang="en")
        c1.content = content_html
        book.add_item(c1)
        # Create Nav that points to anchors in the SAME file
        # We use EpubHtml for Nav to control content exactly without ebooklib interference
        nav_html = """
        <nav epub:type="toc" id="toc">
        <ol>
        <li><a href="content.xhtml#chap1">Chapter 1</a></li>
        <li><a href="content.xhtml#chap2">Chapter 2</a></li>
        </ol>
        </nav>
        """
        nav = epub.EpubHtml(title="Nav", file_name="nav.xhtml")
        nav.content = nav_html
        book.add_item(nav)
        book.spine = [nav, c1]
        epub.write_epub(self.epub_path, book)
        # Patch the OPF to drop toc="ncx" (avoids a crash: no NCX was written).
        import zipfile

        patched = False
        with zipfile.ZipFile(self.epub_path, "r") as zin:
            opf_content = zin.read("EPUB/content.opf").decode("utf-8")
            if 'toc="ncx"' in opf_content:
                opf_content = opf_content.replace('toc="ncx"', "")
                patched = True
            if patched:
                # Repack with the patched OPF while zin is still readable.
                TEMP_EPUB = self.epub_path + ".temp"
                with zipfile.ZipFile(TEMP_EPUB, "w") as zout:
                    for item in zin.infolist():
                        if item.filename == "EPUB/content.opf":
                            zout.writestr(item, opf_content)
                        else:
                            zout.writestr(item, zin.read(item.filename))
        if patched:
            shutil.move(TEMP_EPUB, self.epub_path)
        # Parse
        parser = get_book_parser(self.epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        # Filter Nav/Intro
        chapters = [c for c in chapters if "Chapter" in c[1]]
        self.assertEqual(len(chapters), 2)
        self.assertEqual(chapters[0][1], "Chapter 1")
        self.assertEqual(chapters[1][1], "Chapter 2")
        # Check content of Chapter 1
        # It should contain "Text for chapter 1" but NOT "Text for chapter 2"
        # The parser logic slices from start_pos to next_pos
        text1 = parser.content_texts[chapters[0][0]]
        self.assertIn("Text for chapter 1", text1)
        self.assertNotIn("Text for chapter 2", text1)
        # Check content of Chapter 2
        text2 = parser.content_texts[chapters[1][0]]
        self.assertIn("Text for chapter 2", text2)

    def test_list_renumbering(self):
        """
        Test that ordered lists are re-numbered when slicing.
        The parser has logic to reset <ol start="..."> or insert numbers.
        """
        book = epub.EpubBook()
        book.set_identifier("list123")
        book.set_title("List Test Book")
        content_html = """
        <html>
        <body>
        <h1 id="part1">Part 1</h1>
        <ol>
        <li>Item A</li>
        <li>Item B</li>
        </ol>
        <h1 id="part2">Part 2</h1>
        <ol start="3">
        <li>Item C</li>
        <li>Item D</li>
        </ol>
        </body>
        </html>
        """
        c1 = epub.EpubHtml(title="Content", file_name="content.xhtml", lang="en")
        c1.content = content_html
        book.add_item(c1)
        nav_html = """
        <nav epub:type="toc">
        <ol>
        <li><a href="content.xhtml#part1">Part 1</a></li>
        <li><a href="content.xhtml#part2">Part 2</a></li>
        </ol>
        </nav>
        """
        nav = epub.EpubHtml(title="Nav", file_name="nav.xhtml")
        nav.content = nav_html
        book.add_item(nav)
        book.spine = [nav, c1]
        epub.write_epub(self.epub_path, book)
        # Patch the OPF exactly as in test_single_file_multiple_chapters.
        import zipfile

        patched = False
        with zipfile.ZipFile(self.epub_path, "r") as zin:
            opf_content = zin.read("EPUB/content.opf").decode("utf-8")
            if 'toc="ncx"' in opf_content:
                opf_content = opf_content.replace('toc="ncx"', "")
                patched = True
            if patched:
                TEMP_EPUB = self.epub_path + ".temp"
                with zipfile.ZipFile(TEMP_EPUB, "w") as zout:
                    for item in zin.infolist():
                        if item.filename == "EPUB/content.opf":
                            zout.writestr(item, opf_content)
                        else:
                            zout.writestr(item, zin.read(item.filename))
        if patched:
            shutil.move(TEMP_EPUB, self.epub_path)
        parser = get_book_parser(self.epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        chapters = [c for c in chapters if "Part" in c[1]]
        self.assertEqual(len(chapters), 2)
        # Check Part 1 text
        text1 = parser.content_texts[chapters[0][0]]
        # The parser explicitly replaces li with "1) Item A" style text
        self.assertIn("1) Item A", text1)
        self.assertIn("2) Item B", text1)
        # Check Part 2 text
        text2 = parser.content_texts[chapters[1][0]]
        # Should convert start="3" to "3) Item C"
        self.assertIn("3) Item C", text2)
        self.assertIn("4) Item D", text2)
if __name__ == "__main__":
unittest.main()
from __future__ import annotations
import html
import re
import zipfile
from abogen.epub3.exporter import build_epub3_package
from abogen.text_extractor import ExtractedChapter, ExtractionResult
def _make_sample_extraction() -> ExtractionResult:
    """Two-chapter extraction fixture with minimal metadata."""
    first = ExtractedChapter(title="Chapter 1", text="Hello world.")
    second = ExtractedChapter(title="Chapter 2", text="Another passage.")
    return ExtractionResult(
        chapters=[first, second],
        metadata={"title": "Sample Book", "artist": "Test Author", "language": "en"},
    )
def test_build_epub3_package_creates_expected_structure(tmp_path) -> None:
    """A full package build yields the canonical EPUB3 layout with overlays."""
    extraction = _make_sample_extraction()
    # One chunk per chapter, each voiced by the narrator.
    chunks = [
        {
            "id": "chap0000_p0000",
            "chapter_index": 0,
            "chunk_index": 0,
            "text": "Hello world.",
            "speaker_id": "narrator",
        },
        {
            "id": "chap0001_p0000",
            "chapter_index": 1,
            "chunk_index": 0,
            "text": "Another passage.",
            "speaker_id": "narrator",
        },
    ]
    # Timing markers matching the chunks (seconds).
    chunk_markers = [
        {
            "id": "chap0000_p0000",
            "chapter_index": 0,
            "chunk_index": 0,
            "start": 0.0,
            "end": 1.2,
        },
        {
            "id": "chap0001_p0000",
            "chapter_index": 1,
            "chunk_index": 0,
            "start": 1.2,
            "end": 2.4,
        },
    ]
    chapter_markers = [
        {"index": 1, "title": "Chapter 1", "start": 0.0, "end": 1.2},
        {"index": 2, "title": "Chapter 2", "start": 1.2, "end": 2.4},
    ]
    metadata_tags = {"title": "Sample Book", "artist": "Test Author", "language": "en"}
    audio_path = tmp_path / "sample.mp3"
    audio_path.write_bytes(b"ID3 test audio")
    output_path = tmp_path / "output.epub"
    result_path = build_epub3_package(
        output_path=output_path,
        book_id="job-123",
        extraction=extraction,
        metadata_tags=metadata_tags,
        chapter_markers=chapter_markers,
        chunk_markers=chunk_markers,
        chunks=chunks,
        audio_path=audio_path,
        speaker_mode="single",
    )
    assert result_path == output_path
    assert output_path.exists()
    with zipfile.ZipFile(output_path) as archive:
        names = set(archive.namelist())
        # EPUB spec requires an uncompressed "mimetype" entry with this payload.
        assert "mimetype" in names
        assert archive.read("mimetype") == b"application/epub+zip"
        assert "META-INF/container.xml" in names
        assert "OEBPS/content.opf" in names
        assert "OEBPS/nav.xhtml" in names
        assert "OEBPS/audio/sample.mp3" in names
        chapter_doc = archive.read("OEBPS/text/chapter_0001.xhtml").decode("utf-8")
        assert "Hello world." in chapter_doc
        # SMIL media-overlay timing must use hh:mm:ss.mmm clock values.
        smil_doc = archive.read("OEBPS/smil/chapter_0001.smil").decode("utf-8")
        assert 'clipBegin="00:00:00.000"' in smil_doc
        opf_doc = archive.read("OEBPS/content.opf").decode("utf-8")
        assert "media-overlay" in opf_doc
        assert "media:duration" in opf_doc
        assert "abogen:speakerMode" in opf_doc
def test_build_epub3_package_handles_missing_markers(tmp_path) -> None:
    """Packaging succeeds with empty marker/chunk lists; nav and text still render."""
    audio_file = tmp_path / "audio.mp3"
    audio_file.write_bytes(b"ID3 audio")
    tags = {"title": "Sample Book", "artist": "Test Author", "language": "en"}
    produced = build_epub3_package(
        output_path=tmp_path / "output.epub",
        book_id="job-456",
        extraction=_make_sample_extraction(),
        metadata_tags=tags,
        chapter_markers=[],
        chunk_markers=[],
        chunks=[],
        audio_path=audio_file,
        speaker_mode="single",
    )
    with zipfile.ZipFile(produced) as archive:
        nav_markup = archive.read("OEBPS/nav.xhtml").decode("utf-8")
        assert "Chapter 1" in nav_markup
        chapter_markup = archive.read("OEBPS/text/chapter_0001.xhtml").decode("utf-8")
        assert "Hello world." in chapter_markup
def test_epub3_preserves_original_whitespace(tmp_path) -> None:
    """The package keeps the chapter's exact whitespace in a <pre> copy."""
    extraction = ExtractionResult(
        chapters=[
            ExtractedChapter(
                title="Intro",
                text="Line one with  double  spaces.\nSecond line\n\nThird paragraph.",
            )
        ],
        metadata={"title": "Sample", "artist": "Author", "language": "en"},
    )
    chunks = [
        {
            "id": "chap0000_p0000",
            "chapter_index": 0,
            "chunk_index": 0,
            "text": "Line one with  double  spaces.",
            "speaker_id": "narrator",
        },
        {
            "id": "chap0000_p0001",
            "chapter_index": 0,
            "chunk_index": 1,
            "text": "Second line",
            "speaker_id": "narrator",
        },
        {
            "id": "chap0000_p0002",
            "chapter_index": 0,
            "chunk_index": 2,
            "text": "Third paragraph.",
            "speaker_id": "narrator",
        },
    ]
    # Markers without timing information (start/end None) are still accepted.
    chunk_markers = [
        {
            "id": chunk["id"],
            "chapter_index": 0,
            "chunk_index": chunk["chunk_index"],
            "start": None,
            "end": None,
        }
        for chunk in chunks
    ]
    metadata_tags = {"title": "Sample", "artist": "Author", "language": "en"}
    audio_path = tmp_path / "audio.mp3"
    audio_path.write_bytes(b"ID3 audio")
    output_path = tmp_path / "output.epub"
    build_epub3_package(
        output_path=output_path,
        book_id="job-whitespace",
        extraction=extraction,
        metadata_tags=metadata_tags,
        chapter_markers=[],
        chunk_markers=chunk_markers,
        chunks=chunks,
        audio_path=audio_path,
        speaker_mode="single",
    )
    with zipfile.ZipFile(output_path) as archive:
        chapter_doc = archive.read("OEBPS/text/chapter_0001.xhtml").decode("utf-8")
        assert "Line one with  double  spaces." in chapter_doc
        chunk_section = chapter_doc.replace("  ", " ")
        assert "Second line" in chunk_section
        assert "Third paragraph." in chunk_section
        # The original text (with newlines) lives in a <pre class="chapter-original">.
        match = re.search(
            r"<pre class=\"chapter-original\"[^>]*>(.*?)</pre>", chapter_doc, re.DOTALL
        )
        assert match is not None
        original_text = html.unescape(match.group(1))
        assert "Second line\n\nThird paragraph." in original_text
def test_epub3_sentence_chunks_render_as_paragraphs(tmp_path) -> None:
    """Sentence-level chunks sharing a paragraph id merge into one <p> group."""
    extraction = ExtractionResult(
        chapters=[
            ExtractedChapter(
                title="Chapter 1",
                text="First sentence. Second sentence in same paragraph.\n\nNew paragraph starts here.",
            )
        ],
        metadata={"title": "Sample", "artist": "Author", "language": "en"},
    )
    # Chunk ids encode paragraph (pNNNN) and sentence (sNNNN) positions;
    # the first two share p0000 and should render in the same paragraph.
    chunks = [
        {
            "id": "chap0000_p0000_s0000",
            "chapter_index": 0,
            "chunk_index": 0,
            "text": "First sentence.",
            "level": "sentence",
            "speaker_id": "narrator",
        },
        {
            "id": "chap0000_p0000_s0001",
            "chapter_index": 0,
            "chunk_index": 1,
            "text": "Second sentence in same paragraph.",
            "level": "sentence",
            "speaker_id": "narrator",
        },
        {
            "id": "chap0000_p0001_s0000",
            "chapter_index": 0,
            "chunk_index": 2,
            "text": "New paragraph starts here.",
            "level": "sentence",
            "speaker_id": "narrator",
        },
    ]
    chunk_markers = [
        {
            "id": chunk["id"],
            "chapter_index": 0,
            "chunk_index": chunk["chunk_index"],
            "start": None,
            "end": None,
        }
        for chunk in chunks
    ]
    audio_path = tmp_path / "audio.mp3"
    audio_path.write_bytes(b"ID3 audio")
    output_path = tmp_path / "output.epub"
    build_epub3_package(
        output_path=output_path,
        book_id="job-paragraphs",
        extraction=extraction,
        metadata_tags={"title": "Sample", "artist": "Author", "language": "en"},
        chapter_markers=[],
        chunk_markers=chunk_markers,
        chunks=chunks,
        audio_path=audio_path,
        speaker_mode="single",
    )
    with zipfile.ZipFile(output_path) as archive:
        chapter_doc = archive.read("OEBPS/text/chapter_0001.xhtml").decode("utf-8")
        # Sentence chunks must not render as standalone div wrappers.
        assert '<div class="chunk"' not in chapter_doc
        assert chapter_doc.count('<p class="chunk-group"') == 2
        assert "First sentence." in chapter_doc
        assert "Second sentence in same paragraph." in chapter_doc
        # Both sentences of paragraph 0 must be inside the first <p>.
        first_paragraph_start = chapter_doc.find('<p class="chunk-group"')
        first_paragraph_end = chapter_doc.find("</p>", first_paragraph_start)
        first_paragraph = chapter_doc[first_paragraph_start:first_paragraph_end]
        assert "First sentence." in first_paragraph
        assert "Second sentence in same paragraph." in first_paragraph
import unittest
import os
import shutil
import sys
from ebooklib import epub
# Ensure import path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser
class TestEpubHeuristicNav(unittest.TestCase):
    """
    Tests for the heuristic fallback in _identify_nav_item (Step 4),
    where the parser scans ITEM_DOCUMENTs for <nav epub:type="toc">
    when no explicit ITEM_NAVIGATION is found.
    """

    def setUp(self):
        # Fresh scratch directory per test; removed again in tearDown.
        self.test_dir = "tests/test_data_heuristic"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.epub_path = os.path.join(self.test_dir, "heuristic_test.epub")

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_heuristic_nav_discovery(self):
        book = epub.EpubBook()
        book.set_identifier("heuristic123")
        book.set_title("Heuristic Test Book")
        # 1. Add Content
        c1 = epub.EpubHtml(title="Chapter 1", file_name="chap1.xhtml", lang="en")
        c1.content = "<h1>Chapter 1</h1><p>Text</p>"
        book.add_item(c1)
        # 2. Add a Nav file BUT as a regular EpubHtml (ITEM_DOCUMENT)
        # We do NOT use EpubNav. We do NOT look like a standard nav file if possible,
        # but content must contain the magical signature.
        nav_content = """
        <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
        <body>
        <nav epub:type="toc" id="toc">
        <h1>Hidden TOC</h1>
        <ol>
        <li><a href="chap1.xhtml">Chapter 1</a></li>
        </ol>
        </nav>
        </body>
        </html>
        """
        # Filename intentionally generic/obscure to avoid filename-based heuristics
        # (though current code checks content, not just filename)
        nav_file = epub.EpubHtml(title="Hidden Nav", file_name="content_toc.xhtml")
        nav_file.content = nav_content
        book.add_item(nav_file)
        # 3. Setup Spine
        book.spine = [nav_file, c1]
        # 4. Write EPUB
        epub.write_epub(self.epub_path, book)
        # 5. Patch OPF to ensure ebooklib didn't sneakily add ITEM_NAVIGATION or toc="ncx"
        import zipfile

        patched = False
        with zipfile.ZipFile(self.epub_path, "r") as zin:
            opf_content = zin.read("EPUB/content.opf").decode("utf-8")
            # Remove toc="ncx" attribute if present (causes crash if no NCX)
            if 'toc="ncx"' in opf_content:
                opf_content = opf_content.replace('toc="ncx"', "")
                patched = True
            # Ideally we'd verify properties="nav" isn't there, but EpubHtml shouldn't add it.
            # If ebooklib added it, we might need to strip it to force heuristic.
            if 'properties="nav"' in opf_content:
                opf_content = opf_content.replace('properties="nav"', "")
                patched = True
            if patched:
                TEMP_EPUB = self.epub_path + ".temp"
                with zipfile.ZipFile(TEMP_EPUB, "w") as zout:
                    for item in zin.infolist():
                        if item.filename == "EPUB/content.opf":
                            zout.writestr(item, opf_content)
                        else:
                            zout.writestr(item, zin.read(item.filename))
        if patched:
            shutil.move(TEMP_EPUB, self.epub_path)
        # 6. Verify our setup: Ensure NO ITEM_NAVIGATION exists
        # We can inspect using ebooklib again
        import ebooklib

        check_book = epub.read_epub(self.epub_path)
        nav_items = list(check_book.get_items_of_type(ebooklib.ITEM_NAVIGATION))
        self.assertEqual(
            len(nav_items), 0, "Setup failed: explicit navigation item found!"
        )
        # 7. Run Parser
        parser = get_book_parser(self.epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        # 8. Assertions
        # Should have found the nav via content scanning
        chapter_titles = [c[1] for c in chapters]
        self.assertIn("Chapter 1", chapter_titles)
        # Also verify we hit the "html" type in identification
        # We can't easily check private variables, but success implies it worked.
if __name__ == "__main__":
unittest.main()
import unittest
import os
import shutil
import sys
from ebooklib import epub
# Ensure import path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser
class TestEpubHtmlNavParsing(unittest.TestCase):
    """
    Tests for EPUB 3 HTML5 Navigation Document parsing logic (_parse_html_nav_li).
    """

    def setUp(self):
        # Fresh scratch directory per test; removed again in tearDown.
        self.test_dir = "tests/test_data_nav"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.epub_path = os.path.join(self.test_dir, "nav_test.epub")

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_epub_with_custom_nav(self, nav_html_content):
        """
        Creates an EPUB with a manually injected HTML Navigation Document.
        """
        book = epub.EpubBook()
        book.set_identifier("navtest123")
        book.set_title("Nav Test Book")
        # Add some content files
        c1 = epub.EpubHtml(title="Chapter 1", file_name="chap1.xhtml", lang="en")
        c1.content = "<h1>Chapter 1</h1><p>Text 1</p>"
        book.add_item(c1)
        c2 = epub.EpubHtml(title="Chapter 2", file_name="chap2.xhtml", lang="en")
        c2.content = "<h1>Chapter 2</h1><p>Text 2</p>"
        book.add_item(c2)
        # Create the Nav item manually to control the HTML structure exactly
        # Use EpubHtml + OPF patching because EpubNav forces auto-generation
        nav = epub.EpubHtml(title="Nav", file_name="nav.xhtml")
        nav.content = nav_html_content
        book.add_item(nav)
        # We must set spine manually
        book.spine = [nav, c1, c2]
        epub.write_epub(self.epub_path, book)
        # Patch the OPF to remove toc="ncx" default which causes crash
        # because we intentionally excluded the legacy NCX file.
        import zipfile

        with zipfile.ZipFile(self.epub_path, "r") as zin:
            opf_content = zin.read("EPUB/content.opf").decode("utf-8")
            opf_content = opf_content.replace('toc="ncx"', "")
            # Repack
            TEMP_EPUB = self.epub_path + ".temp"
            with zipfile.ZipFile(TEMP_EPUB, "w") as zout:
                for item in zin.infolist():
                    if item.filename == "EPUB/content.opf":
                        zout.writestr(item, opf_content)
                    else:
                        zout.writestr(item, zin.read(item.filename))
        shutil.move(TEMP_EPUB, self.epub_path)

    def test_basic_html_nav_parsing(self):
        """
        Test parsing of a standard flat list of links.
        """
        nav_html = """
        <nav epub:type="toc" id="toc">
        <h1>Table of Contents</h1>
        <ol>
        <li><a href="chap1.xhtml">Chapter 1</a></li>
        <li><a href="chap2.xhtml">Chapter 2</a></li>
        </ol>
        </nav>
        """
        self._create_epub_with_custom_nav(nav_html)
        parser = get_book_parser(self.epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        # Filter out "Nav" or "Introduction" prefix content found from the Nav file itself
        chapters = [c for c in chapters if "Chapter" in c[1] or "Section" in c[1]]
        self.assertEqual(len(chapters), 2)
        self.assertEqual(chapters[0][1], "Chapter 1")
        self.assertEqual(chapters[1][1], "Chapter 2")

    def test_nested_html_nav_parsing(self):
        """
        Test parsing of nested lists (Sub-chapters).
        """
        nav_html = """
        <nav epub:type="toc">
        <ol>
        <li>
        <a href="chap1.xhtml">Chapter 1</a>
        <ol>
        <li><a href="chap2.xhtml">Section 1.1</a></li>
        </ol>
        </li>
        </ol>
        </nav>
        """
        # Note: In this test setup, chap2 is serving as "Section 1.1" effectively
        self._create_epub_with_custom_nav(nav_html)
        parser = get_book_parser(self.epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        ids = [c[1] for c in chapters]
        self.assertIn("Chapter 1", ids)
        self.assertIn("Section 1.1", ids)

    def test_span_header_parsing(self):
        """
        Test parsing of <li><span>Header</span><ol>...</ol></li> pattern.
        This represents a grouping header that isn't a link itself.
        """
        nav_html = """
        <nav epub:type="toc">
        <ol>
        <li>
        <span>Part I</span>
        <ol>
        <li><a href="chap1.xhtml">Chapter 1</a></li>
        </ol>
        </li>
        </ol>
        </nav>
        """
        self._create_epub_with_custom_nav(nav_html)
        parser = get_book_parser(self.epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        chapter_titles = [c[1] for c in chapters]
        self.assertIn("Chapter 1", chapter_titles)
        self.assertNotIn("Part I", chapter_titles)
        # Check internal structure
        # Find the node named "Part I" in the processed structure
        root_node = next(
            node for node in parser.processed_nav_structure if node["title"] == "Part I"
        )
        self.assertEqual(root_node["title"], "Part I")
        self.assertFalse(root_node["has_content"])
        self.assertEqual(len(root_node["children"]), 1)
        self.assertEqual(root_node["children"][0]["title"], "Chapter 1")

    def test_identify_nav_item(self):
        """Test the _identify_nav_item method specifically."""
        nav_html = """
        <nav epub:type="toc" id="toc"><h1>TOC</h1><ol><li><a href="c1.html">C1</a></li></ol></nav>
        """
        self._create_epub_with_custom_nav(nav_html)
        parser = get_book_parser(self.epub_path)
        # Note: _identify_nav_item relies on self.book being loaded
        # The parser constructor or process_content handles load()
        # But here we can call load directly if needed, or rely on normal flow up until navigation
        parser.load()
        nav_item, nav_type = parser._identify_nav_item()
        self.assertEqual(nav_type, "html")
        self.assertIsNotNone(nav_item)
        self.assertTrue("nav.xhtml" in nav_item.get_name())
if __name__ == "__main__":
unittest.main()
import unittest
import os
import shutil
import zipfile
import sys
import logging
from ebooklib import epub
# Ensure import path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser
class TestEpubMissingFileErrorHandling(unittest.TestCase):
"""
Tests for robust error handling and recovery in the book parser.
"""
    def setUp(self):
        """Create a clean scratch directory for the broken-EPUB fixture."""
        self.test_dir = "tests/test_data_errors"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.broken_epub_path = os.path.join(self.test_dir, "missing_file.epub")
        # Suppress logging during tests to keep output clean,
        # or capture it if we want to assert on warnings.
        # For now, we just let it be or set level to ERROR.
        logging.getLogger().setLevel(logging.ERROR)
def tearDown(self):
if os.path.exists(self.test_dir):
shutil.rmtree(self.test_dir)
def _create_broken_epub(self):
"""
Creates an EPUB where a file listed in the manifest is missing from the ZIP archive.
"""
book = epub.EpubBook()
book.set_identifier("broken123")
book.set_title("Broken Book")
# 1. Add a valid chapter
c1 = epub.EpubHtml(title="Chapter 1", file_name="chap1.xhtml", lang="en")
c1.content = "<h1>Chapter 1</h1><p>Survivable content.</p>"
book.add_item(c1)
# 2. Add a 'ghost' chapter that we will delete later
c2 = epub.EpubHtml(title="Ghost Chapter", file_name="ghost.xhtml", lang="en")
c2.content = "<h1>Ghost</h1><p>I will disappear.</p>"
book.add_item(c2)
book.spine = ["nav", c1, c2]
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
temp_path = os.path.join(self.test_dir, "temp.epub")
epub.write_epub(temp_path, book)
# 3. Physically remove 'ghost.xhtml' from the ZIP
with zipfile.ZipFile(temp_path, "r") as zin:
with zipfile.ZipFile(self.broken_epub_path, "w") as zout:
for item in zin.infolist():
# Copy everything EXCEPT the ghost file
# Note: ebooklib might put files in OEPS/ or EPUB/ folders depending on version,
# so checking "ghost.xhtml" presence in filename is safer.
if "ghost.xhtml" not in item.filename:
zout.writestr(item, zin.read(item.filename))
def test_missing_file_recovery(self):
"""
Verify that the parser recovers gracefully when a referenced file is missing.
Should log a warning instead of raising KeyError.
"""
self._create_broken_epub()
try:
parser = get_book_parser(self.broken_epub_path)
parser.process_content()
# 1. Ensure process didn't crash
self.assertTrue(True, "Parser should not crash on missing file")
# 2. Ensure valid content was extracted
# Identify the ID for chap1.xhtml (usually file path based)
# Since IDs can vary, we check if ANY content contains our known string
chap1_found = False
for text in parser.content_texts.values():
if "Survivable content" in text:
chap1_found = True
break
self.assertTrue(chap1_found, "The valid chapter should still be processed")
except KeyError:
self.fail("Parser raised KeyError instead of handling the missing file!")
except Exception as e:
self.fail(f"Parser raised unexpected exception: {e}")
# Allow running this test module standalone (outside a pytest/CI runner).
if __name__ == "__main__":
    unittest.main()
import unittest
import os
import shutil
import sys
from ebooklib import epub
# Ensure we can import the module
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser, EpubParser
class TestEpubNcxParsing(unittest.TestCase):
    """
    Focused tests for NCX navigation scenarios, ensuring legacy/compatibility
    modes work when HTML5 Navigation is missing.
    """

    def setUp(self):
        # Fresh scratch directory per test; wiped first to avoid stale fixtures.
        self.test_dir = "tests/test_data_ncx"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.ncx_only_epub_path = os.path.join(self.test_dir, "ncx_only.epub")

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_ncx_only_epub(self, chapters):
        """
        Helper to create an EPUB with ONLY NCX table of contents (no HTML nav).

        chapters: iterable of (title, content) pairs, one chapter file each.
        """
        book = epub.EpubBook()
        book.set_identifier("ncx_test_123")
        book.set_title("NCX Only Book")
        book.set_language("en")
        epub_chapters = []
        for i, (title, content) in enumerate(chapters):
            filename = f"chap{i+1}.xhtml"
            c = epub.EpubHtml(title=title, file_name=filename, lang="en")
            # Ensure content is substantial enough to not be skipped
            c.content = f"<h1>{title}</h1><p>{content}</p>"
            book.add_item(c)
            epub_chapters.append(c)
        # Define Table of Contents
        book.toc = tuple(epub_chapters)
        # Add default NCX and generic spine
        book.add_item(epub.EpubNcx())
        # IMPORTANT: Do NOT add EpubNav() here, that's what we are testing!
        book.spine = ["nav"] + epub_chapters
        epub.write_epub(self.ncx_only_epub_path, book)

    def test_ncx_only_parsing(self):
        """
        Verify that an EPUB with only an NCX file (no HTML nav) is parsed correctly.
        Logic tested: _process_epub_content_nav (NCX branch), _parse_ncx_navpoint
        """
        # 1. Setup Data
        chapters_data = [
            ("Chapter 1", "This is the first chapter."),
            ("Chapter 2", "This is the second chapter."),
        ]
        self._create_ncx_only_epub(chapters_data)
        # 2. Run Parser
        parser = get_book_parser(self.ncx_only_epub_path)
        parser.process_content()
        # 3. Verify Breakdown
        # We expect detailed breakdown based on NCX
        chapters = parser.get_chapters()
        # Should find exactly 2 chapters based on the Toc
        self.assertEqual(len(chapters), 2, "Should have 2 chapters extracted from NCX")
        # Check Titles and Sequence (each entry is (id, title, ...) — title at index 1)
        self.assertEqual(chapters[0][1], "Chapter 1")
        self.assertEqual(chapters[1][1], "Chapter 2")
        # Verify content was extracted
        # Note: 'src' in chapters usually points to file_name if no fragments
        id_1 = chapters[0][0]
        self.assertIn("This is the first chapter", parser.content_texts[id_1])

    def test_nested_ncx_parsing(self):
        """
        Verify parsing of nested NCX structures (Chapters with Subchapters).
        """
        book = epub.EpubBook()
        book.set_identifier("nested_ncx")
        book.set_title("Nested NCX")
        # Create one big file with sections
        c1 = epub.EpubHtml(title="Main Chapter", file_name="main.xhtml", lang="en")
        c1.content = """
<h1 id="intro">Introduction</h1>
<p>Intro text.</p>
<h2 id="sect1">Section 1</h2>
<p>Section 1 text.</p>
"""
        book.add_item(c1)
        # Manually construct nested TOC because ebooklib's default helpers are simple
        # EbookLib automatically builds NCX from book.toc
        # Nested tuple structure: (Section, (Subsection, Sub-subsection))
        # We need to link to Fragments for this to really test nested NCX pointing to same file
        # EbookLib Link object: epub.Link(href, title, uid)
        link_root = epub.Link("main.xhtml#intro", "Introduction", "intro")
        link_sect = epub.Link("main.xhtml#sect1", "Section 1", "sect1")
        # Structure: Intro -> Section 1 (as child)
        book.toc = ((link_root, (link_sect,)),)
        book.add_item(epub.EpubNcx())
        book.spine = ["nav", c1]
        epub.write_epub(self.ncx_only_epub_path, book)
        # Parse
        parser = get_book_parser(self.ncx_only_epub_path)
        parser.process_content()
        chapters = parser.get_chapters()
        # Depending on how the parser flattens, we should see both entries
        titles = [node[1] for node in chapters]
        self.assertIn("Introduction", titles)
        self.assertIn("Section 1", titles)
# Allow running this test module standalone (outside a pytest/CI runner).
if __name__ == "__main__":
    unittest.main()
import unittest
import os
import shutil
import sys
from ebooklib import epub
import ebooklib
from unittest.mock import MagicMock
# Ensure import path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from abogen.book_parser import get_book_parser
class TestEpubStandardNav(unittest.TestCase):
    """
    Tests for the standard ITEM_NAVIGATION discovery in _identify_nav_item.
    Refactored to explicitly test different discovery paths defined in the parser.
    """

    def setUp(self):
        self.test_dir = "tests/test_data_standard_nav"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.epub_path = os.path.join(self.test_dir, "standard_nav_test.epub")

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_and_load_epub(self):
        """Helper to create a basic EPUB and return a loaded parser."""
        book = epub.EpubBook()
        book.set_identifier("stdnav123")
        book.set_title("Standard Nav Test")
        c1 = epub.EpubHtml(title="Chapter 1", file_name="chap1.xhtml", lang="en")
        c1.content = "<h1>Chapter 1</h1><p>Text 1</p>"
        book.add_item(c1)
        # Use Standard EpubNav
        nav = epub.EpubNav()
        book.add_item(nav)
        book.spine = [nav, c1]
        epub.write_epub(self.epub_path, book)
        # "Zip Surgery" Patch:
        # ebooklib unconditionally adds `toc="ncx"` to the spine, even for EPUB 3 files that purely use HTML Nav.
        # This creates a dangling reference to a non-existent "ncx" item, causing ebooklib to crash on read.
        # We manually remove this attribute to ensure the test EPUB is valid and readable.
        # TODO - find real world examples of EPUB 3 files that use HTML Nav
        import zipfile
        patched = False
        with zipfile.ZipFile(self.epub_path, "r") as zin:
            opf_content = zin.read("EPUB/content.opf").decode("utf-8")
            if 'toc="ncx"' in opf_content:
                opf_content = opf_content.replace('toc="ncx"', "")
                patched = True
            TEMP_EPUB = self.epub_path + ".temp"
            # Rewrite the archive member-by-member, substituting the patched OPF.
            with zipfile.ZipFile(TEMP_EPUB, "w") as zout:
                for item in zin.infolist():
                    if item.filename == "EPUB/content.opf":
                        zout.writestr(item, opf_content)
                    else:
                        zout.writestr(item, zin.read(item.filename))
        if patched:
            shutil.move(TEMP_EPUB, self.epub_path)
        parser = get_book_parser(self.epub_path)
        parser.load()
        return parser

    def test_discovery_by_item_navigation_type(self):
        """
        Scenario 1: The item is explicitly identified as ITEM_NAVIGATION (4).
        This exercises the first branch of _identify_nav_item.
        """
        parser = self._create_and_load_epub()
        # Inject an item that mocks the ITEM_NAVIGATION type behavior
        # (This simulates a library/parser that correctly types the item as 4)
        mock_nav = MagicMock()
        mock_nav.get_name.return_value = "nav.xhtml"
        mock_nav.get_type.return_value = ebooklib.ITEM_NAVIGATION
        # We append this mock to the book items to ensure get_items_of_type(ITEM_NAVIGATION) finds it
        parser.book.items.append(mock_nav)
        nav_item, nav_type = parser._identify_nav_item()
        self.assertEqual(nav_type, "html")
        self.assertEqual(nav_item.get_name(), "nav.xhtml")
        # Verify we are getting the object we expect (implied by success)

    def test_discovery_by_nav_property(self):
        """
        Scenario 2: The item is ITEM_DOCUMENT (9) but has properties=['nav'].
        This is the standard EPUB 3 behavior and exercises the fallback branch.
        """
        parser = self._create_and_load_epub()
        # Locate the generic 'nav' item loaded by ebooklib
        original_nav = parser.book.get_item_with_id("nav")
        self.assertIsNotNone(original_nav)
        # "Fix" the object to match what we expect from a correct EPUB 3 read:
        # It should have properties=['nav'].
        # We use a real EpubNav object to ensure structural correctness.
        proper_nav = epub.EpubNav(uid=original_nav.id, file_name=original_nav.file_name)
        proper_nav.content = original_nav.content
        proper_nav.properties = ["nav"]
        # Swap it into the book items list
        try:
            idx = parser.book.items.index(original_nav)
            parser.book.items[idx] = proper_nav
        except ValueError:
            self.fail("Could not find original nav item to swap")
        nav_item, nav_type = parser._identify_nav_item()
        self.assertEqual(nav_type, "html")
        self.assertEqual(nav_item.get_name(), "nav.xhtml")
        # Check that we actually found the one with properties
        self.assertEqual(getattr(nav_item, "properties", []), ["nav"])
# Allow running this test module standalone (outside a pytest/CI runner).
if __name__ == "__main__":
    unittest.main()
from __future__ import annotations
from pathlib import Path
from abogen.webui.conversion_runner import _render_ffmetadata, _write_ffmetadata_file
def test_render_ffmetadata_includes_chapters(tmp_path):
    """Rendered ffmetadata escapes values and emits one [CHAPTER] per entry."""
    metadata = {
        "title": "Sample Book",
        "artist": "Author Name",
        "comment": "Line one\nLine two",
        "publisher": "ACME=Corp",
    }
    chapters = [
        {"start": 0.0, "end": 5.0, "title": "Intro", "voice": "voice_a"},
        {"start": 5.0, "end": 12.345, "title": "Chapter 2"},
    ]
    rendered = _render_ffmetadata(metadata, chapters)
    # Header, escaped key=value pairs, and chapter timing in milliseconds.
    expected_snippets = [
        ";FFMETADATA1",
        "title=Sample Book",
        "artist=Author Name",
        "comment=Line one\\nLine two",
        "publisher=ACME\\=Corp",
        "START=0",
        "END=5000",
        "voice=voice_a",
    ]
    for snippet in expected_snippets:
        assert snippet in rendered
    assert rendered.count("[CHAPTER]") == 2
    # Writing to disk should produce the same chapter data.
    target = tmp_path / "book.m4b"
    written = _write_ffmetadata_file(target, metadata, chapters)
    assert written is not None
    assert written.exists()
    assert "END=12345" in written.read_text(encoding="utf-8")
    written.unlink()
from abogen.webui import conversion_runner
class DummyJob:
    """Minimal stand-in for a Job carrying only pronunciation-related fields."""

    def __init__(self):
        self.voice = "M1"
        self.language = "en"
        self.speakers = None
        self.pronunciation_overrides = []
        self.manual_overrides = []
def _apply(text: str, job: DummyJob) -> str:
    """Run *text* through the job's merged and compiled pronunciation rules."""
    compiled = conversion_runner._compile_pronunciation_rules(
        conversion_runner._merge_pronunciation_overrides(job)
    )
    return conversion_runner._apply_pronunciation_rules(text, compiled)
def test_manual_override_is_applied_even_if_pronunciation_overrides_stale():
    """A manual override rewrites its token even with no stored overrides."""
    job = DummyJob()
    job.manual_overrides = [{"token": "Unfu*k", "pronunciation": "Unfuck"}]
    result = _apply("He said Unfu*k loudly.", job)
    assert "Unfuck" in result
    assert "Unfu*k" not in result
def test_manual_override_takes_precedence_over_existing_pronunciation_override():
    """Manual overrides win over conflicting stored pronunciation overrides."""
    job = DummyJob()
    job.pronunciation_overrides = [
        {"token": "Unfu*k", "normalized": "unfu*k", "pronunciation": "WRONG"}
    ]
    job.manual_overrides = [{"token": "Unfu*k", "pronunciation": "RIGHT"}]
    result = _apply("Unfu*k.", job)
    assert "RIGHT" in result
    assert "WRONG" not in result
from __future__ import annotations
from abogen.webui.routes.api import _opds_metadata_overrides
def test_opds_metadata_overrides_maps_author_and_subtitle() -> None:
    """OPDS metadata maps onto both new (author/subtitle) and legacy keys."""
    payload = {
        "authors": ["Alexandre Dumas"],
        "subtitle": "Unabridged",
        "series": "Example",
        "series_index": 2,
        "tags": ["Fiction", "Classic"],
        "summary": "Summary text",
    }
    overrides = _opds_metadata_overrides(payload)
    expected = {
        "authors": "Alexandre Dumas",
        "author": "Alexandre Dumas",
        "subtitle": "Unabridged",
        # Existing behavior still present
        "series": "Example",
        "series_index": "2",
        "tags": "Fiction, Classic",
        "description": "Summary text",
    }
    for key, value in expected.items():
        assert overrides[key] == value
def test_opds_metadata_overrides_accepts_author_string() -> None:
    """A bare 'author' string populates both author keys."""
    result = _opds_metadata_overrides({"author": "Mary Shelley"})
    assert result["authors"] == result["author"] == "Mary Shelley"
from __future__ import annotations
import time
from pathlib import Path
import pytest
from abogen.webui.conversion_runner import _build_output_path, _prepare_project_layout
from abogen.webui.service import Job
def _sample_job(tmp_path: Path) -> Job:
    """Build a minimal Job backed by a real text file under *tmp_path*."""
    source = tmp_path / "sample.txt"
    source.write_text("example", encoding="utf-8")
    return Job(
        id="job-1",
        original_filename="Sample Title.txt",
        stored_path=source,
        language="en",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="mp3",
        save_mode="Use default save location",
        output_folder=tmp_path,
        replace_single_newlines=False,
        subtitle_format="srt",
        created_at=time.time(),
    )
def test_prepare_project_layout_uses_timestamped_folder(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Default layout: one timestamped folder holds audio and subtitles."""
    job = _sample_job(tmp_path)
    # Freeze the timestamp so the folder name is predictable.
    monkeypatch.setattr(
        "abogen.webui.conversion_runner._output_timestamp_token",
        lambda: "20250101-120000",
    )
    root, audio_dir, subtitle_dir, metadata_dir = _prepare_project_layout(
        job, tmp_path
    )
    assert root.name.startswith("20250101-120000_Sample_Title"), root.name
    assert audio_dir == root == subtitle_dir
    assert metadata_dir is None
    result = _build_output_path(audio_dir, job.original_filename, "mp3")
    assert result == root / "Sample_Title.mp3"
def test_prepare_project_layout_creates_project_subdirs(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Project mode: audio/, subtitles/ and metadata/ subfolders are created."""
    job = _sample_job(tmp_path)
    job.save_as_project = True
    monkeypatch.setattr(
        "abogen.webui.conversion_runner._output_timestamp_token",
        lambda: "20250101-120500",
    )
    root, audio_dir, subtitle_dir, metadata_dir = _prepare_project_layout(
        job, tmp_path
    )
    assert metadata_dir is not None
    layout = {
        "audio": audio_dir,
        "subtitles": subtitle_dir,
        "metadata": metadata_dir,
    }
    for subfolder, actual in layout.items():
        assert actual == root / subfolder
        assert actual.is_dir()
    result = _build_output_path(audio_dir, job.original_filename, "wav")
    assert result == audio_dir / "Sample_Title.wav"
import unittest
import os
import shutil
import fitz # PyMuPDF
from abogen.book_parser import PdfParser
class TestPdfStructure(unittest.TestCase):
    """Tests for PdfParser's navigation-structure extraction (TOC and fallback)."""

    def setUp(self):
        # Fresh scratch directory per test; wiped first to avoid stale fixtures.
        self.test_dir = "tests/test_data_pdf"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)
        self.pdf_path = os.path.join(self.test_dir, "structure_test.pdf")

    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_pdf_structure_with_toc(self):
        """Pages between TOC entries become children of the preceding entry."""
        # Create PDF
        doc = fitz.open()
        p1 = doc.new_page()
        p1.insert_text((50, 50), "Page 1 Content")
        p2 = doc.new_page()
        p2.insert_text((50, 50), "Page 2 Content")
        p3 = doc.new_page()  # Chapter 2 start
        p3.insert_text((50, 50), "Page 3 Content")
        p4 = doc.new_page()
        p4.insert_text((50, 50), "Page 4 Content")
        # Add TOC:
        # 1. "Chap 1" -> Page 1
        # 2. "Chap 2" -> Page 3
        doc.set_toc([[1, "Chap 1", 1], [1, "Chap 2", 3]])
        doc.save(self.pdf_path)
        doc.close()
        with PdfParser(self.pdf_path) as parser:
            parser.process_content()
            nav = parser.processed_nav_structure
            # Expect 2 top level items
            self.assertEqual(len(nav), 2)
            self.assertEqual(nav[0]['title'], "Chap 1")
            self.assertEqual(nav[0]['src'], "page_1")
            self.assertEqual(nav[1]['title'], "Chap 2")
            self.assertEqual(nav[1]['src'], "page_3")
            # Check children of Chap 1 (Page 2 should be there)
            children_c1 = nav[0]['children']
            self.assertEqual(len(children_c1), 1)
            # The child should likely be titled "Page 2 - Page 2 Content" or similar
            self.assertIn("Page 2", children_c1[0]['title'])
            self.assertEqual(children_c1[0]['src'], "page_2")
            # Check children of Chap 2 (Page 4 should be there)
            children_c2 = nav[1]['children']
            self.assertEqual(len(children_c2), 1)
            self.assertIn("Page 4", children_c2[0]['title'])
            self.assertEqual(children_c2[0]['src'], "page_4")

    def test_pdf_structure_without_toc(self):
        """Without a TOC the parser falls back to a single 'Pages' root node."""
        # Create PDF without TOC
        doc = fitz.open()
        p1 = doc.new_page()
        p1.insert_text((50, 50), "Start")
        p2 = doc.new_page()
        p2.insert_text((50, 50), "End")
        doc.save(self.pdf_path)
        doc.close()
        with PdfParser(self.pdf_path) as parser:
            parser.process_content()
            nav = parser.processed_nav_structure
            # Expect 1 top level item (Pages)
            self.assertEqual(len(nav), 1)
            self.assertEqual(nav[0]['title'], "Pages")
            # Check children (all pages)
            children = nav[0]['children']
            self.assertEqual(len(children), 2)
            self.assertIn("Page 1", children[0]['title'])
            self.assertIn("Page 2", children[1]['title'])

    def test_pdf_structure_nested_toc(self):
        """Level-2 TOC entries nest under the preceding level-1 entry."""
        # Create PDF
        doc = fitz.open()
        p1 = doc.new_page()  # Chap 1
        p2 = doc.new_page()  # Sec 1.1
        p3 = doc.new_page()  # Chap 2
        doc.set_toc([
            [1, "Chap 1", 1],
            [2, "Sec 1.1", 2],
            [1, "Chap 2", 3]
        ])
        doc.save(self.pdf_path)
        doc.close()
        with PdfParser(self.pdf_path) as parser:
            parser.process_content()
            nav = parser.processed_nav_structure
            self.assertEqual(len(nav), 2)  # Chap 1, Chap 2
            self.assertEqual(nav[0]['title'], "Chap 1")
            # Chap 1 should have child Sec 1.1
            self.assertEqual(len(nav[0]['children']), 1)
            self.assertEqual(nav[0]['children'][0]['title'], "Sec 1.1")
            self.assertEqual(nav[0]['children'][0]['src'], "page_2")
# Allow running this test module standalone (outside a pytest/CI runner).
if __name__ == "__main__":
    unittest.main()
from pathlib import Path
from werkzeug.datastructures import MultiDict
from abogen.webui.routes.utils.form import apply_prepare_form
from abogen.webui.routes.utils.voice import resolve_voice_setting
from abogen.webui.service import PendingJob
def _make_pending_job() -> PendingJob:
    """Build a fully-populated PendingJob fixture with neutral defaults."""
    return PendingJob(
        id="pending",
        original_filename="example.epub",
        stored_path=Path("example.epub"),
        language="a",
        voice="af_nova",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="none",
        output_format="mp3",
        save_mode="save_next_to_input",
        output_folder=None,
        replace_single_newlines=False,
        subtitle_format="srt",
        total_characters=0,
        save_chapters_separately=False,
        merge_chapters_at_end=True,
        separate_chapters_format="wav",
        silence_between_chapters=2.0,
        save_as_project=False,
        voice_profile=None,
        max_subtitle_words=50,
        metadata_tags={},
        chapters=[],
        normalization_overrides={},
        created_at=0.0,
        read_title_intro=False,
        normalize_chapter_opening_caps=True,
    )
def test_apply_prepare_form_handles_custom_mix_for_speakers():
    """Choosing '__custom_mix' stores the formula as the resolved voice."""
    pending = _make_pending_job()
    pending.speakers = {"hero": {"id": "hero", "label": "Hero"}}
    form = MultiDict(
        {
            "chapter_intro_delay": "0.5",
            "speaker-hero-voice": "__custom_mix",
            "speaker-hero-formula": "af_nova*0.6+am_liam*0.4",
        }
    )
    _, _, _, errors, *_ = apply_prepare_form(pending, form)
    assert not errors
    hero = pending.speakers["hero"]
    mix = "af_nova*0.6+am_liam*0.4"
    assert hero["voice_formula"] == mix
    assert hero["resolved_voice"] == mix
    # The sentinel value must not leak into the stored voice.
    assert hero.get("voice") != "__custom_mix"
def test_apply_prepare_form_accepts_saved_speaker_reference_for_voice():
    """A 'speaker:' reference is kept verbatim and no formula is stored."""
    pending = _make_pending_job()
    pending.speakers = {"hero": {"id": "hero", "label": "Hero"}}
    form = MultiDict(
        {
            "chapter_intro_delay": "0.5",
            "speaker-hero-voice": "speaker:Female HQ",
            "speaker-hero-formula": "",
        }
    )
    _, _, _, errors, *_ = apply_prepare_form(pending, form)
    assert not errors
    hero = pending.speakers["hero"]
    assert hero["voice"] == "speaker:Female HQ" == hero["resolved_voice"]
    assert "voice_formula" not in hero
def test_resolve_voice_setting_handles_profile_reference():
    """'profile:<name>' expands to a normalized-weight voice formula."""
    profiles = {
        "Blend": {
            "language": "b",
            "voices": [("af_nova", 1.0), ("am_liam", 1.0)],
        }
    }
    voice, profile_name, language = resolve_voice_setting(
        "profile:Blend", profiles=profiles
    )
    assert (voice, profile_name, language) == (
        "af_nova*0.5+am_liam*0.5",
        "Blend",
        "b",
    )
def test_apply_prepare_form_updates_closing_outro_flag():
    """Posting read_closing_outro=false flips the pending-job flag off."""
    pending = _make_pending_job()
    pending.read_closing_outro = True
    apply_prepare_form(pending, MultiDict({"read_closing_outro": "false"}))
    assert pending.read_closing_outro is False
from abogen.webui.routes.utils import preview
def test_preview_applies_manual_override_before_normalization(monkeypatch):
    """Manual overrides must be applied to preview text before synthesis.

    Strategy: stub out the normalization module and the supertonic pipeline
    via sys.modules so the overridden text can be captured without running
    any real TTS.
    """
    # Don't run real TTS/normalization; just exercise the override stage by
    # forcing provider=kokoro and then stubbing normalize_for_pipeline.
    monkeypatch.setattr(preview, "get_preview_pipeline", lambda language, device: None)

    # Stub normalize_for_pipeline to be identity; we only care that overrides run.
    class _Norm:
        @staticmethod
        def normalize_for_pipeline(text):
            return text

    monkeypatch.setitem(
        __import__("sys").modules, "abogen.kokoro_text_normalization", _Norm
    )
    # And stub the kokoro pipeline path so generate_preview_audio won't proceed.
    # We'll instead validate by calling the override logic through generate_preview_audio
    # with provider=supertonic and stub SupertonicPipeline to capture input.
    captured = {}

    class DummyPipeline:
        def __init__(self, **kwargs):
            pass

        def __call__(self, text, **kwargs):
            # Record the text handed to the pipeline; yield no audio chunks.
            captured["text"] = text
            return iter(())

    monkeypatch.setitem(
        __import__("sys").modules,
        "abogen.tts_supertonic",
        type("M", (), {"SupertonicPipeline": DummyPipeline}),
    )
    try:
        preview.generate_preview_audio(
            text="He said Unfu*k loudly.",
            voice_spec="M1",
            language="en",
            speed=1.0,
            use_gpu=False,
            tts_provider="supertonic",
            manual_overrides=[{"token": "Unfu*k", "pronunciation": "Unfuck"}],
        )
    except Exception:
        # generate_preview_audio will raise because no audio chunks; that's fine.
        pass
    assert "text" in captured
    assert "Unfuck" in captured["text"]
    assert "Unfu*k" not in captured["text"]
import pytest
from unittest.mock import patch
from abogen.kokoro_text_normalization import (
normalize_for_pipeline,
DEFAULT_APOSTROPHE_CONFIG,
)
from abogen.normalization_settings import build_apostrophe_config, _SETTINGS_DEFAULTS
def normalize(text, overrides=None):
    """Normalize *text* using default settings merged with *overrides*."""
    merged = {**_SETTINGS_DEFAULTS, **(overrides or {})}
    cfg = build_apostrophe_config(settings=merged, base=DEFAULT_APOSTROPHE_CONFIG)
    return normalize_for_pipeline(text, config=cfg, settings=merged)
def test_year_pronunciation():
    """Years read as speech: pre-2000 uses 'hundred', 2000s pairs digits."""
    spoken = normalize("1925")
    print(f"1925 -> {spoken}")
    lowered = spoken.lower()
    assert "nineteen hundred" in lowered
    assert "five" in lowered

    spoken = normalize("2025")
    print(f"2025 -> {spoken}")
    lowered = spoken.lower()
    assert "twenty twenty" in lowered
    assert "five" in lowered
def test_currency_pronunciation():
    """Currency amounts drop zero cents but voice nonzero cents."""
    whole = normalize("$1.00")
    print(f"$1.00 -> {whole}")
    lowered = whole.lower()
    assert "one dollar" in lowered
    assert "zero cents" not in lowered

    fractional = normalize("$1.05")
    print(f"$1.05 -> {fractional}")
    lowered = fractional.lower()
    assert "one dollar" in lowered
    assert "five cents" in lowered
def test_url_pronunciation():
    """URLs lose scheme and www prefix and read dots aloud."""
    spoken = normalize("https://www.amazon.com")
    print(f"https://www.amazon.com -> {spoken}")
    lowered = spoken.lower()
    assert "amazon dot com" in lowered
    assert "http" not in lowered
    assert "www" not in lowered

    spoken = normalize("www.google.com")
    print(f"www.google.com -> {spoken}")
    assert "google dot com" in spoken.lower()
def test_roman_numerals_world_war():
    """Roman numerals after 'World War' are spoken as numbers."""
    first = normalize("World War I")
    print(f"World War I -> {first}")
    assert "world war one" in first.lower()

    second = normalize("World War II")
    print(f"World War II -> {second}")
    assert "world war two" in second.lower()
def test_footnote_removal():
    """Trailing footnote digits and bracketed citations are stripped."""
    cleaned = normalize("Bob is awesome1.")
    print(f"Bob is awesome1. -> {cleaned}")
    assert "bob is awesome." in cleaned.lower()
    assert "1" not in cleaned

    cleaned = normalize("Citation needed[1].")
    print(f"Citation needed[1]. -> {cleaned}")
    assert "citation needed." in cleaned.lower()
    assert "[1]" not in cleaned
def test_manual_override_normalization():
    """Override tokens are lowercased and stripped of surrounding whitespace."""
    from abogen.entity_analysis import normalize_manual_override_token

    for raw, expected in (("The", "the"), (" A ", "a"), ("word", "word")):
        assert normalize_manual_override_token(raw) == expected
from __future__ import annotations
import io
import time
from abogen.webui.service import (
Job,
JobStatus,
build_service,
_JOB_LOGGER,
build_audiobookshelf_metadata,
)
def test_service_processes_job(tmp_path):
    """End-to-end: an enqueued job runs through the stub runner to COMPLETED.

    The service runs the runner on a background worker, so the test polls
    the job status with a 5s deadline instead of asserting immediately.
    """
    uploads = tmp_path / "uploads"
    outputs = tmp_path / "outputs"
    uploads.mkdir()
    outputs.mkdir()
    source = uploads / "sample.txt"
    payload = "hello world"
    source.write_text(payload, encoding="utf-8")
    runner_invocations: list[str] = []

    def runner(job):
        # Stub runner: record the invocation and mark the job fully processed.
        runner_invocations.append(job.id)
        job.add_log("processing")
        job.progress = 1.0
        job.processed_characters = job.total_characters or len(payload)
        job.result.audio_path = outputs / f"{job.id}.wav"

    service = build_service(
        runner=runner,
        output_root=outputs,
        uploads_root=uploads,
    )
    job = service.enqueue(
        original_filename="sample.txt",
        stored_path=source,
        language="a",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="wav",
        save_mode="Save next to input file",
        output_folder=outputs,
        replace_single_newlines=False,
        subtitle_format="srt",
        total_characters=len(payload),
    )
    # Poll until the background worker reaches a terminal state (or timeout).
    deadline = time.time() + 5
    while time.time() < deadline and job.status not in {
        JobStatus.COMPLETED,
        JobStatus.FAILED,
        JobStatus.CANCELLED,
    }:
        time.sleep(0.05)
    service.shutdown()
    assert runner_invocations, "conversion runner was never called"
    assert job.status is JobStatus.COMPLETED
    assert job.progress == 1.0
    assert job.result.audio_path == outputs / f"{job.id}.wav"
    # These fields were not passed to enqueue; presumably enqueue's defaults —
    # verify against the service implementation if they change.
    assert job.chunk_level == "paragraph"
    assert job.speaker_mode == "single"
    assert job.chunks == []
    assert not job.generate_epub3
def test_job_add_log_emits_to_stream(tmp_path):
    """Job.add_log must both emit through _JOB_LOGGER and append to job.logs."""
    sample = tmp_path / "sample.txt"
    sample.write_text("payload", encoding="utf-8")
    job = Job(
        id="job-test",
        original_filename="sample.txt",
        stored_path=sample,
        language="a",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="wav",
        save_mode="Save next to input file",
        output_folder=tmp_path,
        replace_single_newlines=False,
        subtitle_format="srt",
        created_at=time.time(),
    )
    # Redirect every stream-capable handler of the shared job logger into a
    # StringIO so the emitted record can be inspected.
    captured_buffers = []
    for handler in list(_JOB_LOGGER.handlers):
        if not hasattr(handler, "setStream"):
            continue
        buffer = io.StringIO()
        original_stream = getattr(handler, "stream", None)
        handler.setStream(buffer)  # type: ignore[attr-defined]
        captured_buffers.append((handler, original_stream, buffer))
    assert captured_buffers, "Expected job logger to have stream handlers"
    try:
        job.add_log("Test log line", level="error")
        outputs = [buffer.getvalue() for _, _, buffer in captured_buffers]
    finally:
        # Always restore the original streams, even if add_log raised.
        for handler, original_stream, _ in captured_buffers:
            if hasattr(handler, "setStream"):
                handler.setStream(original_stream)  # type: ignore[attr-defined]
    assert any("Test log line" in output for output in outputs)
    assert job.logs[-1].message == "Test log line"
def test_job_add_log_handles_exception(tmp_path, capsys):
    """A crashing logger must not break add_log; a fallback writes to stderr."""
    sample = tmp_path / "sample.txt"
    sample.write_text("payload", encoding="utf-8")
    job = Job(
        id="job-fail-test",
        original_filename="sample.txt",
        stored_path=sample,
        language="a",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="wav",
        save_mode="Save next to input file",
        output_folder=tmp_path,
        replace_single_newlines=False,
        subtitle_format="srt",
        created_at=time.time(),
    )
    # Mock the logger to raise an exception; restored in the finally block so
    # the shared _JOB_LOGGER is never left broken for other tests.
    original_log = _JOB_LOGGER.log

    def side_effect(*args, **kwargs):
        raise RuntimeError("Logger exploded")

    _JOB_LOGGER.log = side_effect
    try:
        job.add_log("This should trigger fallback", level="info")
    finally:
        _JOB_LOGGER.log = original_log
    # The fallback path is expected to report the failure on stderr.
    captured = capsys.readouterr()
    assert "Logging failed for job job-fail-test" in captured.err
    assert "Logger exploded" in captured.err
def test_retry_removes_failed_job(tmp_path):
    """Retrying a failed job enqueues a fresh job and drops the failed one."""
    uploads = tmp_path / "uploads"
    outputs = tmp_path / "outputs"
    uploads.mkdir()
    outputs.mkdir()
    source = uploads / "sample.txt"
    source.write_text("hello", encoding="utf-8")

    def failing_runner(job):
        # Runner that always fails, driving the job to FAILED.
        job.add_log("runner failing", level="error")
        raise RuntimeError("boom")

    service = build_service(
        runner=failing_runner,
        output_root=outputs,
        uploads_root=uploads,
    )
    try:
        job = service.enqueue(
            original_filename="sample.txt",
            stored_path=source,
            language="a",
            voice="af_alloy",
            speed=1.0,
            use_gpu=False,
            subtitle_mode="Sentence",
            output_format="wav",
            save_mode="Save next to input file",
            output_folder=outputs,
            replace_single_newlines=False,
            subtitle_format="srt",
            total_characters=len("hello"),
        )
        # Poll up to 5s for the background worker to mark the job FAILED.
        deadline = time.time() + 5
        while time.time() < deadline and job.status is not JobStatus.FAILED:
            time.sleep(0.05)
        assert job.status is JobStatus.FAILED
        new_job = service.retry(job.id)
        assert new_job is not None
        assert new_job.id != job.id
        # The failed job must be replaced, not kept alongside the new one.
        job_ids = {entry.id for entry in service.list_jobs()}
        assert job.id not in job_ids
        assert new_job.id in job_ids
    finally:
        service.shutdown()
def test_audiobookshelf_metadata_uses_book_number(tmp_path):
    """`book_number` maps to ABS seriesSequence alongside the series name."""
    source = tmp_path / "book.txt"
    source.write_text("content", encoding="utf-8")
    tags = {"series": "Example Saga", "book_number": "7"}
    job = Job(
        id="job-abs",
        original_filename="book.txt",
        stored_path=source,
        language="en",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="mp3",
        save_mode="Save next to input file",
        output_folder=tmp_path,
        replace_single_newlines=False,
        subtitle_format="srt",
        created_at=time.time(),
        metadata_tags=tags,
    )
    result = build_audiobookshelf_metadata(job)
    assert result["seriesName"] == "Example Saga"
    assert result["seriesSequence"] == "7"
def test_audiobookshelf_metadata_normalizes_sequence_value(tmp_path):
    """A prose series_index like 'Book 7 of the Series' reduces to '7'."""
    source = tmp_path / "book.txt"
    source.write_text("content", encoding="utf-8")
    tags = {"series": "Example Saga", "series_index": "Book 7 of the Series"}
    job = Job(
        id="job-abs-normalize",
        original_filename="book.txt",
        stored_path=source,
        language="en",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="mp3",
        save_mode="Save next to input file",
        output_folder=tmp_path,
        replace_single_newlines=False,
        subtitle_format="srt",
        created_at=time.time(),
        metadata_tags=tags,
    )
    result = build_audiobookshelf_metadata(job)
    assert result["seriesName"] == "Example Saga"
    assert result["seriesSequence"] == "7"
def test_audiobookshelf_metadata_allows_decimal_sequence(tmp_path):
    """Decimal series numbers (e.g. novellas at 4.5) survive normalization."""
    book = tmp_path / "book.txt"
    book.write_text("content", encoding="utf-8")
    job = Job(
        id="job-abs-decimal",
        original_filename="book.txt",
        stored_path=book,
        language="en",
        voice="af_alloy",
        speed=1.0,
        use_gpu=False,
        subtitle_mode="Sentence",
        output_format="mp3",
        save_mode="Save next to input file",
        output_folder=tmp_path,
        replace_single_newlines=False,
        subtitle_format="srt",
        created_at=time.time(),
        metadata_tags={"series": "Example Saga", "series_number": "Book 4.5"},
    )
    assert build_audiobookshelf_metadata(job)["seriesSequence"] == "4.5"
from __future__ import annotations
from pathlib import Path
from abogen.utils import load_config, save_config
from abogen.webui.app import create_app
def test_settings_update_preserves_abs_api_token_when_blank(tmp_path):
    """Blank secret fields in a settings POST must not wipe stored secrets.

    The settings form renders token/password inputs masked (empty string), so
    an empty submission means "leave unchanged", not "clear the secret".
    """
    # Seed config with stored integration secret.
    save_config(
        {
            "language": "en",
            "integrations": {
                "audiobookshelf": {
                    "enabled": True,
                    "base_url": "https://abs.example",
                    "api_token": "SECRET_TOKEN",
                    "library_id": "lib1",
                    "folder_id": "fold1",
                    "verify_ssl": True,
                },
                "calibre_opds": {
                    "enabled": True,
                    "base_url": "https://opds.example",
                    "username": "user",
                    "password": "SECRET_PASS",
                    "verify_ssl": True,
                },
            },
        }
    )
    app = create_app(
        {
            "TESTING": True,
            "SECRET_KEY": "test",
            "OUTPUT_FOLDER": str(tmp_path),
            "UPLOAD_FOLDER": str(tmp_path / "uploads"),
        }
    )
    with app.test_client() as client:
        # Emulate saving settings where integrations are present but secrets are blank
        # (typical of masked password/token inputs).
        resp = client.post(
            "/settings/update",
            data={
                "language": "en",
                "output_format": "mp3",
                # ABS integration fields (token blank)
                "audiobookshelf_enabled": "on",
                "audiobookshelf_base_url": "https://abs.example",
                "audiobookshelf_api_token": "",
                "audiobookshelf_library_id": "lib1",
                "audiobookshelf_folder_id": "fold1",
                "audiobookshelf_verify_ssl": "on",
                # Calibre OPDS integration fields (password blank)
                "calibre_opds_enabled": "on",
                "calibre_opds_base_url": "https://opds.example",
                "calibre_opds_username": "user",
                "calibre_opds_password": "",
                "calibre_opds_verify_ssl": "on",
            },
            follow_redirects=False,
        )
    # The endpoint redirects on success (302/303).
    assert resp.status_code in {302, 303}
    cfg = load_config() or {}
    integrations = cfg.get("integrations") or {}
    # The previously stored secrets must survive the blank submission.
    assert integrations["audiobookshelf"]["api_token"] == "SECRET_TOKEN"
    assert integrations["calibre_opds"]["password"] == "SECRET_PASS"
def test_settings_update_preserves_secrets_when_fields_missing(tmp_path):
    """Omitting integration fields entirely must also leave stored secrets intact."""
    save_config(
        {
            "language": "en",
            "integrations": {
                "audiobookshelf": {"api_token": "SECRET_TOKEN"},
                "calibre_opds": {"password": "SECRET_PASS"},
            },
        }
    )
    app = create_app(
        {
            "TESTING": True,
            "SECRET_KEY": "test",
            "OUTPUT_FOLDER": str(tmp_path),
            "UPLOAD_FOLDER": str(tmp_path / "uploads"),
        }
    )
    with app.test_client() as client:
        # Post unrelated changes; omit integration fields completely.
        resp = client.post(
            "/settings/update",
            data={
                "language": "en",
                "output_format": "wav",
            },
            follow_redirects=False,
        )
    assert resp.status_code in {302, 303}
    cfg = load_config() or {}
    integrations = cfg.get("integrations") or {}
    assert integrations["audiobookshelf"]["api_token"] == "SECRET_TOKEN"
    assert integrations["calibre_opds"]["password"] == "SECRET_PASS"
from abogen.speaker_analysis import analyze_speakers
def _chapters():
return [
{
"id": "0001",
"index": 0,
"title": "Test",
"text": "",
"enabled": True,
}
]
def _chunk(text: str, idx: int) -> dict:
return {
"id": f"chunk-{idx}",
"chapter_index": 0,
"chunk_index": idx,
"text": text,
}
def test_analyze_speakers_infers_gender_from_pronouns():
    """Pronouns near each speaker's dialogue determine the inferred gender."""
    chunks = [
        _chunk('"Greetings," said John. He adjusted his hat as he smiled.', 0),
        _chunk(
            '"Hello," said Mary. She straightened her dress as she introduced herself.',
            1,
        ),
        _chunk('"Nice to meet you," said Alex.', 2),
    ]
    result = analyze_speakers(_chapters(), chunks, threshold=1, max_speakers=0)
    expected_genders = {"john": "male", "mary": "female", "alex": "unknown"}
    for key, gender in expected_genders.items():
        speaker = result.speakers.get(key)
        assert speaker is not None
        assert speaker.gender == gender
def test_analyze_speakers_ignores_leading_stopwords():
    """Leading conjunctions ('But', 'Then') are stripped from speaker names."""
    chunks = [
        _chunk('But Volescu said, "We march at dawn."', 0),
        _chunk('Then Blue Leader shouted, "Hold the perimeter."', 1),
    ]
    found = analyze_speakers(_chapters(), chunks, threshold=1, max_speakers=0).speakers
    assert "volescu" in found
    assert found["volescu"].label == "Volescu"
    assert "blue_leader" in found
    assert found["blue_leader"].label == "Blue Leader"
    # The stopword must not be fused into the speaker key.
    assert "but_volescu" not in found
    assert "then_blue_leader" not in found
def test_analyze_speakers_applies_threshold_suppression():
    """Speakers mentioned fewer than `threshold` times are flagged suppressed."""
    chunks = [
        _chunk('"Hello there," said Narrator.', 0),
        _chunk('"It is lying," said Green.', 1),
    ]
    result = analyze_speakers(_chapters(), chunks, threshold=3, max_speakers=0)
    rare_speaker = result.speakers.get("green")
    assert rare_speaker is not None
    assert rare_speaker.suppressed is True
    assert "green" in result.suppressed
def test_sample_excerpt_includes_context_paragraphs():
    """A sample quote's excerpt carries the narration before and after it."""
    chunks = [
        _chunk("The hallway was quiet as footsteps approached.", 0),
        _chunk('"Open the door," said John as he reached for the handle.', 1),
        _chunk("Mary watched him closely, unsure of his intent.", 2),
    ]
    result = analyze_speakers(_chapters(), chunks, threshold=1, max_speakers=0)
    speaker = result.speakers.get("john")
    assert speaker is not None
    assert speaker.sample_quotes, "Expected John to have at least one sample quote"
    excerpt = speaker.sample_quotes[0]["excerpt"]
    for fragment in (
        "The hallway was quiet",
        '"Open the door," said John',
        "Mary watched him closely",
    ):
        assert fragment in excerpt
from unittest.mock import patch
import pytest
from pathlib import Path
from ebooklib import epub
from abogen.text_extractor import extract_from_path
from abogen.utils import calculate_text_length
@pytest.fixture
def sample_epub_path():
    """Path to the bundled sample EPUB used by the extractor tests."""
    fixtures_dir = Path(__file__).parent / "fixtures"
    return fixtures_dir / "abogen_debug_tts_samples.epub"
def test_epub_character_counts_align_with_calculated_total(sample_epub_path):
    """Per-chapter counts, the combined-text count, and the total must agree."""
    extraction = extract_from_path(sample_epub_path)
    per_chapter = sum(ch.characters for ch in extraction.chapters)
    from_combined = calculate_text_length(extraction.combined_text)
    assert extraction.total_characters == from_combined == per_chapter
def test_epub_metadata_composer_matches_artist(sample_epub_path):
    """Composer metadata mirrors the artist and is never a 'Narrator' default."""
    meta = extract_from_path(sample_epub_path).metadata
    composer = meta.get("composer") or meta.get("COMPOSER")
    artist = meta.get("artist") or meta.get("ARTIST")
    assert composer
    assert composer == artist
    assert composer != "Narrator"
def test_epub_series_metadata_extracted_from_opf_meta(tmp_path):
    """Calibre-style <meta name="calibre:series"> OPF entries surface as
    `series` / `series_index` in the extractor's metadata dict."""
    book = epub.EpubBook()
    book.set_identifier("id")
    book.set_title("Example Title")
    book.set_language("en")
    book.add_author("Example Author")
    # Calibre-style series metadata
    # ebooklib stores this in memory correctly, but may not round-trip via disk in read_epub
    book.add_metadata(
        "OPF", "meta", None, {"name": "calibre:series", "content": "Example Saga"}
    )
    book.add_metadata(
        "OPF", "meta", None, {"name": "calibre:series_index", "content": "2"}
    )
    chapter = epub.EpubHtml(title="Chapter 1", file_name="chap_01.xhtml", lang="en")
    chapter.content = "<h1>Chapter 1</h1><p>Hello</p>"
    chapter.id = "chap_01"
    book.add_item(chapter)
    # We manually set the spine to match what ebooklib.read_epub produces (list of tuples),
    # since we are bypassing the serialization round-trip that normally converts it.
    # The 'nav' item is usually handled separately or implicitly, but for this test
    # we just need the chapter to be navigable via spine.
    book.spine = [("nav", "yes"), ("chap_01", "yes")]
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    path = tmp_path / "example.epub"
    epub.write_epub(str(path), book)
    # We mock read_epub to avoid serialization issues with custom metadata in ebooklib
    with patch("abogen.text_extractor.epub.read_epub", return_value=book):
        result = extract_from_path(path)
    assert result.metadata.get("series") == "Example Saga"
    assert result.metadata.get("series_index") == "2"
from __future__ import annotations
import pytest
from unittest.mock import patch
from abogen.kokoro_text_normalization import (
DEFAULT_APOSTROPHE_CONFIG,
normalize_for_pipeline,
normalize_roman_numeral_titles,
)
from abogen.normalization_settings import (
apply_overrides as apply_normalization_overrides,
build_apostrophe_config,
get_runtime_settings,
)
from abogen.spacy_contraction_resolver import resolve_ambiguous_contractions
# Probe once at import time whether the spaCy-backed contraction resolver works.
# NOTE(review): assumes resolve_ambiguous_contractions returns a falsy value
# when the spaCy model is unavailable — confirm against
# abogen.spacy_contraction_resolver before relying on this flag elsewhere.
SPACY_RESOLVER_AVAILABLE = bool(
    resolve_ambiguous_contractions("It's been a long time.")
)
def _normalize_text(
    text: str, *, normalization_overrides: dict[str, object] | None = None
) -> str:
    """Run `text` through the full normalization pipeline, optionally with overrides."""
    settings = get_runtime_settings()
    if normalization_overrides:
        settings = apply_normalization_overrides(settings, normalization_overrides)
    apostrophe_config = build_apostrophe_config(
        settings=settings, base=DEFAULT_APOSTROPHE_CONFIG
    )
    return normalize_for_pipeline(text, config=apostrophe_config, settings=settings)
def test_title_abbreviations_are_expanded():
    """Honorific abbreviations become their full spoken forms."""
    spoken = _normalize_text("Dr. Watson met Mr. Holmes and Ms. Hudson.")
    for expansion in ("Doctor", "Mister", "Miz"):
        assert expansion in spoken
def test_suffix_abbreviations_are_expanded_with_case_preserved():
    """Jr./Sr. expand while keeping the surrounding text's casing."""
    spoken = _normalize_text("John Doe Jr. spoke to JANE DOE SR. about the estate.")
    assert "John Doe Junior" in spoken
    assert "JANE DOE SENIOR" in spoken
def test_missing_terminal_punctuation_is_added():
    """A bare heading gains a trailing period."""
    assert _normalize_text("Chapter 1").endswith(".")
def test_terminal_punctuation_respects_closing_quotes():
    """The added period lands inside a trailing straight quote."""
    squeezed = _normalize_text('"Chapter 1"').replace(" ", "")
    assert squeezed.endswith('."')
def test_normalization_preserves_spacing_around_quotes_and_hyphen():
    # Normalization must keep curly quotes, the hyphenated surname, and general
    # spacing intact while still expanding "Dr." and straightening the curly
    # apostrophe in "d’Avrigny".
    sample = "“Still,” said Château-Renaud, “Dr. d’Avrigny, who attends my mother, declares he is in despair about it."
    normalized = _normalize_text(sample)
    assert normalized.startswith(
        "“Still,” said Château-Renaud, “Doctor d'Avrigny, who attends my mother, declares he is in despair about it."
    )
    # NOTE(review): the quoted character below appears to be a non-breaking
    # space (U+00A0), not an ASCII space — preserve it exactly when editing.
    assert " " not in normalized
    assert "Château-Renaud" in normalized
    assert "Doctor d'Avrigny" in normalized
def test_normalize_roman_titles_converts_when_majority() -> None:
    """When most titles lead with a roman numeral, all of them are converted."""
    converted = normalize_roman_numeral_titles(
        ["I: Opening", "II: Rising Action", "III: Climax"]
    )
    assert converted == ["1: Opening", "2: Rising Action", "3: Climax"]
def test_normalize_roman_titles_skips_when_not_majority() -> None:
    """A lone roman-numeral title among prose titles is left untouched."""
    originals = ["Preface", "I: Opening", "Acknowledgements"]
    assert normalize_roman_numeral_titles(originals) == originals
def test_normalize_roman_titles_preserves_separators() -> None:
    """Leading whitespace and each numeral's separator survive conversion."""
    converted = normalize_roman_numeral_titles(
        [" IV. The Trial", "V - The Verdict", "VI\nAftermath"]
    )
    assert converted[0] == " 4. The Trial"
    assert converted[1] == "5 - The Verdict"
    assert converted[2].startswith("6\nAftermath")
def test_grouped_numbers_are_spelled_out() -> None:
    """Comma-grouped integers are expanded to words."""
    spoken = _normalize_text("The vault holds 35,000 credits")
    assert "thirty-five thousand" in spoken.lower()
def test_numeric_ranges_are_spoken_with_to() -> None:
    """Hyphenated ranges read as "<a> to <b>"."""
    assert "one to three" in _normalize_text("Chapters 1-3").lower()
def test_simple_fractions_are_spoken() -> None:
    """Common fractions like 1/2 get a spoken form."""
    assert "one half" in _normalize_text("Add 1/2 cup of sugar").lower()
def test_plain_numbers_are_spelled_out() -> None:
    """Bare integers become words."""
    assert "forty-two" in _normalize_text("He rolled a 42.").lower()
def test_decimal_numbers_include_point() -> None:
    """Decimals are read with the word "point" between integer and fraction."""
    assert "four point five" in _normalize_text("Book 4.5 of the series.").lower()
def test_space_separated_numbers_become_ranges() -> None:
    """Adjacent page numbers are read as a range."""
    spoken = _normalize_text("Read pages 12 14 tonight.")
    assert "pages twelve to fourteen" in spoken.lower()
def test_year_like_numbers_use_common_pronunciation() -> None:
    """1924 reads in the 'nineteen hundred ... twenty four' style."""
    flat = _normalize_text("In 1924 the journey began").lower().replace("-", " ")
    assert "nineteen hundred" in flat
    assert "twenty four" in flat
def test_early_century_years_use_hundred_format() -> None:
    """Years like 1204 read as 'twelve hundred ... oh four'."""
    spoken = _normalize_text("In 1204 the city fell").lower()
    assert "twelve hundred" in spoken
    assert "oh four" in spoken
def test_roman_numerals_in_titles_are_converted() -> None:
    """'Chapter IV' is spoken with a cardinal number."""
    assert "chapter four" in _normalize_text("Chapter IV begins now").lower()
def test_roman_numeral_suffixes_use_ordinals() -> None:
    """Generational suffixes read as ordinals ('the second')."""
    spoken = _normalize_text("Bob Smith II arrived late")
    assert "bob smith the second" in spoken.lower()
def test_lowercase_roman_after_part_converts_to_cardinal() -> None:
    """Lowercase numerals after 'part' still convert."""
    spoken = _normalize_text("We studied part iii of the manuscript.")
    assert "part three" in spoken.lower()
def test_hyphenated_phase_with_roman_is_converted() -> None:
    """A hyphen between the keyword and the numeral does not block conversion."""
    spoken = _normalize_text("They executed phase-IV without delay.")
    assert "phase four" in spoken.lower()
def test_all_caps_quotes_are_sentence_cased() -> None:
    """Shouted (all-caps) quotes are rewritten in sentence case."""
    cased = _normalize_text('"THIS IS A TEST."').replace('" ', '"')
    assert '"This is a test."' in cased
def test_caps_quote_preserves_acronyms() -> None:
    """Sentence-casing a caps quote must leave real acronyms intact."""
    spoken = _normalize_text("“THE NASA TEAM ARRIVED.”")
    assert "“The NASA team arrived.”" in spoken
def test_caps_quote_normalization_respects_override() -> None:
    """Disabling the caps-quote option keeps the shout untouched."""
    untouched = _normalize_text(
        '"KEEP SHOUTING."',
        normalization_overrides={"normalization_caps_quotes": False},
    )
    assert '"KEEP SHOUTING."' in untouched.replace('" ', '"')
def test_recent_years_split_twenty_style() -> None:
    """2025 reads as 'twenty twenty five'."""
    flat = _normalize_text("In 2025 we planned ahead").lower().replace("-", " ")
    assert "twenty twenty five" in flat
def test_two_thousands_use_two_thousand_prefix() -> None:
    """Years 2000-2009 read with the 'two thousand' prefix."""
    assert "two thousand five" in _normalize_text("In 2005 we celebrated").lower()
def test_year_style_can_be_disabled() -> None:
    """With year styling off, 2025 is not split into 'twenty twenty five'."""
    spoken = _normalize_text(
        "In 2025 we planned ahead",
        normalization_overrides={"normalization_numbers_year_style": "off"},
    )
    assert "twenty twenty five" not in spoken.lower().replace("-", " ")
def test_contractions_can_be_kept_when_override_disabled() -> None:
    """Disabling contraction expansion leaves "It's" alone."""
    kept = _normalize_text(
        "It's a good day.",
        normalization_overrides={"normalization_apostrophes_contractions": False},
    )
    assert "It's" in kept
def test_sibilant_possessives_remain_when_marking_disabled() -> None:
    """With sibilant-possessive marking off, "boss's" is not respelled."""
    spoken = _normalize_text(
        "The boss's chair wobbled.",
        normalization_overrides={
            "normalization_apostrophes_sibilant_possessives": False
        },
    )
    assert "boss's" in spoken
    assert "boss iz" not in spoken.lower()
def test_decades_can_skip_expansion_when_disabled() -> None:
    """Disabling decade expansion preserves the literal '90s."""
    spoken = _normalize_text(
        "Classic hits from the '90s filled the hall.",
        normalization_overrides={"normalization_apostrophes_decades": False},
    )
    assert "'90s" in spoken
def test_abbreviated_decades_expand_to_spoken_form() -> None:
    """'80s expands to the word 'eighties'."""
    assert "eighties" in _normalize_text("She loved music from the '80s.").lower()
def test_currency_under_one_dollar_uses_cents() -> None:
    """$0.99 is spoken as cents, never as 'zero dollars'."""
    flat = _normalize_text("It cost $0.99.").lower().replace("-", " ")
    assert "zero dollars" not in flat
    assert "cents" in flat
def test_iso_dates_use_locale_order_and_ordinals(monkeypatch) -> None:
    """An ISO-style date reads month-name-first with an ordinal day (US locale)."""
    monkeypatch.setenv("LC_TIME", "en_US.UTF-8")
    flat = _normalize_text("The date is 2025/12/15.").lower().replace("-", " ")
    assert "december" in flat
    assert "fifteenth" in flat
def test_times_and_acronyms_do_not_say_dot() -> None:
    """Periods inside times and acronyms are never read aloud as 'dot'."""
    spoken = _normalize_text("Meet at 5 p.m. near the U.S.A. border.")
    assert " dot " not in spoken.lower()
def test_internet_slang_expansion_is_configurable() -> None:
    """Enabling the slang option expands 'pls' to 'please'."""
    expanded = _normalize_text(
        "pls knock before entering.",
        normalization_overrides={"normalization_internet_slang": True},
    )
    assert "please" in expanded.lower()
@pytest.mark.skipif(not SPACY_RESOLVER_AVAILABLE, reason="spaCy model unavailable")
def test_spacy_disambiguates_it_has_from_context() -> None:
    """Perfect-tense context resolves "It's" to "It has"."""
    assert _normalize_text("It's been a long time.") == "It has been a long time."
@pytest.mark.skipif(not SPACY_RESOLVER_AVAILABLE, reason="spaCy model unavailable")
def test_spacy_disambiguates_it_is_from_context() -> None:
    """Copular context resolves "It's" to "It is"."""
    assert _normalize_text("It's cold outside.") == "It is cold outside."
@pytest.mark.skipif(not SPACY_RESOLVER_AVAILABLE, reason="spaCy model unavailable")
def test_spacy_disambiguates_she_had() -> None:
    """Past-participle context resolves "She'd" to "She had"."""
    assert _normalize_text("She'd left before dawn.") == "She had left before dawn."
@pytest.mark.skipif(not SPACY_RESOLVER_AVAILABLE, reason="spaCy model unavailable")
def test_spacy_disambiguates_she_would() -> None:
    """Base-verb context resolves "She'd" to "She would"."""
    assert _normalize_text("She'd go if invited.") == "She would go if invited."
@pytest.mark.skipif(not SPACY_RESOLVER_AVAILABLE, reason="spaCy model unavailable")
def test_sample_sentence_handles_complex_contractions() -> None:
    """Several mixed contractions resolve correctly within one sentence."""
    source = (
        "I've heard the captain'll arrive by dusk, but they'd said the same yesterday."
    )
    expected = (
        "I have heard the captain will arrive by dusk, but they had said the same yesterday."
    )
    assert _normalize_text(source) == expected
def test_modal_will_contractions_can_be_disabled() -> None:
    """Turning off modal-'ll expansion leaves "captain'll" intact."""
    kept = _normalize_text(
        "The captain'll arrive at dawn.",
        normalization_overrides={"normalization_contraction_modal_will": False},
    )
    assert "captain'll" in kept
@pytest.fixture(autouse=True)
def mock_settings():
    """Pin every normalization toggle to a known default for this module's tests.

    NOTE(review): this patches get_runtime_settings as imported into
    tests.test_text_normalization, so it only affects lookups through that
    module's name — confirm the patch target matches where _normalize_text
    actually resolves the function.
    """
    defaults = {
        "normalization_numbers": True,
        "normalization_titles": True,
        "normalization_terminal": True,
        "normalization_phoneme_hints": True,
        "normalization_caps_quotes": True,
        "normalization_apostrophes_contractions": True,
        "normalization_apostrophes_plural_possessives": True,
        "normalization_apostrophes_sibilant_possessives": True,
        "normalization_apostrophes_decades": True,
        "normalization_apostrophes_leading_elisions": True,
        "normalization_apostrophe_mode": "spacy",
        "normalization_contraction_aux_be": True,
        "normalization_contraction_aux_have": True,
        "normalization_contraction_modal_will": True,
        "normalization_contraction_modal_would": True,
        "normalization_contraction_negation_not": True,
        "normalization_contraction_let_us": True,
        "normalization_currency": True,
        "normalization_footnotes": True,
        "normalization_numbers_year_style": "american",
    }
    with patch(
        "tests.test_text_normalization.get_runtime_settings", return_value=defaults
    ):
        yield
def test_currency_magnitude():
    """Magnitude-suffixed dollar amounts are spoken with the magnitude word."""
    overrides = {
        "normalization_numbers": True,
        "normalization_currency": True,
        "normalization_apostrophe_mode": "spacy",
    }
    expectations = {
        "$2 million": "two million dollars",
        "$2.5 million": "two point five million dollars",
        "$100 billion": "one hundred billion dollars",
        "$1.2 trillion": "one point two trillion dollars",
        "$2.55 million": "two point five five million dollars",
        "$1 million": "one million dollars",
        "$0.5 million": "zero point five million dollars",
        "$2.50": "two dollars, fifty cents",
        "$100": "one hundred dollars",
    }
    for raw, spoken in expectations.items():
        normalized = _normalize_text(raw, normalization_overrides=overrides)
        assert spoken.lower() in normalized.lower(), (
            f"Failed for {raw}: got '{normalized}'"
        )
import numpy as np
from abogen.tts_supertonic import SupertonicPipeline
class _DummyTTS:
def get_voice_style(self, voice_name: str):
return {"voice": voice_name}
def synthesize(
self,
*,
text: str,
voice_style,
total_steps: int,
speed: float,
max_chunk_length: int,
silence_duration: float,
verbose: bool,
):
if "•" in text:
raise ValueError("Found 1 unsupported character(s): ['•']")
# Return 50ms of audio at 24kHz.
sr = 24000
audio = np.zeros(int(0.05 * sr), dtype="float32")
return audio, 0.05
def test_supertonic_pipeline_strips_unsupported_characters_and_retries():
    """After the pipeline strips the unsupported '•', synthesis succeeds on retry."""
    # Avoid importing/initializing real supertonic by manually constructing the pipeline.
    pipeline = SupertonicPipeline.__new__(SupertonicPipeline)
    pipeline.sample_rate = 24000
    pipeline.total_steps = 5
    pipeline.max_chunk_length = 1000
    pipeline._tts = _DummyTTS()
    segs = list(pipeline("Hello • world", voice="M1", speed=1.0))
    assert len(segs) == 1
    # The original assertion was `x == "Hello world" or x == "Hello world"` —
    # both operands byte-identical, so the `or` was a tautology. A single
    # comparison expresses the same check. NOTE(review): the second operand may
    # once have differed only in whitespace ("Hello  world"); confirm against
    # the pipeline's whitespace-collapsing behavior.
    assert segs[0].graphemes == "Hello world"
    assert isinstance(segs[0].audio, np.ndarray)
    assert segs[0].audio.dtype == np.float32
    assert segs[0].audio.size > 0
def test_supertonic_pipeline_drops_chunk_if_only_unsupported_characters():
    """A chunk reduced to nothing after stripping yields no segments at all."""
    pipeline = SupertonicPipeline.__new__(SupertonicPipeline)
    pipeline.sample_rate = 24000
    pipeline.total_steps = 5
    pipeline.max_chunk_length = 1000
    pipeline._tts = _DummyTTS()
    assert list(pipeline("•", voice="M1", speed=1.0)) == []
import os
import sys
from pathlib import Path
from typing import Iterable
import pytest
# Make the repository root importable so tests can `import abogen` without
# installing the package first.
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))
@pytest.fixture(autouse=True)
def clear_utils_cache():
    """Reset the cached user-cache-root lookup around every test.

    Tests mutate ABOGEN_TEMP_DIR / HOME, so a value cached by an earlier test
    must never leak into the next one; clear both before and after.
    """
    import abogen.utils as utils

    # Direct attribute access replaces the needless getattr(obj, "const") call
    # (flake8-bugbear B009) — the attribute name is a literal.
    utils.get_user_cache_root.cache_clear()
    yield
    utils.get_user_cache_root.cache_clear()
def _clear_env(monkeypatch: pytest.MonkeyPatch, keys: Iterable[str]) -> None:
    """Remove each named variable from the environment, ignoring absentees."""
    for name in keys:
        monkeypatch.delenv(name, raising=False)
def test_abogen_temp_dir_configures_hf_cache(monkeypatch, tmp_path):
    """ABOGEN_TEMP_DIR becomes the cache root and drives the HF cache env vars."""
    import abogen.utils as utils
    cache_root = tmp_path / "cache-root"
    home_dir = tmp_path / "home"
    monkeypatch.setenv("ABOGEN_TEMP_DIR", str(cache_root))
    monkeypatch.setenv("HOME", str(home_dir))
    # Remove every variable that could pre-empt ABOGEN_TEMP_DIR.
    _clear_env(
        monkeypatch,
        (
            "XDG_CACHE_HOME",
            "HF_HOME",
            "HUGGINGFACE_HUB_CACHE",
            "TRANSFORMERS_CACHE",
            "ABOGEN_INTERNAL_CACHE_ROOT",
        ),
    )
    root = utils.get_user_cache_root()
    expected_root = os.path.abspath(str(cache_root))
    expected_hf = os.path.join(expected_root, "huggingface")
    assert root == expected_root
    # get_user_cache_root is expected to export these as side effects.
    assert os.environ["XDG_CACHE_HOME"] == expected_root
    assert os.environ["HF_HOME"] == expected_hf
    assert os.environ["HUGGINGFACE_HUB_CACHE"] == expected_hf
    assert os.environ["TRANSFORMERS_CACHE"] == expected_hf
from types import SimpleNamespace
from typing import cast
import pytest
from abogen.constants import VOICES_INTERNAL
from abogen.voice_cache import (
LocalEntryNotFoundError,
_CACHED_VOICES,
ensure_voice_assets,
)
from abogen.webui.conversion_runner import _collect_required_voice_ids
from abogen.webui.service import Job
@pytest.fixture(autouse=True)
def clear_voice_cache():
    """Keep the module-level voice cache empty across tests."""
    _CACHED_VOICES.clear()  # start each test pristine
    yield
    _CACHED_VOICES.clear()  # leave nothing behind for the next test
def test_ensure_voice_assets_downloads_missing(monkeypatch):
    """ensure_voice_assets downloads uncached voices once, then reuses the cache."""
    recorded = []
    cached = set()

    def fake_download(**kwargs):
        filename = kwargs["filename"]
        if kwargs.get("local_files_only"):
            if filename in cached:
                # Originally f"/tmp/(unknown)" — an f-string without
                # placeholders (ruff F541); the filename interpolation was
                # evidently lost, so restore it.
                return f"/tmp/{filename}"
            # hf_hub_download signals a cache miss with this error type.
            raise LocalEntryNotFoundError(f"{filename} missing")
        recorded.append(filename)
        cached.add(filename)
        return f"/tmp/{filename}"

    monkeypatch.setattr("abogen.voice_cache.hf_hub_download", fake_download)
    downloaded, errors = ensure_voice_assets(["af_nova", "am_liam"])
    assert downloaded == {"af_nova", "am_liam"}
    assert errors == {}
    assert set(recorded) == {"voices/af_nova.pt", "voices/am_liam.pt"}
    # A second call must be served entirely from the (fake) local cache.
    recorded.clear()
    downloaded_again, errors_again = ensure_voice_assets(["af_nova"])
    assert downloaded_again == set()
    assert errors_again == {}
    assert recorded == []
def test_collect_required_voice_ids_includes_all():
    """Voices from the job, chapters, chunks and speakers are all collected."""
    fake_job = SimpleNamespace(
        voice="af_nova",
        chapters=[{"voice_formula": "af_nova*0.7+am_liam*0.3"}],
        chunks=[{"voice": "am_michael"}],
        speakers={
            "hero": {"voice_formula": "af_nova*0.6+am_liam*0.4"},
            "narrator": {"voice": "af_nova"},
        },
    )
    collected = _collect_required_voice_ids(cast(Job, fake_job))
    assert {"af_nova", "am_liam", "am_michael"}.issubset(collected)
    assert collected.issuperset(VOICES_INTERNAL)
from __future__ import annotations
from abogen.webui.conversion_runner import _resolve_voice, _supertonic_voice_from_spec
from abogen.tts_supertonic import DEFAULT_SUPERTONIC_VOICES
def test_resolve_voice_formula_without_pipeline_does_not_crash() -> None:
    """A stale Kokoro mix formula passes through unchanged when no pipeline exists."""
    # Saved Kokoro formulas can outlive a switch to the SuperTonic provider,
    # in which case there is no Kokoro pipeline object to resolve against.
    mix_formula = "af_heart*0.5+af_sky*0.5"
    assert _resolve_voice(None, mix_formula, use_gpu=False) == mix_formula
def test_supertonic_voice_from_formula_falls_back_to_valid_voice() -> None:
    """SuperTonic never receives a raw Kokoro mix formula as a voice name."""
    fallback = _supertonic_voice_from_spec("af_heart*0.5+af_sky*0.5", "af_heart*1.0")
    assert fallback in DEFAULT_SUPERTONIC_VOICES
+1
-1
Metadata-Version: 2.4
Name: abogen
Version: 1.3.0
Version: 1.3.1
Summary: Generate audiobooks from EPUBs, PDFs and text with synchronized captions.

@@ -5,0 +5,0 @@ Project-URL: Homepage, https://github.com/denizsafak/abogen

@@ -99,4 +99,2 @@ [build-system]

[tool.hatch.build]
include = ["abogen/webui/templates/**", "abogen/webui/static/**"]

@@ -103,0 +101,0 @@ [tool.hatch.version]