Latest Threat Research: SANDWORM_MODE — Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a DemoInstallSign in
Socket

abogen

Package Overview
Dependencies
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

abogen - npm Package Compare versions

Comparing version
1.2.3
to
1.2.4
+590
abogen/predownload_gui.py
"""
Pre-download dialog and worker for Abogen
This module consolidates pre-download logic for Kokoro voices and model
and spaCy language models. The code favors clarity, avoids duplication,
and handles optional dependencies gracefully.
"""
from typing import List, Optional, Tuple
import importlib
import importlib.util
from PyQt6.QtWidgets import (
QDialog,
QVBoxLayout,
QHBoxLayout,
QLabel,
QPushButton,
QSpacerItem,
QSizePolicy,
)
from PyQt6.QtCore import QThread, pyqtSignal
from abogen.constants import COLORS, VOICES_INTERNAL
from abogen.spacy_utils import SPACY_MODELS
import abogen.hf_tracker
# Helpers
def _unique_sorted_models() -> List[str]:
    """Return the distinct spaCy model package names, sorted alphabetically."""
    distinct_names = {name for name in SPACY_MODELS.values()}
    return sorted(distinct_names)
def _is_package_installed(pkg_name: str) -> bool:
"""Return True if a package with the given name can be imported (site-packages)."""
try:
return importlib.util.find_spec(pkg_name) is not None
except Exception:
return False
# NOTE: explicit HF cache helper removed; we use try_to_load_from_cache in-scope where needed
class PreDownloadWorker(QThread):
    """Worker thread to download required models/voices.

    Emits human-readable messages via `progress`. Uses `category_done` to indicate
    a category (voices/model/spacy) finished successfully. Emits `error` on exception
    and `finished` after all work completes.
    """

    # Emit (category, status, message); status is one of
    # 'downloading' / 'installed' / 'downloaded' / 'warning'.
    progress = pyqtSignal(str, str, str)
    # Emitted with 'voices' / 'model' / 'spacy' once that category succeeds.
    category_done = pyqtSignal(str)
    # NOTE(review): this shadows QThread's built-in `finished` signal —
    # presumably intentional, but confirm slots connected to it expect
    # this custom signal rather than QThread's.
    finished = pyqtSignal()
    error = pyqtSignal(str)

    def __init__(self, parent=None):
        super().__init__(parent)
        # Cooperative cancellation flag; checked between download steps.
        self._cancelled = False
        # repo and filenames used for Kokoro model
        self._repo_id = "hexgrad/Kokoro-82M"
        self._model_files = ["kokoro-v1_0.pth", "config.json"]
        # Track download success per category
        self._voices_success = False
        self._model_success = False
        self._spacy_success = False
        # Suppress HF tracker warnings during downloads
        self._original_emitter = abogen.hf_tracker.show_warning_signal_emitter

    def cancel(self) -> None:
        # Request cancellation; run() checks this flag between items.
        self._cancelled = True

    def run(self) -> None:
        """Download voices, Kokoro model files, then spaCy models in order."""
        # Suppress HF tracker warnings during downloads
        abogen.hf_tracker.show_warning_signal_emitter = None
        try:
            self._download_kokoro_voices()
            if self._cancelled:
                return
            if self._voices_success:
                self.category_done.emit("voices")
            self._download_kokoro_model()
            if self._cancelled:
                return
            if self._model_success:
                self.category_done.emit("model")
            self._download_spacy_models()
            if self._cancelled:
                return
            if self._spacy_success:
                self.category_done.emit("spacy")
            self.finished.emit()
        except Exception as exc:  # pragma: no cover - best-effort reporting
            self.error.emit(str(exc))
        finally:
            # Restore original emitter
            abogen.hf_tracker.show_warning_signal_emitter = self._original_emitter

    # Kokoro voices
    def _download_kokoro_voices(self) -> None:
        """Download each Kokoro voice file that is not already in the HF cache."""
        self._voices_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "voice", "warning", "huggingface_hub not installed, skipping voices..."
            )
            self._voices_success = False
            return
        voice_list = VOICES_INTERNAL
        for idx, voice in enumerate(voice_list, start=1):
            if self._cancelled:
                self._voices_success = False
                return
            filename = f"voices/{voice}.pt"
            # NOTE(review): try_to_load_from_cache may return a truthy
            # "cached as non-existent" sentinel rather than a file path —
            # confirm treating that as "already present" is acceptable.
            if try_to_load_from_cache(repo_id=self._repo_id, filename=filename):
                self.progress.emit(
                    "voice",
                    "installed",
                    f"{idx}/{len(voice_list)}: {voice} already present",
                )
                continue
            self.progress.emit(
                "voice", "downloading", f"{idx}/{len(voice_list)}: {voice}..."
            )
            try:
                hf_hub_download(repo_id=self._repo_id, filename=filename)
                self.progress.emit("voice", "downloaded", f"{voice} downloaded")
            except Exception as exc:
                # Best effort: report the failure but keep processing voices.
                self.progress.emit(
                    "voice", "warning", f"could not download {voice}: {exc}"
                )
                self._voices_success = False

    # Kokoro model
    def _download_kokoro_model(self) -> None:
        """Download the Kokoro model weights and config if not cached."""
        self._model_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "model", "warning", "huggingface_hub not installed, skipping model..."
            )
            self._model_success = False
            return
        for fname in self._model_files:
            if self._cancelled:
                self._model_success = False
                return
            # config.json gets its own UI row ("config"); weights use "model".
            category = "config" if fname == "config.json" else "model"
            if try_to_load_from_cache(repo_id=self._repo_id, filename=fname):
                self.progress.emit(
                    category, "installed", f"file {fname} already present"
                )
                continue
            self.progress.emit(category, "downloading", f"file {fname}...")
            try:
                hf_hub_download(repo_id=self._repo_id, filename=fname)
                self.progress.emit(category, "downloaded", f"file {fname} downloaded")
            except Exception as exc:
                self.progress.emit(
                    category, "warning", f"could not download file {fname}: {exc}"
                )
                self._model_success = False

    # spaCy models
    def _download_spacy_models(self) -> None:
        """Download spaCy models. Prefer missing models provided by parent.

        Parent dialog will populate _spacy_models_missing during checking.
        """
        self._spacy_success = True
        # Determine which models to process: prefer parent-provided missing list to avoid
        # re-checking everything; otherwise use the full unique list.
        parent = self.parent()
        models_to_process: List[str] = _unique_sorted_models()
        try:
            if (
                parent is not None
                and hasattr(parent, "_spacy_models_missing")
                and parent._spacy_models_missing
            ):
                # dict.fromkeys de-duplicates while preserving order.
                models_to_process = list(dict.fromkeys(parent._spacy_models_missing))
        except Exception:
            pass
        # If spaCy is not available to run the CLI, skip gracefully
        try:
            import spacy.cli as _spacy_cli
        except Exception:
            self.progress.emit(
                "spacy", "warning", "spaCy not available, skipping spaCy models..."
            )
            self._spacy_success = False
            return
        for idx, model_name in enumerate(models_to_process, start=1):
            if self._cancelled:
                self._spacy_success = False
                return
            if _is_package_installed(model_name):
                self.progress.emit(
                    "spacy",
                    "installed",
                    f"{idx}/{len(models_to_process)}: {model_name} already installed",
                )
                continue
            self.progress.emit(
                "spacy",
                "downloading",
                f"{idx}/{len(models_to_process)}: {model_name}...",
            )
            try:
                _spacy_cli.download(model_name)
                self.progress.emit("spacy", "downloaded", f"{model_name} downloaded")
            except Exception as exc:
                self.progress.emit(
                    "spacy", "warning", f"could not download {model_name}: {exc}"
                )
                self._spacy_success = False
class PreDownloadDialog(QDialog):
    """Dialog to show and control pre-download process."""

    # Row label prefixes; status text is appended after the prefix.
    VOICE_PREFIX = "Kokoro voices: "
    MODEL_PREFIX = "Kokoro model: "
    CONFIG_PREFIX = "Kokoro config: "
    SPACY_PREFIX = "spaCy models: "

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Pre-download Models and Voices")
        self.setMinimumWidth(500)
        # Download worker; created when the user clicks "Download all".
        self.worker: Optional[PreDownloadWorker] = None
        # True once any category is found missing; enables the download button.
        self.has_missing = False
        # (model_name, ok) pairs accumulated during the status check.
        self._spacy_models_checked: List[tuple] = []
        # Missing spaCy package names; also read by PreDownloadWorker.
        self._spacy_models_missing: List[str] = []
        self._status_worker = None
        # Map keywords to (label, prefix) - labels filled after UI creation
        self.status_map = {
            "voice": (None, self.VOICE_PREFIX),
            "spacy": (None, self.SPACY_PREFIX),
            "model": (None, self.MODEL_PREFIX),
            "config": (None, self.CONFIG_PREFIX),
        }
        # Worker category -> status_map keys it completes.
        self.category_map = {
            "voices": ["voice"],
            "model": ["model", "config"],
            "spacy": ["spacy"],
        }
        self._setup_ui()
        self._start_status_check()

    def _setup_ui(self) -> None:
        """Build the dialog: description, four status rows, and buttons."""
        layout = QVBoxLayout(self)
        layout.setSpacing(0)
        layout.setContentsMargins(15, 0, 15, 15)
        desc = QLabel(
            "You can pre-download all required models and voices for offline use.\n"
            "This includes Kokoro voices, Kokoro model (and config), and spaCy models."
        )
        desc.setWordWrap(True)
        layout.addWidget(desc)
        # Status rows
        status_layout = QVBoxLayout()
        status_title = QLabel("<b>Current Status:</b>")
        status_layout.addWidget(status_title)
        self.voices_status = QLabel(self.VOICE_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.voices_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.model_status = QLabel(self.MODEL_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.model_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.config_status = QLabel(self.CONFIG_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.config_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.spacy_status = QLabel(self.SPACY_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.spacy_status)
        row.addStretch()
        status_layout.addLayout(row)
        # register labels
        self.status_map["voice"] = (self.voices_status, self.VOICE_PREFIX)
        self.status_map["model"] = (self.model_status, self.MODEL_PREFIX)
        self.status_map["config"] = (self.config_status, self.CONFIG_PREFIX)
        self.status_map["spacy"] = (self.spacy_status, self.SPACY_PREFIX)
        layout.addLayout(status_layout)
        layout.addItem(
            QSpacerItem(0, 20, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Fixed)
        )
        # Buttons
        button_row = QHBoxLayout()
        button_row.setSpacing(10)
        self.download_btn = QPushButton("Download all")
        self.download_btn.setMinimumWidth(100)
        self.download_btn.setMinimumHeight(35)
        # Disabled until the status check finds something missing.
        self.download_btn.setEnabled(False)
        self.download_btn.clicked.connect(self._start_download)
        button_row.addWidget(self.download_btn)
        self.close_btn = QPushButton("Close")
        self.close_btn.setMinimumWidth(100)
        self.close_btn.setMinimumHeight(35)
        self.close_btn.clicked.connect(self._handle_close)
        button_row.addWidget(self.close_btn)
        layout.addLayout(button_row)
        self.adjustSize()

    # Status checking worker
    class StatusCheckWorker(QThread):
        """Background thread that checks what is already downloaded/installed.

        Calls back into the owning PreDownloadDialog (its Qt parent) for the
        actual cache/package checks and reports results through signals.
        """

        voices_checked = pyqtSignal(bool, list)
        model_checked = pyqtSignal(bool)
        config_checked = pyqtSignal(bool)
        spacy_model_checking = pyqtSignal(str)
        spacy_model_result = pyqtSignal(str, bool)
        spacy_checked = pyqtSignal(bool, list)

        def run(self):
            # Parent is the PreDownloadDialog that owns the check helpers.
            parent = self.parent()
            if parent is None:
                return
            voices_ok, missing_voices = parent._check_kokoro_voices()
            self.voices_checked.emit(voices_ok, missing_voices)
            model_ok = parent._check_kokoro_model()
            self.model_checked.emit(model_ok)
            config_ok = parent._check_kokoro_config()
            self.config_checked.emit(config_ok)
            # Check spaCy models by package name to detect site-package installs
            unique = _unique_sorted_models()
            missing: List[str] = []
            for name in unique:
                self.spacy_model_checking.emit(name)
                ok = _is_package_installed(name)
                self.spacy_model_result.emit(name, ok)
                if not ok:
                    missing.append(name)
            # Stash the missing list on the dialog for PreDownloadWorker to use.
            parent._spacy_models_missing = missing
            self.spacy_checked.emit(len(missing) == 0, missing)

    def _start_status_check(self) -> None:
        """Start the background status check and show the 'Checking...' state."""
        self._status_worker = self.StatusCheckWorker(self)
        self._status_worker.voices_checked.connect(self._update_voices_status)
        self._status_worker.model_checked.connect(self._update_model_status)
        self._status_worker.config_checked.connect(self._update_config_status)
        self._status_worker.spacy_model_checking.connect(self._spacy_model_checking)
        self._status_worker.spacy_model_result.connect(self._spacy_model_result)
        self._status_worker.spacy_checked.connect(self._update_spacy_status)
        # These are initialized in __init__ to keep consistent object state
        # Set checking visual state
        for lbl in (
            self.voices_status,
            self.model_status,
            self.config_status,
            self.spacy_status,
        ):
            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
        self.spacy_status.setText(self.SPACY_PREFIX + "⏳ Checking...")
        self._status_worker.start()

    # UI update callbacks
    def _spacy_model_checking(self, name: str) -> None:
        # Live feedback while each spaCy package is being probed.
        self.spacy_status.setText(f"{self.SPACY_PREFIX}Checking {name}...")

    def _spacy_model_result(self, name: str, ok: bool) -> None:
        """Record one spaCy check result and refresh the running tally."""
        self._spacy_models_checked.append((name, ok))
        if not ok and name not in self._spacy_models_missing:
            self._spacy_models_missing.append(name)
        checked = len(self._spacy_models_checked)
        missing_count = len(self._spacy_models_missing)
        if missing_count:
            self.spacy_status.setText(
                f"{self.SPACY_PREFIX}{checked} checked, {missing_count} missing..."
            )
        else:
            self.spacy_status.setText(f"{self.SPACY_PREFIX}{checked} checked...")

    def _update_voices_status(self, ok: bool, missing: List[str]) -> None:
        # Final verdict for the voices row.
        if ok:
            self._set_status("voice", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "voice", f"✗ Missing {len(missing)} voices", COLORS["RED"]
                )
            else:
                self._set_status("voice", "✗ Not downloaded", COLORS["RED"])

    def _update_model_status(self, ok: bool) -> None:
        if ok:
            self._set_status("model", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("model", "✗ Not downloaded", COLORS["RED"])

    def _update_config_status(self, ok: bool) -> None:
        if ok:
            self._set_status("config", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("config", "✗ Not downloaded", COLORS["RED"])

    def _update_spacy_status(self, ok: bool, missing: List[str]) -> None:
        """Final verdict for the spaCy row; this is the last check, so it
        also enables the download button when anything is missing."""
        if ok:
            self._set_status("spacy", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "spacy", f"✗ Missing {len(missing)} model(s)", COLORS["RED"]
                )
            else:
                self._set_status("spacy", "✗ Not downloaded", COLORS["RED"])
        self.download_btn.setEnabled(self.has_missing)

    def _set_status(self, key: str, text: str, color: str) -> None:
        """Set one status label's text (prefix + text) and color."""
        lbl, prefix = self.status_map.get(key, (None, ""))
        if not lbl:
            return
        lbl.setText(prefix + text)
        lbl.setStyleSheet(f"color: {color};")

    # Helper checks
    def _check_kokoro_voices(self) -> Tuple[bool, List[str]]:
        """Return (ok, missing_list) for Kokoro voices check."""
        missing = []
        try:
            from huggingface_hub import try_to_load_from_cache

            for voice in VOICES_INTERNAL:
                if not try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename=f"voices/{voice}.pt"
                ):
                    missing.append(voice)
        except Exception:
            # If HF missing, report all as missing
            return False, list(VOICES_INTERNAL)
        return (len(missing) == 0), missing

    def _check_kokoro_model(self) -> bool:
        # True when the Kokoro weights file is present in the HF cache.
        try:
            from huggingface_hub import try_to_load_from_cache

            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="kokoro-v1_0.pth"
                )
                is not None
            )
        except Exception:
            return False

    def _check_kokoro_config(self) -> bool:
        # True when the Kokoro config.json is present in the HF cache.
        try:
            from huggingface_hub import try_to_load_from_cache

            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="config.json"
                )
                is not None
            )
        except Exception:
            return False

    def _check_spacy_models(self) -> bool:
        # Synchronous variant of the spaCy check; also refreshes the
        # missing-models list used by the download worker.
        unique = _unique_sorted_models()
        missing = [m for m in unique if not _is_package_installed(m)]
        self._spacy_models_missing = missing
        return len(missing) == 0

    # Download control
    def _start_download(self) -> None:
        """Launch the PreDownloadWorker and switch the UI into download mode."""
        self.download_btn.setEnabled(False)
        self.download_btn.setText("Downloading...")
        # mark the start of downloads; this triggers the labels
        self._on_progress("system", "starting", "Processing, please wait...")
        self.worker = PreDownloadWorker(self)
        self.worker.progress.connect(self._on_progress)
        self.worker.category_done.connect(self._on_category_done)
        self.worker.finished.connect(self._on_download_finished)
        self.worker.error.connect(self._on_download_error)
        self.worker.start()

    def _on_progress(self, category: str, status: str, message: str) -> None:
        """Map worker (category, status, message) to UI label updates.

        Status is one of: 'downloading', 'installed', 'downloaded', 'warning', 'starting'.
        Category is one of: 'voice', 'model', 'spacy', 'config', or 'system'.
        """
        try:
            # If the category targets a specific label, update directly
            if category in self.status_map:
                lbl, prefix = self.status_map[category]
                if not lbl:
                    return
                # Compose message and set color based on status token
                full_text = prefix + message
                if len(full_text) > 60:
                    # Truncate long text for display; full text goes in tooltip.
                    display_text = full_text[:57] + "..."
                    lbl.setText(display_text)
                    lbl.setToolTip(full_text)
                else:
                    lbl.setText(full_text)
                    lbl.setToolTip("")  # Clear tooltip if not needed
                if status == "downloading":
                    lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                elif status in ("installed", "downloaded"):
                    lbl.setStyleSheet(f"color: {COLORS['GREEN']};")
                elif status == "warning":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                elif status == "error":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                return
            # System-level messages
            if category == "system":
                if status == "starting":
                    # Reset every row to an in-progress look.
                    for k in self.status_map:
                        lbl, prefix = self.status_map[k]
                        if lbl:
                            lbl.setText(prefix + "Processing, please wait...")
                            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                # other system statuses don't require action
                return
        except Exception:
            # Do not let UI thread crash on unexpected worker message
            pass

    def _on_category_done(self, category: str) -> None:
        # Mark every label belonging to the finished category as done.
        for key in self.category_map.get(category, []):
            self._set_status(key, "✓ Downloaded", COLORS["GREEN"])

    def _on_download_finished(self) -> None:
        # Everything downloaded: nothing missing, button stays disabled.
        self.has_missing = False
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(False)

    def _on_download_error(self, error_msg: str) -> None:
        # Show the error on every row and let the user retry.
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(True)
        for key in self.status_map:
            self._set_status(key, f"✗ Error - {error_msg}", COLORS["RED"])

    def _handle_close(self) -> None:
        # Cancel a running download (waiting up to 2s) before closing.
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            self.worker.wait(2000)
        self.accept()

    def closeEvent(self, event) -> None:
        # Same cancellation handling for window-manager close.
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            self.worker.wait(2000)
        super().closeEvent(event)
"""
Lazy-loaded spaCy utilities for sentence segmentation.
"""
# Cached spaCy module and models (lazy loaded)
_spacy = None
_nlp_cache = {}
# Language code to spaCy model mapping
SPACY_MODELS = {
"a": "en_core_web_sm", # American English
"b": "en_core_web_sm", # British English
"e": "es_core_news_sm", # Spanish
"f": "fr_core_news_sm", # French
"i": "it_core_news_sm", # Italian
"p": "pt_core_news_sm", # Brazilian Portuguese
"z": "zh_core_web_sm", # Mandarin Chinese
"j": "ja_core_news_sm", # Japanese
"h": "xx_sent_ud_sm", # Hindi (multi-language model)
}
def _load_spacy():
    """Import spaCy once and cache the module; return None when unavailable."""
    global _spacy
    if _spacy is not None:
        return _spacy
    try:
        import spacy as spacy_module
    except ImportError:
        # spaCy is an optional dependency; callers fall back gracefully.
        return None
    _spacy = spacy_module
    return _spacy
def get_spacy_model(lang_code, log_callback=None):
    """
    Get or load a spaCy model for the given language code.

    Downloads the model automatically if not available. Loaded pipelines are
    cached in the module-level ``_nlp_cache`` keyed by language code.

    Args:
        lang_code: Language code (a, b, e, f, etc.)
        log_callback: Optional function to log messages

    Returns:
        Loaded spaCy model or None if unavailable
    """

    def log(msg, is_error=False):
        # Prefer GUI log callback when provided to avoid spamming stdout.
        if log_callback:
            color = "red" if is_error else "grey"
            try:
                log_callback((msg, color))
            except Exception:
                # Fallback to printing if callback misbehaves
                print(msg)
        else:
            print(msg)

    def _load_pipeline(spacy_module, name):
        # Shared loader (initial attempt and post-download retry): disable
        # heavy components and keep only sentence segmentation.
        nlp = spacy_module.load(
            name,
            disable=["ner", "parser", "tagger", "lemmatizer", "attribute_ruler"],
        )
        # Enable sentence segmentation only
        if "sentencizer" not in nlp.pipe_names:
            nlp.add_pipe("sentencizer")
        return nlp

    # Check if model is cached
    if lang_code in _nlp_cache:
        return _nlp_cache[lang_code]
    # Check if language is supported
    model_name = SPACY_MODELS.get(lang_code)
    if not model_name:
        log(f"\nspaCy: No model mapping for language '{lang_code}'...")
        return None
    # Lazy load spaCy
    spacy = _load_spacy()
    if spacy is None:
        log("\nspaCy: Module not installed, falling back to default segmentation...")
        return None
    # Try to load the model
    try:
        log(f"\nLoading spaCy model '{model_name}'...")
        nlp = _load_pipeline(spacy, model_name)
        _nlp_cache[lang_code] = nlp
        return nlp
    except OSError:
        # Model not found, attempt download
        log(f"\nspaCy: Downloading model '{model_name}'...")
        try:
            from spacy.cli import download

            download(model_name)
            # Retry loading
            nlp = _load_pipeline(spacy, model_name)
            _nlp_cache[lang_code] = nlp
            log(f"spaCy model '{model_name}' downloaded and loaded")
            return nlp
        except Exception as e:
            log(
                f"\nspaCy: Failed to download model '{model_name}': {e}...",
                is_error=True,
            )
            return None
    except Exception as e:
        log(f"\nspaCy: Error loading model '{model_name}': {e}...", is_error=True)
        return None
def segment_sentences(text, lang_code, log_callback=None):
    """
    Segment text into sentences using spaCy.

    Args:
        text: Text to segment
        lang_code: Language code
        log_callback: Optional function to log messages

    Returns:
        List of sentence strings, or None if spaCy unavailable
    """
    nlp = get_spacy_model(lang_code, log_callback)
    if nlp is None:
        return None
    # Grow nlp.max_length when the input exceeds it so spaCy accepts large
    # documents; any failure here is non-fatal and simply skipped.
    try:
        length = len(text or "")
        if length and hasattr(nlp, "max_length") and length > nlp.max_length:
            # increase a bit beyond the text length to be safe
            nlp.max_length = length + 1000
    except Exception:
        pass
    # Process text and collect non-empty, trimmed sentences.
    sentences = []
    for sent in nlp(text).sents:
        stripped = sent.text.strip()
        if stripped:
            sentences.append(stripped)
    return sentences
def is_spacy_available():
    """Report whether the spaCy module can be imported."""
    module = _load_spacy()
    return module is not None
def clear_cache():
    """Clear the model cache to free memory."""
    # .clear() mutates the existing dict in place; the previous `global`
    # declaration was unnecessary (and misleading) since _nlp_cache is
    # never rebound here.
    _nlp_cache.clear()
+1
-4

@@ -64,6 +64,3 @@ from abogen.utils import get_version

# 384 if self.lang_code in 'ab':
SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = [
"a",
"b",
]
SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = list(LANGUAGE_DESCRIPTIONS.keys())

@@ -70,0 +67,0 @@ # Voice and sample text constants

+19
-11

@@ -13,3 +13,10 @@ import os

# Pre-compile frequently used regex patterns for better performance
_WHITESPACE_PATTERN = re.compile(r"[^\S\n]+")
_MULTIPLE_NEWLINES_PATTERN = re.compile(r"\n{3,}")
_SINGLE_NEWLINE_PATTERN = re.compile(r"(?<!\n)\n(?!\n)")
_CHAPTER_MARKER_PATTERN = re.compile(r"<<CHAPTER_MARKER:.*?>>")
_METADATA_PATTERN = re.compile(r"<<METADATA_[^:]+:[^>]*>>")
def detect_encoding(file_path):

@@ -132,9 +139,12 @@ import chardet

# Collapse all whitespace (excluding newlines) into single spaces per line and trim edges
lines = [re.sub(r"[^\S\n]+", " ", line).strip() for line in text.splitlines()]
# Use pre-compiled pattern for better performance
lines = [_WHITESPACE_PATTERN.sub(" ", line).strip() for line in text.splitlines()]
text = "\n".join(lines)
# Standardize paragraph breaks (multiple newlines become exactly two) and trim overall whitespace
text = re.sub(r"\n{3,}", "\n\n", text).strip()
# Use pre-compiled pattern for better performance
text = _MULTIPLE_NEWLINES_PATTERN.sub("\n\n", text).strip()
# Optionally replace single newlines with spaces, but preserve double newlines
if replace_single_newlines:
text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
# Use pre-compiled pattern for better performance
text = _SINGLE_NEWLINE_PATTERN.sub(" ", text)
return text

@@ -248,10 +258,8 @@

def calculate_text_length(text):
# Ignore chapter markers
text = re.sub(r"<<CHAPTER_MARKER:.*?>>", "", text)
# Ignore metadata patterns
text = re.sub(r"<<METADATA_[^:]+:[^>]*>>", "", text)
# Ignore newlines
text = text.replace("\n", "")
# Ignore leading/trailing spaces
text = text.strip()
# Use pre-compiled patterns for better performance
# Ignore chapter markers and metadata patterns in a single pass
text = _CHAPTER_MARKER_PATTERN.sub("", text)
text = _METADATA_PATTERN.sub("", text)
# Ignore newlines and leading/trailing spaces
text = text.replace("\n", "").strip()
# Calculate character count

@@ -258,0 +266,0 @@ char_count = len(text)

@@ -1,1 +0,1 @@

1.2.3
1.2.4

@@ -0,1 +1,16 @@

# 1.2.4 (Pre-release)
- **Subtitle generation is now available for all languages!** Abogen now supports subtitle generation for non-English languages using audio duration-based timing. Available modes include `Line`, `Sentence`, and `Sentence + Comma`. (Note: Word-level subtitle modes remain English-only due to Kokoro's timestamp token limitations.)
- New option: **"Use spaCy for sentence segmentation"**: You can now use [spaCy](https://spacy.io/) to automatically detect sentence boundaries and produce cleaner, more readable subtitles. Quick summary:
- **What it does:** Splits text into natural sentences so subtitle entries read better and align more naturally with speech.
- **Why this helps:** The previous punctuation-based splitting could break sentences incorrectly at common abbreviations (e.g. "Mr.", "Dr.", "Prof.") or initials, producing wrong subtitle breaks. spaCy avoids those false splits by using linguistic rules to detect real sentence boundaries.
- **For Non-English:** spaCy runs **before** audio generation to create better sentence chunks for TTS.
- **For English:** spaCy runs **during** subtitle generation to find accurate sentence breaks after TTS.
- **Note:** spaCy segmentation is only applied when subtitle mode is `Sentence` or `Sentence + Comma`. When turned off, it falls back to simple punctuation-based splitting.
- New option: **Pre-download models and voices for offline use**: You can now pre-download all required Kokoro models, voices, and spaCy language models using this option in the settings menu, allowing you to use Abogen completely offline without any internet connection.
- Added support for `.` separator in timestamps (e.g. `HH:MM:SS.ms`) for timestamp-based text files.
- Optimized regex compilation and eliminated busy-wait loops.
- Possibly fixed `Silent truncation of long paragraphs` issue mentioned in [#91](https://github.com/denizsafak/abogen/issues/91) by [@xklzlxr](https://github.com/xklzlxr)
- Fixed unused regex patterns and variable naming conventions.
- Improvements in code and documentation.
# 1.2.3

@@ -2,0 +17,0 @@ - Same as 1.2.2, re-released to fix an issue with subtitle timing when using timestamp-based text files.

Metadata-Version: 2.4
Name: abogen
Version: 1.2.3
Version: 1.2.4
Summary: Generate audiobooks from EPUBs, PDFs and text with synchronized captions.

@@ -35,2 +35,3 @@ Project-URL: Homepage, https://github.com/denizsafak/abogen

Requires-Dist: soundfile>=0.13.1
Requires-Dist: spacy>=3.8.7
Requires-Dist: static-ffmpeg>=2.13

@@ -212,4 +213,6 @@ Description-Content-Type: text/markdown

| **Subtitle speed adjustment method** | Choose how to speed up audio when needed: `TTS Regeneration (better quality)` re-generates the audio at a faster speed, while `FFmpeg Time-stretch (better speed)` quickly speeds up the generated audio. (for subtitle files). |
| **Use spaCy for sentence segmentation** | When this option is enabled, Abogen uses [spaCy](https://spacy.io/) to detect sentence boundaries more accurately, instead of using punctuation marks (like periods, question marks, etc.) to split sentences, which could incorrectly cut off phrases like "Mr." or "Dr.". With spaCy, sentences are divided more accurately. For non-English text, spaCy runs **before** audio generation to create sentence chunks. For English text, spaCy runs **during** subtitle generation to improve timing and readability. spaCy is only used when subtitle mode is `Sentence` or `Sentence + Comma`. If you prefer the old punctuation splitting method, you can turn this option off. |
| **Pre-download models and voices for offline use** | Opens a window that displays the available models and voices. Click `Download all` button to download all required models and voices, allowing you to use Abogen completely offline without any internet connection. |
| **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. |
| **Check for updates at startup** | Automatically checks for updates when the program starts. |
| **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. |
| **Reset to default settings** | Resets all settings to their default values. |

@@ -277,3 +280,3 @@

## `About Timestamp-based Text Files`
Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken.
Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken.

@@ -293,3 +296,3 @@ Format your text file like this:

**Important notes:**
- Timestamps must be in `HH:MM:SS` or `HH:MM:SS,ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30,500` for 5 minutes 30.5 seconds)
- Timestamps must be in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30.500` for 5 minutes 30.5 seconds)
- Milliseconds are optional and provide precision up to 1/1000th of a second

@@ -520,2 +523,3 @@ - Text before the first timestamp (if any) will automatically start at `00:00:00`

- Abogen uses [Kokoro](https://github.com/hexgrad/kokoro) for its high-quality, natural-sounding text-to-speech synthesis. Huge thanks to the Kokoro team for making this possible.
- Thanks to the [spaCy](https://spacy.io/) project for its sentence-segmentation tools, which help Abogen produce cleaner, more natural sentence segmentation.
- Thanks to [@wojiushixiaobai](https://github.com/wojiushixiaobai) for [Embedded Python](https://github.com/wojiushixiaobai/Python-Embed-Win64) packages. These modified packages include pip pre-installed, enabling Abogen to function as a standalone application without requiring users to separately install Python in Windows.

@@ -533,5 +537,5 @@ - Thanks to creators of [EbookLib](https://github.com/aerkalov/ebooklib), a Python library for reading and writing ePub files, which is used for extracting text from ePub files.

> [!IMPORTANT]
> Subtitle generation currently works only for English. This is because Kokoro provides timestamp tokens only for English text. If you want subtitles in other languages, please request this feature in the [Kokoro project](https://github.com/hexgrad/kokoro). For more technical details, see [this line](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383) in the Kokoro's code.
> [!NOTE]
> Abogen supports subtitle generation for all languages. However, word-level subtitle modes (e.g., "1 word", "2 words", "3 words", etc.) are only available for English because [Kokoro provides timestamp tokens only for English text](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383). For non-English languages, Abogen uses a duration-based fallback that supports sentence-level and comma-based subtitle modes ("Line", "Sentence", "Sentence + Comma"). If you need word-level subtitles for other languages, please request that feature in the [Kokoro project](https://github.com/hexgrad/kokoro).
> Tags: audiobook, kokoro, text-to-speech, TTS, audiobook generator, audiobooks, text to speech, audiobook maker, audiobook creator, audiobook generator, voice-synthesis, text to audio, text to audio converter, text to speech converter, text to speech generator, text to speech software, text to speech app, epub to audio, pdf to audio, markdown to audio, subtitle to audio, srt to audio, ass to audio, vtt to audio, webvtt to audio, content-creation, media-generation

@@ -28,3 +28,4 @@ [build-system]

"static_ffmpeg>=2.13",
"Markdown>=3.9"
"Markdown>=3.9",
"spacy>=3.8.7"
]

@@ -31,0 +32,0 @@

@@ -174,4 +174,6 @@ # abogen <img width="40px" title="abogen icon" src="https://raw.githubusercontent.com/denizsafak/abogen/refs/heads/main/abogen/assets/icon.ico" align="right" style="padding-left: 10px; padding-top:5px;">

| **Subtitle speed adjustment method** | Choose how to speed up audio when needed: `TTS Regeneration (better quality)` re-generates the audio at a faster speed, while `FFmpeg Time-stretch (better speed)` quickly speeds up the generated audio. (for subtitle files). |
| **Use spaCy for sentence segmentation** | When this option is enabled, Abogen uses [spaCy](https://spacy.io/) to detect sentence boundaries more accurately, instead of using punctuation marks (like periods, question marks, etc.) to split sentences, which could incorrectly cut off phrases like "Mr." or "Dr.". With spaCy, sentences are divided more accurately. For non-English text, spaCy runs **before** audio generation to create sentence chunks. For English text, spaCy runs **during** subtitle generation to improve timing and readability. spaCy is only used when subtitle mode is `Sentence` or `Sentence + Comma`. If you prefer the old punctuation splitting method, you can turn this option off. |
| **Pre-download models and voices for offline use** | Opens a window that displays the available models and voices. Click `Download all` button to download all required models and voices, allowing you to use Abogen completely offline without any internet connection. |
| **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. |
| **Check for updates at startup** | Automatically checks for updates when the program starts. |
| **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. |
| **Reset to default settings** | Resets all settings to their default values. |

@@ -239,3 +241,3 @@

## `About Timestamp-based Text Files`
Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken.
Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken.

@@ -255,3 +257,3 @@ Format your text file like this:

**Important notes:**
- Timestamps must be in `HH:MM:SS` or `HH:MM:SS,ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30,500` for 5 minutes 30.5 seconds)
- Timestamps must be in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30.500` for 5 minutes 30.5 seconds)
- Milliseconds are optional and provide precision up to 1/1000th of a second

@@ -482,2 +484,3 @@ - Text before the first timestamp (if any) will automatically start at `00:00:00`

- Abogen uses [Kokoro](https://github.com/hexgrad/kokoro) for its high-quality, natural-sounding text-to-speech synthesis. Huge thanks to the Kokoro team for making this possible.
- Thanks to the [spaCy](https://spacy.io/) project for its sentence-segmentation tools, which help Abogen produce cleaner, more natural sentence segmentation.
- Thanks to [@wojiushixiaobai](https://github.com/wojiushixiaobai) for [Embedded Python](https://github.com/wojiushixiaobai/Python-Embed-Win64) packages. These modified packages include pip pre-installed, enabling Abogen to function as a standalone application without requiring users to separately install Python in Windows.

@@ -495,5 +498,5 @@ - Thanks to creators of [EbookLib](https://github.com/aerkalov/ebooklib), a Python library for reading and writing ePub files, which is used for extracting text from ePub files.

> [!IMPORTANT]
> Subtitle generation currently works only for English. This is because Kokoro provides timestamp tokens only for English text. If you want subtitles in other languages, please request this feature in the [Kokoro project](https://github.com/hexgrad/kokoro). For more technical details, see [this line](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383) in Kokoro's code.
> [!NOTE]
> Abogen supports subtitle generation for all languages. However, word-level subtitle modes (e.g., "1 word", "2 words", "3 words", etc.) are only available for English because [Kokoro provides timestamp tokens only for English text](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383). For non-English languages, Abogen uses a duration-based fallback that supports sentence-level and comma-based subtitle modes ("Line", "Sentence", "Sentence + Comma"). If you need word-level subtitles for other languages, please request that feature in the [Kokoro project](https://github.com/hexgrad/kokoro).
> Tags: audiobook, kokoro, text-to-speech, TTS, audiobook generator, audiobooks, text to speech, audiobook maker, audiobook creator, voice-synthesis, text to audio, text to audio converter, text to speech converter, text to speech generator, text to speech software, text to speech app, epub to audio, pdf to audio, markdown to audio, subtitle to audio, srt to audio, ass to audio, vtt to audio, webvtt to audio, content-creation, media-generation

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display