abogen
Advanced tools
| """ | ||
| Pre-download dialog and worker for Abogen | ||
| This module consolidates pre-download logic for Kokoro voices and model | ||
| and spaCy language models. The code favors clarity, avoids duplication, | ||
| and handles optional dependencies gracefully. | ||
| """ | ||
| from typing import List, Optional, Tuple | ||
| import importlib | ||
| import importlib.util | ||
| from PyQt6.QtWidgets import ( | ||
| QDialog, | ||
| QVBoxLayout, | ||
| QHBoxLayout, | ||
| QLabel, | ||
| QPushButton, | ||
| QSpacerItem, | ||
| QSizePolicy, | ||
| ) | ||
| from PyQt6.QtCore import QThread, pyqtSignal | ||
| from abogen.constants import COLORS, VOICES_INTERNAL | ||
| from abogen.spacy_utils import SPACY_MODELS | ||
| import abogen.hf_tracker | ||
| # Helpers | ||
def _unique_sorted_models() -> List[str]:
    """Return the spaCy model package names, deduplicated and sorted."""
    unique_names = {model for model in SPACY_MODELS.values()}
    return sorted(unique_names)
| def _is_package_installed(pkg_name: str) -> bool: | ||
| """Return True if a package with the given name can be imported (site-packages).""" | ||
| try: | ||
| return importlib.util.find_spec(pkg_name) is not None | ||
| except Exception: | ||
| return False | ||
| # NOTE: explicit HF cache helper removed; we use try_to_load_from_cache in-scope where needed | ||
class PreDownloadWorker(QThread):
    """Worker thread to download required models/voices.

    Emits human-readable messages via `progress`. Uses `category_done` to indicate
    a category (voices/model/spacy) finished successfully. Emits `error` on exception
    and `finished` after all work completes.

    NOTE(review): declaring a custom ``finished`` signal shadows QThread's
    built-in ``finished`` signal — confirm connected slots expect this override.
    """

    # Emit (category, status, message). category: 'voice'|'model'|'config'|'spacy';
    # status: 'downloading'|'installed'|'downloaded'|'warning'.
    progress = pyqtSignal(str, str, str)
    # Emitted with 'voices', 'model' or 'spacy' once a whole category succeeded.
    category_done = pyqtSignal(str)
    finished = pyqtSignal()
    error = pyqtSignal(str)

    def __init__(self, parent=None):
        super().__init__(parent)
        # Cooperative cancellation flag, polled between individual downloads.
        self._cancelled = False
        # repo and filenames used for Kokoro model
        self._repo_id = "hexgrad/Kokoro-82M"
        self._model_files = ["kokoro-v1_0.pth", "config.json"]
        # Track download success per category
        self._voices_success = False
        self._model_success = False
        self._spacy_success = False
        # Remember the HF tracker emitter so run() can suppress and restore it.
        self._original_emitter = abogen.hf_tracker.show_warning_signal_emitter

    def cancel(self) -> None:
        """Request cooperative cancellation; honored at the next poll point."""
        self._cancelled = True

    def run(self) -> None:
        """Download voices, Kokoro model files and spaCy models sequentially.

        Emits ``category_done`` after each successful category and ``finished``
        at the end; an early return on cancellation skips both.
        """
        # Suppress HF tracker warnings during downloads
        abogen.hf_tracker.show_warning_signal_emitter = None
        try:
            self._download_kokoro_voices()
            if self._cancelled:
                return
            if self._voices_success:
                self.category_done.emit("voices")
            self._download_kokoro_model()
            if self._cancelled:
                return
            if self._model_success:
                self.category_done.emit("model")
            self._download_spacy_models()
            if self._cancelled:
                return
            if self._spacy_success:
                self.category_done.emit("spacy")
            self.finished.emit()
        except Exception as exc:  # pragma: no cover - best-effort reporting
            self.error.emit(str(exc))
        finally:
            # Restore original emitter
            abogen.hf_tracker.show_warning_signal_emitter = self._original_emitter

    # Kokoro voices
    def _download_kokoro_voices(self) -> None:
        """Fetch every Kokoro voice file, skipping ones already in the HF cache.

        Sets ``self._voices_success`` False if huggingface_hub is missing, the
        run is cancelled mid-loop, or any individual voice fails to download.
        """
        self._voices_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "voice", "warning", "huggingface_hub not installed, skipping voices..."
            )
            self._voices_success = False
            return
        voice_list = VOICES_INTERNAL
        for idx, voice in enumerate(voice_list, start=1):
            if self._cancelled:
                self._voices_success = False
                return
            filename = f"voices/{voice}.pt"
            # NOTE(review): truthiness check — try_to_load_from_cache can also
            # return a special "cached non-existence" sentinel; confirm it is
            # falsy here, otherwise a known-missing file might be skipped.
            if try_to_load_from_cache(repo_id=self._repo_id, filename=filename):
                self.progress.emit(
                    "voice",
                    "installed",
                    f"{idx}/{len(voice_list)}: {voice} already present",
                )
                continue
            self.progress.emit(
                "voice", "downloading", f"{idx}/{len(voice_list)}: {voice}..."
            )
            try:
                hf_hub_download(repo_id=self._repo_id, filename=filename)
                self.progress.emit("voice", "downloaded", f"{voice} downloaded")
            except Exception as exc:
                # A single failure marks the category unsuccessful but the
                # loop continues so the remaining voices still get a chance.
                self.progress.emit(
                    "voice", "warning", f"could not download {voice}: {exc}"
                )
                self._voices_success = False

    # Kokoro model
    def _download_kokoro_model(self) -> None:
        """Fetch the Kokoro model weights and config, skipping cached files."""
        self._model_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "model", "warning", "huggingface_hub not installed, skipping model..."
            )
            self._model_success = False
            return
        for fname in self._model_files:
            if self._cancelled:
                self._model_success = False
                return
            # config.json is reported under its own UI row ('config').
            category = "config" if fname == "config.json" else "model"
            if try_to_load_from_cache(repo_id=self._repo_id, filename=fname):
                self.progress.emit(
                    category, "installed", f"file {fname} already present"
                )
                continue
            self.progress.emit(category, "downloading", f"file {fname}...")
            try:
                hf_hub_download(repo_id=self._repo_id, filename=fname)
                self.progress.emit(category, "downloaded", f"file {fname} downloaded")
            except Exception as exc:
                self.progress.emit(
                    category, "warning", f"could not download file {fname}: {exc}"
                )
                self._model_success = False

    # spaCy models
    def _download_spacy_models(self) -> None:
        """Download spaCy models. Prefer missing models provided by parent.

        Parent dialog will populate _spacy_models_missing during checking.
        """
        self._spacy_success = True
        # Determine which models to process: prefer parent-provided missing list to avoid
        # re-checking everything; otherwise use the full unique list.
        parent = self.parent()
        models_to_process: List[str] = _unique_sorted_models()
        try:
            if (
                parent is not None
                and hasattr(parent, "_spacy_models_missing")
                and parent._spacy_models_missing
            ):
                # dict.fromkeys keeps order while dropping duplicates.
                models_to_process = list(dict.fromkeys(parent._spacy_models_missing))
        except Exception:
            pass
        # If spaCy is not available to run the CLI, skip gracefully
        try:
            import spacy.cli as _spacy_cli
        except Exception:
            self.progress.emit(
                "spacy", "warning", "spaCy not available, skipping spaCy models..."
            )
            self._spacy_success = False
            return
        for idx, model_name in enumerate(models_to_process, start=1):
            if self._cancelled:
                self._spacy_success = False
                return
            if _is_package_installed(model_name):
                self.progress.emit(
                    "spacy",
                    "installed",
                    f"{idx}/{len(models_to_process)}: {model_name} already installed",
                )
                continue
            self.progress.emit(
                "spacy",
                "downloading",
                f"{idx}/{len(models_to_process)}: {model_name}...",
            )
            try:
                _spacy_cli.download(model_name)
                self.progress.emit("spacy", "downloaded", f"{model_name} downloaded")
            except Exception as exc:
                self.progress.emit(
                    "spacy", "warning", f"could not download {model_name}: {exc}"
                )
                self._spacy_success = False
class PreDownloadDialog(QDialog):
    """Dialog to show and control pre-download process.

    Runs a background :class:`StatusCheckWorker` on open to colour each
    status row, and starts a :class:`PreDownloadWorker` when the user
    clicks "Download all".
    """

    # Fixed label prefixes; the status text is appended after them.
    VOICE_PREFIX = "Kokoro voices: "
    MODEL_PREFIX = "Kokoro model: "
    CONFIG_PREFIX = "Kokoro config: "
    SPACY_PREFIX = "spaCy models: "

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Pre-download Models and Voices")
        self.setMinimumWidth(500)
        # Download worker; created lazily in _start_download.
        self.worker: Optional[PreDownloadWorker] = None
        # True once any category is found missing; enables the download button.
        self.has_missing = False
        # (model_name, installed?) pairs accumulated during the status check.
        self._spacy_models_checked: List[tuple] = []
        # Missing spaCy package names; also read by PreDownloadWorker via parent().
        self._spacy_models_missing: List[str] = []
        self._status_worker = None
        # Map keywords to (label, prefix) - labels filled after UI creation
        self.status_map = {
            "voice": (None, self.VOICE_PREFIX),
            "spacy": (None, self.SPACY_PREFIX),
            "model": (None, self.MODEL_PREFIX),
            "config": (None, self.CONFIG_PREFIX),
        }
        # Worker category name -> status_map keys it completes.
        self.category_map = {
            "voices": ["voice"],
            "model": ["model", "config"],
            "spacy": ["spacy"],
        }
        self._setup_ui()
        self._start_status_check()

    def _setup_ui(self) -> None:
        """Build the description, the four status rows and the button row."""
        layout = QVBoxLayout(self)
        layout.setSpacing(0)
        layout.setContentsMargins(15, 0, 15, 15)
        desc = QLabel(
            "You can pre-download all required models and voices for offline use.\n"
            "This includes Kokoro voices, Kokoro model (and config), and spaCy models."
        )
        desc.setWordWrap(True)
        layout.addWidget(desc)
        # Status rows
        status_layout = QVBoxLayout()
        status_title = QLabel("<b>Current Status:</b>")
        status_layout.addWidget(status_title)
        self.voices_status = QLabel(self.VOICE_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.voices_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.model_status = QLabel(self.MODEL_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.model_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.config_status = QLabel(self.CONFIG_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.config_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.spacy_status = QLabel(self.SPACY_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.spacy_status)
        row.addStretch()
        status_layout.addLayout(row)
        # register labels (filling in the None placeholders set in __init__)
        self.status_map["voice"] = (self.voices_status, self.VOICE_PREFIX)
        self.status_map["model"] = (self.model_status, self.MODEL_PREFIX)
        self.status_map["config"] = (self.config_status, self.CONFIG_PREFIX)
        self.status_map["spacy"] = (self.spacy_status, self.SPACY_PREFIX)
        layout.addLayout(status_layout)
        layout.addItem(
            QSpacerItem(0, 20, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Fixed)
        )
        # Buttons
        button_row = QHBoxLayout()
        button_row.setSpacing(10)
        self.download_btn = QPushButton("Download all")
        self.download_btn.setMinimumWidth(100)
        self.download_btn.setMinimumHeight(35)
        # Disabled until the status check finds something missing.
        self.download_btn.setEnabled(False)
        self.download_btn.clicked.connect(self._start_download)
        button_row.addWidget(self.download_btn)
        self.close_btn = QPushButton("Close")
        self.close_btn.setMinimumWidth(100)
        self.close_btn.setMinimumHeight(35)
        self.close_btn.clicked.connect(self._handle_close)
        button_row.addWidget(self.close_btn)
        layout.addLayout(button_row)
        self.adjustSize()

    # Status checking worker
    class StatusCheckWorker(QThread):
        """Background thread that inspects caches/site-packages and reports
        each category's state back to the dialog via signals."""

        voices_checked = pyqtSignal(bool, list)
        model_checked = pyqtSignal(bool)
        config_checked = pyqtSignal(bool)
        # Emitted per spaCy model: first with its name, then with the result.
        spacy_model_checking = pyqtSignal(str)
        spacy_model_result = pyqtSignal(str, bool)
        spacy_checked = pyqtSignal(bool, list)

        def run(self):
            # parent() is the PreDownloadDialog that owns the check helpers.
            parent = self.parent()
            if parent is None:
                return
            voices_ok, missing_voices = parent._check_kokoro_voices()
            self.voices_checked.emit(voices_ok, missing_voices)
            model_ok = parent._check_kokoro_model()
            self.model_checked.emit(model_ok)
            config_ok = parent._check_kokoro_config()
            self.config_checked.emit(config_ok)
            # Check spaCy models by package name to detect site-package installs
            unique = _unique_sorted_models()
            missing: List[str] = []
            for name in unique:
                self.spacy_model_checking.emit(name)
                ok = _is_package_installed(name)
                self.spacy_model_result.emit(name, ok)
                if not ok:
                    missing.append(name)
            # Stash the missing list on the dialog for PreDownloadWorker to reuse.
            parent._spacy_models_missing = missing
            self.spacy_checked.emit(len(missing) == 0, missing)

    def _start_status_check(self) -> None:
        """Wire up and start the status-check worker thread."""
        self._status_worker = self.StatusCheckWorker(self)
        self._status_worker.voices_checked.connect(self._update_voices_status)
        self._status_worker.model_checked.connect(self._update_model_status)
        self._status_worker.config_checked.connect(self._update_config_status)
        self._status_worker.spacy_model_checking.connect(self._spacy_model_checking)
        self._status_worker.spacy_model_result.connect(self._spacy_model_result)
        self._status_worker.spacy_checked.connect(self._update_spacy_status)
        # These are initialized in __init__ to keep consistent object state
        # Set checking visual state
        for lbl in (
            self.voices_status,
            self.model_status,
            self.config_status,
            self.spacy_status,
        ):
            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
        self.spacy_status.setText(self.SPACY_PREFIX + "⏳ Checking...")
        self._status_worker.start()

    # UI update callbacks
    def _spacy_model_checking(self, name: str) -> None:
        """Show which spaCy model is currently being checked."""
        self.spacy_status.setText(f"{self.SPACY_PREFIX}Checking {name}...")

    def _spacy_model_result(self, name: str, ok: bool) -> None:
        """Record a per-model check result and refresh the running tally."""
        self._spacy_models_checked.append((name, ok))
        if not ok and name not in self._spacy_models_missing:
            self._spacy_models_missing.append(name)
        checked = len(self._spacy_models_checked)
        missing_count = len(self._spacy_models_missing)
        if missing_count:
            self.spacy_status.setText(
                f"{self.SPACY_PREFIX}{checked} checked, {missing_count} missing..."
            )
        else:
            self.spacy_status.setText(f"{self.SPACY_PREFIX}{checked} checked...")

    def _update_voices_status(self, ok: bool, missing: List[str]) -> None:
        """Colour the voices row green/red based on the check result."""
        if ok:
            self._set_status("voice", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "voice", f"✗ Missing {len(missing)} voices", COLORS["RED"]
                )
            else:
                self._set_status("voice", "✗ Not downloaded", COLORS["RED"])

    def _update_model_status(self, ok: bool) -> None:
        """Colour the model row green/red based on the check result."""
        if ok:
            self._set_status("model", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("model", "✗ Not downloaded", COLORS["RED"])

    def _update_config_status(self, ok: bool) -> None:
        """Colour the config row green/red based on the check result."""
        if ok:
            self._set_status("config", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("config", "✗ Not downloaded", COLORS["RED"])

    def _update_spacy_status(self, ok: bool, missing: List[str]) -> None:
        """Finalize the spaCy row and enable the download button if needed.

        This is the last check signal, so the download button's enabled state
        is decided here from the accumulated ``has_missing`` flag.
        """
        if ok:
            self._set_status("spacy", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "spacy", f"✗ Missing {len(missing)} model(s)", COLORS["RED"]
                )
            else:
                self._set_status("spacy", "✗ Not downloaded", COLORS["RED"])
        self.download_btn.setEnabled(self.has_missing)

    def _set_status(self, key: str, text: str, color: str) -> None:
        """Set text and colour on the status row registered under *key*."""
        lbl, prefix = self.status_map.get(key, (None, ""))
        if not lbl:
            return
        lbl.setText(prefix + text)
        lbl.setStyleSheet(f"color: {color};")

    # Helper checks
    def _check_kokoro_voices(self) -> Tuple[bool, List[str]]:
        """Return (ok, missing_list) for Kokoro voices check."""
        missing = []
        try:
            from huggingface_hub import try_to_load_from_cache
            for voice in VOICES_INTERNAL:
                if not try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename=f"voices/{voice}.pt"
                ):
                    missing.append(voice)
        except Exception:
            # If HF missing, report all as missing
            return False, list(VOICES_INTERNAL)
        return (len(missing) == 0), missing

    def _check_kokoro_model(self) -> bool:
        """Return True when the Kokoro weights file is in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            # NOTE(review): `is not None` also accepts the cached-no-exist
            # sentinel try_to_load_from_cache may return — confirm intended.
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="kokoro-v1_0.pth"
                )
                is not None
            )
        except Exception:
            return False

    def _check_kokoro_config(self) -> bool:
        """Return True when the Kokoro config.json is in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="config.json"
                )
                is not None
            )
        except Exception:
            return False

    def _check_spacy_models(self) -> bool:
        """Recompute the missing spaCy package list; True when none missing."""
        unique = _unique_sorted_models()
        missing = [m for m in unique if not _is_package_installed(m)]
        self._spacy_models_missing = missing
        return len(missing) == 0

    # Download control
    def _start_download(self) -> None:
        """Create and start the download worker; lock the download button."""
        self.download_btn.setEnabled(False)
        self.download_btn.setText("Downloading...")
        # mark the start of downloads; this triggers the labels
        self._on_progress("system", "starting", "Processing, please wait...")
        self.worker = PreDownloadWorker(self)
        self.worker.progress.connect(self._on_progress)
        self.worker.category_done.connect(self._on_category_done)
        self.worker.finished.connect(self._on_download_finished)
        self.worker.error.connect(self._on_download_error)
        self.worker.start()

    def _on_progress(self, category: str, status: str, message: str) -> None:
        """Map worker (category, status, message) to UI label updates.

        Status is one of: 'downloading', 'installed', 'downloaded', 'warning', 'starting'.
        Category is one of: 'voice', 'model', 'spacy', 'config', or 'system'.
        """
        try:
            # If the category targets a specific label, update directly
            if category in self.status_map:
                lbl, prefix = self.status_map[category]
                if not lbl:
                    return
                # Compose message; elide long text and move it to a tooltip.
                full_text = prefix + message
                if len(full_text) > 60:
                    display_text = full_text[:57] + "..."
                    lbl.setText(display_text)
                    lbl.setToolTip(full_text)
                else:
                    lbl.setText(full_text)
                    lbl.setToolTip("")  # Clear tooltip if not needed
                # Colour by status token.
                if status == "downloading":
                    lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                elif status in ("installed", "downloaded"):
                    lbl.setStyleSheet(f"color: {COLORS['GREEN']};")
                elif status == "warning":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                elif status == "error":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                return
            # System-level messages
            if category == "system":
                if status == "starting":
                    for k in self.status_map:
                        lbl, prefix = self.status_map[k]
                        if lbl:
                            lbl.setText(prefix + "Processing, please wait...")
                            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                # other system statuses don't require action
                return
        except Exception:
            # Do not let UI thread crash on unexpected worker message
            pass

    def _on_category_done(self, category: str) -> None:
        """Mark all labels belonging to a finished category as downloaded."""
        for key in self.category_map.get(category, []):
            self._set_status(key, "✓ Downloaded", COLORS["GREEN"])

    def _on_download_finished(self) -> None:
        """Reset button state after the worker's `finished` signal.

        NOTE(review): has_missing is cleared unconditionally even when some
        categories reported warnings — confirm that is the intended UX.
        """
        self.has_missing = False
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(False)

    def _on_download_error(self, error_msg: str) -> None:
        """Show the error on every row and re-enable the download button."""
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(True)
        for key in self.status_map:
            self._set_status(key, f"✗ Error - {error_msg}", COLORS["RED"])

    def _handle_close(self) -> None:
        """Close button handler: cancel a running worker, then accept."""
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            # Give the worker up to 2s to wind down before closing.
            self.worker.wait(2000)
        self.accept()

    def closeEvent(self, event) -> None:
        """Window-close handler: mirror _handle_close's cancellation logic."""
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            self.worker.wait(2000)
        super().closeEvent(event)
| """ | ||
| Lazy-loaded spaCy utilities for sentence segmentation. | ||
| """ | ||
# Cached spaCy module and models (lazy loaded)
_spacy = None  # the imported spaCy module, or None until first successful import
_nlp_cache = {}  # lang_code -> loaded spaCy pipeline (populated by get_spacy_model)
# Language code to spaCy model mapping
SPACY_MODELS = {
    "a": "en_core_web_sm",  # American English
    "b": "en_core_web_sm",  # British English
    "e": "es_core_news_sm",  # Spanish
    "f": "fr_core_news_sm",  # French
    "i": "it_core_news_sm",  # Italian
    "p": "pt_core_news_sm",  # Brazilian Portuguese
    "z": "zh_core_web_sm",  # Mandarin Chinese
    "j": "ja_core_news_sm",  # Japanese
    "h": "xx_sent_ud_sm",  # Hindi (multi-language model)
}
def _load_spacy():
    """Import and cache the spaCy module; return None when unavailable."""
    global _spacy
    if _spacy is not None:
        return _spacy
    try:
        import spacy
    except ImportError:
        # Leave the cache as None so a later call can retry the import.
        return None
    _spacy = spacy
    return _spacy
def get_spacy_model(lang_code, log_callback=None):
    """
    Get or load a spaCy model for the given language code.

    Downloads the model automatically if not available. Loaded pipelines are
    cached in the module-level ``_nlp_cache`` keyed by ``lang_code``.

    Args:
        lang_code: Language code (a, b, e, f, etc.)
        log_callback: Optional function to log messages; called with a
            ``(message, color)`` tuple. Falls back to ``print`` on error.

    Returns:
        Loaded spaCy model or None if unavailable
    """

    def log(msg, is_error=False):
        # Prefer GUI log callback when provided to avoid spamming stdout.
        if log_callback:
            color = "red" if is_error else "grey"
            try:
                log_callback((msg, color))
            except Exception:
                # Fallback to printing if callback misbehaves
                print(msg)
        else:
            print(msg)

    def _load_pipeline(spacy_module, name):
        # Shared load path (first attempt and post-download retry): disable the
        # heavy components — only sentence segmentation is needed.
        nlp = spacy_module.load(
            name,
            disable=["ner", "parser", "tagger", "lemmatizer", "attribute_ruler"],
        )
        # Enable sentence segmentation only
        if "sentencizer" not in nlp.pipe_names:
            nlp.add_pipe("sentencizer")
        return nlp

    # Check if model is cached
    if lang_code in _nlp_cache:
        return _nlp_cache[lang_code]
    # Check if language is supported
    model_name = SPACY_MODELS.get(lang_code)
    if not model_name:
        log(f"\nspaCy: No model mapping for language '{lang_code}'...")
        return None
    # Lazy load spaCy
    spacy = _load_spacy()
    if spacy is None:
        log("\nspaCy: Module not installed, falling back to default segmentation...")
        return None
    # Try to load the model
    try:
        log(f"\nLoading spaCy model '{model_name}'...")
        nlp = _load_pipeline(spacy, model_name)
        _nlp_cache[lang_code] = nlp
        return nlp
    except OSError:
        # Model not found, attempt download
        log(f"\nspaCy: Downloading model '{model_name}'...")
        try:
            from spacy.cli import download
            download(model_name)
            # Retry loading with the same reduced pipeline
            nlp = _load_pipeline(spacy, model_name)
            _nlp_cache[lang_code] = nlp
            log(f"spaCy model '{model_name}' downloaded and loaded")
            return nlp
        except Exception as e:
            log(
                f"\nspaCy: Failed to download model '{model_name}': {e}...",
                is_error=True,
            )
            return None
    except Exception as e:
        log(f"\nspaCy: Error loading model '{model_name}': {e}...", is_error=True)
        return None
def segment_sentences(text, lang_code, log_callback=None):
    """Split *text* into sentences using spaCy.

    Args:
        text: Text to segment.
        lang_code: Language code used to select the spaCy model.
        log_callback: Optional function to log messages.

    Returns:
        A list of non-empty, stripped sentence strings, or None when no
        spaCy model is available for the language.
    """
    pipeline = get_spacy_model(lang_code, log_callback)
    if pipeline is None:
        return None
    # Raise the pipeline's max_length cap when the input exceeds it, so spaCy
    # does not reject very long documents (best effort — failures ignored).
    try:
        length = len(text or "")
        if length and hasattr(pipeline, "max_length") and length > pipeline.max_length:
            # increase a bit beyond the text length to be safe
            pipeline.max_length = length + 1000
    except Exception:
        pass
    # Process text and collect the non-empty sentences
    sentences = []
    for sent in pipeline(text).sents:
        stripped = sent.text.strip()
        if stripped:
            sentences.append(stripped)
    return sentences
def is_spacy_available():
    """Return True when the spaCy module can be imported, False otherwise."""
    spacy_module = _load_spacy()
    return spacy_module is not None
def clear_cache():
    """Clear the model cache to free memory.

    Mutates the module-level ``_nlp_cache`` dict in place via ``.clear()``;
    since the name is never rebound, no ``global`` declaration is needed
    (the original's ``global _nlp_cache`` was redundant).
    """
    _nlp_cache.clear()
@@ -64,6 +64,3 @@ from abogen.utils import get_version | ||
| # 384 if self.lang_code in 'ab': | ||
| SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = [ | ||
| "a", | ||
| "b", | ||
| ] | ||
| SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = list(LANGUAGE_DESCRIPTIONS.keys()) | ||
@@ -70,0 +67,0 @@ # Voice and sample text constants |
+19
-11
@@ -13,3 +13,10 @@ import os | ||
| # Pre-compile frequently used regex patterns for better performance | ||
| _WHITESPACE_PATTERN = re.compile(r"[^\S\n]+") | ||
| _MULTIPLE_NEWLINES_PATTERN = re.compile(r"\n{3,}") | ||
| _SINGLE_NEWLINE_PATTERN = re.compile(r"(?<!\n)\n(?!\n)") | ||
| _CHAPTER_MARKER_PATTERN = re.compile(r"<<CHAPTER_MARKER:.*?>>") | ||
| _METADATA_PATTERN = re.compile(r"<<METADATA_[^:]+:[^>]*>>") | ||
| def detect_encoding(file_path): | ||
@@ -132,9 +139,12 @@ import chardet | ||
| # Collapse all whitespace (excluding newlines) into single spaces per line and trim edges | ||
| lines = [re.sub(r"[^\S\n]+", " ", line).strip() for line in text.splitlines()] | ||
| # Use pre-compiled pattern for better performance | ||
| lines = [_WHITESPACE_PATTERN.sub(" ", line).strip() for line in text.splitlines()] | ||
| text = "\n".join(lines) | ||
| # Standardize paragraph breaks (multiple newlines become exactly two) and trim overall whitespace | ||
| text = re.sub(r"\n{3,}", "\n\n", text).strip() | ||
| # Use pre-compiled pattern for better performance | ||
| text = _MULTIPLE_NEWLINES_PATTERN.sub("\n\n", text).strip() | ||
| # Optionally replace single newlines with spaces, but preserve double newlines | ||
| if replace_single_newlines: | ||
| text = re.sub(r"(?<!\n)\n(?!\n)", " ", text) | ||
| # Use pre-compiled pattern for better performance | ||
| text = _SINGLE_NEWLINE_PATTERN.sub(" ", text) | ||
| return text | ||
@@ -248,10 +258,8 @@ | ||
| def calculate_text_length(text): | ||
| # Ignore chapter markers | ||
| text = re.sub(r"<<CHAPTER_MARKER:.*?>>", "", text) | ||
| # Ignore metadata patterns | ||
| text = re.sub(r"<<METADATA_[^:]+:[^>]*>>", "", text) | ||
| # Ignore newlines | ||
| text = text.replace("\n", "") | ||
| # Ignore leading/trailing spaces | ||
| text = text.strip() | ||
| # Use pre-compiled patterns for better performance | ||
| # Ignore chapter markers and metadata patterns in a single pass | ||
| text = _CHAPTER_MARKER_PATTERN.sub("", text) | ||
| text = _METADATA_PATTERN.sub("", text) | ||
| # Ignore newlines and leading/trailing spaces | ||
| text = text.replace("\n", "").strip() | ||
| # Calculate character count | ||
@@ -258,0 +266,0 @@ char_count = len(text) |
+1
-1
@@ -1,1 +0,1 @@ | ||
| 1.2.3 | ||
| 1.2.4 |
+15
-0
@@ -0,1 +1,16 @@ | ||
| # 1.2.4 (Pre-release) | ||
| - **Subtitle generation is now available for all languages!** Abogen now supports subtitle generation for non-English languages using audio duration-based timing. Available modes include `Line`, `Sentence`, and `Sentence + Comma`. (Note: Word-level subtitle modes remain English-only due to Kokoro's timestamp token limitations.) | ||
| - New option: **"Use spaCy for sentence segmentation"** You can now use [spaCy](https://spacy.io/) to automatically detect sentence boundaries and produce cleaner, more readable subtitles. Quick summary: | ||
| - **What it does:** Splits text into natural sentences so subtitle entries read better and align more naturally with speech. | ||
| - **Why this helps:** The previous punctuation-based splitting could break sentences incorrectly at common abbreviations (e.g. "Mr.", "Dr.", "Prof.") or initials, producing wrong subtitle breaks. spaCy avoids those false splits by using linguistic rules to detect real sentence boundaries. | ||
| - **For Non-English:** spaCy runs **before** audio generation to create better sentence chunks for TTS. | ||
| - **For English:** spaCy runs **during** subtitle generation to find accurate sentence breaks after TTS. | ||
| - **Note:** spaCy segmentation is only applied when subtitle mode is `Sentence` or `Sentence + Comma`. When turned off, it falls back to simple punctuation-based splitting. | ||
- New option: **Pre-download models and voices for offline use**: You can now pre-download all required Kokoro models, voices, and spaCy language models using this option in the settings menu, allowing you to use Abogen completely offline without any internet connection.
| - Added support for `.` separator in timestamps (e.g. `HH:MM:SS.ms`) for timestamp-based text files. | ||
| - Optimized regex compilation and eliminated busy-wait loops. | ||
| - Possibly fixed `Silent truncation of long paragraphs` issue mentioned in [#91](https://github.com/denizsafak/abogen/issues/91) by [@xklzlxr](https://github.com/xklzlxr) | ||
| - Fixed unused regex patterns and variable naming conventions. | ||
| - Improvements in code and documentation. | ||
| # 1.2.3 | ||
@@ -2,0 +17,0 @@ - Same as 1.2.2, re-released to fix an issue with subtitle timing when using timestamp-based text files. |
+10
-6
| Metadata-Version: 2.4 | ||
| Name: abogen | ||
| Version: 1.2.3 | ||
| Version: 1.2.4 | ||
| Summary: Generate audiobooks from EPUBs, PDFs and text with synchronized captions. | ||
@@ -35,2 +35,3 @@ Project-URL: Homepage, https://github.com/denizsafak/abogen | ||
| Requires-Dist: soundfile>=0.13.1 | ||
| Requires-Dist: spacy>=3.8.7 | ||
| Requires-Dist: static-ffmpeg>=2.13 | ||
@@ -212,4 +213,6 @@ Description-Content-Type: text/markdown | ||
| | **Subtitle speed adjustment method** | Choose how to speed up audio when needed: `TTS Regeneration (better quality)` re-generates the audio at a faster speed, while `FFmpeg Time-stretch (better speed)` quickly speeds up the generated audio. (for subtitle files). | | ||
| | **Use spaCy for sentence segmentation** | When this option is enabled, Abogen uses [spaCy](https://spacy.io/) to detect sentence boundaries more accurately, instead of using punctuation marks (like periods, question marks, etc.) to split sentences, which could incorrectly cut off phrases like "Mr." or "Dr.". With spaCy, sentences are divided more accurately. For non-English text, spaCy runs **before** audio generation to create sentence chunks. For English text, spaCy runs **during** subtitle generation to improve timing and readability. spaCy is only used when subtitle mode is `Sentence` or `Sentence + Comma`. If you prefer the old punctuation splitting method, you can turn this option off. | | ||
| | **Pre-download models and voices for offline use** | Opens a window that displays the available models and voices. Click `Download all` button to download all required models and voices, allowing you to use Abogen completely offline without any internet connection. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Check for updates at startup** | Automatically checks for updates when the program starts. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Reset to default settings** | Resets all settings to their default values. | | ||
@@ -277,3 +280,3 @@ | ||
| ## `About Timestamp-based Text Files` | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
@@ -293,3 +296,3 @@ Format your text file like this: | ||
| **Important notes:** | ||
| - Timestamps must be in `HH:MM:SS` or `HH:MM:SS,ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30,500` for 5 minutes 30.5 seconds) | ||
| - Timestamps must be in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30.500` for 5 minutes 30.5 seconds) | ||
| - Milliseconds are optional and provide precision up to 1/1000th of a second | ||
@@ -520,2 +523,3 @@ - Text before the first timestamp (if any) will automatically start at `00:00:00` | ||
| - Abogen uses [Kokoro](https://github.com/hexgrad/kokoro) for its high-quality, natural-sounding text-to-speech synthesis. Huge thanks to the Kokoro team for making this possible. | ||
| - Thanks to the [spaCy](https://spacy.io/) project for its sentence-segmentation tools, which help Abogen produce cleaner, more natural sentence segmentation. | ||
| - Thanks to [@wojiushixiaobai](https://github.com/wojiushixiaobai) for [Embedded Python](https://github.com/wojiushixiaobai/Python-Embed-Win64) packages. These modified packages include pip pre-installed, enabling Abogen to function as a standalone application without requiring users to separately install Python in Windows. | ||
@@ -533,5 +537,5 @@ - Thanks to creators of [EbookLib](https://github.com/aerkalov/ebooklib), a Python library for reading and writing ePub files, which is used for extracting text from ePub files. | ||
| > [!IMPORTANT] | ||
| > Subtitle generation currently works only for English. This is because Kokoro provides timestamp tokens only for English text. If you want subtitles in other languages, please request this feature in the [Kokoro project](https://github.com/hexgrad/kokoro). For more technical details, see [this line](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383) in the Kokoro's code. | ||
| > [!NOTE] | ||
| > Abogen supports subtitle generation for all languages. However, word-level subtitle modes (e.g., "1 word", "2 words", "3 words", etc.) are only available for English because [Kokoro provides timestamp tokens only for English text](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383). For non-English languages, Abogen uses a duration-based fallback that supports sentence-level and comma-based subtitle modes ("Line", "Sentence", "Sentence + Comma"). If you need word-level subtitles for other languages, please request that feature in the [Kokoro project](https://github.com/hexgrad/kokoro). | ||
| > Tags: audiobook, kokoro, text-to-speech, TTS, audiobook generator, audiobooks, text to speech, audiobook maker, audiobook creator, audiobook generator, voice-synthesis, text to audio, text to audio converter, text to speech converter, text to speech generator, text to speech software, text to speech app, epub to audio, pdf to audio, markdown to audio, subtitle to audio, srt to audio, ass to audio, vtt to audio, webvtt to audio, content-creation, media-generation |
+2
-1
@@ -28,3 +28,4 @@ [build-system] | ||
| "static_ffmpeg>=2.13", | ||
| "Markdown>=3.9" | ||
| "Markdown>=3.9", | ||
| "spacy>=3.8.7" | ||
| ] | ||
@@ -31,0 +32,0 @@ |
+8
-5
@@ -174,4 +174,6 @@ # abogen <img width="40px" title="abogen icon" src="https://raw.githubusercontent.com/denizsafak/abogen/refs/heads/main/abogen/assets/icon.ico" align="right" style="padding-left: 10px; padding-top:5px;"> | ||
| | **Subtitle speed adjustment method** | Choose how to speed up audio when needed: `TTS Regeneration (better quality)` re-generates the audio at a faster speed, while `FFmpeg Time-stretch (better speed)` quickly speeds up the generated audio. (for subtitle files). | | ||
| | **Use spaCy for sentence segmentation** | When this option is enabled, Abogen uses [spaCy](https://spacy.io/) to detect sentence boundaries more accurately, instead of using punctuation marks (like periods, question marks, etc.) to split sentences, which could incorrectly cut off phrases like "Mr." or "Dr.". With spaCy, sentences are divided more accurately. For non-English text, spaCy runs **before** audio generation to create sentence chunks. For English text, spaCy runs **during** subtitle generation to improve timing and readability. spaCy is only used when subtitle mode is `Sentence` or `Sentence + Comma`. If you prefer the old punctuation splitting method, you can turn this option off. | | ||
| | **Pre-download models and voices for offline use** | Opens a window that displays the available models and voices. Click `Download all` button to download all required models and voices, allowing you to use Abogen completely offline without any internet connection. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Check for updates at startup** | Automatically checks for updates when the program starts. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Reset to default settings** | Resets all settings to their default values. | | ||
@@ -239,3 +241,3 @@ | ||
| ## `About Timestamp-based Text Files` | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
@@ -255,3 +257,3 @@ Format your text file like this: | ||
| **Important notes:** | ||
| - Timestamps must be in `HH:MM:SS` or `HH:MM:SS,ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30,500` for 5 minutes 30.5 seconds) | ||
| - Timestamps must be in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30.500` for 5 minutes 30.5 seconds) | ||
| - Milliseconds are optional and provide precision up to 1/1000th of a second | ||
@@ -482,2 +484,3 @@ - Text before the first timestamp (if any) will automatically start at `00:00:00` | ||
| - Abogen uses [Kokoro](https://github.com/hexgrad/kokoro) for its high-quality, natural-sounding text-to-speech synthesis. Huge thanks to the Kokoro team for making this possible. | ||
| - Thanks to the [spaCy](https://spacy.io/) project for its sentence-segmentation tools, which help Abogen produce cleaner, more natural sentence segmentation. | ||
| - Thanks to [@wojiushixiaobai](https://github.com/wojiushixiaobai) for [Embedded Python](https://github.com/wojiushixiaobai/Python-Embed-Win64) packages. These modified packages include pip pre-installed, enabling Abogen to function as a standalone application without requiring users to separately install Python in Windows. | ||
@@ -495,5 +498,5 @@ - Thanks to creators of [EbookLib](https://github.com/aerkalov/ebooklib), a Python library for reading and writing ePub files, which is used for extracting text from ePub files. | ||
| > [!IMPORTANT] | ||
| > Subtitle generation currently works only for English. This is because Kokoro provides timestamp tokens only for English text. If you want subtitles in other languages, please request this feature in the [Kokoro project](https://github.com/hexgrad/kokoro). For more technical details, see [this line](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383) in the Kokoro's code. | ||
| > [!NOTE] | ||
| > Abogen supports subtitle generation for all languages. However, word-level subtitle modes (e.g., "1 word", "2 words", "3 words", etc.) are only available for English because [Kokoro provides timestamp tokens only for English text](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383). For non-English languages, Abogen uses a duration-based fallback that supports sentence-level and comma-based subtitle modes ("Line", "Sentence", "Sentence + Comma"). If you need word-level subtitles for other languages, please request that feature in the [Kokoro project](https://github.com/hexgrad/kokoro). | ||
| > Tags: audiobook, kokoro, text-to-speech, TTS, audiobook generator, audiobooks, text to speech, audiobook maker, audiobook creator, audiobook generator, voice-synthesis, text to audio, text to audio converter, text to speech converter, text to speech generator, text to speech software, text to speech app, epub to audio, pdf to audio, markdown to audio, subtitle to audio, srt to audio, ass to audio, vtt to audio, webvtt to audio, content-creation, media-generation |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
835514
6.33%40
5.26%11856
8.92%