abogen
Advanced tools
| """ | ||
| Pre-download dialog and worker for Abogen | ||
| This module consolidates pre-download logic for Kokoro voices and model | ||
| and spaCy language models. The code favors clarity, avoids duplication, | ||
| and handles optional dependencies gracefully. | ||
| """ | ||
| from typing import List, Optional, Tuple | ||
| import importlib | ||
| import importlib.util | ||
| from PyQt6.QtWidgets import ( | ||
| QDialog, | ||
| QVBoxLayout, | ||
| QHBoxLayout, | ||
| QLabel, | ||
| QPushButton, | ||
| QSpacerItem, | ||
| QSizePolicy, | ||
| ) | ||
| from PyQt6.QtCore import QThread, pyqtSignal | ||
| from abogen.constants import COLORS, VOICES_INTERNAL | ||
| from abogen.spacy_utils import SPACY_MODELS | ||
| import abogen.hf_tracker | ||
| # Helpers | ||
def _unique_sorted_models() -> List[str]:
    """Return the spaCy model package names, deduplicated and sorted."""
    unique_names = {model for model in SPACY_MODELS.values()}
    return sorted(unique_names)
| def _is_package_installed(pkg_name: str) -> bool: | ||
| """Return True if a package with the given name can be imported (site-packages).""" | ||
| try: | ||
| return importlib.util.find_spec(pkg_name) is not None | ||
| except Exception: | ||
| return False | ||
| # NOTE: explicit HF cache helper removed; we use try_to_load_from_cache in-scope where needed | ||
class PreDownloadWorker(QThread):
    """Worker thread to download required models/voices.

    Emits human-readable messages via `progress`. Uses `category_done` to indicate
    a category (voices/model/spacy) finished successfully. Emits `error` on exception
    and `finished` after all work completes.

    NOTE(review): declaring a custom ``finished`` signal shadows QThread's
    built-in ``finished`` signal — confirm connected slots expect this override.
    """

    # Emit (category, status, message). category: 'voice'|'model'|'config'|'spacy';
    # status: 'downloading'|'installed'|'downloaded'|'warning'.
    progress = pyqtSignal(str, str, str)
    # Emitted with 'voices', 'model' or 'spacy' once a whole category succeeded.
    category_done = pyqtSignal(str)
    finished = pyqtSignal()
    error = pyqtSignal(str)

    def __init__(self, parent=None):
        super().__init__(parent)
        # Cooperative cancellation flag, polled between individual downloads.
        self._cancelled = False
        # repo and filenames used for Kokoro model
        self._repo_id = "hexgrad/Kokoro-82M"
        self._model_files = ["kokoro-v1_0.pth", "config.json"]
        # Track download success per category
        self._voices_success = False
        self._model_success = False
        self._spacy_success = False
        # Remember the HF tracker emitter so run() can suppress and restore it.
        self._original_emitter = abogen.hf_tracker.show_warning_signal_emitter

    def cancel(self) -> None:
        """Request cooperative cancellation; honored at the next poll point."""
        self._cancelled = True

    def run(self) -> None:
        """Download voices, Kokoro model files and spaCy models sequentially.

        Emits ``category_done`` after each successful category and ``finished``
        at the end; an early return on cancellation skips both.
        """
        # Suppress HF tracker warnings during downloads
        abogen.hf_tracker.show_warning_signal_emitter = None
        try:
            self._download_kokoro_voices()
            if self._cancelled:
                return
            if self._voices_success:
                self.category_done.emit("voices")
            self._download_kokoro_model()
            if self._cancelled:
                return
            if self._model_success:
                self.category_done.emit("model")
            self._download_spacy_models()
            if self._cancelled:
                return
            if self._spacy_success:
                self.category_done.emit("spacy")
            self.finished.emit()
        except Exception as exc:  # pragma: no cover - best-effort reporting
            self.error.emit(str(exc))
        finally:
            # Restore original emitter
            abogen.hf_tracker.show_warning_signal_emitter = self._original_emitter

    # Kokoro voices
    def _download_kokoro_voices(self) -> None:
        """Fetch every Kokoro voice file, skipping ones already in the HF cache.

        Sets ``self._voices_success`` False if huggingface_hub is missing, the
        run is cancelled mid-loop, or any individual voice fails to download.
        """
        self._voices_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "voice", "warning", "huggingface_hub not installed, skipping voices..."
            )
            self._voices_success = False
            return
        voice_list = VOICES_INTERNAL
        for idx, voice in enumerate(voice_list, start=1):
            if self._cancelled:
                self._voices_success = False
                return
            filename = f"voices/{voice}.pt"
            # NOTE(review): truthiness check — try_to_load_from_cache can also
            # return a special "cached non-existence" sentinel; confirm it is
            # falsy here, otherwise a known-missing file might be skipped.
            if try_to_load_from_cache(repo_id=self._repo_id, filename=filename):
                self.progress.emit(
                    "voice",
                    "installed",
                    f"{idx}/{len(voice_list)}: {voice} already present",
                )
                continue
            self.progress.emit(
                "voice", "downloading", f"{idx}/{len(voice_list)}: {voice}..."
            )
            try:
                hf_hub_download(repo_id=self._repo_id, filename=filename)
                self.progress.emit("voice", "downloaded", f"{voice} downloaded")
            except Exception as exc:
                # A single failure marks the category unsuccessful but the
                # loop continues so the remaining voices still get a chance.
                self.progress.emit(
                    "voice", "warning", f"could not download {voice}: {exc}"
                )
                self._voices_success = False

    # Kokoro model
    def _download_kokoro_model(self) -> None:
        """Fetch the Kokoro model weights and config, skipping cached files."""
        self._model_success = True
        try:
            from huggingface_hub import hf_hub_download, try_to_load_from_cache
        except Exception:
            self.progress.emit(
                "model", "warning", "huggingface_hub not installed, skipping model..."
            )
            self._model_success = False
            return
        for fname in self._model_files:
            if self._cancelled:
                self._model_success = False
                return
            # config.json is reported under its own UI row ('config').
            category = "config" if fname == "config.json" else "model"
            if try_to_load_from_cache(repo_id=self._repo_id, filename=fname):
                self.progress.emit(
                    category, "installed", f"file {fname} already present"
                )
                continue
            self.progress.emit(category, "downloading", f"file {fname}...")
            try:
                hf_hub_download(repo_id=self._repo_id, filename=fname)
                self.progress.emit(category, "downloaded", f"file {fname} downloaded")
            except Exception as exc:
                self.progress.emit(
                    category, "warning", f"could not download file {fname}: {exc}"
                )
                self._model_success = False

    # spaCy models
    def _download_spacy_models(self) -> None:
        """Download spaCy models. Prefer missing models provided by parent.

        Parent dialog will populate _spacy_models_missing during checking.
        """
        self._spacy_success = True
        # Determine which models to process: prefer parent-provided missing list to avoid
        # re-checking everything; otherwise use the full unique list.
        parent = self.parent()
        models_to_process: List[str] = _unique_sorted_models()
        try:
            if (
                parent is not None
                and hasattr(parent, "_spacy_models_missing")
                and parent._spacy_models_missing
            ):
                # dict.fromkeys keeps order while dropping duplicates.
                models_to_process = list(dict.fromkeys(parent._spacy_models_missing))
        except Exception:
            pass
        # If spaCy is not available to run the CLI, skip gracefully
        try:
            import spacy.cli as _spacy_cli
        except Exception:
            self.progress.emit(
                "spacy", "warning", "spaCy not available, skipping spaCy models..."
            )
            self._spacy_success = False
            return
        for idx, model_name in enumerate(models_to_process, start=1):
            if self._cancelled:
                self._spacy_success = False
                return
            if _is_package_installed(model_name):
                self.progress.emit(
                    "spacy",
                    "installed",
                    f"{idx}/{len(models_to_process)}: {model_name} already installed",
                )
                continue
            self.progress.emit(
                "spacy",
                "downloading",
                f"{idx}/{len(models_to_process)}: {model_name}...",
            )
            try:
                _spacy_cli.download(model_name)
                self.progress.emit("spacy", "downloaded", f"{model_name} downloaded")
            except Exception as exc:
                self.progress.emit(
                    "spacy", "warning", f"could not download {model_name}: {exc}"
                )
                self._spacy_success = False
class PreDownloadDialog(QDialog):
    """Dialog to show and control pre-download process.

    Runs a background :class:`StatusCheckWorker` on open to colour each
    status row, and starts a :class:`PreDownloadWorker` when the user
    clicks "Download all".
    """

    # Fixed label prefixes; the status text is appended after them.
    VOICE_PREFIX = "Kokoro voices: "
    MODEL_PREFIX = "Kokoro model: "
    CONFIG_PREFIX = "Kokoro config: "
    SPACY_PREFIX = "spaCy models: "

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Pre-download Models and Voices")
        self.setMinimumWidth(500)
        # Download worker; created lazily in _start_download.
        self.worker: Optional[PreDownloadWorker] = None
        # True once any category is found missing; enables the download button.
        self.has_missing = False
        # (model_name, installed?) pairs accumulated during the status check.
        self._spacy_models_checked: List[tuple] = []
        # Missing spaCy package names; also read by PreDownloadWorker via parent().
        self._spacy_models_missing: List[str] = []
        self._status_worker = None
        # Map keywords to (label, prefix) - labels filled after UI creation
        self.status_map = {
            "voice": (None, self.VOICE_PREFIX),
            "spacy": (None, self.SPACY_PREFIX),
            "model": (None, self.MODEL_PREFIX),
            "config": (None, self.CONFIG_PREFIX),
        }
        # Worker category name -> status_map keys it completes.
        self.category_map = {
            "voices": ["voice"],
            "model": ["model", "config"],
            "spacy": ["spacy"],
        }
        self._setup_ui()
        self._start_status_check()

    def _setup_ui(self) -> None:
        """Build the description, the four status rows and the button row."""
        layout = QVBoxLayout(self)
        layout.setSpacing(0)
        layout.setContentsMargins(15, 0, 15, 15)
        desc = QLabel(
            "You can pre-download all required models and voices for offline use.\n"
            "This includes Kokoro voices, Kokoro model (and config), and spaCy models."
        )
        desc.setWordWrap(True)
        layout.addWidget(desc)
        # Status rows
        status_layout = QVBoxLayout()
        status_title = QLabel("<b>Current Status:</b>")
        status_layout.addWidget(status_title)
        self.voices_status = QLabel(self.VOICE_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.voices_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.model_status = QLabel(self.MODEL_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.model_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.config_status = QLabel(self.CONFIG_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.config_status)
        row.addStretch()
        status_layout.addLayout(row)
        self.spacy_status = QLabel(self.SPACY_PREFIX + "⏳ Checking...")
        row = QHBoxLayout()
        row.addWidget(self.spacy_status)
        row.addStretch()
        status_layout.addLayout(row)
        # register labels (filling in the None placeholders set in __init__)
        self.status_map["voice"] = (self.voices_status, self.VOICE_PREFIX)
        self.status_map["model"] = (self.model_status, self.MODEL_PREFIX)
        self.status_map["config"] = (self.config_status, self.CONFIG_PREFIX)
        self.status_map["spacy"] = (self.spacy_status, self.SPACY_PREFIX)
        layout.addLayout(status_layout)
        layout.addItem(
            QSpacerItem(0, 20, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Fixed)
        )
        # Buttons
        button_row = QHBoxLayout()
        button_row.setSpacing(10)
        self.download_btn = QPushButton("Download all")
        self.download_btn.setMinimumWidth(100)
        self.download_btn.setMinimumHeight(35)
        # Disabled until the status check finds something missing.
        self.download_btn.setEnabled(False)
        self.download_btn.clicked.connect(self._start_download)
        button_row.addWidget(self.download_btn)
        self.close_btn = QPushButton("Close")
        self.close_btn.setMinimumWidth(100)
        self.close_btn.setMinimumHeight(35)
        self.close_btn.clicked.connect(self._handle_close)
        button_row.addWidget(self.close_btn)
        layout.addLayout(button_row)
        self.adjustSize()

    # Status checking worker
    class StatusCheckWorker(QThread):
        """Background thread that inspects caches/site-packages and reports
        each category's state back to the dialog via signals."""

        voices_checked = pyqtSignal(bool, list)
        model_checked = pyqtSignal(bool)
        config_checked = pyqtSignal(bool)
        # Emitted per spaCy model: first with its name, then with the result.
        spacy_model_checking = pyqtSignal(str)
        spacy_model_result = pyqtSignal(str, bool)
        spacy_checked = pyqtSignal(bool, list)

        def run(self):
            # parent() is the PreDownloadDialog that owns the check helpers.
            parent = self.parent()
            if parent is None:
                return
            voices_ok, missing_voices = parent._check_kokoro_voices()
            self.voices_checked.emit(voices_ok, missing_voices)
            model_ok = parent._check_kokoro_model()
            self.model_checked.emit(model_ok)
            config_ok = parent._check_kokoro_config()
            self.config_checked.emit(config_ok)
            # Check spaCy models by package name to detect site-package installs
            unique = _unique_sorted_models()
            missing: List[str] = []
            for name in unique:
                self.spacy_model_checking.emit(name)
                ok = _is_package_installed(name)
                self.spacy_model_result.emit(name, ok)
                if not ok:
                    missing.append(name)
            # Stash the missing list on the dialog for PreDownloadWorker to reuse.
            parent._spacy_models_missing = missing
            self.spacy_checked.emit(len(missing) == 0, missing)

    def _start_status_check(self) -> None:
        """Wire up and start the status-check worker thread."""
        self._status_worker = self.StatusCheckWorker(self)
        self._status_worker.voices_checked.connect(self._update_voices_status)
        self._status_worker.model_checked.connect(self._update_model_status)
        self._status_worker.config_checked.connect(self._update_config_status)
        self._status_worker.spacy_model_checking.connect(self._spacy_model_checking)
        self._status_worker.spacy_model_result.connect(self._spacy_model_result)
        self._status_worker.spacy_checked.connect(self._update_spacy_status)
        # These are initialized in __init__ to keep consistent object state
        # Set checking visual state
        for lbl in (
            self.voices_status,
            self.model_status,
            self.config_status,
            self.spacy_status,
        ):
            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
        self.spacy_status.setText(self.SPACY_PREFIX + "⏳ Checking...")
        self._status_worker.start()

    # UI update callbacks
    def _spacy_model_checking(self, name: str) -> None:
        """Show which spaCy model is currently being checked."""
        self.spacy_status.setText(f"{self.SPACY_PREFIX}Checking {name}...")

    def _spacy_model_result(self, name: str, ok: bool) -> None:
        """Record a per-model check result and refresh the running tally."""
        self._spacy_models_checked.append((name, ok))
        if not ok and name not in self._spacy_models_missing:
            self._spacy_models_missing.append(name)
        checked = len(self._spacy_models_checked)
        missing_count = len(self._spacy_models_missing)
        if missing_count:
            self.spacy_status.setText(
                f"{self.SPACY_PREFIX}{checked} checked, {missing_count} missing..."
            )
        else:
            self.spacy_status.setText(f"{self.SPACY_PREFIX}{checked} checked...")

    def _update_voices_status(self, ok: bool, missing: List[str]) -> None:
        """Colour the voices row green/red based on the check result."""
        if ok:
            self._set_status("voice", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "voice", f"✗ Missing {len(missing)} voices", COLORS["RED"]
                )
            else:
                self._set_status("voice", "✗ Not downloaded", COLORS["RED"])

    def _update_model_status(self, ok: bool) -> None:
        """Colour the model row green/red based on the check result."""
        if ok:
            self._set_status("model", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("model", "✗ Not downloaded", COLORS["RED"])

    def _update_config_status(self, ok: bool) -> None:
        """Colour the config row green/red based on the check result."""
        if ok:
            self._set_status("config", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            self._set_status("config", "✗ Not downloaded", COLORS["RED"])

    def _update_spacy_status(self, ok: bool, missing: List[str]) -> None:
        """Finalize the spaCy row and enable the download button if needed.

        This is the last check signal, so the download button's enabled state
        is decided here from the accumulated ``has_missing`` flag.
        """
        if ok:
            self._set_status("spacy", "✓ Downloaded", COLORS["GREEN"])
        else:
            self.has_missing = True
            if missing:
                self._set_status(
                    "spacy", f"✗ Missing {len(missing)} model(s)", COLORS["RED"]
                )
            else:
                self._set_status("spacy", "✗ Not downloaded", COLORS["RED"])
        self.download_btn.setEnabled(self.has_missing)

    def _set_status(self, key: str, text: str, color: str) -> None:
        """Set text and colour on the status row registered under *key*."""
        lbl, prefix = self.status_map.get(key, (None, ""))
        if not lbl:
            return
        lbl.setText(prefix + text)
        lbl.setStyleSheet(f"color: {color};")

    # Helper checks
    def _check_kokoro_voices(self) -> Tuple[bool, List[str]]:
        """Return (ok, missing_list) for Kokoro voices check."""
        missing = []
        try:
            from huggingface_hub import try_to_load_from_cache
            for voice in VOICES_INTERNAL:
                if not try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename=f"voices/{voice}.pt"
                ):
                    missing.append(voice)
        except Exception:
            # If HF missing, report all as missing
            return False, list(VOICES_INTERNAL)
        return (len(missing) == 0), missing

    def _check_kokoro_model(self) -> bool:
        """Return True when the Kokoro weights file is in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            # NOTE(review): `is not None` also accepts the cached-no-exist
            # sentinel try_to_load_from_cache may return — confirm intended.
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="kokoro-v1_0.pth"
                )
                is not None
            )
        except Exception:
            return False

    def _check_kokoro_config(self) -> bool:
        """Return True when the Kokoro config.json is in the HF cache."""
        try:
            from huggingface_hub import try_to_load_from_cache
            return (
                try_to_load_from_cache(
                    repo_id="hexgrad/Kokoro-82M", filename="config.json"
                )
                is not None
            )
        except Exception:
            return False

    def _check_spacy_models(self) -> bool:
        """Recompute the missing spaCy package list; True when none missing."""
        unique = _unique_sorted_models()
        missing = [m for m in unique if not _is_package_installed(m)]
        self._spacy_models_missing = missing
        return len(missing) == 0

    # Download control
    def _start_download(self) -> None:
        """Create and start the download worker; lock the download button."""
        self.download_btn.setEnabled(False)
        self.download_btn.setText("Downloading...")
        # mark the start of downloads; this triggers the labels
        self._on_progress("system", "starting", "Processing, please wait...")
        self.worker = PreDownloadWorker(self)
        self.worker.progress.connect(self._on_progress)
        self.worker.category_done.connect(self._on_category_done)
        self.worker.finished.connect(self._on_download_finished)
        self.worker.error.connect(self._on_download_error)
        self.worker.start()

    def _on_progress(self, category: str, status: str, message: str) -> None:
        """Map worker (category, status, message) to UI label updates.

        Status is one of: 'downloading', 'installed', 'downloaded', 'warning', 'starting'.
        Category is one of: 'voice', 'model', 'spacy', 'config', or 'system'.
        """
        try:
            # If the category targets a specific label, update directly
            if category in self.status_map:
                lbl, prefix = self.status_map[category]
                if not lbl:
                    return
                # Compose message; elide long text and move it to a tooltip.
                full_text = prefix + message
                if len(full_text) > 60:
                    display_text = full_text[:57] + "..."
                    lbl.setText(display_text)
                    lbl.setToolTip(full_text)
                else:
                    lbl.setText(full_text)
                    lbl.setToolTip("")  # Clear tooltip if not needed
                # Colour by status token.
                if status == "downloading":
                    lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                elif status in ("installed", "downloaded"):
                    lbl.setStyleSheet(f"color: {COLORS['GREEN']};")
                elif status == "warning":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                elif status == "error":
                    lbl.setStyleSheet(f"color: {COLORS['RED']};")
                return
            # System-level messages
            if category == "system":
                if status == "starting":
                    for k in self.status_map:
                        lbl, prefix = self.status_map[k]
                        if lbl:
                            lbl.setText(prefix + "Processing, please wait...")
                            lbl.setStyleSheet(f"color: {COLORS['ORANGE']};")
                # other system statuses don't require action
                return
        except Exception:
            # Do not let UI thread crash on unexpected worker message
            pass

    def _on_category_done(self, category: str) -> None:
        """Mark all labels belonging to a finished category as downloaded."""
        for key in self.category_map.get(category, []):
            self._set_status(key, "✓ Downloaded", COLORS["GREEN"])

    def _on_download_finished(self) -> None:
        """Reset button state after the worker's `finished` signal.

        NOTE(review): has_missing is cleared unconditionally even when some
        categories reported warnings — confirm that is the intended UX.
        """
        self.has_missing = False
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(False)

    def _on_download_error(self, error_msg: str) -> None:
        """Show the error on every row and re-enable the download button."""
        self.download_btn.setText("Download all")
        self.download_btn.setEnabled(True)
        for key in self.status_map:
            self._set_status(key, f"✗ Error - {error_msg}", COLORS["RED"])

    def _handle_close(self) -> None:
        """Close button handler: cancel a running worker, then accept."""
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            # Give the worker up to 2s to wind down before closing.
            self.worker.wait(2000)
        self.accept()

    def closeEvent(self, event) -> None:
        """Window-close handler: mirror _handle_close's cancellation logic."""
        if self.worker and self.worker.isRunning():
            self.worker.cancel()
            self.worker.wait(2000)
        super().closeEvent(event)
| """ | ||
| Lazy-loaded spaCy utilities for sentence segmentation. | ||
| """ | ||
# Cached spaCy module and models (lazy loaded)
_spacy = None  # the imported spaCy module, or None until first successful import
_nlp_cache = {}  # lang_code -> loaded spaCy pipeline (populated by get_spacy_model)
# Language code to spaCy model mapping
SPACY_MODELS = {
    "a": "en_core_web_sm",  # American English
    "b": "en_core_web_sm",  # British English
    "e": "es_core_news_sm",  # Spanish
    "f": "fr_core_news_sm",  # French
    "i": "it_core_news_sm",  # Italian
    "p": "pt_core_news_sm",  # Brazilian Portuguese
    "z": "zh_core_web_sm",  # Mandarin Chinese
    "j": "ja_core_news_sm",  # Japanese
    "h": "xx_sent_ud_sm",  # Hindi (multi-language model)
}
def _load_spacy():
    """Import and cache the spaCy module; return None when unavailable."""
    global _spacy
    if _spacy is not None:
        return _spacy
    try:
        import spacy
    except ImportError:
        # Leave the cache as None so a later call can retry the import.
        return None
    _spacy = spacy
    return _spacy
def get_spacy_model(lang_code, log_callback=None):
    """
    Get or load a spaCy model for the given language code.

    Downloads the model automatically if not available. Loaded pipelines are
    cached in the module-level ``_nlp_cache`` keyed by ``lang_code``.

    Args:
        lang_code: Language code (a, b, e, f, etc.)
        log_callback: Optional function to log messages; called with a
            ``(message, color)`` tuple. Falls back to ``print`` on error.

    Returns:
        Loaded spaCy model or None if unavailable
    """

    def log(msg, is_error=False):
        # Prefer GUI log callback when provided to avoid spamming stdout.
        if log_callback:
            color = "red" if is_error else "grey"
            try:
                log_callback((msg, color))
            except Exception:
                # Fallback to printing if callback misbehaves
                print(msg)
        else:
            print(msg)

    def _load_pipeline(spacy_module, name):
        # Shared load path (first attempt and post-download retry): disable the
        # heavy components — only sentence segmentation is needed.
        nlp = spacy_module.load(
            name,
            disable=["ner", "parser", "tagger", "lemmatizer", "attribute_ruler"],
        )
        # Enable sentence segmentation only
        if "sentencizer" not in nlp.pipe_names:
            nlp.add_pipe("sentencizer")
        return nlp

    # Check if model is cached
    if lang_code in _nlp_cache:
        return _nlp_cache[lang_code]
    # Check if language is supported
    model_name = SPACY_MODELS.get(lang_code)
    if not model_name:
        log(f"\nspaCy: No model mapping for language '{lang_code}'...")
        return None
    # Lazy load spaCy
    spacy = _load_spacy()
    if spacy is None:
        log("\nspaCy: Module not installed, falling back to default segmentation...")
        return None
    # Try to load the model
    try:
        log(f"\nLoading spaCy model '{model_name}'...")
        nlp = _load_pipeline(spacy, model_name)
        _nlp_cache[lang_code] = nlp
        return nlp
    except OSError:
        # Model not found, attempt download
        log(f"\nspaCy: Downloading model '{model_name}'...")
        try:
            from spacy.cli import download
            download(model_name)
            # Retry loading with the same reduced pipeline
            nlp = _load_pipeline(spacy, model_name)
            _nlp_cache[lang_code] = nlp
            log(f"spaCy model '{model_name}' downloaded and loaded")
            return nlp
        except Exception as e:
            log(
                f"\nspaCy: Failed to download model '{model_name}': {e}...",
                is_error=True,
            )
            return None
    except Exception as e:
        log(f"\nspaCy: Error loading model '{model_name}': {e}...", is_error=True)
        return None
def segment_sentences(text, lang_code, log_callback=None):
    """Split *text* into sentences using spaCy.

    Args:
        text: Text to segment.
        lang_code: Language code used to select the spaCy model.
        log_callback: Optional function to log messages.

    Returns:
        A list of non-empty, stripped sentence strings, or None when no
        spaCy model is available for the language.
    """
    pipeline = get_spacy_model(lang_code, log_callback)
    if pipeline is None:
        return None
    # Raise the pipeline's max_length cap when the input exceeds it, so spaCy
    # does not reject very long documents (best effort — failures ignored).
    try:
        length = len(text or "")
        if length and hasattr(pipeline, "max_length") and length > pipeline.max_length:
            # increase a bit beyond the text length to be safe
            pipeline.max_length = length + 1000
    except Exception:
        pass
    # Process text and collect the non-empty sentences
    sentences = []
    for sent in pipeline(text).sents:
        stripped = sent.text.strip()
        if stripped:
            sentences.append(stripped)
    return sentences
def is_spacy_available():
    """Return True when the spaCy module can be imported, False otherwise."""
    spacy_module = _load_spacy()
    return spacy_module is not None
def clear_cache():
    """Clear the model cache to free memory.

    Mutates the module-level ``_nlp_cache`` dict in place via ``.clear()``;
    since the name is never rebound, no ``global`` declaration is needed
    (the original's ``global _nlp_cache`` was redundant).
    """
    _nlp_cache.clear()
@@ -64,6 +64,3 @@ from abogen.utils import get_version | ||
| # 384 if self.lang_code in 'ab': | ||
| SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = [ | ||
| "a", | ||
| "b", | ||
| ] | ||
| SUPPORTED_LANGUAGES_FOR_SUBTITLE_GENERATION = list(LANGUAGE_DESCRIPTIONS.keys()) | ||
@@ -70,0 +67,0 @@ # Voice and sample text constants |
+19
-11
@@ -13,3 +13,10 @@ import os | ||
| # Pre-compile frequently used regex patterns for better performance | ||
| _WHITESPACE_PATTERN = re.compile(r"[^\S\n]+") | ||
| _MULTIPLE_NEWLINES_PATTERN = re.compile(r"\n{3,}") | ||
| _SINGLE_NEWLINE_PATTERN = re.compile(r"(?<!\n)\n(?!\n)") | ||
| _CHAPTER_MARKER_PATTERN = re.compile(r"<<CHAPTER_MARKER:.*?>>") | ||
| _METADATA_PATTERN = re.compile(r"<<METADATA_[^:]+:[^>]*>>") | ||
| def detect_encoding(file_path): | ||
@@ -132,9 +139,12 @@ import chardet | ||
| # Collapse all whitespace (excluding newlines) into single spaces per line and trim edges | ||
| lines = [re.sub(r"[^\S\n]+", " ", line).strip() for line in text.splitlines()] | ||
| # Use pre-compiled pattern for better performance | ||
| lines = [_WHITESPACE_PATTERN.sub(" ", line).strip() for line in text.splitlines()] | ||
| text = "\n".join(lines) | ||
| # Standardize paragraph breaks (multiple newlines become exactly two) and trim overall whitespace | ||
| text = re.sub(r"\n{3,}", "\n\n", text).strip() | ||
| # Use pre-compiled pattern for better performance | ||
| text = _MULTIPLE_NEWLINES_PATTERN.sub("\n\n", text).strip() | ||
| # Optionally replace single newlines with spaces, but preserve double newlines | ||
| if replace_single_newlines: | ||
| text = re.sub(r"(?<!\n)\n(?!\n)", " ", text) | ||
| # Use pre-compiled pattern for better performance | ||
| text = _SINGLE_NEWLINE_PATTERN.sub(" ", text) | ||
| return text | ||
@@ -248,10 +258,8 @@ | ||
| def calculate_text_length(text): | ||
| # Ignore chapter markers | ||
| text = re.sub(r"<<CHAPTER_MARKER:.*?>>", "", text) | ||
| # Ignore metadata patterns | ||
| text = re.sub(r"<<METADATA_[^:]+:[^>]*>>", "", text) | ||
| # Ignore newlines | ||
| text = text.replace("\n", "") | ||
| # Ignore leading/trailing spaces | ||
| text = text.strip() | ||
| # Use pre-compiled patterns for better performance | ||
| # Ignore chapter markers and metadata patterns in a single pass | ||
| text = _CHAPTER_MARKER_PATTERN.sub("", text) | ||
| text = _METADATA_PATTERN.sub("", text) | ||
| # Ignore newlines and leading/trailing spaces | ||
| text = text.replace("\n", "").strip() | ||
| # Calculate character count | ||
@@ -258,0 +266,0 @@ char_count = len(text) |
+1
-1
@@ -1,1 +0,1 @@ | ||
| 1.2.3 | ||
| 1.2.4 |
+15
-0
@@ -0,1 +1,16 @@ | ||
| # 1.2.4 (Pre-release) | ||
| - **Subtitle generation is now available for all languages!** Abogen now supports subtitle generation for non-English languages using audio duration-based timing. Available modes include `Line`, `Sentence`, and `Sentence + Comma`. (Note: Word-level subtitle modes remain English-only due to Kokoro's timestamp token limitations.) | ||
| - New option: **"Use spaCy for sentence segmentation"** You can now use [spaCy](https://spacy.io/) to automatically detect sentence boundaries and produce cleaner, more readable subtitles. Quick summary: | ||
| - **What it does:** Splits text into natural sentences so subtitle entries read better and align more naturally with speech. | ||
| - **Why this helps:** The previous punctuation-based splitting could break sentences incorrectly at common abbreviations (e.g. "Mr.", "Dr.", "Prof.") or initials, producing wrong subtitle breaks. spaCy avoids those false splits by using linguistic rules to detect real sentence boundaries. | ||
| - **For Non-English:** spaCy runs **before** audio generation to create better sentence chunks for TTS. | ||
| - **For English:** spaCy runs **during** subtitle generation to find accurate sentence breaks after TTS. | ||
| - **Note:** spaCy segmentation is only applied when subtitle mode is `Sentence` or `Sentence + Comma`. When turned off, it falls back to simple punctuation-based splitting. | ||
- New option: **Pre-download models and voices for offline use**: You can now pre-download all required Kokoro models, voices, and spaCy language models using this option in the settings menu, allowing you to use Abogen completely offline without any internet connection.
| - Added support for `.` separator in timestamps (e.g. `HH:MM:SS.ms`) for timestamp-based text files. | ||
| - Optimized regex compilation and eliminated busy-wait loops. | ||
| - Possibly fixed `Silent truncation of long paragraphs` issue mentioned in [#91](https://github.com/denizsafak/abogen/issues/91) by [@xklzlxr](https://github.com/xklzlxr) | ||
| - Fixed unused regex patterns and variable naming conventions. | ||
| - Improvements in code and documentation. | ||
| # 1.2.3 | ||
@@ -2,0 +17,0 @@ - Same as 1.2.2, re-released to fix an issue with subtitle timing when using timestamp-based text files. |
+10
-6
| Metadata-Version: 2.4 | ||
| Name: abogen | ||
| Version: 1.2.3 | ||
| Version: 1.2.4 | ||
| Summary: Generate audiobooks from EPUBs, PDFs and text with synchronized captions. | ||
@@ -35,2 +35,3 @@ Project-URL: Homepage, https://github.com/denizsafak/abogen | ||
| Requires-Dist: soundfile>=0.13.1 | ||
| Requires-Dist: spacy>=3.8.7 | ||
| Requires-Dist: static-ffmpeg>=2.13 | ||
@@ -212,4 +213,6 @@ Description-Content-Type: text/markdown | ||
| | **Subtitle speed adjustment method** | Choose how to speed up audio when needed: `TTS Regeneration (better quality)` re-generates the audio at a faster speed, while `FFmpeg Time-stretch (better speed)` quickly speeds up the generated audio. (for subtitle files). | | ||
| | **Use spaCy for sentence segmentation** | When this option is enabled, Abogen uses [spaCy](https://spacy.io/) to detect sentence boundaries more accurately, instead of using punctuation marks (like periods, question marks, etc.) to split sentences, which could incorrectly cut off phrases like "Mr." or "Dr.". With spaCy, sentences are divided more accurately. For non-English text, spaCy runs **before** audio generation to create sentence chunks. For English text, spaCy runs **during** subtitle generation to improve timing and readability. spaCy is only used when subtitle mode is `Sentence` or `Sentence + Comma`. If you prefer the old punctuation splitting method, you can turn this option off. | | ||
| | **Pre-download models and voices for offline use** | Opens a window that displays the available models and voices. Click `Download all` button to download all required models and voices, allowing you to use Abogen completely offline without any internet connection. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Check for updates at startup** | Automatically checks for updates when the program starts. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Reset to default settings** | Resets all settings to their default values. | | ||
@@ -277,3 +280,3 @@ | ||
| ## `About Timestamp-based Text Files` | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
@@ -293,3 +296,3 @@ Format your text file like this: | ||
| **Important notes:** | ||
| - Timestamps must be in `HH:MM:SS` or `HH:MM:SS,ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30,500` for 5 minutes 30.5 seconds) | ||
| - Timestamps must be in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30.500` for 5 minutes 30.5 seconds) | ||
| - Milliseconds are optional and provide precision up to 1/1000th of a second | ||
@@ -520,2 +523,3 @@ - Text before the first timestamp (if any) will automatically start at `00:00:00` | ||
| - Abogen uses [Kokoro](https://github.com/hexgrad/kokoro) for its high-quality, natural-sounding text-to-speech synthesis. Huge thanks to the Kokoro team for making this possible. | ||
| - Thanks to the [spaCy](https://spacy.io/) project for its sentence-segmentation tools, which help Abogen produce cleaner, more natural sentence segmentation. | ||
| - Thanks to [@wojiushixiaobai](https://github.com/wojiushixiaobai) for [Embedded Python](https://github.com/wojiushixiaobai/Python-Embed-Win64) packages. These modified packages include pip pre-installed, enabling Abogen to function as a standalone application without requiring users to separately install Python in Windows. | ||
@@ -533,5 +537,5 @@ - Thanks to creators of [EbookLib](https://github.com/aerkalov/ebooklib), a Python library for reading and writing ePub files, which is used for extracting text from ePub files. | ||
| > [!IMPORTANT] | ||
| > Subtitle generation currently works only for English. This is because Kokoro provides timestamp tokens only for English text. If you want subtitles in other languages, please request this feature in the [Kokoro project](https://github.com/hexgrad/kokoro). For more technical details, see [this line](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383) in the Kokoro's code. | ||
| > [!NOTE] | ||
| > Abogen supports subtitle generation for all languages. However, word-level subtitle modes (e.g., "1 word", "2 words", "3 words", etc.) are only available for English because [Kokoro provides timestamp tokens only for English text](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383). For non-English languages, Abogen uses a duration-based fallback that supports sentence-level and comma-based subtitle modes ("Line", "Sentence", "Sentence + Comma"). If you need word-level subtitles for other languages, please request that feature in the [Kokoro project](https://github.com/hexgrad/kokoro). | ||
| > Tags: audiobook, kokoro, text-to-speech, TTS, audiobook generator, audiobooks, text to speech, audiobook maker, audiobook creator, audiobook generator, voice-synthesis, text to audio, text to audio converter, text to speech converter, text to speech generator, text to speech software, text to speech app, epub to audio, pdf to audio, markdown to audio, subtitle to audio, srt to audio, ass to audio, vtt to audio, webvtt to audio, content-creation, media-generation |
+2
-1
@@ -28,3 +28,4 @@ [build-system] | ||
| "static_ffmpeg>=2.13", | ||
| "Markdown>=3.9" | ||
| "Markdown>=3.9", | ||
| "spacy>=3.8.7" | ||
| ] | ||
@@ -31,0 +32,0 @@ |
+8
-5
@@ -174,4 +174,6 @@ # abogen <img width="40px" title="abogen icon" src="https://raw.githubusercontent.com/denizsafak/abogen/refs/heads/main/abogen/assets/icon.ico" align="right" style="padding-left: 10px; padding-top:5px;"> | ||
| | **Subtitle speed adjustment method** | Choose how to speed up audio when needed: `TTS Regeneration (better quality)` re-generates the audio at a faster speed, while `FFmpeg Time-stretch (better speed)` quickly speeds up the generated audio. (for subtitle files). | | ||
| | **Use spaCy for sentence segmentation** | When this option is enabled, Abogen uses [spaCy](https://spacy.io/) to detect sentence boundaries more accurately, instead of using punctuation marks (like periods, question marks, etc.) to split sentences, which could incorrectly cut off phrases like "Mr." or "Dr.". With spaCy, sentences are divided more accurately. For non-English text, spaCy runs **before** audio generation to create sentence chunks. For English text, spaCy runs **during** subtitle generation to improve timing and readability. spaCy is only used when subtitle mode is `Sentence` or `Sentence + Comma`. If you prefer the old punctuation splitting method, you can turn this option off. | | ||
| | **Pre-download models and voices for offline use** | Opens a window that displays the available models and voices. Click `Download all` button to download all required models and voices, allowing you to use Abogen completely offline without any internet connection. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Check for updates at startup** | Automatically checks for updates when the program starts. | | ||
| | **Disable Kokoro's internet access** | Prevents Kokoro from downloading models or voices from HuggingFace Hub, useful for offline use. | | ||
| | **Reset to default settings** | Resets all settings to their default values. | | ||
@@ -239,3 +241,3 @@ | ||
| ## `About Timestamp-based Text Files` | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS` or `HH:MM:SS,ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
| Similar to converting subtitle files to audio, Abogen can automatically detect text files that contain timestamps in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format. When timestamps are found inside your text file, Abogen will ask if you want to use them for audio timing. This is useful for creating timed narrations, scripts, or transcripts where you need exact control over when each segment is spoken. | ||
@@ -255,3 +257,3 @@ Format your text file like this: | ||
| **Important notes:** | ||
| - Timestamps must be in `HH:MM:SS` or `HH:MM:SS,ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30,500` for 5 minutes 30.5 seconds) | ||
| - Timestamps must be in `HH:MM:SS`, `HH:MM:SS,ms` or `HH:MM:SS.ms` format (e.g., `00:05:30` for 5 minutes 30 seconds, or `00:05:30.500` for 5 minutes 30.5 seconds) | ||
| - Milliseconds are optional and provide precision up to 1/1000th of a second | ||
@@ -482,2 +484,3 @@ - Text before the first timestamp (if any) will automatically start at `00:00:00` | ||
| - Abogen uses [Kokoro](https://github.com/hexgrad/kokoro) for its high-quality, natural-sounding text-to-speech synthesis. Huge thanks to the Kokoro team for making this possible. | ||
| - Thanks to the [spaCy](https://spacy.io/) project for its sentence-segmentation tools, which help Abogen produce cleaner, more natural sentence segmentation. | ||
| - Thanks to [@wojiushixiaobai](https://github.com/wojiushixiaobai) for [Embedded Python](https://github.com/wojiushixiaobai/Python-Embed-Win64) packages. These modified packages include pip pre-installed, enabling Abogen to function as a standalone application without requiring users to separately install Python in Windows. | ||
@@ -495,5 +498,5 @@ - Thanks to creators of [EbookLib](https://github.com/aerkalov/ebooklib), a Python library for reading and writing ePub files, which is used for extracting text from ePub files. | ||
| > [!IMPORTANT] | ||
| > Subtitle generation currently works only for English. This is because Kokoro provides timestamp tokens only for English text. If you want subtitles in other languages, please request this feature in the [Kokoro project](https://github.com/hexgrad/kokoro). For more technical details, see [this line](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383) in the Kokoro's code. | ||
| > [!NOTE] | ||
| > Abogen supports subtitle generation for all languages. However, word-level subtitle modes (e.g., "1 word", "2 words", "3 words", etc.) are only available for English because [Kokoro provides timestamp tokens only for English text](https://github.com/hexgrad/kokoro/blob/6d87f4ae7abc2d14dbc4b3ef2e5f19852e861ac2/kokoro/pipeline.py#L383). For non-English languages, Abogen uses a duration-based fallback that supports sentence-level and comma-based subtitle modes ("Line", "Sentence", "Sentence + Comma"). If you need word-level subtitles for other languages, please request that feature in the [Kokoro project](https://github.com/hexgrad/kokoro). | ||
| > Tags: audiobook, kokoro, text-to-speech, TTS, audiobook generator, audiobooks, text to speech, audiobook maker, audiobook creator, audiobook generator, voice-synthesis, text to audio, text to audio converter, text to speech converter, text to speech generator, text to speech software, text to speech app, epub to audio, pdf to audio, markdown to audio, subtitle to audio, srt to audio, ass to audio, vtt to audio, webvtt to audio, content-creation, media-generation |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
835514
6.33%40
5.26%11856
8.92%