You're Invited: Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26. RSVP
Socket
Book a DemoSign in
Socket

iman

Package Overview
Dependencies
Maintainers
1
Versions
131
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

iman - pypi Package Compare versions

Comparing version
1.0.28
to
2.0
+25
iman/svad/__init__.py
# iman/svad/__init__.py -- package initializer for the silero-VAD wrapper.
from .model import load_silero_vad

# NOTE: loading the model here is a deliberate import-time side effect:
# importing the package immediately deserializes the JIT model (and prints
# a confirmation). The instance is shared by svad() below.
vad_model = load_silero_vad()
print("silero_vad jit model Loaded")

from .utils_vad import (get_speech_timestamps,
                        save_audio,
                        read_audio,
                        VADIterator,
                        collect_chunks,
                        drop_chunks)
def svad(filename, sampling_rate=16000, min_speech_duration_ms=250,
         max_speech_duration_s=float('inf'), min_silence_duration_ms=100):
    """Run voice-activity detection on an audio file.

    Reads *filename* with ``read_audio`` and feeds it through the
    module-level ``vad_model``.

    Returns
    -------
    tuple
        ``(speech_timestamps, wav)`` where timestamps are dicts with
        ``start``/``end`` expressed in seconds, and ``wav`` is the decoded
        one-dimensional audio tensor.
    """
    audio = read_audio(filename)
    stamps = get_speech_timestamps(
        audio,
        vad_model,
        return_seconds=True,  # report timestamps in seconds, not samples
        sampling_rate=sampling_rate,
        min_speech_duration_ms=min_speech_duration_ms,
        max_speech_duration_s=max_speech_duration_s,
        min_silence_duration_ms=min_silence_duration_ms,
    )
    return stamps, audio
# iman/svad/model.py -- loader for the bundled silero VAD model files.
from .utils_vad import init_jit_model, OnnxWrapper
import torch

# Pin torch to a single thread at import time; VAD chunks are tiny, so
# thread fan-out only adds overhead.
torch.set_num_threads(1)
import os
def load_silero_vad(onnx=False, opset_version=16):
    """Load the silero VAD model bundled in this package's ``data`` folder.

    Parameters
    ----------
    onnx: bool (default - False)
        When True, load the ONNX model through ``OnnxWrapper``; otherwise
        load the TorchScript (.jit) model.
    opset_version: int (default - 16)
        ONNX opset of the bundled model; only 16 is currently shipped.

    Returns
    -------
    The loaded model object.

    Raises
    ------
    Exception
        If ``onnx`` is True and ``opset_version`` is unsupported.
    FileNotFoundError
        If the expected model file is missing from the data directory.
    """
    available_ops = [16]
    if onnx and opset_version not in available_ops:
        raise Exception(f'Available ONNX opset_version: {available_ops}')

    # Pick the on-disk filename for the requested backend/opset.
    if not onnx:
        model_name = 'silero_vad.jit'
    elif opset_version == 16:
        model_name = 'silero_vad.onnx'
    else:
        # Unreachable while available_ops == [16]; kept for future opsets.
        model_name = f'silero_vad_16k_op{opset_version}.onnx'

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    model_file_path = os.path.join(data_dir, model_name)
    if not os.path.exists(model_file_path):
        raise FileNotFoundError(f"Model file not found: {model_file_path}")

    if onnx:
        return OnnxWrapper(str(model_file_path), force_onnx_cpu=True)
    return init_jit_model(model_file_path)
# iman/svad/utils_vad.py -- VAD utilities (adapted from the silero-vad project).
import torch
import torchaudio
from typing import Callable, List
import warnings

# Languages listed by the upstream silero project; not read anywhere in
# this file -- presumably informational metadata for callers.
languages = ['ru', 'en', 'de', 'es']
class OnnxWrapper():
    """ONNX-runtime backend for the silero VAD model.

    Holds streaming state (the model's recurrent state plus a short audio
    context) between calls so consecutive fixed-size chunks can be scored
    as one continuous stream.
    """

    def __init__(self, path, force_onnx_cpu=False):
        # numpy is imported lazily and published via `global np` so that
        # __call__ (which references the module-level name `np`) works even
        # though numpy is only needed for the ONNX backend.
        import numpy as np
        global np
        import onnxruntime
        opts = onnxruntime.SessionOptions()
        # Single-threaded inference: chunks are tiny, threads add overhead.
        opts.inter_op_num_threads = 1
        opts.intra_op_num_threads = 1
        if force_onnx_cpu and 'CPUExecutionProvider' in onnxruntime.get_available_providers():
            self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'], sess_options=opts)
        else:
            self.session = onnxruntime.InferenceSession(path, sess_options=opts)
        self.reset_states()
        # Models whose filename contains '16k' are single-rate variants.
        if '16k' in path:
            warnings.warn('This model support only 16000 sampling rate!')
            self.sample_rates = [16000]
        else:
            self.sample_rates = [8000, 16000]

    def _validate_input(self, x, sr: int):
        """Normalize a chunk to shape (batch, samples) and a supported rate.

        Multiples of 16 kHz are decimated down to 16 kHz by striding.
        Raises ValueError for >2-D input, unsupported rates, or chunks
        shorter than sr/31.25 samples (i.e. < 512 @ 16 kHz / < 256 @ 8 kHz).
        """
        if x.dim() == 1:
            x = x.unsqueeze(0)
        if x.dim() > 2:
            raise ValueError(f"Too many dimensions for input audio chunk {x.dim()}")
        if sr != 16000 and (sr % 16000 == 0):
            step = sr // 16000
            x = x[:,::step]
            sr = 16000
        if sr not in self.sample_rates:
            raise ValueError(f"Supported sampling rates: {self.sample_rates} (or multiply of 16000)")
        if sr / x.shape[1] > 31.25:
            raise ValueError("Input audio chunk is too short")
        return x, sr

    def reset_states(self, batch_size=1):
        """Clear the streaming state (call between independent audio streams)."""
        self._state = torch.zeros((2, batch_size, 128)).float()
        self._context = torch.zeros(0)
        self._last_sr = 0
        self._last_batch_size = 0

    def __call__(self, x, sr: int):
        """Score one fixed-size chunk; returns per-batch speech probability.

        Chunks must be exactly 512 samples @ 16 kHz or 256 samples @ 8 kHz.
        State is reset automatically if the sample rate or batch size
        changes between calls.
        """
        x, sr = self._validate_input(x, sr)
        num_samples = 512 if sr == 16000 else 256
        if x.shape[-1] != num_samples:
            raise ValueError(f"Provided number of samples is {x.shape[-1]} (Supported values: 256 for 8000 sample rate, 512 for 16000)")
        batch_size = x.shape[0]
        context_size = 64 if sr == 16000 else 32
        if not self._last_batch_size:
            self.reset_states(batch_size)
        if (self._last_sr) and (self._last_sr != sr):
            self.reset_states(batch_size)
        if (self._last_batch_size) and (self._last_batch_size != batch_size):
            self.reset_states(batch_size)
        if not len(self._context):
            self._context = torch.zeros(batch_size, context_size)
        # Prepend the tail of the previous chunk so windows overlap slightly.
        x = torch.cat([self._context, x], dim=1)
        if sr in [8000, 16000]:
            ort_inputs = {'input': x.numpy(), 'state': self._state.numpy(), 'sr': np.array(sr, dtype='int64')}
            ort_outs = self.session.run(None, ort_inputs)
            out, state = ort_outs
            self._state = torch.from_numpy(state)
        else:
            raise ValueError()
        # Remember the last context_size samples for the next call.
        self._context = x[..., -context_size:]
        self._last_sr = sr
        self._last_batch_size = batch_size
        out = torch.from_numpy(out)
        return out

    def audio_forward(self, x, sr: int):
        """Score a whole waveform chunk-by-chunk; returns stacked outputs.

        Input is zero-padded at the end to a multiple of the chunk size.
        """
        outs = []
        x, sr = self._validate_input(x, sr)
        self.reset_states()
        num_samples = 512 if sr == 16000 else 256
        if x.shape[1] % num_samples:
            pad_num = num_samples - (x.shape[1] % num_samples)
            x = torch.nn.functional.pad(x, (0, pad_num), 'constant', value=0.0)
        for i in range(0, x.shape[1], num_samples):
            wavs_batch = x[:, i:i+num_samples]
            out_chunk = self.__call__(wavs_batch, sr)
            outs.append(out_chunk)
        stacked = torch.cat(outs, dim=1)
        return stacked.cpu()
class Validator():
    """Download a VAD model from *url* and run inference on tensors.

    The file is fetched to the local path ``inf.model``; the backend
    (ONNX vs TorchScript) is chosen from the URL suffix.
    """

    def __init__(self, url, force_onnx_cpu):
        self.onnx = url.endswith('.onnx')
        torch.hub.download_url_to_file(url, 'inf.model')
        if not self.onnx:
            self.model = init_jit_model(model_path='inf.model')
            return
        import onnxruntime
        providers = onnxruntime.get_available_providers()
        if force_onnx_cpu and 'CPUExecutionProvider' in providers:
            self.model = onnxruntime.InferenceSession('inf.model', providers=['CPUExecutionProvider'])
        else:
            self.model = onnxruntime.InferenceSession('inf.model')

    def __call__(self, inputs: torch.Tensor):
        """Run the model on *inputs*; ONNX outputs are converted to tensors."""
        with torch.no_grad():
            if not self.onnx:
                return self.model(inputs)
            ort_inputs = {'input': inputs.cpu().numpy()}
            raw_outs = self.model.run(None, ort_inputs)
            return [torch.Tensor(o) for o in raw_outs]
def read_audio(path: str,
               sampling_rate: int = 16000):
    """Read an audio file as a mono float tensor at *sampling_rate*.

    Tries sox effects first (mixdown + rate conversion in one pass); on any
    failure falls back to ``torchaudio.load`` followed by a manual mono
    downmix and resample.

    Parameters
    ----------
    path: str
        Path of the audio file to read.
    sampling_rate: int (default - 16000)
        Target sample rate of the returned tensor.

    Returns
    -------
    torch.Tensor, one dimensional
    """
    list_backends = torchaudio.list_audio_backends()
    assert len(list_backends) > 0, 'The list of available backends is empty, please install backend manually. \
\n Recommendations: \n \tSox (UNIX OS) \n \tSoundfile (Windows OS, UNIX OS) \n \tffmpeg (Windows OS, UNIX OS)'
    try:
        effects = [
            ['channels', '1'],
            ['rate', str(sampling_rate)]
        ]
        wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
    # BUGFIX: was a bare `except:` which also swallowed KeyboardInterrupt /
    # SystemExit; only ordinary errors should trigger the fallback path.
    except Exception:
        wav, sr = torchaudio.load(path)
        if wav.size(0) > 1:
            # Downmix multi-channel audio to mono.
            wav = wav.mean(dim=0, keepdim=True)
        if sr != sampling_rate:
            transform = torchaudio.transforms.Resample(orig_freq=sr,
                                                       new_freq=sampling_rate)
            wav = transform(wav)
            sr = sampling_rate
    assert sr == sampling_rate
    return wav.squeeze(0)
def save_audio(path: str,
               tensor: torch.Tensor,
               sampling_rate: int = 16000):
    """Write a one-dimensional audio tensor to *path* as 16-bit PCM."""
    # torchaudio.save expects a (channels, samples) layout.
    channel_first = tensor.unsqueeze(0)
    torchaudio.save(path, channel_first, sampling_rate, bits_per_sample=16)
def init_jit_model(model_path: str,
                   device=torch.device('cpu')):
    """Load a TorchScript model from *model_path* onto *device* in eval mode."""
    jit_model = torch.jit.load(model_path, map_location=device)
    jit_model.eval()
    return jit_model
def make_visualization(probs, step):
    """Plot per-chunk speech probabilities as an area chart.

    probs: sequence of speech probabilities, one per analysis window
    step: window duration in seconds (x-axis spacing between points)
    """
    import pandas as pd
    timeline = [i * step for i in range(len(probs))]
    frame = pd.DataFrame({'probs': probs}, index=timeline)
    frame.plot(figsize=(16, 8),
               kind='area', ylim=[0, 1.05], xlim=[0, len(probs) * step],
               xlabel='seconds',
               ylabel='speech probability',
               colormap='tab20')
@torch.no_grad()
def get_speech_timestamps(audio: torch.Tensor,
                          model,
                          threshold: float = 0.5,
                          sampling_rate: int = 16000,
                          min_speech_duration_ms: int = 250,
                          max_speech_duration_s: float = float('inf'),
                          min_silence_duration_ms: int = 100,
                          speech_pad_ms: int = 30,
                          return_seconds: bool = False,
                          time_resolution: int = 1,
                          visualize_probs: bool = False,
                          progress_tracking_callback: Callable[[float], None] = None,
                          neg_threshold: float = None,
                          window_size_samples: int = 512,
                          min_silence_at_max_speech: float = 98,
                          use_max_poss_sil_at_max_speech: bool = True):
    """
    This method is used for splitting long audios into speech chunks using silero VAD

    Parameters
    ----------
    audio: torch.Tensor, one dimensional
        One dimensional float torch.Tensor, other types are casted to torch if possible
    model: preloaded .jit/.onnx silero VAD model
    threshold: float (default - 0.5)
        Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, probabilities ABOVE this value are considered as SPEECH.
        It is better to tune this parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets.
    sampling_rate: int (default - 16000)
        Currently silero VAD models support 8000 and 16000 (or multiply of 16000) sample rates
    min_speech_duration_ms: int (default - 250 milliseconds)
        Final speech chunks shorter min_speech_duration_ms are thrown out
    max_speech_duration_s: int (default - inf)
        Maximum duration of speech chunks in seconds
        Chunks longer than max_speech_duration_s will be split at the timestamp of the last silence that lasts more than 100ms (if any), to prevent aggressive cutting.
        Otherwise, they will be split aggressively just before max_speech_duration_s.
    min_silence_duration_ms: int (default - 100 milliseconds)
        In the end of each speech chunk wait for min_silence_duration_ms before separating it
    speech_pad_ms: int (default - 30 milliseconds)
        Final speech chunks are padded by speech_pad_ms each side
    return_seconds: bool (default - False)
        whether return timestamps in seconds (default - samples)
    time_resolution: bool (default - 1)
        time resolution of speech coordinates when requested as seconds
    visualize_probs: bool (default - False)
        whether draw prob hist or not
    progress_tracking_callback: Callable[[float], None] (default - None)
        callback function taking progress in percents as an argument
    neg_threshold: float (default = threshold - 0.15)
        Negative threshold (noise or exit threshold). If model's current state is SPEECH, values BELOW this value are considered as NON-SPEECH.
    min_silence_at_max_speech: float (default - 98ms)
        Minimum silence duration in ms which is used to avoid abrupt cuts when max_speech_duration_s is reached
    use_max_poss_sil_at_max_speech: bool (default - True)
        Whether to use the maximum possible silence at max_speech_duration_s or not. If not, the last silence is used.
    window_size_samples: int (default - 512 samples)
        !!! DEPRECATED, DOES NOTHING !!!

    Returns
    ----------
    speeches: list of dicts
        list containing ends and beginnings of speech chunks (samples or seconds based on return_seconds)
    """
    if not torch.is_tensor(audio):
        try:
            audio = torch.Tensor(audio)
        # BUGFIX: was a bare `except:`; narrowed so Ctrl-C is not swallowed.
        except Exception:
            raise TypeError("Audio cannot be casted to tensor. Cast it manually")

    if len(audio.shape) > 1:
        for i in range(len(audio.shape)):  # trying to squeeze empty dimensions
            audio = audio.squeeze(0)
        if len(audio.shape) > 1:
            raise ValueError("More than one dimension in audio. Are you trying to process audio with 2 channels?")

    if sampling_rate > 16000 and (sampling_rate % 16000 == 0):
        # Decimate multiples of 16 kHz down to 16 kHz by striding.
        step = sampling_rate // 16000
        sampling_rate = 16000
        audio = audio[::step]
        warnings.warn('Sampling rate is a multiply of 16000, casting to 16000 manually!')
    else:
        step = 1

    if sampling_rate not in [8000, 16000]:
        raise ValueError("Currently silero VAD models support 8000 and 16000 (or multiply of 16000) sample rates")

    # The window size is fixed by the model; the parameter is deprecated.
    window_size_samples = 512 if sampling_rate == 16000 else 256
    hop_size_samples = int(window_size_samples)

    model.reset_states()

    # Convert all millisecond/second parameters to sample counts.
    min_speech_samples = sampling_rate * min_speech_duration_ms / 1000
    speech_pad_samples = sampling_rate * speech_pad_ms / 1000
    max_speech_samples = sampling_rate * max_speech_duration_s - window_size_samples - 2 * speech_pad_samples
    min_silence_samples = sampling_rate * min_silence_duration_ms / 1000
    min_silence_samples_at_max_speech = sampling_rate * min_silence_at_max_speech / 1000

    audio_length_samples = len(audio)

    # Pass 1: score every window; the last window is zero-padded to size.
    speech_probs = []
    for current_start_sample in range(0, audio_length_samples, hop_size_samples):
        chunk = audio[current_start_sample: current_start_sample + window_size_samples]
        if len(chunk) < window_size_samples:
            chunk = torch.nn.functional.pad(chunk, (0, int(window_size_samples - len(chunk))))
        # BUGFIX: this call used to be wrapped in `try/except` that dropped
        # into `ipdb.set_trace()` -- a debugging leftover that swallowed the
        # real error (and left speech_prob unbound). Let errors propagate.
        speech_prob = model(chunk, sampling_rate).item()
        speech_probs.append(speech_prob)
        # calculate progress and send it to the callback function
        progress = current_start_sample + hop_size_samples
        if progress > audio_length_samples:
            progress = audio_length_samples
        progress_percent = (progress / audio_length_samples) * 100
        if progress_tracking_callback:
            progress_tracking_callback(progress_percent)

    # Pass 2: state machine over the probabilities.
    triggered = False
    speeches = []
    current_speech = {}

    if neg_threshold is None:
        neg_threshold = max(threshold - 0.15, 0.01)
    temp_end = 0  # to save potential segment end (and tolerate some silence)
    prev_end = next_start = 0  # to save potential segment limits in case of maximum segment size reached
    possible_ends = []

    for i, speech_prob in enumerate(speech_probs):
        if (speech_prob >= threshold) and temp_end:
            # Speech resumed after a tentative silence: remember the silence
            # as a possible split point if it was long enough.
            # (A redundant `if temp_end != 0` check was removed -- the outer
            # condition already guarantees temp_end is non-zero.)
            sil_dur = (hop_size_samples * i) - temp_end
            if sil_dur > min_silence_samples_at_max_speech:
                possible_ends.append((temp_end, sil_dur))
            temp_end = 0
            if next_start < prev_end:
                next_start = hop_size_samples * i

        if (speech_prob >= threshold) and not triggered:
            triggered = True
            current_speech['start'] = hop_size_samples * i
            continue

        if triggered and (hop_size_samples * i) - current_speech['start'] > max_speech_samples:
            if possible_ends:
                if use_max_poss_sil_at_max_speech:
                    prev_end, dur = max(possible_ends, key=lambda x: x[1])  # use the longest possible silence segment in the current speech chunk
                else:
                    prev_end, dur = possible_ends[-1]  # use the last possible silence segment
                current_speech['end'] = prev_end
                speeches.append(current_speech)
                current_speech = {}
                next_start = prev_end + dur
                if next_start < prev_end + hop_size_samples * i:  # previously reached silence (< neg_thres) and is still not speech (< thres)
                    current_speech['start'] = next_start
                else:
                    triggered = False
                prev_end = next_start = temp_end = 0
                possible_ends = []
            else:
                # No usable silence seen: cut aggressively right here.
                current_speech['end'] = hop_size_samples * i
                speeches.append(current_speech)
                current_speech = {}
                prev_end = next_start = temp_end = 0
                triggered = False
                possible_ends = []
            continue

        if (speech_prob < neg_threshold) and triggered:
            if not temp_end:
                temp_end = hop_size_samples * i
            if (hop_size_samples * i) - temp_end < min_silence_samples:
                # Silence not long enough yet -- keep the segment open.
                continue
            else:
                current_speech['end'] = temp_end
                if (current_speech['end'] - current_speech['start']) > min_speech_samples:
                    speeches.append(current_speech)
                current_speech = {}
                prev_end = next_start = temp_end = 0
                triggered = False
                possible_ends = []
                continue

    # Close a segment still open at end-of-audio, if long enough.
    if current_speech and (audio_length_samples - current_speech['start']) > min_speech_samples:
        current_speech['end'] = audio_length_samples
        speeches.append(current_speech)

    # Pad each segment by speech_pad_samples, sharing the gap when two
    # neighboring segments are closer than twice the pad.
    for i, speech in enumerate(speeches):
        if i == 0:
            speech['start'] = int(max(0, speech['start'] - speech_pad_samples))
        if i != len(speeches) - 1:
            silence_duration = speeches[i+1]['start'] - speech['end']
            if silence_duration < 2 * speech_pad_samples:
                speech['end'] += int(silence_duration // 2)
                speeches[i+1]['start'] = int(max(0, speeches[i+1]['start'] - silence_duration // 2))
            else:
                speech['end'] = int(min(audio_length_samples, speech['end'] + speech_pad_samples))
                speeches[i+1]['start'] = int(max(0, speeches[i+1]['start'] - speech_pad_samples))
        else:
            speech['end'] = int(min(audio_length_samples, speech['end'] + speech_pad_samples))

    if return_seconds:
        audio_length_seconds = audio_length_samples / sampling_rate
        for speech_dict in speeches:
            speech_dict['start'] = max(round(speech_dict['start'] / sampling_rate, time_resolution), 0)
            speech_dict['end'] = min(round(speech_dict['end'] / sampling_rate, time_resolution), audio_length_seconds)
    elif step > 1:
        # Map sample coordinates back to the original (pre-decimation) rate.
        for speech_dict in speeches:
            speech_dict['start'] *= step
            speech_dict['end'] *= step

    if visualize_probs:
        make_visualization(speech_probs, hop_size_samples / sampling_rate)

    return speeches
class VADIterator:
    def __init__(self,
                 model,
                 threshold: float = 0.5,
                 sampling_rate: int = 16000,
                 min_silence_duration_ms: int = 100,
                 speech_pad_ms: int = 30
                 ):
        """
        Class for stream imitation

        Parameters
        ----------
        model: preloaded .jit/.onnx silero VAD model
        threshold: float (default - 0.5)
            Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, probabilities ABOVE this value are considered as SPEECH.
            It is better to tune this parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets.
        sampling_rate: int (default - 16000)
            Currently silero VAD models support 8000 and 16000 sample rates
        min_silence_duration_ms: int (default - 100 milliseconds)
            In the end of each speech chunk wait for min_silence_duration_ms before separating it
        speech_pad_ms: int (default - 30 milliseconds)
            Final speech chunks are padded by speech_pad_ms each side
        """
        self.model = model
        self.threshold = threshold
        self.sampling_rate = sampling_rate

        if sampling_rate not in [8000, 16000]:
            raise ValueError('VADIterator does not support sampling rates other than [8000, 16000]')

        # Millisecond parameters converted to sample counts once, up front.
        self.min_silence_samples = sampling_rate * min_silence_duration_ms / 1000
        self.speech_pad_samples = sampling_rate * speech_pad_ms / 1000
        self.reset_states()

    def reset_states(self):
        """Clear model memory and the iterator's trigger/position counters."""
        self.model.reset_states()
        self.triggered = False
        self.temp_end = 0
        self.current_sample = 0

    @torch.no_grad()
    def __call__(self, x, return_seconds=False, time_resolution: int = 1):
        """
        x: torch.Tensor
            audio chunk (see examples in repo)
        return_seconds: bool (default - False)
            whether return timestamps in seconds (default - samples)
        time_resolution: int (default - 1)
            time resolution of speech coordinates when requested as seconds
        """
        if not torch.is_tensor(x):
            try:
                x = torch.Tensor(x)
            except:
                raise TypeError("Audio cannot be casted to tensor. Cast it manually")

        chunk_len = len(x[0]) if x.dim() == 2 else len(x)
        self.current_sample += chunk_len

        speech_prob = self.model(x, self.sampling_rate).item()

        if speech_prob >= self.threshold:
            # Any tentative segment end is cancelled by renewed speech.
            if self.temp_end:
                self.temp_end = 0
            if self.triggered:
                return None
            # Transition silence -> speech: report a (padded) start.
            self.triggered = True
            speech_start = max(0, self.current_sample - self.speech_pad_samples - chunk_len)
            if return_seconds:
                return {'start': round(speech_start / self.sampling_rate, time_resolution)}
            return {'start': int(speech_start)}

        if self.triggered and speech_prob < self.threshold - 0.15:
            if not self.temp_end:
                self.temp_end = self.current_sample
            if self.current_sample - self.temp_end < self.min_silence_samples:
                # Silence still shorter than the required minimum.
                return None
            # Transition speech -> silence: report a (padded) end.
            speech_end = self.temp_end + self.speech_pad_samples - chunk_len
            self.temp_end = 0
            self.triggered = False
            if return_seconds:
                return {'end': round(speech_end / self.sampling_rate, time_resolution)}
            return {'end': int(speech_end)}

        return None
def collect_chunks(tss: List[dict],
                   wav: torch.Tensor,
                   seconds: bool = False,
                   sampling_rate: int = None) -> torch.Tensor:
    """Collect audio chunks from a longer audio clip

    This method extracts audio chunks from an audio clip, using a list of
    provided coordinates, and concatenates them together. Coordinates can be
    passed either as sample numbers or in seconds, in which case the audio
    sampling rate is also needed.

    Parameters
    ----------
    tss: List[dict]
        Coordinate list of the clips to collect from the audio.
    wav: torch.Tensor, one dimensional
        One dimensional float torch.Tensor, containing the audio to clip.
    seconds: bool (default - False)
        Whether input coordinates are passed as seconds or samples.
    sampling_rate: int (default - None)
        Input audio sampling rate. Required if seconds is True.

    Returns
    -------
    torch.Tensor, one dimensional
        One dimensional float torch.Tensor of the concatenated clipped audio
        chunks.

    Raises
    ------
    ValueError
        Raised if sampling_rate is not provided when seconds is True.
    """
    if seconds and not sampling_rate:
        raise ValueError('sampling_rate must be provided when seconds is True')

    _tss = _seconds_to_samples_tss(tss, sampling_rate) if seconds else tss
    # BUGFIX: an empty coordinate list used to crash in torch.cat([]);
    # return an empty slice (same dtype/device as wav) instead.
    if not _tss:
        return wav[:0]
    chunks = [wav[i['start']:i['end']] for i in _tss]
    return torch.cat(chunks)
def drop_chunks(tss: List[dict],
                wav: torch.Tensor,
                seconds: bool = False,
                sampling_rate: int = None) -> torch.Tensor:
    """Drop audio chunks from a longer audio clip

    This method extracts audio chunks from an audio clip, using a list of
    provided coordinates, and drops them. Coordinates can be passed either as
    sample numbers or in seconds, in which case the audio sampling rate is also
    needed.

    Parameters
    ----------
    tss: List[dict]
        Coordinate list of the clips to drop from the audio.
    wav: torch.Tensor, one dimensional
        One dimensional float torch.Tensor, containing the audio to clip.
    seconds: bool (default - False)
        Whether input coordinates are passed as seconds or samples.
    sampling_rate: int (default - None)
        Input audio sampling rate. Required if seconds is True.

    Returns
    -------
    torch.Tensor, one dimensional
        One dimensional float torch.Tensor of the input audio minus the dropped
        chunks.

    Raises
    ------
    ValueError
        Raised if sampling_rate is not provided when seconds is True.
    """
    if seconds and not sampling_rate:
        raise ValueError('sampling_rate must be provided when seconds is True')

    _tss = _seconds_to_samples_tss(tss, sampling_rate) if seconds else tss
    chunks = []
    cur_start = 0
    for i in _tss:
        chunks.append(wav[cur_start: i['start']])
        cur_start = i['end']
    # BUGFIX: the audio after the last dropped chunk was silently discarded,
    # and an empty coordinate list crashed in torch.cat([]). Appending the
    # tail fixes both: with no coordinates the whole input is returned.
    chunks.append(wav[cur_start:])
    return torch.cat(chunks)
def _seconds_to_samples_tss(tss: List[dict], sampling_rate: int) -> List[dict]:
"""Convert coordinates expressed in seconds to sample coordinates.
"""
return [{
'start': round(crd['start']) * sampling_rate,
'end': round(crd['end']) * sampling_rate
} for crd in tss]
+272
-210
Metadata-Version: 2.1
Name: iman
Version: 1.0.28
Version: 2.0
Summary: Python package for daily Tasks

@@ -12,328 +12,390 @@ Author: Iman Sarraf

from iman import *
==================
iman
====
1-plt
Overview
--------
2-now() ``get time``
``iman`` is a comprehensive Python package offering a wide array of utilities for audio processing, file manipulation, machine learning, system operations, web utilities, and more. It provides tools for tasks such as audio feature extraction, voice activity detection, file I/O, system monitoring, and integration with frameworks like PyTorch and TensorFlow. The package is organized into multiple submodules, each designed for specific functionalities, as detailed below.
3-F ``format floating point``
Installation
------------
4-D ``format int number``
Install ``iman`` via pip:
5-Write_List(MyList,Filename)
.. code-block:: bash
6-Write_Dic(MyDic,Filename)
pip install iman
7-Read(Filename) ``read txt file``
Ensure dependencies like ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, and external tools like ``ffmpeg``, ``ffprobe``, and ``WinRAR`` are installed. Some functions require pre-trained models or specific paths (e.g., model files, ``ffmpeg_path``).
8-Read_Lines(Filename) ``read txt file line by line and return list``
Usage
-----
9-Write(_str,Filename)
Below are examples of key functionalities from the ``iman`` package. For detailed function signatures and parameters, refer to the sections below or use the built-in help system:
10-gf(pattern) ``Get files in a directory``
.. code-block:: python
11-gfa(directory_pattern , ext="*.*") ``Get Files in a Directory and SubDirectories``
from iman import examples
examples.help("Audio") # Get help on a specific module
12-ReadE(Filename) ``Read Excel files``
**Example: Audio Processing**
13-PM(dir) ``create directory``
.. code-block:: python
14-PB(fname) ``get basename``
from iman import Audio
15-PN(fname) ``get file name``
# Read a WAV file
data, sr = Audio.Read("audio.wav", sr=16000, start_from=0, dur=None, mono=True, ffmpeg_path="c:\\ffmpeg.exe", ffprobe_path="c:\\ffprobe.exe")
16-PE(fname) ``get ext``
# Resample and write audio
resampled = Audio.Resample(data, fs=sr, sr=8000)
Audio.Write("output.wav", resampled, fs=8000)
17-PD(fname) ``get directory``
**Example: File Operations**
18-PS(fname) ``get size``
.. code-block:: python
19-PJ(segments) ``Join Path``
from iman import *
20-clear() ``clear cmd``
# Get files matching a pattern
files = gf("*.txt")
21-os
# Write a dictionary to a file
my_dict = {"key1": "value1", "key2": "value2"}
Write_Dic(my_dict, "output.txt")
22-np
**Example: VAD with Segmenter**
23-RI(start_int , end_int , count=1) ``random int``
.. code-block:: python
24-RF(start_float , end_float , count=1) ``random float``
from iman.sad_torch_mfcc import Segmenter
25-RS(Arr) ``shuffle``
seg = Segmenter(batch_size=32, vad_type="vad", sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path="c:\\ffmpeg.exe", complete_output=False, device="cuda", input_type="file")
isig, wav, mfcc = seg("audio.wav")
26-LJ(job_file_name)
Modules and Functions
---------------------
27-SJ(value , job_file_name)
The ``iman`` package is organized into several submodules, each with specific functions. Below is a complete list of modules and their functions as provided.
28-LN(np_file_name)
iman
~~~~
29-SN(arr , np_file_name)
- ``plt``: Matplotlib plotting library.
- ``now()``: Get current time.
- ``F``: Format floating-point number.
- ``D``: Format integer number.
- ``Write_List(MyList, Filename)``: Write a list to a text file.
- ``Write_Dic(MyDic, Filename)``: Write a dictionary to a text file.
- ``Read(Filename)``: Read a text file.
- ``Read_Lines(Filename)``: Read a text file line by line and return a list.
- ``Write(_str, Filename)``: Write a string to a text file.
- ``gf(pattern)``: Get files in a directory matching a pattern.
- ``gfa(directory_pattern, ext="*.*")``: Get files in a directory and subdirectories.
- ``ReadE(Filename)``: Read Excel files.
- ``PM(dir)``: Create a directory.
- ``PB(fname)``: Get basename of a file.
- ``PN(fname)``: Get filename without path.
- ``PE(fname)``: Get file extension.
- ``PD(fname)``: Get directory of a file.
- ``PS(fname)``: Get file size.
- ``PJ(segments)``: Join path segments.
- ``clear()``: Clear command-line interface.
- ``os``: Python os module.
- ``np``: NumPy module.
- ``RI(start_int, end_int, count=1)``: Generate random integers.
- ``RF(start_float, end_float, count=1)``: Generate random floats.
- ``RS(Arr)``: Shuffle an array.
- ``LJ(job_file_name)``: Load job file (details not specified).
- ``SJ(value, job_file_name)``: Save job file (details not specified).
- ``LN(np_file_name)``: Load NumPy file (details not specified).
- ``SN(arr, np_file_name)``: Save NumPy array to file.
- ``cmd(command, redirect=True)``: Run a command in CMD.
- ``PX(fname)``: Check existence of a file.
- ``RC(Arr, size=1)``: Random choice from an array.
- ``onehot(data, nb_classes)``: Convert data to one-hot encoding.
- ``exe(pyfile)``: Convert Python file to executable (requires PyInstaller).
- ``FWL(wavfolder, sr)``: Get total audio length in a folder.
- ``norm(vector)``: Normalize a vector (vector/magnitude(vector)).
- ``delete(pattern)``: Delete files matching a pattern.
- ``rename(fname, fout)``: Rename a file.
- ``separate(pattern, folout)``: Separate vocal from music.
- ``dll(fname)``: Create a .pyd file from a Python file.
- ``get_hard_serial()``: Get hardware serial number.
- ``mute_mic()``: Toggle microphone on/off.
- ``PA(fname)``: Get absolute path of a file.
30-cmd(command , redirect=True) ``Run command in CMD``
iman.Audio
~~~~~~~~~~
31-PX(fname) ``check existance of file``
- ``Read(filename, sr, start_from, dur, mono, ffmpeg_path, ffprobe_path)``: Read WAV, ALAW, MP3, and other audio formats.
- ``Resample(data, fs, sr)``: Resample audio data.
- ``Write(filename, data, fs)``: Write audio data to a file.
- ``frame(y)``: Frame audio data (details not specified).
- ``split(y)``: Split audio data (details not specified).
- ``ReadT(filename, sr, mono=True)``: Read and resample WAV file with torchaudio.
- ``VAD(y, top_db=40, frame_length=200, hop_length=80)``: Voice activity detection.
- ``compress(fname_pattern, sr=16000, ext='mp3', mono=True, ffmpeg_path='c:\\ffmpeg.exe', ofolder=None, worker=4)``: Compress audio files.
- ``clip_value(wav)``: Return clipping percentage in an audio file.
- ``WriteS(filename, data, fs)``: Convert and write audio to stereo.
32-RC(Arr , size=1) ``Random Choice``
iman.info
~~~~~~~~~
33-onehot(data, nb_classes)
- ``get()``: Get information about CPU and GPU (requires torch).
- ``cpu()``: Get CPU percentage usage.
- ``gpu()``: Get GPU memory usage.
- ``memory()``: Get RAM usage in GB.
- ``plot(fname="log.txt", delay=1)``: Plot system metrics from a log file.
34-exe(pyfile) ``need pyinstaller``
iman.metrics
~~~~~~~~~~~~
35-FWL(wavfolder , sr) ``Get Folder Audio Length``
- ``EER(lab, score)``: Compute Equal Error Rate.
- ``cosine_distance(v1, v2)``: Compute cosine distance between two vectors.
- ``roc(lab, score)``: Compute ROC curve.
- ``wer(ref, hyp)``: Compute Word Error Rate.
- ``cer(ref, hyp)``: Compute Character Error Rate.
- ``wer_list(ref_list, hyp_list)``: Compute WER for lists.
- ``cer_list(ref_list, hyp_list)``: Compute CER for lists.
- ``DER(ref_list, res_list, file_dur=-1, sr=8000)``: Compute Detection Error Rate.
36-norm(vector) ``vector/magnitude(vector)``
iman.tsne
~~~~~~~~~
37-delete(pattern)
- ``plot(fea, label)``: Plot t-SNE visualization of features.
38-rename(fname , fout)
iman.xvector
~~~~~~~~~~~~
39-separate(pattern,folout) ``separate vocal from music``
- ``xvec, lda_xvec, gender = get(filename, model(model_path, model_name, model_speaker_num))``: Extract x-vectors for speaker recognition.
40-dll(fname) ``create a pyd file from py file``
iman.web
~~~~~~~~
41-get_hard_serial()
- ``change_wallpaper()``: Change system wallpaper.
- ``dl(url)``: Download a file from a URL.
- ``links(url, filter_text=None)``: Extract links from a URL.
- ``imgs(url, filter_text=None)``: Extract images from a URL.
42-mute_mic() ``on and off microphone``
iman.matlab
~~~~~~~~~~~
43-PA(fname) ``get abs path``
- ``np2mat(param, mat_file_name)``: Convert NumPy array to MATLAB file.
- ``dic2mat(param, mat_file_name)``: Convert dictionary to MATLAB file.
- ``mat2dic(mat_file_name)``: Convert MATLAB file to dictionary.
from iman import Audio
======================
1-Read(filename,sr,start_from,dur,mono,ffmpeg_path,ffprobe_path) ``Read wav alaw and mp3 and others``
iman.Features
~~~~~~~~~~~~~
2-Resample(data , fs, sr)
- ``mfcc_fea, mspec, log_energy = mfcc.SB.Get(wav, sample_rate)``: Compute MFCC with SpeechBrain (input must be read with torchaudio).
- ``mfcc.SB.Normal(MFCC)``: Mean-variance normalization of MFCC with SpeechBrain.
- ``mfcc_fea, log_energy = mfcc.LS.Get(wav, sample_rate, le=False)``: Compute MFCC with Librosa (input is NumPy array).
- ``mfcc.LS.Normal(MFCC, win_len=150)``: Mean-variance normalization (local, 150 frames left and right).
3-Write(filename, data ,fs)
iman.AUG
~~~~~~~~
4-frame(y)
- ``Add_Noise(data, noise, snr)``: Add noise to audio data.
- ``Add_Reverb(data, rir)``: Add reverberation to audio data.
- ``Add_NoiseT(data, noise, snr)``: Add noise using torchaudio.
- ``Add_ReverbT(data, rir)``: Add reverberation using torchaudio.
- ``mp3(fname, fout, sr_out, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Convert to MP3.
- ``speed(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Change audio speed.
- ``volume(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Adjust audio volume.
5-split(y)
iman.sad_torch_mfcc | iman.sad_tf
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6-ReadT(filename, sr , mono=True) ``Read and resample wav file with torchaudio``
- **Initializer** (PyTorch):
7-VAD(y,top_db=40, frame_length=200, hop_length=80)
.. code-block:: python
8-compress(fname_pattern , sr=16000 , ext='mp3' , mono=True ,ffmpeg_path='c:\\ffmpeg.exe' , ofolder=None, worker=4)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path='c:\\ffmpeg.exe', complete_output=False, device='cuda', input_type='file')
9-clip_value(wav) ``return clipping percentage in audio file``
- **Initializer** (TensorFlow):
10-WriteS(filename, data ,fs) ``Convert to Stereo``
.. code-block:: python
from iman import info
=====================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=16000, model_path="c:\\keras_speech_music_noise_cnn.hdf5", gender_path="c:\\keras_male_female_cnn.hdf5", ffmpeg_path='c:\\ffmpeg.exe', detect_gender=False, complete_output=False, device='cuda', input_type='file')
1-get() info about cpu and gpu ``need torch``
- ``isig, wav, mfcc = seg(fname)``: Process audio file (MFCC output only in PyTorch model).
- ``nmfcc = filter_fea(isig, mfcc, sr, max_time)``: Filter features (PyTorch only).
- ``mfcc = MVN(mfcc)``: Mean-variance normalization (PyTorch only).
- ``isig = filter_output(isig, max_silence, ignore_small_speech_segments, max_speech_len, split_speech_bigger_than)``: Filter output when ``complete_output=False``.
- ``seg2aud(isig, filename)``: Convert segments to audio.
- ``seg2json(isig)``: Convert segments to JSON.
- ``seg2Gender_Info(isig)``: Extract gender information from segments.
- ``seg2Info(isig)``: Extract segment information.
- ``wav_speech, wav_noise = filter_sig(isig, wav, sr)``: Get speech and noise parts (when ``complete_output=False``).
2-cpu() ``get cpu percentage usage``
- **sad_tf.segmentero**:
3-gpu() ``get gpu memory usage``
.. code-block:: python
4-memory() ``get ram usage GB``
from sad_tf.segmentero import Segmenter # Use ONNX models (requires onnxruntime)
5-plot(fname="log.txt" , delay=1)
iman.sad_torch_mfcc_speaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Initializer**:
from iman import metrics
========================
1-EER(lab,score)
.. code-block:: python
2-cosine_distance(v1,v2)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
3-roc(lab,score)
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
4-wer(ref, hyp)
iman.sad_tf_mlp_speaker
~~~~~~~~~~~~~~~~~~~~~~~
5-cer(ref, hyp)
- **Initializer**:
6-wer_list(ref_list , hyp_list)
.. code-block:: python
7-cer_list(ref_list , hyp_list)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="sad_tf_mlp.h5", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
8-DER(ref_list , res_list , file_dur=-1 , sr=8000) ``Detection Error Rate``
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
from iman import tsne
=====================
iman.Report
~~~~~~~~~~~
1-plot(fea , label)
- **Initializer**:
from iman import xvector
========================
1-xvec,lda_xvec,gender = get(filename , model(model_path , model_name , model_speaker_num))
.. code-block:: python
r = Report.rep(log_dir=None)
from iman import web
====================
1-change_wallpaper()
- ``WS(_type, _name, value, itr)``: Add scalar to TensorBoard.
- ``WT(_type, _name, _str, itr)``: Add text to TensorBoard.
- ``WG(pytorch_model, example_input)``: Add graph to TensorBoard.
- ``WI(_type, _name, images, itr)``: Add image to TensorBoard.
2-dl(url)
iman.par
~~~~~~~~
3-links(url , filter_text=None)
- **Parallel Processing**:
4-imgs(url , filter_text=None)
.. code-block:: python
from iman import matlab
=======================
1-np2mat(param , mat_file_name)
if __name__ == '__main__':
res = par.par(files, func, worker=4, args=[]) # func defined as: def func(fname, _args): ...
2-dic2mat(param , mat_file_name)
iman.Image
~~~~~~~~~~
3-mat2dic (mat_file_name)
- ``Image.convert(fname_pattern, ext='jpg', ofolder=None, w=-1, h=-1, level=100, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Convert images to specified format.
- ``Image.resize(fname_pattern, ext='jpg', ofolder=None, w=2, h=2, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Resize images to 1/w and 1/h.
from iman import Features
=========================
1- mfcc_fea,mspec,log_energy = mfcc.SB.Get(wav,sample_rate) ``Compute MFCC with speechbrain - input must be read with torchaudio``
iman.Boors
~~~~~~~~~~
2-mfcc.SB.Normal(MFCC) ``Mean Var Normalization Utt with speechbrain``
- ``Boors.get(sahm)``: Get stock information.
3- mfcc_fea,log_energy = mfcc.LS.Get(wav,sample_rate,le=False) ``Compute MFCC with Librosa - input is numpy array``
iman.Text
~~~~~~~~~
4-mfcc.LS.Normal(MFCC , win_len=150) ``Mean Var Normalization Local 150 left and 150 right``
- **Initializer**:
from iman import AUG
====================
1-Add_Noise(data , noise , snr)
.. code-block:: python
2-Add_Reverb( data , rir)
norm = Text.normal("c:\\Replace_List.txt")
3-Add_NoiseT(data , noise , snr) ``(torchaudio)``
- ``norm.rep(str)``: Replace text based on normalization rules.
- ``norm.from_file(filename, file_out=None)``: Normalize text from a file.
4-Add_ReverbT( data , rir) ``(torchaudio)``
iman.num2fa
~~~~~~~~~~~
5-mp3(fname , fout,sr_out,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``words(number)``: Convert number to Persian words.
6-speed(fname,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
iman.examples
~~~~~~~~~~~~~
7-volume(fname ,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``examples.items``: Get items in the examples folder.
- ``examples.help(topic)``: Get help on a specific topic.
from iman.[sad_torch_mfcc | sad_tf] import *
===============================================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , tq=1,ffmpeg_path='c:\\ffmpeg.exe',complete_output=False , device='cuda',input_type='file') ``TORCH``
iman.Rar
~~~~~~~~
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=16000 , model_path="c:\\keras_speech_music_noise_cnn.hdf5",gender_path="c:\\keras_male_female_cnn.hdf5",ffmpeg_path='c:\\ffmpeg.exe',detect_gender=False,complete_output=False,device='cuda',input_type='file') ``TensorFlow``
- ``rar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create RAR archive.
- ``zip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create ZIP archive.
- ``unrar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract RAR archive.
- ``unzip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract ZIP archive.
isig,wav,mfcc = seg(fname) ``mfcc output Just in torch model``
iman.Enhance
~~~~~~~~~~~~
nmfcc = filter_fea(isig , mfcc , sr , max_time) ``Just in torch model``
- ``Enhance.Dereverb(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeEcho-DeReverb.pth")``: Dereverberate audio files.
- ``Enhance.Denoise(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeNoise-Lite.pth")``: Denoise audio files.
mfcc = MVN(mfcc) ``Just in torch model``
iman.tf
~~~~~~~
isig = filter_output(isig , max_silence ,ignore_small_speech_segments , max_speech_len ,split_speech_bigger_than) ``Do when complete_output=False``
- ``flops(model)``: Get FLOPs of a TensorFlow model.
- ``param(model)``: Get parameter count of a TensorFlow model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
- ``limit()``: Limit GPU memory allocation for TensorFlow models.
seg2aud(isig , filename)
seg2json(isig)
iman.torch
~~~~~~~~~~
seg2Gender_Info(isig)
- ``param(model)``: Get parameter and trainable count of a PyTorch model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``layers(model)``: Get layers of a PyTorch model.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
seg2Info(isig)
iman.yt
~~~~~~~
wav_speech , wav_noise = filter_sig(isig , wav , sr) ``Get Speech and Noise Parts of file - Do when complete_output=False``
- ``dl(url)``: Download a YouTube video.
- ``list_formats(url)``: List available formats for a YouTube link.
from sad_tf.segmentero import Segmenter ``to use onnx models - need to install onnxruntime``
iman.svad
~~~~~~~~~
from iman.sad_torch_mfcc_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``TORCH - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
- ``segments, wav = svad(filename, sampling_rate=16000, min_speech_duration_ms=250, max_speech_duration_s=float('inf'), min_silence_duration_ms=100)``: Run fast speech activity detection and return speech segments.
from iman.sad_tf_mlp_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="sad_tf_mlp.h5" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``Tensorflow (small mlp model) - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
Dependencies
------------
from iman import Report ``Tensorboard Writer``
==================================================
r=Report.rep(log_dir=None)
The ``iman`` package requires the following:
r.WS(_type , _name , value , itr) ``Add_scalar``
- **Python Packages**: ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, ``onnxruntime`` (for ONNX models).
- **External Tools**: ``ffmpeg``, ``ffprobe``, ``WinRAR`` (for RAR/ZIP operations).
- **Optional**: Pre-trained models (e.g., for VAD, x-vector, dereverberation) specified in function arguments.
r.WT(_type , _name , _str , itr) ``Add_text``
Check the package's ``requirements.txt`` for specific versions.
r.WG(pytorch_model , example_input) ``Add_graph``
Documentation
-------------
r.WI(_type , _name , images , itr) ``Add_image``
For detailed usage, refer to the source code or use the built-in help system:
from iman import par
========================
if (__name__ == '__main__'):
res = par.par(files , func , worker=4 , args=[]) ``def func(fname , _args): ...``
.. code-block:: python
from iman import Image
=========================
Image.convert(fname_pattern ,ext ='jpg',ofolder=None , w=-1 , h=-1,level=100, worker=4,ffmpeg_path='c:\\ffmpeg.exe')
from iman import examples
examples.help("Audio") # Get help on the Audio module
Image.resize(fname_pattern ,ext ='jpg',ofolder=None , w=2 , h=2, worker=4,ffmpeg_path='c:\\ffmpeg.exe') ``resize to 1/h and 1/w``
Contributing
------------
from iman import Boors
==========================
Boors.get(sahm) ``get sahm info``
Contributions are welcome! Submit bug reports, feature requests, or pull requests via the project's GitHub repository (if available). Follow contribution guidelines and include tests for new features.
from iman import Text
=====================
norm = Text.normal("c:\\Replace_List.txt")
License
-------
norm.rep(str)
``iman`` is licensed under the MIT License (assumed). See the LICENSE file for details.
norm.from_file(filename ,file_out=None)
Contact
-------
from iman.num2fa import words
=============================
words(number)
For support, contact the maintainers via the project's GitHub page or email (if provided).
from iman import examples
==========================
examples.items ``get items in examples folder``
.. note::
examples.help(topic)
from iman import Rar
====================
1-rar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
2-zip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
3-unrar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
4-unzip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
from iman import Enhance
=========================
1-Enhance.Dereverb(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeEcho-DeReverb.pth")
2-Enhance.Denoise(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeNoise-Lite.pth")
from iman.tf import *
=====================
1-flops(model) ``get flops of tf model``
2-param(model) ``return parameter number of tf model``
3-paramp(model) ``return parameter number of tf model and print model layers``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
6-limit() ``TF model only allocates as much GPU memory as needed based on runtime allocations``
from iman.torch import *
========================
1-param(model) ``return parameter number and trainable number of torch model``
2-paramp(model) ``return parameter number of torch model and print model layers``
3-layers(model) ``return layers of torch model``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
from iman.yt import *
========================
1-dl(url) ``Download youtube link``
2-list_formats(url) ``return all available formats for yt link``
Some functions require external tools (e.g., ``ffmpeg``, ``WinRAR``) or pre-trained models. Ensure these are configured correctly. Paths like ``c:\\ffmpeg.exe`` are Windows-specific; adjust for other operating systems.

@@ -111,2 +111,6 @@ README.rst

iman/sad_torch_mfcc_speaker/viterbi.py
iman/sad_torch_mfcc_speaker/viterbi_utils.py
iman/sad_torch_mfcc_speaker/viterbi_utils.py
iman/svad/__init__.py
iman/svad/model.py
iman/svad/utils_vad.py
iman/svad/data/__init__.py

@@ -186,3 +186,3 @@ import matplotlib.pyplot as plt

def separate(pattern,folout=None,model_path_folder=None): #model_path_folder contain .th model of Dmucs
def separate(pattern,folout=None,model_path_folder=None): #model_path_folder contain .th model of Dmucs pip install demucs
files = gf(pattern)

@@ -189,0 +189,0 @@ for fname in files:

+272
-210
Metadata-Version: 2.1
Name: iman
Version: 1.0.28
Version: 2.0
Summary: Python package for daily Tasks

@@ -12,328 +12,390 @@ Author: Iman Sarraf

from iman import *
==================
iman
====
1-plt
Overview
--------
2-now() ``get time``
``iman`` is a comprehensive Python package offering a wide array of utilities for audio processing, file manipulation, machine learning, system operations, web utilities, and more. It provides tools for tasks such as audio feature extraction, voice activity detection, file I/O, system monitoring, and integration with frameworks like PyTorch and TensorFlow. The package is organized into multiple submodules, each designed for specific functionalities, as detailed below.
3-F ``format floating point``
Installation
------------
4-D ``format int number``
Install ``iman`` via pip:
5-Write_List(MyList,Filename)
.. code-block:: bash
6-Write_Dic(MyDic,Filename)
pip install iman
7-Read(Filename) ``read txt file``
Ensure dependencies like ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, and external tools like ``ffmpeg``, ``ffprobe``, and ``WinRAR`` are installed. Some functions require pre-trained models or specific paths (e.g., model files, ``ffmpeg_path``).
8-Read_Lines(Filename) ``read txt file line by line and return list``
Usage
-----
9-Write(_str,Filename)
Below are examples of key functionalities from the ``iman`` package. For detailed function signatures and parameters, refer to the sections below or use the built-in help system:
10-gf(pattern) ``Get files in a directory``
.. code-block:: python
11-gfa(directory_pattern , ext="*.*") ``Get Files in a Directory and SubDirectories``
from iman import examples
examples.help("Audio") # Get help on a specific module
12-ReadE(Filename) ``Read Excel files``
**Example: Audio Processing**
13-PM(dir) ``create directory``
.. code-block:: python
14-PB(fname) ``get basename``
from iman import Audio
15-PN(fname) ``get file name``
# Read a WAV file
data, sr = Audio.Read("audio.wav", sr=16000, start_from=0, dur=None, mono=True, ffmpeg_path="c:\\ffmpeg.exe", ffprobe_path="c:\\ffprobe.exe")
16-PE(fname) ``get ext``
# Resample and write audio
resampled = Audio.Resample(data, fs=sr, sr=8000)
Audio.Write("output.wav", resampled, fs=8000)
17-PD(fname) ``get directory``
**Example: File Operations**
18-PS(fname) ``get size``
.. code-block:: python
19-PJ(segments) ``Join Path``
from iman import *
20-clear() ``clear cmd``
# Get files matching a pattern
files = gf("*.txt")
21-os
# Write a dictionary to a file
my_dict = {"key1": "value1", "key2": "value2"}
Write_Dic(my_dict, "output.txt")
22-np
**Example: VAD with Segmenter**
23-RI(start_int , end_int , count=1) ``random int``
.. code-block:: python
24-RF(start_float , end_float , count=1) ``random float``
from iman.sad_torch_mfcc import Segmenter
25-RS(Arr) ``shuffle``
seg = Segmenter(batch_size=32, vad_type="vad", sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path="c:\\ffmpeg.exe", complete_output=False, device="cuda", input_type="file")
isig, wav, mfcc = seg("audio.wav")
26-LJ(job_file_name)
Modules and Functions
---------------------
27-SJ(value , job_file_name)
The ``iman`` package is organized into several submodules, each with specific functions. Below is a complete list of modules and their functions as provided.
28-LN(np_file_name)
iman
~~~~
29-SN(arr , np_file_name)
- ``plt``: Matplotlib plotting library.
- ``now()``: Get current time.
- ``F``: Format floating-point number.
- ``D``: Format integer number.
- ``Write_List(MyList, Filename)``: Write a list to a text file.
- ``Write_Dic(MyDic, Filename)``: Write a dictionary to a text file.
- ``Read(Filename)``: Read a text file.
- ``Read_Lines(Filename)``: Read a text file line by line and return a list.
- ``Write(_str, Filename)``: Write a string to a text file.
- ``gf(pattern)``: Get files in a directory matching a pattern.
- ``gfa(directory_pattern, ext="*.*")``: Get files in a directory and subdirectories.
- ``ReadE(Filename)``: Read Excel files.
- ``PM(dir)``: Create a directory.
- ``PB(fname)``: Get basename of a file.
- ``PN(fname)``: Get filename without path.
- ``PE(fname)``: Get file extension.
- ``PD(fname)``: Get directory of a file.
- ``PS(fname)``: Get file size.
- ``PJ(segments)``: Join path segments.
- ``clear()``: Clear command-line interface.
- ``os``: Python os module.
- ``np``: NumPy module.
- ``RI(start_int, end_int, count=1)``: Generate random integers.
- ``RF(start_float, end_float, count=1)``: Generate random floats.
- ``RS(Arr)``: Shuffle an array.
- ``LJ(job_file_name)``: Load job file (details not specified).
- ``SJ(value, job_file_name)``: Save job file (details not specified).
- ``LN(np_file_name)``: Load NumPy file (details not specified).
- ``SN(arr, np_file_name)``: Save NumPy array to file.
- ``cmd(command, redirect=True)``: Run a command in CMD.
- ``PX(fname)``: Check existence of a file.
- ``RC(Arr, size=1)``: Random choice from an array.
- ``onehot(data, nb_classes)``: Convert data to one-hot encoding.
- ``exe(pyfile)``: Convert Python file to executable (requires PyInstaller).
- ``FWL(wavfolder, sr)``: Get total audio length in a folder.
- ``norm(vector)``: Normalize a vector (vector/magnitude(vector)).
- ``delete(pattern)``: Delete files matching a pattern.
- ``rename(fname, fout)``: Rename a file.
- ``separate(pattern, folout)``: Separate vocal from music.
- ``dll(fname)``: Create a .pyd file from a Python file.
- ``get_hard_serial()``: Get hardware serial number.
- ``mute_mic()``: Toggle microphone on/off.
- ``PA(fname)``: Get absolute path of a file.
30-cmd(command , redirect=True) ``Run command in CMD``
iman.Audio
~~~~~~~~~~
31-PX(fname) ``check existence of file``
- ``Read(filename, sr, start_from, dur, mono, ffmpeg_path, ffprobe_path)``: Read WAV, ALAW, MP3, and other audio formats.
- ``Resample(data, fs, sr)``: Resample audio data.
- ``Write(filename, data, fs)``: Write audio data to a file.
- ``frame(y)``: Frame audio data (details not specified).
- ``split(y)``: Split audio data (details not specified).
- ``ReadT(filename, sr, mono=True)``: Read and resample WAV file with torchaudio.
- ``VAD(y, top_db=40, frame_length=200, hop_length=80)``: Voice activity detection.
- ``compress(fname_pattern, sr=16000, ext='mp3', mono=True, ffmpeg_path='c:\\ffmpeg.exe', ofolder=None, worker=4)``: Compress audio files.
- ``clip_value(wav)``: Return clipping percentage in an audio file.
- ``WriteS(filename, data, fs)``: Convert and write audio to stereo.
32-RC(Arr , size=1) ``Random Choice``
iman.info
~~~~~~~~~
33-onehot(data, nb_classes)
- ``get()``: Get information about CPU and GPU (requires torch).
- ``cpu()``: Get CPU percentage usage.
- ``gpu()``: Get GPU memory usage.
- ``memory()``: Get RAM usage in GB.
- ``plot(fname="log.txt", delay=1)``: Plot system metrics from a log file.
34-exe(pyfile) ``need pyinstaller``
iman.metrics
~~~~~~~~~~~~
35-FWL(wavfolder , sr) ``Get Folder Audio Length``
- ``EER(lab, score)``: Compute Equal Error Rate.
- ``cosine_distance(v1, v2)``: Compute cosine distance between two vectors.
- ``roc(lab, score)``: Compute ROC curve.
- ``wer(ref, hyp)``: Compute Word Error Rate.
- ``cer(ref, hyp)``: Compute Character Error Rate.
- ``wer_list(ref_list, hyp_list)``: Compute WER for lists.
- ``cer_list(ref_list, hyp_list)``: Compute CER for lists.
- ``DER(ref_list, res_list, file_dur=-1, sr=8000)``: Compute Detection Error Rate.
36-norm(vector) ``vector/magnitude(vector)``
iman.tsne
~~~~~~~~~
37-delete(pattern)
- ``plot(fea, label)``: Plot t-SNE visualization of features.
38-rename(fname , fout)
iman.xvector
~~~~~~~~~~~~
39-separate(pattern,folout) ``separate vocal from music``
- ``xvec, lda_xvec, gender = get(filename, model(model_path, model_name, model_speaker_num))``: Extract x-vectors for speaker recognition.
40-dll(fname) ``create a pyd file from py file``
iman.web
~~~~~~~~
41-get_hard_serial()
- ``change_wallpaper()``: Change system wallpaper.
- ``dl(url)``: Download a file from a URL.
- ``links(url, filter_text=None)``: Extract links from a URL.
- ``imgs(url, filter_text=None)``: Extract images from a URL.
42-mute_mic() ``on and off microphone``
iman.matlab
~~~~~~~~~~~
43-PA(fname) ``get abs path``
- ``np2mat(param, mat_file_name)``: Convert NumPy array to MATLAB file.
- ``dic2mat(param, mat_file_name)``: Convert dictionary to MATLAB file.
- ``mat2dic(mat_file_name)``: Convert MATLAB file to dictionary.
from iman import Audio
======================
1-Read(filename,sr,start_from,dur,mono,ffmpeg_path,ffprobe_path) ``Read wav alaw and mp3 and others``
iman.Features
~~~~~~~~~~~~~
2-Resample(data , fs, sr)
- ``mfcc_fea, mspec, log_energy = mfcc.SB.Get(wav, sample_rate)``: Compute MFCC with SpeechBrain (input must be read with torchaudio).
- ``mfcc.SB.Normal(MFCC)``: Mean-variance normalization of MFCC with SpeechBrain.
- ``mfcc_fea, log_energy = mfcc.LS.Get(wav, sample_rate, le=False)``: Compute MFCC with Librosa (input is NumPy array).
- ``mfcc.LS.Normal(MFCC, win_len=150)``: Mean-variance normalization (local, 150 frames left and right).
3-Write(filename, data ,fs)
iman.AUG
~~~~~~~~
4-frame(y)
- ``Add_Noise(data, noise, snr)``: Add noise to audio data.
- ``Add_Reverb(data, rir)``: Add reverberation to audio data.
- ``Add_NoiseT(data, noise, snr)``: Add noise using torchaudio.
- ``Add_ReverbT(data, rir)``: Add reverberation using torchaudio.
- ``mp3(fname, fout, sr_out, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Convert to MP3.
- ``speed(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Change audio speed.
- ``volume(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Adjust audio volume.
5-split(y)
iman.sad_torch_mfcc | iman.sad_tf
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6-ReadT(filename, sr , mono=True) ``Read and resample wav file with torchaudio``
- **Initializer** (PyTorch):
7-VAD(y,top_db=40, frame_length=200, hop_length=80)
.. code-block:: python
8-compress(fname_pattern , sr=16000 , ext='mp3' , mono=True ,ffmpeg_path='c:\\ffmpeg.exe' , ofolder=None, worker=4)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path='c:\\ffmpeg.exe', complete_output=False, device='cuda', input_type='file')
9-clip_value(wav) ``return clipping percentage in audio file``
- **Initializer** (TensorFlow):
10-WriteS(filename, data ,fs) ``Convert to Stereo``
.. code-block:: python
from iman import info
=====================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=16000, model_path="c:\\keras_speech_music_noise_cnn.hdf5", gender_path="c:\\keras_male_female_cnn.hdf5", ffmpeg_path='c:\\ffmpeg.exe', detect_gender=False, complete_output=False, device='cuda', input_type='file')
1-get() info about cpu and gpu ``need torch``
- ``isig, wav, mfcc = seg(fname)``: Process audio file (MFCC output only in PyTorch model).
- ``nmfcc = filter_fea(isig, mfcc, sr, max_time)``: Filter features (PyTorch only).
- ``mfcc = MVN(mfcc)``: Mean-variance normalization (PyTorch only).
- ``isig = filter_output(isig, max_silence, ignore_small_speech_segments, max_speech_len, split_speech_bigger_than)``: Filter output when ``complete_output=False``.
- ``seg2aud(isig, filename)``: Convert segments to audio.
- ``seg2json(isig)``: Convert segments to JSON.
- ``seg2Gender_Info(isig)``: Extract gender information from segments.
- ``seg2Info(isig)``: Extract segment information.
- ``wav_speech, wav_noise = filter_sig(isig, wav, sr)``: Get speech and noise parts (when ``complete_output=False``).
2-cpu() ``get cpu percentage usage``
- **sad_tf.segmentero**:
3-gpu() ``get gpu memory usage``
.. code-block:: python
4-memory() ``get ram usage GB``
from sad_tf.segmentero import Segmenter # Use ONNX models (requires onnxruntime)
5-plot(fname="log.txt" , delay=1)
iman.sad_torch_mfcc_speaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Initializer**:
from iman import metrics
========================
1-EER(lab,score)
.. code-block:: python
2-cosine_distance(v1,v2)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
3-roc(lab,score)
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
4-wer(ref, hyp)
iman.sad_tf_mlp_speaker
~~~~~~~~~~~~~~~~~~~~~~~
5-cer(ref, hyp)
- **Initializer**:
6-wer_list(ref_list , hyp_list)
.. code-block:: python
7-cer_list(ref_list , hyp_list)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="sad_tf_mlp.h5", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
8-DER(ref_list , res_list , file_dur=-1 , sr=8000) ``Detection Error Rate``
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
from iman import tsne
=====================
iman.Report
~~~~~~~~~~~
1-plot(fea , label)
- **Initializer**:
from iman import xvector
========================
1-xvec,lda_xvec,gender = get(filename , model(model_path , model_name , model_speaker_num))
.. code-block:: python
r = Report.rep(log_dir=None)
from iman import web
====================
1-change_wallpaper()
- ``WS(_type, _name, value, itr)``: Add scalar to TensorBoard.
- ``WT(_type, _name, _str, itr)``: Add text to TensorBoard.
- ``WG(pytorch_model, example_input)``: Add graph to TensorBoard.
- ``WI(_type, _name, images, itr)``: Add image to TensorBoard.
2-dl(url)
iman.par
~~~~~~~~
3-links(url , filter_text=None)
- **Parallel Processing**:
4-imgs(url , filter_text=None)
.. code-block:: python
from iman import matlab
=======================
1-np2mat(param , mat_file_name)
if __name__ == '__main__':
res = par.par(files, func, worker=4, args=[]) # func defined as: def func(fname, _args): ...
2-dic2mat(param , mat_file_name)
iman.Image
~~~~~~~~~~
3-mat2dic (mat_file_name)
- ``Image.convert(fname_pattern, ext='jpg', ofolder=None, w=-1, h=-1, level=100, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Convert images to specified format.
- ``Image.resize(fname_pattern, ext='jpg', ofolder=None, w=2, h=2, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Resize images to 1/w and 1/h.
from iman import Features
=========================
1- mfcc_fea,mspec,log_energy = mfcc.SB.Get(wav,sample_rate) ``Compute MFCC with speechbrain - input must be read with torchaudio``
iman.Boors
~~~~~~~~~~
2-mfcc.SB.Normal(MFCC) ``Mean Var Normalization Utt with speechbrain``
- ``Boors.get(sahm)``: Get stock information.
3- mfcc_fea,log_energy = mfcc.LS.Get(wav,sample_rate,le=False) ``Compute MFCC with Librosa - input is numpy array``
iman.Text
~~~~~~~~~
4-mfcc.LS.Normal(MFCC , win_len=150) ``Mean Var Normalization Local 150 left and 150 right``
- **Initializer**:
from iman import AUG
====================
1-Add_Noise(data , noise , snr)
.. code-block:: python
2-Add_Reverb( data , rir)
norm = Text.normal("c:\\Replace_List.txt")
3-Add_NoiseT(data , noise , snr) ``(torchaudio)``
- ``norm.rep(str)``: Replace text based on normalization rules.
- ``norm.from_file(filename, file_out=None)``: Normalize text from a file.
4-Add_ReverbT( data , rir) ``(torchaudio)``
iman.num2fa
~~~~~~~~~~~
5-mp3(fname , fout,sr_out,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``words(number)``: Convert number to Persian words.
6-speed(fname,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
iman.examples
~~~~~~~~~~~~~
7-volume(fname ,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``examples.items``: Get items in the examples folder.
- ``examples.help(topic)``: Get help on a specific topic.
from iman.[sad_torch_mfcc | sad_tf] import *
===============================================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , tq=1,ffmpeg_path='c:\\ffmpeg.exe',complete_output=False , device='cuda',input_type='file') ``TORCH``
iman.Rar
~~~~~~~~
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=16000 , model_path="c:\\keras_speech_music_noise_cnn.hdf5",gender_path="c:\\keras_male_female_cnn.hdf5",ffmpeg_path='c:\\ffmpeg.exe',detect_gender=False,complete_output=False,device='cuda',input_type='file') ``TensorFlow``
- ``rar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create RAR archive.
- ``zip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create ZIP archive.
- ``unrar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract RAR archive.
- ``unzip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract ZIP archive.
isig,wav,mfcc = seg(fname) ``mfcc output Just in torch model``
iman.Enhance
~~~~~~~~~~~~
nmfcc = filter_fea(isig , mfcc , sr , max_time) ``Just in torch model``
- ``Enhance.Dereverb(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeEcho-DeReverb.pth")``: Dereverberate audio files.
- ``Enhance.Denoise(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeNoise-Lite.pth")``: Denoise audio files.
mfcc = MVN(mfcc) ``Just in torch model``
iman.tf
~~~~~~~
isig = filter_output(isig , max_silence ,ignore_small_speech_segments , max_speech_len ,split_speech_bigger_than) ``Do when complete_output=False``
- ``flops(model)``: Get FLOPs of a TensorFlow model.
- ``param(model)``: Get parameter count of a TensorFlow model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
- ``limit()``: Limit GPU memory allocation for TensorFlow models.
seg2aud(isig , filename)
seg2json(isig)
iman.torch
~~~~~~~~~~
seg2Gender_Info(isig)
- ``param(model)``: Get parameter and trainable count of a PyTorch model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``layers(model)``: Get layers of a PyTorch model.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
seg2Info(isig)
iman.yt
~~~~~~~
wav_speech , wav_noise = filter_sig(isig , wav , sr) ``Get Speech and Noise Parts of file - Do when complete_output=False``
- ``dl(url)``: Download a YouTube video.
- ``list_formats(url)``: List available formats for a YouTube link.
from sad_tf.segmentero import Segmenter ``to use onnx models - need to install onnxruntime``
iman.svad
~~~~~~~~~
from iman.sad_torch_mfcc_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``TORCH - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
- ``segments, wav = svad(filename, sampling_rate=16000, min_speech_duration_ms=250, max_speech_duration_s=float('inf'), min_silence_duration_ms=100)``: Run fast speech activity detection and return speech segments.
from iman.sad_tf_mlp_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="sad_tf_mlp.h5" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``Tensorflow (small mlp model) - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
Dependencies
------------
from iman import Report ``Tensorboard Writer``
==================================================
r=Report.rep(log_dir=None)
The ``iman`` package requires the following:
r.WS(_type , _name , value , itr) ``Add_scalar``
- **Python Packages**: ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, ``onnxruntime`` (for ONNX models).
- **External Tools**: ``ffmpeg``, ``ffprobe``, ``WinRAR`` (for RAR/ZIP operations).
- **Optional**: Pre-trained models (e.g., for VAD, x-vector, dereverberation) specified in function arguments.
r.WT(_type , _name , _str , itr) ``Add_text``
Check the package's ``requirements.txt`` for specific versions.
r.WG(pytorch_model , example_input) ``Add_graph``
Documentation
-------------
r.WI(_type , _name , images , itr) ``Add_image``
For detailed usage, refer to the source code or use the built-in help system:
from iman import par
========================
if (__name__ == '__main__'):
res = par.par(files , func , worker=4 , args=[]) ``def func(fname , _args): ...``
.. code-block:: python
from iman import Image
=========================
Image.convert(fname_pattern ,ext ='jpg',ofolder=None , w=-1 , h=-1,level=100, worker=4,ffmpeg_path='c:\\ffmpeg.exe')
from iman import examples
examples.help("Audio") # Get help on the Audio module
Image.resize(fname_pattern ,ext ='jpg',ofolder=None , w=2 , h=2, worker=4,ffmpeg_path='c:\\ffmpeg.exe') ``resize to 1/h and 1/w``
Contributing
------------
from iman import Boors
==========================
Boors.get(sahm) ``get sahm info``
Contributions are welcome! Submit bug reports, feature requests, or pull requests via the project's GitHub repository (if available). Follow contribution guidelines and include tests for new features.
from iman import Text
=====================
norm = Text.normal("c:\\Replace_List.txt")
License
-------
norm.rep(str)
``iman`` is licensed under the MIT License (assumed). See the LICENSE file for details.
norm.from_file(filename ,file_out=None)
Contact
-------
from iman.num2fa import words
=============================
words(number)
For support, contact the maintainers via the project's GitHub page or email (if provided).
from iman import examples
==========================
examples.items ``get items in examples folder``
.. note::
examples.help(topic)
from iman import Rar
====================
1-rar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
2-zip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
3-unrar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
4-unzip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
from iman import Enhance
=========================
1-Enhance.Dereverb(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeEcho-DeReverb.pth")
2-Enhance.Denoise(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeNoise-Lite.pth")
from iman.tf import *
=====================
1-flops(model) ``get flops of tf model``
2-param(model) ``return parameter number of tf model``
3-paramp(model) ``return parameter number of tf model and print model layers``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
6-limit() ``Tf model only allocate as much GPU memory based on runtime allocations``
from iman.torch import *
========================
1-param(model) ``return parameter number and trainable number of torch model``
2-paramp(model) ``return parameter number of torch model and print model layers``
3-layers(model) ``return layers of torch model``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
from iman.yt import *
========================
1-dl(url) ``Download youtube link``
2-list_formats(url) ``return all available formats for yt link``
Some functions require external tools (e.g., ``ffmpeg``, ``WinRAR``) or pre-trained models. Ensure these are configured correctly. Paths like ``c:\\ffmpeg.exe`` are Windows-specific; adjust for other operating systems.
+271
-209

@@ -1,327 +0,389 @@

from iman import *
==================
iman
====
1-plt
Overview
--------
2-now() ``get time``
``iman`` is a comprehensive Python package offering a wide array of utilities for audio processing, file manipulation, machine learning, system operations, web utilities, and more. It provides tools for tasks such as audio feature extraction, voice activity detection, file I/O, system monitoring, and integration with frameworks like PyTorch and TensorFlow. The package is organized into multiple submodules, each designed for specific functionalities, as detailed below.
3-F ``format floating point``
Installation
------------
4-D ``format int number``
Install ``iman`` via pip:
5-Write_List(MyList,Filename)
.. code-block:: bash
6-Write_Dic(MyDic,Filename)
pip install iman
7-Read(Filename) ``read txt file``
Ensure dependencies like ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, and external tools like ``ffmpeg``, ``ffprobe``, and ``WinRAR`` are installed. Some functions require pre-trained models or specific paths (e.g., model files, ``ffmpeg_path``).
8-Read_Lines(Filename) ``read txt file line by line and return list``
Usage
-----
9-Write(_str,Filename)
Below are examples of key functionalities from the ``iman`` package. For detailed function signatures and parameters, refer to the sections below or use the built-in help system:
10-gf(pattern) ``Get files in a directory``
.. code-block:: python
11-gfa(directory_pattern , ext="*.*") ``Get Files in a Directory and SubDirectories``
from iman import examples
examples.help("Audio") # Get help on a specific module
12-ReadE(Filename) ``Read Excel files``
**Example: Audio Processing**
13-PM(dir) ``create directory``
.. code-block:: python
14-PB(fname) ``get basename``
from iman import Audio
15-PN(fname) ``get file name``
# Read a WAV file
data, sr = Audio.Read("audio.wav", sr=16000, start_from=0, dur=None, mono=True, ffmpeg_path="c:\\ffmpeg.exe", ffprobe_path="c:\\ffprobe.exe")
16-PE(fname) ``get ext``
# Resample and write audio
resampled = Audio.Resample(data, fs=sr, sr=8000)
Audio.Write("output.wav", resampled, fs=8000)
17-PD(fname) ``get directory``
**Example: File Operations**
18-PS(fname) ``get size``
.. code-block:: python
19-PJ(segments) ``Join Path``
from iman import *
20-clear() ``clear cmd``
# Get files matching a pattern
files = gf("*.txt")
21-os
# Write a dictionary to a file
my_dict = {"key1": "value1", "key2": "value2"}
Write_Dic(my_dict, "output.txt")
22-np
**Example: VAD with Segmenter**
23-RI(start_int , end_int , count=1) ``random int``
.. code-block:: python
24-RF(start_float , end_float , count=1) ``random float``
from iman.sad_torch_mfcc import Segmenter
25-RS(Arr) ``shuffle``
seg = Segmenter(batch_size=32, vad_type="vad", sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path="c:\\ffmpeg.exe", complete_output=False, device="cuda", input_type="file")
isig, wav, mfcc = seg("audio.wav")
26-LJ(job_file_name)
Modules and Functions
---------------------
27-SJ(value , job_file_name)
The ``iman`` package is organized into several submodules, each with specific functions. Below is a complete list of modules and their functions as provided.
28-LN(np_file_name)
iman
~~~~
29-SN(arr , np_file_name)
- ``plt``: Matplotlib plotting library.
- ``now()``: Get current time.
- ``F``: Format floating-point number.
- ``D``: Format integer number.
- ``Write_List(MyList, Filename)``: Write a list to a text file.
- ``Write_Dic(MyDic, Filename)``: Write a dictionary to a text file.
- ``Read(Filename)``: Read a text file.
- ``Read_Lines(Filename)``: Read a text file line by line and return a list.
- ``Write(_str, Filename)``: Write a string to a text file.
- ``gf(pattern)``: Get files in a directory matching a pattern.
- ``gfa(directory_pattern, ext="*.*")``: Get files in a directory and subdirectories.
- ``ReadE(Filename)``: Read Excel files.
- ``PM(dir)``: Create a directory.
- ``PB(fname)``: Get basename of a file.
- ``PN(fname)``: Get filename without path.
- ``PE(fname)``: Get file extension.
- ``PD(fname)``: Get directory of a file.
- ``PS(fname)``: Get file size.
- ``PJ(segments)``: Join path segments.
- ``clear()``: Clear command-line interface.
- ``os``: Python os module.
- ``np``: NumPy module.
- ``RI(start_int, end_int, count=1)``: Generate random integers.
- ``RF(start_float, end_float, count=1)``: Generate random floats.
- ``RS(Arr)``: Shuffle an array.
- ``LJ(job_file_name)``: Load job file (details not specified).
- ``SJ(value, job_file_name)``: Save job file (details not specified).
- ``LN(np_file_name)``: Load NumPy file (details not specified).
- ``SN(arr, np_file_name)``: Save NumPy array to file.
- ``cmd(command, redirect=True)``: Run a command in CMD.
- ``PX(fname)``: Check existence of a file.
- ``RC(Arr, size=1)``: Random choice from an array.
- ``onehot(data, nb_classes)``: Convert data to one-hot encoding.
- ``exe(pyfile)``: Convert Python file to executable (requires PyInstaller).
- ``FWL(wavfolder, sr)``: Get total audio length in a folder.
- ``norm(vector)``: Normalize a vector (vector/magnitude(vector)).
- ``delete(pattern)``: Delete files matching a pattern.
- ``rename(fname, fout)``: Rename a file.
- ``separate(pattern, folout)``: Separate vocal from music.
- ``dll(fname)``: Create a .pyd file from a Python file.
- ``get_hard_serial()``: Get hardware serial number.
- ``mute_mic()``: Toggle microphone on/off.
- ``PA(fname)``: Get absolute path of a file.
30-cmd(command , redirect=True) ``Run command in CMD``
iman.Audio
~~~~~~~~~~
31-PX(fname) ``check existence of file``
- ``Read(filename, sr, start_from, dur, mono, ffmpeg_path, ffprobe_path)``: Read WAV, ALAW, MP3, and other audio formats.
- ``Resample(data, fs, sr)``: Resample audio data.
- ``Write(filename, data, fs)``: Write audio data to a file.
- ``frame(y)``: Frame audio data (details not specified).
- ``split(y)``: Split audio data (details not specified).
- ``ReadT(filename, sr, mono=True)``: Read and resample WAV file with torchaudio.
- ``VAD(y, top_db=40, frame_length=200, hop_length=80)``: Voice activity detection.
- ``compress(fname_pattern, sr=16000, ext='mp3', mono=True, ffmpeg_path='c:\\ffmpeg.exe', ofolder=None, worker=4)``: Compress audio files.
- ``clip_value(wav)``: Return clipping percentage in an audio file.
- ``WriteS(filename, data, fs)``: Convert and write audio to stereo.
32-RC(Arr , size=1) ``Random Choice``
iman.info
~~~~~~~~~
33-onehot(data, nb_classes)
- ``get()``: Get information about CPU and GPU (requires torch).
- ``cpu()``: Get CPU percentage usage.
- ``gpu()``: Get GPU memory usage.
- ``memory()``: Get RAM usage in GB.
- ``plot(fname="log.txt", delay=1)``: Plot system metrics from a log file.
34-exe(pyfile) ``need pyinstaller``
iman.metrics
~~~~~~~~~~~~
35-FWL(wavfolder , sr) ``Get Folder Audio Length``
- ``EER(lab, score)``: Compute Equal Error Rate.
- ``cosine_distance(v1, v2)``: Compute cosine distance between two vectors.
- ``roc(lab, score)``: Compute ROC curve.
- ``wer(ref, hyp)``: Compute Word Error Rate.
- ``cer(ref, hyp)``: Compute Character Error Rate.
- ``wer_list(ref_list, hyp_list)``: Compute WER for lists.
- ``cer_list(ref_list, hyp_list)``: Compute CER for lists.
- ``DER(ref_list, res_list, file_dur=-1, sr=8000)``: Compute Detection Error Rate.
36-norm(vector) ``vector/magnitude(vector)``
iman.tsne
~~~~~~~~~
37-delete(pattern)
- ``plot(fea, label)``: Plot t-SNE visualization of features.
38-rename(fname , fout)
iman.xvector
~~~~~~~~~~~~
39-separate(pattern,folout) ``separate vocal from music``
- ``xvec, lda_xvec, gender = get(filename, model(model_path, model_name, model_speaker_num))``: Extract x-vectors for speaker recognition.
40-dll(fname) ``create a pyd file from py file``
iman.web
~~~~~~~~
41-get_hard_serial()
- ``change_wallpaper()``: Change system wallpaper.
- ``dl(url)``: Download a file from a URL.
- ``links(url, filter_text=None)``: Extract links from a URL.
- ``imgs(url, filter_text=None)``: Extract images from a URL.
42-mute_mic() ``on and off microphone``
iman.matlab
~~~~~~~~~~~
43-PA(fname) ``get abs path``
- ``np2mat(param, mat_file_name)``: Convert NumPy array to MATLAB file.
- ``dic2mat(param, mat_file_name)``: Convert dictionary to MATLAB file.
- ``mat2dic(mat_file_name)``: Convert MATLAB file to dictionary.
from iman import Audio
======================
1-Read(filename,sr,start_from,dur,mono,ffmpeg_path,ffprobe_path) ``Read wav alaw and mp3 and others``
iman.Features
~~~~~~~~~~~~~
2-Resample(data , fs, sr)
- ``mfcc_fea, mspec, log_energy = mfcc.SB.Get(wav, sample_rate)``: Compute MFCC with SpeechBrain (input must be read with torchaudio).
- ``mfcc.SB.Normal(MFCC)``: Mean-variance normalization of MFCC with SpeechBrain.
- ``mfcc_fea, log_energy = mfcc.LS.Get(wav, sample_rate, le=False)``: Compute MFCC with Librosa (input is NumPy array).
- ``mfcc.LS.Normal(MFCC, win_len=150)``: Mean-variance normalization (local, 150 frames left and right).
3-Write(filename, data ,fs)
iman.AUG
~~~~~~~~
4-frame(y)
- ``Add_Noise(data, noise, snr)``: Add noise to audio data.
- ``Add_Reverb(data, rir)``: Add reverberation to audio data.
- ``Add_NoiseT(data, noise, snr)``: Add noise using torchaudio.
- ``Add_ReverbT(data, rir)``: Add reverberation using torchaudio.
- ``mp3(fname, fout, sr_out, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Convert to MP3.
- ``speed(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Change audio speed.
- ``volume(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Adjust audio volume.
5-split(y)
iman.sad_torch_mfcc | iman.sad_tf
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6-ReadT(filename, sr , mono=True) ``Read and resample wav file with torchaudio``
- **Initializer** (PyTorch):
7-VAD(y,top_db=40, frame_length=200, hop_length=80)
.. code-block:: python
8-compress(fname_pattern , sr=16000 , ext='mp3' , mono=True ,ffmpeg_path='c:\\ffmpeg.exe' , ofolder=None, worker=4)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path='c:\\ffmpeg.exe', complete_output=False, device='cuda', input_type='file')
9-clip_value(wav) ``return clipping percentage in audio file``
- **Initializer** (TensorFlow):
10-WriteS(filename, data ,fs) ``Convert to Stereo``
.. code-block:: python
from iman import info
=====================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=16000, model_path="c:\\keras_speech_music_noise_cnn.hdf5", gender_path="c:\\keras_male_female_cnn.hdf5", ffmpeg_path='c:\\ffmpeg.exe', detect_gender=False, complete_output=False, device='cuda', input_type='file')
1-get() info about cpu and gpu ``need torch``
- ``isig, wav, mfcc = seg(fname)``: Process audio file (MFCC output only in PyTorch model).
- ``nmfcc = filter_fea(isig, mfcc, sr, max_time)``: Filter features (PyTorch only).
- ``mfcc = MVN(mfcc)``: Mean-variance normalization (PyTorch only).
- ``isig = filter_output(isig, max_silence, ignore_small_speech_segments, max_speech_len, split_speech_bigger_than)``: Filter output when ``complete_output=False``.
- ``seg2aud(isig, filename)``: Convert segments to audio.
- ``seg2json(isig)``: Convert segments to JSON.
- ``seg2Gender_Info(isig)``: Extract gender information from segments.
- ``seg2Info(isig)``: Extract segment information.
- ``wav_speech, wav_noise = filter_sig(isig, wav, sr)``: Get speech and noise parts (when ``complete_output=False``).
2-cpu() ``get cpu percentage usage``
- **sad_tf.segmentero**:
3-gpu() ``get gpu memory usage``
.. code-block:: python
4-memory() ``get ram usage GB``
from sad_tf.segmentero import Segmenter # Use ONNX models (requires onnxruntime)
5-plot(fname="log.txt" , delay=1)
iman.sad_torch_mfcc_speaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Initializer**:
from iman import metrics
========================
1-EER(lab,score)
.. code-block:: python
2-cosine_distance(v1,v2)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
3-roc(lab,score)
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
4-wer(ref, hyp)
iman.sad_tf_mlp_speaker
~~~~~~~~~~~~~~~~~~~~~~~
5-cer(ref, hyp)
- **Initializer**:
6-wer_list(ref_list , hyp_list)
.. code-block:: python
7-cer_list(ref_list , hyp_list)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="sad_tf_mlp.h5", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
8-DER(ref_list , res_list , file_dur=-1 , sr=8000) ``Detection Error Rate``
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
from iman import tsne
=====================
iman.Report
~~~~~~~~~~~
1-plot(fea , label)
- **Initializer**:
from iman import xvector
========================
1-xvec,lda_xvec,gender = get(filename , model(model_path , model_name , model_speaker_num))
.. code-block:: python
r = Report.rep(log_dir=None)
from iman import web
====================
1-change_wallpaper()
- ``WS(_type, _name, value, itr)``: Add scalar to TensorBoard.
- ``WT(_type, _name, _str, itr)``: Add text to TensorBoard.
- ``WG(pytorch_model, example_input)``: Add graph to TensorBoard.
- ``WI(_type, _name, images, itr)``: Add image to TensorBoard.
2-dl(url)
iman.par
~~~~~~~~
3-links(url , filter_text=None)
- **Parallel Processing**:
4-imgs(url , filter_text=None)
.. code-block:: python
from iman import matlab
=======================
1-np2mat(param , mat_file_name)
if __name__ == '__main__':
res = par.par(files, func, worker=4, args=[]) # func defined as: def func(fname, _args): ...
2-dic2mat(param , mat_file_name)
iman.Image
~~~~~~~~~~
3-mat2dic(mat_file_name)
- ``Image.convert(fname_pattern, ext='jpg', ofolder=None, w=-1, h=-1, level=100, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Convert images to specified format.
- ``Image.resize(fname_pattern, ext='jpg', ofolder=None, w=2, h=2, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Resize images to 1/w and 1/h.
from iman import Features
=========================
1- mfcc_fea,mspec,log_energy = mfcc.SB.Get(wav,sample_rate) ``Compute MFCC with speechbrain - input must read with torchaudio``
iman.Boors
~~~~~~~~~~
2-mfcc.SB.Normal(MFCC) ``Mean Var Normalization Utt with speechbrain``
- ``Boors.get(sahm)``: Get stock information.
3- mfcc_fea,log_energy = mfcc.LS.Get(wav,sample_rate,le=False) ``Compute MFCC with Librosa - input is numpy array``
iman.Text
~~~~~~~~~
4-mfcc.LS.Normal(MFCC , win_len=150) ``Mean Var Normalization Local 150 left and 150 right``
- **Initializer**:
from iman import AUG
====================
1-Add_Noise(data , noise , snr)
.. code-block:: python
2-Add_Reverb( data , rir)
norm = Text.normal("c:\\Replace_List.txt")
3-Add_NoiseT(data , noise , snr) ``(torchaudio)``
- ``norm.rep(str)``: Replace text based on normalization rules.
- ``norm.from_file(filename, file_out=None)``: Normalize text from a file.
4-Add_ReverbT( data , rir) ``(torchaudio)``
iman.num2fa
~~~~~~~~~~~
5-mp3(fname , fout,sr_out,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``words(number)``: Convert number to Persian words.
6-speed(fname,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
iman.examples
~~~~~~~~~~~~~
7-volume(fname ,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``examples.items``: Get items in the examples folder.
- ``examples.help(topic)``: Get help on a specific topic.
from iman.[sad_torch_mfcc | sad_tf] import *
===============================================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , tq=1,ffmpeg_path='c:\\ffmpeg.exe',complete_output=False , device='cuda',input_type='file') ``TORCH``
iman.Rar
~~~~~~~~
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=16000 , model_path="c:\\keras_speech_music_noise_cnn.hdf5",gender_path="c:\\keras_male_female_cnn.hdf5",ffmpeg_path='c:\\ffmpeg.exe',detect_gender=False,complete_output=False,device='cuda',input_type='file') ``TensorFlow``
- ``rar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create RAR archive.
- ``zip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create ZIP archive.
- ``unrar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract RAR archive.
- ``unzip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract ZIP archive.
isig,wav,mfcc = seg(fname) ``mfcc output Just in torch model``
iman.Enhance
~~~~~~~~~~~~
nmfcc = filter_fea(isig , mfcc , sr , max_time) ``Just in torch model``
- ``Enhance.Dereverb(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeEcho-DeReverb.pth")``: Dereverberate audio files.
- ``Enhance.Denoise(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeNoise-Lite.pth")``: Denoise audio files.
mfcc = MVN(mfcc) ``Just in torch model``
iman.tf
~~~~~~~
isig = filter_output(isig , max_silence ,ignore_small_speech_segments , max_speech_len ,split_speech_bigger_than) ``Do when complete_output=False``
- ``flops(model)``: Get FLOPs of a TensorFlow model.
- ``param(model)``: Get parameter count of a TensorFlow model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
- ``limit()``: Limit GPU memory allocation for TensorFlow models.
seg2aud(isig , filename)
seg2json(isig)
iman.torch
~~~~~~~~~~
seg2Gender_Info(isig)
- ``param(model)``: Get parameter and trainable count of a PyTorch model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``layers(model)``: Get layers of a PyTorch model.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
seg2Info(isig)
iman.yt
~~~~~~~
wav_speech , wav_noise = filter_sig(isig , wav , sr) ``Get Speech and Noise Parts of file - Do when complete_output=False``
- ``dl(url)``: Download a YouTube video.
- ``list_formats(url)``: List available formats for a YouTube link.
from sad_tf.segmentero import Segmenter ``to use onnx models - need to install onnxruntime``
iman.svad
~~~~~~~~~
from iman.sad_torch_mfcc_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``TORCH - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
- ``segments, wav = svad(filename, sampling_rate=16000, min_speech_duration_ms=250, max_speech_duration_s=float('inf'), min_silence_duration_ms=100)``: Run fast speech activity detection and return speech segments.
from iman.sad_tf_mlp_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="sad_tf_mlp.h5" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``Tensorflow (small mlp model) - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
Dependencies
------------
from iman import Report ``Tensorboard Writer``
==================================================
r=Report.rep(log_dir=None)
The ``iman`` package requires the following:
r.WS(_type , _name , value , itr) ``Add_scalar``
- **Python Packages**: ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, ``onnxruntime`` (for ONNX models).
- **External Tools**: ``ffmpeg``, ``ffprobe``, ``WinRAR`` (for RAR/ZIP operations).
- **Optional**: Pre-trained models (e.g., for VAD, x-vector, dereverberation) specified in function arguments.
r.WT(_type , _name , _str , itr) ``Add_text``
Check the package's ``requirements.txt`` for specific versions.
r.WG(pytorch_model , example_input) ``Add_graph``
Documentation
-------------
r.WI(_type , _name , images , itr) ``Add_image``
For detailed usage, refer to the source code or use the built-in help system:
from iman import par
========================
if (__name__ == '__main__'):
res = par.par(files , func , worker=4 , args=[]) ``def func(fname , _args): ...``
.. code-block:: python
from iman import Image
=========================
Image.convert(fname_pattern ,ext ='jpg',ofolder=None , w=-1 , h=-1,level=100, worker=4,ffmpeg_path='c:\\ffmpeg.exe')
from iman import examples
examples.help("Audio") # Get help on the Audio module
Image.resize(fname_pattern ,ext ='jpg',ofolder=None , w=2 , h=2, worker=4,ffmpeg_path='c:\\ffmpeg.exe') ``resize to 1/w and 1/h``
Contributing
------------
from iman import Boors
==========================
Boors.get(sahm) ``get sahm info``
Contributions are welcome! Submit bug reports, feature requests, or pull requests via the project's GitHub repository (if available). Follow contribution guidelines and include tests for new features.
from iman import Text
=====================
norm = Text.normal("c:\\Replace_List.txt")
License
-------
norm.rep(str)
``iman`` is licensed under the MIT License (assumed). See the LICENSE file for details.
norm.from_file(filename ,file_out=None)
Contact
-------
from iman.num2fa import words
=============================
words(number)
For support, contact the maintainers via the project's GitHub page or email (if provided).
from iman import examples
==========================
examples.items ``get items in examples folder``
.. note::
examples.help(topic)
from iman import Rar
====================
1-rar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
2-zip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
3-unrar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
4-unzip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
from iman import Enhance
=========================
1-Enhance.Dereverb(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeEcho-DeReverb.pth")
2-Enhance.Denoise(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeNoise-Lite.pth")
from iman.tf import *
=====================
1-flops(model) ``get flops of tf model``
2-param(model) ``return parameter number of tf model``
3-paramp(model) ``return parameter number of tf model and print model layers``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
6-limit() ``Tf model only allocate as much GPU memory based on runtime allocations``
from iman.torch import *
========================
1-param(model) ``return parameter number and trainable number of torch model``
2-paramp(model) ``return parameter number of torch model and print model layers``
3-layers(model) ``return layers of torch model``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
from iman.yt import *
========================
1-dl(url) ``Download youtube link``
2-list_formats(url) ``return all available formats for yt link``
Some functions require external tools (e.g., ``ffmpeg``, ``WinRAR``) or pre-trained models. Ensure these are configured correctly. Paths like ``c:\\ffmpeg.exe`` are Windows-specific; adjust for other operating systems.

@@ -11,3 +11,3 @@ import os

name="iman",
version='1.0.28',
version='2.0',
author="Iman Sarraf",

@@ -14,0 +14,0 @@ author_email="imansarraf@gmail.com",