You're Invited: Meet the Socket Team at RSAC and BSidesSF 2026, March 23–26. RSVP
Socket
Book a DemoSign in
Socket

iman

Package Overview
Dependencies
Maintainers
1
Versions
131
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

iman - pypi Package Compare versions

Comparing version
1.0.28
to
2.0
+25
iman/svad/__init__.py
# iman/svad/__init__.py -- package initializer for the silero-VAD wrapper.
from .model import load_silero_vad

# NOTE: loading the model here is a deliberate import-time side effect:
# importing the package immediately deserializes the JIT model (and prints
# a confirmation). The instance is shared by svad() below.
vad_model = load_silero_vad()
print("silero_vad jit model Loaded")

from .utils_vad import (get_speech_timestamps,
                        save_audio,
                        read_audio,
                        VADIterator,
                        collect_chunks,
                        drop_chunks)
def svad(filename, sampling_rate=16000, min_speech_duration_ms=250,
         max_speech_duration_s=float('inf'), min_silence_duration_ms=100):
    """Run voice-activity detection on an audio file.

    Reads *filename* with ``read_audio`` and feeds it through the
    module-level ``vad_model``.

    Returns
    -------
    tuple
        ``(speech_timestamps, wav)`` where timestamps are dicts with
        ``start``/``end`` expressed in seconds, and ``wav`` is the decoded
        one-dimensional audio tensor.
    """
    audio = read_audio(filename)
    stamps = get_speech_timestamps(
        audio,
        vad_model,
        return_seconds=True,  # report timestamps in seconds, not samples
        sampling_rate=sampling_rate,
        min_speech_duration_ms=min_speech_duration_ms,
        max_speech_duration_s=max_speech_duration_s,
        min_silence_duration_ms=min_silence_duration_ms,
    )
    return stamps, audio
# iman/svad/model.py -- loader for the bundled silero VAD model files.
from .utils_vad import init_jit_model, OnnxWrapper
import torch

# Pin torch to a single thread at import time; VAD chunks are tiny, so
# thread fan-out only adds overhead.
torch.set_num_threads(1)
import os
def load_silero_vad(onnx=False, opset_version=16):
    """Load the silero VAD model bundled in this package's ``data`` folder.

    Parameters
    ----------
    onnx: bool (default - False)
        When True, load the ONNX model through ``OnnxWrapper``; otherwise
        load the TorchScript (.jit) model.
    opset_version: int (default - 16)
        ONNX opset of the bundled model; only 16 is currently shipped.

    Returns
    -------
    The loaded model object.

    Raises
    ------
    Exception
        If ``onnx`` is True and ``opset_version`` is unsupported.
    FileNotFoundError
        If the expected model file is missing from the data directory.
    """
    available_ops = [16]
    if onnx and opset_version not in available_ops:
        raise Exception(f'Available ONNX opset_version: {available_ops}')

    # Pick the on-disk filename for the requested backend/opset.
    if not onnx:
        model_name = 'silero_vad.jit'
    elif opset_version == 16:
        model_name = 'silero_vad.onnx'
    else:
        # Unreachable while available_ops == [16]; kept for future opsets.
        model_name = f'silero_vad_16k_op{opset_version}.onnx'

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    model_file_path = os.path.join(data_dir, model_name)
    if not os.path.exists(model_file_path):
        raise FileNotFoundError(f"Model file not found: {model_file_path}")

    if onnx:
        return OnnxWrapper(str(model_file_path), force_onnx_cpu=True)
    return init_jit_model(model_file_path)
# iman/svad/utils_vad.py -- VAD utilities (adapted from the silero-vad project).
import torch
import torchaudio
from typing import Callable, List
import warnings

# Languages listed by the upstream silero project; not read anywhere in
# this file -- presumably informational metadata for callers.
languages = ['ru', 'en', 'de', 'es']
class OnnxWrapper():
    """ONNX-runtime backend for the silero VAD model.

    Holds streaming state (the model's recurrent state plus a short audio
    context) between calls so consecutive fixed-size chunks can be scored
    as one continuous stream.
    """

    def __init__(self, path, force_onnx_cpu=False):
        # numpy is imported lazily and published via `global np` so that
        # __call__ (which references the module-level name `np`) works even
        # though numpy is only needed for the ONNX backend.
        import numpy as np
        global np
        import onnxruntime
        opts = onnxruntime.SessionOptions()
        # Single-threaded inference: chunks are tiny, threads add overhead.
        opts.inter_op_num_threads = 1
        opts.intra_op_num_threads = 1
        if force_onnx_cpu and 'CPUExecutionProvider' in onnxruntime.get_available_providers():
            self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'], sess_options=opts)
        else:
            self.session = onnxruntime.InferenceSession(path, sess_options=opts)
        self.reset_states()
        # Models whose filename contains '16k' are single-rate variants.
        if '16k' in path:
            warnings.warn('This model support only 16000 sampling rate!')
            self.sample_rates = [16000]
        else:
            self.sample_rates = [8000, 16000]

    def _validate_input(self, x, sr: int):
        """Normalize a chunk to shape (batch, samples) and a supported rate.

        Multiples of 16 kHz are decimated down to 16 kHz by striding.
        Raises ValueError for >2-D input, unsupported rates, or chunks
        shorter than sr/31.25 samples (i.e. < 512 @ 16 kHz / < 256 @ 8 kHz).
        """
        if x.dim() == 1:
            x = x.unsqueeze(0)
        if x.dim() > 2:
            raise ValueError(f"Too many dimensions for input audio chunk {x.dim()}")
        if sr != 16000 and (sr % 16000 == 0):
            step = sr // 16000
            x = x[:,::step]
            sr = 16000
        if sr not in self.sample_rates:
            raise ValueError(f"Supported sampling rates: {self.sample_rates} (or multiply of 16000)")
        if sr / x.shape[1] > 31.25:
            raise ValueError("Input audio chunk is too short")
        return x, sr

    def reset_states(self, batch_size=1):
        """Clear the streaming state (call between independent audio streams)."""
        self._state = torch.zeros((2, batch_size, 128)).float()
        self._context = torch.zeros(0)
        self._last_sr = 0
        self._last_batch_size = 0

    def __call__(self, x, sr: int):
        """Score one fixed-size chunk; returns per-batch speech probability.

        Chunks must be exactly 512 samples @ 16 kHz or 256 samples @ 8 kHz.
        State is reset automatically if the sample rate or batch size
        changes between calls.
        """
        x, sr = self._validate_input(x, sr)
        num_samples = 512 if sr == 16000 else 256
        if x.shape[-1] != num_samples:
            raise ValueError(f"Provided number of samples is {x.shape[-1]} (Supported values: 256 for 8000 sample rate, 512 for 16000)")
        batch_size = x.shape[0]
        context_size = 64 if sr == 16000 else 32
        if not self._last_batch_size:
            self.reset_states(batch_size)
        if (self._last_sr) and (self._last_sr != sr):
            self.reset_states(batch_size)
        if (self._last_batch_size) and (self._last_batch_size != batch_size):
            self.reset_states(batch_size)
        if not len(self._context):
            self._context = torch.zeros(batch_size, context_size)
        # Prepend the tail of the previous chunk so windows overlap slightly.
        x = torch.cat([self._context, x], dim=1)
        if sr in [8000, 16000]:
            ort_inputs = {'input': x.numpy(), 'state': self._state.numpy(), 'sr': np.array(sr, dtype='int64')}
            ort_outs = self.session.run(None, ort_inputs)
            out, state = ort_outs
            self._state = torch.from_numpy(state)
        else:
            raise ValueError()
        # Remember the last context_size samples for the next call.
        self._context = x[..., -context_size:]
        self._last_sr = sr
        self._last_batch_size = batch_size
        out = torch.from_numpy(out)
        return out

    def audio_forward(self, x, sr: int):
        """Score a whole waveform chunk-by-chunk; returns stacked outputs.

        Input is zero-padded at the end to a multiple of the chunk size.
        """
        outs = []
        x, sr = self._validate_input(x, sr)
        self.reset_states()
        num_samples = 512 if sr == 16000 else 256
        if x.shape[1] % num_samples:
            pad_num = num_samples - (x.shape[1] % num_samples)
            x = torch.nn.functional.pad(x, (0, pad_num), 'constant', value=0.0)
        for i in range(0, x.shape[1], num_samples):
            wavs_batch = x[:, i:i+num_samples]
            out_chunk = self.__call__(wavs_batch, sr)
            outs.append(out_chunk)
        stacked = torch.cat(outs, dim=1)
        return stacked.cpu()
class Validator():
    """Download a VAD model from *url* and run inference on tensors.

    The file is fetched to the local path ``inf.model``; the backend
    (ONNX vs TorchScript) is chosen from the URL suffix.
    """

    def __init__(self, url, force_onnx_cpu):
        self.onnx = url.endswith('.onnx')
        torch.hub.download_url_to_file(url, 'inf.model')
        if not self.onnx:
            self.model = init_jit_model(model_path='inf.model')
            return
        import onnxruntime
        providers = onnxruntime.get_available_providers()
        if force_onnx_cpu and 'CPUExecutionProvider' in providers:
            self.model = onnxruntime.InferenceSession('inf.model', providers=['CPUExecutionProvider'])
        else:
            self.model = onnxruntime.InferenceSession('inf.model')

    def __call__(self, inputs: torch.Tensor):
        """Run the model on *inputs*; ONNX outputs are converted to tensors."""
        with torch.no_grad():
            if not self.onnx:
                return self.model(inputs)
            ort_inputs = {'input': inputs.cpu().numpy()}
            raw_outs = self.model.run(None, ort_inputs)
            return [torch.Tensor(o) for o in raw_outs]
def read_audio(path: str,
               sampling_rate: int = 16000):
    """Read an audio file as a mono float tensor at *sampling_rate*.

    Tries sox effects first (mixdown + rate conversion in one pass); on any
    failure falls back to ``torchaudio.load`` followed by a manual mono
    downmix and resample.

    Parameters
    ----------
    path: str
        Path of the audio file to read.
    sampling_rate: int (default - 16000)
        Target sample rate of the returned tensor.

    Returns
    -------
    torch.Tensor, one dimensional
    """
    list_backends = torchaudio.list_audio_backends()
    assert len(list_backends) > 0, 'The list of available backends is empty, please install backend manually. \
\n Recommendations: \n \tSox (UNIX OS) \n \tSoundfile (Windows OS, UNIX OS) \n \tffmpeg (Windows OS, UNIX OS)'
    try:
        effects = [
            ['channels', '1'],
            ['rate', str(sampling_rate)]
        ]
        wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
    # BUGFIX: was a bare `except:` which also swallowed KeyboardInterrupt /
    # SystemExit; only ordinary errors should trigger the fallback path.
    except Exception:
        wav, sr = torchaudio.load(path)
        if wav.size(0) > 1:
            # Downmix multi-channel audio to mono.
            wav = wav.mean(dim=0, keepdim=True)
        if sr != sampling_rate:
            transform = torchaudio.transforms.Resample(orig_freq=sr,
                                                       new_freq=sampling_rate)
            wav = transform(wav)
            sr = sampling_rate
    assert sr == sampling_rate
    return wav.squeeze(0)
def save_audio(path: str,
               tensor: torch.Tensor,
               sampling_rate: int = 16000):
    """Write a one-dimensional audio tensor to *path* as 16-bit PCM."""
    # torchaudio.save expects a (channels, samples) layout.
    channel_first = tensor.unsqueeze(0)
    torchaudio.save(path, channel_first, sampling_rate, bits_per_sample=16)
def init_jit_model(model_path: str,
                   device=torch.device('cpu')):
    """Load a TorchScript model from *model_path* onto *device* in eval mode."""
    jit_model = torch.jit.load(model_path, map_location=device)
    jit_model.eval()
    return jit_model
def make_visualization(probs, step):
    """Plot per-chunk speech probabilities as an area chart.

    probs: sequence of speech probabilities, one per analysis window
    step: window duration in seconds (x-axis spacing between points)
    """
    import pandas as pd
    timeline = [i * step for i in range(len(probs))]
    frame = pd.DataFrame({'probs': probs}, index=timeline)
    frame.plot(figsize=(16, 8),
               kind='area', ylim=[0, 1.05], xlim=[0, len(probs) * step],
               xlabel='seconds',
               ylabel='speech probability',
               colormap='tab20')
@torch.no_grad()
def get_speech_timestamps(audio: torch.Tensor,
                          model,
                          threshold: float = 0.5,
                          sampling_rate: int = 16000,
                          min_speech_duration_ms: int = 250,
                          max_speech_duration_s: float = float('inf'),
                          min_silence_duration_ms: int = 100,
                          speech_pad_ms: int = 30,
                          return_seconds: bool = False,
                          time_resolution: int = 1,
                          visualize_probs: bool = False,
                          progress_tracking_callback: Callable[[float], None] = None,
                          neg_threshold: float = None,
                          window_size_samples: int = 512,
                          min_silence_at_max_speech: float = 98,
                          use_max_poss_sil_at_max_speech: bool = True):
    """
    This method is used for splitting long audios into speech chunks using silero VAD

    Parameters
    ----------
    audio: torch.Tensor, one dimensional
        One dimensional float torch.Tensor, other types are casted to torch if possible
    model: preloaded .jit/.onnx silero VAD model
    threshold: float (default - 0.5)
        Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, probabilities ABOVE this value are considered as SPEECH.
        It is better to tune this parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets.
    sampling_rate: int (default - 16000)
        Currently silero VAD models support 8000 and 16000 (or multiply of 16000) sample rates
    min_speech_duration_ms: int (default - 250 milliseconds)
        Final speech chunks shorter min_speech_duration_ms are thrown out
    max_speech_duration_s: int (default - inf)
        Maximum duration of speech chunks in seconds
        Chunks longer than max_speech_duration_s will be split at the timestamp of the last silence that lasts more than 100ms (if any), to prevent aggressive cutting.
        Otherwise, they will be split aggressively just before max_speech_duration_s.
    min_silence_duration_ms: int (default - 100 milliseconds)
        In the end of each speech chunk wait for min_silence_duration_ms before separating it
    speech_pad_ms: int (default - 30 milliseconds)
        Final speech chunks are padded by speech_pad_ms each side
    return_seconds: bool (default - False)
        whether return timestamps in seconds (default - samples)
    time_resolution: bool (default - 1)
        time resolution of speech coordinates when requested as seconds
    visualize_probs: bool (default - False)
        whether draw prob hist or not
    progress_tracking_callback: Callable[[float], None] (default - None)
        callback function taking progress in percents as an argument
    neg_threshold: float (default = threshold - 0.15)
        Negative threshold (noise or exit threshold). If model's current state is SPEECH, values BELOW this value are considered as NON-SPEECH.
    min_silence_at_max_speech: float (default - 98ms)
        Minimum silence duration in ms which is used to avoid abrupt cuts when max_speech_duration_s is reached
    use_max_poss_sil_at_max_speech: bool (default - True)
        Whether to use the maximum possible silence at max_speech_duration_s or not. If not, the last silence is used.
    window_size_samples: int (default - 512 samples)
        !!! DEPRECATED, DOES NOTHING !!!

    Returns
    ----------
    speeches: list of dicts
        list containing ends and beginnings of speech chunks (samples or seconds based on return_seconds)
    """
    if not torch.is_tensor(audio):
        try:
            audio = torch.Tensor(audio)
        # BUGFIX: was a bare `except:`; narrowed so Ctrl-C is not swallowed.
        except Exception:
            raise TypeError("Audio cannot be casted to tensor. Cast it manually")

    if len(audio.shape) > 1:
        for i in range(len(audio.shape)):  # trying to squeeze empty dimensions
            audio = audio.squeeze(0)
        if len(audio.shape) > 1:
            raise ValueError("More than one dimension in audio. Are you trying to process audio with 2 channels?")

    if sampling_rate > 16000 and (sampling_rate % 16000 == 0):
        # Decimate multiples of 16 kHz down to 16 kHz by striding.
        step = sampling_rate // 16000
        sampling_rate = 16000
        audio = audio[::step]
        warnings.warn('Sampling rate is a multiply of 16000, casting to 16000 manually!')
    else:
        step = 1

    if sampling_rate not in [8000, 16000]:
        raise ValueError("Currently silero VAD models support 8000 and 16000 (or multiply of 16000) sample rates")

    # The window size is fixed by the model; the parameter is deprecated.
    window_size_samples = 512 if sampling_rate == 16000 else 256
    hop_size_samples = int(window_size_samples)

    model.reset_states()

    # Convert all millisecond/second parameters to sample counts.
    min_speech_samples = sampling_rate * min_speech_duration_ms / 1000
    speech_pad_samples = sampling_rate * speech_pad_ms / 1000
    max_speech_samples = sampling_rate * max_speech_duration_s - window_size_samples - 2 * speech_pad_samples
    min_silence_samples = sampling_rate * min_silence_duration_ms / 1000
    min_silence_samples_at_max_speech = sampling_rate * min_silence_at_max_speech / 1000

    audio_length_samples = len(audio)

    # Pass 1: score every window; the last window is zero-padded to size.
    speech_probs = []
    for current_start_sample in range(0, audio_length_samples, hop_size_samples):
        chunk = audio[current_start_sample: current_start_sample + window_size_samples]
        if len(chunk) < window_size_samples:
            chunk = torch.nn.functional.pad(chunk, (0, int(window_size_samples - len(chunk))))
        # BUGFIX: this call used to be wrapped in `try/except` that dropped
        # into `ipdb.set_trace()` -- a debugging leftover that swallowed the
        # real error (and left speech_prob unbound). Let errors propagate.
        speech_prob = model(chunk, sampling_rate).item()
        speech_probs.append(speech_prob)
        # calculate progress and send it to the callback function
        progress = current_start_sample + hop_size_samples
        if progress > audio_length_samples:
            progress = audio_length_samples
        progress_percent = (progress / audio_length_samples) * 100
        if progress_tracking_callback:
            progress_tracking_callback(progress_percent)

    # Pass 2: state machine over the probabilities.
    triggered = False
    speeches = []
    current_speech = {}

    if neg_threshold is None:
        neg_threshold = max(threshold - 0.15, 0.01)
    temp_end = 0  # to save potential segment end (and tolerate some silence)
    prev_end = next_start = 0  # to save potential segment limits in case of maximum segment size reached
    possible_ends = []

    for i, speech_prob in enumerate(speech_probs):
        if (speech_prob >= threshold) and temp_end:
            # Speech resumed after a tentative silence: remember the silence
            # as a possible split point if it was long enough.
            # (A redundant `if temp_end != 0` check was removed -- the outer
            # condition already guarantees temp_end is non-zero.)
            sil_dur = (hop_size_samples * i) - temp_end
            if sil_dur > min_silence_samples_at_max_speech:
                possible_ends.append((temp_end, sil_dur))
            temp_end = 0
            if next_start < prev_end:
                next_start = hop_size_samples * i

        if (speech_prob >= threshold) and not triggered:
            triggered = True
            current_speech['start'] = hop_size_samples * i
            continue

        if triggered and (hop_size_samples * i) - current_speech['start'] > max_speech_samples:
            if possible_ends:
                if use_max_poss_sil_at_max_speech:
                    prev_end, dur = max(possible_ends, key=lambda x: x[1])  # use the longest possible silence segment in the current speech chunk
                else:
                    prev_end, dur = possible_ends[-1]  # use the last possible silence segment
                current_speech['end'] = prev_end
                speeches.append(current_speech)
                current_speech = {}
                next_start = prev_end + dur
                if next_start < prev_end + hop_size_samples * i:  # previously reached silence (< neg_thres) and is still not speech (< thres)
                    current_speech['start'] = next_start
                else:
                    triggered = False
                prev_end = next_start = temp_end = 0
                possible_ends = []
            else:
                # No usable silence seen: cut aggressively right here.
                current_speech['end'] = hop_size_samples * i
                speeches.append(current_speech)
                current_speech = {}
                prev_end = next_start = temp_end = 0
                triggered = False
                possible_ends = []
            continue

        if (speech_prob < neg_threshold) and triggered:
            if not temp_end:
                temp_end = hop_size_samples * i
            if (hop_size_samples * i) - temp_end < min_silence_samples:
                # Silence not long enough yet -- keep the segment open.
                continue
            else:
                current_speech['end'] = temp_end
                if (current_speech['end'] - current_speech['start']) > min_speech_samples:
                    speeches.append(current_speech)
                current_speech = {}
                prev_end = next_start = temp_end = 0
                triggered = False
                possible_ends = []
                continue

    # Close a segment still open at end-of-audio, if long enough.
    if current_speech and (audio_length_samples - current_speech['start']) > min_speech_samples:
        current_speech['end'] = audio_length_samples
        speeches.append(current_speech)

    # Pad each segment by speech_pad_samples, sharing the gap when two
    # neighboring segments are closer than twice the pad.
    for i, speech in enumerate(speeches):
        if i == 0:
            speech['start'] = int(max(0, speech['start'] - speech_pad_samples))
        if i != len(speeches) - 1:
            silence_duration = speeches[i+1]['start'] - speech['end']
            if silence_duration < 2 * speech_pad_samples:
                speech['end'] += int(silence_duration // 2)
                speeches[i+1]['start'] = int(max(0, speeches[i+1]['start'] - silence_duration // 2))
            else:
                speech['end'] = int(min(audio_length_samples, speech['end'] + speech_pad_samples))
                speeches[i+1]['start'] = int(max(0, speeches[i+1]['start'] - speech_pad_samples))
        else:
            speech['end'] = int(min(audio_length_samples, speech['end'] + speech_pad_samples))

    if return_seconds:
        audio_length_seconds = audio_length_samples / sampling_rate
        for speech_dict in speeches:
            speech_dict['start'] = max(round(speech_dict['start'] / sampling_rate, time_resolution), 0)
            speech_dict['end'] = min(round(speech_dict['end'] / sampling_rate, time_resolution), audio_length_seconds)
    elif step > 1:
        # Map sample coordinates back to the original (pre-decimation) rate.
        for speech_dict in speeches:
            speech_dict['start'] *= step
            speech_dict['end'] *= step

    if visualize_probs:
        make_visualization(speech_probs, hop_size_samples / sampling_rate)

    return speeches
class VADIterator:
    def __init__(self,
                 model,
                 threshold: float = 0.5,
                 sampling_rate: int = 16000,
                 min_silence_duration_ms: int = 100,
                 speech_pad_ms: int = 30
                 ):
        """
        Class for stream imitation

        Parameters
        ----------
        model: preloaded .jit/.onnx silero VAD model
        threshold: float (default - 0.5)
            Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, probabilities ABOVE this value are considered as SPEECH.
            It is better to tune this parameter for each dataset separately, but "lazy" 0.5 is pretty good for most datasets.
        sampling_rate: int (default - 16000)
            Currently silero VAD models support 8000 and 16000 sample rates
        min_silence_duration_ms: int (default - 100 milliseconds)
            In the end of each speech chunk wait for min_silence_duration_ms before separating it
        speech_pad_ms: int (default - 30 milliseconds)
            Final speech chunks are padded by speech_pad_ms each side
        """
        self.model = model
        self.threshold = threshold
        self.sampling_rate = sampling_rate

        if sampling_rate not in [8000, 16000]:
            raise ValueError('VADIterator does not support sampling rates other than [8000, 16000]')

        # Millisecond parameters converted to sample counts once, up front.
        self.min_silence_samples = sampling_rate * min_silence_duration_ms / 1000
        self.speech_pad_samples = sampling_rate * speech_pad_ms / 1000
        self.reset_states()

    def reset_states(self):
        """Clear model memory and the iterator's trigger/position counters."""
        self.model.reset_states()
        self.triggered = False
        self.temp_end = 0
        self.current_sample = 0

    @torch.no_grad()
    def __call__(self, x, return_seconds=False, time_resolution: int = 1):
        """
        x: torch.Tensor
            audio chunk (see examples in repo)
        return_seconds: bool (default - False)
            whether return timestamps in seconds (default - samples)
        time_resolution: int (default - 1)
            time resolution of speech coordinates when requested as seconds
        """
        if not torch.is_tensor(x):
            try:
                x = torch.Tensor(x)
            except:
                raise TypeError("Audio cannot be casted to tensor. Cast it manually")

        chunk_len = len(x[0]) if x.dim() == 2 else len(x)
        self.current_sample += chunk_len

        speech_prob = self.model(x, self.sampling_rate).item()

        if speech_prob >= self.threshold:
            # Any tentative segment end is cancelled by renewed speech.
            if self.temp_end:
                self.temp_end = 0
            if self.triggered:
                return None
            # Transition silence -> speech: report a (padded) start.
            self.triggered = True
            speech_start = max(0, self.current_sample - self.speech_pad_samples - chunk_len)
            if return_seconds:
                return {'start': round(speech_start / self.sampling_rate, time_resolution)}
            return {'start': int(speech_start)}

        if self.triggered and speech_prob < self.threshold - 0.15:
            if not self.temp_end:
                self.temp_end = self.current_sample
            if self.current_sample - self.temp_end < self.min_silence_samples:
                # Silence still shorter than the required minimum.
                return None
            # Transition speech -> silence: report a (padded) end.
            speech_end = self.temp_end + self.speech_pad_samples - chunk_len
            self.temp_end = 0
            self.triggered = False
            if return_seconds:
                return {'end': round(speech_end / self.sampling_rate, time_resolution)}
            return {'end': int(speech_end)}

        return None
def collect_chunks(tss: List[dict],
                   wav: torch.Tensor,
                   seconds: bool = False,
                   sampling_rate: int = None) -> torch.Tensor:
    """Collect audio chunks from a longer audio clip

    This method extracts audio chunks from an audio clip, using a list of
    provided coordinates, and concatenates them together. Coordinates can be
    passed either as sample numbers or in seconds, in which case the audio
    sampling rate is also needed.

    Parameters
    ----------
    tss: List[dict]
        Coordinate list of the clips to collect from the audio.
    wav: torch.Tensor, one dimensional
        One dimensional float torch.Tensor, containing the audio to clip.
    seconds: bool (default - False)
        Whether input coordinates are passed as seconds or samples.
    sampling_rate: int (default - None)
        Input audio sampling rate. Required if seconds is True.

    Returns
    -------
    torch.Tensor, one dimensional
        One dimensional float torch.Tensor of the concatenated clipped audio
        chunks.

    Raises
    ------
    ValueError
        Raised if sampling_rate is not provided when seconds is True.
    """
    if seconds and not sampling_rate:
        raise ValueError('sampling_rate must be provided when seconds is True')

    _tss = _seconds_to_samples_tss(tss, sampling_rate) if seconds else tss
    # BUGFIX: an empty coordinate list used to crash in torch.cat([]);
    # return an empty slice (same dtype/device as wav) instead.
    if not _tss:
        return wav[:0]
    chunks = [wav[i['start']:i['end']] for i in _tss]
    return torch.cat(chunks)
def drop_chunks(tss: List[dict],
                wav: torch.Tensor,
                seconds: bool = False,
                sampling_rate: int = None) -> torch.Tensor:
    """Drop audio chunks from a longer audio clip

    This method extracts audio chunks from an audio clip, using a list of
    provided coordinates, and drops them. Coordinates can be passed either as
    sample numbers or in seconds, in which case the audio sampling rate is also
    needed.

    Parameters
    ----------
    tss: List[dict]
        Coordinate list of the clips to drop from the audio.
    wav: torch.Tensor, one dimensional
        One dimensional float torch.Tensor, containing the audio to clip.
    seconds: bool (default - False)
        Whether input coordinates are passed as seconds or samples.
    sampling_rate: int (default - None)
        Input audio sampling rate. Required if seconds is True.

    Returns
    -------
    torch.Tensor, one dimensional
        One dimensional float torch.Tensor of the input audio minus the dropped
        chunks.

    Raises
    ------
    ValueError
        Raised if sampling_rate is not provided when seconds is True.
    """
    if seconds and not sampling_rate:
        raise ValueError('sampling_rate must be provided when seconds is True')

    _tss = _seconds_to_samples_tss(tss, sampling_rate) if seconds else tss
    chunks = []
    cur_start = 0
    for i in _tss:
        chunks.append(wav[cur_start: i['start']])
        cur_start = i['end']
    # BUGFIX: the audio after the last dropped chunk was silently discarded,
    # and an empty coordinate list crashed in torch.cat([]). Appending the
    # tail fixes both: with no coordinates the whole input is returned.
    chunks.append(wav[cur_start:])
    return torch.cat(chunks)
def _seconds_to_samples_tss(tss: List[dict], sampling_rate: int) -> List[dict]:
"""Convert coordinates expressed in seconds to sample coordinates.
"""
return [{
'start': round(crd['start']) * sampling_rate,
'end': round(crd['end']) * sampling_rate
} for crd in tss]
+272
-210
Metadata-Version: 2.1
Name: iman
Version: 1.0.28
Version: 2.0
Summary: Python package for daily Tasks

@@ -12,328 +12,390 @@ Author: Iman Sarraf

from iman import *
==================
iman
====
1-plt
Overview
--------
2-now() ``get time``
``iman`` is a comprehensive Python package offering a wide array of utilities for audio processing, file manipulation, machine learning, system operations, web utilities, and more. It provides tools for tasks such as audio feature extraction, voice activity detection, file I/O, system monitoring, and integration with frameworks like PyTorch and TensorFlow. The package is organized into multiple submodules, each designed for specific functionalities, as detailed below.
3-F ``format floating point``
Installation
------------
4-D ``format int number``
Install ``iman`` via pip:
5-Write_List(MyList,Filename)
.. code-block:: bash
6-Write_Dic(MyDic,Filename)
pip install iman
7-Read(Filename) ``read txt file``
Ensure dependencies like ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, and external tools like ``ffmpeg``, ``ffprobe``, and ``WinRAR`` are installed. Some functions require pre-trained models or specific paths (e.g., model files, ``ffmpeg_path``).
8-Read_Lines(Filename) ``read txt file line by line and return list``
Usage
-----
9-Write(_str,Filename)
Below are examples of key functionalities from the ``iman`` package. For detailed function signatures and parameters, refer to the sections below or use the built-in help system:
10-gf(pattern) ``Get files in a directory``
.. code-block:: python
11-gfa(directory_pattern , ext="*.*") ``Get Files in a Directory and SubDirectories``
from iman import examples
examples.help("Audio") # Get help on a specific module
12-ReadE(Filename) ``Read Excel files``
**Example: Audio Processing**
13-PM(dir) ``create directory``
.. code-block:: python
14-PB(fname) ``get basename``
from iman import Audio
15-PN(fname) ``get file name``
# Read a WAV file
data, sr = Audio.Read("audio.wav", sr=16000, start_from=0, dur=None, mono=True, ffmpeg_path="c:\\ffmpeg.exe", ffprobe_path="c:\\ffprobe.exe")
16-PE(fname) ``get ext``
# Resample and write audio
resampled = Audio.Resample(data, fs=sr, sr=8000)
Audio.Write("output.wav", resampled, fs=8000)
17-PD(fname) ``get directory``
**Example: File Operations**
18-PS(fname) ``get size``
.. code-block:: python
19-PJ(segments) ``Join Path``
from iman import *
20-clear() ``clear cmd``
# Get files matching a pattern
files = gf("*.txt")
21-os
# Write a dictionary to a file
my_dict = {"key1": "value1", "key2": "value2"}
Write_Dic(my_dict, "output.txt")
22-np
**Example: VAD with Segmenter**
23-RI(start_int , end_int , count=1) ``random int``
.. code-block:: python
24-RF(start_float , end_float , count=1) ``random float``
from iman.sad_torch_mfcc import Segmenter
25-RS(Arr) ``shuffle``
seg = Segmenter(batch_size=32, vad_type="vad", sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path="c:\\ffmpeg.exe", complete_output=False, device="cuda", input_type="file")
isig, wav, mfcc = seg("audio.wav")
26-LJ(job_file_name)
Modules and Functions
---------------------
27-SJ(value , job_file_name)
The ``iman`` package is organized into several submodules, each with specific functions. Below is a complete list of modules and their functions as provided.
28-LN(np_file_name)
iman
~~~~
29-SN(arr , np_file_name)
- ``plt``: Matplotlib plotting library.
- ``now()``: Get current time.
- ``F``: Format floating-point number.
- ``D``: Format integer number.
- ``Write_List(MyList, Filename)``: Write a list to a text file.
- ``Write_Dic(MyDic, Filename)``: Write a dictionary to a text file.
- ``Read(Filename)``: Read a text file.
- ``Read_Lines(Filename)``: Read a text file line by line and return a list.
- ``Write(_str, Filename)``: Write a string to a text file.
- ``gf(pattern)``: Get files in a directory matching a pattern.
- ``gfa(directory_pattern, ext="*.*")``: Get files in a directory and subdirectories.
- ``ReadE(Filename)``: Read Excel files.
- ``PM(dir)``: Create a directory.
- ``PB(fname)``: Get basename of a file.
- ``PN(fname)``: Get filename without path.
- ``PE(fname)``: Get file extension.
- ``PD(fname)``: Get directory of a file.
- ``PS(fname)``: Get file size.
- ``PJ(segments)``: Join path segments.
- ``clear()``: Clear command-line interface.
- ``os``: Python os module.
- ``np``: NumPy module.
- ``RI(start_int, end_int, count=1)``: Generate random integers.
- ``RF(start_float, end_float, count=1)``: Generate random floats.
- ``RS(Arr)``: Shuffle an array.
- ``LJ(job_file_name)``: Load job file (details not specified).
- ``SJ(value, job_file_name)``: Save job file (details not specified).
- ``LN(np_file_name)``: Load NumPy file (details not specified).
- ``SN(arr, np_file_name)``: Save NumPy array to file.
- ``cmd(command, redirect=True)``: Run a command in CMD.
- ``PX(fname)``: Check existence of a file.
- ``RC(Arr, size=1)``: Random choice from an array.
- ``onehot(data, nb_classes)``: Convert data to one-hot encoding.
- ``exe(pyfile)``: Convert Python file to executable (requires PyInstaller).
- ``FWL(wavfolder, sr)``: Get total audio length in a folder.
- ``norm(vector)``: Normalize a vector (vector/magnitude(vector)).
- ``delete(pattern)``: Delete files matching a pattern.
- ``rename(fname, fout)``: Rename a file.
- ``separate(pattern, folout)``: Separate vocal from music.
- ``dll(fname)``: Create a .pyd file from a Python file.
- ``get_hard_serial()``: Get hardware serial number.
- ``mute_mic()``: Toggle microphone on/off.
- ``PA(fname)``: Get absolute path of a file.
30-cmd(command , redirect=True) ``Run command in CMD``
iman.Audio
~~~~~~~~~~
31-PX(fname) ``check existance of file``
- ``Read(filename, sr, start_from, dur, mono, ffmpeg_path, ffprobe_path)``: Read WAV, ALAW, MP3, and other audio formats.
- ``Resample(data, fs, sr)``: Resample audio data.
- ``Write(filename, data, fs)``: Write audio data to a file.
- ``frame(y)``: Frame audio data (details not specified).
- ``split(y)``: Split audio data (details not specified).
- ``ReadT(filename, sr, mono=True)``: Read and resample WAV file with torchaudio.
- ``VAD(y, top_db=40, frame_length=200, hop_length=80)``: Voice activity detection.
- ``compress(fname_pattern, sr=16000, ext='mp3', mono=True, ffmpeg_path='c:\\ffmpeg.exe', ofolder=None, worker=4)``: Compress audio files.
- ``clip_value(wav)``: Return clipping percentage in an audio file.
- ``WriteS(filename, data, fs)``: Convert and write audio to stereo.
32-RC(Arr , size=1) ``Random Choice``
iman.info
~~~~~~~~~
33-onehot(data, nb_classes)
- ``get()``: Get information about CPU and GPU (requires torch).
- ``cpu()``: Get CPU percentage usage.
- ``gpu()``: Get GPU memory usage.
- ``memory()``: Get RAM usage in GB.
- ``plot(fname="log.txt", delay=1)``: Plot system metrics from a log file.
34-exe(pyfile) ``need pyinstaller``
iman.metrics
~~~~~~~~~~~~
35-FWL(wavfolder , sr) ``Get Folder Audio Length``
- ``EER(lab, score)``: Compute Equal Error Rate.
- ``cosine_distance(v1, v2)``: Compute cosine distance between two vectors.
- ``roc(lab, score)``: Compute ROC curve.
- ``wer(ref, hyp)``: Compute Word Error Rate.
- ``cer(ref, hyp)``: Compute Character Error Rate.
- ``wer_list(ref_list, hyp_list)``: Compute WER for lists.
- ``cer_list(ref_list, hyp_list)``: Compute CER for lists.
- ``DER(ref_list, res_list, file_dur=-1, sr=8000)``: Compute Detection Error Rate.
36-norm(vector) ``vector/magnitude(vector)``
iman.tsne
~~~~~~~~~
37-delete(pattern)
- ``plot(fea, label)``: Plot t-SNE visualization of features.
38-rename(fname , fout)
iman.xvector
~~~~~~~~~~~~
39-separate(pattern,folout) ``separate vocal from music``
- ``xvec, lda_xvec, gender = get(filename, model(model_path, model_name, model_speaker_num))``: Extract x-vectors for speaker recognition.
40-dll(fname) ``create a pyd file from py file``
iman.web
~~~~~~~~
41-get_hard_serial()
- ``change_wallpaper()``: Change system wallpaper.
- ``dl(url)``: Download a file from a URL.
- ``links(url, filter_text=None)``: Extract links from a URL.
- ``imgs(url, filter_text=None)``: Extract images from a URL.
42-mute_mic() ``on and off microphone``
iman.matlab
~~~~~~~~~~~
43-PA(fname) ``get abs path``
- ``np2mat(param, mat_file_name)``: Convert NumPy array to MATLAB file.
- ``dic2mat(param, mat_file_name)``: Convert dictionary to MATLAB file.
- ``mat2dic(mat_file_name)``: Convert MATLAB file to dictionary.
from iman import Audio
======================
1-Read(filename,sr,start_from,dur,mono,ffmpeg_path,ffprobe_path) ``Read wav alaw and mp3 and others``
iman.Features
~~~~~~~~~~~~~
2-Resample(data , fs, sr)
- ``mfcc_fea, mspec, log_energy = mfcc.SB.Get(wav, sample_rate)``: Compute MFCC with SpeechBrain (input must be read with torchaudio).
- ``mfcc.SB.Normal(MFCC)``: Mean-variance normalization of MFCC with SpeechBrain.
- ``mfcc_fea, log_energy = mfcc.LS.Get(wav, sample_rate, le=False)``: Compute MFCC with Librosa (input is NumPy array).
- ``mfcc.LS.Normal(MFCC, win_len=150)``: Mean-variance normalization (local, 150 frames left and right).
3-Write(filename, data ,fs)
iman.AUG
~~~~~~~~
4-frame(y)
- ``Add_Noise(data, noise, snr)``: Add noise to audio data.
- ``Add_Reverb(data, rir)``: Add reverberation to audio data.
- ``Add_NoiseT(data, noise, snr)``: Add noise using torchaudio.
- ``Add_ReverbT(data, rir)``: Add reverberation using torchaudio.
- ``mp3(fname, fout, sr_out, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Convert to MP3.
- ``speed(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Change audio speed.
- ``volume(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Adjust audio volume.
5-split(y)
iman.sad_torch_mfcc | iman.sad_tf
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6-ReadT(filename, sr , mono=True) ``Read and resample wav file with torchaudio``
- **Initializer** (PyTorch):
7-VAD(y,top_db=40, frame_length=200, hop_length=80)
.. code-block:: python
8-compress(fname_pattern , sr=16000 , ext='mp3' , mono=True ,ffmpeg_path='c:\\ffmpeg.exe' , ofolder=None, worker=4)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path='c:\\ffmpeg.exe', complete_output=False, device='cuda', input_type='file')
9-clip_value(wav) ``return clipping percentage in audio file``
- **Initializer** (TensorFlow):
10-WriteS(filename, data ,fs) ``Convert to Stereo``
.. code-block:: python
from iman import info
=====================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=16000, model_path="c:\\keras_speech_music_noise_cnn.hdf5", gender_path="c:\\keras_male_female_cnn.hdf5", ffmpeg_path='c:\\ffmpeg.exe', detect_gender=False, complete_output=False, device='cuda', input_type='file')
1-get() info about cpu and gpu ``need torch``
- ``isig, wav, mfcc = seg(fname)``: Process audio file (MFCC output only in PyTorch model).
- ``nmfcc = filter_fea(isig, mfcc, sr, max_time)``: Filter features (PyTorch only).
- ``mfcc = MVN(mfcc)``: Mean-variance normalization (PyTorch only).
- ``isig = filter_output(isig, max_silence, ignore_small_speech_segments, max_speech_len, split_speech_bigger_than)``: Filter output when ``complete_output=False``.
- ``seg2aud(isig, filename)``: Convert segments to audio.
- ``seg2json(isig)``: Convert segments to JSON.
- ``seg2Gender_Info(isig)``: Extract gender information from segments.
- ``seg2Info(isig)``: Extract segment information.
- ``wav_speech, wav_noise = filter_sig(isig, wav, sr)``: Get speech and noise parts (when ``complete_output=False``).
2-cpu() ``get cpu percentage usage``
- **sad_tf.segmentero**:
3-gpu() ``get gpu memory usage``
.. code-block:: python
4-memory() ``get ram usage GB``
from sad_tf.segmentero import Segmenter # Use ONNX models (requires onnxruntime)
5-plot(fname="log.txt" , delay=1)
iman.sad_torch_mfcc_speaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Initializer**:
from iman import metrics
========================
1-EER(lab,score)
.. code-block:: python
2-cosine_distance(v1,v2)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
3-roc(lab,score)
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
4-wer(ref, hyp)
iman.sad_tf_mlp_speaker
~~~~~~~~~~~~~~~~~~~~~~~
5-cer(ref, hyp)
- **Initializer**:
6-wer_list(ref_list , hyp_list)
.. code-block:: python
7-cer_list(ref_list , hyp_list)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="sad_tf_mlp.h5", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
8-DER(ref_list , res_list , file_dur=-1 , sr=8000) ``Detection Error Rate``
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
from iman import tsne
=====================
iman.Report
~~~~~~~~~~~
1-plot(fea , label)
- **Initializer**:
from iman import xvector
========================
1-xvec,lda_xvec,gender = get(filename , model(model_path , model_name , model_speaker_num))
.. code-block:: python
r = Report.rep(log_dir=None)
from iman import web
====================
1-change_wallpaper()
- ``WS(_type, _name, value, itr)``: Add scalar to TensorBoard.
- ``WT(_type, _name, _str, itr)``: Add text to TensorBoard.
- ``WG(pytorch_model, example_input)``: Add graph to TensorBoard.
- ``WI(_type, _name, images, itr)``: Add image to TensorBoard.
2-dl(url)
iman.par
~~~~~~~~
3-links(url , filter_text=None)
- **Parallel Processing**:
4-imgs(url , filter_text=None)
.. code-block:: python
from iman import matlab
=======================
1-np2mat(param , mat_file_name)
if __name__ == '__main__':
res = par.par(files, func, worker=4, args=[]) # func defined as: def func(fname, _args): ...
2-dic2mat(param , mat_file_name)
iman.Image
~~~~~~~~~~
3-mat2dic (mat_file_name)
- ``Image.convert(fname_pattern, ext='jpg', ofolder=None, w=-1, h=-1, level=100, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Convert images to specified format.
- ``Image.resize(fname_pattern, ext='jpg', ofolder=None, w=2, h=2, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Resize images to 1/w and 1/h.
from iman import Features
=========================
1- mfcc_fea,mspec,log_energy = mfcc.SB.Get(wav,sample_rate) ``Compute MFCC with speechbrain - input must be read with torchaudio``
iman.Boors
~~~~~~~~~~
2-mfcc.SB.Normal(MFCC) ``Mean Var Normalization Utt with speechbrain``
- ``Boors.get(sahm)``: Get stock information.
3- mfcc_fea,log_energy = mfcc.LS.Get(wav,sample_rate,le=False) ``Compute MFCC with Librosa - input is numpy array``
iman.Text
~~~~~~~~~
4-mfcc.LS.Normal(MFCC , win_len=150) ``Mean Var Normalization Local 150 left and 150 right``
- **Initializer**:
from iman import AUG
====================
1-Add_Noise(data , noise , snr)
.. code-block:: python
2-Add_Reverb( data , rir)
norm = Text.normal("c:\\Replace_List.txt")
3-Add_NoiseT(data , noise , snr) ``(torchaudio)``
- ``norm.rep(str)``: Replace text based on normalization rules.
- ``norm.from_file(filename, file_out=None)``: Normalize text from a file.
4-Add_ReverbT( data , rir) ``(torchaudio)``
iman.num2fa
~~~~~~~~~~~
5-mp3(fname , fout,sr_out,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``words(number)``: Convert number to Persian words.
6-speed(fname,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
iman.examples
~~~~~~~~~~~~~
7-volume(fname ,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``examples.items``: Get items in the examples folder.
- ``examples.help(topic)``: Get help on a specific topic.
from iman.[sad_torch_mfcc | sad_tf] import *
===============================================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , tq=1,ffmpeg_path='c:\\ffmpeg.exe',complete_output=False , device='cuda',input_type='file') ``TORCH``
iman.Rar
~~~~~~~~
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=16000 , model_path="c:\\keras_speech_music_noise_cnn.hdf5",gender_path="c:\\keras_male_female_cnn.hdf5",ffmpeg_path='c:\\ffmpeg.exe',detect_gender=False,complete_output=False,device='cuda',input_type='file') ``TensorFlow``
- ``rar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create RAR archive.
- ``zip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create ZIP archive.
- ``unrar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract RAR archive.
- ``unzip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract ZIP archive.
isig,wav,mfcc = seg(fname) ``mfcc output Just in torch model``
iman.Enhance
~~~~~~~~~~~~
nmfcc = filter_fea(isig , mfcc , sr , max_time) ``Just in torch model``
- ``Enhance.Dereverb(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeEcho-DeReverb.pth")``: Dereverberate audio files.
- ``Enhance.Denoise(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeNoise-Lite.pth")``: Denoise audio files.
mfcc = MVN(mfcc) ``Just in torch model``
iman.tf
~~~~~~~
isig = filter_output(isig , max_silence ,ignore_small_speech_segments , max_speech_len ,split_speech_bigger_than) ``Do when complete_output=False``
- ``flops(model)``: Get FLOPs of a TensorFlow model.
- ``param(model)``: Get parameter count of a TensorFlow model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
- ``limit()``: Limit GPU memory allocation for TensorFlow models.
seg2aud(isig , filename)
seg2json(isig)
iman.torch
~~~~~~~~~~
seg2Gender_Info(isig)
- ``param(model)``: Get parameter and trainable count of a PyTorch model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``layers(model)``: Get layers of a PyTorch model.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
seg2Info(isig)
iman.yt
~~~~~~~
wav_speech , wav_noise = filter_sig(isig , wav , sr) ``Get Speech and Noise Parts of file - Do when complete_output=False``
- ``dl(url)``: Download a YouTube video.
- ``list_formats(url)``: List available formats for a YouTube link.
from sad_tf.segmentero import Segmenter ``to use onnx models - need to install onnxruntime``
iman.svad
~~~~~~~~~
from iman.sad_torch_mfcc_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``TORCH - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
- ``segments, wav = svad(filename, sampling_rate=16000, min_speech_duration_ms=250, max_speech_duration_s=float('inf'), min_silence_duration_ms=100)``: Run fast speech activity detection and return speech segments.
from iman.sad_tf_mlp_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="sad_tf_mlp.h5" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``Tensorflow (small mlp model) - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
Dependencies
------------
from iman import Report ``Tensorboard Writer``
==================================================
r=Report.rep(log_dir=None)
The ``iman`` package requires the following:
r.WS(_type , _name , value , itr) ``Add_scalar``
- **Python Packages**: ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, ``onnxruntime`` (for ONNX models).
- **External Tools**: ``ffmpeg``, ``ffprobe``, ``WinRAR`` (for RAR/ZIP operations).
- **Optional**: Pre-trained models (e.g., for VAD, x-vector, dereverberation) specified in function arguments.
r.WT(_type , _name , _str , itr) ``Add_text``
Check the package's ``requirements.txt`` for specific versions.
r.WG(pytorch_model , example_input) ``Add_graph``
Documentation
-------------
r.WI(_type , _name , images , itr) ``Add_image``
For detailed usage, refer to the source code or use the built-in help system:
from iman import par
========================
if (__name__ == '__main__'):
res = par.par(files , func , worker=4 , args=[]) ``def func(fname , _args): ...``
.. code-block:: python
from iman import Image
=========================
Image.convert(fname_pattern ,ext ='jpg',ofolder=None , w=-1 , h=-1,level=100, worker=4,ffmpeg_path='c:\\ffmpeg.exe')
from iman import examples
examples.help("Audio") # Get help on the Audio module
Image.resize(fname_pattern ,ext ='jpg',ofolder=None , w=2 , h=2, worker=4,ffmpeg_path='c:\\ffmpeg.exe') ``resize to 1/h and 1/w``
Contributing
------------
from iman import Boors
==========================
Boors.get(sahm) ``get sahm info``
Contributions are welcome! Submit bug reports, feature requests, or pull requests via the project's GitHub repository (if available). Follow contribution guidelines and include tests for new features.
from iman import Text
=====================
norm = Text.normal("c:\\Replace_List.txt")
License
-------
norm.rep(str)
``iman`` is licensed under the MIT License (assumed). See the LICENSE file for details.
norm.from_file(filename ,file_out=None)
Contact
-------
from iman.num2fa import words
=============================
words(number)
For support, contact the maintainers via the project's GitHub page or email (if provided).
from iman import examples
==========================
examples.items ``get items in examples folder``
.. note::
examples.help(topic)
from iman import Rar
====================
1-rar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
2-zip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
3-unrar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
4-unzip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
from iman import Enhance
=========================
1-Enhance.Dereverb(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeEcho-DeReverb.pth")
2-Enhance.Denoise(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeNoise-Lite.pth")
from iman.tf import *
=====================
1-flops(model) ``get flops of tf model``
2-param(model) ``return parameter number of tf model``
3-paramp(model) ``return parameter number of tf model and print model layers``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
6-limit() ``TF model only allocates as much GPU memory as needed based on runtime allocations``
from iman.torch import *
========================
1-param(model) ``return parameter number and trainable number of torch model``
2-paramp(model) ``return parameter number of torch model and print model layers``
3-layers(model) ``return layers of torch model``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
from iman.yt import *
========================
1-dl(url) ``Download youtube link``
2-list_formats(url) ``return all available formats for yt link``
Some functions require external tools (e.g., ``ffmpeg``, ``WinRAR``) or pre-trained models. Ensure these are configured correctly. Paths like ``c:\\ffmpeg.exe`` are Windows-specific; adjust for other operating systems.

@@ -111,2 +111,6 @@ README.rst

iman/sad_torch_mfcc_speaker/viterbi.py
iman/sad_torch_mfcc_speaker/viterbi_utils.py
iman/sad_torch_mfcc_speaker/viterbi_utils.py
iman/svad/__init__.py
iman/svad/model.py
iman/svad/utils_vad.py
iman/svad/data/__init__.py

@@ -186,3 +186,3 @@ import matplotlib.pyplot as plt

def separate(pattern,folout=None,model_path_folder=None): #model_path_folder contain .th model of Dmucs
def separate(pattern,folout=None,model_path_folder=None): #model_path_folder contain .th model of Dmucs pip install demucs
files = gf(pattern)

@@ -189,0 +189,0 @@ for fname in files:

+272
-210
Metadata-Version: 2.1
Name: iman
Version: 1.0.28
Version: 2.0
Summary: Python package for daily Tasks

@@ -12,328 +12,390 @@ Author: Iman Sarraf

from iman import *
==================
iman
====
1-plt
Overview
--------
2-now() ``get time``
``iman`` is a comprehensive Python package offering a wide array of utilities for audio processing, file manipulation, machine learning, system operations, web utilities, and more. It provides tools for tasks such as audio feature extraction, voice activity detection, file I/O, system monitoring, and integration with frameworks like PyTorch and TensorFlow. The package is organized into multiple submodules, each designed for specific functionalities, as detailed below.
3-F ``format floating point``
Installation
------------
4-D ``format int number``
Install ``iman`` via pip:
5-Write_List(MyList,Filename)
.. code-block:: bash
6-Write_Dic(MyDic,Filename)
pip install iman
7-Read(Filename) ``read txt file``
Ensure dependencies like ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, and external tools like ``ffmpeg``, ``ffprobe``, and ``WinRAR`` are installed. Some functions require pre-trained models or specific paths (e.g., model files, ``ffmpeg_path``).
8-Read_Lines(Filename) ``read txt file line by line and return list``
Usage
-----
9-Write(_str,Filename)
Below are examples of key functionalities from the ``iman`` package. For detailed function signatures and parameters, refer to the sections below or use the built-in help system:
10-gf(pattern) ``Get files in a directory``
.. code-block:: python
11-gfa(directory_pattern , ext="*.*") ``Get Files in a Directory and SubDirectories``
from iman import examples
examples.help("Audio") # Get help on a specific module
12-ReadE(Filename) ``Read Excel files``
**Example: Audio Processing**
13-PM(dir) ``create directory``
.. code-block:: python
14-PB(fname) ``get basename``
from iman import Audio
15-PN(fname) ``get file name``
# Read a WAV file
data, sr = Audio.Read("audio.wav", sr=16000, start_from=0, dur=None, mono=True, ffmpeg_path="c:\\ffmpeg.exe", ffprobe_path="c:\\ffprobe.exe")
16-PE(fname) ``get ext``
# Resample and write audio
resampled = Audio.Resample(data, fs=sr, sr=8000)
Audio.Write("output.wav", resampled, fs=8000)
17-PD(fname) ``get directory``
**Example: File Operations**
18-PS(fname) ``get size``
.. code-block:: python
19-PJ(segments) ``Join Path``
from iman import *
20-clear() ``clear cmd``
# Get files matching a pattern
files = gf("*.txt")
21-os
# Write a dictionary to a file
my_dict = {"key1": "value1", "key2": "value2"}
Write_Dic(my_dict, "output.txt")
22-np
**Example: VAD with Segmenter**
23-RI(start_int , end_int , count=1) ``random int``
.. code-block:: python
24-RF(start_float , end_float , count=1) ``random float``
from iman.sad_torch_mfcc import Segmenter
25-RS(Arr) ``shuffle``
seg = Segmenter(batch_size=32, vad_type="vad", sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path="c:\\ffmpeg.exe", complete_output=False, device="cuda", input_type="file")
isig, wav, mfcc = seg("audio.wav")
26-LJ(job_file_name)
Modules and Functions
---------------------
27-SJ(value , job_file_name)
The ``iman`` package is organized into several submodules, each with specific functions. Below is a complete list of modules and their functions as provided.
28-LN(np_file_name)
iman
~~~~
29-SN(arr , np_file_name)
- ``plt``: Matplotlib plotting library.
- ``now()``: Get current time.
- ``F``: Format floating-point number.
- ``D``: Format integer number.
- ``Write_List(MyList, Filename)``: Write a list to a text file.
- ``Write_Dic(MyDic, Filename)``: Write a dictionary to a text file.
- ``Read(Filename)``: Read a text file.
- ``Read_Lines(Filename)``: Read a text file line by line and return a list.
- ``Write(_str, Filename)``: Write a string to a text file.
- ``gf(pattern)``: Get files in a directory matching a pattern.
- ``gfa(directory_pattern, ext="*.*")``: Get files in a directory and subdirectories.
- ``ReadE(Filename)``: Read Excel files.
- ``PM(dir)``: Create a directory.
- ``PB(fname)``: Get basename of a file.
- ``PN(fname)``: Get filename without path.
- ``PE(fname)``: Get file extension.
- ``PD(fname)``: Get directory of a file.
- ``PS(fname)``: Get file size.
- ``PJ(segments)``: Join path segments.
- ``clear()``: Clear command-line interface.
- ``os``: Python os module.
- ``np``: NumPy module.
- ``RI(start_int, end_int, count=1)``: Generate random integers.
- ``RF(start_float, end_float, count=1)``: Generate random floats.
- ``RS(Arr)``: Shuffle an array.
- ``LJ(job_file_name)``: Load job file (details not specified).
- ``SJ(value, job_file_name)``: Save job file (details not specified).
- ``LN(np_file_name)``: Load NumPy file (details not specified).
- ``SN(arr, np_file_name)``: Save NumPy array to file.
- ``cmd(command, redirect=True)``: Run a command in CMD.
- ``PX(fname)``: Check existence of a file.
- ``RC(Arr, size=1)``: Random choice from an array.
- ``onehot(data, nb_classes)``: Convert data to one-hot encoding.
- ``exe(pyfile)``: Convert Python file to executable (requires PyInstaller).
- ``FWL(wavfolder, sr)``: Get total audio length in a folder.
- ``norm(vector)``: Normalize a vector (vector/magnitude(vector)).
- ``delete(pattern)``: Delete files matching a pattern.
- ``rename(fname, fout)``: Rename a file.
- ``separate(pattern, folout)``: Separate vocal from music.
- ``dll(fname)``: Create a .pyd file from a Python file.
- ``get_hard_serial()``: Get hardware serial number.
- ``mute_mic()``: Toggle microphone on/off.
- ``PA(fname)``: Get absolute path of a file.
30-cmd(command , redirect=True) ``Run command in CMD``
iman.Audio
~~~~~~~~~~
31-PX(fname) ``check existence of file``
- ``Read(filename, sr, start_from, dur, mono, ffmpeg_path, ffprobe_path)``: Read WAV, ALAW, MP3, and other audio formats.
- ``Resample(data, fs, sr)``: Resample audio data.
- ``Write(filename, data, fs)``: Write audio data to a file.
- ``frame(y)``: Frame audio data (details not specified).
- ``split(y)``: Split audio data (details not specified).
- ``ReadT(filename, sr, mono=True)``: Read and resample WAV file with torchaudio.
- ``VAD(y, top_db=40, frame_length=200, hop_length=80)``: Voice activity detection.
- ``compress(fname_pattern, sr=16000, ext='mp3', mono=True, ffmpeg_path='c:\\ffmpeg.exe', ofolder=None, worker=4)``: Compress audio files.
- ``clip_value(wav)``: Return clipping percentage in an audio file.
- ``WriteS(filename, data, fs)``: Convert and write audio to stereo.
32-RC(Arr , size=1) ``Random Choice``
iman.info
~~~~~~~~~
33-onehot(data, nb_classes)
- ``get()``: Get information about CPU and GPU (requires torch).
- ``cpu()``: Get CPU percentage usage.
- ``gpu()``: Get GPU memory usage.
- ``memory()``: Get RAM usage in GB.
- ``plot(fname="log.txt", delay=1)``: Plot system metrics from a log file.
34-exe(pyfile) ``need pyinstaller``
iman.metrics
~~~~~~~~~~~~
35-FWL(wavfolder , sr) ``Get Folder Audio Length``
- ``EER(lab, score)``: Compute Equal Error Rate.
- ``cosine_distance(v1, v2)``: Compute cosine distance between two vectors.
- ``roc(lab, score)``: Compute ROC curve.
- ``wer(ref, hyp)``: Compute Word Error Rate.
- ``cer(ref, hyp)``: Compute Character Error Rate.
- ``wer_list(ref_list, hyp_list)``: Compute WER for lists.
- ``cer_list(ref_list, hyp_list)``: Compute CER for lists.
- ``DER(ref_list, res_list, file_dur=-1, sr=8000)``: Compute Detection Error Rate.
36-norm(vector) ``vector/magnitude(vector)``
iman.tsne
~~~~~~~~~
37-delete(pattern)
- ``plot(fea, label)``: Plot t-SNE visualization of features.
38-rename(fname , fout)
iman.xvector
~~~~~~~~~~~~
39-separate(pattern,folout) ``separate vocal from music``
- ``xvec, lda_xvec, gender = get(filename, model(model_path, model_name, model_speaker_num))``: Extract x-vectors for speaker recognition.
40-dll(fname) ``create a pyd file from py file``
iman.web
~~~~~~~~
41-get_hard_serial()
- ``change_wallpaper()``: Change system wallpaper.
- ``dl(url)``: Download a file from a URL.
- ``links(url, filter_text=None)``: Extract links from a URL.
- ``imgs(url, filter_text=None)``: Extract images from a URL.
42-mute_mic() ``on and off microphone``
iman.matlab
~~~~~~~~~~~
43-PA(fname) ``get abs path``
- ``np2mat(param, mat_file_name)``: Convert NumPy array to MATLAB file.
- ``dic2mat(param, mat_file_name)``: Convert dictionary to MATLAB file.
- ``mat2dic(mat_file_name)``: Convert MATLAB file to dictionary.
from iman import Audio
======================
1-Read(filename,sr,start_from,dur,mono,ffmpeg_path,ffprobe_path) ``Read wav alaw and mp3 and others``
iman.Features
~~~~~~~~~~~~~
2-Resample(data , fs, sr)
- ``mfcc_fea, mspec, log_energy = mfcc.SB.Get(wav, sample_rate)``: Compute MFCC with SpeechBrain (input must be read with torchaudio).
- ``mfcc.SB.Normal(MFCC)``: Mean-variance normalization of MFCC with SpeechBrain.
- ``mfcc_fea, log_energy = mfcc.LS.Get(wav, sample_rate, le=False)``: Compute MFCC with Librosa (input is NumPy array).
- ``mfcc.LS.Normal(MFCC, win_len=150)``: Mean-variance normalization (local, 150 frames left and right).
3-Write(filename, data ,fs)
iman.AUG
~~~~~~~~
4-frame(y)
- ``Add_Noise(data, noise, snr)``: Add noise to audio data.
- ``Add_Reverb(data, rir)``: Add reverberation to audio data.
- ``Add_NoiseT(data, noise, snr)``: Add noise using torchaudio.
- ``Add_ReverbT(data, rir)``: Add reverberation using torchaudio.
- ``mp3(fname, fout, sr_out, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Convert to MP3.
- ``speed(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Change audio speed.
- ``volume(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Adjust audio volume.
5-split(y)
iman.sad_torch_mfcc | iman.sad_tf
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6-ReadT(filename, sr , mono=True) ``Read and resample wav file with torchaudio``
- **Initializer** (PyTorch):
7-VAD(y,top_db=40, frame_length=200, hop_length=80)
.. code-block:: python
8-compress(fname_pattern , sr=16000 , ext='mp3' , mono=True ,ffmpeg_path='c:\\ffmpeg.exe' , ofolder=None, worker=4)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path='c:\\ffmpeg.exe', complete_output=False, device='cuda', input_type='file')
9-clip_value(wav) ``return clipping percentage in audio file``
- **Initializer** (TensorFlow):
10-WriteS(filename, data ,fs) ``Convert to Stereo``
.. code-block:: python
from iman import info
=====================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=16000, model_path="c:\\keras_speech_music_noise_cnn.hdf5", gender_path="c:\\keras_male_female_cnn.hdf5", ffmpeg_path='c:\\ffmpeg.exe', detect_gender=False, complete_output=False, device='cuda', input_type='file')
1-get() info about cpu and gpu ``need torch``
- ``isig, wav, mfcc = seg(fname)``: Process audio file (MFCC output only in PyTorch model).
- ``nmfcc = filter_fea(isig, mfcc, sr, max_time)``: Filter features (PyTorch only).
- ``mfcc = MVN(mfcc)``: Mean-variance normalization (PyTorch only).
- ``isig = filter_output(isig, max_silence, ignore_small_speech_segments, max_speech_len, split_speech_bigger_than)``: Filter output when ``complete_output=False``.
- ``seg2aud(isig, filename)``: Convert segments to audio.
- ``seg2json(isig)``: Convert segments to JSON.
- ``seg2Gender_Info(isig)``: Extract gender information from segments.
- ``seg2Info(isig)``: Extract segment information.
- ``wav_speech, wav_noise = filter_sig(isig, wav, sr)``: Get speech and noise parts (when ``complete_output=False``).
2-cpu() ``get cpu percentage usage``
- **sad_tf.segmentero**:
3-gpu() ``get gpu memory usage``
.. code-block:: python
4-memory() ``get ram usage GB``
from sad_tf.segmentero import Segmenter # Use ONNX models (requires onnxruntime)
5-plot(fname="log.txt" , delay=1)
iman.sad_torch_mfcc_speaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Initializer**:
from iman import metrics
========================
1-EER(lab,score)
.. code-block:: python
2-cosine_distance(v1,v2)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
3-roc(lab,score)
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
4-wer(ref, hyp)
iman.sad_tf_mlp_speaker
~~~~~~~~~~~~~~~~~~~~~~~
5-cer(ref, hyp)
- **Initializer**:
6-wer_list(ref_list , hyp_list)
.. code-block:: python
7-cer_list(ref_list , hyp_list)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="sad_tf_mlp.h5", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
8-DER(ref_list , res_list , file_dur=-1 , sr=8000) ``Detection Error Rate``
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
from iman import tsne
=====================
iman.Report
~~~~~~~~~~~
1-plot(fea , label)
- **Initializer**:
from iman import xvector
========================
1-xvec,lda_xvec,gender = get(filename , model(model_path , model_name , model_speaker_num))
.. code-block:: python
r = Report.rep(log_dir=None)
from iman import web
====================
1-change_wallpaper()
- ``WS(_type, _name, value, itr)``: Add scalar to TensorBoard.
- ``WT(_type, _name, _str, itr)``: Add text to TensorBoard.
- ``WG(pytorch_model, example_input)``: Add graph to TensorBoard.
- ``WI(_type, _name, images, itr)``: Add image to TensorBoard.
2-dl(url)
iman.par
~~~~~~~~
3-links(url , filter_text=None)
- **Parallel Processing**:
4-imgs(url , filter_text=None)
.. code-block:: python
from iman import matlab
=======================
1-np2mat(param , mat_file_name)
if __name__ == '__main__':
res = par.par(files, func, worker=4, args=[]) # func defined as: def func(fname, _args): ...
2-dic2mat(param , mat_file_name)
iman.Image
~~~~~~~~~~
3-mat2dic (mat_file_name)
- ``Image.convert(fname_pattern, ext='jpg', ofolder=None, w=-1, h=-1, level=100, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Convert images to specified format.
- ``Image.resize(fname_pattern, ext='jpg', ofolder=None, w=2, h=2, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Resize images to 1/w and 1/h.
from iman import Features
=========================
1- mfcc_fea,mspec,log_energy = mfcc.SB.Get(wav,sample_rate) ``Compute MFCC with speechbrain - input must be read with torchaudio``
iman.Boors
~~~~~~~~~~
2-mfcc.SB.Normal(MFCC) ``Mean Var Normalization Utt with speechbrain``
- ``Boors.get(sahm)``: Get stock information.
3- mfcc_fea,log_energy = mfcc.LS.Get(wav,sample_rate,le=False) ``Compute MFCC with Librosa - input is numpy array``
iman.Text
~~~~~~~~~
4-mfcc.LS.Normal(MFCC , win_len=150) ``Mean Var Normalization Local 150 left and 150 right``
- **Initializer**:
from iman import AUG
====================
1-Add_Noise(data , noise , snr)
.. code-block:: python
2-Add_Reverb( data , rir)
norm = Text.normal("c:\\Replace_List.txt")
3-Add_NoiseT(data , noise , snr) ``(torchaudio)``
- ``norm.rep(str)``: Replace text based on normalization rules.
- ``norm.from_file(filename, file_out=None)``: Normalize text from a file.
4-Add_ReverbT( data , rir) ``(torchaudio)``
iman.num2fa
~~~~~~~~~~~
5-mp3(fname , fout,sr_out,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``words(number)``: Convert number to Persian words.
6-speed(fname,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
iman.examples
~~~~~~~~~~~~~
7-volume(fname ,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``examples.items``: Get items in the examples folder.
- ``examples.help(topic)``: Get help on a specific topic.
from iman.[sad_torch_mfcc | sad_tf] import *
===============================================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , tq=1,ffmpeg_path='c:\\ffmpeg.exe',complete_output=False , device='cuda',input_type='file') ``TORCH``
iman.Rar
~~~~~~~~
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=16000 , model_path="c:\\keras_speech_music_noise_cnn.hdf5",gender_path="c:\\keras_male_female_cnn.hdf5",ffmpeg_path='c:\\ffmpeg.exe',detect_gender=False,complete_output=False,device='cuda',input_type='file') ``TensorFlow``
- ``rar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create RAR archive.
- ``zip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create ZIP archive.
- ``unrar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract RAR archive.
- ``unzip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract ZIP archive.
isig,wav,mfcc = seg(fname) ``mfcc output Just in torch model``
iman.Enhance
~~~~~~~~~~~~
nmfcc = filter_fea(isig , mfcc , sr , max_time) ``Just in torch model``
- ``Enhance.Dereverb(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeEcho-DeReverb.pth")``: Dereverberate audio files.
- ``Enhance.Denoise(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeNoise-Lite.pth")``: Denoise audio files.
mfcc = MVN(mfcc) ``Just in torch model``
iman.tf
~~~~~~~
isig = filter_output(isig , max_silence ,ignore_small_speech_segments , max_speech_len ,split_speech_bigger_than) ``Do when complete_output=False``
- ``flops(model)``: Get FLOPs of a TensorFlow model.
- ``param(model)``: Get parameter count of a TensorFlow model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
- ``limit()``: Limit GPU memory allocation for TensorFlow models.
seg2aud(isig , filename)
seg2json(isig)
iman.torch
~~~~~~~~~~
seg2Gender_Info(isig)
- ``param(model)``: Get parameter and trainable count of a PyTorch model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``layers(model)``: Get layers of a PyTorch model.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
seg2Info(isig)
iman.yt
~~~~~~~
wav_speech , wav_noise = filter_sig(isig , wav , sr) ``Get Speech and Noise Parts of file - Do when complete_output=False``
- ``dl(url)``: Download a YouTube video.
- ``list_formats(url)``: List available formats for a YouTube link.
from sad_tf.segmentero import Segmenter ``to use onnx models - need to install onnxruntime``
iman.svad
~~~~~~~~~
from iman.sad_torch_mfcc_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``TORCH - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
- ``segments, wav = svad(filename, sampling_rate=16000, min_speech_duration_ms=250, max_speech_duration_s=float('inf'), min_silence_duration_ms=100)``: Run fast speech activity detection and return speech segments.
from iman.sad_tf_mlp_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="sad_tf_mlp.h5" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``Tensorflow (small mlp model) - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
Dependencies
------------
from iman import Report ``Tensorboard Writer``
==================================================
r=Report.rep(log_dir=None)
The ``iman`` package requires the following:
r.WS(_type , _name , value , itr) ``Add_scalar``
- **Python Packages**: ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, ``onnxruntime`` (for ONNX models).
- **External Tools**: ``ffmpeg``, ``ffprobe``, ``WinRAR`` (for RAR/ZIP operations).
- **Optional**: Pre-trained models (e.g., for VAD, x-vector, dereverberation) specified in function arguments.
r.WT(_type , _name , _str , itr) ``Add_text``
Check the package's ``requirements.txt`` for specific versions.
r.WG(pytorch_model , example_input) ``Add_graph``
Documentation
-------------
r.WI(_type , _name , images , itr) ``Add_image``
For detailed usage, refer to the source code or use the built-in help system:
from iman import par
========================
if (__name__ == '__main__'):
res = par.par(files , func , worker=4 , args=[]) ``def func(fname , _args): ...``
.. code-block:: python
from iman import Image
=========================
Image.convert(fname_pattern ,ext ='jpg',ofolder=None , w=-1 , h=-1,level=100, worker=4,ffmpeg_path='c:\\ffmpeg.exe')
from iman import examples
examples.help("Audio") # Get help on the Audio module
Image.resize(fname_pattern ,ext ='jpg',ofolder=None , w=2 , h=2, worker=4,ffmpeg_path='c:\\ffmpeg.exe') ``resize to 1/h and 1/w``
Contributing
------------
from iman import Boors
==========================
Boors.get(sahm) ``get sahm info``
Contributions are welcome! Submit bug reports, feature requests, or pull requests via the project's GitHub repository (if available). Follow contribution guidelines and include tests for new features.
from iman import Text
=====================
norm = Text.normal("c:\\Replace_List.txt")
License
-------
norm.rep(str)
``iman`` is licensed under the MIT License (assumed). See the LICENSE file for details.
norm.from_file(filename ,file_out=None)
Contact
-------
from iman.num2fa import words
=============================
words(number)
For support, contact the maintainers via the project's GitHub page or email (if provided).
from iman import examples
==========================
examples.items ``get items in examples folder``
.. note::
examples.help(topic)
from iman import Rar
====================
1-rar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
2-zip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
3-unrar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
4-unzip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
from iman import Enhance
=========================
1-Enhance.Dereverb(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeEcho-DeReverb.pth")
2-Enhance.Denoise(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeNoise-Lite.pth")
from iman.tf import *
=====================
1-flops(model) ``get flops of tf model``
2-param(model) ``return parameter number of tf model``
3-paramp(model) ``return parameter number of tf model and print model layers``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
6-limit() ``Tf model only allocate as much GPU memory based on runtime allocations``
from iman.torch import *
========================
1-param(model) ``return parameter number and trainable number of torch model``
2-paramp(model) ``return parameter number of torch model and print model layers``
3-layers(model) ``return layers of torch model``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
from iman.yt import *
========================
1-dl(url) ``Download youtube link``
2-list_formats(url) ``return all available formats for yt link``
Some functions require external tools (e.g., ``ffmpeg``, ``WinRAR``) or pre-trained models. Ensure these are configured correctly. Paths like ``c:\\ffmpeg.exe`` are Windows-specific; adjust for other operating systems.
+271
-209

@@ -1,327 +0,389 @@

from iman import *
==================
iman
====
1-plt
Overview
--------
2-now() ``get time``
``iman`` is a comprehensive Python package offering a wide array of utilities for audio processing, file manipulation, machine learning, system operations, web utilities, and more. It provides tools for tasks such as audio feature extraction, voice activity detection, file I/O, system monitoring, and integration with frameworks like PyTorch and TensorFlow. The package is organized into multiple submodules, each designed for specific functionalities, as detailed below.
3-F ``format floating point``
Installation
------------
4-D ``format int number``
Install ``iman`` via pip:
5-Write_List(MyList,Filename)
.. code-block:: bash
6-Write_Dic(MyDic,Filename)
pip install iman
7-Read(Filename) ``read txt file``
Ensure dependencies like ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, and external tools like ``ffmpeg``, ``ffprobe``, and ``WinRAR`` are installed. Some functions require pre-trained models or specific paths (e.g., model files, ``ffmpeg_path``).
8-Read_Lines(Filename) ``read txt file line by line and return list``
Usage
-----
9-Write(_str,Filename)
Below are examples of key functionalities from the ``iman`` package. For detailed function signatures and parameters, refer to the sections below or use the built-in help system:
10-gf(pattern) ``Get files in a directory``
.. code-block:: python
11-gfa(directory_pattern , ext="*.*") ``Get Files in a Directory and SubDirectories``
from iman import examples
examples.help("Audio") # Get help on a specific module
12-ReadE(Filename) ``Read Excel files``
**Example: Audio Processing**
13-PM(dir) ``create directory``
.. code-block:: python
14-PB(fname) ``get basename``
from iman import Audio
15-PN(fname) ``get file name``
# Read a WAV file
data, sr = Audio.Read("audio.wav", sr=16000, start_from=0, dur=None, mono=True, ffmpeg_path="c:\\ffmpeg.exe", ffprobe_path="c:\\ffprobe.exe")
16-PE(fname) ``get ext``
# Resample and write audio
resampled = Audio.Resample(data, fs=sr, sr=8000)
Audio.Write("output.wav", resampled, fs=8000)
17-PD(fname) ``get directory``
**Example: File Operations**
18-PS(fname) ``get size``
.. code-block:: python
19-PJ(segments) ``Join Path``
from iman import *
20-clear() ``clear cmd``
# Get files matching a pattern
files = gf("*.txt")
21-os
# Write a dictionary to a file
my_dict = {"key1": "value1", "key2": "value2"}
Write_Dic(my_dict, "output.txt")
22-np
**Example: VAD with Segmenter**
23-RI(start_int , end_int , count=1) ``random int``
.. code-block:: python
24-RF(start_float , end_float , count=1) ``random float``
from iman.sad_torch_mfcc import Segmenter
25-RS(Arr) ``shuffle``
seg = Segmenter(batch_size=32, vad_type="vad", sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path="c:\\ffmpeg.exe", complete_output=False, device="cuda", input_type="file")
isig, wav, mfcc = seg("audio.wav")
26-LJ(job_file_name)
Modules and Functions
---------------------
27-SJ(value , job_file_name)
The ``iman`` package is organized into several submodules, each with specific functions. Below is a complete list of modules and their functions as provided.
28-LN(np_file_name)
iman
~~~~
29-SN(arr , np_file_name)
- ``plt``: Matplotlib plotting library.
- ``now()``: Get current time.
- ``F``: Format floating-point number.
- ``D``: Format integer number.
- ``Write_List(MyList, Filename)``: Write a list to a text file.
- ``Write_Dic(MyDic, Filename)``: Write a dictionary to a text file.
- ``Read(Filename)``: Read a text file.
- ``Read_Lines(Filename)``: Read a text file line by line and return a list.
- ``Write(_str, Filename)``: Write a string to a text file.
- ``gf(pattern)``: Get files in a directory matching a pattern.
- ``gfa(directory_pattern, ext="*.*")``: Get files in a directory and subdirectories.
- ``ReadE(Filename)``: Read Excel files.
- ``PM(dir)``: Create a directory.
- ``PB(fname)``: Get basename of a file.
- ``PN(fname)``: Get filename without path.
- ``PE(fname)``: Get file extension.
- ``PD(fname)``: Get directory of a file.
- ``PS(fname)``: Get file size.
- ``PJ(segments)``: Join path segments.
- ``clear()``: Clear command-line interface.
- ``os``: Python os module.
- ``np``: NumPy module.
- ``RI(start_int, end_int, count=1)``: Generate random integers.
- ``RF(start_float, end_float, count=1)``: Generate random floats.
- ``RS(Arr)``: Shuffle an array.
- ``LJ(job_file_name)``: Load job file (details not specified).
- ``SJ(value, job_file_name)``: Save job file (details not specified).
- ``LN(np_file_name)``: Load NumPy file (details not specified).
- ``SN(arr, np_file_name)``: Save NumPy array to file.
- ``cmd(command, redirect=True)``: Run a command in CMD.
- ``PX(fname)``: Check existence of a file.
- ``RC(Arr, size=1)``: Random choice from an array.
- ``onehot(data, nb_classes)``: Convert data to one-hot encoding.
- ``exe(pyfile)``: Convert Python file to executable (requires PyInstaller).
- ``FWL(wavfolder, sr)``: Get total audio length in a folder.
- ``norm(vector)``: Normalize a vector (vector/magnitude(vector)).
- ``delete(pattern)``: Delete files matching a pattern.
- ``rename(fname, fout)``: Rename a file.
- ``separate(pattern, folout)``: Separate vocal from music.
- ``dll(fname)``: Create a .pyd file from a Python file.
- ``get_hard_serial()``: Get hardware serial number.
- ``mute_mic()``: Toggle microphone on/off.
- ``PA(fname)``: Get absolute path of a file.
30-cmd(command , redirect=True) ``Run command in CMD``
iman.Audio
~~~~~~~~~~
31-PX(fname) ``check existence of file``
- ``Read(filename, sr, start_from, dur, mono, ffmpeg_path, ffprobe_path)``: Read WAV, ALAW, MP3, and other audio formats.
- ``Resample(data, fs, sr)``: Resample audio data.
- ``Write(filename, data, fs)``: Write audio data to a file.
- ``frame(y)``: Frame audio data (details not specified).
- ``split(y)``: Split audio data (details not specified).
- ``ReadT(filename, sr, mono=True)``: Read and resample WAV file with torchaudio.
- ``VAD(y, top_db=40, frame_length=200, hop_length=80)``: Voice activity detection.
- ``compress(fname_pattern, sr=16000, ext='mp3', mono=True, ffmpeg_path='c:\\ffmpeg.exe', ofolder=None, worker=4)``: Compress audio files.
- ``clip_value(wav)``: Return clipping percentage in an audio file.
- ``WriteS(filename, data, fs)``: Convert and write audio to stereo.
32-RC(Arr , size=1) ``Random Choice``
iman.info
~~~~~~~~~
33-onehot(data, nb_classes)
- ``get()``: Get information about CPU and GPU (requires torch).
- ``cpu()``: Get CPU percentage usage.
- ``gpu()``: Get GPU memory usage.
- ``memory()``: Get RAM usage in GB.
- ``plot(fname="log.txt", delay=1)``: Plot system metrics from a log file.
34-exe(pyfile) ``need pyinstaller``
iman.metrics
~~~~~~~~~~~~
35-FWL(wavfolder , sr) ``Get Folder Audio Length``
- ``EER(lab, score)``: Compute Equal Error Rate.
- ``cosine_distance(v1, v2)``: Compute cosine distance between two vectors.
- ``roc(lab, score)``: Compute ROC curve.
- ``wer(ref, hyp)``: Compute Word Error Rate.
- ``cer(ref, hyp)``: Compute Character Error Rate.
- ``wer_list(ref_list, hyp_list)``: Compute WER for lists.
- ``cer_list(ref_list, hyp_list)``: Compute CER for lists.
- ``DER(ref_list, res_list, file_dur=-1, sr=8000)``: Compute Detection Error Rate.
36-norm(vector) ``vector/magnitude(vector)``
iman.tsne
~~~~~~~~~
37-delete(pattern)
- ``plot(fea, label)``: Plot t-SNE visualization of features.
38-rename(fname , fout)
iman.xvector
~~~~~~~~~~~~
39-separate(pattern,folout) ``separate vocal from music``
- ``xvec, lda_xvec, gender = get(filename, model(model_path, model_name, model_speaker_num))``: Extract x-vectors for speaker recognition.
40-dll(fname) ``create a pyd file from py file``
iman.web
~~~~~~~~
41-get_hard_serial()
- ``change_wallpaper()``: Change system wallpaper.
- ``dl(url)``: Download a file from a URL.
- ``links(url, filter_text=None)``: Extract links from a URL.
- ``imgs(url, filter_text=None)``: Extract images from a URL.
42-mute_mic() ``on and off microphone``
iman.matlab
~~~~~~~~~~~
43-PA(fname) ``get abs path``
- ``np2mat(param, mat_file_name)``: Convert NumPy array to MATLAB file.
- ``dic2mat(param, mat_file_name)``: Convert dictionary to MATLAB file.
- ``mat2dic(mat_file_name)``: Convert MATLAB file to dictionary.
from iman import Audio
======================
1-Read(filename,sr,start_from,dur,mono,ffmpeg_path,ffprobe_path) ``Read wav alaw and mp3 and others``
iman.Features
~~~~~~~~~~~~~
2-Resample(data , fs, sr)
- ``mfcc_fea, mspec, log_energy = mfcc.SB.Get(wav, sample_rate)``: Compute MFCC with SpeechBrain (input must be read with torchaudio).
- ``mfcc.SB.Normal(MFCC)``: Mean-variance normalization of MFCC with SpeechBrain.
- ``mfcc_fea, log_energy = mfcc.LS.Get(wav, sample_rate, le=False)``: Compute MFCC with Librosa (input is NumPy array).
- ``mfcc.LS.Normal(MFCC, win_len=150)``: Mean-variance normalization (local, 150 frames left and right).
3-Write(filename, data ,fs)
iman.AUG
~~~~~~~~
4-frame(y)
- ``Add_Noise(data, noise, snr)``: Add noise to audio data.
- ``Add_Reverb(data, rir)``: Add reverberation to audio data.
- ``Add_NoiseT(data, noise, snr)``: Add noise using torchaudio.
- ``Add_ReverbT(data, rir)``: Add reverberation using torchaudio.
- ``mp3(fname, fout, sr_out, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Convert to MP3.
- ``speed(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Change audio speed.
- ``volume(fname, fout, ratio, ffmpeg_path='c:\\ffmpeg.exe')``: Adjust audio volume.
5-split(y)
iman.sad_torch_mfcc | iman.sad_tf
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6-ReadT(filename, sr , mono=True) ``Read and resample wav file with torchaudio``
- **Initializer** (PyTorch):
7-VAD(y,top_db=40, frame_length=200, hop_length=80)
.. code-block:: python
8-compress(fname_pattern , sr=16000 , ext='mp3' , mono=True ,ffmpeg_path='c:\\ffmpeg.exe' , ofolder=None, worker=4)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", tq=1, ffmpeg_path='c:\\ffmpeg.exe', complete_output=False, device='cuda', input_type='file')
9-clip_value(wav) ``return clipping percentage in audio file``
- **Initializer** (TensorFlow):
10-WriteS(filename, data ,fs) ``Convert to Stereo``
.. code-block:: python
from iman import info
=====================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=16000, model_path="c:\\keras_speech_music_noise_cnn.hdf5", gender_path="c:\\keras_male_female_cnn.hdf5", ffmpeg_path='c:\\ffmpeg.exe', detect_gender=False, complete_output=False, device='cuda', input_type='file')
1-get() info about cpu and gpu ``need torch``
- ``isig, wav, mfcc = seg(fname)``: Process audio file (MFCC output only in PyTorch model).
- ``nmfcc = filter_fea(isig, mfcc, sr, max_time)``: Filter features (PyTorch only).
- ``mfcc = MVN(mfcc)``: Mean-variance normalization (PyTorch only).
- ``isig = filter_output(isig, max_silence, ignore_small_speech_segments, max_speech_len, split_speech_bigger_than)``: Filter output when ``complete_output=False``.
- ``seg2aud(isig, filename)``: Convert segments to audio.
- ``seg2json(isig)``: Convert segments to JSON.
- ``seg2Gender_Info(isig)``: Extract gender information from segments.
- ``seg2Info(isig)``: Extract segment information.
- ``wav_speech, wav_noise = filter_sig(isig, wav, sr)``: Get speech and noise parts (when ``complete_output=False``).
2-cpu() ``get cpu percentage usage``
- **sad_tf.segmentero**:
3-gpu() ``get gpu memory usage``
.. code-block:: python
4-memory() ``get ram usage GB``
from sad_tf.segmentero import Segmenter # Use ONNX models (requires onnxruntime)
5-plot(fname="log.txt" , delay=1)
iman.sad_torch_mfcc_speaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Initializer**:
from iman import metrics
========================
1-EER(lab,score)
.. code-block:: python
2-cosine_distance(v1,v2)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="c:\\sad_model_pytorch.pth", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
3-roc(lab,score)
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
4-wer(ref, hyp)
iman.sad_tf_mlp_speaker
~~~~~~~~~~~~~~~~~~~~~~~
5-cer(ref, hyp)
- **Initializer**:
6-wer_list(ref_list , hyp_list)
.. code-block:: python
7-cer_list(ref_list , hyp_list)
seg = Segmenter(batch_size, vad_type=['sad'|'vad'], sr=8000, model_path="sad_tf_mlp.h5", max_time=120, tq=1, ffmpeg_path='c:\\ffmpeg.exe', device='cuda', pad=False)
8-DER(ref_list , res_list , file_dur=-1 , sr=8000) ``Detection Error Rate``
- ``mfcc, len(sec) = seg(fname)``: Process audio file, MFCC padded to ``max_time`` if ``pad=True``.
from iman import tsne
=====================
iman.Report
~~~~~~~~~~~
1-plot(fea , label)
- **Initializer**:
from iman import xvector
========================
1-xvec,lda_xvec,gender = get(filename , model(model_path , model_name , model_speaker_num))
.. code-block:: python
r = Report.rep(log_dir=None)
from iman import web
====================
1-change_wallpaper()
- ``WS(_type, _name, value, itr)``: Add scalar to TensorBoard.
- ``WT(_type, _name, _str, itr)``: Add text to TensorBoard.
- ``WG(pytorch_model, example_input)``: Add graph to TensorBoard.
- ``WI(_type, _name, images, itr)``: Add image to TensorBoard.
2-dl(url)
iman.par
~~~~~~~~
3-links(url , filter_text=None)
- **Parallel Processing**:
4-imgs(url , filter_text=None)
.. code-block:: python
from iman import matlab
=======================
1-np2mat(param , mat_file_name)
if __name__ == '__main__':
res = par.par(files, func, worker=4, args=[]) # func defined as: def func(fname, _args): ...
2-dic2mat(param , mat_file_name)
iman.Image
~~~~~~~~~~
3-mat2dic(mat_file_name)
- ``Image.convert(fname_pattern, ext='jpg', ofolder=None, w=-1, h=-1, level=100, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Convert images to specified format.
- ``Image.resize(fname_pattern, ext='jpg', ofolder=None, w=2, h=2, worker=4, ffmpeg_path='c:\\ffmpeg.exe')``: Resize images to 1/w and 1/h.
from iman import Features
=========================
1- mfcc_fea,mspec,log_energy = mfcc.SB.Get(wav,sample_rate) ``Compute MFCC with speechbrain - input must read with torchaudio``
iman.Boors
~~~~~~~~~~
2-mfcc.SB.Normal(MFCC) ``Mean Var Normalization Utt with speechbrain``
- ``Boors.get(sahm)``: Get stock information.
3- mfcc_fea,log_energy = mfcc.LS.Get(wav,sample_rate,le=False) ``Compute MFCC with Librosa - input is numpy array``
iman.Text
~~~~~~~~~
4-mfcc.LS.Normal(MFCC , win_len=150) ``Mean Var Normalization Local 150 left and 150 right``
- **Initializer**:
from iman import AUG
====================
1-Add_Noise(data , noise , snr)
.. code-block:: python
2-Add_Reverb( data , rir)
norm = Text.normal("c:\\Replace_List.txt")
3-Add_NoiseT(data , noise , snr) ``(torchaudio)``
- ``norm.rep(str)``: Replace text based on normalization rules.
- ``norm.from_file(filename, file_out=None)``: Normalize text from a file.
4-Add_ReverbT( data , rir) ``(torchaudio)``
iman.num2fa
~~~~~~~~~~~
5-mp3(fname , fout,sr_out,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``words(number)``: Convert number to Persian words.
6-speed(fname,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
iman.examples
~~~~~~~~~~~~~
7-volume(fname ,fout,ratio,ffmpeg_path='c:\\ffmpeg.exe')
- ``examples.items``: Get items in the examples folder.
- ``examples.help(topic)``: Get help on a specific topic.
from iman.[sad_torch_mfcc | sad_tf] import *
===============================================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , tq=1,ffmpeg_path='c:\\ffmpeg.exe',complete_output=False , device='cuda',input_type='file') ``TORCH``
iman.Rar
~~~~~~~~
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=16000 , model_path="c:\\keras_speech_music_noise_cnn.hdf5",gender_path="c:\\keras_male_female_cnn.hdf5",ffmpeg_path='c:\\ffmpeg.exe',detect_gender=False,complete_output=False,device='cuda',input_type='file') ``TensorFlow``
- ``rar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create RAR archive.
- ``zip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Create ZIP archive.
- ``unrar(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract RAR archive.
- ``unzip(fname, out="", rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")``: Extract ZIP archive.
isig,wav,mfcc = seg(fname) ``mfcc output Just in torch model``
iman.Enhance
~~~~~~~~~~~~
nmfcc = filter_fea(isig , mfcc , sr , max_time) ``Just in torch model``
- ``Enhance.Dereverb(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeEcho-DeReverb.pth")``: Dereverberate audio files.
- ``Enhance.Denoise(pattern, out_fol, sr=16000, batchsize=16, device="cuda", model_path=r"C:\\UVR-DeNoise-Lite.pth")``: Denoise audio files.
mfcc = MVN(mfcc) ``Just in torch model``
iman.tf
~~~~~~~
isig = filter_output(isig , max_silence ,ignore_small_speech_segments , max_speech_len ,split_speech_bigger_than) ``Do when complete_output=False``
- ``flops(model)``: Get FLOPs of a TensorFlow model.
- ``param(model)``: Get parameter count of a TensorFlow model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
- ``limit()``: Limit GPU memory allocation for TensorFlow models.
seg2aud(isig , filename)
seg2json(isig)
iman.torch
~~~~~~~~~~
seg2Gender_Info(isig)
- ``param(model)``: Get parameter and trainable count of a PyTorch model.
- ``paramp(model)``: Get parameter count and print model layers.
- ``layers(model)``: Get layers of a PyTorch model.
- ``gpu()``: Return True if GPU is available.
- ``gpun()``: Return number of GPUs.
seg2Info(isig)
iman.yt
~~~~~~~
wav_speech , wav_noise = filter_sig(isig , wav , sr) ``Get Speech and Noise Parts of file - Do when complete_output=False``
- ``dl(url)``: Download a YouTube video.
- ``list_formats(url)``: List available formats for a YouTube link.
from sad_tf.segmentero import Segmenter ``to use onnx models - need to install onnxruntime``
iman.svad
~~~~~~~~~
from iman.sad_torch_mfcc_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="c:\\sad_model_pytorch.pth" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``TORCH - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
- ``segments, wav = svad(filename, sampling_rate=16000, min_speech_duration_ms=250, max_speech_duration_s=float('inf'), min_silence_duration_ms=100)``: Run fast speech activity detection and return speech segments.
from iman.sad_tf_mlp_speaker import *
================================================
seg = Segmenter(batch_size, vad_type=['sad'|'vad'] , sr=8000 , model_path="sad_tf_mlp.h5" , max_time=120(sec) , tq=1,ffmpeg_path='c:\\ffmpeg.exe', device='cuda' , pad=False) ``Tensorflow (small mlp model) - max_time in second to split fea output``
mfcc, len(sec) = seg(fname) ``mfcc pad to max_time length if pad=True``
Dependencies
------------
from iman import Report ``Tensorboard Writer``
==================================================
r=Report.rep(log_dir=None)
The ``iman`` package requires the following:
r.WS(_type , _name , value , itr) ``Add_scalar``
- **Python Packages**: ``numpy``, ``torch``, ``tensorflow``, ``speechbrain``, ``librosa``, ``matplotlib``, ``pandas``, ``onnxruntime`` (for ONNX models).
- **External Tools**: ``ffmpeg``, ``ffprobe``, ``WinRAR`` (for RAR/ZIP operations).
- **Optional**: Pre-trained models (e.g., for VAD, x-vector, dereverberation) specified in function arguments.
r.WT(_type , _name , _str , itr) ``Add_text``
Check the package's ``requirements.txt`` for specific versions.
r.WG(pytorch_model , example_input) ``Add_graph``
Documentation
-------------
r.WI(_type , _name , images , itr) ``Add_image``
For detailed usage, refer to the source code or use the built-in help system:
from iman import par
========================
if (__name__ == '__main__'):
res = par.par(files , func , worker=4 , args=[]) ``def func(fname , _args): ...``
.. code-block:: python
from iman import Image
=========================
Image.convert(fname_pattern ,ext ='jpg',ofolder=None , w=-1 , h=-1,level=100, worker=4,ffmpeg_path='c:\\ffmpeg.exe')
from iman import examples
examples.help("Audio") # Get help on the Audio module
Image.resize(fname_pattern ,ext ='jpg',ofolder=None , w=2 , h=2, worker=4,ffmpeg_path='c:\\ffmpeg.exe') ``resize to 1/w and 1/h``
Contributing
------------
from iman import Boors
==========================
Boors.get(sahm) ``get sahm info``
Contributions are welcome! Submit bug reports, feature requests, or pull requests via the project's GitHub repository (if available). Follow contribution guidelines and include tests for new features.
from iman import Text
=====================
norm = Text.normal("c:\\Replace_List.txt")
License
-------
norm.rep(str)
``iman`` is licensed under the MIT License (assumed). See the LICENSE file for details.
norm.from_file(filename ,file_out=None)
Contact
-------
from iman.num2fa import words
=============================
words(number)
For support, contact the maintainers via the project's GitHub page or email (if provided).
from iman import examples
==========================
examples.items ``get items in examples folder``
.. note::
examples.help(topic)
from iman import Rar
====================
1-rar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
2-zip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
3-unrar(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
4-unzip(fname , out="" , rar_path=r"C:\\Program Files\\WinRAR\\winrar.exe")
from iman import Enhance
=========================
1-Enhance.Dereverb(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeEcho-DeReverb.pth")
2-Enhance.Denoise(pattern , out_fol , sr = 16000, batchsize=16 , device="cuda" ,model_path=r"C:\\UVR-DeNoise-Lite.pth")
from iman.tf import *
=====================
1-flops(model) ``get flops of tf model``
2-param(model) ``return parameter number of tf model``
3-paramp(model) ``return parameter number of tf model and print model layers``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
6-limit() ``Tf model only allocate as much GPU memory based on runtime allocations``
from iman.torch import *
========================
1-param(model) ``return parameter number and trainable number of torch model``
2-paramp(model) ``return parameter number of torch model and print model layers``
3-layers(model) ``return layers of torch model``
4-gpu() ``return True if available``
5-gpun() ``return number of gpus``
from iman.yt import *
========================
1-dl(url) ``Download youtube link``
2-list_formats(url) ``return all available formats for yt link``
Some functions require external tools (e.g., ``ffmpeg``, ``WinRAR``) or pre-trained models. Ensure these are configured correctly. Paths like ``c:\\ffmpeg.exe`` are Windows-specific; adjust for other operating systems.

@@ -11,3 +11,3 @@ import os

name="iman",
version='1.0.28',
version='2.0',
author="Iman Sarraf",

@@ -14,0 +14,0 @@ author_email="imansarraf@gmail.com",