Cereja 🍒
Cereja was written using only the Python Standard Library. It was a great way to improve knowledge of the language
and also to avoid rewriting the same code.
Getting Started DEV
Don't be shy \0/ ... Clone the repository and submit a function or module you wrote, or simply use a function you like.
See CONTRIBUTING 💻
Setup
Install
pip install --user cereja
or for all users
pip install cereja
Cereja Example usage
See some of the Cereja tools
To access Cereja's tools, you first need to import it: `import cereja as cj`.
Create new files
import cereja as cj
file_json = cj.FileIO.create('./json_new_file.json', data={'k': 'v', 'k2': 'v2'})
file_txt = cj.FileIO.create('./txt_new_file.txt', ['line1', 'line2', 'line3'])
file_json.save()
file_txt.save()
print(file_json.exists)
print(file_txt.exists)
print(cj.can_do(file_txt))
print(cj.can_do(file_json))
Load and edit files
import cereja as cj
file_json = cj.FileIO.load('./json_new_file.json')
print(file_json.data)
file_json.add(key='new_key', value='value')
print(file_json.data)
file_txt = cj.FileIO.load('./txt_new_file.txt')
print(file_txt.data)
file_txt.add('line4')
print(file_txt.data)
file_txt.save(exist_ok=True)
file_json.save(exist_ok=True)
📍 Path
import cereja as cj
file_path = cj.Path('/my/path/file.ext')
print(cj.can_do(file_path))
🆗 HTTP Requests
import cereja as cj
url = 'localhost:8000/example'
headers = {'Authorization': 'TOKEN'}
data = {'q': 'test'}
response = cj.request.post(url, data=data, headers=headers)
if response.code == 200:
data = response.data
⏳ Progress
import cereja as cj
import time
my_iterable = ['Cereja', 'is', 'very', 'easy']
for i in cj.Progress.prog(my_iterable):
print(f"current: {i}")
time.sleep(2)
📊 Freq
import cereja as cj
freq = cj.Freq([1, 2, 3, 3, 10, 10, 4, 4, 4, 4])
freq.most_common(2)
freq.least_freq(2)
freq.probability
freq.sample(min_freq=1, max_freq=2)
freq.to_json('./freq.json')
🧹 Text Preprocess
import cereja as cj
text = "Oi tudo bem?? meu nome é joab!"
text = cj.preprocess.remove_extra_chars(text)
print(text)
text = cj.preprocess.separate(text, sep=['?', '!'])
text = cj.preprocess.accent_remove(text)
preprocessor = cj.Preprocessor(stop_words=(),
punctuation='!?,.', to_lower=True, is_remove_punctuation=False,
is_remove_stop_words=False,
is_remove_accent=True)
print(preprocessor.preprocess(text))
print(preprocessor.preprocess(text, is_destructive=True))
🔣 Tokenizer
import cereja as cj
text = ['oi tudo bem meu nome é joab']
tokenizer = cj.Tokenizer(text, use_unk=True)
token_sequence, hash_ = tokenizer.encode('meu nome é Neymar Júnior')
decoded_sequence = tokenizer.decode(token_sequence, hash_=hash_)
⏸ Corpus
A convenient tool for splitting parallel data into training and test sets.
import cereja as cj
X = ['how are you?', 'my name is Joab', 'I like coffee', 'how are you joab?', 'how', 'we are the world']
Y = ['como você está?', 'meu nome é Joab', 'Eu gosto de café', 'Como você está joab?', 'como', 'Nós somos o mundo']
corpus = cj.Corpus(source_data=X, target_data=Y, source_name='en', target_name='pt')
print(corpus)
print(corpus.source)
print(corpus.target)
corpus.source.phrases_freq
corpus.source.word_freq
corpus.target.phrases_freq
corpus.target.words_freq
train, test = corpus.split_data()
🔢 Array
import cereja as cj
cj.array.is_empty(data)
cj.array.get_shape(data)
data = cj.array.flatten(data)
cj.array.prod(data)
cj.array.sub(data)
cj.array.div(data)
cj.array.rand_n(0.0, 2.0, n=3)
cj.array.rand_n(1, 10)
cj.array.array_randn((3, 3,
3))
cj.chunk(data=[1, 2, 3, 4], batch_size=3, fill_with=0)
cj.array.remove_duplicate_items(['hi', 'hi', 'ih'])
cj.array.get_cols([['line1_col1', 'line1_col2'],
['line2_col1', 'line2_col2']])
cj.array.dotproduct([1, 2], [1, 2])
a = cj.array.array_gen((3, 3), 1)
b = cj.array.array_gen((3, 3), 1)
cj.array.dot(a, b)
cj.mathtools.theta_angle((2, 2), (0, -2))
🧰 Utils
import cereja.utils.time
import cereja as cj
data = {"key1": 'value1', "key2": 'value2', "key3": 'value3', "key4": 'value4'}
cj.utils.chunk(list(range(10)), batch_size=3)
cj.utils.chunk(list(range(10)), batch_size=3, fill_with=0, is_random=True)
cj.utils.invert_dict(data)
cj.utils.sample(data, k=2, is_random=True)
cj.utils.fill([1, 2, 3, 4], max_size=20, with_=0)
cj.utils.rescale_values([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], granularity=4)
cj.utils.import_string('cereja.file._io.FileIO')
cj.utils.list_methods(cj.Path)
cj.utils.string_to_literal('[1,2,3,4]')
cereja.utils.time.time_format(3600)
cj.utils.truncate("Cereja is fun.", k=3)
data = [[1, 2, 3], [3, 3, 3]]
cj.utils.is_iterable(data)
cj.utils.is_sequence(data)
cj.utils.is_numeric_sequence(data)
See Usage - Jupyter Notebook
License
This project is licensed under the MIT License; see the LICENSE file for details.