khanaa
Khanaa is a tool to make spelling Thai more convenient.
Installation
For Python >=3.7
pip install khanaa
Usage
Spelling
from khanaa import Kham
basic_example = {
'onset': 'ก',
'vowel': 'อา',
'silent_before': '',
'coda': '',
'silent_after': '',
'tone': -1
}
kaa = Kham(**basic_example)
kaa.form
lai = Kham(onset='ล', vowel='อาย')
lai.form
steak = Kham(onset='สต', vowel='เอะ', coda='ก', tone=3)
steak.form
shin = Kham(onset='ฌ', vowel='อิ', coda='น', silent_after='สก')
shin.form
sia = Kham(onset='ซย', vowel='อา', onset_style='phinthu')
sia.all_tone()
pai = Kham(onset='ป', vowel='อาย', vowel_length='short')
pai.form
`SpellWord` is deprecated but has not been removed.
Getting information
from khanaa import Kham
kwang = Kham(onset='กว', vowel='อา', coda='ง', tone=2)
kwang.form
kwang.onset_main
kwang.onset_class
kwang.vowel_length
kwang.coda_class
kwang.is_checked
kwang.tone_realized
kwang.use_leading_h
kwang.use_pair_onset
kwang.ipa()
kwang.data
""" =>
{'all_tone': ['กวาง', 'กว่าง', 'กว้าง', 'กว๊าง', 'กว๋าง'],
'coda': 'ง',
'coda_class': 'alive',
'form': 'กว้าง',
'homophone': ['กว้าง'],
'ipa': 'k w aː ŋ ˥˩',
'is_checked': False,
'is_donee_end': False,
'is_donor_end': True,
'is_donor_start': True,
'is_possible_tone': True,
'onset': 'กว',
'onset_class': 'mid',
'onset_index': -2,
'onset_main': 'ก',
'silent_after': '',
'silent_before': '',
'tone': 2,
'tone_mark': '้',
'tone_realized': 2,
'use_leading_h': False,
'use_pair_onset': False,
'vowel': 'อา',
'vowel_length': 'long'}
"""
Ambiguity
As Thai orthography can be ambiguous, we can use these methods to detect whether the boundary of a spelled word is ambiguous, so that we can, for example, put a dash between ambiguous syllables to clarify the pronunciation.
from khanaa import Kham
kwang = Kham(onset='กว', vowel='อา', coda='ง', tone=2)
kwang.form
kwang.is_donee_end()
kwang.is_donor_end()
kwang.is_donor_start()
In other words, if a word that returns true on `is_donee_end()` is followed by a word that returns true on `is_donor_start()`, there will be ambiguity (in theory). For example, ตา followed by กลม gives ตากลม, which can be read as either ตา-กลม or ตาก-ลม.
If a word that returns true on `is_donor_end()` is followed by any word, there may be ambiguity.
Homophone
from khanaa import Kham
khuu = Kham(onset='ค', vowel='อู', tone=2)
khuu.form
khuu.homophone()
Others
Find all available consonants, vowels and true clusters in khanaa
from khanaa import find_letter_list
find_letter_list()
An experimental, basic method to turn text into Kham
from khanaa import Kham, spelling_decompose
sd = spelling_decompose("เขียน")
sd
""" =>
{'data': {'coda': 'น',
'onset': 'ข',
'silent_after': '',
'silent_before': '',
'tone': 4,
'vowel': 'เอีย'},
'detail': {'leading_h': False,
'onset_index': -1,
'onset_main': 'ข',
'tone_mark': '',
'vowel_form': 'เ-ี+ย'},
'pref': {}}
"""
khian = Kham(**sd['data'])
khian.form
khian.ipa()
License
MIT