File size: 1,494 Bytes
22c7a44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from .model import OnnxModel
import re
class Phonikud:
def __init__(self, model_path: str):
self.model = OnnxModel(model_path)
def add_diacritics(
self, sentences: list | str, mark_matres_lectionis: str | None = None
) -> str:
"""
Adds nikud (Hebrew diacritics) to the given text.
Parameters:
- sentences (list | str): A string or a list of strings to be processed. Each string should not exceed 2048 characters.
- mark_matres_lectionis (str | None, optional): A string used to mark nikud male. For example, if set to '|',
"ืึดืืึผืึผืึธืื" will be returned as "ืึดื|ืึผืึผืึธืื". Default is None (no marking).
Returns:
- str: The text with added diacritics.
"""
if isinstance(sentences, str):
sentences = [sentences]
result = self.model.predict(
sentences, mark_matres_lectionis=mark_matres_lectionis
)
return result[0]
def get_nikud_male(self, text: str, mark_matres_lectionis: str):
"""
Based on given mark character remove the mark character to keep it as nikud male
"""
return text.replace(mark_matres_lectionis, "")
def get_nikud_haser(self, text: str):
"""
Based on given mark_matres_lectionis remove the nikud nikud male character along with the mark character
"""
return re.sub(r".\|", "", text) # Remove {char}{matres_lectionis}
|