|
from .model import OnnxModel |
|
import re |
|
|
|
|
|
class Phonikud: |
|
def __init__(self, model_path: str): |
|
self.model = OnnxModel(model_path) |
|
|
|
def add_diacritics( |
|
self, sentences: list | str, mark_matres_lectionis: str | None = None |
|
) -> str: |
|
""" |
|
Adds nikud (Hebrew diacritics) to the given text. |
|
|
|
Parameters: |
|
- sentences (list | str): A string or a list of strings to be processed. Each string should not exceed 2048 characters. |
|
- mark_matres_lectionis (str | None, optional): A string used to mark nikud male. For example, if set to '|', |
|
"ืึดืืึผืึผืึธืื" will be returned as "ืึดื|ืึผืึผืึธืื". Default is None (no marking). |
|
|
|
Returns: |
|
- str: The text with added diacritics. |
|
""" |
|
|
|
if isinstance(sentences, str): |
|
sentences = [sentences] |
|
result = self.model.predict( |
|
sentences, mark_matres_lectionis=mark_matres_lectionis |
|
) |
|
return result[0] |
|
|
|
def get_nikud_male(self, text: str, mark_matres_lectionis: str): |
|
""" |
|
Based on given mark character remove the mark character to keep it as nikud male |
|
""" |
|
return text.replace(mark_matres_lectionis, "") |
|
|
|
def get_nikud_haser(self, text: str): |
|
""" |
|
Based on given mark_matres_lectionis remove the nikud nikud male character along with the mark character |
|
""" |
|
return re.sub(r".\|", "", text) |
|
|