Spaces:

thewh1teagle
/

phonemize-in-hebrew

Running

thewh1teagle

latest

1866014 13 days ago

1.49 kB

	from .model import OnnxModel
	import re


	class Phonikud:
	def __init__(self, model_path: str):
	self.model = OnnxModel(model_path)

	def add_diacritics(
	self, sentences: list \| str, mark_matres_lectionis: str \| None = None
	) -> str:
	"""
	Adds nikud (Hebrew diacritics) to the given text.

	Parameters:
	- sentences (list \| str): A string or a list of strings to be processed. Each string should not exceed 2048 characters.
	- mark_matres_lectionis (str \| None, optional): A string used to mark nikud male. For example, if set to '\|',
	"לִימּוּדָיו" will be returned as "לִי\|מּוּדָיו". Default is None (no marking).

	Returns:
	- str: The text with added diacritics.
	"""

	if isinstance(sentences, str):
	sentences = [sentences]
	result = self.model.predict(
	sentences, mark_matres_lectionis=mark_matres_lectionis
	)
	return result[0]

	def get_nikud_male(self, text: str, mark_matres_lectionis: str):
	"""
	Based on given mark character remove the mark character to keep it as nikud male
	"""
	return text.replace(mark_matres_lectionis, "")

	def get_nikud_haser(self, text: str):
	"""
	Based on given mark_matres_lectionis remove the nikud nikud male character along with the mark character
	"""
	return re.sub(r".\\|", "", text) # Remove {char}{matres_lectionis}