File size: 3,054 Bytes
1484d4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
from collections.abc import Iterable

import logging
logging.basicConfig(level=logging.WARNING)

from .base_engine import BaseEngineTransformer, LANG_LIST_FILE

# Directory that contains this source file (symlinks resolved by `realpath`).
F_DIR = os.path.dirname(os.path.realpath(__file__))

# Asset URLs that the engine constructor passes to `download_models()` and
# `download_dicts()` respectively.
MODEL_DOWNLOAD_URL = 'https://github.com/AI4Bharat/IndicXlit/releases/download/v1.0/indicxlit-indic-en-v1.0.zip'
DICTS_DOWNLOAD_URL = 'https://github.com/AI4Bharat/IndicXlit/releases/download/v1.0/word_prob_dicts_en.zip'
# Used as the last component of the models directory path built by the engine constructor.
XLIT_VERSION = "v1.0" # If the model/dict is changed on the storage, do not forget to change this variable in order to force-download the new assets

def is_folder_writable(folder):
    """
    Check whether files can be created inside `folder` by actually writing one.

    The folder (including missing parents) is created if it does not exist yet.
    A small probe file named `.write_test` is then written into it and removed.

    Args:
        folder: Path of the directory to probe.

    Returns:
        True if the probe file could be created, written and deleted;
        False if any of those steps failed.
    """
    try:
        os.makedirs(folder, exist_ok=True)
        probe_path = os.path.join(folder, '.write_test')
        with open(probe_path, 'w') as f:
            f.write('Permission Check')
        os.remove(probe_path)
    except (OSError, TypeError, ValueError):
        # OSError covers permission / filesystem failures; TypeError and
        # ValueError cover malformed path arguments. Anything else (notably
        # KeyboardInterrupt and SystemExit) is deliberately left to propagate
        # instead of being reported as "not writable".
        return False
    return True

def is_directory_writable(path):
    """
    Report whether `path` can be written to.

    When `os.name` is not `'nt'`, the answer comes from `os.access` with the
    write and execute bits. On `'nt'` the check is delegated to
    `is_folder_writable`, which probes the folder by writing a file.

    Args:
        path: Directory path to check.

    Returns:
        bool: result of the platform-specific check described above.
    """
    running_on_windows = (os.name == 'nt')
    if not running_on_windows:
        required_mode = os.W_OK | os.X_OK
        return os.access(path, required_mode)
    return is_folder_writable(path)

class XlitEngineTransformer_Indic2En(BaseEngineTransformer):
    """
    For Managing the top level tasks and applications of transliteration

    This engine transliterates into English only: `tgt_langs` is `{"en"}` and
    the accepted source languages are the entries of the language-list file
    with "en" removed.

    TODO: Ability to pass `beam_width` dynamically
    """
    def __init__(self, beam_width=4, rescore=True):
        """
        Resolve the models directory, load the language list, fetch assets and
        initialise the base engine.

        Args:
            beam_width: Forwarded unchanged to the base-class constructor.
            rescore: When true, `download_dicts()` is also called; the flag is
                then forwarded unchanged to the base-class constructor.
        """
        # Keep assets next to this file when that location is writable,
        # otherwise fall back to a folder inside the user's home directory.
        if is_directory_writable(F_DIR):
            models_path = os.path.join(F_DIR, 'models')
        else:
            user_home = os.path.expanduser("~")
            models_path = os.path.join(user_home, '.AI4Bharat_Xlit_Models')
        models_path = os.path.join(models_path, "indic2en", XLIT_VERSION)
        os.makedirs(models_path, exist_ok=True)

        # NOTE(review): the language list is read before download_models() runs,
        # so it is presumably expected to exist in models_path already -- confirm.
        lang_list_file = os.path.join(models_path, LANG_LIST_FILE)
        # Context manager so the file handle is closed deterministically.
        with open(lang_list_file) as f:
            listed_langs = f.read().strip().split('\n')
        self._all_supported_langs = set(listed_langs)
        # English is the target of this engine, never a source language.
        self._all_supported_langs.discard("en")

        self._tgt_langs = {"en"}

        # Return values of the download helpers are not needed here.
        self.download_models(models_path, MODEL_DOWNLOAD_URL)
        if rescore:
            self.download_dicts(models_path, DICTS_DOWNLOAD_URL)

        super().__init__(models_path, beam_width=beam_width, rescore=rescore)

    @property
    def all_supported_langs(self):
        """Set of source language codes loaded from the language-list file ("en" excluded)."""
        return self._all_supported_langs

    @property
    def tgt_langs(self):
        """Set of target language codes; always `{"en"}` for this engine."""
        return self._tgt_langs

    def translit_word(self, word, lang_code, topk=4):
        """
        Transliterate a single word from `lang_code` into English.

        Args:
            word: The word to transliterate.
            lang_code: Source language code; must be in `all_supported_langs`.
            topk: Forwarded to `_transliterate_word` (presumably the number of
                candidates to return -- confirm against the base class).

        Returns:
            Whatever `_transliterate_word` returns for the given arguments.

        Raises:
            NotImplementedError: If `lang_code` is not a supported source language.
        """
        if lang_code not in self.all_supported_langs:
            raise NotImplementedError(f"Language: `{lang_code}` not yet supported")
        return self._transliterate_word(word, src_lang=lang_code, tgt_lang='en', topk=topk)

    def translit_sentence(self, indic_sentence, lang_code):
        """
        Transliterate a sentence from `lang_code` into English.

        Args:
            indic_sentence: The sentence to transliterate.
            lang_code: Source language code; must be in `all_supported_langs`.

        Returns:
            Whatever `_transliterate_sentence` returns for the given arguments.

        Raises:
            NotImplementedError: If `lang_code` is not a supported source language.
        """
        if lang_code not in self.all_supported_langs:
            raise NotImplementedError(f"Language: `{lang_code}` not yet supported")
        return self._transliterate_sentence(indic_sentence, src_lang=lang_code, tgt_lang='en')