# Page-header residue from the capture ("Spaces: Sleeping"); not part of the program.
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
# Display name shown in the UI -> language code used to key `language_pairs`.
# Insertion order is the order in which the names appear in the select boxes.
languages = {
    'English': 'en',
    'Urdu': 'ur',
    'French': 'fr',
    'Spanish': 'es',
    'German': 'de',
    'Chinese': 'zh',
    'Italian': 'it',
    'Russian': 'ru',
    'Japanese': 'ja',
    'Arabic': 'ar',
    'Hindi': 'hi',
}
# (source code, target code) -> model checkpoint id for that direction.
# Only pairs that have English on one side are listed; any other combination
# has no direct entry here.
# NOTE(review): nothing in this file verifies that each checkpoint id below is
# actually published; confirm them (in particular the less common pairs)
# before relying on this table.
language_pairs = {
    ('en', 'ur'): 'Helsinki-NLP/opus-mt-en-ur',
    ('ur', 'en'): 'Helsinki-NLP/opus-mt-ur-en',
    ('en', 'fr'): 'Helsinki-NLP/opus-mt-en-fr',
    ('fr', 'en'): 'Helsinki-NLP/opus-mt-fr-en',
    ('en', 'es'): 'Helsinki-NLP/opus-mt-en-es',
    ('es', 'en'): 'Helsinki-NLP/opus-mt-es-en',
    ('en', 'de'): 'Helsinki-NLP/opus-mt-en-de',
    ('de', 'en'): 'Helsinki-NLP/opus-mt-de-en',
    ('en', 'zh'): 'Helsinki-NLP/opus-mt-en-zh',
    ('zh', 'en'): 'Helsinki-NLP/opus-mt-zh-en',
    ('en', 'it'): 'Helsinki-NLP/opus-mt-en-it',
    ('it', 'en'): 'Helsinki-NLP/opus-mt-it-en',
    ('en', 'ru'): 'Helsinki-NLP/opus-mt-en-ru',
    ('ru', 'en'): 'Helsinki-NLP/opus-mt-ru-en',
    ('en', 'ja'): 'Helsinki-NLP/opus-mt-en-ja',
    ('ja', 'en'): 'Helsinki-NLP/opus-mt-ja-en',
    ('en', 'ar'): 'Helsinki-NLP/opus-mt-en-ar',
    ('ar', 'en'): 'Helsinki-NLP/opus-mt-ar-en',
    ('en', 'hi'): 'Helsinki-NLP/opus-mt-en-hi',
    ('hi', 'en'): 'Helsinki-NLP/opus-mt-hi-en',
    # Add more pairs as available
}
@st.cache_resource
def _load_pretrained(model_name):
    """Load one checkpoint's model and tokenizer, cached per checkpoint id.

    Wrapped in ``st.cache_resource`` so repeated translations with the same
    checkpoint reuse the already-loaded objects instead of rebuilding them on
    every call.
    NOTE(review): ``st.cache_resource`` needs a reasonably recent Streamlit
    release; confirm the deployed version provides it.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def load_model(src_lang, tgt_lang):
    """Return ``(model, tokenizer)`` for translating src_lang -> tgt_lang.

    Args:
        src_lang: Source language code, as used in the keys of
            ``language_pairs``.
        tgt_lang: Target language code, same convention.

    Returns:
        A ``(MarianMTModel, MarianTokenizer)`` tuple for the pair's checkpoint.

    Raises:
        ValueError: If ``language_pairs`` has no entry for the pair.
    """
    # Validate outside the cached helper so the ValueError contract does not
    # depend on how the caching layer treats exceptions.
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")
    return _load_pretrained(model_name)
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` with the direct src_lang -> tgt_lang model.

    Args:
        text: Text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation, or ``""`` when ``text`` is empty or only
        whitespace (no model is loaded in that case).

    Raises:
        ValueError: Propagated from ``load_model`` when no model is
            registered for the pair.
    """
    # Nothing to translate: skip the (potentially slow) model load entirely.
    if not text or not text.strip():
        return ""

    model, tokenizer = load_model(src_lang, tgt_lang)
    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask; truncation=True keeps over-long input within the tokenizer's
    # maximum length instead of failing inside the model.
    batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated = model.generate(**batch)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
def translate_chain(text, src_lang, tgt_lang):
    """Translate ``text`` from src_lang to tgt_lang using English as a pivot.

    The text is first translated into English (unless it already is English)
    and then from English into the target (unless the target is English).

    Args:
        text: Text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated text; ``text`` unchanged when source and target are
        the same language.
    """
    # Same language on both sides: a round trip through English would only
    # degrade the text, so hand it back untouched.
    if src_lang == tgt_lang:
        return text

    if src_lang != 'en':
        text = translate(text, src_lang, 'en')
    if tgt_lang != 'en':
        text = translate(text, 'en', tgt_lang)
    return text
def translate_ui(text, source_language, target_language):
    """Translate ``text`` between two languages chosen by display name.

    Args:
        text: Text to translate.
        source_language: Display name of the source language (a key of
            ``languages``).
        target_language: Display name of the target language (a key of
            ``languages``).

    Returns:
        The translated text. When both names map to the same code the input
        is returned unchanged.

    Raises:
        KeyError: If either display name is not a key of ``languages``.
    """
    src_lang = languages[source_language]
    tgt_lang = languages[target_language]

    # Identical source and target: nothing to do, and no model to load.
    if src_lang == tgt_lang:
        return text

    # Decide by looking the pair up rather than by catching ValueError from
    # translate(): a ValueError raised during tokenization or generation must
    # not be mistaken for "no direct model" and silently rerouted.
    if (src_lang, tgt_lang) in language_pairs:
        return translate(text, src_lang, tgt_lang)
    return translate_chain(text, src_lang, tgt_lang)
# Streamlit App UI
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# Input text
text = st.text_area("Enter text to translate", height=100)

# Source and Target Languages (display names, in the order `languages` defines them)
language_names = list(languages)
source_language = st.selectbox("Select Source Language", language_names)
target_language = st.selectbox("Select Target Language", language_names)

# Translate Button
if st.button("Translate"):
    if not text.strip():
        st.warning("Please enter text to translate.")
    else:
        try:
            # Loading a model can take a while; show progress meanwhile.
            with st.spinner("Translating..."):
                translation = translate_ui(text, source_language, target_language)
        except Exception as exc:
            # Top-level UI boundary: report the failure in the page instead of
            # letting a raw traceback replace the app.
            st.error(f"Translation failed: {exc}")
        else:
            st.text_area("Translated Text", translation, height=100)

# About Section
st.sidebar.title("About")
# The text is kept at column 0 inside the literal so the runtime string matches
# the original.
st.sidebar.info(
    """
This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
"""
)