Spaces:
Runtime error
Runtime error
File size: 6,559 Bytes
37d5811 567bb6c 3a29a54 2229429 37d5811 eca5e0e 567bb6c 37d5811 567bb6c 37d5811 eca5e0e 567bb6c eca5e0e 567bb6c eca5e0e 567bb6c eca5e0e 567bb6c eca5e0e 567bb6c 37d5811 2229429 37d5811 eca5e0e 37d5811 eca5e0e 37d5811 eca5e0e 37d5811 eca5e0e 567bb6c eca5e0e 567bb6c eca5e0e 567bb6c eca5e0e 567bb6c eca5e0e 2229429 567bb6c eca5e0e 2d318b7 a6177b6 37d5811 3da8840 eca5e0e 3da8840 2229429 3a29a54 06245e7 eca5e0e 06245e7 eca5e0e 06245e7 567bb6c eca5e0e 567bb6c 1c07b94 e68c0c3 eca5e0e fc6f998 1c07b94 06245e7 43d8590 eca5e0e 43d8590 567bb6c 43d8590 06245e7 2229429 43d8590 eca5e0e 3a29a54 eca5e0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline, M2M100ForConditionalGeneration, M2M100Tokenizer, MBart50TokenizerFast, MBartForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import re
import streamlit as st
def translate_text_blob(text):
    """Translate Portuguese text to English using TextBlob.

    Args:
        text: Portuguese source text.

    Returns:
        The English translation as a plain ``str``. Blank input (empty or
        whitespace-only) yields an empty string without calling TextBlob.
    """
    # Nothing to translate: avoid handing an empty request to the translator.
    if not text or not text.strip():
        return ""
    # NOTE(review): ``TextBlob.translate`` appears to be deprecated/removed in
    # recent TextBlob releases -- confirm the pinned version still ships it.
    blob = TextBlob(text)
    return str(blob.translate(from_lang="pt", to="en"))
def translate_text_M2M100(text, model, tokenizer):
    """Translate text to English with an M2M100 model/tokenizer pair.

    The source language is whatever ``tokenizer.src_lang`` is set to by the
    caller; the target is forced to English through the tokenizer's language
    id for ``"en"``.

    Args:
        text: Source text to translate.
        model: Object exposing ``generate(**encoded, forced_bos_token_id=...)``.
        tokenizer: Object that is callable on text and exposes
            ``get_lang_id`` and ``batch_decode``.

    Returns:
        The first decoded sequence, with special tokens stripped. Blank input
        (empty or whitespace-only) yields an empty string without running the
        model.
    """
    # Nothing to translate: skip tokenisation and generation entirely.
    if not text or not text.strip():
        return ""
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded, forced_bos_token_id=tokenizer.get_lang_id("en"))
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
def translate_BART(text, model, tokenizer):
    """Translate text to English with an mBART-50 model/tokenizer pair.

    The target language is forced to English via the tokenizer's
    ``lang_code_to_id['en_XX']`` entry.

    Args:
        text: Source text to translate.
        model: Object exposing
            ``generate(input_ids, attention_mask=..., forced_bos_token_id=...)``.
        tokenizer: Object that is callable on text and exposes
            ``lang_code_to_id`` and ``decode``.

    Returns:
        The first generated sequence decoded with special tokens stripped.
        Blank input (empty or whitespace-only) yields an empty string without
        running the model.
    """
    # Nothing to translate: skip tokenisation and generation entirely.
    if not text or not text.strip():
        return ""
    inputs = tokenizer(text, return_tensors='pt')
    output = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        forced_bos_token_id=tokenizer.lang_code_to_id['en_XX'],
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
def sentiment_vader(sentence, vader_object):
    """Classify a sentence as POSITIVE, NEGATIVE or NEUTRAL.

    The decision uses only the ``'compound'`` entry of the dict returned by
    ``vader_object.polarity_scores(sentence)``.

    Args:
        sentence: Text to score.
        vader_object: Object exposing ``polarity_scores(sentence)`` that
            returns a mapping containing a ``'compound'`` score.

    Returns:
        ``"POSITIVE"`` when compound >= 0.05, ``"NEGATIVE"`` when
        compound <= -0.05, otherwise ``"NEUTRAL"``.
    """
    compound = vader_object.polarity_scores(sentence)['compound']
    if compound >= 0.05:
        return "POSITIVE"
    if compound <= -0.05:
        return "NEGATIVE"
    return "NEUTRAL"
def sentiment_finbert(text, pipeline):
    """Return the upper-cased label of the first prediction for ``text``.

    Args:
        text: Text to classify.
        pipeline: Callable that, given ``text``, returns a sequence whose
            first item is a mapping with a ``"label"`` entry.

    Returns:
        The ``"label"`` of the first prediction, converted to upper case.
    """
    predictions = pipeline(text)
    top_prediction = predictions[0]
    label = top_prediction["label"]
    return label.upper()
# Streamlit re-executes this script from the top on every widget interaction.
# Without caching, each rerun would instantiate every model below again, so the
# loaders are wrapped in Streamlit's resource cache and run once per process.
# ``st.cache_resource`` only exists on newer Streamlit releases; fall back to
# the legacy ``st.cache`` (mutation checks disabled, as model objects are not
# hashable) when it is missing.
_cache_models = getattr(st, "cache_resource", None) or st.cache(
    allow_output_mutation=True)


@_cache_models
def _load_m2m100():
    """Load the M2M100 model and its tokenizer, with Portuguese as source."""
    model = M2M100ForConditionalGeneration.from_pretrained(
        "facebook/m2m100_418M")
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    tokenizer.src_lang = "pt"
    return model, tokenizer


@_cache_models
def _load_mbart():
    """Load the pt->en fine-tuned mBART-50 model and its tokenizer."""
    tokenizer = MBart50TokenizerFast.from_pretrained(
        'Narrativa/mbart-large-50-finetuned-opus-pt-en-translation')
    model = MBartForConditionalGeneration.from_pretrained(
        'Narrativa/mbart-large-50-finetuned-opus-pt-en-translation')
    tokenizer.src_lang = 'pt_XX'
    return model, tokenizer


@_cache_models
def _load_finbert():
    """Load FinBERT and wrap it in a sentiment-analysis pipeline."""
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained(
        "ProsusAI/finbert", num_labels=3)
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=model, tokenizer=tokenizer)
    return tokenizer, model, sentiment_pipeline


@_cache_models
def _load_zero_shot_classifier():
    """Load the multilingual zero-shot classification pipeline."""
    return pipeline("zero-shot-classification",
                    model="joeddav/xlm-roberta-large-xnli")


# Module-level names are kept exactly as before so the rest of the script
# (and anything importing it) keeps working.
m2m100, m2m100_token = _load_m2m100()
BART, BART_token = _load_mbart()
finbert_token, finbert, finbert_pipeline = _load_finbert()
sid_obj = SentimentIntensityAnalyzer()
classifier = _load_zero_shot_classifier()
# Google Sheets document (and tab) that holds the project dataset.
sheet_name = "Sheet1"
sheet_id = "1IGFSKnnmQndKVmGOWMCbsvJJMU_2jvnm"
# The URL asks for the selected tab with ``tqx=out:csv``; pandas then parses
# the response as CSV.
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
df = pd.read_csv(filepath_or_buffer=url)
# ---------------------------------------------------------------------------
# Page layout: containers are created first so their on-page order is fixed,
# then each one is filled in below.
# ---------------------------------------------------------------------------
header = st.container()
model_1, model_2 = st.columns(2)
model = st.container()
dataset = st.container()
analysis = st.container()

# Selectbox label -> callable. Keeping the options and the dispatch in one
# place guarantees every selectable model maps to the matching function
# (the BART option must use translate_BART: the mBART tokenizer is driven
# through ``lang_code_to_id``, not M2M100's ``get_lang_id``).
_TRANSLATORS = {
    "TextBlob": translate_text_blob,
    "M2M100": lambda source: translate_text_M2M100(source, m2m100, m2m100_token),
    "BART": lambda source: translate_BART(source, BART, BART_token),
}
_SENTIMENT_MODELS = {
    "Vader": lambda english: sentiment_vader(english, sid_obj),
    "FinBERT": lambda english: sentiment_finbert(english, finbert_pipeline),
}

with st.sidebar:
    st.markdown("# Lorem Ipsum\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent sapien tortor, suscipit quis ornare ut, laoreet vitae nisi. Mauris quis consectetur risus, non blandit mauris. Sed ut odio tempor, ullamcorper leo eu, mollis eros. Curabitur pretium sollicitudin sapien, vel mattis augue convallis quis. Suspendisse eleifend turpis non nunc gravida, aliquet hendrerit orci viverra. Sed aliquet, nunc eu posuere tempor, libero ex dignissim velit, ut ultricies erat felis at urna. Proin metus augue, commodo in faucibus sed, aliquet ac eros. Nullam turpis leo, dictum eu tellus a, aliquam egestas velit. Suspendisse cursus augue a sem dapibus, eu faucibus nisl ultrices. Cras tortor ipsum, luctus vitae tincidunt id, dapibus id justo. Sed mi nunc, tempor eu iaculis in, tristique cursus massa. Integer metus felis, pulvinar ut aliquam ut, consectetur in nulla.")

with header:
    st.title("IC 2022 Classificação de Dados Financeiros")
    st.write("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent sapien tortor, suscipit quis ornare ut, laoreet vitae nisi. Mauris quis consectetur risus, non blandit mauris. Sed ut odio tempor, ullamcorper leo eu, mollis eros.")

# Left column: model selection and input text.
with model_1:
    st.header("Modelo para Tradução e Classificação!")
    translator = st.selectbox(
        'Qual modelo você deseja usar para tradução?', tuple(_TRANSLATORS))
    sentimentor = st.selectbox(
        'Qual modelo você deseja usar para a análise de sentimentos?',
        tuple(_SENTIMENT_MODELS))
    text = st.text_area("Coloque seu texto sobre mercado financeiro em português!",
                        "As ações da Ultrafarma subiram em 98% no último bimestre, segundo os dados da revista!")
    submit = st.button('Gerar!')

# Right column: translation and sentiment for the submitted text.
with model_2:
    if submit:
        if not text.strip():
            # The user cleared the text area; there is nothing to translate.
            st.warning("Digite um texto para traduzir e classificar.")
        else:
            text_en = _TRANSLATORS[translator](text)
            sentiment = _SENTIMENT_MODELS[sentimentor](text_en)
            # st.text_area requires a label as its first argument.
            st.text_area("Translation", value=text_en)
            st.text_area("Sentiment", value=sentiment)

# NOTE(review): the ``model`` container is intentionally left empty. The form
# that used to live here called translate_text, sentiment_classification and
# theme_classification, none of which are defined in this file, so it raised
# NameError as soon as text was entered. Theme classification with
# ``classifier`` still needs an implementation (candidate labels are unknown).

with dataset:
    st.header("Dados utilizados no projeto!")
    st.write("Os dados blablablabla")
    st.dataframe(df)
    st.subheader("Descrição das colunas:")
    # NOTE(review): the five entries below are identical -- looks like
    # placeholder text; replace with the real column descriptions.
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")

with analysis:
    st.header("Visualização dos dados utilizados")
|