Spaces:
Runtime error
Runtime error
import gradio as gr | |
import json | |
from datetime import datetime | |
import demoji | |
from huggingface_hub import CommitScheduler | |
from pathlib import Path | |
import re | |
from transformers import pipeline | |
from uuid import uuid4 | |
import os | |
import tempfile | |
# Folder passed to the model loader below as its offload location.
offload_dir = '/offload'
# exist_ok=True makes the creation idempotent and avoids the race between a
# separate existence check and the mkdir call.
os.makedirs(offload_dir, exist_ok=True)
# Based on https://huggingface.co/spaces/Wauplin/space_to_dataset_saver/blob/main/app_json.py
# Data is saved at https://huggingface.co/datasets/MR17u/tweeteval-irony-mcc/tree/main
# JSON_DATASET_DIR = Path("json_dataset") | |
# JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True) | |
# JSON_DATASET_PATH = JSON_DATASET_DIR / f"data-{uuid4()}.json" | |
# Instruction-style prompt template; the `{text}` placeholder is filled in
# with the cleaned tweet through str.format before generation.
prompt = (
    '### Instruction:\n'
    'Classify if the following tweet is ironic or not\n'
    '### Input:\n'
    '{text}\n'
    '### Response:\n'
)
# scheduler = CommitScheduler( | |
# repo_id="tweeteval-irony-mcc", | |
# repo_type="dataset", | |
# folder_path=JSON_DATASET_DIR, | |
# path_in_repo="data", | |
# ) | |
# Text-generation pipeline around the Llama-2 7B base checkpoint.
# Loader-only options (low_cpu_mem_usage, offload_folder) go through
# model_kwargs so they are forwarded to from_pretrained instead of being
# collected as generation arguments by the pipeline.
classifier = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-hf",
    device_map="auto",
    model_kwargs={
        "low_cpu_mem_usage": True,
        "offload_folder": offload_dir,
    },
)
# Transformers pipelines expose no `load_lora_weights` method (that API belongs
# to diffusers pipelines), so the adapter is attached to the underlying model
# through the PEFT integration instead. Requires the `peft` package at runtime.
# The adapter weights file (adapter_model.safetensors) is the PEFT default name.
classifier.model.load_adapter("PierreEpron/llama7b-irony")
def clean_brackets(text):
    """Return *text* with each '{' replaced by '(' and each '}' by ')'."""
    brace_to_paren = str.maketrans('{}', '()')
    return text.translate(brace_to_paren)
def clean_emojis(text, type:str = ''):
    """Handle emojis in *text* according to the *type* mode.

    - ``'keep'``: return *text* unchanged.
    - ``'rem'``: return ``demoji.replace(text, '')``.
    - anything else (including the default ``''``): return
      ``demoji.replace_with_desc(text, type)``, i.e. *type* is passed on as
      the second argument (presumably the separator placed around each
      emoji description; confirm against the demoji documentation).
    """
    if type == 'keep':
        return text
    if type == 'rem':
        return demoji.replace(text, '')
    return demoji.replace_with_desc(text, type)
def clean_hashtags(text, hashtags=('#irony', '#sarcasm', '#not')):
    """Remove the given hashtags from *text*, ignoring case.

    Each hashtag is matched literally and only as a whole tag: a tag that is
    merely the prefix of a longer one (e.g. ``#not`` inside ``#nothing``) is
    left alone. Runs of spaces left behind are collapsed to a single space.

    Args:
        text: The string to clean.
        hashtags: Iterable of hashtag strings to strip.

    Returns:
        The cleaned string (not stripped at the ends).
    """
    for hashtag in hashtags:
        # re.escape keeps regex metacharacters in a tag from being
        # interpreted; the lookahead refuses a match that is followed by
        # another word character, so longer hashtags are not truncated.
        pattern = re.escape(hashtag) + r'(?!\w)'
        text = re.sub(pattern, '', text, flags=re.I)
    return re.sub(r' +', ' ', text)
def clean_text(text):
    """Run the full cleaning chain on *text* and return the result.

    Applies, in order: clean_brackets, clean_hashtags (default tags),
    clean_emojis (default mode), then collapses runs of two or more spaces
    into one and strips leading/trailing whitespace.
    """
    cleaned = clean_brackets(text)
    cleaned = clean_hashtags(cleaned)
    cleaned = clean_emojis(cleaned)
    collapsed = re.sub(' {2,}', ' ', cleaned)
    return collapsed.strip()
# def save_json(entry: str, result) -> None: | |
# with scheduler.lock: | |
# with JSON_DATASET_PATH.open("a") as f: | |
# result = json.loads(result.replace("'",'"'))[0] | |
# json.dump({"entry": entry, "label": result['label'], "score": result['score'], "datetime": datetime.now().isoformat()}, f) | |
# f.write("\n") | |
def classif(text: str):
    """Clean *text*, insert it into the prompt template and run the classifier.

    Returns whatever the module-level `classifier` pipeline returns for the
    filled-in prompt.
    """
    filled_prompt = prompt.format(text=clean_text(text))
    return classifier(filled_prompt)
# Gradio UI: an input textbox and an output textbox, plus a submit button
# that runs `classif` on the input and shows the result.
# NOTE(review): the nesting below was reconstructed (textboxes inside the
# row, button and event wiring directly under the Blocks context) - confirm
# against the deployed layout.
with gr.Blocks() as demo:
    with gr.Row():
        entry = gr.Textbox(label="Input")
        result = gr.Textbox(label="Classification")
    input_btn = gr.Button("Submit")
    # The click handler writes the classifier output into `result`; the
    # chained .success() callback then receives both the entry and the result.
    input_btn.click(fn=classif, inputs=entry, outputs=result).success(
        fn=print,  # stand-in for save_json, which is currently commented out
        inputs=[entry, result],
        outputs=None
    )
# Start the app.
demo.launch()