"""Gradio demo that asks a Llama-2 text-generation model whether a tweet is ironic.

The tweet is cleaned (brackets, emojis, irony-revealing hashtags), inserted
into an instruction prompt and passed to a ``transformers`` text-generation
pipeline; the raw pipeline output is shown in a text box.
"""

import json
import os
import re
import tempfile
from datetime import datetime
from pathlib import Path
from uuid import uuid4

import demoji
import gradio as gr
from huggingface_hub import CommitScheduler
from transformers import pipeline

# Folder handed to the model loader for weights offloaded to disk.
offload_dir = '/content/offload'
os.makedirs(offload_dir, exist_ok=True)

# based on https://huggingface.co/spaces/Wauplin/space_to_dataset_saver/blob/main/app_json.py
# data is saved at https://huggingface.co/datasets/MR17u/tweeteval-irony-mcc/tree/main

# Dataset saving is currently disabled; the pieces are kept for reference.
# JSON_DATASET_DIR = Path("json_dataset")
# JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
# JSON_DATASET_PATH = JSON_DATASET_DIR / f"data-{uuid4()}.json"

# Instruction template; ``{text}`` is filled with the cleaned tweet.
# NOTE(review): the separators between the sections are single spaces here;
# if the adapter was trained with line breaks between sections, restore them.
prompt = (
    "### Instruction: Classify if the following tweet is ironic or not "
    "### Input: {text} "
    "### Response: "
)

# scheduler = CommitScheduler(
#     repo_id="tweeteval-irony-mcc",
#     repo_type="dataset",
#     folder_path=JSON_DATASET_DIR,
#     path_in_repo="data",
# )

# Model-loading options go through ``model_kwargs`` so they reach
# ``from_pretrained`` instead of being treated as generation arguments.
classifier = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-hf",
    device_map="auto",
    model_kwargs={
        "low_cpu_mem_usage": True,
        "offload_folder": offload_dir,
    },
)
# A transformers pipeline has no ``load_lora_weights`` method; the adapter is
# attached to the underlying model instead.
# NOTE(review): ``load_adapter`` needs the ``peft`` package at runtime --
# confirm it is listed in the Space requirements.
classifier.model.load_adapter("PierreEpron/llama7b-irony")


def clean_brackets(text):
    """Return ``text`` with ``{``/``}`` replaced by ``(``/``)``."""
    return text.replace('{', '(').replace('}', ')')


def clean_emojis(text, type: str = ''):
    """Remove, describe or keep the emojis found in ``text``.

    Args:
        text: String to process.
        type: ``'rem'`` strips emojis, ``'keep'`` returns ``text`` untouched;
            any other value is forwarded as the second argument of
            ``demoji.replace_with_desc`` (presumably the separator placed
            around each emoji description -- verify against demoji docs).
            The name shadows the builtin but is kept for caller
            compatibility.

    Returns:
        The processed string.
    """
    if type == 'keep':
        return text
    if type == 'rem':
        return demoji.replace(text, '')
    return demoji.replace_with_desc(text, type)


def clean_hashtags(text, hashtags=('#irony', '#sarcasm', '#not')):
    """Delete the given hashtags from ``text`` and collapse runs of spaces.

    Args:
        text: String to process.
        hashtags: Iterable of patterns to delete. Each entry is passed to
            ``re.sub`` as a pattern and matched case-insensitively anywhere
            in ``text``, so it also matches as the prefix of a longer tag.

    Returns:
        ``text`` without the hashtags, with consecutive spaces merged.
    """
    for hashtag in hashtags:
        text = re.sub(hashtag, '', text, flags=re.I)
    return re.sub(r' +', r' ', text)


def clean_text(text):
    """Apply bracket, hashtag and emoji cleaning, then normalise spacing."""
    without_brackets = clean_brackets(text)
    without_hashtags = clean_hashtags(without_brackets)
    described = clean_emojis(without_hashtags)
    # Emoji replacement can leave doubled spaces behind; squeeze them again.
    return re.sub(' {2,}', ' ', described).strip()


# def save_json(entry: str, result) -> None:
#     with scheduler.lock:
#         with JSON_DATASET_PATH.open("a") as f:
#             result = json.loads(result.replace("'",'"'))[0]
#             json.dump({"entry": entry, "label": result['label'], "score": result['score'], "datetime": datetime.now().isoformat()}, f)
#             f.write("\n")


def classif(text: str):
    """Run the classifier pipeline on a tweet.

    Args:
        text: Raw tweet as typed by the user.

    Returns:
        Whatever ``classifier`` returns for the formatted prompt; it is
        handed unchanged to the output text box.
    """
    return classifier(prompt.format(text=clean_text(text)))


# NOTE(review): nesting of the button relative to the row was reconstructed;
# confirm the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        entry = gr.Textbox(label="Input")
        result = gr.Textbox(label="Classification")
    input_btn = gr.Button("Submit")
    # After a successful classification, echo the input and result to stdout
    # (stand-in for the disabled ``save_json``).
    input_btn.click(fn=classif, inputs=entry, outputs=result).success(
        fn=print,  # save_json,
        inputs=[entry, result],
        outputs=None,
    )

demo.launch()