ShellapallyJyothsna committed on
Commit
949c765
·
1 Parent(s): f7d1879

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ from huggingface_hub import InferenceClient
5
+
6
# Access token for the Hugging Face Inference API; None when the HF_TOKEN
# environment variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
BOT_NAME = "Falcon"

# Markers that end the bot's turn. They are sent to the backend as stop
# sequences and also stripped from the streamed text on the client side.
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

# Sample prompts offered in the chat UI (one single-element row per example).
EXAMPLES = [
    ["Hey Falcon! Any recommendations for my holidays in Abu Dhabi?"],
    ["What's the Everett interpretation of quantum mechanics?"],
    ["Give me a list of the top 10 dive sites you would recommend around the world."],
    ["Can you tell me more about deep-water soloing?"],
    ["Can you write a short tweet about the release of our latest AI model, Falcon LLM?"],
]

client = InferenceClient(
    API_URL,
    # Only attach an Authorization header when a token is configured;
    # formatting a missing token would send the literal "Bearer None".
    headers={"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else None,
)
24
+
25
def format_prompt(message, history, system_prompt):
    """Build the plain-text prompt sent to the model.

    The prompt is a transcript: an optional ``System:`` line, one
    ``User:``/``Falcon:`` pair of lines per past exchange, then the new user
    message followed by an open ``Falcon:`` cue for the model to complete.

    Args:
        message: The new user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from earlier
            turns; ``None`` or empty means there is no prior conversation.
        system_prompt: Optional system instruction; skipped when falsy.

    Returns:
        The assembled prompt string, ending in ``"Falcon:"`` with no trailing
        newline.
    """
    parts = []
    if system_prompt:
        parts.append(f"System: {system_prompt}\n")
    for user_prompt, bot_response in history or ():
        parts.append(f"User: {user_prompt}\n")
        # The speaker prefix is added here; the stored response is used as-is.
        parts.append(f"Falcon: {bot_response}\n")
    parts.append(f"User: {message}\nFalcon:")
    return "".join(parts)
35
+
36
# Sampling seed shared by all requests; generate() increments it after every
# call so consecutive generations do not reuse the same seed.
seed = 42


def generate(
    prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream the model's reply to ``prompt`` as progressively longer strings.

    Args:
        prompt: The new user message.
        history: Earlier ``(user, bot)`` exchanges, passed to ``format_prompt``.
        system_prompt: Optional system instruction placed before the transcript.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens; coerced to an integer
            of at least 1.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        The reply accumulated so far, once per streamed token. A trailing stop
        sequence is removed before the text is yielded.

    Returns:
        The final reply text (carried as the generator's return value).

    Raises:
        gr.Error: If the request or the streaming fails; the original
            exception is chained as the cause.
    """
    global seed

    generate_kwargs = dict(
        temperature=max(float(temperature), 1e-2),
        # UI sliders can hand over floats and allow 0; the text-generation
        # backend expects a strictly positive integer here.
        max_new_tokens=max(int(max_new_tokens), 1),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed = seed + 1
    formatted_prompt = format_prompt(prompt, history, system_prompt)

    output = ""
    try:
        stream = client.text_generation(
            formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
        )
        for response in stream:
            output += response.token.text

            stop_str = next((s for s in STOP_SEQUENCES if output.endswith(s)), None)
            if stop_str is not None:
                # The turn is over: drop the marker and stop consuming the
                # stream so nothing gets appended after the cut.
                output = output[: -len(stop_str)].rstrip()
                yield output
                break
            yield output
    except Exception as e:
        raise gr.Error(f"Error while generating: {e}") from e
    return output
74
+
75
+
76
# Extra controls shown under the chat box. Their order must match the optional
# parameters of generate(): system_prompt, temperature, max_new_tokens, top_p,
# repetition_penalty.
additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        # Start at one step instead of 0: a request for zero new tokens is not
        # a usable generation length.
        minimum=64,
        maximum=3000,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.01,
        maximum=0.99,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
115
+
116
+
117
# Introductory Markdown rendered next to the banner image. Kept as a
# module-level literal so its content does not depend on the indentation of
# the layout code below.
_HEADER_MARKDOWN = """# Falcon-180B Demo
**Chat with [Falcon-180B-Chat](https://huggingface.co/tiiuae/falcon-180b-chat), brainstorm ideas, discuss your holiday plans, and more!**

✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) and finetuned on a mixture of [Ultrachat](https://huggingface.co/datasets/stingning/ultrachat), [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) and [Airoboros](https://huggingface.co/datasets/jondurbin/airoboros-2.1). [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b) is a state-of-the-art large language model built by the [Technology Innovation Institute](https://www.tii.ae) in Abu Dhabi. It is trained on 3.5 trillion tokens (including [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)) and available under the [Falcon-180B TII License](https://huggingface.co/spaces/tiiuae/falcon-180b-license/blob/main/LICENSE.txt). It currently holds the 🥇 1st place on the [🤗 Open LLM leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) for a pretrained model.

🧪 This is only a **first experimental preview**: we intend to provide increasingly capable versions of Falcon in the future, based on improved datasets and RLHF/RLAIF.

👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)

➡️️ **Intended Use**: this demo is intended to showcase an early finetuning of [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b), to illustrate the impact (and limitations) of finetuning on a dataset of conversations and instructions. We encourage the community to further build upon the base model, and to create even better instruct/chat versions!

⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
"""

# Page layout: a header row (banner image + introduction) above the chat UI.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
        with gr.Column(scale=5):
            gr.Markdown(_HEADER_MARKDOWN)

    # Chat widget driven by generate(); the extra controls are forwarded to it
    # as additional arguments.
    gr.ChatInterface(
        generate,
        examples=EXAMPLES,
        additional_inputs=additional_inputs,
    )

# Start the server only when the file is run as a script, so importing the
# module (e.g. to reuse `demo`) does not launch it as a side effect.
if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False)