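A minimal Streamlit app for chatting with Qwen/Qwen2.5-Omni-7B through the transformers text-generation pipeline, with sliders for temperature and output length and an optional Hugging Face token field: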
import streamlit as st
from transformers import pipeline

st.set_page_config(page_title="Chat with Qwen2.5-Omni-7B", layout="centered")
st.title("Chat with Qwen2.5-Omni-7B")

# Model name
model_name = "Qwen/Qwen2.5-Omni-7B"

# Prompt inputs
system_prompt = st.text_area("System Prompt", "You are a helpful assistant.", height=100)
user_input = st.text_input("Your Message", "")

# Temperature & token sliders
temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
max_tokens = st.slider("Max Tokens", 16, 1024, 256)

# Optional: Hugging Face token field (left empty for the user)
hf_token = st.text_input("Hugging Face Token (optional)", type="password")

# Load the model pipeline once and cache it across reruns; Streamlit
# re-executes the whole script on every interaction, so an uncached
# load would reload the weights on each message.
@st.cache_resource
def load_pipeline(token):
    return pipeline(
        "text-generation",
        model=model_name,
        tokenizer=model_name,
        token=token if token else None,
        device_map="auto",
    )

if user_input:
    pipe = load_pipeline(hf_token)
    prompt = f"{system_prompt}\nUser: {user_input}\nAssistant:"
    # do_sample=True is required for the temperature setting to take effect.
    response = pipe(
        prompt,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_tokens,
    )[0]["generated_text"]
    st.markdown("**Response:**")
    # The pipeline returns the prompt plus the completion; strip the prompt.
    st.write(response.replace(prompt, ""))
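
To try the app, save the script as app.py, install the dependencies with `pip install streamlit transformers torch accelerate` (the accelerate package is needed for `device_map="auto"`), and launch it with `streamlit run app.py`. A 7B model needs on the order of 16 GB of GPU memory in half precision, so expect slow or failed loads on smaller hardware. One caveat: Qwen2.5-Omni-7B is a multimodal checkpoint, and if the plain text-generation pipeline refuses to load it, a text-only sibling such as Qwen/Qwen2.5-7B-Instruct should work as a drop-in replacement for `model_name`.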
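
The hand-built `User:`/`Assistant:` prompt works for a quick demo, but Qwen tokenizers ship a chat template that matches the format the model was trained on. A minimal sketch of building the prompt that way, assuming the tokenizer for this checkpoint exposes a chat template as recent Qwen releases do (the message contents here are placeholders; in the app above you would pass `system_prompt` and `user_input` instead):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Omni-7B")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
# Render the conversation into the model's expected prompt format,
# ending with the assistant header so generation continues from there.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

The resulting string can replace the f-string prompt in the app; the `response.replace(prompt, "")` line then strips the templated prefix the same way.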