druvx13 committed
Commit 7de9288 · verified · 1 Parent(s): bfad274

Create app.py

Files changed (1)
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
+ import uvicorn
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import StreamingResponse
+ from huggingface_hub import InferenceClient
+ from pydantic import BaseModel
+
+ app = FastAPI()
+
+ # Configure CORS so the API can be called from browser clients on any origin
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Inference API client for the hosted Qwen coder model
+ client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
+
+ class ChatRequest(BaseModel):
+     message: str
+     history: list[tuple[str, str]]
+     system_message: str
+     max_tokens: int
+     temperature: float
+     top_p: float
+
+ def generate_response(messages, max_tokens, temperature, top_p):
+     # Stream the completion and yield each text delta as it arrives
+     for chunk in client.chat_completion(
+         messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         yield chunk.choices[0].delta.content or ""
+
+ @app.post("/api/chat")
+ async def chat_stream(request: ChatRequest):
+     try:
+         # Assemble the transcript: system prompt, prior turns, new message
+         messages = [{"role": "system", "content": request.system_message}]
+
+         for user_msg, assistant_msg in request.history:
+             messages.extend([
+                 {"role": "user", "content": user_msg},
+                 {"role": "assistant", "content": assistant_msg}
+             ])
+
+         messages.append({"role": "user", "content": request.message})
+
+         return StreamingResponse(
+             generate_response(
+                 messages=messages,
+                 max_tokens=request.max_tokens,
+                 temperature=request.temperature,
+                 top_p=request.top_p
+             ),
+             media_type="text/event-stream"
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
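
For reference, a minimal, untested client sketch (not part of this commit) showing how the streaming /api/chat endpoint could be exercised. It assumes the server above is running locally on port 7860 and that any Hugging Face token required by InferenceClient is configured in the environment; the payload field names mirror the ChatRequest model.

import requests

# Hypothetical request payload; fields mirror the ChatRequest model above
payload = {
    "message": "Write a hello-world script in Python.",
    "history": [],
    "system_message": "You are a helpful coding assistant.",
    "max_tokens": 256,
    "temperature": 0.7,
    "top_p": 0.95,
}

# Stream the response body and print text chunks as they arrive
with requests.post("http://localhost:7860/api/chat", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)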