Commit · 21759c1
Parent(s): fa98d6d

Ollama test

Files changed:
- Dockerfile +22 -5
- app.py +33 -0
- nginx.conf +0 -11
- requirements.txt +5 -0
- start.sh +34 -0
Dockerfile
CHANGED
@@ -1,7 +1,24 @@
+FROM python:3.9-slim
 
+# Install curl and Ollama
+RUN apt-get update && apt-get install -y curl && \
+    curl -fsSL https://ollama.ai/install.sh | sh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
 
+# Set up user and environment
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR $HOME/app
+
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . .
+
+# Make the start script executable
+RUN chmod +x start.sh
+
+CMD ["./start.sh"]
app.py
ADDED
@@ -0,0 +1,33 @@
+import os
+import logging
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from langchain_community.llms import Ollama
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+MODEL_NAME = 'tinyllama'
+
+def get_llm():
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)
+
+class Question(BaseModel):
+    text: str
+
+@app.get("/")
+def read_root():
+    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}
+
+@app.on_event("startup")
+async def startup_event():
+    logger.info(f"Starting up with model: {MODEL_NAME}")
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    logger.info("Shutting down")
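Note that this version of app.py defines get_llm() and the Question model but never uses them, and the HTTPException and StreamingResponse imports are likewise unused. A minimal sketch of how a question-answering route could tie these together in app.py is shown below; it is not part of this commit, and the /ask path and streaming behaviour are assumptions:

@app.post("/ask")
def ask(question: Question):
    # Hypothetical route, not in this commit: stream the model's reply for a
    # posted Question using the get_llm() helper defined above.
    try:
        llm = get_llm()
        # LangChain LLMs expose .stream(), which yields text chunks as they are
        # generated; StreamingResponse forwards them to the client incrementally.
        return StreamingResponse(llm.stream(question.text), media_type="text/plain")
    except Exception as exc:
        logger.error(f"Generation failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))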
nginx.conf
DELETED
@@ -1,11 +0,0 @@
-events {}
-http {
-    server {
-        listen 7860;
-        location / {
-            proxy_pass http://localhost:11434;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-        }
-    }
-}
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+langchain
+langchain_community
+ollama
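requirements.txt also installs the ollama Python client, which app.py does not use directly; the LangChain wrapper talks to the same local server over HTTP. For reference, a direct call against the server that start.sh launches might look like the sketch below, assuming the server is reachable on its default port 11434 and the tinyllama model has already been pulled:

import ollama

# Talk to the local Ollama server directly (default http://localhost:11434),
# bypassing the LangChain wrapper; assumes `ollama serve` is running and the
# tinyllama model is present.
response = ollama.chat(
    model="tinyllama",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response["message"]["content"])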
start.sh
ADDED
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Set environment variables for optimization
+export OMP_NUM_THREADS=4
+export MKL_NUM_THREADS=4
+export CUDA_VISIBLE_DEVICES=0
+
+# Start Ollama in the background
+ollama serve &
+
+# Wait for Ollama to start up
+max_attempts=30
+attempt=0
+while ! curl -s http://localhost:11434/api/tags >/dev/null; do
+    sleep 1
+    attempt=$((attempt + 1))
+    if [ $attempt -eq $max_attempts ]; then
+        echo "Ollama failed to start within 30 seconds. Exiting."
+        exit 1
+    fi
+done
+
+echo "Ollama is ready."
+
+# Pull the model once the server is up, if not already present
+if ! ollama list | grep -q "tinyllama"; then
+    ollama pull tinyllama
+fi
+
+# Print the API URL
+echo "API is running on: http://0.0.0.0:7860"
+
+# Start the FastAPI server
+uvicorn app:app --host 0.0.0.0 --port 7860 --workers 4 --limit-concurrency 20
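Once the image is built and the container is running with port 7860 published, the FastAPI root route gives a quick liveness check. A minimal sketch, assuming the service is reachable at http://localhost:7860 (the host name and port mapping depend on how the container is run):

import json
import urllib.request

# Query the root endpoint served by uvicorn inside the container; per
# read_root() in app.py the reply should be
# {"Hello": "Welcome to tinyllama FastAPI"}.
with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.load(resp))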