DENNY
committed on
Commit
·
8f0965e
1
Parent(s):
6d2ea02
Update
Browse files
- README.md +24 -129
- app.py +3 -3
- dockerfile +12 -8
- requirements.txt +5 -5
README.md
CHANGED
@@ -1,144 +1,39 @@
|
|
1 |
-
# Panduan Penggunaan Docker untuk Hugging Face Model API
|
2 |
|
3 |
-
## Struktur File
|
4 |
-
```
|
5 |
-
your-project/
|
6 |
-
├── Dockerfile
|
7 |
-
├── docker-compose.yml
|
8 |
-
├── requirements.txt
|
9 |
-
├── app.py
|
10 |
-
├── cache/ # Folder untuk cache model
|
11 |
-
└── README.md
|
12 |
-
```
|
13 |
|
14 |
-
|
15 |
|
16 |
-
|
17 |
-
```bash
|
18 |
-
# Clone atau buat direktori project
|
19 |
-
mkdir gema-model-api
|
20 |
-
cd gema-model-api
|
21 |
|
22 |
-
|
23 |
-
# Kemudian jalankan:
|
24 |
-
docker-compose up --build
|
25 |
-
```
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
docker build -t gema-model-api .
|
31 |
|
32 |
-
|
33 |
-
docker run -p 8000:8000 -v $(pwd)/cache:/root/.cache/huggingface gema-model-api
|
34 |
-
```
|
35 |
-
|
36 |
-
## Testing API
|
37 |
|
38 |
-
### 1. Health Check
|
39 |
```bash
|
40 |
-
curl
|
41 |
-
```
|
42 |
-
|
43 |
-
### 2. Generate Text
|
44 |
-
```bash
|
45 |
-
curl -X POST "http://localhost:8000/generate" \
|
46 |
-
-H "Content-Type: application/json" \
|
47 |
-
-d '{
|
48 |
-
"inputs": "Apa kabar dunia teknologi hari ini?"
|
49 |
-
}'
|
50 |
-
```
|
51 |
-
|
52 |
-
### 3. Generate dengan Parameter Custom
|
53 |
-
```bash
|
54 |
-
curl -X POST "http://localhost:8000/generate" \
|
55 |
-H "Content-Type: application/json" \
|
56 |
-d '{
|
57 |
-
"inputs": "
|
58 |
-
"
|
|
|
59 |
"temperature": 0.8,
|
60 |
-
"
|
|
|
|
|
|
|
61 |
}'
|
62 |
```
|
63 |
|
64 |
-
##
|
65 |
-
|
66 |
-
### Python
|
67 |
-
```python
|
68 |
-
import requests
|
69 |
-
|
70 |
-
url = "http://localhost:8000/generate"
|
71 |
-
data = {
|
72 |
-
"inputs": "CONTOH INPUT USER"
|
73 |
-
}
|
74 |
-
|
75 |
-
response = requests.post(url, json=data)
|
76 |
-
result = response.json()
|
77 |
-
print(result["generated_text"])
|
78 |
-
```
|
79 |
-
|
80 |
-
### JavaScript/Node.js
|
81 |
-
```javascript
|
82 |
-
const response = await fetch('http://localhost:8000/generate', {
|
83 |
-
method: 'POST',
|
84 |
-
headers: {
|
85 |
-
'Content-Type': 'application/json',
|
86 |
-
},
|
87 |
-
body: JSON.stringify({
|
88 |
-
inputs: 'CONTOH INPUT USER'
|
89 |
-
})
|
90 |
-
});
|
91 |
-
|
92 |
-
const result = await response.json();
|
93 |
-
console.log(result.generated_text);
|
94 |
-
```
|
95 |
-
|
96 |
-
## Dokumentasi API
|
97 |
-
Setelah menjalankan container, buka browser dan akses:
|
98 |
-
- API Docs: `http://localhost:8000/docs`
|
99 |
-
- ReDoc: `http://localhost:8000/redoc`
|
100 |
-
|
101 |
-
## Tips Optimasi
|
102 |
-
|
103 |
-
### 1. Untuk GPU Support
|
104 |
-
Jika Anda memiliki GPU NVIDIA, update `app.py`:
|
105 |
-
```python
|
106 |
-
# Ubah gpu_layers dari 0 ke jumlah yang sesuai
|
107 |
-
gpu_layers=50 # Atau sesuai kemampuan GPU Anda
|
108 |
-
```
|
109 |
-
|
110 |
-
Dan update `docker-compose.yml`:
|
111 |
-
```yaml
|
112 |
-
services:
|
113 |
-
gema-model-api:
|
114 |
-
# ... konfigurasi lainnya
|
115 |
-
runtime: nvidia # Untuk GPU support
|
116 |
-
environment:
|
117 |
-
- NVIDIA_VISIBLE_DEVICES=all
|
118 |
-
```
|
119 |
-
|
120 |
-
### 2. Untuk Production
|
121 |
-
- Gunakan reverse proxy (nginx)
|
122 |
-
- Implement authentication
|
123 |
-
- Add rate limiting
|
124 |
-
- Set up monitoring dan logging
|
125 |
-
- Use environment variables untuk konfigurasi
|
126 |
-
|
127 |
-
### 3. Memory Management
|
128 |
-
Model ini memerlukan RAM yang cukup. Sesuaikan memory limits di docker-compose.yml berdasarkan spesifikasi server Anda.
|
129 |
-
|
130 |
-
## Troubleshooting
|
131 |
-
|
132 |
-
### Model Loading Issues
|
133 |
-
- Pastikan koneksi internet stabil saat pertama kali menjalankan
|
134 |
-
- Model akan diunduh otomatis dan disimpan di cache
|
135 |
-
- Jika gagal, coba hapus folder cache dan jalankan ulang
|
136 |
-
|
137 |
-
### Memory Issues
|
138 |
-
- Kurangi `context_length` di app.py
|
139 |
-
- Sesuaikan memory limits di docker-compose.yml
|
140 |
-
- Gunakan swap file jika diperlukan
|
141 |
|
142 |
-
|
143 |
-
-
|
144 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
# Gema 4B Indonesian Language Model API
|
4 |
|
5 |
+
REST API untuk model bahasa Indonesia berbasis Gema 4B yang dioptimalkan untuk budaya dan pariwisata Indramayu.
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
## Endpoints
|
|
|
|
|
|
|
8 |
|
9 |
+
- `POST /generate` - Generate text
|
10 |
+
- `GET /health` - Health check
|
11 |
+
- `GET /docs` - API documentation
|
|
|
12 |
|
13 |
+
## Usage
|
|
|
|
|
|
|
|
|
14 |
|
|
|
15 |
```bash
|
16 |
+
curl -X POST "https://dnfs-llm-apiku.hf.space/generate" \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
-H "Content-Type: application/json" \
|
18 |
-d '{
|
19 |
+
"inputs": "Ceritakan tentang Batik Dermayon",
|
20 |
+
"system_prompt": "ANDA ADALAH AHLI BUDAYA DAN PARIWISATA INDRAMAYU.JAWAB RESPON DENGAN SINGKAT DAN SELALU GUNAKAN BAHASA INDRAMAYU .",
|
21 |
+
"max_tokens": 500,
|
22 |
"temperature": 0.8,
|
23 |
+
"top_k": 40,
|
24 |
+
"top_p": 0.95,
|
25 |
+
"repeat_penalty": 2,
|
26 |
+
"stop": ["<end_of_turn>"]
|
27 |
}'
|
28 |
```
|
29 |
|
30 |
+
## Parameters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
- `inputs` (required): Input text
|
33 |
+
- `system_prompt` (optional): System prompt
|
34 |
+
- `max_tokens` (optional): Maximum tokens to generate
|
35 |
+
- `temperature` (optional): Sampling temperature
|
36 |
+
- `top_k` (optional): Top-k sampling
|
37 |
+
- `top_p` (optional): Top-p sampling
|
38 |
+
- `repeat_penalty` (optional): Repetition penalty
|
39 |
+
- `stop` (optional): Stop sequences
|
app.py
CHANGED
@@ -16,11 +16,11 @@ app = FastAPI(title="Gema 4B Model API", version="1.0.0")
|
|
16 |
class TextRequest(BaseModel):
|
17 |
inputs: str
|
18 |
system_prompt: Optional[str] = None
|
19 |
-
max_tokens: Optional[int] =
|
20 |
temperature: Optional[float] = 0.7
|
21 |
top_k: Optional[int] = 50
|
22 |
top_p: Optional[float] = 0.9
|
23 |
-
repeat_penalty: Optional[float] =
|
24 |
stop: Optional[List[str]] = None
|
25 |
|
26 |
# Response model
|
@@ -90,4 +90,4 @@ async def root():
|
|
90 |
return {"message": "Gema 4B Model API", "docs": "/docs"}
|
91 |
|
92 |
if __name__ == "__main__":
|
93 |
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
16 |
class TextRequest(BaseModel):
|
17 |
inputs: str
|
18 |
system_prompt: Optional[str] = None
|
19 |
+
max_tokens: Optional[int] = 512
|
20 |
temperature: Optional[float] = 0.7
|
21 |
top_k: Optional[int] = 50
|
22 |
top_p: Optional[float] = 0.9
|
23 |
+
repeat_penalty: Optional[float] = 1.1
|
24 |
stop: Optional[List[str]] = None
|
25 |
|
26 |
# Response model
|
|
|
90 |
return {"message": "Gema 4B Model API", "docs": "/docs"}
|
91 |
|
92 |
if __name__ == "__main__":
|
93 |
+
uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
|
dockerfile
CHANGED
@@ -1,27 +1,31 @@
|
|
1 |
-
# Dockerfile
|
2 |
FROM python:3.10-slim
|
3 |
|
4 |
-
# Set working directory
|
5 |
WORKDIR /app
|
6 |
|
7 |
# Install system dependencies
|
8 |
RUN apt-get update && apt-get install -y \
|
9 |
-
git \
|
10 |
-
curl \
|
11 |
build-essential \
|
|
|
12 |
&& rm -rf /var/lib/apt/lists/*
|
13 |
|
14 |
-
# Copy requirements first
|
15 |
COPY requirements.txt .
|
16 |
|
17 |
# Install Python dependencies
|
18 |
RUN pip install --no-cache-dir -r requirements.txt
|
19 |
|
20 |
-
# Copy application
|
21 |
-
COPY . .
|
|
|
|
|
|
|
22 |
|
23 |
# Expose port
|
24 |
EXPOSE 8000
|
25 |
|
26 |
-
#
|
|
|
|
|
|
|
|
|
27 |
CMD ["python", "app.py"]
|
|
|
|
|
1 |
FROM python:3.10-slim
|
2 |
|
|
|
3 |
WORKDIR /app
|
4 |
|
5 |
# Install system dependencies
|
6 |
RUN apt-get update && apt-get install -y \
|
|
|
|
|
7 |
build-essential \
|
8 |
+
curl \
|
9 |
&& rm -rf /var/lib/apt/lists/*
|
10 |
|
11 |
+
# Copy requirements first for better caching
|
12 |
COPY requirements.txt .
|
13 |
|
14 |
# Install Python dependencies
|
15 |
RUN pip install --no-cache-dir -r requirements.txt
|
16 |
|
17 |
+
# Copy application code
|
18 |
+
COPY app.py .
|
19 |
+
|
20 |
+
# Create cache directory
|
21 |
+
RUN mkdir -p /root/.cache/huggingface
|
22 |
|
23 |
# Expose port
|
24 |
EXPOSE 8000
|
25 |
|
26 |
+
# Health check
|
27 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
28 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
29 |
+
|
30 |
+
# Run the application
|
31 |
CMD ["python", "app.py"]
|
requirements.txt
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
-
transformers==4.36.0
|
2 |
-
torch==2.1.0
|
3 |
fastapi==0.104.1
|
4 |
-
uvicorn==0.24.0
|
|
|
|
|
|
|
5 |
huggingface-hub==0.19.4
|
6 |
pydantic==2.5.0
|
7 |
-
accelerate==0.25.0
|
8 |
-
ctransformers==0.2.27
|
|
|
|
|
|
|
1 |
fastapi==0.104.1
|
2 |
+
uvicorn[standard]==0.24.0
|
3 |
+
ctransformers==0.2.27
|
4 |
+
torch==2.1.0
|
5 |
+
transformers==4.36.0
|
6 |
huggingface-hub==0.19.4
|
7 |
pydantic==2.5.0
|
8 |
+
accelerate==0.25.0
|
|