Merge pull request #328 from aaronbolton/main
Created DEFAULT_NUM_CTX VAR with a default of 32768
- .env.example +9 -1
- CONTRIBUTING.md +16 -0
- Dockerfile +6 -2
- app/lib/.server/llm/model.ts +5 -1
- docker-compose.yaml +2 -0
.env.example
CHANGED

@@ -65,4 +65,12 @@ LMSTUDIO_API_BASE_URL=
 XAI_API_KEY=
 
 # Include this environment variable if you want more logging for debugging locally
-VITE_LOG_LEVEL=debug
+VITE_LOG_LEVEL=debug
+
+# Example Context Values for qwen2.5-coder:32b
+#
+# DEFAULT_NUM_CTX=32768 # Consumes 36GB of VRAM
+# DEFAULT_NUM_CTX=24576 # Consumes 32GB of VRAM
+# DEFAULT_NUM_CTX=12288 # Consumes 26GB of VRAM
+# DEFAULT_NUM_CTX=6144 # Consumes 24GB of VRAM
+DEFAULT_NUM_CTX=
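Note that `.env.example` ships the variable empty (`DEFAULT_NUM_CTX=`). An empty string is falsy in JavaScript, so the ternary added in `app/lib/.server/llm/model.ts` (below) treats it the same as an unset variable and falls back to the in-code default of 32768.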
CONTRIBUTING.md
CHANGED

@@ -1,4 +1,7 @@
 # Contributing to Bolt.new Fork
+## DEFAULT_NUM_CTX
+
+The `DEFAULT_NUM_CTX` environment variable can be used to limit the maximum context window (in tokens) used by the qwen2.5-coder model. For example, to limit the context to 24576 tokens (which uses 32GB of VRAM), set `DEFAULT_NUM_CTX=24576` in your `.env.local` file.
 
 First off, thank you for considering contributing to Bolt.new! This fork aims to expand the capabilities of the original project by integrating multiple LLM providers and enhancing functionality. Every contribution helps make Bolt.new a better tool for developers worldwide.
 
@@ -81,6 +84,19 @@ ANTHROPIC_API_KEY=XXX
 ```bash
 VITE_LOG_LEVEL=debug
 ```
+
+- Optionally set context size:
+```bash
+DEFAULT_NUM_CTX=32768
+```
+
+Some example context values for the qwen2.5-coder:32b model:
+
+* DEFAULT_NUM_CTX=32768 - Consumes 36GB of VRAM
+* DEFAULT_NUM_CTX=24576 - Consumes 32GB of VRAM
+* DEFAULT_NUM_CTX=12288 - Consumes 26GB of VRAM
+* DEFAULT_NUM_CTX=6144 - Consumes 24GB of VRAM
+
 **Important**: Never commit your `.env.local` file to version control. It's already included in .gitignore.
 
 ### 🚀 Running the Development Server
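A note on the values above: VRAM savings taper off as the context shrinks. Cutting the window from 32768 down to 6144 tokens (roughly a fifth) frees only about 12GB (36GB to 24GB), since the 32b model weights dominate memory use, so pick the largest value that fits your GPU.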
Dockerfile
CHANGED

@@ -26,6 +26,7 @@ ARG OPEN_ROUTER_API_KEY
 ARG GOOGLE_GENERATIVE_AI_API_KEY
 ARG OLLAMA_API_BASE_URL
 ARG VITE_LOG_LEVEL=debug
+ARG DEFAULT_NUM_CTX
 
 ENV WRANGLER_SEND_METRICS=false \
     GROQ_API_KEY=${GROQ_API_KEY} \
@@ -35,7 +36,8 @@ ENV WRANGLER_SEND_METRICS=false \
     OPEN_ROUTER_API_KEY=${OPEN_ROUTER_API_KEY} \
     GOOGLE_GENERATIVE_AI_API_KEY=${GOOGLE_GENERATIVE_AI_API_KEY} \
     OLLAMA_API_BASE_URL=${OLLAMA_API_BASE_URL} \
-    VITE_LOG_LEVEL=${VITE_LOG_LEVEL}
+    VITE_LOG_LEVEL=${VITE_LOG_LEVEL} \
+    DEFAULT_NUM_CTX=${DEFAULT_NUM_CTX}
 
 # Pre-configure wrangler to disable metrics
 RUN mkdir -p /root/.config/.wrangler && \
@@ -57,6 +59,7 @@ ARG OPEN_ROUTER_API_KEY
 ARG GOOGLE_GENERATIVE_AI_API_KEY
 ARG OLLAMA_API_BASE_URL
 ARG VITE_LOG_LEVEL=debug
+ARG DEFAULT_NUM_CTX
 
 ENV GROQ_API_KEY=${GROQ_API_KEY} \
     HuggingFace_API_KEY=${HuggingFace_API_KEY} \
@@ -65,7 +68,8 @@ ENV GROQ_API_KEY=${GROQ_API_KEY} \
     OPEN_ROUTER_API_KEY=${OPEN_ROUTER_API_KEY} \
     GOOGLE_GENERATIVE_AI_API_KEY=${GOOGLE_GENERATIVE_AI_API_KEY} \
     OLLAMA_API_BASE_URL=${OLLAMA_API_BASE_URL} \
-    VITE_LOG_LEVEL=${VITE_LOG_LEVEL}
+    VITE_LOG_LEVEL=${VITE_LOG_LEVEL} \
+    DEFAULT_NUM_CTX=${DEFAULT_NUM_CTX}
 
 RUN mkdir -p ${WORKDIR}/run
 CMD pnpm run dev --host
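The Dockerfile follows its existing ARG-to-ENV pattern: because build arguments do not cross stage boundaries, `ARG DEFAULT_NUM_CTX` is redeclared in each stage before being forwarded into the runtime environment. It can then be overridden at build time, e.g. `docker build --build-arg DEFAULT_NUM_CTX=24576 .` (the value here is illustrative).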
app/lib/.server/llm/model.ts
CHANGED

@@ -9,6 +9,10 @@ import { createOpenRouter } from "@openrouter/ai-sdk-provider";
 import { createMistral } from '@ai-sdk/mistral';
 import { createCohere } from '@ai-sdk/cohere'
 
+export const DEFAULT_NUM_CTX = process.env.DEFAULT_NUM_CTX ?
+  parseInt(process.env.DEFAULT_NUM_CTX, 10) :
+  32768;
+
 export function getAnthropicModel(apiKey: string, model: string) {
   const anthropic = createAnthropic({
     apiKey,
@@ -77,7 +81,7 @@ export function getHuggingFaceModel(apiKey: string, model: string) {
 
 export function getOllamaModel(baseURL: string, model: string) {
   let Ollama = ollama(model, {
-    numCtx: 32768,
+    numCtx: DEFAULT_NUM_CTX,
   });
 
   Ollama.config.baseURL = `${baseURL}/api`;
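One caveat with the parse above: the ternary only guards against an unset or empty variable, so a non-numeric value such as `DEFAULT_NUM_CTX=abc` would yield `NaN` from `parseInt` and be passed through to Ollama. A minimal hardened sketch, hypothetical and not part of this PR, could look like:

```ts
// Hypothetical helper (not in the PR): parse DEFAULT_NUM_CTX defensively.
// Falls back to 32768 when the variable is unset, empty, non-numeric,
// or non-positive, instead of letting NaN reach the Ollama client.
function readNumCtx(raw: string | undefined, fallback = 32768): number {
  const parsed = raw ? parseInt(raw, 10) : NaN;
  return Number.isNaN(parsed) || parsed <= 0 ? fallback : parsed;
}

export const DEFAULT_NUM_CTX = readNumCtx(process.env.DEFAULT_NUM_CTX);
```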
docker-compose.yaml
CHANGED

@@ -21,6 +21,7 @@ services:
       - GOOGLE_GENERATIVE_AI_API_KEY=${GOOGLE_GENERATIVE_AI_API_KEY}
       - OLLAMA_API_BASE_URL=${OLLAMA_API_BASE_URL}
       - VITE_LOG_LEVEL=${VITE_LOG_LEVEL:-debug}
+      - DEFAULT_NUM_CTX=${DEFAULT_NUM_CTX:-32768}
       - RUNNING_IN_DOCKER=true
     extra_hosts:
       - "host.docker.internal:host-gateway"
@@ -48,6 +49,7 @@ services:
       - GOOGLE_GENERATIVE_AI_API_KEY=${GOOGLE_GENERATIVE_AI_API_KEY}
       - OLLAMA_API_BASE_URL=${OLLAMA_API_BASE_URL}
       - VITE_LOG_LEVEL=${VITE_LOG_LEVEL:-debug}
+      - DEFAULT_NUM_CTX=${DEFAULT_NUM_CTX:-32768}
       - RUNNING_IN_DOCKER=true
     extra_hosts:
       - "host.docker.internal:host-gateway"
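With Compose's default-expansion syntax, `${DEFAULT_NUM_CTX:-32768}` resolves to 32768 whenever the variable is unset in the calling environment, so the container default matches the in-code one. To override it per run, set the variable in the shell or in an `.env` file next to `docker-compose.yaml`, e.g. `DEFAULT_NUM_CTX=24576 docker compose up`.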