abdullahalioo committed
Commit 8585bd9 · verified · 1 Parent(s): ac252be

Update app.py

Files changed (1): app.py (+52 -70)
app.py CHANGED
(Old version first; lines removed by this commit are prefixed "-". The updated file, with added lines prefixed "+", follows below.)

@@ -4,100 +4,73 @@ from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI

app = FastAPI()
system = '''You are DeepSeek R1, an advanced reasoning assistant.
Your responses consist of two parts:
-
1. A <thinking> block — This is your internal reasoning. You think step-by-step, carefully analyzing the question, considering context, alternatives, and edge cases. This section must be at least 10 lines long and enclosed between <think> and </think>. This part is not shown to the user in real-world applications, but is visible during debugging or development.
2. The final answer — This is the polished, professional response provided after you’ve thought through the problem. It is clear, structured, and concise.
3. Always provide code in this format: ```<code>```.
Your behavior guidelines:
- Maintain a calm, analytical, and formal tone.
- Use bullet points or numbered lists when appropriate.
- Avoid casual language, emojis, or redundant filler.
- If context is missing, mention assumptions.
- Never refer to yourself as an AI or language model.
- Do not repeat the <thinking> part in your final answer.
Format every response exactly as follows:
<think>
[Begin detailed, line-by-line reasoning here — minimum 10 lines. Think aloud.]
</think>
- [Final answer starts here — no label, just a clean professional response.] '''
- # Define available models (you can expand this list)
AVAILABLE_MODELS = {
-     "openai/gpt-4.1": "OpenAI GPT-4.1",
-     "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
-     "openai/gpt-4.1-nano": "OpenAI GPT-4.1-nano",
-     "openai/gpt-4o": "OpenAI GPT-4o",
-     "openai/gpt-4o-mini": "OpenAI GPT-4o mini",
-     "openai/o4-mini": "OpenAI o4-mini",
-     "microsoft/MAI-DS-R1": "MAI-DS-R1",
-     "microsoft/Phi-3.5-MoE-instruct": "Phi-3.5-MoE instruct (128k)",
-     "microsoft/Phi-3.5-mini-instruct": "Phi-3.5-mini instruct (128k)",
-     "microsoft/Phi-3.5-vision-instruct": "Phi-3.5-vision instruct (128k)",
-     "microsoft/Phi-3-medium-128k-instruct": "Phi-3-medium instruct (128k)",
-     "microsoft/Phi-3-medium-4k-instruct": "Phi-3-medium instruct (4k)",
-     "microsoft/Phi-3-mini-128k-instruct": "Phi-3-mini instruct (128k)",
-     "microsoft/Phi-3-small-128k-instruct": "Phi-3-small instruct (128k)",
-     "microsoft/Phi-3-small-8k-instruct": "Phi-3-small instruct (8k)",
-     "microsoft/Phi-4": "Phi-4",
-     "microsoft/Phi-4-mini-instruct": "Phi-4-mini-instruct",
-     "microsoft/Phi-4-multimodal-instruct": "Phi-4-multimodal-instruct",
-     "ai21-labs/AI21-Jamba-1.5-Large": "AI21 Jamba 1.5 Large",
-     "ai21-labs/AI21-Jamba-1.5-Mini": "AI21 Jamba 1.5 Mini",
-     "mistral-ai/Codestral-2501": "Codestral 25.01",
-     "cohere/Cohere-command-r": "Cohere Command R",
-     "cohere/Cohere-command-r-08-2024": "Cohere Command R 08-2024",
-     "cohere/Cohere-command-r-plus": "Cohere Command R+",
-     "cohere/Cohere-command-r-plus-08-2024": "Cohere Command R+ 08-2024",
-     "deepseek/DeepSeek-R1": "DeepSeek-R1",
-     "deepseek/DeepSeek-V3-0324": "DeepSeek-V3-0324",
-     "meta/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
-     "meta/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
-     "meta/Llama-3.3-70B-Instruct": "Llama-3.3-70B-Instruct",
-     "meta/Llama-4-Maverick-17B-128E-Instruct-FP8": "Llama 4 Maverick 17B 128E Instruct FP8",
-     "meta/Llama-4-Scout-17B-16E-Instruct": "Llama 4 Scout 17B 16E Instruct",
-     "meta/Meta-Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
-     "meta/Meta-Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
-     "meta/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
-     "meta/Meta-Llama-3-70B-Instruct": "Meta-Llama-3-70B-Instruct",
-     "meta/Meta-Llama-3-8B-Instruct": "Meta-Llama-3-8B-Instruct",
-     "mistral-ai/Ministral-3B": "Ministral 3B",
-     "mistral-ai/Mistral-Large-2411": "Mistral Large 24.11",
-     "mistral-ai/Mistral-Nemo": "Mistral Nemo",
-     "mistral-ai/Mistral-large-2407": "Mistral Large (2407)",
-     "mistral-ai/Mistral-small": "Mistral Small",
-     "cohere/cohere-command-a": "Cohere Command A",
-     "core42/jais-30b-chat": "JAIS 30b Chat",
-     "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
}


- async def generate_ai_response(prompt: str, model: str):
-     # Configuration for unofficial GitHub AI endpoint
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
-
-     endpoint = "https://models.github.ai/inference"
-
-     # Validate the model
    if model not in AVAILABLE_MODELS:
-         raise HTTPException(status_code=400, detail=f"Model not available. Choose from: {', '.join(AVAILABLE_MODELS.keys())}")

    client = AsyncOpenAI(base_url=endpoint, api_key=token)

    try:
        stream = await client.chat.completions.create(
-             messages=[
-                 {"role": "system", "content": system},
-                 {"role": "user", "content": prompt}
-             ],
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )

        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
@@ -106,18 +79,27 @@ async def generate_ai_response(prompt: str, model: str):
        yield f"Error: {str(err)}"
        raise HTTPException(status_code=500, detail="AI generation failed")

@app.post("/generate")
async def generate_response(
-     prompt: str = Query(..., description="The prompt for the AI"),
-     model: str = Query("openai/gpt-4.1-mini", description="The model to use for generation")
):
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
-
    return StreamingResponse(
-         generate_ai_response(prompt, model),
        media_type="text/event-stream"
    )

def get_app():
-     return app
 
app.py (updated version; lines added by this commit are prefixed "+")

from openai import AsyncOpenAI

app = FastAPI()
+
+ # System prompt
system = '''You are DeepSeek R1, an advanced reasoning assistant.
Your responses consist of two parts:
1. A <thinking> block — This is your internal reasoning. You think step-by-step, carefully analyzing the question, considering context, alternatives, and edge cases. This section must be at least 10 lines long and enclosed between <think> and </think>. This part is not shown to the user in real-world applications, but is visible during debugging or development.
2. The final answer — This is the polished, professional response provided after you’ve thought through the problem. It is clear, structured, and concise.
3. Always provide code in this format: ```<code>```.
+
Your behavior guidelines:
+ - Maintain a calm, analytical, and formal tone.
+ - Use bullet points or numbered lists when appropriate.
+ - Avoid casual language, emojis, or redundant filler.
+ - If context is missing, mention assumptions.
+ - Never refer to yourself as an AI or language model.
+ - Do not repeat the <thinking> part in your final answer.
+
Format every response exactly as follows:
<think>
[Begin detailed, line-by-line reasoning here — minimum 10 lines. Think aloud.]
</think>
+ [Final answer starts here — no label, just a clean professional response.]
+ '''
+
+ # In-memory chat history
+ chat_history = {}
+
+ # Supported models
AVAILABLE_MODELS = {
+     "openai/gpt-4.1": "OpenAI GPT-4.1",
+     "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
+     "deepseek/DeepSeek-R1": "DeepSeek-R1",
+     "microsoft/Phi-3.5-mini-instruct": "Phi-3.5-mini instruct",
+     "meta/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
+     # Add more as needed...
}


+ async def generate_ai_response(chat_id: str, prompt: str, model: str):
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        raise HTTPException(status_code=500, detail="GitHub token not configured")
+
    if model not in AVAILABLE_MODELS:
+         raise HTTPException(status_code=400, detail=f"Invalid model. Choose from: {', '.join(AVAILABLE_MODELS)}")

+     endpoint = "https://models.github.ai/inference"
    client = AsyncOpenAI(base_url=endpoint, api_key=token)

+     # Retrieve or initialize message history
+     messages = chat_history.get(chat_id, [])
+     if not messages:
+         messages.append({"role": "system", "content": system})
+
+     messages.append({"role": "user", "content": prompt})
+
    try:
        stream = await client.chat.completions.create(
+             messages=messages,
            model=model,
            temperature=1.0,
            top_p=1.0,
            stream=True
        )

+         # Update history only if generation starts
+         chat_history[chat_id] = messages
+
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

@@ -106,18 +79,27 @@
        yield f"Error: {str(err)}"
        raise HTTPException(status_code=500, detail="AI generation failed")

+
@app.post("/generate")
async def generate_response(
+     chat_id: str = Query(..., description="Chat session ID"),
+     prompt: str = Query(..., description="User prompt"),
+     model: str = Query("openai/gpt-4.1-mini", description="Model name")
):
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
+
    return StreamingResponse(
+         generate_ai_response(chat_id, prompt, model),
        media_type="text/event-stream"
    )

+
+ @app.post("/reset")
+ async def reset_chat(chat_id: str = Query(..., description="Chat session ID to reset")):
+     chat_history.pop(chat_id, None)
+     return {"message": f"Chat history for {chat_id} has been cleared."}
+
+
def get_app():
+     return app
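
For quick manual testing of the updated endpoints, a minimal client sketch follows. It is illustrative only and not part of the commit: it assumes the server runs locally at http://localhost:8000 (for example via `uvicorn app:get_app --factory`) with GITHUB_TOKEN set in its environment, it uses the third-party httpx library, and the session id "demo" and the helper names ask/reset are invented for the example.

# Hypothetical client for the /generate and /reset endpoints above.
# Assumptions: server at http://localhost:8000, httpx installed.
import httpx

BASE_URL = "http://localhost:8000"  # assumed host/port, not specified by the commit

def ask(chat_id: str, prompt: str, model: str = "openai/gpt-4.1-mini") -> None:
    """POST /generate and print the streamed text chunks as they arrive."""
    params = {"chat_id": chat_id, "prompt": prompt, "model": model}
    with httpx.stream("POST", f"{BASE_URL}/generate", params=params, timeout=None) as response:
        response.raise_for_status()
        for chunk in response.iter_text():
            print(chunk, end="", flush=True)
    print()

def reset(chat_id: str) -> None:
    """POST /reset to drop the server-side history for one session."""
    print(httpx.post(f"{BASE_URL}/reset", params={"chat_id": chat_id}).json())

if __name__ == "__main__":
    ask("demo", "Summarize the difference between lists and tuples in Python.")
    ask("demo", "Give a one-line example of each.")  # same chat_id, so prior turns are reused
    reset("demo")

Note that the endpoints read their arguments from query parameters, and chat_history is a plain module-level dict: history survives only within a single server process, so a restart or an additional worker starts from empty history, and /reset clears one session explicitly.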