abdullahalioo committed
Commit 0218c20 · verified
1 Parent(s): e1108f3

Update main.py

Files changed (1)
1. main.py (+44, -24)
main.py CHANGED
@@ -1,19 +1,26 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
-from g4f.client import Client
 from fastapi.responses import StreamingResponse
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 
-# Initialize the AI client
-client = Client()
+# Load Qwen model and tokenizer (once)
+model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+
+# Set device
+device = torch.device("cpu")  # Or "cuda" if using GPU
+model.to(device)
 
 # FastAPI app
 app = FastAPI()
 
-# CORS Middleware (so JS from browser can access it too)
+# CORS settings
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # Change "*" to your frontend URL for better security
+    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -23,28 +30,41 @@ app.add_middleware(
 class Question(BaseModel):
     question: str
 
+# System prompt (your custom instructions)
+SYSTEM_PROMPT = "You are Orion, an intelligent AI assistant created by Abdullah Ali, a 13-year-old from Lahore. Respond kindly and wisely."
+
+# Chat response generator
 async def generate_response_chunks(prompt: str):
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4o-mini",  # Use a supported model
-            messages=[
-                {"role": "user", "content": prompt},
-                {"role": "system", "content": "You are a Orion AI assistant created by abdullah ali who is very intelegent and he is 13 years old and live in lahore."}
-            ],
-            stream=True  # Enable streaming
-        )
-
-        for part in response:
-            content = part.choices[0].delta.content
-            if content:
-                yield content
-
-    except Exception as e:
-        yield f"Error occurred: {e}"
+    # Build prompt using Qwen's expected format
+    qwen_prompt = (
+        f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
+        f"<|im_start|>user\n{prompt}<|im_end|>\n"
+        f"<|im_start|>assistant\n"
+    )
+
+    # Tokenize input
+    inputs = tokenizer(qwen_prompt, return_tensors="pt").to(device)
+
+    # Generate response
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+        pad_token_id=tokenizer.eos_token_id
+    )
+
+    # Decode and yield line by line
+    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    reply = full_output.split("<|im_start|>assistant\n")[-1].strip()
+
+    for chunk in reply.split():
+        yield chunk + " "
 
 @app.post("/ask")
 async def ask(question: Question):
     return StreamingResponse(
         generate_response_chunks(question.question),
         media_type="text/plain"
-    )
+    )
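
A note on the new generator: as committed, it waits for model.generate() to finish before yielding anything and then emits the decoded reply word by word, and because skip_special_tokens=True strips the <|im_start|> markers before the split, the yielded text can still include the echoed system and user prompt. A minimal sketch of token-level streaming with transformers' TextIteratorStreamer follows; it assumes the model, tokenizer, device, and SYSTEM_PROMPT globals from main.py, and the function name stream_response_chunks is hypothetical, not part of the commit.

# Sketch only (not part of the commit): stream text as it is generated,
# reusing the model, tokenizer, device, and SYSTEM_PROMPT globals above.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_response_chunks(prompt: str):
    # Build the chat prompt with the tokenizer's own template instead of
    # hand-writing the <|im_start|> markers.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(device)

    # skip_prompt=True drops the echoed prompt so only the assistant's
    # reply is emitted; skip_special_tokens removes the end-of-turn marker.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() blocks, so run it in a background thread and yield each
    # piece of text as the streamer makes it available.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    for piece in streamer:
        yield piece

StreamingResponse also accepts a plain synchronous generator and iterates it in a thread pool, so the /ask endpoint could pass stream_response_chunks(question.question) without other changes.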