abdullahalioo committed · verified
Commit c58dbd5 · 1 Parent(s): 2c9d491

Update main.py
Files changed (1):
  1. main.py +46 -21
main.py CHANGED
@@ -1,50 +1,75 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
-from g4f.client import Client
 from fastapi.responses import StreamingResponse
 
-# Initialize the AI client
-client = Client()
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# FastAPI app
+# Define model ID
+model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
+
+# Download model and tokenizer locally
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",  # use device_map="cpu" to force CPU
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # GPU: float16, CPU: float32
+    trust_remote_code=True
+)
+model.eval()
+
+# Initialize FastAPI
 app = FastAPI()
 
-# CORS Middleware (so JS from browser can access it too)
+# CORS settings
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # Change "*" to your frontend URL for better security
+    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-# Request body model
+# Request model
 class Question(BaseModel):
     question: str
 
+# Generate response chunks
 async def generate_response_chunks(prompt: str):
     try:
-        response = client.chat.completions.create(
-            model="gpt-4o-mini",  # Use a supported model
-            messages=[
-                {"role": "user", "content": prompt},
-                {"role": "system", "content": "You are Orion, an AI assistant created by Abdullah Ali, who is very intelligent, is 13 years old, and lives in Lahore."}
-            ],
-            stream=True  # Enable streaming
+        # Define system prompt
+        system_prompt = (
+            "You are Orion, an AI assistant created by Abdullah Ali, who is very intelligent, is 13 years old, and lives in Lahore."
         )
+        full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"
+
+        # Tokenize input
+        input_ids = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+
+        # Generate output
+        output_ids = model.generate(
+            **input_ids,
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1
+        )
 
-        for part in response:
-            content = part.choices[0].delta.content
-            if content:
-                yield content
-
+        # Decode only the newly generated tokens
+        output_text = tokenizer.decode(output_ids[0][input_ids.input_ids.shape[-1]:], skip_special_tokens=True)
+
+        # Stream output letter-by-letter
+        for letter in output_text:
+            yield letter
     except Exception as e:
         yield f"Error occurred: {e}"
 
+# API Endpoint
 @app.post("/ask")
 async def ask(question: Question):
     return StreamingResponse(
         generate_response_chunks(question.question),
         media_type="text/plain"
-    )
+    )
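
A note on the rewritten loader: Qwen/Qwen2.5-VL-7B-Instruct is a vision-language checkpoint, and recent transformers releases register it under Qwen2_5_VLForConditionalGeneration rather than AutoModelForCausalLM, so the from_pretrained call above may fail depending on the installed version. A minimal sketch of the alternative (an assumption about the environment, not part of this commit):

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

# Assumed alternative loader for a transformers release with native
# Qwen2.5-VL support; not part of the original commit.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct",
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
tokenizer = processor.tokenizer  # text-only prompts can still go through the tokenizer

A text-only checkpoint such as Qwen/Qwen2.5-7B-Instruct would also load cleanly through AutoModelForCausalLM if vision input is not needed.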
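
Separately, generate_response_chunks above produces the complete output first and only then yields it character by character, with the blocking model.generate call running on the event loop. A common pattern for genuine token-level streaming is transformers' TextIteratorStreamer with generation in a background thread; a minimal sketch reusing the model/tokenizer globals above (the function name stream_response_chunks is hypothetical):

from threading import Thread
from transformers import TextIteratorStreamer

async def stream_response_chunks(prompt: str):
    # Decoded text chunks are pushed into the streamer as generate() runs.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
    )
    thread.start()  # run the blocking generate() off the request path
    for text_chunk in streamer:  # blocks until the next chunk is ready
        yield text_chunk
    thread.join()

Iterating the streamer still blocks between chunks, so wrapping the loop with something like starlette's iterate_in_threadpool would keep the event loop responsive; that detail is left out here.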
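
Since /ask streams plain text, the endpoint can be exercised with any client that reads the body incrementally. A small sketch using requests, assuming the app runs locally on port 8000 (the URL and question are placeholders):

import requests

# Hypothetical client for the /ask endpoint; URL and payload are examples.
with requests.post(
    "http://localhost:8000/ask",
    json={"question": "Who created you?"},  # matches the Question model
    stream=True,
) as response:
    response.raise_for_status()
    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)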