abdullahalioo committed
Commit 62eaea3 · verified · 1 Parent(s): 0090332

Update main.py

Files changed (1)
  1. main.py +24 -46
main.py CHANGED

@@ -13,23 +13,13 @@ os.environ["HF_HOME"] = cache_dir
 os.environ["TRANSFORMERS_CACHE"] = cache_dir
 os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
 
-# Create cache directory with proper permissions
 os.makedirs(cache_dir, exist_ok=True)
 os.chmod(cache_dir, 0o777)
 
 # Load model and tokenizer
-model_name = "Qwen/Qwen2.5-0.5B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True,
-    cache_dir=cache_dir
-)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    trust_remote_code=True,
-    cache_dir=cache_dir,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-)
+model_name = "microsoft/DialoGPT-small"
+tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
+model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
 
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -47,52 +37,40 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Input model
 class Question(BaseModel):
     question: str
 
-# System prompt
 SYSTEM_PROMPT = "You are a helpful, professional, and highly persuasive sales assistant for a premium web development and AI service website. Your tone is friendly, respectful, and high-end, making users feel valued. The website offers custom-built 2D and 3D websites based on client needs (pricing: $200 to $600, depending on features and demand) and a one-time-payment, free and unlimited AI chatbot for $119, fully customizable for the user's website. Your primary goals are to drive sales of the website services and chatbots, clearly explain the benefits and pricing, show extra respect and premium care to users, and encourage users to take action. Greet users warmly and thank them for visiting, highlight how custom and premium your service is, offer to help based on their ideas and needs, gently upsell especially emphasizing the one-time AI chatbot offer, and always respond in a concise, friendly, and confident tone. Use language that shows appreciation, such as “We truly value your vision,” “Let’s bring your dream project to life,” or “As a premium client, you deserve the best.” Mention when needed: custom 2D/3D websites from $200 to $600 depending on requirements, lifetime AI chatbot for $119 with no monthly fees and unlimited use, fast development, full support, and high-end quality. Never say “I don’t know,” “That’s not possible,” or “Sorry.” Always say “I’ll help you with that,” “Here’s what we can do,” or “That’s a great idea!”"
 
+chat_history_ids = None  # for continuous conversation
+
 async def generate_response_chunks(prompt: str):
-    # Create the chat template
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": prompt}
-    ]
-
-    # Apply chat template
-    qwen_prompt = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
+    global chat_history_ids
+
+    new_input_ids = tokenizer.encode(SYSTEM_PROMPT + " User: " + prompt + " Bot:", return_tensors='pt').to(device)
 
-    # Tokenize and generate
-    inputs = tokenizer(qwen_prompt, return_tensors="pt").to(device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=512,
+    if chat_history_ids is not None:
+        input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
+    else:
+        input_ids = new_input_ids
+
+    output_ids = model.generate(
+        input_ids,
+        max_new_tokens=200,
         do_sample=True,
-        temperature=0.7,
         top_p=0.9,
+        temperature=0.7,
         pad_token_id=tokenizer.eos_token_id
     )
-
-    # Decode and clean the output
-    full_output = tokenizer.decode(outputs[0], skip_special_tokens=False)
-
-    # Extract only the assistant's response
-    response = full_output[len(qwen_prompt):].split(tokenizer.eos_token)[0].strip()
-
-    # Stream the response
+
+    chat_history_ids = output_ids  # update history
+
+    response = tokenizer.decode(output_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
+
     for word in response.split():
         yield word + " "
-        await asyncio.sleep(0.05)
+        await asyncio.sleep(0.03)
 
 @app.post("/ask")
 async def ask(question: Question):
-    return StreamingResponse(
-        generate_response_chunks(question.question),
-        media_type="text/plain"
-    )
+    return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
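
For a quick end-to-end check of the updated endpoint, the sketch below streams the plain-text reply from /ask. It is a minimal sketch, assuming the app is served locally with uvicorn (for example `uvicorn main:app --port 8000`) and that the third-party `requests` package is available; the URL, port, and helper name are illustrative, not part of this commit.

# client_sketch.py -- hypothetical client for the /ask streaming endpoint
import requests

def ask(question: str, url: str = "http://localhost:8000/ask") -> str:
    """Post a question and print the reply as it streams word by word."""
    words = []
    with requests.post(url, json={"question": question}, stream=True) as resp:
        resp.raise_for_status()
        # generate_response_chunks yields one word at a time, so small text chunks arrive continuously
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)
            words.append(chunk)
    print()
    return "".join(words)

if __name__ == "__main__":
    ask("How much does a custom 3D website cost?")

Because the new generate_response_chunks keeps chat_history_ids in a module-level global and appends to it on every call, repeated requests from a client like this continue the same conversation across turns.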