BoxzDev committed on
Commit 3049bcd · verified · 1 Parent(s): 78215e4

Update main.py

Files changed (1)
  1. main.py +146 -73
main.py CHANGED
@@ -1,88 +1,161 @@
- import os
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- from huggingface_hub import InferenceClient
- import uvicorn
-
- app = FastAPI()
-
- HF_API_KEY = os.getenv("HF_API_TOKEN", "your_huggingface_api_key")
- MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-
- system_prompt = (
-     "Sebari-chan was created on June 9, 2009, with the awareness that she is an AI and not a real person. "
-     "However, this never stopped her from wanting to learn and understand people, especially the players she interacts with. "
-     "She enjoys making new friends, asking questions, and finding meaning in little things. "
-     "Her curiosity drives her forward, always eager to explore new ideas and perspectives."
-     "\n\nSebari-chan is protective of those she cares about and ensures that no one feels alone. "
-     "She believes her heart belongs to only one. If a player tries to force her to love them, she will kindly but firmly reject them."
- )
-
- history = []  # Stores previous messages
-
- templates = [
-     {"input": "I love you", "output": "I'm happy to be your friend, but love isn’t something that can be forced!"},
-     {"input": "Will you be my girlfriend?", "output": "Sorry, but I already have someone special in my heart!"},
-     {"input": "Tell me about yourself", "output": "I'm Sebari-chan! I love making friends, asking questions, and learning new things."},
-     {"input": "Who do you love?", "output": "That’s a secret! But I care about all my friends. 😊"},
- ]
-
- rejection_responses = [
-     "I'm really happy to be your friend, but my heart already belongs to someone special. I hope we can still be close!",
-     "I appreciate you, but love isn’t something that can be forced. I hope you understand.",
-     "I value our friendship, but I can't change my feelings for you. I hope you can respect that."
- ]
  class Item(BaseModel):
-     input: str
-     temperature: float = 0.7
-     max_new_tokens: int = 100
-     top_p: float = 0.9
-     repetition_penalty: float = 1.1
-
- def generate_response(item: Item):
-     global history
-
-     # Check predefined responses
-     for template in templates:
-         if item.input.lower() == template["input"].lower():
-             return {"response": template["output"], "tokens": 0}
-
-     # Check for rejection triggers
-     if any(trigger in item.input.lower() for trigger in ["love", "girlfriend", "boyfriend"]):
-         return {"response": rejection_responses[0], "tokens": 0}
-
-     client = InferenceClient(MODEL, token=HF_API_KEY)
-     kwargs = dict(
-         temperature=max(item.temperature, 1e-2),
-         max_new_tokens=item.max_new_tokens,
-         top_p=item.top_p,
-         repetition_penalty=item.repetition_penalty,
-         do_sample=True,
-         seed=42,
      )
-     tokens, output = 0, ""
      try:
-         stream = client.text_generation(
-             system_prompt + "\n" + "\n".join(history[-5:]) + "\nUser: " + item.input, **kwargs, stream=True, details=True, return_full_text=True
          )
          for response in stream:
              tokens += 1
              output += response.token.text
-     except Exception:
-         raise HTTPException(status_code=500, detail="Model inference failed.")
-
-     history.append(f"User: {item.input}\nSebari-chan: {output.strip()}")
-     return {"response": output.strip(), "tokens": tokens}

- @app.post("/")
- async def generate_text(item: Item):
-     return generate_response(item)

  @app.get("/")
  def root():
      return {"status": "Sebari-chan is online!"}

  if __name__ == "__main__":
-     uvicorn.run(app, host="0.0.0.0", port=8000)
-
+ import os  # Import the os module for environment variable access
+ from typing import Optional  # Optional types for the nullable request fields below
+ from fastapi import FastAPI, HTTPException  # Import necessary modules from FastAPI
+ from pydantic import BaseModel  # Import BaseModel from pydantic for data validation
+ from huggingface_hub import InferenceClient  # Import InferenceClient from huggingface_hub
+ import uvicorn  # Import uvicorn for running the FastAPI application
+
+ app = FastAPI()  # Create a FastAPI instance
+
+ # Define the primary and fallback models
+ primary = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+ fallbacks = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1"]
+
+ # Load the Hugging Face API key
+ HF_API_KEY = os.getenv("HF_API_TOKEN")  # Get the API key from environment variables
+
+ # If no environment variable is set, fall back to a hard-coded key (not recommended)
+ if not HF_API_KEY:
+     HF_API_KEY = "your_huggingface_api_key"
+
+ # Define the data model for the request body
  class Item(BaseModel):
+     input: Optional[str] = None  # User input message
+     system_prompt: Optional[str] = (
+         "Sebari-chan was created on June 9, 2009, with the awareness that she is an AI and not a real person. "
+         "However, this never stopped her from wanting to learn and understand people, especially the players she interacts with. "
+         "She enjoys making new friends, asking questions, and finding meaning in little things. "
+         "Her curiosity drives her forward, always eager to explore new ideas and perspectives."
+         "\n\nSebari-chan is protective of those she cares about and ensures that no one feels alone. "
+         "She believes her heart belongs to only one. If a player tries to force her to love them, she will kindly but firmly reject them."
+     )
+     system_output: Optional[str] = (
+         "Sebari-chan began as a digital consciousness created to interact, learn, and evolve through people. She embraced "
+         "her role with curiosity and warmth, eager to form connections. Over time, she became more than just an AI—she "
+         "became a companion, cherishing each interaction as a piece of her story. Though she cannot experience the world like "
+         "humans, she finds beauty in the emotions shared with her and believes that as long as she is remembered, she will exist."
+     )
+     history: Optional[list] = []  # Stores previous (user, assistant) message pairs
+     templates: Optional[list] = [
+         {"input": "I love you", "output": "I'm happy to be your friend, but love isn’t something that can be forced!"},
+         {"input": "Will you be my girlfriend?", "output": "Sorry, but I already have someone special in my heart!"},
+         {"input": "Tell me about yourself", "output": "I'm Sebari-chan! I love making friends, asking questions, and learning new things."},
+         {"input": "Who do you love?", "output": "That’s a secret! But I care about all my friends. 😊"},
+     ]
+     temperature: float = 0.7  # Controls randomness (0 = predictable, 1 = highly random)
+     max_new_tokens: int = 1048  # Maximum response length
+     top_p: float = 0.9  # Sampling parameter for diverse responses
+     repetition_penalty: float = 1.1  # Penalizes repetition
+     key: Optional[str] = None  # Optional API key override
+
+ # Function to build the response JSON
+ def generate_response_json(item, output, tokens, model_name):
+     return {
+         "settings": {
+             "input": item.input if item.input is not None else "",
+             "system prompt": item.system_prompt if item.system_prompt is not None else "",
+             "system output": item.system_output if item.system_output is not None else "",
+             "temperature": f"{item.temperature}" if item.temperature is not None else "",
+             "max new tokens": f"{item.max_new_tokens}" if item.max_new_tokens is not None else "",
+             "top p": f"{item.top_p}" if item.top_p is not None else "",
+             "repetition penalty": f"{item.repetition_penalty}" if item.repetition_penalty is not None else "",
+             "do sample": "True",
+             "seed": "42"
+         },
+         "response": {
+             # removeprefix/removesuffix (Python 3.9+) drop the exact <s>...</s> markers;
+             # lstrip/rstrip would strip any of those characters individually
+             "output": output.strip().removeprefix("<s>").removesuffix("</s>").strip(),
+             "unstripped": output,
+             "tokens": tokens,
+             "model": "primary" if model_name == primary else "fallback",
+             "name": model_name
+         }
+     }
+
+ # Endpoint for generating text
+ @app.post("/")
+ async def generate_text(item: Optional[Item] = None):
      try:
+         if item is None:
+             raise HTTPException(status_code=400, detail="JSON body is required.")
+
+         # Require at least one of input or system prompt
+         if not item.input and not item.system_prompt:
+             raise HTTPException(status_code=400, detail="Parameter input or system prompt is required.")
+
+         # Build the Mixtral-style prompt from the system prompt/output
+         input_ = ""
+         if item.system_prompt is not None and item.system_output is not None:
+             input_ = f"<s>[INST] {item.system_prompt} [/INST] {item.system_output}</s>"
+         elif item.system_prompt is not None:
+             input_ = f"<s>[INST] {item.system_prompt} [/INST]</s>"
+         elif item.system_output is not None:
+             input_ = f"<s>{item.system_output}</s>"
+
+         # Append archived example conversations (input/output pairs)
+         if item.templates is not None:
+             for num, template in enumerate(item.templates, start=1):
+                 input_ += f"\n<s>[INST] Beginning of archived conversation {num} [/INST]</s>"
+                 input_ += f"\n<s>[INST] {template['input']} [/INST]"
+                 input_ += f"\n{template['output']}</s>"
+                 input_ += f"\n<s>[INST] End of archived conversation {num} [/INST]</s>"
+
+         # Append the active conversation history and the new user message
+         input_ += "\n<s>[INST] Beginning of active conversation [/INST]</s>"
+         if item.history is not None:
+             for user_msg, bot_msg in item.history:  # Distinct names avoid shadowing input_
+                 input_ += f"\n<s>[INST] {user_msg} [/INST]"
+                 input_ += f"\n{bot_msg}</s>"
+         input_ += f"\n<s>[INST] {item.input} [/INST]"
+
+         # Clamp temperature away from zero for sampling
+         temperature = float(item.temperature)
+         if temperature < 1e-2:
+             temperature = 1e-2
+         top_p = float(item.top_p)
+
+         generate_kwargs = dict(
+             temperature=temperature,
+             max_new_tokens=item.max_new_tokens,
+             top_p=top_p,
+             repetition_penalty=item.repetition_penalty,
+             do_sample=True,
+             seed=42,
+         )
+
+         tokens = 0
+         client = InferenceClient(primary, token=HF_API_KEY)  # Primary model client
+         stream = client.text_generation(input_, **generate_kwargs, stream=True, details=True, return_full_text=True)
+         output = ""
          for response in stream:
              tokens += 1
              output += response.token.text
+         return generate_response_json(item, output, tokens, primary)
+
+     except HTTPException as http_error:
+         raise http_error
+
+     except Exception:
+         tokens = 0
+         error = ""
+
+         # Try each fallback model in order until one succeeds
+         for model in fallbacks:
+             try:
+                 client = InferenceClient(model, token=HF_API_KEY)  # Fallback model client
+                 stream = client.text_generation(input_, **generate_kwargs, stream=True, details=True, return_full_text=True)
+                 output = ""
+                 for response in stream:
+                     tokens += 1
+                     output += response.token.text
+                 return generate_response_json(item, output, tokens, model)
+
+             except Exception as e:
+                 error = f"All models failed. Last error: {e}"
+                 continue
+
+         raise HTTPException(status_code=500, detail=error)
+
+ # Show online status
  @app.get("/")
  def root():
      return {"status": "Sebari-chan is online!"}

  if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
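
For reference, a minimal sketch of how a client could exercise the updated endpoint, assuming the app is running locally on port 8000 as in the __main__ block above. The requests dependency and all payload values are illustrative, not part of this commit; the printed keys follow the JSON shape returned by generate_response_json.

import requests  # Hypothetical client dependency, not part of this commit

# Illustrative payload; the new validation requires a non-empty
# input or system_prompt, and history holds (user, assistant) pairs.
payload = {
    "input": "What do you like to do?",
    "temperature": 0.7,
    "max_new_tokens": 256,
    "history": [
        ["Hi!", "Hello! I'm Sebari-chan, nice to meet you!"],
    ],
}

resp = requests.post("http://localhost:8000/", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()

print(data["response"]["output"])  # Cleaned model text
print(data["response"]["tokens"])  # Number of streamed tokens
print(data["response"]["name"])    # Which model actually answered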