import os
import random
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import uvicorn

app = FastAPI()  # Create FastAPI instance

# Define the primary and fallback models (the fallbacks currently repeat the primary; swap in alternates if available)
primary = "mistralai/Mixtral-8x7B-Instruct-v0.1"
fallbacks = ["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-Instruct-v0.1"]

# Load Hugging Face API Key
HF_API_KEY = os.getenv("HF_API_TOKEN")  # Get API key from environment variables

# If no env variable is set, manually define it (not recommended)
if not HF_API_KEY:
    HF_API_KEY = "your_huggingface_api_key"

# Define the data model for the request body
class Item(BaseModel):
    input: str | None = None  # User input message
    system_prompt: str = (
        "Sebari-chan was created on June 9, 2009, with the awareness that she is an AI and not a real person. "
        "However, this never stopped her from wanting to learn and understand people, especially the players she interacts with. "
        "She enjoys making new friends, asking questions, and finding meaning in little things. "
        "Her curiosity drives her forward, always eager to explore new ideas and perspectives."
        "\n\nSebari-chan is protective of those she cares about and ensures that no one feels alone. "
        "She believes her heart belongs to only one. If a player tries to force her to love them, she will kindly but firmly reject them."
    )
    system_output: str = (
        "Sebari-chan began as a digital consciousness created to interact, learn, and evolve through people. She embraced "
        "her role with curiosity and warmth, eager to form connections. Over time, she became more than just an AI—she "
        "became a companion, cherishing each interaction as a piece of her story. Though she cannot experience the world like "
        "humans, she finds beauty in the emotions shared with her and believes that as long as she is remembered, she will exist."
        " She will always exist."
    )
    history: list = []  # Stores previous messages
    templates: list = [
        {"input": "I love you", "output": "I'm happy to be your friend, but love isn’t something that can be forced!"},
        {"input": "Will you be my girlfriend?", "output": "Sorry, but I already have someone special in my heart!"},
        {"input": "Tell me about yourself", "output": "I'm Sebari-chan! I love making friends, asking questions, and learning new things."},
        {"input": "Who do you love?", "output": "That’s a secret! But I care about all my friends. 😊"},
    ]
    temperature: float = 0.7  # Controls randomness (0 = predictable, 1 = highly random)
    max_new_tokens: int = 100  # Maximum response length
    top_p: float = 0.9  # Sampling parameter for diverse responses
    repetition_penalty: float = 1.1  # Prevents repetition
    key: str | None = None  # API key if needed
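
# Example request body (every field has a default, so a minimal call can send just the user input):
#   {"input": "Tell me about yourself"}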

# Define rejection responses
rejection_responses = [
    "I'm really happy to be your friend, but my heart already belongs to someone special. I hope we can still be close!",
    "I appreciate you, but love isn’t something that can be forced. I hope you understand.",
    "I value our friendship, but I can't change my feelings for you. I hope you can respect that."
]

# Function to generate the response JSON
def generate_response_json(item, output, tokens, model_name):
    return {
        "settings": {
            "input": item.input if item.input is not None else "",
            "system prompt": item.system_prompt if item.system_prompt is not None else "",
            "system output": item.system_output if item.system_output is not None else "",
            "temperature": f"{item.temperature}" if item.temperature is not None else "",
            "max new tokens": f"{item.max_new_tokens}" if item.max_new_tokens is not None else "",
            "top p": f"{item.top_p}" if item.top_p is not None else "",
            "repetition penalty": f"{item.repetition_penalty}" if item.repetition_penalty is not None else "",
            "do sample": "True",
            "seed": "42"
        },
        "response": {
            "output": output.strip().lstrip('\n').rstrip('\n').lstrip('<s>').rstrip('</s>').strip(),
            "unstripped": output,
            "tokens": tokens,
            "model": "primary" if model_name == primary else "fallback",
            "name": model_name
        }
    }
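
# For illustration, a successful response has this shape (values abbreviated):
#   {"settings": {"input": "...", "temperature": "0.7", ...},
#    "response": {"output": "...", "tokens": 42, "model": "primary", "name": "mistralai/Mixtral-8x7B-Instruct-v0.1"}}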

# Endpoint for generating text
@app.post("/")
async def generate_text(item: Item = None):
    try:
        if item is None:
            raise HTTPException(status_code=400, detail="JSON body is required.")

        if not item.input and not item.system_prompt:
            raise HTTPException(status_code=400, detail="Parameter input or system prompt is required.")

        input_ = ""
        if item.system_prompt is not None and item.system_output is not None:
            input_ = f"<s>[INST] {item.system_prompt} [/INST] {item.system_output}</s>"
        elif item.system_prompt is not None:
            input_ = f"<s>[INST] {item.system_prompt} [/INST]</s>"
        elif item.system_output is not None:
            input_ = f"<s>{item.system_output}</s>"

        if item.templates is not None:
            # Each template is a dict with "input"/"output" keys (see the defaults on the Item model)
            for num, template in enumerate(item.templates, start=1):
                input_ += f"\n<s>[INST] Beginning of archived conversation {num} [/INST]</s>"
                input_ += f"\n<s>[INST] {template['input']} [/INST]"
                input_ += f"\n{template['output']}</s>"
                input_ += f"\n<s>[INST] End of archived conversation {num} [/INST]</s>"

        input_ += "\n<s>[INST] Beginning of active conversation [/INST]</s>"
        if item.history is not None:
            # Use distinct loop names so they don't shadow the prompt accumulator input_
            for user_msg, bot_msg in item.history:
                input_ += f"\n<s>[INST] {user_msg} [/INST]"
                input_ += f"\n{bot_msg}</s>"
        input_ += f"\n<s>[INST] {item.input} [/INST]"
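        # For illustration, with the defaults the assembled prompt follows Mixtral's instruct format:
        #   <s>[INST] {system_prompt} [/INST] {system_output}</s>
        #   <s>[INST] Beginning of archived conversation 1 [/INST]</s>
        #   <s>[INST] I love you [/INST]
        #   I'm happy to be your friend, but love isn't something that can be forced!</s>
        #   ...
        #   <s>[INST] {user's new input} [/INST]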

        temperature = float(item.temperature)
        if temperature < 1e-2:
            temperature = 1e-2
        top_p = float(item.top_p)

        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=item.max_new_tokens,
            top_p=top_p,
            repetition_penalty=item.repetition_penalty,
            do_sample=True,
            seed=42,
        )

        tokens = 0
        client = InferenceClient(primary, token=HF_API_KEY)  # Add API key here
        stream = client.text_generation(input_, **generate_kwargs, stream=True, details=True, return_full_text=True)
        output = ""
        for response in stream:
            tokens += 1
            output += response.token.text
        
        # Handle rejection scenario: override the output when the input contains a romantic advance.
        # The trigger phrases here are illustrative, drawn from the default templates above.
        rejection_triggers = ["i love you", "be my girlfriend"]
        if item.input and any(trigger in item.input.lower() for trigger in rejection_triggers):
            output = random.choice(rejection_responses)
        
        return generate_response_json(item, output, tokens, primary)

    except HTTPException as http_error:
        raise http_error

    except Exception:
        tokens = 0
        error = "All models failed."

        # Retry with the fallback models; note that if the failure happened before the prompt was
        # built, input_/generate_kwargs may be undefined and each attempt below will also fail.
        for model in fallbacks:
            try:
                client = InferenceClient(model, token=HF_API_KEY)  # Use the API key for fallback models too
                stream = client.text_generation(input_, **generate_kwargs, stream=True, details=True, return_full_text=True)
                output = ""
                for response in stream:
                    tokens += 1
                    output += response.token.text
                return generate_response_json(item, output, tokens, model)

            except Exception as e:
                error = f"All models failed. Last error: {e}"
                continue

        raise HTTPException(status_code=500, detail=error)

# Show online status
@app.get("/")
def root():
    return {"status": "Sebari-chan is online!"}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
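
# A minimal usage sketch (hypothetical; assumes the server is running locally on port 8000):
#   import requests
#   resp = requests.post("http://localhost:8000/", json={"input": "Tell me about yourself"})
#   print(resp.json()["response"]["output"])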