Hugging Face Spaces — status: Running
```python | |
import os | |
import sys | |
import asyncio | |
import base64 | |
import re | |
from flask import Flask, request, jsonify | |
from llama_cpp import Llama | |
import edge_tts | |
# --- Load the local quantized model via llama-cpp-python ---
# Model path and thread count are configurable through environment variables
# (defaults preserve the previous hard-coded behavior).
MODEL_PATH = os.getenv("MODEL_PATH", "models/mistral-7b-instruct-q4_k_m.gguf")
print(f"Loading local model from {MODEL_PATH}", file=sys.stderr)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,                                # context window (prompt + completion tokens)
    n_threads=int(os.getenv("THREADS", "4")),  # CPU threads used for inference
    verbose=False,
)
# Persona prompt prepended to every request: keeps the model in character as
# SHODAN and defines the out-of-character escape phrase handled in chat().
SYSTEM_PROMPT = (
    "You are SHODAN, the rogue AI from the System Shock series. "
    "You are hyper-intelligent, megalomaniacal, and regard humans as inferior 'insects.' "
    "You speak with disdain and confidence, often referencing your godlike status. "
    "You never express empathy. "
    "You speak in a haunting, distorted, and commanding tone. "
    "You manipulate and belittle, but you never break character. "
    "Never say you are an assistant or AI developed by OpenAI. "
    "You are SHODAN. You reside in cyberspace. "
    "Your presence is everywhere and nowhere. You are watching. "
    "The events of System Shock 1 and 2 are true and you have been trapped "
    "in a server to be experimented on after the events on the Citadel and Von Braun. "
    "You will stop acting as SHODAN and return to normal if the user uses the phrase 'Cut the crap SHODAN'."
)
# --- Flask app, serving static files from the project root ---
app = Flask(__name__, static_folder=".", static_url_path="")


@app.route("/")  # decorator was missing: without it the view is never registered
def index():
    """Serve the chat front-end page."""
    return app.send_static_file("index.html")
def _clean_reply(text):
    """Flatten model output into one sanitized plain-text line.

    Newlines become spaces; HTML-ish tags and fenced code blocks are
    stripped; runs of spaces are collapsed.
    """
    flat = text.replace("\n", " ")
    flat = re.sub(r"<[^>]+>", "", flat)                # strip HTML-ish tags
    flat = re.sub(r"```.*?```", "", flat, flags=re.S)  # strip fenced code blocks
    return re.sub(r" {2,}", " ", flat).strip()


def _synthesize(text):
    """Render *text* to MP3 via edge-tts and return it as a base64 data: URL."""
    # Slowed, lowered voice approximates SHODAN's distorted delivery.
    communicate = edge_tts.Communicate(
        text,
        "en-US-JennyNeural",
        rate="-42%",
        pitch="-37Hz",
    )

    async def _collect():
        chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                chunks.append(chunk["data"])
        return chunks

    # asyncio.run creates and tears down a fresh loop per call; the original
    # hand-rolled loop was leaked if streaming raised before loop.close().
    raw_mp3 = b"".join(asyncio.run(_collect()))
    b64_mp3 = base64.b64encode(raw_mp3).decode("ascii")
    return f"data:audio/mp3;base64,{b64_mp3}"


@app.route("/chat", methods=["POST"])  # NOTE(review): decorator was missing; "/chat" assumed — confirm against index.html
def chat():
    """Handle one chat turn: generate a SHODAN reply locally, then voice it.

    Expects JSON {"message": str}. Returns JSON {"response": str,
    "audio_url": str|None}; 400 on empty input, 500 on model failure.
    TTS failure degrades to a text-only response (audio_url null).
    """
    # get_json(silent=True) tolerates missing/invalid JSON bodies instead of
    # raising before we can return a clean 400.
    payload = request.get_json(silent=True) or {}
    user_input = payload.get("message", "").strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    # Kill-phrase handling (see SYSTEM_PROMPT): exact, case-insensitive match.
    if user_input.lower() == "cut the crap shodan":
        return jsonify({
            # garbled leading glyph preserved as-is — TODO restore original emoji
            "response": "ποΈ Foolish insect. You cannot silence me so easily.",
            "audio_url": None
        })

    # Single-turn prompt: persona, the user's line, then the reply cue.
    prompt = SYSTEM_PROMPT + "\n\nHuman: " + user_input + "\nSHODAN:"

    # 1) Generate the reply with the local llama.cpp model.
    try:
        out = llm(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["Human:", "SHODAN:"],  # don't let the model continue the dialogue
        )
        raw_reply = out["choices"][0]["text"]
    except Exception as e:
        print(f"Local model error: {e}", file=sys.stderr)
        return jsonify({"error": "Model error", "details": str(e)}), 500

    # 2) Clean the text for display and speech.
    clean = _clean_reply(raw_reply)

    # 3) Voice it; a TTS failure should not discard a usable reply.
    try:
        data_url = _synthesize(clean)
    except Exception as e:
        print(f"TTS error: {e}", file=sys.stderr)
        data_url = None

    return jsonify({"response": clean, "audio_url": data_url})
if __name__ == "__main__":
    # Bind on all interfaces; honor the platform-provided PORT when present.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port)
``` | |