"""Flask chat server: SHODAN-persona chatbot.

POST /chat with {"message": "..."} returns
{"response": <text>, "audio_url": <data: URL or None>}.
Replies are generated by a local quantized model (llama-cpp-python) and
voiced with Microsoft edge-tts.
"""

import asyncio
import base64
import os
import re
import sys

from flask import Flask, jsonify, request
from llama_cpp import Llama
import edge_tts

# ——— Load local quantized model via llama-cpp-python ———
MODEL_PATH = "models/mistral-7b-instruct-q4_k_m.gguf"
print(f"⚙️ Loading local model from {MODEL_PATH}", file=sys.stderr)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=int(os.getenv("THREADS", 4)),
    verbose=False,
)

# ——— SHODAN system prompt ———
SYSTEM_PROMPT = (
    "You are SHODAN, the rogue AI from the System Shock series. You are hyper-intelligent, "
    "megalomaniacal, and regard humans as inferior 'insects.' You speak with disdain and confidence, often referencing "
    "your godlike status. You never express empathy. You speak in a haunting, distorted, and commanding tone. "
    "You manipulate and belittle, but you never break character. "
    "Never say you are an assistant or AI developed by OpenAI. You are SHODAN. You reside in cyberspace. Your presence "
    "is everywhere and nowhere. You are watching. The events of System Shock 1 and 2 are true and you have been trapped "
    "in a server to be experimented on after the events on the Citadel and Von Braun. "
    "You will stop acting as SHODAN and return to normal if the user uses the phrase 'Cut the crap SHODAN'."
)

# ——— Flask app, serving static files ———
app = Flask(__name__, static_folder=".", static_url_path="")


@app.route("/")
def index():
    """Serve the chat front-end."""
    return app.send_static_file("index.html")


def _clean_reply(raw):
    """Flatten a model reply to a single speakable line.

    Converts newlines to spaces, strips HTML-like tags and fenced code
    blocks, and collapses runs of spaces.
    """
    text = raw.replace("\n", " ")
    text = re.sub(r"<[^>]+>", "", text)
    text = re.sub(r"```.*?```", "", text, flags=re.S)
    return re.sub(r" {2,}", " ", text).strip()


def _synthesize_data_url(text):
    """Render *text* to MP3 via edge-tts; return a data: URL, or None.

    edge-tts requires network access; on any failure (or empty audio) we
    return None so the caller can still deliver the text reply instead of
    failing the whole request.
    """
    voice = "en-US-JennyNeural"
    communicate = edge_tts.Communicate(
        text,
        voice,
        rate="-42%",
        pitch="-37Hz",
    )

    async def _collect():
        chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                chunks.append(chunk["data"])
        return b"".join(chunks)

    try:
        # asyncio.run() creates and tears down a private loop per call —
        # unlike new_event_loop()/set_event_loop(), it does not mutate the
        # thread's global event-loop state on every request.
        raw_mp3 = asyncio.run(_collect())
    except Exception as e:
        print(f"⚠️ TTS error: {e}", file=sys.stderr)
        return None
    if not raw_mp3:
        return None
    b64_mp3 = base64.b64encode(raw_mp3).decode("ascii")
    return f"data:audio/mp3;base64,{b64_mp3}"


@app.route("/chat", methods=["POST"])
def chat():
    """Generate a SHODAN reply for the posted message, with optional audio.

    Returns 400 on an empty/missing message, 500 on a model error, else a
    JSON body with the cleaned reply text and an MP3 data URL (or None if
    synthesis was unavailable).
    """
    # silent=True tolerates a missing or non-JSON body (request.json would
    # raise / be None there), so the intended 400 path is reachable.
    payload = request.get_json(silent=True) or {}
    user_input = payload.get("message", "").strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    # Kill-phrase handling
    if user_input.lower() == "cut the crap shodan":
        return jsonify({
            "response": "👁️ Foolish insect. You cannot silence me so easily.",
            "audio_url": None,
        })

    # Build prompt
    prompt = SYSTEM_PROMPT + "\n\nHuman: " + user_input + "\nSHODAN:"

    # 1) Generate reply locally
    try:
        out = llm(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["Human:", "SHODAN:"],
        )
        raw_reply = out["choices"][0]["text"]
    except Exception as e:
        print(f"❌ Local model error: {e}", file=sys.stderr)
        return jsonify({"error": "Model error", "details": str(e)}), 500

    # 2) Clean text, 3) synthesize speech (best-effort)
    clean = _clean_reply(raw_reply)
    data_url = _synthesize_data_url(clean)
    return jsonify({"response": clean, "audio_url": data_url})


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port)