Euryeth committed on
Commit
65e7b56
·
verified ·
1 Parent(s): d4203b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -7,17 +7,16 @@ from huggingface_hub import login
7
  from flask import Flask, request, jsonify, Response
8
  import gradio as gr
9
 
10
- # Authenticate with Hugging Face
11
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
12
  API_TOKEN = os.getenv("HF_API_TOKEN")
13
 
14
- # Load model and tokenizer
15
  model_name = "cerebras/btlm-3b-8k-chat"
16
  revision = "main"
17
  torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
18
  os.environ['HF_HOME'] = '/tmp/cache'
19
 
20
- print("Loading model and tokenizer...")
21
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, revision=revision)
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_name,
@@ -37,7 +36,7 @@ generator = pipeline(
37
  trust_remote_code=True
38
  )
39
 
40
- # Initialize Flask app
41
  app = Flask(__name__)
42
 
43
  @app.route("/")
@@ -116,36 +115,39 @@ def chat():
116
  }]
117
  })
118
 
119
- # Gradio Chat Interface
120
- chat_history = []
121
-
122
- def gradio_chat(message, history):
123
- global chat_history
124
- messages = [{"role": "user", "content": message}]
125
- prompt = "User: {}\nAssistant:".format(message)
126
 
127
  output = generator(
128
- prompt,
129
  max_new_tokens=256,
130
  temperature=0.7,
131
  top_p=0.9,
132
  repetition_penalty=1.1,
133
  do_sample=True
134
  )
135
- reply = output[0]['generated_text'].replace(prompt, "").strip()
136
- history.append((message, reply))
137
  return history, history
138
 
139
  with gr.Blocks() as demo:
140
- gr.Markdown("### 🧠 Skyrim NPC LLM Interface")
141
  chatbot = gr.Chatbot()
142
- msg = gr.Textbox()
143
  clear = gr.Button("Clear")
144
 
145
- msg.submit(gradio_chat, [msg, chatbot], [chatbot, chatbot])
146
- clear.click(lambda: [], None, chatbot)
 
 
147
 
148
- demo.launch(share=False, inline=True)
 
149
 
 
150
  if __name__ == "__main__":
151
  app.run(host="0.0.0.0", port=8080)
 
7
  from flask import Flask, request, jsonify, Response
8
  import gradio as gr
9
 
10
+ # Hugging Face Auth
11
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
12
  API_TOKEN = os.getenv("HF_API_TOKEN")
13
 
14
+ # Model config
15
  model_name = "cerebras/btlm-3b-8k-chat"
16
  revision = "main"
17
  torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
18
  os.environ['HF_HOME'] = '/tmp/cache'
19
 
 
20
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, revision=revision)
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
 
36
  trust_remote_code=True
37
  )
38
 
39
+ # Flask backend
40
  app = Flask(__name__)
41
 
42
  @app.route("/")
 
115
  }]
116
  })
117
 
118
+ # Gradio Chat UI
119
+ def gradio_chat(user_input, history=[]):
120
+ full_prompt = ""
121
+ for turn in history:
122
+ full_prompt += f"User: {turn[0]}\nAssistant: {turn[1]}\n"
123
+ full_prompt += f"User: {user_input}\nAssistant:"
 
124
 
125
  output = generator(
126
+ full_prompt,
127
  max_new_tokens=256,
128
  temperature=0.7,
129
  top_p=0.9,
130
  repetition_penalty=1.1,
131
  do_sample=True
132
  )
133
+ reply = output[0]["generated_text"].replace(full_prompt, "").strip()
134
+ history.append((user_input, reply))
135
  return history, history
136
 
137
  with gr.Blocks() as demo:
138
+ gr.Markdown("## 💬 Chat with Ariphes (LLM-powered)")
139
  chatbot = gr.Chatbot()
140
+ msg = gr.Textbox(placeholder="Ask me anything...", label="Message")
141
  clear = gr.Button("Clear")
142
 
143
+ state = gr.State([])
144
+
145
+ msg.submit(gradio_chat, [msg, state], [chatbot, state])
146
+ clear.click(lambda: ([], []), None, [chatbot, state])
147
 
148
+ # ✅ Enable share=True so Hugging Face can access it
149
+ demo.launch(share=True)
150
 
151
+ # ✅ Still serve API endpoint for OpenAI-compatible connector
152
  if __name__ == "__main__":
153
  app.run(host="0.0.0.0", port=8080)