Euryeth committed on
Commit
3077668
·
verified ·
1 Parent(s): 8a9f79f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -4
app.py CHANGED
@@ -1,8 +1,11 @@
1
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import os
 
 
4
  from huggingface_hub import login
5
- from flask import Flask, request, jsonify
 
6
 
7
  # Login to Hugging Face using secret token stored in Space secrets
8
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
@@ -51,6 +54,7 @@ def chat():
51
  messages = data.get("messages", [])
52
  max_tokens = data.get("max_tokens", 256)
53
  temperature = data.get("temperature", 0.7)
 
54
 
55
  # Build the prompt from chat history
56
  prompt = ""
@@ -60,7 +64,40 @@ def chat():
60
  prompt += f"{role}: {content}\n"
61
  prompt += "Assistant:"
62
 
63
- # Generate response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  output = generator(
65
  prompt,
66
  max_new_tokens=max_tokens,
@@ -71,7 +108,6 @@ def chat():
71
  )
72
  reply = output[0]["generated_text"].replace(prompt, "").strip()
73
 
74
- # Return response in OpenAI-style format
75
  return jsonify({
76
  "choices": [
77
  {
@@ -85,6 +121,12 @@ def chat():
85
  ]
86
  })
87
 
 
 
 
 
 
 
88
  if __name__ == "__main__":
89
  # Listen on port 8080 as required by HF Spaces
90
- app.run(host="0.0.0.0", port=8080)
 
1
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import os
4
+ import json
5
+ import time
6
  from huggingface_hub import login
7
+ from flask import Flask, request, jsonify, Response
8
+ import gradio as gr
9
 
10
  # Login to Hugging Face using secret token stored in Space secrets
11
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 
54
  messages = data.get("messages", [])
55
  max_tokens = data.get("max_tokens", 256)
56
  temperature = data.get("temperature", 0.7)
57
+ stream = data.get("stream", False)
58
 
59
  # Build the prompt from chat history
60
  prompt = ""
 
64
  prompt += f"{role}: {content}\n"
65
  prompt += "Assistant:"
66
 
67
+ # If stream = True, stream response like OpenAI
68
+ if stream:
69
+ def generate_stream():
70
+ output = generator(
71
+ prompt,
72
+ max_new_tokens=max_tokens,
73
+ temperature=temperature,
74
+ top_p=0.9,
75
+ repetition_penalty=1.1,
76
+ do_sample=True
77
+ )
78
+ reply = output[0]["generated_text"].replace(prompt, "").strip()
79
+ for word in reply.split():
80
+ chunk = {
81
+ "choices": [{
82
+ "delta": {"content": word + " "},
83
+ "index": 0,
84
+ "finish_reason": None
85
+ }]
86
+ }
87
+ yield f"data: {json.dumps(chunk)}\n\n"
88
+ time.sleep(0.01)
89
+ yield "data: " + json.dumps({
90
+ "choices": [{
91
+ "delta": {},
92
+ "index": 0,
93
+ "finish_reason": "stop"
94
+ }]
95
+ }) + "\n\n"
96
+ yield "data: [DONE]\n\n"
97
+
98
+ return Response(generate_stream(), content_type="text/event-stream")
99
+
100
+ # Non-streamed response
101
  output = generator(
102
  prompt,
103
  max_new_tokens=max_tokens,
 
108
  )
109
  reply = output[0]["generated_text"].replace(prompt, "").strip()
110
 
 
111
  return jsonify({
112
  "choices": [
113
  {
 
121
  ]
122
  })
123
 
124
+ # Optional Gradio frontend to keep Hugging Face Space active
125
+ with gr.Blocks() as demo:
126
+ gr.Markdown("### LLM backend is running and ready for API calls.")
127
+
128
+ demo.launch()
129
+
130
  if __name__ == "__main__":
131
  # Listen on port 8080 as required by HF Spaces
132
+ app.run(host="0.0.0.0", port=8080)