Euryeth committed on
Commit
7b7ead5
·
verified ·
1 Parent(s): e2f4417

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -16
app.py CHANGED
@@ -1,28 +1,52 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import torch
4
  import os
5
 
6
  # Login using Hugging Face token from environment variable (set via Secrets)
7
  from huggingface_hub import login
8
- login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
 
 
 
9
 
10
- # Use float32 for CPU/GPU compatibility
11
- torch_dtype = torch.float32
12
-
13
- # Set Hugging Face cache dir (optional but helps in Spaces)
14
  os.environ['HF_HOME'] = '/tmp/cache'
15
 
16
- # Load Falcon 1B Instruct model pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  generator = pipeline(
18
  "text-generation",
19
- model="tiiuae/falcon-rw-1b-instruct",
20
- device=0 if torch.cuda.is_available() else -1,
21
- torch_dtype=torch_dtype
 
22
  )
23
 
24
  def generate_chat_completion(message, history):
25
- """Simple chatbot logic"""
 
 
 
26
  prompt = f"User: {message}\nAssistant:"
27
  output = generator(
28
  prompt,
@@ -30,17 +54,20 @@ def generate_chat_completion(message, history):
30
  temperature=0.9,
31
  top_p=0.9,
32
  repetition_penalty=1.1,
33
- do_sample=True
 
34
  )
35
- response = output[0]['generated_text'].replace(prompt, "").strip()
 
 
36
  return response
37
 
38
- # Gradio chat interface
39
  gr.ChatInterface(
40
  fn=generate_chat_completion,
41
- title="Falcon Chatbot",
42
  description="Roleplay-ready chat using Falcon-1B-Instruct",
43
  retry_btn="Retry",
44
  undo_btn="Undo",
45
- clear_btn="Clear"
46
  ).launch()
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
4
  import os
5
 
6
  # Login using Hugging Face token from environment variable (set via Secrets)
7
  from huggingface_hub import login
8
+ hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
9
+ if hf_token is None:
10
+ raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set")
11
+ login(hf_token)
12
 
13
+ # Set Hugging Face cache dir (optional but recommended for Spaces)
 
 
 
14
  os.environ['HF_HOME'] = '/tmp/cache'
15
 
16
+ model_name = "tiiuae/falcon-rw-1b-instruct"
17
+
18
+ # Load tokenizer and model explicitly (better control)
19
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
20
+
21
+ # Use bfloat16 whenever a CUDA GPU is available (bf16 support itself is not checked); otherwise fall back to float32
22
+ if torch.cuda.is_available():
23
+ torch_dtype = torch.bfloat16
24
+ device_map = "auto"
25
+ else:
26
+ torch_dtype = torch.float32
27
+ device_map = None
28
+
29
+ model = AutoModelForCausalLM.from_pretrained(
30
+ model_name,
31
+ torch_dtype=torch_dtype,
32
+ device_map=device_map,
33
+ trust_remote_code=True
34
+ )
35
+
36
+ # Create text-generation pipeline with tokenizer and model
37
  generator = pipeline(
38
  "text-generation",
39
+ model=model,
40
+ tokenizer=tokenizer,
41
+ device_map=device_map,
42
+ torch_dtype=torch_dtype,
43
  )
44
 
45
  def generate_chat_completion(message, history):
46
+ """
47
+ Simple chat function using Falcon 1B instruct model.
48
+ Formats prompt for chat style and returns response.
49
+ """
50
  prompt = f"User: {message}\nAssistant:"
51
  output = generator(
52
  prompt,
 
54
  temperature=0.9,
55
  top_p=0.9,
56
  repetition_penalty=1.1,
57
+ do_sample=True,
58
+ eos_token_id=tokenizer.eos_token_id,
59
  )
60
+ # Remove the prompt from the generated text to get clean assistant reply
61
+ generated_text = output[0]['generated_text']
62
+ response = generated_text[len(prompt):].strip()
63
  return response
64
 
65
+ # Launch Gradio Chat Interface
66
  gr.ChatInterface(
67
  fn=generate_chat_completion,
68
+ title="Falcon 1B Instruct Chatbot",
69
  description="Roleplay-ready chat using Falcon-1B-Instruct",
70
  retry_btn="Retry",
71
  undo_btn="Undo",
72
+ clear_btn="Clear",
73
  ).launch()