ruslanmv committed on
Commit 40e0f8e · verified · 1 Parent(s): c39fb11

Update app.py

Files changed (1): app.py +6 -7
app.py CHANGED
@@ -15,8 +15,10 @@ try:
     torch.backends.cudnn.benchmark = True

     model_name = "HuggingFaceH4/zephyr-7b-beta"
+    # Pass token if required for private models.
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        use_auth_token=HF_TOKEN,
         torch_dtype=torch.bfloat16,
         device_map="auto"
     )
@@ -24,7 +26,7 @@ try:
     if hasattr(torch, "compile"):
         model = torch.compile(model)

-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_TOKEN)
     inference_mode = "local"

 except ImportError:
@@ -37,10 +39,10 @@ except ImportError:

     model_name = "HuggingFaceH4/zephyr-7b-beta"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    client = InferenceClient(model_name)
+    # Pass the token to the client to avoid authentication errors.
+    client = InferenceClient(model_name, token=HF_TOKEN)
     inference_mode = "client"

-
 # ------------------------------------------------------------------------------
 # SYSTEM PROMPT (PATIENT ROLE)
 # ------------------------------------------------------------------------------
@@ -63,7 +65,6 @@ BEHAVIOR INSTRUCTIONS:
 - Keep your responses concise, aiming for a maximum of {max_response_words} words.
 Start the conversation by expressing your current feelings or challenges from the patient's point of view."""

-
 # ------------------------------------------------------------------------------
 # Utility Functions
 # ------------------------------------------------------------------------------
@@ -91,7 +92,6 @@ def truncate_response(text: str, max_words: int) -> str:
         return " ".join(words[:max_words]) + "..."
     return text

-
 # ------------------------------------------------------------------------------
 # Response Function
 # ------------------------------------------------------------------------------
@@ -138,7 +138,6 @@ def respond(
     final_response = truncate_response(generated_response, max_response_words)
     return final_response

-
 # ------------------------------------------------------------------------------
 # Optional Initial Message and Gradio Interface
 # ------------------------------------------------------------------------------
@@ -164,4 +163,4 @@ demo = gr.ChatInterface(
 )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
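The added arguments assume an HF_TOKEN variable is defined earlier in app.py; that definition is not part of this commit. A minimal sketch of how such a token is typically obtained on a Space, assuming it is stored as a repository secret exposed as the HF_TOKEN environment variable (the variable and secret name are assumptions, not shown in this diff):

import os

# Assumption: the Hugging Face access token is provided to the app as an
# environment variable named HF_TOKEN (e.g. via a Space secret).
# os.environ.get returns None when the variable is absent.
HF_TOKEN = os.environ.get("HF_TOKEN")

if not HF_TOKEN:
    # Public models still load without a token; gated or private ones will not.
    print("Warning: HF_TOKEN is not set; gated or private models may fail to load.")

As a side note, recent transformers releases deprecate the use_auth_token argument in favor of token=, so the same local-loading change can also be written as AutoModelForCausalLM.from_pretrained(model_name, token=HF_TOKEN, ...); the InferenceClient call already uses token= as shown above.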