Dhahlan2000 committed on
Commit
ce003c3
·
verified ·
1 Parent(s): 00f1aa9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -17,7 +17,7 @@ model = AutoModelForCausalLM.from_pretrained(
17
  model.eval() # Set the model to evaluation mode
18
 
19
  # Initialize the inference client (if needed for other API-based tasks)
20
- # client = InferenceClient(token=access_token)
21
 
22
  def conversation_predict(input_text):
23
  """Generate a response for single-turn input using the model."""
@@ -54,6 +54,7 @@ def respond(
54
 
55
  # Stream response tokens from the chat completion API
56
  for message_chunk in client.chat_completion(
 
57
  messages=messages,
58
  max_tokens=max_tokens,
59
  stream=True,
 
17
  model.eval() # Set the model to evaluation mode
18
 
19
  # Initialize the inference client (if needed for other API-based tasks)
20
+ client = InferenceClient(provider="together",token=access_token)
21
 
22
  def conversation_predict(input_text):
23
  """Generate a response for single-turn input using the model."""
 
54
 
55
  # Stream response tokens from the chat completion API
56
  for message_chunk in client.chat_completion(
57
+ model = "google/gemma-2b-it"
58
  messages=messages,
59
  max_tokens=max_tokens,
60
  stream=True,