Reality123b committed on
Commit
1975705
·
verified ·
1 Parent(s): fc4b315

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -16
app.py CHANGED
@@ -34,7 +34,8 @@ app.add_middleware(
34
 
35
  # Initialize Hugging Face client
36
  hf_client = InferenceClient(
37
- api_key=os.getenv("HF_TOKEN"),
 
38
  timeout=30
39
  )
40
 
@@ -45,21 +46,19 @@ async def generate_stream(messages: List[Message], max_tokens: int, temperature:
45
  try:
46
  # Convert messages to the format expected by the API
47
  formatted_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
48
-
49
- # Create the streaming completion
50
- stream = hf_client.chat.completions.create(
51
- model=MODEL_ID,
52
- messages=formatted_messages,
53
- temperature=temperature,
54
- max_tokens=max_tokens,
55
- top_p=top_p,
56
- stream=True
57
- )
58
 
59
  # Stream the response chunks
60
- for chunk in stream:
61
- if chunk.choices[0].delta.content is not None:
62
- yield chunk.choices[0].delta.content
 
 
 
 
 
 
 
 
63
 
64
  except Exception as e:
65
  logger.error(f"Error in generate_stream: {e}", exc_info=True)
@@ -71,13 +70,13 @@ async def chat_stream(input: ChatInput, request: Request):
71
  try:
72
  if not os.getenv("HF_TOKEN"):
73
  raise HTTPException(
74
- status_code=500,
75
  detail="HF_TOKEN environment variable not set"
76
  )
77
 
78
  logger.info(f"Received chat request from {request.client.host}")
79
  logger.info(f"Number of messages: {len(input.messages)}")
80
-
81
  return StreamingResponse(
82
  generate_stream(
83
  messages=input.messages,
 
34
 
35
  # Initialize Hugging Face client
36
  hf_client = InferenceClient(
37
+ model=os.getenv("MODEL_ID", "mistralai/Mistral-Nemo-Instruct-2407"), # default model added to client
38
+ token=os.getenv("HF_TOKEN"), # renamed api_key to token
39
  timeout=30
40
  )
41
 
 
46
  try:
47
  # Convert messages to the format expected by the API
48
  formatted_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
 
 
 
 
 
 
 
 
 
 
49
 
50
  # Stream the response chunks
51
+ for chunk in hf_client.text_generation(
52
+ prompt= formatted_messages, # updated to text_generation
53
+ details=True,
54
+ max_new_tokens=max_tokens, # renamed max_tokens to max_new_tokens
55
+ temperature=temperature,
56
+ top_p=top_p,
57
+ do_sample=True,
58
+ stream=True,
59
+ ):
60
+ if chunk.token.text is not None:
61
+ yield chunk.token.text
62
 
63
  except Exception as e:
64
  logger.error(f"Error in generate_stream: {e}", exc_info=True)
 
70
  try:
71
  if not os.getenv("HF_TOKEN"):
72
  raise HTTPException(
73
+ status_code=500,
74
  detail="HF_TOKEN environment variable not set"
75
  )
76
 
77
  logger.info(f"Received chat request from {request.client.host}")
78
  logger.info(f"Number of messages: {len(input.messages)}")
79
+
80
  return StreamingResponse(
81
  generate_stream(
82
  messages=input.messages,