Shreyas094 committed
Commit 1dc1ef1 · verified · Parent: d6312bb

Update app.py

Files changed (1): app.py (+5 -3)
app.py CHANGED
@@ -13,6 +13,9 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from llama_parse import LlamaParse
 from langchain_core.documents import Document
 from huggingface_hub import InferenceClient
+import inspect
+
+print(inspect.signature(client.chat_completion))
 
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -70,7 +73,7 @@ def update_vectors(files, parser):
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
 
-def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7, repetition_penalty=1.1):
+def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7):
     client = InferenceClient(
         "meta-llama/Meta-Llama-3.1-8B-Instruct",
         token=huggingface_token,
@@ -84,9 +87,8 @@ def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature
         chunk_response = ""
         for message in client.chat_completion(
             messages=messages,
-            max_tokens=max_tokens,  # Changed from max_new_tokens to max_tokens
+            max_tokens=max_tokens,
             temperature=temperature,
-            repetition_penalty=repetition_penalty,
             stream=True,
         ):
             chunk = message.choices[0].delta.content
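
As committed, the module-level print(inspect.signature(client.chat_completion)) will raise a NameError, because client is only created inside generate_chunked_response; it reads as a temporary probe to confirm which keyword arguments chat_completion accepts. A self-contained version of that probe, with the client instantiated first, might look like this (a sketch, not part of the commit; the model name is taken from the diff):

# Hypothetical standalone version of the signature probe added in this commit.
# Instantiating the client before inspecting it avoids the NameError that the
# committed module-level print() would raise (`client` is defined only inside
# generate_chunked_response).
import inspect
import os

from huggingface_hub import InferenceClient

client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=os.environ.get("HUGGINGFACE_TOKEN"),
)

# The printed signature shows that chat_completion accepts `max_tokens`
# (not `max_new_tokens`) and has no `repetition_penalty` parameter -- the
# two call-site changes this commit makes.
print(inspect.signature(client.chat_completion))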
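
For reference, a minimal runnable sketch of generate_chunked_response after this commit. Only the signature and the chat_completion call are visible in the diff; the construction of messages, the continuation turn, and the early-stop heuristic below are assumptions.

# Sketch of the updated function under the assumptions stated above.
import os

from huggingface_hub import InferenceClient

huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")

def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7):
    client = InferenceClient(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        token=huggingface_token,
    )
    messages = [{"role": "user", "content": prompt}]
    full_response = ""
    for _ in range(max_chunks):
        chunk_response = ""
        for message in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,  # chat_completion takes max_tokens, not max_new_tokens
            temperature=temperature,
            stream=True,
        ):
            chunk = message.choices[0].delta.content
            if chunk:
                chunk_response += chunk
        full_response += chunk_response
        if not chunk_response:  # nothing further was generated; stop (assumption)
            break
        # Ask the model to continue from where it stopped (assumed behavior).
        messages.append({"role": "assistant", "content": chunk_response})
        messages.append({"role": "user", "content": "Please continue."})
    return full_response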