Update app.py
app.py CHANGED
@@ -70,7 +70,7 @@ def update_vectors(files, parser):
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
 
-def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7):
+def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7, repetition_penalty=1.1):
     client = InferenceClient(
         "meta-llama/Meta-Llama-3.1-8B-Instruct",
         token=huggingface_token,
@@ -84,7 +84,7 @@ def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature
     chunk_response = ""
     for message in client.chat_completion(
         messages=messages,
-        max_new_tokens=max_tokens,
+        max_tokens=max_tokens,  # Changed from max_new_tokens to max_tokens
         temperature=temperature,
         repetition_penalty=repetition_penalty,
         stream=True,
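
For reference, below is a minimal, self-contained sketch of the streaming pattern the second hunk touches. It is an illustration, not the app's exact code: the HUGGINGFACE_TOKEN environment variable name and the prompt are assumptions, and repetition_penalty is omitted for brevity. InferenceClient.chat_completion follows the OpenAI-style chat API, where the generation length limit is named max_tokens, which is why the call is switched away from max_new_tokens (the name used by text_generation).

import os
from huggingface_hub import InferenceClient

# Illustrative sketch only; mirrors the updated chat_completion call above.
client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=os.environ.get("HUGGINGFACE_TOKEN"),  # assumed env var name
)

messages = [{"role": "user", "content": "Say hello in one short sentence."}]
response = ""
for message in client.chat_completion(
    messages=messages,
    max_tokens=100,       # length limit is max_tokens, not max_new_tokens
    temperature=0.7,
    stream=True,
):
    # Each streamed message carries an incremental delta of generated text.
    response += message.choices[0].delta.content or ""
print(response)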