Update app.py
app.py
CHANGED
@@ -13,6 +13,9 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from llama_parse import LlamaParse
 from langchain_core.documents import Document
 from huggingface_hub import InferenceClient
+import inspect
+
+print(inspect.signature(client.chat_completion))
 
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
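Note on this hunk: the added print(inspect.signature(client.chat_completion)) executes at import time, but client is only created later, inside generate_chunked_response, so unless app.py defines a module-level client elsewhere this line raises NameError when the Space boots. A minimal sketch of the same signature check on an instantiated client, reusing the model ID from the hunks below:

    import inspect
    from huggingface_hub import InferenceClient

    # Instantiate first, then inspect the bound method's signature.
    client = InferenceClient("meta-llama/Meta-Llama-3.1-8B-Instruct")
    print(inspect.signature(client.chat_completion))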
@@ -70,7 +73,7 @@ def update_vectors(files, parser):
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
 
-def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7
+def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature=0.7):
     client = InferenceClient(
         "meta-llama/Meta-Llama-3.1-8B-Instruct",
         token=huggingface_token,
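Note on this hunk: the only change is the closing "):" on the def line; the previous revision was a SyntaxError, so app.py could not be imported at all. With the header fixed, the function is callable with its defaults, e.g. (illustrative prompt, not from the diff):

    response = generate_chunked_response("Summarize the uploaded documents.")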
@@ -84,9 +87,8 @@ def generate_chunked_response(prompt, max_tokens=1000, max_chunks=5, temperature
     chunk_response = ""
     for message in client.chat_completion(
         messages=messages,
-        max_tokens=max_tokens,
+        max_tokens=max_tokens,
         temperature=temperature,
-        repetition_penalty=repetition_penalty,
         stream=True,
     ):
         chunk = message.choices[0].delta.content
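Note on this hunk: repetition_penalty is dropped from the call, and rightly so on two counts: the name was never defined in generate_chunked_response's signature, and recent huggingface_hub versions accept repetition_penalty only on text_generation, not on InferenceClient.chat_completion, which is presumably what the inspect.signature print above was added to confirm. The paired -/+ on max_tokens=max_tokens, most likely reflects a whitespace-only change. A self-contained sketch of the streaming loop as it stands after this commit (the message content is illustrative):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        token=os.environ.get("HUGGINGFACE_TOKEN"),
    )
    messages = [{"role": "user", "content": "Hello"}]
    chunk_response = ""
    for message in client.chat_completion(
        messages=messages,
        max_tokens=1000,
        temperature=0.7,
        stream=True,
    ):
        chunk = message.choices[0].delta.content
        # The delta content can be None on the final streamed chunk.
        if chunk:
            chunk_response += chunk
    print(chunk_response)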