codelion committed on
Commit
c15f617
·
verified ·
1 Parent(s): fd65021

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +22 -11
main.py CHANGED
@@ -8,6 +8,7 @@ from requests import JSONDecodeError
8
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
9
  from langchain_community.vectorstores import SupabaseVectorStore
10
  from langchain_community.llms import HuggingFaceEndpoint
 
11
 
12
  from langchain.chains import ConversationalRetrievalChain
13
  from langchain.memory import ConversationBufferMemory
@@ -60,17 +61,27 @@ def response_generator(query: str) -> str:
60
  logger.info("Using HF model %s", model)
61
 
62
  # prepare HF text-generation LLM
63
- hf = HuggingFaceEndpoint(
64
- # endpoint_url=f"https://api-inference.huggingface.co/models/{model}",
65
- endpoint_url=f"https://router.huggingface.co/hf-inference/models/{model}",
66
- task="text-generation",
67
- huggingfacehub_api_token=hf_api_key,
68
- model_kwargs={
69
- "temperature": temperature,
70
- "max_new_tokens": max_tokens,
71
- "return_full_text": False,
72
- },
73
- )
 
 
 
 
 
 
 
 
 
 
74
 
75
  # conversational RAG chain
76
  qa = ConversationalRetrievalChain.from_llm(
 
8
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
9
  from langchain_community.vectorstores import SupabaseVectorStore
10
  from langchain_community.llms import HuggingFaceEndpoint
11
+ from langchain_openai import ChatOpenAI
12
 
13
  from langchain.chains import ConversationalRetrievalChain
14
  from langchain.memory import ConversationBufferMemory
 
61
  logger.info("Using HF model %s", model)
62
 
63
  # prepare HF text-generation LLM
64
+ # hf = HuggingFaceEndpoint(
65
+ # # endpoint_url=f"https://api-inference.huggingface.co/models/{model}",
66
+ # endpoint_url=f"https://router.huggingface.co/hf-inference/models/{model}",
67
+ # task="text-generation",
68
+ # huggingfacehub_api_token=hf_api_key,
69
+ # model_kwargs={
70
+ # "temperature": temperature,
71
+ # "max_new_tokens": max_tokens,
72
+ # "return_full_text": False,
73
+ # },
74
+ # )
75
+
76
+ hf = ChatOpenAI(
77
+ base_url=f"https://router.huggingface.co/hf-inference/models/{model}/v1",
78
+ api_key=hf_api_key,
79
+ model=model,
80
+ temperature=temperature,
81
+ max_tokens=max_tokens,
82
+ timeout=30, # Add timeout
83
+ max_retries=3, # Built-in retry logic
84
+ )
85
 
86
  # conversational RAG chain
87
  qa = ConversationalRetrievalChain.from_llm(