Spaces:

quoc-khanh
/

chatbot4nct_test1

Sleeping

App Files Community

quoc-khanh committed on Feb 25

Commit

1d07d89

verified ·

1 Parent(s): d44a5bf

Update helpers.py

Browse files

Files changed (1) hide show

helpers.py +1 -99

helpers.py CHANGED Viewed

@@ -21,32 +21,6 @@ if "GOOGLE_API_KEY" not in os.environ:
     os.environ["GOOGLE_API_KEY"] = "AIzaSyDJ4vIKuIBIPNHATLxnoHlagXWbsAz-vRs"
 key = "AIzaSyDJ4vIKuIBIPNHATLxnoHlagXWbsAz-vRs"
-###
-def get_vectorstore():
-    ### Xử lý tất cả các tài liệu và nhét vào database
-    folder_path = "syllabus_nct_word_format/"
-    docx_files = list_docx_files(folder_path)
-    all_splits = []  # Khởi tạo danh sách lưu kết quả
-    for i, file_path in enumerate(tqdm(docx_files, desc="Đang xử lý", unit="file")):
-        output_json_path = f"output_{i}.json"
-        splits = get_splits(file_path, output_json_path)
-        all_splits += splits
-    # Xử lý FAQ
-    FAQ_path = "syllabus_nct_word_format/FAQ.json"
-    FAQ_splits = get_json_splits_only(FAQ_path)
-    all_splits += FAQ_splits
-    # Lưu vào vectorstore với nhúng từ Google GenAI
-    embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
-    vectorstore = FAISS.from_documents(documents=all_splits, embedding=embedding)
-    return vectorstore
-###
 async def get_urls_splits(url='https://nct.neu.edu.vn/', char='https://nct.neu.edu.vn/'):
     reqs = requests.get(url)
     soup = BeautifulSoup(reqs.text, 'html.parser')
@@ -220,76 +194,4 @@ def prompt_order(queries):
     for q in queries:
         i += 1
         text += f'Question {i}: {str(q)}\n'
-    return text
-# Define the augment_prompt function
-def augment_prompt(query: str, k: int = 10):
-    queries = []
-    queries.append(query)
-    if "vectorstore" not in globals():
-        print("Không tìm thấy vectorstore. Đang tạo mới...")
-        vectorstore = get_vectorstore()
-        print("✅ Vectorstore đã được tạo thành công!")
-    else:
-        print("✅ Vectorstore đã tồn tại, không cần tạo lại.")
-    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
-    results = retriever.invoke(query)
-    if results:
-        source_knowledge = "\n\n".join([doc.page_content for doc in results])
-        return f"""Using the contexts below, answer the query.
-Contexts:
-{source_knowledge}
-"""
-    else:
-        return f"No relevant context found.\n."
-def get_answer(query, queries_list=None):
-    if queries_list is None:
-        queries_list = []
-    messages = [
-    {"role": "user", "parts": [{"text": "IMPORTANT: You are a super energetic, helpful, polite, Vietnamese-speaking assistant. If you can not see the answer in contexts, try to search it up online by yourself but remember to give the source."}]},
-    {"role": "user", "parts": [{"text": augment_prompt(query)}]}
-]
-#     bonus = '''
-# Bạn tham kháo thêm các nguồn thông tin tại:
-# Trang thông tin điện tử: https://neu.edu.vn ; https://daotao.neu.edu.vn
-# Trang mạng xã hội có thông tin tuyển sinh: https://www.facebook.com/ktqdNEU ; https://www.facebook.com/tvtsneu ;
-# Email tuyển sinh: [email protected]
-# Số điện thoại tuyển sinh: 0888.128.558
-#   '''
-    queries_list.append(query)
-    queries = {"role": "user", "parts": [{"text": prompt_order(queries_list)}]}
-    messages_with_queries = messages.copy()
-    messages_with_queries.append(queries)
-    # messages_with_queries.insert(0, queries)
-  # Configure API key
-    genai.configure(api_key=key)
-  # Initialize the Gemini model
-    model = genai.GenerativeModel("gemini-2.0-flash")
-    response = model.generate_content(contents=messages_with_queries, stream=True)
-    response_text = ""
-    for chunk in response:
-        response_text += chunk.text
-        yield response_text
-    messages.append({"role": "model", "parts": [{"text": response_text}]})
-        # user_feedback = yield "\nNhập phản hồi của bạn (hoặc nhập 'q' để thoát): "
-        # if user_feedback.lower() == "q":
-        #     break
-        # messages.append({"role": "user", "parts": [{"text": query}]})
-    log_message(messages)

     os.environ["GOOGLE_API_KEY"] = "AIzaSyDJ4vIKuIBIPNHATLxnoHlagXWbsAz-vRs"
 key = "AIzaSyDJ4vIKuIBIPNHATLxnoHlagXWbsAz-vRs"
 async def get_urls_splits(url='https://nct.neu.edu.vn/', char='https://nct.neu.edu.vn/'):
     reqs = requests.get(url)
     soup = BeautifulSoup(reqs.text, 'html.parser')
     for q in queries:
         i += 1
         text += f'Question {i}: {str(q)}\n'
+    return text