Spaces:

ThanhHoang
/

Chat

Sleeping

File size: 3,859 Bytes

aef27f4
 
517f593
aef27f4
 
 
 
 
 
 
 
fca525c
aef27f4
 
fca525c
aef27f4
 
fca525c
 
6af106b
e1c8786
f6d32f3
24030c8
e1c8786
aef27f4
5dc09ff
aef27f4
5dc09ff
24030c8
aef27f4
 
 
 
 
 
 
 
 
 
b65581e
aef27f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b65581e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aef27f4
 
b65581e

import numpy as np
import pandas as pd
#!pip install neattext               # Installing the neattext library
import neattext.functions as ntf
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer,TfidfTransformer
from sklearn.pipeline import Pipeline

import os
import pandas as pd

# The CSV below was produced once from the original Excel workbook; the
# conversion is kept here for reference only:
#   excel_file_path = '/content/Data_LHU_2cot .xlsx'
#   df = pd.read_excel(excel_file_path)
#   df.to_csv(csv_file_path, index=False, encoding='utf-8-sig')
# (utf-8-sig was chosen so the Vietnamese diacritics are preserved.)

# Load the question/answer dataset.
# NOTE(review): the space before ".csv" is part of the file name as written;
# confirm it matches the file actually deployed next to this script.
csv_file_path = "Data_LHU_2cot .csv"
sn = pd.read_csv(csv_file_path)

# Replace missing questions/answers with empty strings. Assigning the result
# back (instead of calling fillna(inplace=True) on the selected column) is
# required for the change to reach `sn` under pandas Copy-on-Write.
sn['Câu hỏi'] = sn['Câu hỏi'].fillna('')
sn['Câu trả lời'] = sn['Câu trả lời'].fillna('')


def _clean_text(value):
    """Strip @user handles and punctuation from *value*.

    Non-string values are returned unchanged so that unexpected cell types
    pass through untouched.
    """
    if not isinstance(value, str):
        return value
    return ntf.remove_punctuations(ntf.remove_userhandles(value))


# Cleaned copies of both columns (handles removed first, then punctuation).
sn['Câu hỏi_Làm sạch'] = sn['Câu hỏi'].apply(_clean_text)
sn['Câu trả lời_Làm sạch'] = sn['Câu trả lời'].apply(_clean_text)

from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Build the neural-network text classifier:
# token counts -> TF-IDF weighting -> single-hidden-layer MLP.
nn_pipe = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('nn', MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, activation='relu', solver='adam'))
])

# Train the classifier. The features are the *cleaned* questions, because
# chatbot() applies the same cleaning to user input before predicting;
# training on the raw column would make training and inference text differ.
# The targets stay the raw answers so replies keep their punctuation; each
# distinct answer string is one class.
nn_pipe.fit(sn['Câu hỏi_Làm sạch'], sn['Câu trả lời'])

import os

import openai

# SECURITY: the API key must never be committed to source. It is read from
# the OPENAI_API_KEY environment variable (e.g. a deployment secret).
# The key that was previously hard-coded here is exposed in the file history
# and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Message history sent to OpenAI; it always starts with the system prompt.
messages = [
    {"role": "system", "content": "You are a kind helpful assistant."},
]

# Minimum class probability the local classifier must reach for its answer
# to be returned instead of falling back to OpenAI.
confidence_threshold = 0.5

import gradio as gr


# Maximum number of user/assistant messages kept after the system prompt, so
# the history sent to OpenAI cannot grow without bound in a long-running app.
_MAX_HISTORY_MESSAGES = 20

# Location context appended to every question forwarded to OpenAI.
_LHU_CONTEXT = "trường Đại học Lạc Hồng ở phường Bửu Long, thành phố Biên Hòa, tỉnh Đồng Nai"


def chatbot(question):
    """Answer *question* with the local classifier, falling back to OpenAI.

    The question is cleaned (user handles and punctuation removed) and passed
    to ``nn_pipe``. If the most probable class reaches
    ``confidence_threshold`` that answer is returned; otherwise the original
    question, followed by the university location context, is sent to
    ``gpt-3.5-turbo`` together with the stored message history.

    Args:
        question: The user's question text.

    Returns:
        The predicted answer from the classifier, or the OpenAI reply text.

    Note:
        ``messages`` is a module-level list, so the stored history is shared
        by every caller in this process.
    """
    # Clean the user's question the same way before predicting.
    clean_question = ntf.remove_punctuations(ntf.remove_userhandles(question))

    # Run the pipeline once: predict_proba columns follow nn_pipe.classes_,
    # so the most probable column also identifies the predicted answer.
    probabilities = nn_pipe.predict_proba([clean_question])[0]
    best_index = probabilities.argmax()
    if probabilities[best_index] >= confidence_threshold:
        return nn_pipe.classes_[best_index]

    # The classifier is not confident enough: ask OpenAI instead. A space
    # separates the question from the appended context.
    user_message = {"role": "user", "content": f"{question} {_LHU_CONTEXT}"}
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages + [user_message],
    )
    reply = chat.choices[0].message.content

    # Record the exchange only after the call succeeded, so a failed request
    # does not leave a user message without a reply in the history.
    messages.append(user_message)
    messages.append({"role": "assistant", "content": reply})

    # Drop the oldest turns (never the system prompt at index 0).
    overflow = len(messages) - 1 - _MAX_HISTORY_MESSAGES
    if overflow > 0:
        del messages[1:1 + overflow]
    return reply

import pickle

# Save the trained model. The pipeline object itself is pickled: pickling the
# `chatbot` function would only store a reference to its name, not the fitted
# model. This runs before launch() because launch() keeps serving until the
# app is stopped, so code placed after it would not run while the app is up.
with open("model.pkl", "wb") as f:
    pickle.dump(nn_pipe, f)

# Single text-in / text-out web UI around chatbot().
iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="ChatBot_LHU",
    description="Nhập câu hỏi của bạn ở đây"
)
iface.launch()