Spaces:

ThanhHoang
/

Chat

Sleeping

App Files Files Community

Chat / app.py

ThanhHoang

Update app.py

07475f4 verified over 1 year ago

raw

history blame

3.8 kB

	import numpy as np
	import pandas as pd
	#!pip install neattext # Installing the neattext library
	import neattext.functions as ntf
	from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer,TfidfTransformer
	from sklearn.pipeline import Pipeline

	import os
	import pandas as pd

	# Location of the question/answer dataset (CSV with the columns 'Câu hỏi'
	# and 'Câu trả lời').  The CSV is expected to have been exported beforehand
	# from the source Excel workbook, e.g. with
	# `df.to_csv(path, index=False, encoding='utf-8-sig')` so that Vietnamese
	# diacritics are preserved.
	csv_file_path='Chat/Data_LHU_2cot.csv'
	sn=pd.read_csv(csv_file_path)


	def _clean_text(value):
	    """Strip user handles and punctuation from *value*; pass non-strings through."""
	    if not isinstance(value, str):
	        return value
	    return ntf.remove_punctuations(ntf.remove_userhandles(value))


	for _column in ('Câu hỏi', 'Câu trả lời'):
	    # Replace missing cells with empty strings.  The result is assigned back
	    # rather than using `inplace=True` on the column selection, because the
	    # in-place form acts on a temporary object and does not update the
	    # DataFrame when pandas Copy-on-Write is active.
	    sn[_column] = sn[_column].fillna('')
	    # Keep the raw text and store the cleaned variant in a sibling column.
	    sn[_column + '_Làm sạch'] = sn[_column].apply(_clean_text)

	from sklearn.neural_network import MLPClassifier
	from sklearn.pipeline import Pipeline
	from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

	# Text classifier: token counts -> TF-IDF weighting -> multi-layer perceptron.
	# Each distinct answer string acts as one class label.
	nn_pipe = Pipeline([
	    ('vect', CountVectorizer()),
	    ('tfidf', TfidfTransformer()),
	    ('nn', MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, activation='relu', solver='adam')),
	])

	# Train on the *cleaned* questions: chatbot() removes user handles and
	# punctuation from the incoming question before predicting, so the training
	# text must go through the same preprocessing.  The raw answers are kept as
	# targets so that replies are returned with their original punctuation.
	nn_pipe.fit(sn['Câu hỏi_Làm sạch'], sn['Câu trả lời'])

	import openai

	# SECURITY: the API key must never be committed to source control.  It is
	# read from the OPENAI_API_KEY environment variable (on Hugging Face Spaces,
	# configure it as a repository secret).  Any key that was previously
	# hard-coded here has been exposed publicly and should be revoked.
	# If the variable is unset the key is None: the local classifier still
	# works, and only the OpenAI fallback will fail at request time.
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Message list passed to the OpenAI chat endpoint, seeded with the system prompt.
	messages = [
	    {"role": "system", "content": "You are a kind helpful assistant."},
	]

	# Minimum top-class probability the local classifier must reach for its
	# answer to be returned instead of falling back to OpenAI.
	confidence_threshold = 0.5

	import gradio as gr


	def chatbot(question: str) -> str:
	    """Answer *question* with the local classifier, falling back to OpenAI.

	    The question is cleaned (user handles and punctuation removed) and fed to
	    ``nn_pipe``.  If the highest class probability reaches
	    ``confidence_threshold`` the corresponding answer is returned; otherwise
	    the original question, followed by a location hint about the university,
	    is sent to ``gpt-3.5-turbo`` and its reply is returned.

	    Args:
	        question: The user's question as typed into the interface.

	    Returns:
	        The predicted answer label, or the OpenAI reply text.
	    """
	    clean_question = ntf.remove_punctuations(ntf.remove_userhandles(question))

	    # Run the model once: the predicted label is the class with the highest
	    # probability, so a separate predict() call would only repeat the work.
	    probabilities = nn_pipe.predict_proba([clean_question])[0]
	    best = probabilities.argmax()
	    if probabilities[best] >= confidence_threshold:
	        return nn_pipe.classes_[best]

	    # The classifier is not confident enough: ask OpenAI instead.
	    # A fresh message list is built per call rather than appending to the
	    # module-level `messages`; appending there would grow the prompt without
	    # bound and mix questions from different users into one conversation.
	    location_hint = "trường Đại học Lạc Hồng ở phường Bửu Long, thành phố Biên Hòa, tỉnh Đồng Nai"
	    conversation = [
	        *messages,
	        # The separating space keeps the hint from being glued to the last
	        # word of the question.
	        {"role": "user", "content": f"{question} {location_hint}"},
	    ]
	    # NOTE(review): openai.ChatCompletion is the pre-1.0 interface of the
	    # openai package - confirm the pinned version still provides it.
	    chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversation)
	    return chat.choices[0].message.content

	# Persist the trained model.
	import pickle

	# Pickle the fitted pipeline itself.  Pickling the `chatbot` function would
	# only store a by-name reference to it, not the trained weights.  This is
	# done before launching the UI because launch() normally blocks the main
	# thread when run as a script, so code placed after it would not execute
	# while the app is serving.
	with open("model.pkl", "wb") as f:
	    pickle.dump(nn_pipe, f)

	# Single-turn text-in / text-out web UI around chatbot().
	iface = gr.Interface(
	    fn=chatbot,
	    inputs="text",
	    outputs="text",
	    title="ChatBot_LHU",
	    description="Nhập câu hỏi của bạn ở đây",
	)
	iface.launch()