Spaces:

ThanhHoang
/

Chat

Sleeping

App Files Files Community

ThanhHoang committed on Feb 28, 2024

Commit

aef27f4

verified ·

1 Parent(s): 98d251e

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -7

app.py CHANGED Viewed

@@ -1,10 +1,63 @@
-import gradio as gr
-from transformers import pipeline
-# Load model directly
-# from transformers import AutoModel
-# model = AutoModel.from_pretrained("ThanhHoang/Chat")
-# pipeline = pipeline( model=model)
 def chatbot(question):
@@ -41,5 +94,6 @@ import pickle
 # Lưu trữ mô hình
 with open("model.pkl", "wb") as f:
-    pickle.dump(nn_pipe, f)

+# Module-level setup: convert the Excel question/answer sheet to CSV, clean the
+# text columns, train a bag-of-words + MLP classifier that maps a question to
+# an answer string, and configure the OpenAI client settings.
+import os
+
+import gradio as gr
+# ! pip install neattext               # Installing the neattext library
+import neattext.functions as ntf
+import numpy as np
+import openai
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer,TfidfTransformer
+from sklearn.neural_network import MLPClassifier
+from sklearn.pipeline import Pipeline
+
+# Path to the Excel file.
+# NOTE(review): '/content/...' looks like a notebook-style absolute path -
+# confirm the file exists at this location where the app is deployed.
+excel_file_path = '/content/Data_LHU_2cot .xlsx'
+# Read the Excel file
+df = pd.read_excel(excel_file_path)
+# Reuse the Excel file's base name, swapping the extension for .csv
+csv_file_name = os.path.splitext(os.path.basename(excel_file_path))[0] + '.csv'
+csv_file_path = os.path.join(os.path.dirname(excel_file_path), csv_file_name)
+# Write the data out as CSV; utf-8-sig keeps the Vietnamese diacritics intact
+df.to_csv(csv_file_path, index=False, encoding='utf-8-sig')
+print(f'Chuyển đổi thành công! File CSV được lưu tại: {csv_file_path}')
+
+# NOTE(review): `sn` was used below without ever being assigned in the visible
+# code (NameError at import time). Loading the CSV that was just written is
+# assumed to be the intent - confirm against the original notebook.
+sn = pd.read_csv(csv_file_path, encoding='utf-8-sig')
+
+# Replace NaN in the question / answer columns with empty strings.
+# Assign the result back instead of calling fillna(inplace=True) on a column
+# selection: the chained in-place form is deprecated and does not update the
+# frame under pandas copy-on-write.
+sn['Câu hỏi'] = sn['Câu hỏi'].fillna('')
+sn['Câu trả lời'] = sn['Câu trả lời'].fillna('')
+
+# Clean the text: strip @user handles, then punctuation. Non-string cells are
+# passed through unchanged.
+sn['Câu hỏi_Làm sạch'] = sn['Câu hỏi'].apply(lambda x: ntf.remove_userhandles(x) if isinstance(x, str) else x)
+sn['Câu hỏi_Làm sạch'] = sn['Câu hỏi_Làm sạch'].apply(lambda x: ntf.remove_punctuations(x) if isinstance(x, str) else x)
+sn['Câu trả lời_Làm sạch'] = sn['Câu trả lời'].apply(lambda x: ntf.remove_userhandles(x) if isinstance(x, str) else x)
+sn['Câu trả lời_Làm sạch'] = sn['Câu trả lời_Làm sạch'].apply(lambda x: ntf.remove_punctuations(x) if isinstance(x, str) else x)
+
+# Build the neural-network pipeline: token counts -> TF-IDF -> MLP classifier
+nn_pipe = Pipeline([
+    ('vect', CountVectorizer()),
+    ('tfidf', TfidfTransformer()),
+    ('nn', MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, activation='relu', solver='adam'))
+])
+# Train the pipeline.
+# NOTE(review): this fits on the raw columns, not the '_Làm sạch' (cleaned)
+# columns computed above - confirm which is intended.
+nn_pipe.fit(sn['Câu hỏi'], sn['Câu trả lời'])
+
+# Never commit API keys to source control: read the key from the environment
+# (e.g. a Space secret named OPENAI_API_KEY). The key that was previously
+# hard-coded here is public in the commit history and should be revoked.
+openai.api_key = os.environ.get('OPENAI_API_KEY')
+# Message list for OpenAI, seeded with the system prompt
+messages = [
+    {"role": "system", "content": "You are a kind helpful assistant."},
+]
+confidence_threshold = 0.5
 def chatbot(question):
 # Persist the model to disk
 with open("model.pkl", "wb") as f:
+    pickle.dump(chatbot, f)  # NOTE(review): pickle stores a function by reference (its qualified name), not its code or any trained model; the removed line dumped `nn_pipe` - confirm this change is intended