Spaces:

nolzZ
/

dashbordclass

Sleeping

App Files Files Community

dashbordclass / src /Senti_real /sentiment.py

nolzZ

Upload 4 files

4fc8e0c verified about 2 months ago

raw

history blame contribute delete

8.07 kB

	import os
	import gspread
	import pandas as pd
	from google.oauth2.service_account import Credentials
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

	# ========== STEP 1: Configure paths ==========
	# Everything is resolved relative to the folder that contains this script.
	BASE_DIR = os.path.dirname(__file__)
	CRED_PATH = os.path.join(BASE_DIR, "credentials.json")

	_SENTI_ROOT = os.path.join(BASE_DIR, "Senti_real")
	DATA_DIR = os.path.join(_SENTI_ROOT, "Data")
	SENTIMENT_DIR = os.path.join(_SENTI_ROOT, "Sentiment")

	# Create both output folders up front; existing folders are left alone.
	for _output_dir in (DATA_DIR, SENTIMENT_DIR):
	    os.makedirs(_output_dir, exist_ok=True)

	# ========== STEP 2: Connect to Google Sheets ==========
	scopes = ['https://www.googleapis.com/auth/spreadsheets']
	credentials = Credentials.from_service_account_file(CRED_PATH, scopes=scopes)
	client = gspread.authorize(credentials)

	sheet_url = "https://docs.google.com/spreadsheets/d/1M3hwj9BRg4diW8_7rdk0OskLndnEUscdn_CWM2vNj_0/edit?usp=sharing"
	# Load every record of the spreadsheet's first worksheet into a DataFrame.
	spreadsheet = client.open_by_url(sheet_url)
	worksheet = spreadsheet.get_worksheet(0)
	data = worksheet.get_all_records()
	df = pd.DataFrame(data)

	# ========== STEP 3: Preprocess ==========
	# Maps each survey question header (exactly as it appears in the sheet,
	# including any leading/trailing spaces) to a short English column name.
	column_mapping = {
	    'ประทับเวลา': 'timestamp',
	    'เพศ': 'gender',
	    'อายุ (ใส่เป็นตัวเลข)': 'age',
	    'บทบาทของคุณในการใช้งาน แอปพลิเคชัน Check PD': 'user_role',
	    'ผลลัพธ์ที่ได้จาก แอปพลิเคชัน Check PD': 'app_result',
	    'สถานที่ท่านเข้ารับบริการ ( อื่น ๆ โปรดระบุจังหวัด )': 'service_location',
	    'วันที่เข้ารับบริการ ': 'service_date',
	    'แอปพลิเคชัน CheckPD ใช้งานง่ายและไม่ซับซ้อน ': 'app_usability',
	    'ข้อมูลและคำแนะนำในแอปพลิเคชันมีความเข้าใจง่าย ': 'app_info_clarity',
	    'คุณรู้สึกว่าแอปพลิเคชันให้ผลวิเคราะห์ที่แม่นยำและน่าเชื่อถือ': 'app_accuracy',
	    'แอปช่วยให้คุณสามารถติดตามหรือวางแผนดูแลอาการได้ดีขึ้น': 'app_support_effectiveness',
	    'โดยรวมแล้วคุณพึงพอใจกับการใช้งานแอปพลิเคชัน CheckPD มากน้อยเพียงใด ': 'app_overall_satisfaction',
	    'การให้บริการของเจ้าหน้าที่': 'staff_service_quality',
	    'ความรวดเร็วในการให้บริการของเจ้าหน้าที่': 'staff_response_speed',
	    'เจ้าหน้าที่สามารถให้คำแนะนำหรือข้อมูลเบื้องต้นได้ชัดเจน': 'staff_info_clarity',
	    'คุณรู้สึกสะดวกและประทับใจเมื่อขอความช่วยเหลือ ': 'staff_comfort',
	    'คุณพึงพอใจต่อการบริการจากเจ้าหน้าที่ทางการแพทย์หรือไม่ ': 'staff_overall_satisfaction',
	    'คุณรู้สึกอย่างไรเมื่อใช้งานแอปพลิเคชัน CheckPD ครั้งแรก? ': 'user_feeling_first_use',
	    'บริการของเจ้าหน้าที่ทำให้คุณรู้สึกอย่างไร?': 'staff_emotion_feedback',
	    'สิ่งใดในแอปพลิเคชันหรืองานบริการที่คุณคิดว่าควรปรับปรุง? ': 'improvement_suggestions',
	    'แบบทดสอบที่ได้ทำในแอปพลิเคชัน Check PD ': 'app_quiz_used',
	    'ปัญหาการใช้งานแอปพลิเคชัน Check PD ': 'app_issue_encountered',
	    ' ความพร้อมในการให้บริการของเจ้าหน้าที่ ': 'staff_readiness',
	}
	# Headers that are not keys of the mapping keep their original name.
	df = df.rename(columns=column_mapping)

	# Split the multi-select quiz answer into one 0/1 indicator column per quiz.
	# The cell holds the selected quiz labels separated by commas; the pattern
	# is a raw string because '\s' inside a normal literal is an invalid escape
	# sequence and triggers a warning on current Python versions.
	df_split = df['app_quiz_used'].str.split(r',\s*', expand=True)
	# Quiz label exactly as it appears in the answer -> English indicator column.
	quiz_column_names = {
	    "ประวัติส่วนตัว (ชื่อ-นามสกุล เลขบัตรประจำตัวประชาชน ที่อยู่)": "personal_info",
	    "การออกเสียง 'อาาา' (Voice Test - Ahhh)": "voice_ahhh",
	    "การออกเสียง 'ยายพาหลานไปซื้อขนมที่ตลาด' (Voice Test - ยายพาหลานไปซื้อขนมที่ตลาด)": "voice_sentence",
	    "อาการสั่นขณะนั่งนิ่ง (Resting Tremor)": "resting_tremor",
	    "อาการสั่นขณะยกแขน (Postural Tremor)": "postural_tremor",
	    "แตะสลับนิ้วขวา (Dual Tap - Right)": "dual_tap_right",
	    "แตะสลับนิ้วซ้าย (Dual Tap - Left)": "dual_tap_left",
	    "การขยายวงกลม - ขวา (Pinch to Size - Right)": "pinch_size_right",
	    "การขยายวงกลม - ซ้าย (Pinch to Size - Left)": "pinch_size_left",
	    "การเดิน (Gait walk)": "gait_walk",
	    "การทรงตัวขณะยืน (Balance)": "balance",
	    "ตอบคำถาม 20 ข้อ (Questionnaire)": "questionnaire"
	}
	# Every indicator starts at 0 and is switched to 1 only where the quiz was
	# listed in that respondent's answer.
	for col in quiz_column_names.values():
	    df[col] = 0
	for idx, row in df_split.iterrows():
	    # dropna() skips the padding cells of rows with fewer selections.
	    for val in row.dropna():
	        en_col = quiz_column_names.get(val.strip())
	        # Labels that are not in quiz_column_names are ignored.
	        if en_col:
	            df.at[idx, en_col] = 1
	# The raw multi-select column is replaced by the indicator columns.
	df.drop(columns=['app_quiz_used'], inplace=True)

	# ========== STEP 4: Save the preprocessed data ==========
	csv_path_clean = os.path.join(DATA_DIR, "checkpd_data.csv")
	df.to_csv(csv_path_clean, index=False)

	# ========== STEP 5: Sentiment analysis ==========
	# Free-text answers that will be scored.
	text_columns = ['user_feeling_first_use', 'staff_emotion_feedback', 'improvement_suggestions']
	# Independent copy holding the service location plus the free-text answers,
	# so the sentiment columns are not added to df itself.
	df_sentiment = df[['service_location'] + text_columns].copy()

	# Load the tokenizer and classification model, then wrap both in a pipeline.
	model_name = "phoner45/wangchan-sentiment-thai-text-model"
	tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
	model = AutoModelForSequenceClassification.from_pretrained(model_name)
	sentiment_pipe = pipeline(
	    "sentiment-analysis",
	    model=model,
	    tokenizer=tokenizer,
	)

	def analyze_sentiment(series):
	    """Run the sentiment pipeline over a pandas Series of free-text answers.

	    Args:
	        series: pandas Series of text. Missing values are treated as empty
	            text and every value is converted to ``str`` before scoring.

	    Returns:
	        A ``(labels, scores)`` tuple of two lists, each with exactly one
	        entry per element of ``series``:

	        * blank or missing text -> ``"NA"`` and ``None``;
	        * text whose scoring raised an exception -> ``"ERROR"`` and ``None``;
	        * otherwise the pipeline's ``label`` and its ``score`` rounded to
	          4 decimal places.
	    """
	    import logging

	    labels = []
	    scores = []
	    for text in series.fillna("").astype(str):
	        if not text.strip():
	            labels.append("NA")
	            scores.append(None)
	            continue
	        try:
	            # Only the first 512 characters of the answer are sent to the model.
	            result = sentiment_pipe(text[:512])[0]
	            label = result['label']
	            score = round(result['score'], 4)
	        except Exception:
	            # Best effort: one failing answer must not abort the whole run.
	            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
	            logging.getLogger(__name__).warning(
	                "Sentiment analysis failed for one entry", exc_info=True
	            )
	            label, score = "ERROR", None
	        # Append only after both values are known so that the two lists can
	        # never end up with different lengths.
	        labels.append(label)
	        scores.append(score)
	    return labels, scores

	# Add a "<column>_sentiment" and a "<column>_score" column for every
	# free-text column.
	for col in text_columns:
	    sentiment_labels, sentiment_scores = analyze_sentiment(df_sentiment[col])
	    df_sentiment[f"{col}_sentiment"] = sentiment_labels
	    df_sentiment[f"{col}_score"] = sentiment_scores

	# ========== STEP 6: Save the results ==========
	output_path = os.path.join(SENTIMENT_DIR, "checkpd_sentiment.csv")
	df_sentiment.to_csv(output_path, index=False)

	print(f"\n✅ วิเคราะห์ Sentiment เสร็จสมบูรณ์! บันทึกที่: {output_path}")