ngcanh commited on
Commit
c6b42fa
·
verified ·
1 Parent(s): 7dbd703

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -46
app.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import pandas as pd
3
  from openai import OpenAI
4
  import os
 
 
5
  import subprocess
6
  TOKEN=os.getenv('HF_TOKEN')
7
  subprocess.run(["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"])
@@ -10,11 +12,6 @@ OPENAI_API_KEY = os.getenv("OPENAI_API")
10
  client = OpenAI(api_key=OPENAI_API_KEY)  # API key is read from the OPENAI_API environment variable
11
  from docx import Document
12
 
13
- # Function to extract text from a .docx file
14
- def extract_text_from_docx(file):
15
- doc = Document(file)
16
- text = "\n".join([para.text for para in doc.paragraphs])
17
- return text.strip()
18
 
19
  # Function to parse the feedback into rubric components
20
  def parse_feedback(feedback):
@@ -44,7 +41,7 @@ def parse_feedback(feedback):
44
  return scores
45
 
46
  # Function to grade the essay using GPT-4
47
- def grade_essay(essay, guided_data, topic, rubric):
48
  # Sample prompt for grading using GPT-4
49
  prompt = f"""
50
  You are an consultant that grades marketing and business proposal based on a provided rubric, ensuring an unbiased evaluation while considering clarity, originality, organization, and depth of analysis. Advise in Vietnamse, only use English for buzzwords.
@@ -72,7 +69,13 @@ def grade_essay(essay, guided_data, topic, rubric):
72
  {"role": "user", "content": prompt}
73
  ])
74
  return response.choices[0].message.content
75
-
 
 
 
 
 
 
76
  # Function to export results to CSV
77
  def export_to_csv(data):
78
  df = pd.DataFrame(data)
@@ -99,57 +102,38 @@ def main():
99
  st.session_state.results = []
100
 
101
  # File uploader for example graded essays (DOCX)
102
- example_files = st.file_uploader("Upload 10 example graded essays (DOCX)", type=["docx"], accept_multiple_files=True)
 
 
 
 
 
 
 
 
103
 
104
  # File uploader for corresponding scores (DOCX)
105
- scores_file = st.file_uploader("Upload the DOCX file containing corresponding scores", type=["xlsx"])
106
-
 
 
 
107
  # File uploader for new essays to be graded (DOCX)
108
- new_files = st.file_uploader("Upload DOCX files with essays to be graded", type=["docx"], accept_multiple_files=True)
109
-
 
110
  # Grading button
111
  if st.button("Grade Essays"):
112
- if example_files and scores_file and new_files:
113
- # Extract scores from the scores file
114
- scores_text = extract_text_from_docx(scores_file)
115
- scores_lines = scores_text.splitlines()
116
-
117
- # Create a dictionary to match scores to participant names
118
- scores_dict = {}
119
- for line in scores_lines:
120
- if ':' in line: # Assuming the format is "Participant Name: Score"
121
- name, score = line.split(':', 1)
122
- scores_dict[name.strip()] = score.strip()
123
-
124
- # Prepare guided data from example graded essays
125
- guided_data = {}
126
- for example_file in example_files:
127
- essay_text = extract_text_from_docx(example_file)
128
- participant_name = os.path.splitext(example_file.name)[0] # Assuming name is file name
129
- if participant_name in scores_dict:
130
- guided_data[participant_name] = {
131
- 'essay': essay_text,
132
- 'score': scores_dict[participant_name]
133
- }
134
-
135
- # Combine guided essays with their scores
136
- guided_data_combined = "\n".join([f"{name}: {data['essay']} (Score: {data['score']})" for name, data in guided_data.items()])
137
-
138
- # Process each new essay
139
- for new_file in new_files:
140
- new_essay = extract_text_from_docx(new_file)
141
- new_participant_name = os.path.splitext(new_file.name)[0] # Assuming name is file name
142
- st.write(f"Grading essay for: {new_participant_name}")
143
-
144
  # Grading the new essay using the provided rubric and example graded essays
145
- result = grade_essay(new_essay, guided_data_combined, rubric)
146
 
147
  # Parse feedback into rubric components
148
  parsed_scores = parse_feedback(result)
149
 
150
  # Store results in session state
151
  st.session_state.results.append({
152
- 'Participant Name': new_participant_name,
153
  'Essay File': new_file.name,
154
  **parsed_scores,
155
  'Feedback': result,
 
2
  import pandas as pd
3
  from openai import OpenAI
4
  import os
5
+ import json
6
+ IMPORT pypdf
7
  import subprocess
8
  TOKEN=os.getenv('HF_TOKEN')
9
  subprocess.run(["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"])
 
12
  client = OpenAI(api_key=OPENAI_API_KEY)  # API key is read from the OPENAI_API environment variable
13
  from docx import Document
14
 
 
 
 
 
 
15
 
16
  # Function to parse the feedback into rubric components
17
  def parse_feedback(feedback):
 
41
  return scores
42
 
43
  # Function to grade the essay using GPT-4
44
+ def grade_essay(essay, guided_data, rubric):
45
  # Sample prompt for grading using GPT-4
46
  prompt = f"""
47
  You are an consultant that grades marketing and business proposal based on a provided rubric, ensuring an unbiased evaluation while considering clarity, originality, organization, and depth of analysis. Advise in Vietnamse, only use English for buzzwords.
 
69
  {"role": "user", "content": prompt}
70
  ])
71
  return response.choices[0].message.content
72
def read_pdf(pdf_reader):
    """Concatenate the extractable text of every page of a PDF reader.

    Args:
        pdf_reader: Any object exposing an iterable ``pages`` attribute whose
            items provide an ``extract_text()`` method. Callers in this file
            pass a ``PyPDF2.PdfReader``.

    Returns:
        One string containing each non-empty page's text followed by a
        newline. Pages whose extracted text is empty or ``None`` are skipped,
        and an empty string is returned when no text is found at all.
    """
    # The previous version did ``all_text += ...`` without ever initialising
    # ``all_text``, which raised UnboundLocalError on the first page with text
    # (and on ``return`` for documents with no extractable text). Building the
    # result with ``join`` removes the accumulator entirely.
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    return "".join(f"{text}\n" for text in page_texts if text)
78
+
79
  # Function to export results to CSV
80
  def export_to_csv(data):
81
  df = pd.DataFrame(data)
 
102
  st.session_state.results = []
103
 
104
  # Load example graded essays from the PDF files in the local "data" directory
105
+ # example_files = st.file_uploader("Upload 10 example graded essays (DOCX)", type=["docx"], accept_multiple_files=True)
106
+ for filename in os.listdir("data"):
107
+ if filename.lower().endswith(".pdf"):
108
+ pdf_path = os.path.join(pdf_directory, filename)
109
+ with open(pdf_path, "rb") as pdf_file:
110
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
111
+ example_files = read_pdf(pdf_reader)
112
+
113
+
114
 
115
  # Load the corresponding scores from a local JSON file
116
+ # scores_file = st.file_uploader("Upload the json file containing corresponding scores", type=["xlsx"])
117
+ # Open and read the JSON file with utf-8 encoding
118
+ with open('abs.json', 'r', encoding='utf-8') as file:
119
+ scores_file = json.load(file)
120
+
121
  # File uploader for new proposals to be graded (PDF)
122
+ pdf_file = st.file_uploader("Upload proposal to be graded", type=["pdf"], accept_multiple_files=True)
123
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
124
+ new_file = read_pdf(pdf_reader)
125
  # Grading button
126
  if st.button("Grade Essays"):
127
+ if example_files and scores_file and new_file:
128
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  # Grading the new essay using the provided rubric and example graded essays
130
+ result = grade_essay(new_file, example_files, rubric)
131
 
132
  # Parse feedback into rubric components
133
  parsed_scores = parse_feedback(result)
134
 
135
  # Store results in session state
136
  st.session_state.results.append({
 
137
  'Essay File': new_file.name,
138
  **parsed_scores,
139
  'Feedback': result,