PDF-EXAM / app.py
ginipick's picture
Update app.py
57f518a verified
import streamlit as st
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI
import fitz
import json
import docx
import os
# Session states
# Streamlit re-executes this script on every interaction, so each key is only
# seeded when it is not already present in the session.
_session_defaults = {
    "mcqs": [],              # questions produced by the last generation
    "current_q": 0,          # index of the question currently on screen
    "user_answers": {},      # question index -> option chosen by the user
    "quiz_finished": False,  # set once the last question has been submitted
    "language": "English",   # UI language picked in the sidebar
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# UI Text Dictionary
# Single source of truth for the supported languages: the top-level keys are
# the labels offered in the sidebar selector, and each language maps the same
# set of message keys to its translation. The "score" entry is a str.format
# template with {score} and {total} placeholders.
ui_text = {
    "English": {
        "title": "📄 PDF/Word based Self EXAM",
        "sidebar_title": "Upload & Settings",
        "upload_prompt": "Upload a file (PDF or Word)",
        "num_questions": "Number of questions",
        "generate_button": "Generate EXAMs",
        "no_file_error": "Please upload a file.",
        "generating": "Extracting text and generating EXAMs...",
        "success": "✅ EXAMs generated successfully!",
        "error": "Error generating EXAMs:",
        "question_prefix": "Question",
        "choose_answer": "Choose an answer:",
        "next_button": "Next",
        "quiz_completed": "🎉 Quiz completed!",
        "results_header": "📊 Quiz Results",
        "your_answer": "Your answer:",
        "correct_answer": "Correct answer:",
        "score": "✅ You scored {score} out of {total}",
    },
    "한국어": {
        "title": "📄 PDF/Word 기반 객관식 문제 생성기",
        "sidebar_title": "파일 업로드 및 설정",
        "upload_prompt": "파일 업로드 (PDF 또는 Word)",
        "num_questions": "문제 개수",
        "generate_button": "문제 생성",
        "no_file_error": "파일을 업로드해주세요.",
        "generating": "텍스트 추출 및 문제 생성 중...",
        "success": "✅ 문제가 성공적으로 생성되었습니다!",
        "error": "문제 생성 오류:",
        "question_prefix": "문제",
        "choose_answer": "답을 선택하세요:",
        "next_button": "다음",
        "quiz_completed": "🎉 퀴즈가 완료되었습니다!",
        "results_header": "📊 퀴즈 결과",
        "your_answer": "당신의 답:",
        "correct_answer": "정답:",
        "score": "✅ {total}문제 중 {score}문제를 맞추셨습니다",
    },
}

# Language selection in sidebar
st.sidebar.title("🌐 Language / 언어")
# Offer exactly the languages that have translations, so the lookup below can
# never miss because a hand-copied label drifted from its dictionary key.
languages = list(ui_text)
current_language = st.session_state.language
language = st.sidebar.selectbox(
    "Select Language / 언어 선택",
    languages,
    # Pre-select the remembered language; fall back to the first entry if the
    # stored value is not one of the offered labels.
    index=languages.index(current_language) if current_language in languages else 0,
)
st.session_state.language = language

# Get current language texts
texts = ui_text[language]
# Page heading and sidebar heading, both in the active UI language.
st.title(texts["title"])
st.sidebar.title(texts["sidebar_title"])

# Source document: only PDF and Word (.docx) uploads are offered.
uploaded_file = st.sidebar.file_uploader(
    texts["upload_prompt"],
    type=["pdf", "docx"],
)

# How many questions to request from the model (1 to 20, 5 by default).
number_of_questions = st.sidebar.slider(
    texts["num_questions"],
    min_value=1,
    max_value=20,
    value=5,
)
# Gemini setup
# The API key is read from the environment; it is None when the variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
_gemini_settings = {
    "model": "gemini-2.0-flash-exp",
    "google_api_key": GOOGLE_API_KEY,
    "temperature": 0.7,
}
llm = ChatGoogleGenerativeAI(**_gemini_settings)
# Templates for different languages
# Both templates use str.format-style placeholders: {number} and {text} are the
# only substitutions, and the literal JSON braces are escaped as {{ and }}.
template_english = (
    "You are an expert EXAM generator. Generate {number} unique multiple-choice questions from the given text.\n"
    "Each question must have exactly 1 correct answer and 3 incorrect options.\n"
    "Strictly return output in the following JSON format (no explanations, no markdown):\n"
    "[\n"
    " {{\n"
    ' "question": "What is ...?",\n'
    ' "options": ["Option A", "Option B", "Option C", "Option D"],\n'
    ' "answer": "Option D"\n'
    " }},\n"
    " ...\n"
    "]\n"
    "TEXT:\n"
    "{text}"
)
# Korean counterpart of the prompt above, with the same structure and
# placeholders. The text is stored as real Hangul so the model receives
# readable instructions rather than mis-decoded bytes.
template_korean = (
    "당신은 전문 객관식 문제 출제자입니다. 주어진 텍스트에서 {number}개의 고유한 객관식 문제를 생성하세요.\n"
    "각 문제는 정확히 1개의 정답과 3개의 오답을 가져야 합니다.\n"
    "다음 JSON 형식으로만 출력하세요 (설명이나 마크다운 없이):\n"
    "[\n"
    " {{\n"
    ' "question": "...는 무엇입니까?",\n'
    ' "options": ["선택지 A", "선택지 B", "선택지 C", "선택지 D"],\n'
    ' "answer": "선택지 D"\n'
    " }},\n"
    " ...\n"
    "]\n"
    "텍스트:\n"
    "{text}"
)
# Pick the prompt wording that matches the UI language; anything other than
# English uses the Korean template.
if language == "English":
    template = template_english
else:
    template = template_korean

# Bind the chosen prompt and the Gemini model into one chain.
prompt = PromptTemplate(template=template, input_variables=["text", "number"])
mcq_chain = LLMChain(prompt=prompt, llm=llm)
# Extract text from PDF or Word
def extract_text(file):
    """Return the plain text of an uploaded PDF or Word document.

    Args:
        file: A binary file-like object with a ``name`` attribute. The file
            type is decided from the name's extension, ignoring case.

    Returns:
        The concatenated text of all PDF pages, or the paragraphs of a
        ``.docx`` file joined with newlines. An empty string is returned for
        any other extension.
    """
    # Compare case-insensitively so names such as "REPORT.PDF" are recognised.
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        # PyMuPDF parses from an in-memory byte stream; the context manager
        # closes the document once the text has been collected.
        with fitz.open(stream=file.read(), filetype="pdf") as pdf:
            return "".join(page.get_text() for page in pdf)

    if filename.endswith(".docx"):
        document = docx.Document(file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    return ""
# Generate MCQs
def _parse_mcqs(raw_response):
    """Turn the model's raw reply into a list of usable question dicts.

    Args:
        raw_response: The text returned by the model, expected to contain a
            JSON array of objects with "question", "options" and "answer".

    Returns:
        The questions that can actually be asked and scored: dict items with a
        non-empty "question", an "options" list of at least two entries, and
        an "answer" that is one of those options. Other items are dropped.

    Raises:
        json.JSONDecodeError: If no JSON could be parsed from the reply.
        ValueError: If the JSON is not an array or holds no usable question.
    """
    cleaned = raw_response.strip()
    # The reply may be wrapped in a Markdown fence, with or without a
    # "json" language tag, even though the prompt asks for bare JSON.
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # Fall back to the outermost [...] span in case the array is
        # surrounded by explanatory text.
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(cleaned[start:end + 1])

    if not isinstance(parsed, list):
        raise ValueError("expected a JSON array of questions")

    questions = []
    for item in parsed:
        if not isinstance(item, dict):
            continue
        options = item.get("options")
        if (
            item.get("question")
            and isinstance(options, list)
            and len(options) >= 2
            # Scoring compares the picked option with "answer", so an answer
            # outside the options could never be matched.
            and item.get("answer") in options
        ):
            questions.append(item)

    if not questions:
        raise ValueError("the model returned no usable questions")
    return questions


if st.sidebar.button(texts["generate_button"]):
    if uploaded_file is None:
        st.error(texts["no_file_error"])
    else:
        with st.spinner(texts["generating"]):
            # UI boundary: any failure while reading the file, calling the
            # model or parsing its reply is shown to the user instead of
            # surfacing as a traceback.
            try:
                # Rewind defensively in case the buffer was already read.
                uploaded_file.seek(0)
                text = extract_text(uploaded_file)
                if not text.strip():
                    raise ValueError("no extractable text found in the uploaded file")
                response = mcq_chain.run(text=text, number=str(number_of_questions))
                mcqs_json = _parse_mcqs(response)
            except Exception as e:
                st.error(f"{texts['error']} {e}")
            else:
                # Start a fresh quiz with the new questions.
                st.session_state.mcqs = mcqs_json
                st.session_state.current_q = 0
                st.session_state.user_answers = {}
                st.session_state.quiz_finished = False
                st.success(texts["success"])
# Display question
quiz_in_progress = bool(st.session_state.mcqs) and not st.session_state.quiz_finished
if quiz_in_progress:
    position = st.session_state.current_q
    question = st.session_state.mcqs[position]
    st.subheader(f"{texts['question_prefix']} {position + 1}: {question['question']}")

    # One form per question; the keys include the index so every question
    # gets its own form and radio widget.
    with st.form(key=f"form_{position}"):
        selected_option = st.radio(
            texts["choose_answer"],
            question["options"],
            key=f"radio_{position}",
        )
        submitted = st.form_submit_button(texts["next_button"])

    if submitted:
        # Record the choice, then either advance or close the quiz.
        st.session_state.user_answers[position] = selected_option
        on_last_question = position >= len(st.session_state.mcqs) - 1
        if on_last_question:
            st.session_state.quiz_finished = True
            st.success(texts["quiz_completed"])
        else:
            st.session_state.current_q += 1
        st.rerun()
# Show result
if st.session_state.quiz_finished:
    st.header(texts["results_header"])
    score = 0
    total = len(st.session_state.mcqs)

    for i, q in enumerate(st.session_state.mcqs):
        # .get() yields None for a question that has no recorded answer.
        user_ans = st.session_state.user_answers.get(i)
        correct_ans = q["answer"]
        is_correct = user_ans == correct_ans

        # Same "<prefix> <n>: <question>" layout as the heading shown while
        # the quiz is running (space between prefix and number).
        st.markdown(f"**{texts['question_prefix']} {i + 1}: {q['question']}**")

        # Colour-code the user's answer; the correct answer is only listed
        # when the user got the question wrong.
        if is_correct:
            score += 1
            st.markdown(f"- {texts['your_answer']} :green[{user_ans}] ✓")
        else:
            st.markdown(f"- {texts['your_answer']} :red[{user_ans}] ✗")
            st.markdown(f"- {texts['correct_answer']} :green[{correct_ans}]")
        st.markdown("---")

    # Score display
    score_text = texts["score"].format(score=score, total=total)
    st.success(score_text)

    # Reset button: clears the quiz state and reruns so the page returns to
    # its initial, no-quiz state.
    reset_text = "Start New Quiz" if language == "English" else "새 퀴즈 시작"
    if st.button(reset_text):
        st.session_state.mcqs = []
        st.session_state.current_q = 0
        st.session_state.user_answers = {}
        st.session_state.quiz_finished = False
        st.rerun()