import math
import os
import tempfile

import gradio as gr
import nltk
from PyPDF2 import PdfReader
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

# NLTK download is now handled by the Dockerfile, so we just import.
# The `punkt` tokenizer should be available in the container.

# --- Configuration ---
QG_MODEL_NAME = "valhalla/t5-small-e2e-qg"
# Define the local path where the model will be cached inside the Docker container
LOCAL_MODEL_CACHE_DIR = "/app/model_cache"
CHUNK_SIZE_FOR_QG = 700  # Target characters per text chunk for LLM processing
OVERLAP_PERCENTAGE = 0.15  # 15% overlap between chunks for context preservation

# Default marks per question type for tallying
DEFAULT_MCQ_MARKS = 2
DEFAULT_DESCRIPTIVE_MARKS = 5

# Prefix of the message returned by extract_text_from_pdf() on failure.
# Callers test for this exact prefix instead of searching for the word
# "Error", which can legitimately occur inside the text of a PDF.
PDF_EXTRACTION_ERROR_PREFIX = "Error extracting text from PDF:"

# --- COMBINED Hardcoded MCQs from both samples ---
# Accounting Sample MCQs (Original)
HARDCODED_MCQS = [
    {
        "scenario_title": "Case-Scenario 1: Tropic Pvt Ltd",
        "scenario_text": """ Tropic Pvt Ltd was engaged in the business of manufacturing Product P. The product P required 2 units of Material R. The company intends to sell 24,000 units of Product P and does not wish to retain any closing stock. However, the opening stock of Product P is 4,000 units. Raw Material R has to be procured after considering the opening stock of R amounting to 10,000 units. The technical team further confirms that the yield in the course of manufacture of Product P is 80% of the input. The company presently procures its annual requirement of materials on a quarterly basis from its regular supplier enjoying a discount of 2.5% on the invoice price of the material of ₹ 20 per unit. Every time the company places orders for Material R, it incurs ₹ 125 for each of the order placed. The company also has taken a rented warehouse for storing material R and the annual cost of storage is ₹ 10 per unit. The company appointed Mr. T a Chartered Accountant to review the cost of inventory and provide measures of improvement of cost. After reviewing the material purchase and consumption pattern, Mr. T suggested that the implementation of Wilson’s EOQ would be beneficial to the company. He emphasized that the change in the quantity ordered would result in reduction of inventory carrying costs. Mr. T further reviewed the labour costing and identified that the employees were paid overtime wages to ensure timely completion of projects. Overtime wages comprised of daily wage and 100% of daily wages as overtime premium. Based on the cost record it was understood that every month had 180 hours of regular working hours which was remunerated at ₹ 200 per hour and Overtime of 20 hours which was remunerated at ₹ 400 per hour. Mr. T suggested that the above time taken may be considered as standard and a scheme of Incentive be introduced to reduce overtime cost. He further indicated that Rowan scheme of incentive be used to measure performance and the improved productivity per hour would be 125 units per hour. In this regard, address the following queries in line with the suggestions provided by Mr. T to Tropic Pvt Ltd. """,
        "questions": [
            {
                "text": "The annual requirement of Material R to meet the target sales of 24,000 units of Product P is:",
                "options": [
                    "48,000 units",
                    "60,000 units",
                    "40,000 units",
                    "50,000 units",
                ],
                "marks": 2,
            },
            {
                "text": "The ordering quantity as per the current inventory policy and the proposed Wilson’s Economic order quantity of Material R are:",
                "options": [
                    "Order Quantity as per the current inventory policy – 10,000 units & Economic Order Quantity – 1,000 units",
                    "Order Quantity as per the current inventory policy – 15,000 units & Economic Order Quantity – 1,225 units",
                    "Order Quantity as per the current inventory policy – 12,000 units & Economic Order Quantity – 1,095 units",
                    "Order Quantity as per the current inventory policy – 12,500 units & Economic Order Quantity – 1,118 units",
                ],
                "marks": 2,
            },
            {
                "text": "The net savings to inventory cost on migration from the current inventory policy to the Wilson’s Economic Order Quantity policy would be:",
                "options": [
                    "Savings from EOQ as compared to current discount policy – ₹26,820",
                    "Savings from EOQ as compared to current discount policy – ₹20,500",
                    "Savings from EOQ as compared to current discount policy – ₹33,253",
                    "Savings from EOQ as compared to current discount policy – ₹25,546",
                ],
                "marks": 2,
            },
            {
                "text": "The savings in labour cost achieved by implementation of incentive scheme over the overtime payments amounts to:",
                "options": [
                    "₹9,600",
                    "₹5,600",
                    "₹8,000",
                    "₹3,200",
                ],
                "marks": 2,
            },
        ],
    },
    {
        "scenario_title": "Case-Scenario 2: ABC Engineering Works",
        "scenario_text": """ ABC Engineering Works is a medium-scale engineering company employing both skilled and unskilled workers in its machining department. The company uses time wage system with a guaranteed minimum wage, and also operates a group bonus scheme for efficiency beyond the standard. Standard time allowed to produce 1 unit of Product X = 5 hours/unit. Actual time taken per unit by the group = 4 hours/unit. Normal time rate = ₹80/hour. The group comprises 6 workers (3 skilled and 3 unskilled). For a particular week: Total output = 120 units of Product X. Total actual time worked = 480 hours (by the group collectively). Out of 480 hours, 20 hours are recorded as idle time due to machine breakdown, which is beyond the workers’ control. Under the group bonus scheme, each worker is entitled to a bonus equal to 50% of the time saved × time rate, shared equally among all members of the group. In addition: There is a statutory requirement to pay overtime at double the normal rate for any hours worked beyond 48 hours per week per worker. Each worker worked 52 hours in the week. The company’s policy is to treat idle time as normal idle time and charge it to factory overhead. """,
        "questions": [
            {
                "text": "What is the total bonus payable to the group for the week under the group bonus scheme?",
                "options": ["₹9,600", "₹12,000", "₹7,200", "₹14,400"],
                "marks": 2,
            },
            {
                "text": "What is the total wages (including bonus, overtime & excluding idle time) payable to one skilled worker for the week, assuming time rate applies for normal time and overtime is paid at double rate?",
                "options": ["₹5,920", "₹5,760", "₹6,080", "₹6,240"],
                "marks": 2,
            },
            {
                "text": "If the company decided to treat idle time as abnormal loss and charged to Costing Profit & Loss A/c, what would be the impact on factory cost for the week compared to the current treatment?",
                "options": ["Increase by ₹1,600", "Decrease by ₹1,600", "No impact", "Increase by ₹3,200"],
                "marks": 2,
            },
            {
                "text": "What is the standard time allowed for actual production of 120 units?",
                "options": ["600 hours", "480 hours", "500 hours", "550 hours"],
                "marks": 1,
            },
        ],
    },
]

# Auditing Sample MCQs (New) - Note: Flattening the structure for easier iteration if not scenario-based
HARDCODED_MCQS.extend([
    {
        "text": "CA Arjun Mehra is offered appointment as auditor of a company. His distant uncle, who recently passed away, held substantial shares in the same company and had nominated CA Arjun Mehra as the successor. What is the correct course of action for CA Arjun Mehra before accepting the audit assignment?",
        "options": [
            "A. He can accept the appointment as the relative held the shares, not him.",
            "B. He should first renounce or dispose of the shares before accepting the appointment.",
            "C. He can accept the appointment and later sell the shares.",
            "D. He need not take any action, as the law does not prohibit holding shares by relatives.",
        ],
        "marks": 2,
    },
    # The Case Scenario 1 from Auditing is already handled by the `HARDCODED_MCQS` scenario structure for `RK & Associates`.
    # No need to add it again as it's part of the scenario_text and questions in the previous list.
    # NOTE(review): no `RK & Associates` scenario is present in the list above - confirm whether it was dropped.
    {
        "text": "Consider following statements in relation to “Limited assurance engagement”:–\nStatement I – It involves obtaining sufficient appropriate evidence to draw reasonable conclusions.\nStatement II – Review of interim financial information of a company is an example of limited assurance engagement.",
        "options": [
            "(a) Statement I is correct. Statement II is incorrect.",
            "(b) Both Statements I and II are correct.",
            "(c) Both Statements I and II are incorrect.",
            "(d) Statement I is incorrect. Statement II is correct.",
        ],
        "marks": 1,
    },
    {
        "text": "Management of ECX Systems Limited acknowledged that going concern assumption will not be suitable while preparing financial statements and informs the auditor that the fact of uncertainty related to going concern would suitably be disclosed in the notes of accounts. How should the auditor address this while preparing Independent Auditor's Report?",
        "options": [
            "(A) To include a separate section under the heading \"Material uncertainty related to Going Concern\" in the auditor's report as per the requirement of SA 570.",
            "(B) To include a separate section under the heading \"Key Audit Matters\" in the auditor's report as per the requirement of SA 701.",
            "(C) To include a sub section under the heading \"Material uncertainty related to Going Concern\" under separate section of \"Key Audit Matters\" in the auditor's report as per the requirement of SA 705.",
            "(D) To include a separate section under the heading \"Key Audit Matters\" in the auditor's report as per the requirement of SA 570.",
        ],
        "marks": 2,
    },
    {
        "text": "Which kind of audit report will the auditor issue, if the use of going concern basis of accounting is appropriate, but a material uncertainty exists and adequate disclosure of the material uncertainty is made in the financial statements by the management?",
        "options": [
            "(A) Adverse opinion",
            "(B) Disclaimer of opinion",
            "(C) Unmodified opinion",
            "(D) Qualified opinion",
        ],
        "marks": 2,
    },
    {
        "text": "ABC & Co., Chartered Accountants, are auditing the financial statements of XY Ltd. During the audit, they found that the company has disclosed a significant litigation in the notes to accounts. The disclosure is adequate, and the matter is fundamental to the users’ understanding of the financial statements. The auditor decides not to modify the opinion and also concludes that it is not a Key Audit Matter. Which of the following actions is most appropriate for the auditor?",
        "options": [
            "(a) Issue a qualified opinion due to uncertainty of the litigation.",
            "(b) Disclose the matter under Key Audit Matters as per SA 701.",
            "(c) Include an Emphasis of Matter paragraph without modifying the opinion.",
            "(d) Do not report the matter since it is already disclosed in the notes to accounts.",
        ],
        "marks": 2,
    },
    {
        "text": "CA Sneha & Associates, a mid-sized audit firm, has grown rapidly over the past 2 years and hired several new audit assistants. During a recent review, the firm's quality reviewer noticed that:\nEngagement partners were not reviewing key working papers,\nStaff were deployed on audits without checking their prior experience,\nNo regular monitoring or peer reviews were conducted,\nEthical compliance declarations were last updated a year ago.\nCA Sneha wants to strengthen the firm’s system of quality control as per SQC 1.\nQuestion: Based on the above case, which of the following elements of quality control have likely been violated or inadequately implemented?",
        "options": [
            "(a) Leadership responsibilities and Monitoring",
            "(b) Human Resources and Monitoring",
            "(c) Engagement Performance and Acceptance of Clients",
            "(d) Human Resources and Inventory Management",
        ],
        "marks": 2,
    },
])

# --- COMBINED Hardcoded Descriptive Problems from both samples ---
HARDCODED_DESCRIPTIVE_PROBLEMS = [
    # Accounting Sample Descriptive Problems (Original)
    {
        "title": "Question No 1",
        "text": """ In a manufacturing company, the overhead is recovered as follows: Factory Overheads: a fixed percentage basis on direct wages and Administrative overheads: a fixed percentage basis on factory cost. The company has furnished the following data relating to two jobs undertaken by it in a period. Job 1 (₹): Direct materials 1,08,000, Direct wages 84,000, Selling price 3,33,312, Profit percentage on total cost 12%. Job 2 (₹): Direct materials 75,000, Direct wages 60,000, Selling price 2,52,000, Profit percentage on total cost 20%. You are required to: (i) Compute the percentage recovery rates of factory overheads and administrative overheads. (ii) Calculate the amount of factory overheads, administrative overheads and profit for each of the two jobs. (iii) Using the above recovery rates, determine the selling price to be quoted for job 3. Additional data pertaining to Job 3 is as follows: Direct materials ₹ 68,750, Direct wages ₹ 22,500, Profit percentage on selling price 15%. """,
        "marks": 15,
    },
    {
        "title": "Question No 2(a)",
        "text": """ The management of VDW Ltd. is worried about increasing labour turnover in the factory and, before analysing the causes and taking the remedial steps, the management wants to have an idea of the profit foregone as a result of the labour turnover in the last year. Last year sales amount to ₹ 98,23,600 and the P/V Ratio was 20%. The total number of actual hours worked by the direct labour force was 5.23 lac. As a result of the delays by the Personnel Department in filling vacancies due to labour turnover, 100000 productive hours were lost. The actual labour hours included 30000 hours attributable to training new recruits, out of which half of the hours was unproductive. An analysis of cost incurred consequent on labour turnover revealed the following: Settlement Costs due to leaving: ₹45,680, Recruitment Costs: ₹30,620, Selection Costs: ₹15,750, Training Costs: ₹37,250. Assuming that the potential loss as a consequence of labour turnover could have been sold at the prevailing prices, find out the profit foregone in the last year on account of labour turnover. """,
        "marks": 5,
    },
    {
        "title": "Question No 2(b)",
        "text": """ Sky & Co., an unregistered supplier under GST, purchased material from Vye Ltd. which is registered under GST. The following information is available for one lot of 5,000 units of material purchased: Listed price of one lot ₹2,50,000, Trade discount @ 10% on listed price, CGST and SGST 12% (6% CGST + 6% SGST), Cash discount @ 10% (Will be given only if payment is made within 30 days.), Toll Tax paid ₹5,000, Freight and Insurance ₹17,000, Demurrage paid to transporter ₹5,000, Commission and brokerage on purchases ₹10,000, Amount deposited for returnable containers ₹30,000, Amount of refund on returning the container ₹20,000, Other Expenses @ 2% of total cost, 20% of material shortage is due to normal reasons. The payment to the supplier was made within 21 days of the purchases. You are required to CALCULATE cost per unit of material purchased by Sky & Co. """,
        "marks": 5,
    },
    {
        "title": "Question No 3(a)",
        "text": """ A machine shop has 8 identical machines manned by 6 operators. The machine cannot work without an operator wholly engaged on it. The original cost of all the 8 machines works out to ₹ 32,00,000. The following particulars are furnished for a six months period: Normal available hours per month per operator 208, Absenteeism (without pay) hours per operator 18, Leave (with pay) hours per operator 20, Normal unavoidable idle time-hours per operator 10, Average rate of wages per day of 8 hours per operator ₹ 100, Production bonus estimated 10% on wages, Power consumed ₹ 40,250, Supervision and Indirect Labour ₹ 16,500, Lighting and Electricity ₹ 6,000. The following particulars are given for a year: Insurance ₹ 3,60,000, Sundry work Expenses ₹ 50,000, Management Expenses allocated ₹ 5,00,000, Depreciation 10% on the original cost, Repairs and Maintenance (including consumables): 5% of the value of all the machines. Prepare a statement showing the comprehensive machine hour rate for the machine shop. """,
        "marks": 5,
    },
    {
        "title": "Question No 3(b)",
        "text": """ Ten men work as a group. When the weekly production of the group exceeds standard (200 pieces per hour) each man in the group is paid a bonus for the excess production in addition to his wages at hourly rates. The bonus is computed thus: The percentage of production in excess of the standard amount is found and one-half of this percentage is considered as the men’s share. Each man in the group is paid as bonus this percentage of a wage rate of ₹3.20 per hour. There is no relationship between the individual workman’s hourly rate and the bonus rate. The following is the week’s records. Monday: Hours Worked 90, Production 22,100. Tuesday: Hours Worked 88, Production 22,600. Wednesday: Hours Worked 90, Production 24,200. Thursday: Hours Worked 84, Production 20,100. Friday: Hours Worked 88, Production 20,400. Saturday: Hours Worked 40, Production 10,200. Total: Hours Worked 480, Production 1,19,600. (a) Compute the rate and amount of bonus for the week; (b) Compute the total pay of Jones who worked 41 ½ hours and was paid ₹2 per hour basic and of smith who worked 44 ½ hours and was paid ₹2.50 per hour basic. """,
        "marks": 5,
    },
    {
        "title": "Question No 4(a)",
        "text": "Write a short note on VED analysis in inventory control.",
        "marks": 5,
    },
    {
        "title": "Question No 4(b)",
        "text": """ State the method of costing that would be most suitable for: (i) Oil Refinery (ii) Interior Decoration (iii) Airlines Company (iv) Advertising (v) Car Assembly """,
        "marks": 5,
    },
    # Auditing Sample Descriptive Problems (New)
    {
        "title": "Question No 1A",
        "text": """ M/S Zenith Info Corp Ltd., an unlisted public company, is under statutory audit for FY 2023 –24. While reviewing the company’s financials and compliance under the Companies Act, the auditor observed the following: 1. The company granted a loan of ₹75 lakhs to Zenith Digital Pvt. Ltd., a company in which the Managing Director’s son holds 45% of the shareholding. No board or special resolution was passed under Section 185 or 186. Further, no interest was charged on the loan. 2. The company accepted unsecured loans amounting to ₹2 crores from its customers. No intimation was filed with the ROC, and neither the Reserve Bank of India’s directives nor the provisions of Sections 73 to 76 were complied with. The company claims that these are ‘advances against future services’, not deposits. However, the auditor found that the services were not rendered for more than 12 months. 3. The Central Government has notified the company under Section 148(1) of the Companies Act for the maintenance of cost records relating to its chemical division. On enquiry, the auditor found that cost records were maintained manually only till October 2023. From November onwards, the company switched to a new ERP system, but no cost records were migrated or continued in the new system due to technical issues. Required: As a statutory auditor of M/S Zenith Info Corp Ltd., analyse and explain how you would report under Clause (iv), (v), and (vi) of the Companies (Auditor's Report) Order, 2020 (CARO, 2020). """,
        "marks": 6,
    },
    {
        "title": "Question No 1B",
        "text": """ ABC & Co. and XYZ & Associates have been appointed as joint auditors for the statutory audit of LMN Ltd. for the financial year 2024-25. The audit work was divided among the joint auditors, and both firms carried out their respective responsibilities independently. During the audit, the following issues were noted: 1. Certain audit areas, like revenue recognition and related party transactions, were not explicitly divided between the two auditors. 2. A joint meeting was held where both auditors agreed to use a particular sampling technique for inventory verification. 3. One of the auditors identified a possible misstatement in the classification of liabilities and communicated it to the other firm. They mutually agreed not to raise the issue formally as it was considered immaterial. 4. The final audit report was prepared and signed jointly, confirming compliance with all relevant statutory requirements. Identify and explain the specific areas where both auditors would be jointly and severally responsible, despite the division of audit work between them. """,
        "marks": 4,
    },
    {
        "title": "Question No 1C",
        "text": """ CA Rohan has been appointed as the statutory auditor of Aditi Limited for the financial year 2023–24. During the course of the audit, the management of Aditi Limited requests a modification in the original terms of audit engagement, citing expansion of business operations and the introduction of new products. Can CA Rohan accept the management’s request to revise the terms of the audit engagement? Under what circumstances is such a change in engagement terms permissible as per SA 210? """,
        "marks": 5,
    },
    {
        "title": "Question No 2A",
        "text": """ CA. Nisha Sharma has completed the audit of a company. She instructed Riya, a CA student undergoing article ship in her firm and a part of the audit engagement team, to prepare the draft audit report. Riya was specifically told to draft the report expressing an unmodified opinion. After drafting the paragraph for the unmodified opinion, Riya assumed that there is no need to include the “Basis for Opinion” paragraph. With reference to the relevant Standard on Auditing, explain why Riya's understanding is not correct. """,
        "marks": 2,
    },
    {
        "title": "Question No 2B",
        "text": """ CA. Meenal Gupta, while conducting the audit of a company, notices a complete lack of cooperation from the client. She is not being given proper support for the inventory count process at year-end. Additionally, the CFO has refused to share current contact details of customers and suppliers for sending external confirmations, citing business confidentiality. Even email IDs have not been provided. She is also unable to verify the company's revenue due to incomplete information. For expenses, although she requested bills on a sample basis, the staff kept giving excuses. The issue was escalated to senior management, but no action was taken. The auditor believes that there may be material misstatements which could affect multiple areas of the financial statements. Assuming the auditor cannot withdraw from the engagement, what type of opinion should be expressed? """,
        "marks": 3,
    },
    {
        "title": "Question No 2C",
        "text": """ R Auto Ltd is a prominent Indian automobile company engaged in the manufacturing of commercial, passenger, and electric vehicles. The company is at the forefront of India’s transition towards electric mobility and holds a strong position in one of the fastest-growing automotive industries. SR & Co. have been appointed as the statutory auditors of R Auto Ltd for the financial year 2022-23. Two newly inducted audit assistants, Riya and Manav, are part of the audit engagement team for this assignment. However, both are unclear about the real purpose of the audit. They believe that an audit ensures that there are no errors or frauds in the financial statements. You are required to: 1. Comment on whether the perception of Riya and Manav is correct. 2. Briefly state the objectives of an independent audit as per the Standards on Auditing. """,
        "marks": 5,
    },
    {
        "title": "Question No 3A",
        "text": """ Aeron Ltd is a listed entity engaged in the business of manufacturing electric vehicle components. During the audit of Aeron Ltd’s financial statements for FY 2023-24, the statutory audit team observed that the company had applied multiple changes in accounting policies and estimates due to the introduction of new production lines and government grants. The management believes all disclosures are sufficient as per their understanding. However, audit assistants Mehul and Tanya, while performing review procedures, are unsure about the specific evaluations an auditor must conduct while forming an opinion on financial statements. They believe the auditor only checks totals and trial balance agreement. You are required to: 1. Comment on the perception of Mehul and Tanya. 2. As per SA 700 (Forming an Opinion and Reporting on Financial Statements), list out specific evaluations that the auditor must perform while evaluating financial statements. """,
        "marks": 6,
    },
    {
        "title": "Question No 3B",
        "text": """ Mr. X is a professional accountant in service. In terms of employment and professional relationships with the employer, he has to be alert to the possibility of inadvertent disclosure of any information outside the employing organization. However, in view of disclosure required by law, Mr. X had to divulge the information and documents as evidence in the course of legal proceedings. Whether Mr. X has violated any fundamental principle governing professional ethics in this case? Explain. """,
        "marks": 4,
    },
    {
        "title": "Question No 4A",
        "text": """ Misha Creations is a partnership firm engaged in the business of selling designer apparel. The firm appointed KP & Associates, Chartered Accountants, to conduct the audit of their financial records for one financial year. The auditors reviewed the internal control mechanisms of the firm, performed required audit procedures, and carried out necessary verifications. Specifically, they checked purchase transactions on a sample basis, verified purchase bills with the GST portal, and Also obtained confirmations from suppliers. Based on the audit evidence collected, they expressed satisfaction and issued an audit report stating that the financial statements presented a true and fair view of the firm’s financial position. However, later it came to light that the purchase manager handling procurement from a particular location was involved in booking fictitious purchases of lower amounts by colluding with dishonest suppliers. The payments to such suppliers were routed through banking channels with the help of the accountant. The partners of the firm now criticize the auditors, calling the audit process ineffective. Are the partners justified in their opinion? Think and suggest any plausible reason behind this situation. """,
        "marks": 5,
    },
    {
        "title": "Question No 4B",
        "text": """ CA Neha is a statutory auditor of XYZ Ltd. for the last seven years. During the current audit, one of the audit team members was found to be a cousin of the CFO of the company. Additionally, CA Neha has developed a close personal rapport with the finance head due to long association. The management of XYZ Ltd. also offers expensive gifts to the audit team during festivals. Meanwhile, during the audit, when CA Neha raised concerns over some aggressive revenue recognition practices, the company's directors hinted at replacing the audit firm next year and cutting down the audit fees if the audit report contains any adverse remarks. (a) Identify and explain the types of threats to auditor’s independence involved in the above scenario. And (b) Safeguard the Threats / Measure to reduce threats """,
        "marks": 5,
    },
]

# --- Global variable for question generation pipeline ---
qg_pipeline = None


def initialize_question_generator():
    """Initialize the Hugging Face question generation pipeline.

    Loads the tokenizer and model named by ``QG_MODEL_NAME`` from
    ``LOCAL_MODEL_CACHE_DIR`` with ``local_files_only=True`` (no download is
    attempted) and stores the resulting pipeline in the module-level
    ``qg_pipeline``. It is a no-op when the pipeline is already set. On any
    failure the error is printed and ``qg_pipeline`` stays ``None``; nothing
    is raised, so the app can still start and report the problem later.
    """
    global qg_pipeline
    if qg_pipeline is not None:
        return

    print("Attempting to initialize question generation pipeline...")
    try:
        # Load the model and tokenizer from the local cache directory
        # The model and tokenizer are pre-downloaded during Docker build
        tokenizer = AutoTokenizer.from_pretrained(
            QG_MODEL_NAME, local_files_only=True, cache_dir=LOCAL_MODEL_CACHE_DIR
        )
        model = AutoModelForSeq2SeqLM.from_pretrained(
            QG_MODEL_NAME, local_files_only=True, cache_dir=LOCAL_MODEL_CACHE_DIR
        )
        qg_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, use_fast=False)
        print(f"Question generation pipeline initialized successfully with model: {QG_MODEL_NAME}")
    except Exception as e:
        print(f"Error initializing question generation pipeline: {e}")
        print("Please ensure the model was correctly pre-downloaded during the Docker build.")
        qg_pipeline = None


# Call initialization once at startup
initialize_question_generator()


# --- PDF Text Extraction ---
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of the PDF at *pdf_file_path*.

    Each page that yields text contributes that text followed by a newline;
    pages with no extractable text are skipped. On any failure the function
    does not raise: it returns a message starting with
    ``PDF_EXTRACTION_ERROR_PREFIX`` followed by the exception text.
    """
    try:
        with open(pdf_file_path, 'rb') as file:
            reader = PdfReader(file)
            page_texts = (page.extract_text() for page in reader.pages)
            text = "".join(page_text + "\n" for page_text in page_texts if page_text)
        print(f"Successfully extracted text from {pdf_file_path}. Total characters: {len(text)}")
        return text
    except Exception as e:
        return f"{PDF_EXTRACTION_ERROR_PREFIX} {e}"


# --- Advanced Text Chunking ---
def _trailing_overlap(sentences, overlap_chars):
    """Return the longest run of trailing sentences fitting in *overlap_chars*.

    Sentences are taken from the end of *sentences* while their combined
    length (each counted with one joining space) stays within
    *overlap_chars*. Returns ``(kept_sentences, kept_length)`` with the
    sentences in their original order.
    """
    kept = []
    kept_len = 0
    for sentence in reversed(sentences):
        if kept_len + len(sentence) + 1 > overlap_chars:
            break
        kept.append(sentence)
        kept_len += len(sentence) + 1
    kept.reverse()
    return kept, kept_len


def chunk_text_with_overlap_and_sentence_awareness(text, target_chunk_size, overlap_percentage):
    """Split *text* into chunks on sentence boundaries, with overlap.

    Sentences from ``nltk.sent_tokenize`` are accumulated until adding the
    next one would exceed *target_chunk_size* characters; the chunk is then
    closed and the next chunk is seeded with as many trailing sentences of
    the closed chunk as fit in ``target_chunk_size * overlap_percentage``
    characters. Any chunk longer than 1.5x the target (a single very long
    sentence) is cut into fixed-size slices of *target_chunk_size*.

    Returns a list of strings; empty when *text* has no sentences.
    """
    overlap_chars = int(target_chunk_size * overlap_percentage)

    chunks = []
    current_chunk_sentences = []
    current_chunk_len = 0
    for sentence in nltk.sent_tokenize(text):
        # If adding the next sentence exceeds target_chunk_size, finalize current chunk
        if current_chunk_sentences and current_chunk_len + len(sentence) + 1 > target_chunk_size:
            chunks.append(" ".join(current_chunk_sentences))
            # Start the new chunk with the overlap taken from the end of the old one
            current_chunk_sentences, current_chunk_len = _trailing_overlap(
                current_chunk_sentences, overlap_chars
            )
        current_chunk_sentences.append(sentence)
        current_chunk_len += len(sentence) + 1  # +1 for space

    # Add the last chunk if it's not empty
    if current_chunk_sentences:
        chunks.append(" ".join(current_chunk_sentences))

    # Fallback for very long individual sentences (shouldn't happen often with typical text)
    final_chunks = []
    for chunk in chunks:
        if len(chunk) > target_chunk_size * 1.5:
            final_chunks.extend(
                chunk[start:start + target_chunk_size]
                for start in range(0, len(chunk), target_chunk_size)
            )
        else:
            final_chunks.append(chunk)
    return final_chunks


# --- Question Generation from Chunk (LLM-based) ---
def generate_conceptual_question_from_chunk(text_chunk):
    """Generate one conceptual question from *text_chunk* with the pipeline.

    Returns ``{'question': <text ending in '?'>, 'answer': None}``, or
    ``None`` when the pipeline is not initialized, the model returns nothing
    usable, or generation raises (the error is printed, not propagated).
    """
    if qg_pipeline is None:
        return None
    try:
        prompt_text = (
            f"Based on the following text, generate one clear and concise conceptual question suitable for an exam. "
            f"The question should be directly answerable from the text provided and should encourage understanding, "
            f"not just memorization. Ensure it ends with a question mark.\n\n"
            f"Text: {text_chunk}\n\nQuestion:"
        )
        generated_outputs = qg_pipeline(
            prompt_text,
            max_length=150,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
        )
        if generated_outputs and generated_outputs[0] and 'generated_text' in generated_outputs[0]:
            question_text = generated_outputs[0]['generated_text'].strip()
            if question_text.lower().startswith("question:"):
                question_text = question_text[len("question:"):].strip()
            # End-to-end QG models can emit several questions joined by a
            # "<sep>" marker; keep only the first so one question is returned.
            question_text = question_text.split("<sep>", 1)[0].strip()
            if not question_text:
                # An empty generation would otherwise become the bare question "?".
                return None
            if not question_text.endswith('?'):
                question_text += '?'
            return {'question': question_text, 'answer': None}
    except Exception as e:
        print(f"Error during conceptual question generation for chunk: {e}")
    return None


# --- Formatting Functions ---
def format_mcq(question_text, options, question_num, marks):
    """Format one MCQ as text.

    The layout is a ``Q no.<n>`` header, the question, one option per line
    (options are assumed to be already labelled, e.g. "A.", "(a)"), the
    marks line and a trailing blank line.
    """
    option_lines = "".join(f"{opt}\n" for opt in options)
    return f"Q no.{question_num}\n{question_text}\n{option_lines}{marks} Marks\n\n"


def format_descriptive_question(question_text, question_num_label, marks):
    """Format one descriptive question as text.

    *question_num_label* is used verbatim (e.g. "1", "1A", "2(a)") in the
    ``Question No <label> (<marks> Marks)`` header, which is followed by the
    question text and a blank line.
    """
    return f"Question No {question_num_label} ({marks} Marks)\n{question_text}\n\n"


# --- Main Gradio Function ---
def generate_exam_paper(pdf_file, num_mcqs_requested, num_descriptive_requested, total_marks_target):
    """Assemble an exam paper from hardcoded questions plus PDF-derived ones.

    Early exits return ``(None, message)`` when no file is given, PDF text
    extraction fails, or the question generation model is not loaded.
    Hardcoded MCQs and descriptive problems are used first; any shortfall is
    filled with questions generated from chunks of the PDF text.

    NOTE(review): the end of this function (summary and final return value)
    lies beyond the section reviewed here - confirm before relying on it.
    """
    if pdf_file is None:
        return None, "Please upload a PDF file to generate questions."

    # Accept both an upload wrapper exposing `.name` and a plain path string,
    # since the file component can be configured to hand over either.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    extracted_content = extract_text_from_pdf(pdf_path)
    # Match the exact failure prefix: a substring test for "Error" would
    # reject any PDF whose own text contains that word.
    if extracted_content.startswith(PDF_EXTRACTION_ERROR_PREFIX):
        return None, extracted_content

    if qg_pipeline is None:
        return None, "Question generation model not initialized. Please check console for errors."

    all_exam_questions_output = []
    current_total_marks = 0
    mcq_count = 0
    descriptive_count = 0

    # --- Part A: MCQs ---
    all_exam_questions_output.append("## Part A – MCQ’s\n\n")

    # Prioritize hardcoded MCQs from the combined list
    hardcoded_mcq_list_idx = 0
    while mcq_count < num_mcqs_requested and hardcoded_mcq_list_idx < len(HARDCODED_MCQS):
        q_item = HARDCODED_MCQS[hardcoded_mcq_list_idx]

        # Check if it's a scenario with sub-questions (like Tropic Pvt Ltd or RK & Associates)
        if "questions" in q_item and q_item.get("scenario_text"):
            # Add scenario title and text once per scenario
            all_exam_questions_output.append(f"### {q_item['scenario_title']}\n")
            all_exam_questions_output.append(q_item['scenario_text'] + "\n")

            for sub_q_data in q_item["questions"]:
                if mcq_count >= num_mcqs_requested:
                    break  # Stop adding if requested count is met
                formatted_q = format_mcq(sub_q_data['text'], sub_q_data['options'], mcq_count + 1, sub_q_data['marks'])
                all_exam_questions_output.append(formatted_q)
                current_total_marks += sub_q_data['marks']
                mcq_count += 1
        else:
            # This is a standalone MCQ
            formatted_q = format_mcq(q_item['text'], q_item['options'], mcq_count + 1, q_item['marks'])
            all_exam_questions_output.append(formatted_q)
            current_total_marks += q_item['marks']
            mcq_count += 1

        hardcoded_mcq_list_idx += 1

    # Generate additional conceptual MCQs if needed (with dummy options)
    text_chunks = chunk_text_with_overlap_and_sentence_awareness(extracted_content, CHUNK_SIZE_FOR_QG, OVERLAP_PERCENTAGE)
    chunk_idx = 0
    while mcq_count < num_mcqs_requested and chunk_idx < len(text_chunks):
        chunk = text_chunks[chunk_idx]
        generated_qa = generate_conceptual_question_from_chunk(chunk)
        if generated_qa and generated_qa['question']:
            dummy_options = [
                "Option A (generated by AI from PDF)",
                "Option B (generated by AI from PDF)",
                "Option C (generated by AI from PDF)",
                "Option D (generated by AI from PDF)",
            ]
            formatted_q = format_mcq(generated_qa['question'], dummy_options, mcq_count + 1, DEFAULT_MCQ_MARKS)
            all_exam_questions_output.append(f"\n**Note: This MCQ is conceptually generated from the PDF. Options are placeholders.**\n{formatted_q}")
            current_total_marks += DEFAULT_MCQ_MARKS
            mcq_count += 1
        chunk_idx += 1

    # --- Part B: Descriptive Questions ---
    all_exam_questions_output.append("\n" + "="*80 + "\n")
    all_exam_questions_output.append("## Part – B\n")
    # Using a generic instruction as the specific "Q1 is compulsory" etc. might not apply to combined set
    all_exam_questions_output.append("Instructions for descriptive questions are illustrative and may not apply to this combined set.\n\n")

    # Prioritize hardcoded descriptive problems from the combined list
    hardcoded_descriptive_list_idx = 0
    while descriptive_count < num_descriptive_requested and hardcoded_descriptive_list_idx < len(HARDCODED_DESCRIPTIVE_PROBLEMS):
        problem = HARDCODED_DESCRIPTIVE_PROBLEMS[hardcoded_descriptive_list_idx]
        formatted_q = format_descriptive_question(problem['text'], problem['title'].replace("Question No ", ""), problem['marks'])
        all_exam_questions_output.append(formatted_q)
        current_total_marks += problem['marks']
        descriptive_count += 1
        hardcoded_descriptive_list_idx += 1

    # Generate additional conceptual descriptive questions if needed
    chunk_idx = 0
    while descriptive_count < num_descriptive_requested and chunk_idx < len(text_chunks):
        chunk = text_chunks[chunk_idx]
        generated_qa = generate_conceptual_question_from_chunk(chunk)
        if generated_qa and generated_qa['question']:
            formatted_q = format_descriptive_question(generated_qa['question'], f"D{descriptive_count + 1}", DEFAULT_DESCRIPTIVE_MARKS)
            all_exam_questions_output.append(f"\n**Note: This is a conceptual descriptive question generated from the PDF.**\n{formatted_q}")
            current_total_marks += DEFAULT_DESCRIPTIVE_MARKS
            descriptive_count += 1
        chunk_idx += 1

    # Final Summary (This will be part of the file content, not the status message)
final_summary_for_file = f"\n--- Exam Summary ---\n" final_summary_for_file += f"Total MCQs Generated: {mcq_count}\n" final_summary_for_file += f"Total Descriptive Questions Generated: {descriptive_count}\n" final_summary_for_file += f"Approximate Total Marks: {current_total_marks} (Target: {total_marks_target})\n" final_summary_for_file += "\n**Important Notes:**\n" final_summary_for_file += "1. Complex case-scenario questions (e.g., numerical problems, those referencing specific SAs, CARO, or ethical threats) are *mimicked* from your provided samples. The AI model does NOT perform calculations, legal/auditing analysis, or generate new problems of this specific complexity from arbitrary PDF content.\n" final_summary_for_file += "2. Dynamically generated conceptual questions from your uploaded PDF will be general in nature, based on the text content, and may not be specific to advanced financial/auditing standards unless explicitly stated in the source text chunk.\n" final_summary_for_file += "3. MCQ options for dynamically generated questions are placeholders as the AI cannot reliably create plausible distractors for new content.\n" final_summary_for_file += "4. The mark tally is an approximation due to fixed marks for hardcoded questions and default marks for AI-generated ones.\n" all_exam_questions_output.append(final_summary_for_file) full_output_content = "".join(all_exam_questions_output) # Save the generated content to a temporary file temp_file_path = os.path.join(tempfile.gettempdir(), "generated_exam_questions.txt") try: with open(temp_file_path, "w", encoding="utf-8") as f: f.write(full_output_content) # Debugging prints for console (for download issues) print(f"\n--- DEBUG INFO ---") print(f"Attempted to save file to: {temp_file_path}") if os.path.exists(temp_file_path): file_size = os.path.getsize(temp_file_path) print(f"File exists. Size: {file_size} bytes.") if file_size == 0: print("Warning: Generated file is empty. 
Check content generation logic.") return None, "Error: Generated exam paper is empty. Please check PDF content or generation parameters." else: print(f"File content snippet (first 500 chars):\n{full_output_content[:500]}...") else: print("Error: File was not created at the specified path.") return None, "Error: Failed to create the downloadable file." print(f"Returning file path: {temp_file_path} to Gradio.") return temp_file_path, "Exam paper generated successfully! You can download it below." except Exception as e: print(f"An unexpected error occurred while saving the file: {e}") return None, f"An unexpected error occurred while saving the file: {e}" # --- Gradio Interface --- iface = gr.Interface( fn=generate_exam_paper, inputs=[ gr.File(label="Upload PDF Book"), gr.Slider(minimum=0, maximum=20, value=8, step=1, label="Number of Multiple Choice Questions (MCQs)"), gr.Slider(minimum=0, maximum=10, value=7, step=1, label="Number of Descriptive Questions"), gr.Number(label="Target Total Marks (Approximation)", value=50) ], outputs=[ gr.File(label="Download Generated Exam Paper"), gr.Textbox(label="Status/Messages", interactive=False) ], title="AI-Powered Exam Question Generator Prototype (Combined Domains)", description="Upload a PDF book. This prototype combines hardcoded questions from **both Accounting and Auditing samples** you provided. It then generates additional conceptual questions from your PDF. **Advanced chunking (sentence-aware & overlapping) is used for better context preservation.** **Important:** Complex numerical/scenario-based questions are *mimicked* from your samples; new ones are NOT generated by the AI. **If download fails, please provide the complete console output from your terminal!**" ) # Launch the Gradio app if __name__ == "__main__": # Set share=False to avoid potential hangs with public URL generation iface.launch(share=False)