AmelC committed on
Commit
ddad505
·
verified ·
1 Parent(s): 707f8ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -529
app.py CHANGED
@@ -1,532 +1,3 @@
1
- '''
2
-
3
- import gradio as gr
4
- import os
5
- import re
6
- import json
7
- import torch
8
- import numpy as np
9
- import logging
10
- from typing import Dict, List, Tuple, Optional
11
- from tqdm import tqdm
12
- from pydantic import BaseModel
13
- import pprint
14
- from transformers import (
15
- AutoTokenizer,
16
- AutoModelForSeq2SeqLM,
17
- AutoModelForQuestionAnswering,
18
- pipeline,
19
- LogitsProcessor,
20
- LogitsProcessorList,
21
- PreTrainedModel,
22
- PreTrainedTokenizer
23
- )
24
- from sentence_transformers import SentenceTransformer, CrossEncoder
25
- from sklearn.feature_extraction.text import TfidfVectorizer
26
- from rank_bm25 import BM25Okapi
27
- import PyPDF2
28
- from sklearn.cluster import KMeans
29
- import spacy
30
-
31
- logging.basicConfig(
32
- level=logging.INFO,
33
- format="%(asctime)s [%(levelname)s] %(message)s"
34
- )
35
-
36
- print('====================== VERSION 6 (Force Use Of GPU)======================')
37
-
38
-
39
- class ConfidenceCalibrator(LogitsProcessor):
40
- """Calibrates model confidence scores during generation"""
41
- def __init__(self, calibration_factor: float = 0.9):
42
- self.calibration_factor = calibration_factor
43
-
44
- def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
45
- # Apply temperature scaling to smooth probability distribution
46
- scores = scores / self.calibration_factor
47
- return scores
48
-
49
-
50
- class DocumentResult(BaseModel):
51
- """Structured output format for consistent results"""
52
- content: str
53
- confidence: float
54
- source_page: int
55
- supporting_evidence: List[str]
56
-
57
-
58
- class OptimalModelSelector:
59
- """Dynamically selects best performing model for each task"""
60
- def __init__(self):
61
- self.qa_models = {
62
- "deberta-v3": ("deepset/deberta-v3-large-squad2", 0.87),
63
- "minilm": ("deepset/minilm-uncased-squad2", 0.84),
64
- "roberta": ("deepset/roberta-base-squad2", 0.82)
65
- }
66
- self.summarization_models = {
67
- "bart": ("facebook/bart-large-cnn", 0.85),
68
- "pegasus": ("google/pegasus-xsum", 0.83)
69
- }
70
- self.current_models = {}
71
-
72
- def get_best_model(self, task_type: str) -> Tuple[PreTrainedModel, PreTrainedTokenizer, float]:
73
- """Returns model with highest validation score for given task"""
74
- model_map = self.qa_models if "qa" in task_type else self.summarization_models
75
- best_model_name, best_score = max(model_map.items(), key=lambda x: x[1][1])
76
-
77
- if best_model_name not in self.current_models:
78
- logging.info(f"Loading {best_model_name} for {task_type}")
79
- tokenizer = AutoTokenizer.from_pretrained(model_map[best_model_name][0])
80
- model = (AutoModelForQuestionAnswering if "qa" in task_type
81
- else AutoModelForSeq2SeqLM).from_pretrained(model_map[best_model_name][0])
82
-
83
- # Put the model in eval mode and cast it to half precision (fp16) before moving it to the device
84
- model = model.eval().half().to('cuda' if torch.cuda.is_available() else 'cpu')
85
- self.current_models[best_model_name] = (model, tokenizer)
86
-
87
- return *self.current_models[best_model_name], best_score
88
-
89
-
90
- class PDFAugmentedRetriever:
91
- """Enhanced context retrieval with hybrid search"""
92
- def __init__(self, document_texts: List[str]):
93
- self.documents = [(i, text) for i, text in enumerate(document_texts)]
94
- self.bm25 = BM25Okapi([text.split() for _, text in self.documents])
95
- self.encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
96
- self.tfidf = TfidfVectorizer(stop_words='english').fit([text for _, text in self.documents])
97
-
98
- def retrieve(self, query: str, top_k: int = 5) -> List[Tuple[int, str, float]]:
99
- """Hybrid retrieval combining lexical and semantic search"""
100
- # BM25 (lexical search)
101
- bm25_scores = self.bm25.get_scores(query.split())
102
-
103
- # Semantic similarity
104
- semantic_scores = self.encoder.predict([(query, doc) for _, doc in self.documents])
105
-
106
- # Combine scores with learned weights (from validation)
107
- combined_scores = 0.4 * bm25_scores + 0.6 * np.array(semantic_scores)
108
-
109
- # Get top passages
110
- top_indices = np.argsort(combined_scores)[-top_k:][::-1]
111
- return [(self.documents[i][0], self.documents[i][1], float(combined_scores[i]))
112
- for i in top_indices]
113
-
114
-
115
- class DetailedExplainer:
116
- """
117
- Extracts key concepts from a text and explains each in depth.
118
- """
119
- def __init__(self,
120
- explanation_model: str = "google/flan-t5-large",
121
- device: int = 0):
122
- # generation pipeline for deep explanations
123
- self.explainer = pipeline(
124
- "text2text-generation",
125
- model=explanation_model,
126
- tokenizer=explanation_model,
127
- device=device
128
- )
129
- # spaCy model for concept extraction
130
- self.nlp = spacy.load("en_core_web_sm")
131
-
132
- def extract_concepts(self, text: str) -> list:
133
- """
134
- Use noun chunks and named entities to identify concepts.
135
- Returns a list of unique concept strings.
136
- """
137
- doc = self.nlp(text)
138
- concepts = set()
139
- for chunk in doc.noun_chunks:
140
- if len(chunk) > 1 and not chunk.root.is_stop:
141
- concepts.add(chunk.text.strip())
142
- for ent in doc.ents:
143
- if ent.label_ in ["PERSON", "ORG", "GPE", "NORP", "EVENT", "WORK_OF_ART"]:
144
- concepts.add(ent.text.strip())
145
- return list(concepts)
146
-
147
- # The min_accuracy parameter asks the model for a sufficiently accurate explanation
148
- # by stating a minimum accuracy target in the prompt; it is a request, not a measured guarantee.
149
- # This is useful for complex concepts where a simple explanation may not suffice.
150
- # min_accuracy = 0.7  # Earlier default threshold; the signature below defaults to 0.50
151
- def explain_concept(self, concept: str, context: str, min_accuracy: float = 0.50) -> str:
152
- """
153
- Generate an explanation for a single concept using context.
154
- Ensures at least `min_accuracy` via introspective prompt calibration.
155
- """
156
- prompt = (
157
- f"Explain the concept '{concept}' in depth using the following context. "
158
- f"Aim for at least {int(min_accuracy * 100)}% accuracy."
159
- f"\nContext:\n{context}\n"
160
- )
161
- result = self.explainer(
162
- prompt,
163
- max_length=200,
164
- min_length=80,
165
- do_sample=False
166
- )
167
- return result[0]["generated_text"].strip()
168
-
169
- def explain_text(self, text: str, context: str) -> dict:
170
- """
171
- For each concept in text, produce a detailed explanation.
172
- Returns:
173
- {
174
- 'concepts': [list of extracted concepts],
175
- 'explanations': {concept: explanation, ...}
176
- }
177
- """
178
- concepts = self.extract_concepts(text)
179
- explanations = {}
180
- for concept in concepts:
181
- explanations[concept] = self.explain_concept(concept, context)
182
- return {"concepts": concepts, "explanations": explanations}
183
-
184
-
185
- class AdvancedPDFAnalyzer:
186
- """
187
- High-precision PDF analysis engine with confidence calibration
188
- Confidence scores are empirically validated to reach 0.9+ on benchmark datasets
189
- """
190
- def __init__(self):
191
- """Initialize with optimized model selection and retrieval"""
192
- self.logger = logging.getLogger("PDFAnalyzer")
193
- self.model_selector = OptimalModelSelector()
194
- self._verify_dependencies()
195
-
196
- # Force use of GPU if available
197
- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
198
- if torch.cuda.is_available():
199
- print("[INFO] Using GPU for inference.")
200
- else:
201
- print("[INFO] Using CPU for inference.")
202
-
203
- # Initialize with highest confidence models
204
- self.qa_model, self.qa_tokenizer, _ = self.model_selector.get_best_model("qa")
205
- self.qa_model = self.qa_model.to(self.device)
206
-
207
- self.summarizer = pipeline(
208
- "summarization",
209
- model="facebook/bart-large-cnn",
210
- device=0 if torch.cuda.is_available() else -1,
211
- framework="pt"
212
- )
213
-
214
- # Confidence calibration setup
215
- self.logits_processor = LogitsProcessorList([
216
- ConfidenceCalibrator(calibration_factor=0.85)
217
- ])
218
-
219
- # Initialize the detailed explainer here
220
- self.detailed_explainer = DetailedExplainer(
221
- device=0 if torch.cuda.is_available() else -1
222
- )
223
-
224
- def _verify_dependencies(self):
225
- """Check for critical dependencies"""
226
- try:
227
- PyPDF2.PdfReader
228
- except ImportError:
229
- raise ImportError("PyPDF2 required: pip install pypdf2")
230
-
231
- def extract_text_with_metadata(self, file_path: str) -> List[Dict]:
232
- """Extract text with page-level metadata and structural info"""
233
- self.logger.info(f"Processing {file_path}")
234
- documents = []
235
-
236
- with open(file_path, 'rb') as f:
237
- reader = PyPDF2.PdfReader(f)
238
-
239
- for i, page in enumerate(tqdm(reader.pages)):
240
- try:
241
- text = page.extract_text()
242
- if not text or not text.strip():
243
- continue
244
-
245
- # Add document context
246
- page_number = i + 1
247
- metadata = {
248
- 'source': os.path.basename(file_path),
249
- 'page': page_number,
250
- 'char_count': len(text),
251
- 'word_count': len(text.split()),
252
- }
253
- documents.append({
254
- 'content': self._clean_text(text),
255
- 'metadata': metadata
256
- })
257
- except Exception as e:
258
- self.logger.warning(f"Page {i + 1} error: {str(e)}")
259
-
260
- if not documents:
261
- raise ValueError("No extractable content found in PDF")
262
-
263
- return documents
264
-
265
- def _clean_text(self, text: str) -> str:
266
- """Advanced text normalization with document structure preservation"""
267
- text = re.sub(r'[\x00-\x1F\x7F-\x9F]', ' ', text) # Remove control chars
268
- text = re.sub(r'\s+', ' ', text) # Standardize whitespace
269
- text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text) # Fix hyphenated words
270
- return text.strip()
271
-
272
- def analyze_document(self, file_path: str) -> Dict:
273
- """Full document analysis pipeline with confidence scoring"""
274
- documents = self.extract_text_with_metadata(file_path)
275
- text_chunks = [doc['content'] for doc in documents]
276
-
277
- # Initialize retriever with document chunks
278
- retriever = PDFAugmentedRetriever(text_chunks)
279
-
280
- # Generate summary with confidence
281
- summary = self._generate_summary_with_confidence(
282
- "\n".join(text_chunks),
283
- retriever
284
- )
285
-
286
- return {
287
- 'document_metadata': [doc['metadata'] for doc in documents],
288
- 'summary': summary,
289
- 'avg_confidence': np.mean([s.confidence for s in summary])
290
- }
291
-
292
- def _generate_summary_with_confidence(self, text: str, retriever: PDFAugmentedRetriever) -> List[DocumentResult]:
293
- """Generates summary with calibrated confidence scores"""
294
- sentences = [s.strip() for s in text.split('. ') if len(s.split()) > 6]
295
- if not sentences:
296
- return []
297
-
298
- # Cluster sentences into topics
299
- vectorizer = TfidfVectorizer(max_features=500)
300
- X = vectorizer.fit_transform(sentences)
301
-
302
- # Select most representative sentence per topic
303
- summary_sentences = []
304
- for cluster in self._cluster_text(X, n_clusters=min(5, len(sentences))):
305
- cluster_sents = [sentences[i] for i in cluster]
306
- sentence_scores = self._cross_validate_sentences(cluster_sents)
307
- best_sentence = max(zip(cluster_sents, sentence_scores), key=lambda x: x[1])
308
- summary_sentences.append(best_sentence)
309
-
310
- # Format with confidence
311
- return [
312
- DocumentResult(
313
- content=sent,
314
- confidence=min(0.95, score * 1.1), # Calibrated boost
315
- source_page=0,
316
- supporting_evidence=self._find_supporting_evidence(sent, retriever)
317
- )
318
- for sent, score in summary_sentences
319
- ]
320
-
321
- def answer_question(self, question: str, documents: List[Dict]) -> Dict:
322
- """High-confidence QA with evidence retrieval and detailed explanations"""
323
- # Create searchable index
324
- retriever = PDFAugmentedRetriever([doc['content'] for doc in documents])
325
-
326
- # Retrieve relevant context
327
- relevant_contexts = retriever.retrieve(question, top_k=3)
328
-
329
- answers = []
330
- for page_idx, context, similarity_score in relevant_contexts:
331
- # Prepare QA inputs dynamically
332
- inputs = self.qa_tokenizer(
333
- question,
334
- context,
335
- add_special_tokens=True,
336
- return_tensors="pt",
337
- max_length=512,
338
- truncation="only_second"
339
- )
340
- # Move inputs to the correct device
341
- inputs = {k: v.to(self.device) for k, v in inputs.items()}
342
-
343
- # Get model output with calibration
344
- with torch.no_grad():
345
- outputs = self.qa_model(**inputs)
346
- start_logits = outputs.start_logits
347
- end_logits = outputs.end_logits
348
-
349
- # Apply confidence calibration
350
- logits_processor = LogitsProcessorList([ConfidenceCalibrator()])
351
- start_logits = logits_processor(inputs['input_ids'], start_logits)
352
- end_logits = logits_processor(inputs['input_ids'], end_logits)
353
-
354
- start_prob = torch.nn.functional.softmax(start_logits, dim=-1)
355
- end_prob = torch.nn.functional.softmax(end_logits, dim=-1)
356
-
357
- # Get best answer span
358
- max_start_score, max_start_idx = torch.max(start_prob, dim=-1)
359
- max_start_idx_int = max_start_idx.item()
360
- max_end_score, max_end_idx = torch.max(end_prob[0, max_start_idx_int:], dim=-1)
361
- max_end_idx_int = max_end_idx.item() + max_start_idx_int
362
-
363
- confidence = float((max_start_score * max_end_score) * 0.9 * similarity_score)
364
-
365
- answer_tokens = inputs["input_ids"][0][max_start_idx_int:max_end_idx_int + 1]
366
- answer = self.qa_tokenizer.decode(answer_tokens, skip_special_tokens=True)
367
-
368
- # Generate detailed explanations for concepts in answer
369
- explanations_result = self.detailed_explainer.explain_text(answer, context)
370
-
371
- answers.append({
372
- "answer": answer,
373
- "confidence": confidence,
374
- "context": context,
375
- "page_number": documents[page_idx]['metadata']['page'],
376
- "explanations": explanations_result # contains 'concepts' and 'explanations'
377
- })
378
-
379
- # Select best answer with confidence validation
380
- if not answers:
381
- return {"answer": "No confident answer found", "confidence": 0.0, "explanations": {}}
382
-
383
- best_answer = max(answers, key=lambda x: x['confidence'])
384
-
385
- # Enforce minimum confidence threshold
386
- if best_answer['confidence'] < 0.85:
387
- best_answer['answer'] = f"[Low Confidence] {best_answer['answer']}"
388
-
389
- return best_answer
390
-
391
- def _cluster_text(self, X, n_clusters=5):
392
- """
393
- Cluster sentences using KMeans and return indices for each cluster.
394
- Returns a list of lists, where each sublist contains indices of sentences in that cluster.
395
- """
396
- if X.shape[0] < n_clusters:
397
- # Not enough sentences to cluster, return each as its own cluster
398
- return [[i] for i in range(X.shape[0])]
399
- kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
400
- labels = kmeans.fit_predict(X)
401
- clusters = [[] for _ in range(n_clusters)]
402
- for idx, label in enumerate(labels):
403
- clusters[label].append(idx)
404
- return clusters
405
-
406
- def _cross_validate_sentences(self, sentences: List[str]) -> List[float]:
407
- """
408
- Assigns a relevance/confidence score to each sentence in the cluster.
409
- Here, we use the sum of each sentence's TF-IDF weights as a proxy for importance.
410
- """
411
- if not sentences:
412
- return []
413
- vectorizer = TfidfVectorizer(stop_words='english')
414
- tfidf_matrix = vectorizer.fit_transform(sentences)
415
- # Score: sum of TF-IDF weights for each sentence
416
- scores = tfidf_matrix.sum(axis=1)
417
- # Flatten to 1D list of floats
418
- return [float(s) for s in scores]
419
-
420
- def _find_supporting_evidence(self, sentence: str, retriever, top_k: int = 2) -> List[str]:
421
- """
422
- Finds supporting evidence for a summary sentence using the retriever.
423
- Returns a list of the most relevant document passages.
424
- """
425
- results = retriever.retrieve(sentence, top_k=top_k)
426
- return [context for _, context, _ in results]
427
-
428
-
429
- if __name__ == "__main__":
430
- analyzer = AdvancedPDFAnalyzer()
431
- file_path = input("Enter PDF file path (default: example.pdf): ").strip() or "example.pdf"
432
- documents = analyzer.extract_text_with_metadata(file_path)
433
-
434
- print("\nYou can now ask questions about the document. Type 'exit' to stop.")
435
- while True:
436
- user_question = input("\nAsk a question (or type 'exit'): ").strip()
437
- if user_question.lower() in ["exit", "quit"]:
438
- break
439
- qa_result = analyzer.answer_question(user_question, documents)
440
- print(f"AI Answer: {qa_result['answer']} (Confidence: {qa_result['confidence']:.2f})")
441
- ## Check confidence level
442
- if qa_result['confidence'] >= 0.85:
443
- print("\n[Info] High confidence in answer, you can trust the response.")
444
- pprint.pprint(qa_result)
445
- print("\nConcepts explained in detail:")
446
- if 'explanations' in qa_result and qa_result['explanations']:
447
- for concept in qa_result['explanations']['concepts']:
448
- explanation = qa_result['explanations']['explanations'].get(concept, "")
449
- print(f"\n>> {concept}:\n{explanation}\n")
450
- if qa_result['confidence'] < 0.7 and qa_result['confidence'] >= 0.60:
451
- # Print warning for confidence in the range [0.60, 0.70)
452
- print(f"\n[Warning] Confidence below 0.7 , confidence {qa_result['confidence']}, Use the Quandans AI responses for reference only and confirm with the document. \n")
453
- pprint(qa_result) #Print the full QA result for debugging
454
- print("\nConcepts explained in detail:")
455
- if 'explanations' in qa_result and qa_result['explanations']:
456
- for concept in qa_result['explanations']['concepts']:
457
- explanation = qa_result['explanations']['explanations'].get(concept, "")
458
- print(f"\n>> {concept}:\n{explanation}\n")
459
-
460
- if qa_result['confidence'] < 0.60:
461
- print(f"[Warning] Low confidence in answer confidence:{qa_result['confidence']} . Consider rephrasing your question or checking the document.")
462
- # Print detailed explanations for each concept
463
- '''
464
- if 'explanations' in qa_result and qa_result['explanations']:
465
- print("\nConcepts explained in detail:")
466
- for concept in qa_result['explanations']['concepts']:
467
- explanation = qa_result['explanations']['explanations'].get(concept, "")
468
- print(f"\n>> {concept}:\n{explanation}")
469
- '''
470
-
471
- # Now the model asks the user questions
472
- print("\nNow the model will ask you questions about the document. Type 'exit' to stop.")
473
- # Generate questions from the document (use summary sentences as questions)
474
- summary = analyzer._generate_summary_with_confidence(
475
- "\n".join([doc['content'] for doc in documents]),
476
- PDFAugmentedRetriever([doc['content'] for doc in documents])
477
- )
478
- for i, doc_result in enumerate(summary):
479
- question = f"What is the meaning of: '{doc_result.content}'?"
480
- print(f"\nQuestion {i + 1}: {question}")
481
- user_answer = input("Your answer: ").strip()
482
- if user_answer.lower() in ["exit", "quit"]:
483
- break
484
- # Use sentence transformer for similarity
485
- try:
486
- model = SentenceTransformer('all-MiniLM-L6-v2')
487
- correct = doc_result.content
488
- emb_user = model.encode([user_answer])[0]
489
- emb_correct = model.encode([correct])[0]
490
- similarity = np.dot(emb_user, emb_correct) / (np.linalg.norm(emb_user) * np.linalg.norm(emb_correct))
491
- print(f"Your answer similarity score: {similarity:.2f}")
492
- except Exception as e:
493
- print(f"Could not evaluate answer similarity: {e}")
494
-
495
- print("Session ended.")
496
-
497
-
498
- # Initialize analyzer once
499
- analyzer = AdvancedPDFAnalyzer()
500
- documents = analyzer.extract_text_with_metadata("example.pdf") # Change path if needed
501
-
502
- def ask_question_gradio(question: str):
503
- if not question.strip():
504
- return "Please enter a valid question."
505
- try:
506
- result = analyzer.answer_question(question, documents)
507
- answer = result['answer']
508
- confidence = result['confidence']
509
- explanation = "\n\n".join(
510
- f"🔹 {concept}: {desc}"
511
- for concept, desc in result.get("explanations", {}).get("explanations", {}).items()
512
- )
513
- return f"📌 **Answer**: {answer}\n\n🔒 **Confidence**: {confidence:.2f}\n\n📘 **Explanations**:\n{explanation}"
514
- except Exception as e:
515
- return f"❌ Error: {str(e)}"
516
-
517
- # Gradio Interface
518
- demo = gr.Interface(
519
- fn=ask_question_gradio,
520
- inputs=gr.Textbox(label="Ask a question about the PDF"),
521
- outputs=gr.Markdown(label="Answer"),
522
- title="Quandans AI - Ask Questions",
523
- description="Enter a question based on the loaded PDF document. The system will provide an answer with confidence and concept explanations."
524
- )
525
-
526
- demo.launch()
527
-
528
- '''
529
-
530
  import os
531
  import re
532
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
3
  import json