Spaces:

random2222
/

trykro

Sleeping

App Files Community

random2222 committed on Apr 11

Commit

0946a91

verified ·

1 Parent(s): 5326991

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -54

app.py CHANGED Viewed

@@ -8,95 +8,97 @@ from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline, AutoTokenizer
-def load_documents(file_path="study_materials"):
     documents = []
-    for filename in os.listdir(file_path):
-        path = os.path.join(file_path, filename)
-        if filename.endswith(".pdf"):
             loader = PyMuPDFLoader(path)
             documents.extend(loader.load())
-        elif filename.endswith(".txt"):
             loader = TextLoader(path)
             documents.extend(loader.load())
     return documents
-def create_qa_system():
     try:
-        # Load and process documents
-        documents = load_documents()
-        if not documents:
-            raise ValueError("❗ No documents found in 'study_materials' folder")
-        # Document processing
-        text_splitter = CharacterTextSplitter(
-            chunk_size=800,
-            chunk_overlap=100,
             separator="\n\n"
         )
-        texts = text_splitter.split_documents(documents)
-        # Local embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
-        # Create vector store
-        db = FAISS.from_documents(texts, embeddings)
-        # Configure local LLM
-        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
-        local_pipe = pipeline(
-            "text2text-generation",
-            model="google/flan-t5-base",
             tokenizer=tokenizer,
-            max_length=400,  # Increased response length
             temperature=0.4,
             device=-1  # Force CPU
         )
-        # LangChain integration
-        llm = HuggingFacePipeline(pipeline=local_pipe)
         return RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
-            retriever=db.as_retriever(search_kwargs={"k": 3}),
             return_source_documents=True
         )
-    except Exception as e:
-        raise gr.Error(f"Setup Error: {str(e)}")
-# Initialize system
 try:
-    qa = create_qa_system()
-except Exception as e:
-    print(f"Startup Failed: {str(e)}")
     raise
-def ask_question(question, history):
     try:
-        result = qa({"query": question})
-        answer = result["result"]
-        # Enforce minimum answer length
-        min_words = 75
-        if len(answer.split()) < min_words:
-            answer += f"\n\n[Note: This answer is shorter than {min_words} words. Consider rephrasing your question for more details.]"
-        # Show sources
-        sources = list({doc.metadata['source'] for doc in result['source_documents']})
-        return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
-    except Exception as e:
-        return f"Error: {str(e)[:150]}"
-# Launch interface
 gr.ChatInterface(
-    ask_question,
-    title="Local Study Assistant",
-    description="100% local AI - No APIs required! Upload PDF/TXT files in 'study_materials' folder",
     examples=[
-        "Explain the key concepts from Chapter 4 in detail",
-        "What are the three main points made in section 2.3?",
-        "Compare and contrast the theories presented in pages 50-60"
     ]
 ).launch()

 from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline, AutoTokenizer
+# Configuration
+DOCS_FOLDER = "study_materials"
+CHUNK_SIZE = 1000
+CHUNK_OVERLAP = 150
+MODEL_NAME = "google/flan-t5-base"
+def get_documents():
+    """Load and process documents without external dependencies"""
     documents = []
+    for file in os.listdir(DOCS_FOLDER):
+        path = os.path.join(DOCS_FOLDER, file)
+        if file.endswith(".pdf"):
             loader = PyMuPDFLoader(path)
             documents.extend(loader.load())
+        elif file.endswith(".txt"):
             loader = TextLoader(path)
             documents.extend(loader.load())
     return documents
+def initialize_system():
     try:
+        # 1. Document Processing
+        docs = get_documents()
+        if not docs:
+            raise RuntimeError(f"⚠️ No documents found in {DOCS_FOLDER} folder")
+        # 2. Text Chunking
+        splitter = CharacterTextSplitter(
+            chunk_size=CHUNK_SIZE,
+            chunk_overlap=CHUNK_OVERLAP,
             separator="\n\n"
         )
+        chunks = splitter.split_documents(docs)
+        # 3. Local Embeddings
         embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
+        # 4. Vector Store
+        vector_db = FAISS.from_documents(chunks, embeddings)
+        # 5. Local LLM Setup
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        text_gen = pipeline(
+            task="text2text-generation",
+            model=MODEL_NAME,
             tokenizer=tokenizer,
+            max_length=500,
             temperature=0.4,
             device=-1  # Force CPU
         )
+        # 6. LangChain Integration
+        llm = HuggingFacePipeline(pipeline=text_gen)
         return RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
+            retriever=vector_db.as_retriever(search_kwargs={"k": 3}),
             return_source_documents=True
         )
+    except Exception as error:
+        raise RuntimeError(f"Initialization failed: {str(error)}")
+# Initialize QA system
 try:
+    qa_system = initialize_system()
+except Exception as error:
+    print(f"Fatal Error: {str(error)}")
     raise
+def handle_query(query, history):
+    """Process user queries with enhanced error handling"""
     try:
+        result = qa_system.invoke({"query": query})
+        response = result["result"]
+        sources = {doc.metadata['source'] for doc in result['source_documents']}
+        return f"{response}\n\nSources: {', '.join(sources)}"
+    except Exception as error:
+        print(f"Query Error: {str(error)}")
+        return "Error processing request. Please check document formatting."
+# Create interface
 gr.ChatInterface(
+    fn=handle_query,
+    title="Local Document AI",
+    description="Upload PDF/TXT files to 'study_materials' folder and ask questions",
     examples=[
+        "Summarize the main points from chapter 3",
+        "Explain the key concepts in section 2.1",
+        "What are the advantages discussed on page 4?"
     ]
 ).launch()