Update app.py
app.py CHANGED
@@ -44,7 +44,7 @@ def load_json_orders(json_file):
     if isinstance(data, list):
         for i, order in enumerate(data):
             try:
-                json.dumps(order)  #
+                json.dumps(order)  # test serialization
                 valid_orders.append(order)
             except Exception as e:
                 st.warning(f"⚠️ Skipping invalid order at index {i}: {e}")
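The restored comment makes the intent explicit: `json.dumps(order)` is a pure serializability probe, so any order carrying non-JSON-safe values is skipped with a warning instead of failing later when the chunks are built. A minimal standalone sketch of the same pattern (names here are illustrative, not from app.py):

```python
import json

def filter_serializable(records):
    """Keep only records that survive a JSON round-trip check."""
    valid = []
    for i, record in enumerate(records):
        try:
            json.dumps(record)  # raises TypeError on non-JSON-safe values
            valid.append(record)
        except (TypeError, ValueError) as e:
            print(f"Skipping record {i}: {e}")
    return valid

# A set is not JSON-serializable, so the second record is dropped.
orders = [{"id": 1, "items": ["car"]}, {"id": 2, "items": {"car"}}]
print(filter_serializable(orders))  # -> [{'id': 1, 'items': ['car']}]
```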
@@ -59,7 +59,6 @@ def load_json_orders(json_file):
         st.error(f"❌ Error parsing JSON file: {e}")
     return valid_orders
 
-
 def build_index(text_chunks):
     vectors = embedder.encode(text_chunks)
     index = faiss.IndexFlatL2(vectors.shape[1])
@@ -67,11 +66,20 @@ def build_index(text_chunks):
     return index, text_chunks
 
 def ask_llm(context, query):
-    prompt =
+    prompt = (
+        f"You are a helpful assistant for an online toy shop.\n\n"
+        f"Knowledge base:\n{context}\n\n"
+        f"Question: {query}"
+    )
+    # For debugging: show the prompt being sent.
+    st.expander("Prompt to LLM").code(prompt)
+
     response = client.chat.completions.create(
         model=LLM_MODEL,
         messages=[{"role": "user", "content": prompt}]
     )
+    # Log full response for inspection (can be commented out in production)
+    st.expander("Raw LLM API Response").json(response)
     return response.choices[0].message.content.strip()
 
 # --- File upload section ---
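The rewritten `ask_llm` folds the retrieved context and the user question into a single user message, and the two debug panels work because `st.expander` returns a container whose `.code()` and `.json()` methods mirror the top-level `st` API. The `client` and `LLM_MODEL` definitions sit outside this diff; a plausible setup, assuming Groq's OpenAI-compatible endpoint and a placeholder model name, might look like:

```python
import os
from openai import OpenAI  # Groq serves an OpenAI-compatible API

# Assumed setup: the actual client/LLM_MODEL definitions are not in this diff.
client = OpenAI(
    api_key=os.environ["GROQ_API_KEY"],
    base_url="https://api.groq.com/openai/v1",
)
LLM_MODEL = "llama-3.1-8b-instant"  # placeholder model name

prompt = (
    "You are a helpful assistant for an online toy shop.\n\n"
    "Knowledge base:\nToy cars cost $5.\n\n"
    "Question: How much is a toy car?"
)
response = client.chat.completions.create(
    model=LLM_MODEL,
    messages=[{"role": "user", "content": prompt}],
)
print(response.choices[0].message.content.strip())
```

One caveat: `st.json` expects a JSON-serializable object or string, and the SDK's response is a pydantic model, so passing `response.model_dump()` to the "Raw LLM API Response" expander is likely the safer call.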
@@ -90,21 +98,24 @@ if orders_file:
     if orders:
         order_chunks = [json.dumps(order, ensure_ascii=False) for order in orders]
         st.success(f"✅ Loaded {len(order_chunks)} customer order records.")
-
-        # Try to flatten for DataFrame view
+        # Attempt to flatten for viewing
         try:
             df = pd.json_normalize(orders)
             st.dataframe(df, use_container_width=True)
         except Exception:
             st.warning("⚠️ Nested JSON detected. Showing raw JSON preview instead.")
             st.json(orders)
+    else:
+        st.error("No valid orders found in the JSON file.")
 
 # --- Process PDFs ---
 if pdf_files:
     for pdf_file in pdf_files:
         try:
             text = extract_pdf_text(pdf_file)
-
+            # Split into paragraphs (non-empty lines)
+            paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
+            pdf_chunks.extend(paragraphs)
             st.success(f"📄 Processed {pdf_file.name}")
         except Exception as e:
             st.error(f"❌ Failed to read {pdf_file.name}: {e}")
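The new PDF branch now actually feeds the index: extracted text is split on blank lines and the resulting paragraphs are appended to `pdf_chunks` (the inline comment says "non-empty lines", but `split("\n\n")` splits on paragraphs, not lines). The same chunking in isolation:

```python
def chunk_paragraphs(text: str) -> list[str]:
    """Split extracted PDF text on blank lines into non-empty chunks."""
    return [p.strip() for p in text.split("\n\n") if p.strip()]

sample = "Shipping policy.\n\nReturns accepted within 30 days.\n\n\n"
print(chunk_paragraphs(sample))
# ['Shipping policy.', 'Returns accepted within 30 days.']
```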
@@ -121,12 +132,15 @@ if combined_chunks:
     if user_query:
         query_vector = embedder.encode([user_query])
         D, I = index.search(query_vector, k=5)
+        # Prepare context from the top-K results:
         context = "\n---\n".join([sources[i] for i in I[0]])
+        st.expander("Combined Context").code(context)
 
         with st.spinner("🤖 Thinking..."):
             try:
                 answer = ask_llm(context, user_query)
                 st.markdown("### 🧠 Answer")
+                # Use st.write() to render the answer as text.
                 st.write(answer)
             except Exception as e:
                 st.error(f"❌ GROQ API Error: {e}")
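For the retrieval step: `index.search` returns distances `D` and integer indices `I`, each shaped `(n_queries, k)`, so `I[0]` holds the positions of the five chunks nearest the single query, which are then joined into the prompt context. A self-contained sketch with stand-in random vectors (the real app embeds with `embedder.encode`):

```python
import numpy as np
import faiss

sources = ["order 1: toy car", "order 2: doll", "shipping policy: 30-day returns"]
vectors = np.random.rand(len(sources), 8).astype("float32")  # stand-in embeddings

index = faiss.IndexFlatL2(vectors.shape[1])  # FAISS requires float32 inputs
index.add(vectors)

query_vector = np.random.rand(1, 8).astype("float32")
D, I = index.search(query_vector, k=2)  # the app uses k=5
context = "\n---\n".join(sources[i] for i in I[0])
print(context)
```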