In [None]:
!pip install -r requirements.txt -q
!pip install streamlit cloudflared -q
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb

!cloudflared --version


Selecting previously unselected package cloudflared.
(Reading database ... 126441 files and directories currently installed.)
Preparing to unpack cloudflared-linux-amd64.deb ...
Unpacking cloudflared (2025.9.1) ...
Setting up cloudflared (2025.9.1) ...
Processing triggers for man-db (2.10.2-1) ...
cloudflared version 2025.9.1 (built 2025-09-22-13:28 UTC)


In [None]:
import torch

# Report which device is available to this kernel.
cuda_ready = torch.cuda.is_available()
status_line = (
    f"✅ CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}"
    if cuda_ready
    else "⚠️ CUDA not available. Falling back to CPU."
)
print(status_line)


# Disabled experiment: swapping in a custom CUDA allocator.
# # Load the allocator
# new_alloc = torch.cuda.memory.CUDAPluggableAllocator(
# 'alloc.so', 'my_malloc', 'my_free')
# # Swap the current allocator
# torch.cuda.memory.change_current_allocator(new_alloc)

✅ CUDA is available. Using GPU: Tesla T4


In [None]:
%%writefile Embeddings.py

import os
import glob
import pickle, json
from tqdm import tqdm
import numpy as np

# Try imports with friendly errors
try:
 import faiss
except Exception as e:
 raise ImportError("faiss is required. Install cpu version: `pip install faiss-cpu` or install via conda for GPU (faiss-gpu).") from e

try:
 from sentence_transformers import SentenceTransformer
except Exception as e:
 raise ImportError("sentence-transformers is required. `pip install sentence-transformers`") from e

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from google.colab import userdata



# from Data_Cleaning import GetDataCleaning
# from Logger import GetLogger


class GetEmbeddings:
    """
    Embedding pipeline for cleaned text files.

    Generates embeddings using SentenceTransformers, builds a FAISS index,
    and allows searching queries against the vector database. Retrieved
    chunks can additionally be condensed into an answer by a causal LLM
    (see ``load_summarizer`` / ``summarize_chunks`` / ``answer_query``).
    """

    # Model used when the summarizer has to be (re)loaded without an explicit name.
    DEFAULT_SUMMARIZER = "google/gemma-2b"

    def __init__(self, config_path="config.json", logger=None):
        """
        Read the JSON config and set up paths, chunking options and the device.

        Args:
            config_path: Path to a JSON file with a ``paths`` section
                (``root``, ``faiss_index``, ``metadata``) and an ``embedding``
                section (``chunk_words``, ``batch_size``, ``model``, ``use_gpu``).
            logger: Accepted for interface compatibility; currently unused
                (progress is reported with ``print``).
        """
        with open(config_path, "r") as f:
            self.config = json.load(f)

        cfg_paths = self.config["paths"]
        cfg_emb = self.config["embedding"]

        self.root = cfg_paths["root"]
        self.cleaned_suffix = "_cleaned_txt"
        self.chunk_words = cfg_emb["chunk_words"]
        self.batch_size = cfg_emb["batch_size"]
        self.faiss_index_path = cfg_paths["faiss_index"]
        self.metadata_path = cfg_paths["metadata"]
        self.embedding_model = cfg_emb["model"]

        # Device: GPU only when CUDA works AND the config asks for it.
        self.device = "cuda" if self.check_cuda() and cfg_emb["use_gpu"] else "cpu"

        # Never hardcode credentials: take the Hugging Face token from the
        # environment (set HF_TOKEN before starting the kernel / Streamlit).
        # When unset this is None, which is passed through to from_pretrained
        # (presumably letting the library use its own stored login — verify).
        self.hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

    def check_cuda(self):
        """Return True if CUDA is available and usable."""
        try:
            if torch.cuda.is_available():
                _ = torch.cuda.current_device()
                print(f"✅ CUDA available. Device: {torch.cuda.get_device_name(0)}")
                return True
            print("⚠️ CUDA not available. Using CPU.")
            return False
        except Exception as e:
            print(f"Error checking CUDA, defaulting to CPU. Error: {e}")
            return False

    def list_cleaned_files(self):
        """Return sorted list of cleaned text files under root/*{cleaned_suffix}/*.txt"""
        pattern = os.path.join(self.root, f"*{self.cleaned_suffix}", "*.txt")
        return sorted(glob.glob(pattern))

    def read_text_file(self, path):
        """Read a text file and return string content."""
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    def chunk_text_words(self, text):
        """
        Simple word-based chunking.

        Splits ``text`` on whitespace and groups the words into consecutive
        chunks of at most ``self.chunk_words`` words.
        Returns list of text chunks (empty list for blank text).
        """
        words = text.split()
        if not words:
            return []
        return [" ".join(words[i:i + self.chunk_words]) for i in range(0, len(words), self.chunk_words)]

    def save_index_and_metadata(self):
        """Save FAISS index and metadata to disk, creating parent folders as needed."""
        for path in (self.faiss_index_path, self.metadata_path):
            parent = os.path.dirname(path)
            # dirname is "" for a bare filename; os.makedirs("") would raise.
            if parent:
                os.makedirs(parent, exist_ok=True)

        faiss.write_index(self.index, self.faiss_index_path)
        with open(self.metadata_path, "wb") as f:
            pickle.dump(self.metadata, f)
        print(f"💾 Saved FAISS index to {self.faiss_index_path}")
        print(f"💾 Saved metadata to {self.metadata_path}")

    def load_index_and_metadata(self):
        """
        Load FAISS index and metadata if they exist.

        Returns True when both files were read and are consistent with each
        other, False otherwise (the caller is expected to rebuild).
        """
        if not (os.path.exists(self.faiss_index_path) and os.path.exists(self.metadata_path)):
            return False

        try:
            self.index = faiss.read_index(self.faiss_index_path)
            # NOTE(security): pickle.load can execute arbitrary code. Only load
            # metadata files that this pipeline wrote itself.
            with open(self.metadata_path, "rb") as f:
                self.metadata = pickle.load(f)
        except Exception as e:
            print(f"⚠️ Failed to load FAISS index/metadata, will rebuild. Error: {e}")
            return False

        # Search results index straight into the metadata list, so a size
        # mismatch would return wrong chunks or raise at query time.
        if self.index.ntotal != len(self.metadata):
            print(
                "⚠️ Failed to load FAISS index/metadata, will rebuild. "
                f"Error: index has {self.index.ntotal} vectors but metadata has {len(self.metadata)} entries"
            )
            return False

        print(f"✅ Loaded existing FAISS index + metadata from disk.")
        return True

    def load_encoder(self):
        """
        Load the SentenceTransformer encoder once and return it.

        Repeated calls return the already-loaded encoder instead of loading
        the model again.
        """
        if getattr(self, "encoder", None) is not None:
            return self.encoder

        self.encoder = SentenceTransformer(self.embedding_model, device=self.device)
        print(f"Loaded embedding model '{self.embedding_model}' on {self.device}")
        return self.encoder

    def building_embeddings_index(self, files):
        """
        Build embeddings for all text chunks and return FAISS index + metadata.

        Args:
            files: Iterable of text file paths located under ``self.root``.

        Returns:
            ``(index, metadata)`` where ``metadata[i]`` describes vector ``i``.

        Raises:
            RuntimeError: If no chunk could be embedded.
        """
        encoder = self.load_encoder()

        embedding_batches, metadata = [], []
        next_id = 0
        # Iterate files and chunks
        for fp in tqdm(files, desc="Files", unit="file"):
            # Blank files produce no chunks and are skipped.
            chunks = self.chunk_text_words(self.read_text_file(fp))
            if not chunks:
                continue

            # metadata: infer company and file from path
            # e.g., financial_reports/Infosys_cleaned_txt/Infosys_2023_AR.txt
            rel = os.path.relpath(fp, self.root)
            folder = rel.split(os.sep)[0]
            filename = os.path.basename(fp)

            for start in range(0, len(chunks), self.batch_size):
                batch = chunks[start:start + self.batch_size]
                embs = encoder.encode(batch, show_progress_bar=False, convert_to_numpy=True)
                embedding_batches.append(np.asarray(embs, dtype=np.float32))

                for offset, chunk in enumerate(batch):
                    metadata.append({
                        "id": next_id,
                        "source_folder": folder,
                        "file": filename,
                        "chunk_id": start + offset,
                        "text": chunk  # store chunk text for retrieval
                    })
                    next_id += 1

        if not embedding_batches:
            raise RuntimeError("No embeddings were produced. Check cleaned files and chunking.")

        emb_matrix = np.ascontiguousarray(np.vstack(embedding_batches), dtype=np.float32)
        faiss.normalize_L2(emb_matrix)

        # Build FAISS index (IndexFlatIP over normalized vectors = cosine similarity)
        dim = emb_matrix.shape[1]
        self.index = faiss.IndexFlatIP(dim)
        self.index.add(emb_matrix)
        self.metadata = metadata
        print(f"✅ Built FAISS index with {self.index.ntotal} vectors, dim={dim}")

        return self.index, self.metadata

    def run(self):
        """
        Main entry: load or build embeddings + FAISS index.

        When a saved index is found the encoder is NOT loaded here;
        ``answer_query`` loads it on first use.

        Raises:
            SystemExit: If no index exists on disk and no cleaned files are found.
        """
        if self.load_index_and_metadata():
            return

        files = self.list_cleaned_files()
        if not files:
            print("❌ No cleaned text files found.")
            raise SystemExit(1)
        self.load_encoder()
        self.building_embeddings_index(files)
        self.save_index_and_metadata()

    def _release_summarizer(self):
        """Drop the loaded summarizer objects and free cached GPU memory."""
        import gc

        for attr in ("summarizer_pipeline", "summarizer_model"):
            if hasattr(self, attr):
                delattr(self, attr)
        # Collect first so the tensors are really gone before the cache is emptied.
        gc.collect()
        torch.cuda.empty_cache()

    def load_summarizer(self, model_name="google/gemma-2b"):
        """
        Load summarizer LLM once.
        If already loaded, skip.

        On a CUDA out-of-memory error the load is retried once on CPU
        (``self.device`` is switched to "cpu" for that). Any other
        RuntimeError is re-raised.
        """
        if hasattr(self, "summarizer_pipeline"):
            print("ℹ️ Summarizer already loaded, skipping reload.")
            return

        try:
            print(f"⏳ Loading summarizer model '{model_name}'...")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=self.hf_token)
            self.summarizer_model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map=self.device,
                token=self.hf_token
            )
            self.summarizer_pipeline = pipeline(
                "text-generation",
                model=self.summarizer_model,
                tokenizer=self.tokenizer
            )
            # Remembered so an OOM during generation can reload the same model on CPU.
            self.summarizer_model_name = model_name
            print(f"✅ Summarizer model '{model_name}' loaded successfully.")

        except RuntimeError as e:
            # Retry only while still on the GPU; this bounds the recursion to one level.
            if "CUDA out of memory" in str(e) and self.device == "cuda":
                print("⚠️ CUDA OOM while loading summarizer. Retrying on CPU...")
                self.device = "cpu"
                self._release_summarizer()
                return self.load_summarizer(model_name=model_name)
            print(f"❌ Failed to load summarizer: {e}")
            raise

    def summarize_chunks(self, chunks, max_content_tokens=2048, max_output_tokens=256):
        """
        Summarize list of text chunks using LLM.

        - Chunks are joined and, if needed, truncated to ``max_content_tokens``
          tokens as counted by the summarizer's tokenizer.
        - Generates a concise summary of at most ``max_output_tokens`` new tokens.

        Returns:
            The summary text. An empty string for empty ``chunks``. If
            generation fails, the first two chunks joined by a space.
        """
        if not chunks:
            return ""

        if not hasattr(self, "summarizer_pipeline"):
            self.load_summarizer()
            print("Summarizer not initialized. Called load_summarizer(). pipeline will work with default parameters.")

        # Join chunks into one context, respecting token budget
        context = " ".join(chunks)
        token_ids = self.tokenizer.encode(context, add_special_tokens=False)

        if len(token_ids) > max_content_tokens:
            # Trim on token boundaries: cutting by word count can still exceed
            # the budget because one word often maps to several tokens.
            context = self.tokenizer.decode(token_ids[:max_content_tokens], skip_special_tokens=True)
            print("⚠️ Context truncated to fit within model token limit.")

        # Build summarization prompt
        prompt = f"""
        Summarize the following financial report excerpts into a concise answer.
        Keep it factual, short, and grounded in the text.

        Excerpts:
        {context}

        Summary:
        """

        try:
            output = self.summarizer_pipeline(
                prompt,
                max_new_tokens=max_output_tokens,
                do_sample=False
            )[0]["generated_text"]

            # The pipeline output includes the prompt; keep what follows the last marker.
            if "Summary:" in output:
                return output.split("Summary:")[-1].strip()
            return output.strip()

        except RuntimeError as e:
            if "CUDA out of memory" in str(e) and self.device == "cuda":
                print("⚠️ CUDA OOM during summarization. Retrying on CPU...")
                model_name = getattr(self, "summarizer_model_name", self.DEFAULT_SUMMARIZER)
                # Flipping self.device alone would retry on the same GPU model,
                # so the model is dropped and reloaded on CPU. After this,
                # self.device is "cpu" and this branch cannot be taken again.
                self.device = "cpu"
                self._release_summarizer()
                try:
                    self.load_summarizer(model_name=model_name)
                    return self.summarize_chunks(chunks, max_content_tokens, max_output_tokens)
                except Exception as retry_err:
                    print(f"❌ Summarizer failed: {retry_err}. Falling back to raw chunks.")
                    return " ".join(chunks[:2])

            print(f"❌ Summarizer failed: {e}. Falling back to raw chunks.")
            return " ".join(chunks[:2])  # fallback: return first 2 chunks

    def answer_query(self, query, top_k=3, max_output_tokens=256):
        """
        End-to-end QA:
        - Retrieve relevant chunks from FAISS
        - Summarize into a final answer.

        Args:
            query: Question text.
            top_k: Number of chunks to retrieve.
            max_output_tokens: Generation budget forwarded to ``summarize_chunks``.

        Returns:
            The answer string, or None if retrieval found nothing or any step failed.
        """
        try:
            # Step 1: Retrieve
            print(f"🔍 searching vector DB for query: {query}")
            # run() skips the encoder when the index comes from disk, so load it lazily.
            encoder = self.load_encoder()
            q_emb = encoder.encode(query, show_progress_bar=False, convert_to_numpy=True)
            # FAISS expects a contiguous float32 matrix of shape (1, dim).
            q_emb = np.ascontiguousarray(q_emb.reshape(1, -1), dtype=np.float32)
            faiss.normalize_L2(q_emb)

            scores, idxs = self.index.search(q_emb, k=top_k)
            # FAISS pads with -1 when fewer than k vectors exist; indexing the
            # metadata list with -1 would silently return the last chunk.
            chunks = [self.metadata[idx]["text"] for idx in idxs[0] if idx >= 0]
            if not chunks:
                print("Error in answer_query: no chunks retrieved")
                return None

            # Step 2: Summarize
            summary = self.summarize_chunks(chunks, max_output_tokens=max_output_tokens)

            # Log results
            print(f"✅ Final Answer: {summary}")
            return summary

        except Exception as e:
            print(f"Error in answer_query: {e}")
            return None


# Example
# ge = GetEmbeddings()
# ge.run()
# # NEW STEP
# ge.load_summarizer("google/gemma-2b")
# answer = ge.answer_query("What are the key highlights from Q2 financial report?")
# print(answer)

Overwriting Embeddings.py


In [None]:
%%writefile Evaluator.py
import os
import json
import time
import numpy as np
from tqdm import tqdm

# from Logger import GetLogger, MetricsLogger
# from Embeddings import GetEmbeddings

# Metrics
from sklearn.metrics.pairwise import cosine_similarity
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from bert_score import score as bert_score

class Evaluator:
    """
    Evaluation pipeline for financial QA Agent.
    Uses eval_dataset.json to run queries, collect answers, and compute metrics.
    """

    # Numeric per-query metrics that are averaged into the run summary.
    _SUMMARY_KEYS = ("retrieval_quality", "rougeL", "bleu", "bertscore_f1", "latency_sec")

    def __init__(self, config_path="config.json", logger=None):
        """
        Load the config, initialise the QA agent and read the evaluation dataset.

        Args:
            config_path: JSON config; its ``paths`` section must contain
                ``eval_dataset`` and ``eval_results`` (``failed_queries`` is
                read only when a query fails).
            logger: Accepted for interface compatibility; currently unused.
        """
        # The module-level import of the agent is commented out, so without
        # this import the constructor fails with NameError.
        from Embeddings import GetEmbeddings

        with open(config_path, "r") as f:
            self.config = json.load(f)
        self.paths = self.config["paths"]

        # Initialize Agent
        self.agent = GetEmbeddings(config_path=config_path, logger=None)
        self.agent.run()  # Load or rebuild FAISS + embeddings
        self.agent.load_summarizer()  # Load summarizer
        self.encoder = self.agent.load_encoder()

        # Load Dataset
        self.dataset = self.load_dataset()
        self.results = []
        self.failed_queries = []

    def load_dataset(self):
        """Return the parsed evaluation dataset; raise FileNotFoundError if it is missing."""
        path = self.paths["eval_dataset"]
        if not os.path.exists(path):
            raise FileNotFoundError(f"Dataset not found: {path}")
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def measure_latency(self, func, *args, **kwargs):
        """Helper: call ``func`` and return ``(result, elapsed_seconds)``."""
        # perf_counter is monotonic, unlike time.time(), so the interval
        # cannot come out negative or skewed by wall-clock adjustments.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        latency = time.perf_counter() - start
        return result, latency

    def _record_failure(self, query, reference, error, latency=None):
        """Append one entry to ``self.failed_queries`` (written to disk by ``run``)."""
        self.failed_queries.append({
            "query": query,
            "reference": reference,
            "error": str(error),
            "latency_sec": latency
        })

    def evaluate_query(self, query, reference):
        """
        Run one query, compare answer vs. reference, compute metrics.

        Returns:
            A dict of metrics, or None when the agent produced no answer or a
            metric could not be computed. Failures are recorded in
            ``self.failed_queries``.
        """
        # Run pipeline
        system_answer, latency = self.measure_latency(self.agent.answer_query, query)

        # answer_query returns None when retrieval/summarization fails; the
        # metric code below would crash on a non-string answer.
        if not system_answer:
            print(f"Error evaluating query '{query}': no answer returned")
            self._record_failure(query, reference, "no answer returned", latency)
            return None

        try:
            # 1. Embedding similarity (proxy retrieval quality)
            ref_emb = self.encoder.encode([reference], convert_to_numpy=True)
            ans_emb = self.encoder.encode([system_answer], convert_to_numpy=True)
            retrieval_quality = float(cosine_similarity(ref_emb, ans_emb)[0][0])

            # 2. ROUGE-L
            scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
            rouge_score = float(scorer.score(reference, system_answer)['rougeL'].fmeasure)

            # 3. BLEU (with smoothing for short texts)
            smoothie = SmoothingFunction().method4
            bleu = float(sentence_bleu([reference.split()], system_answer.split(), smoothing_function=smoothie))

            # 4. BERTScore (semantic similarity)
            P, R, F1 = bert_score([system_answer], [reference], lang="en")
            bert_f1 = float(F1.mean())
        except Exception as e:
            print(f"Error evaluating query '{query}': {e}")
            self._record_failure(query, reference, e, latency)
            return None

        return {
            "query": query,
            "reference": reference,
            "system_answer": system_answer,
            "retrieval_quality": retrieval_quality,
            "rougeL": rouge_score,
            "bleu": bleu,
            "bertscore_f1": bert_f1,
            "latency_sec": latency
        }

    def _build_summary(self):
        """Return mean metrics over ``self.results``, or None when there are no results."""
        if not self.results:
            return None
        summary = {
            "num_queries": len(self.dataset),
            "num_answered": len(self.results),
            "num_failed": len(self.failed_queries)
        }
        for key in self._SUMMARY_KEYS:
            summary[f"mean_{key}"] = float(np.mean([r[key] for r in self.results]))
        return summary

    @staticmethod
    def _write_json(path, payload):
        """Write ``payload`` as indented JSON, creating the parent directory if needed."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

    def run(self):
        """
        Run evaluation on entire dataset.

        Each dataset item must provide ``query`` and ``reference``.

        Returns:
            ``(results, summary)`` — the per-query metric dicts and the mean
            metrics (None if no query succeeded).
        """
        print("Starting Evaluation...")

        for item in tqdm(self.dataset, desc="Queries"):
            result = self.evaluate_query(item["query"], item["reference"])
            if result:
                self.results.append(result)

        # Save result
        self._write_json(self.paths["eval_results"], self.results)

        if self.failed_queries:
            self._write_json(self.paths["failed_queries"], self.failed_queries)

        # Save metrics summary
        summary = self._build_summary()
        print(f"Evaluation Complete.")
        print(f"📊 Evaluation summary: {summary}")

        return self.results, summary


if __name__ == "__main__":
    # Script entry point: run the whole evaluation, then echo a short report.
    evaluator = Evaluator()
    results, summary = evaluator.run()

    # (heading, payload) pairs; only the first two results are shown as a sample.
    report_sections = (
        ("Sample Results", results[:2]),
        ("Summary", summary),
    )
    for heading, payload in report_sections:
        print(f"\n=== {heading} ===")
        print(json.dumps(payload, indent=2))


Writing Evaluator.py


In [None]:
%%writefile app.py
import streamlit as st
import json
import faiss
import numpy as np
import re
from Embeddings import GetEmbeddings
from Logger import GetLogger

# ================================
# Page Config
# ================================
# set_page_config has to be the first Streamlit command of the script. The
# cached load_agent() call below counts as one, so the page is configured
# before anything else touches Streamlit.
st.set_page_config(page_title="Financial QA Agent", layout="wide")

# ================================
# Load Config
# ================================
with open("config.json", "r") as f:
    config = json.load(f)

# Initialize Logger
log_obj = GetLogger()
logger = log_obj.get_logger()

# Initialize QA Agent
@st.cache_resource
def load_agent():
    """
    Build the QA agent and return ``(agent, encoder)``.

    Wrapped in st.cache_resource, so the index, the encoder and the
    summarizer are loaded once rather than on every script rerun.
    """
    agent = GetEmbeddings(config_path="config.json", logger=logger)
    agent.run()  # load or build FAISS index
    encoder = agent.load_encoder()
    agent.load_summarizer()
    return agent, encoder

agent, encoder = load_agent()

# ================================
# Streamlit UI
# ================================

# --- Header ---
st.title("💹 Financial Report QA Agent")
st.markdown(
    """
    Welcome!
    This tool lets you **query annual financial reports** (Infosys, ICICI Bank, etc.)
    and get **summarized answers** with supporting evidence from the text.
    """
)

# Sidebar - Settings
st.sidebar.header("⚙️ Settings")
top_k = st.sidebar.slider("Top K Chunks", 1, 10, 3)
max_output_tokens = st.sidebar.slider("Max Summary Tokens", 64, 512, 256)

# --- Keyword highlighting ---
def highlight_keywords(text, keywords=("risk", "revenue", "profit", "growth", "loss")):
    """
    Wrap every whole-word, case-insensitive keyword match in Markdown bold.

    Args:
        text: Text to annotate.
        keywords: Literal words/phrases to highlight. They are regex-escaped,
            so characters such as ``+`` or ``.`` match themselves.

    Returns:
        ``text`` with each match surrounded by ``**``. The input is returned
        unchanged when ``text`` or ``keywords`` is empty.
    """
    # An empty alternation would match at every word boundary and insert
    # "****" all over the text, so bail out early.
    if not text or not keywords:
        return text

    alternation = "|".join(re.escape(word) for word in keywords)
    pattern = re.compile(r"\b(" + alternation + r")\b", re.IGNORECASE)
    return pattern.sub(lambda m: f"**{m.group(0)}**", text)

# --- Session State for Query History ---
if "history" not in st.session_state:
    st.session_state["history"] = []

# --- Query input ---
query = st.text_input("🔍 Enter your question:", placeholder="e.g., What are the main risk factors in 2023?")

if st.button("Get Answer"):
    if query.strip() == "":
        st.warning("Please enter a query.")
    else:
        with st.spinner("Searching reports..."):
            try:
                # --- Retrieve once; the hits feed both the summary and the evidence list ---
                q_emb = encoder.encode(query, convert_to_numpy=True).reshape(1, -1)
                # FAISS needs contiguous float32, so cast BEFORE normalizing.
                q_emb = np.ascontiguousarray(q_emb, dtype=np.float32)
                faiss.normalize_L2(q_emb)
                scores, idxs = agent.index.search(q_emb, k=top_k)

                # FAISS pads with -1 when fewer than k vectors exist; metadata[-1]
                # would silently show the last chunk instead.
                hits = [
                    (score, agent.metadata[idx])
                    for score, idx in zip(scores[0], idxs[0])
                    if idx >= 0
                ]

                answer = None
                if hits:
                    # Summarize directly so the "Max Summary Tokens" slider takes effect.
                    answer = agent.summarize_chunks(
                        [meta["text"] for _, meta in hits],
                        max_output_tokens=max_output_tokens
                    )

                if not answer:
                    st.warning("No answer could be generated for this query.")
                    logger.error(f"No answer generated for query: {query}")
                else:
                    # --- Display final answer ---
                    st.subheader("📌 Answer")
                    st.success(answer)

                    # --- Show supporting chunks ---
                    st.subheader("📂 Supporting Chunks")
                    for score, meta in hits:
                        with st.expander(f"📄 {meta['file']} | Chunk {meta['chunk_id']} | Score: {score:.4f}"):
                            chunk_text = highlight_keywords(meta['text'][:1000])
                            st.markdown(chunk_text)

                    # --- Save Query & Answer to History ---
                    st.session_state["history"].append({"query": query, "answer": answer})

                    # --- Log query + answer ---
                    logger.info(f"User Query: {query}")
                    logger.info(f"System Answer: {answer}")

                    # --- Save this session's history as JSON (the file is overwritten) ---
                    with open("ui_query_history.json", "w", encoding="utf-8") as f:
                        json.dump(st.session_state["history"], f, indent=2)

            except Exception as e:
                st.error(f"Error: {e}")
                logger.error(f"Streamlit UI error: {e}")

# --- Show History in Sidebar ---
if st.session_state["history"]:
    st.sidebar.subheader("🕘 Query History")
    for item in st.session_state["history"][-5:]:  # show last 5 queries
        st.sidebar.write(f"**Q:** {item['query']}")
        # str() guards against a non-string answer stored in an older session entry.
        st.sidebar.write(f"**A:** {str(item['answer'])[:100]}...")
        st.sidebar.markdown("---")


Overwriting app.py


In [None]:
import threading, os

# Kill anything on port 8501 (just in case)
os.system("kill -9 $(lsof -t -i:8501) 2>/dev/null")

# Run Streamlit in background
def run_app():
 os.system("streamlit run app.py --server.port 8501")

thread = threading.Thread(target=run_app)
thread.start()

# Start cloudflared tunnel
!cloudflared tunnel --url http://localhost:8501 --no-autoupdate


[90m2025-09-29T13:35:21Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2025-09-29T13:35:21Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
[90m2025-09-29T13:35:25Z[0m [32mINF[0m +--------------------------------------------------------------------------------------------+
[90m2025-09-29T13:35:25Z[0m [32mINF[0m | Your quick Tunnel has been created! Visit it at (it may take some time to be reachable): |
[90m2025-0