Upload 2 files
Browse files- data_summary.py +31 -0
- pdf.py +29 -0
data_summary.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# data_summary.py
#
# Defines a llama_index FunctionTool that writes summary statistics for the
# population dataset to a text file on disk.

from llama_index.core.tools import FunctionTool
import pandas as pd
import os
|
7 |
+
|
8 |
+
# Destination path for the generated summary text.
summary_file = os.path.join("data", "data_summary.txt")


# Function to generate and save data summary
def save_data_summary(df):
    """Write ``df.describe()`` statistics to ``summary_file``.

    Overwrites any previous summary. Returns a short status string so the
    calling tool/agent framework receives confirmation text.
    """
    summary = df.describe().to_string()
    # Ensure the parent directory exists. The original pre-created the file
    # with open(..., "w").close() when missing — redundant, since opening in
    # "w" mode below already creates it; but it would still crash if the
    # "data" directory itself was absent, which makedirs fixes.
    os.makedirs(os.path.dirname(summary_file) or ".", exist_ok=True)
    with open(summary_file, "w") as f:
        f.write("Data Summary:\n")
        f.write(summary)
    return "data summary saved"
|
21 |
+
|
22 |
+
# Path to the population CSV this tool summarizes.
population_path = os.path.join("data", "Population.csv")

# Loaded once at import time. NOTE(review): this is a module-level side effect
# and raises FileNotFoundError if data/Population.csv is missing — confirm the
# working directory when this module is imported.
population_df = pd.read_csv(population_path)

# Create FunctionTool for data summary saving
data_summary_tool = FunctionTool.from_defaults(
    fn=lambda: save_data_summary(population_df),  # Use the global dataframe
    name="data_summary_saver",
    description="This tool generates and saves a summary of the data to a file.",
)
|
pdf.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# VectorStoreIndex works by taking all of our data and creating embeddings —
# multidimensional vector objects — which we can index and query very quickly
# in this database, matching on the similarity of intent and wording.
# Once our data is turned into a VectorStoreIndex, we can send our query to
# that index and very quickly retrieve the specific parts of the unstructured
# data we need in order to answer the question.
|
6 |
+
|
7 |
+
import os
|
8 |
+
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
|
9 |
+
from llama_index.readers.file.docs.base import PDFReader
|
10 |
+
|
11 |
+
|
12 |
+
def get_index(data, index_name):
    """Build or load a persisted ``VectorStoreIndex``.

    If the ``index_name`` directory does not exist yet, embed ``data``
    (a list of Documents), build a fresh index, and persist it there;
    otherwise load the previously persisted index from that directory.
    Returns the index in either case.
    """
    # (Dropped the dead `index = None` initializer — both branches assign it.)
    if not os.path.exists(index_name):
        print("building index", index_name)
        index = VectorStoreIndex.from_documents(data, show_progress=True)
        # Save the newly created index into a folder for reuse on later runs.
        index.storage_context.persist(persist_dir=index_name)
    else:
        index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=index_name)
        )

    return index
|
24 |
+
|
25 |
+
|
26 |
+
# Load the PDF into Document objects, build or load the persisted vector
# index, and expose a query engine over it — all at import time.
pdf_path = os.path.join("data", "Bangladesh.pdf")
bangladesh_pdf = PDFReader().load_data(file=pdf_path)  # NOTE(review): raises if data/Bangladesh.pdf is missing
bangladesh_index = get_index(bangladesh_pdf, "bangladesh")  # persisted under ./bangladesh
bangladesh_engine = bangladesh_index.as_query_engine()
|