shukdevdatta123 commited on
Commit
69022a0
·
verified ·
1 Parent(s): aab2154

Upload 2 files

Browse files
Files changed (2) hide show
  1. data_summary.py +31 -0
  2. pdf.py +29 -0
data_summary.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data_summary.py
2
+
3
+ from llama_index.core.tools import FunctionTool
4
+
5
+ import pandas as pd
6
+ import os
7
+
8
# Location of the generated summary, relative to the working directory.
summary_file = os.path.join("data", "data_summary.txt")


def save_data_summary(df):
    """Write the descriptive statistics of ``df`` to ``summary_file``.

    The text produced by ``df.describe().to_string()`` is written below a
    ``"Data Summary:"`` header, replacing any previous contents of the file.

    Args:
        df: Object exposing a pandas-style ``describe()`` whose result has
            ``to_string()`` (e.g. a ``pandas.DataFrame``).

    Returns:
        The fixed status string ``"data summary saved"``.
    """
    summary = df.describe().to_string()

    # Create the target folder on demand so the write does not fail with
    # FileNotFoundError when the directory has not been created yet.
    parent_dir = os.path.dirname(summary_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Mode "w" both creates and truncates the file, so no separate
    # "create if missing" step is needed.
    with open(summary_file, "w", encoding="utf-8") as f:
        f.write("Data Summary:\n")
        f.write(summary)

    return "data summary saved"
21
+
22
# Population dataset: path is relative to the working directory and the CSV
# is read once, when this module is imported.
population_path = os.path.join("data", "Population.csv")
population_df = pd.read_csv(population_path)

# Expose the summary writer as a tool. The wrapped callable takes no
# arguments and always summarises the module-level dataframe above.
data_summary_tool = FunctionTool.from_defaults(
    name="data_summary_saver",
    description="This tool generates and saves a summary of the data to a file.",
    fn=lambda: save_data_summary(population_df),
)
pdf.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A VectorStoreIndex works by taking all of our data and creating embeddings from it, which are
2
+ # multidimensional vectors that we can index and query very quickly; matches are found by checking the
3
+ # similarity of intent and words.
4
+ # We turn our data into a VectorStoreIndex, then go to that index with our query and very quickly retrieve the
5
+ # specific parts of this unstructured data that we're looking for in order to answer the question.
6
+
7
+ import os
8
+ from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
9
+ from llama_index.readers.file.docs.base import PDFReader
10
+
11
+
12
def get_index(data, index_name):
    """Return a vector index for ``data``, reusing a persisted copy if present.

    When the path ``index_name`` already exists, the index is loaded from
    that directory. Otherwise a new ``VectorStoreIndex`` is built from
    ``data`` and persisted to ``index_name`` before being returned.

    Args:
        data: Documents passed to ``VectorStoreIndex.from_documents``.
        index_name: Directory path used as the persist location.

    Returns:
        The loaded or newly built index.
    """
    # Fast path: a previous run already persisted this index.
    if os.path.exists(index_name):
        storage = StorageContext.from_defaults(persist_dir=index_name)
        return load_index_from_storage(storage)

    print("building index", index_name)
    built = VectorStoreIndex.from_documents(data, show_progress=True)
    # Save the freshly created index so later calls can load it instead.
    built.storage_context.persist(persist_dir=index_name)
    return built
24
+
25
+
26
# Read the source PDF (path relative to the working directory).
pdf_path = os.path.join("data", "Bangladesh.pdf")
bangladesh_pdf = PDFReader().load_data(file=pdf_path)

# Build the index (or load it from the "bangladesh" directory) and expose
# a query engine on top of it.
bangladesh_index = get_index(data=bangladesh_pdf, index_name="bangladesh")
bangladesh_engine = bangladesh_index.as_query_engine()