Docfile committed on
Commit aafae3c · verified · 1 Parent(s): 8ee41bc

Update app.py

Files changed (1)
app.py +113 -38
app.py CHANGED
@@ -4,13 +4,23 @@ from llama_index.core import (
     VectorStoreIndex,
     SimpleDirectoryReader,
     Settings,
+    PromptTemplate,
+    QueryBundle,
 )
-from llama_index.core import PromptTemplate
 from llama_index.llms.gemini import Gemini
 from llama_index.embeddings.gemini import GeminiEmbedding
+from llama_index.core import get_response_synthesizer
+from llama_index.core.node_parser import SemanticSplitterNodeParser
+from llama_index.core.retrievers import VectorIndexRetriever
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.core.query_transform import HyDEQueryTransform
+from llama_index.core.postprocessor import SentenceTransformerRerank
+from llama_index.core import load_index_from_storage
+from llama_index.core import StorageContext
+from llama_index.core.retrievers import QueryFusionRetriever
+from dotenv import load_dotenv
 import logging
 import google.generativeai as genai
-from dotenv import load_dotenv
 from pathlib import Path
 
 load_dotenv()
@@ -18,20 +28,29 @@ load_dotenv()
 # Set logging level
 logging.basicConfig(level=logging.INFO)
 
-
 # Configure Gemini Pro
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
-model_gemini_pro_vision = "gemini-1.5-flash"
+model_gemini_pro_vision = "gemini-pro-vision"
+model_gemini_pro = "gemini-pro"
+
 
 # Configure Gemini models
-Settings.llm = Gemini(model=model_gemini_pro_vision,
-                      api_key=os.getenv("GOOGLE_API_KEY"))
+Settings.llm = Gemini(model=model_gemini_pro, api_key=os.getenv("GOOGLE_API_KEY"))
 Settings.embed_model = GeminiEmbedding(
     model_name="models/embedding-001",
-    api_key=os.getenv("GOOGLE_API_KEY")
+    api_key=os.getenv("GOOGLE_API_KEY")
+)
+
+
+# Function to create a Semantic Splitter Node Parser
+def create_semantic_splitter_node_parser():
+    """Creates a semantic splitter."""
+    return SemanticSplitterNodeParser(
+        buffer_size=1, breakpoint_percentile_threshold=95, embed_model=Settings.embed_model
 )
 
+
 def load_and_index_pdf(pdf_path):
     """Loads and index the pdf.
 
@@ -42,25 +61,69 @@ def load_and_index_pdf(pdf_path):
         index (llama_index.core.VectorStoreIndex): The vector index
     """
     try:
-        logging.info(f"Loading PDF document from: {pdf_path}")
-        documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
-        if documents:
+        logging.info(f"Loading PDF document from: {pdf_path}")
+        documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
+        if documents:
+            logging.info("Creating semantic splitter")
+            node_parser = create_semantic_splitter_node_parser()
+            nodes = node_parser.get_nodes_from_documents(documents)
             logging.info("Creating vector store index")
-            index = VectorStoreIndex.from_documents(documents)
+            index = VectorStoreIndex(nodes=nodes)
             return index
-        else:
+        else:
             logging.warning("No documents found in the PDF")
             return None
     except Exception as e:
         logging.error(f"Error loading and indexing PDF: {e}")
         return None
+
+
+def create_rag_pipeline(index):
+    """Creates a RAG pipeline for translation.
 
-def translate_text(french_text, index):
-    """Translates french text to Yipunu.
+    Args :
+        index (llama_index.core.VectorStoreIndex): The vector index.
+
+    Returns :
+        query_engine(llama_index.core.query_engine.RetrieverQueryEngine): The query engine
+    """
+
+    logging.info("Initializing RAG Pipeline components")
+    # setup retriever
+
+    retriever = VectorStoreIndex(
+        index.nodes,
+    ).as_retriever(similarity_top_k=5)
+
+
+    # setup query transformer
+    hyde_query_transform = HyDEQueryTransform(llm=Settings.llm)
+
+    # setup reranker
+    reranker = SentenceTransformerRerank(top_n=3, model="BAAI/bge-reranker-base")
+
+    # response_synthesizer
+    response_synthesizer = get_response_synthesizer(
+        response_mode="refine",
+    )
+
+    # setup query engine
+    query_engine = RetrieverQueryEngine(
+        retriever=retriever,
+        response_synthesizer=response_synthesizer,
+        node_postprocessors=[reranker],
+        query_transform= hyde_query_transform
+    )
+
+    logging.info("RAG Pipeline is configured.")
+    return query_engine
+
+def translate_text(french_text, query_engine):
+    """Translates french text to Yipunu using a highly optimized RAG.
 
     Args :
         french_text (str): The french text to translate.
-        index (llama_index.core.VectorStoreIndex): The vector index.
+        query_engine (llama_index.core.query_engine.RetrieverQueryEngine): The query engine.
 
     Returns:
         (str): The yipunu translation or an error message.
@@ -76,43 +139,55 @@ def translate_text(french_text, index):
         )
 
         prompt_template = PromptTemplate(template)
-
-        query_engine = index.as_query_engine(
-            text_qa_template=prompt_template
-        )
-        response = query_engine.query(french_text)
+        query_bundle = QueryBundle(french_text, custom_prompt=prompt_template)
+        response = query_engine.query(query_bundle)
        logging.info(f"Translation Result: {response.response}")
        return response.response
    except Exception as e:
        logging.error(f"Error during translation: {e}")
        return f"Error during translation: {str(e)}"
-
+
+
 
 def main():
     """Main function for streamlit app."""
 
     st.title("French to Yipunu Translation App")
-
-    # PDF File Upload
-    uploaded_file = st.file_uploader("Upload a PDF file containing the Punu grammar:", type="pdf")
-
-    if uploaded_file is not None:
-        # Save file to a temporary location
-        temp_file_path = Path("temp_file.pdf")
-        with open(temp_file_path, "wb") as f:
-            f.write(uploaded_file.read())
-
-        index = load_and_index_pdf(str(temp_file_path))
+
+    # Construct the path to the PDF in the data folder
+    default_pdf_path = Path("data/parlons_yipunu.pdf")
+
+    # Check if the default pdf_file exists.
+    if default_pdf_path.exists():
+        index = load_and_index_pdf(str(default_pdf_path))
         if index:
+            query_engine = create_rag_pipeline(index)
             french_text = st.text_area("Enter French Text:", "Ni vosi yipunu")
             if st.button("Translate"):
-                translation = translate_text(french_text, index)
-                st.success(f"Yipunu Translation: {translation}")
-
-        # Clean up temp files
-        os.remove(temp_file_path)
+                translation = translate_text(french_text, query_engine)
+                st.success(f"Yipunu Translation: {translation}")
     else:
-        st.info("Please upload a pdf containing the punu grammar.")
+        # PDF File Upload
+        uploaded_file = st.file_uploader("Upload a PDF file containing the Punu grammar:", type="pdf")
+        if uploaded_file is not None:
+            # Save file to a temporary location
+            temp_file_path = Path("temp_file.pdf")
+            with open(temp_file_path, "wb") as f:
+                f.write(uploaded_file.read())
+
+            index = load_and_index_pdf(str(temp_file_path))
+            if index:
+                query_engine = create_rag_pipeline(index)
+                french_text = st.text_area("Enter French Text:", "Ni vosi yipunu")
+                if st.button("Translate"):
+                    translation = translate_text(french_text, query_engine)
+                    st.success(f"Yipunu Translation: {translation}")
+
+            # Clean up temp files
+            os.remove(temp_file_path)
+        else:
+            st.info("Please upload a pdf containing the punu grammar.")
+
 
 
 if __name__ == "__main__":
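
For reference, a note on the new pipeline wiring: the added code passes query_transform= to RetrieverQueryEngine, custom_prompt= to QueryBundle, and rebuilds a retriever from index.nodes; as far as I know, none of these keywords or attributes exist in llama-index 0.10.x, so the engine would likely fail at runtime. Below is a minimal sketch, under that assumption, of how the same components (retriever, HyDE transform, reranker, refine synthesizer, custom prompt) are usually assembled. The helper name build_translation_engine and the template_str argument are illustrative stand-ins, and the import paths follow the 0.10.x docs and may differ in other releases.

# Hedged sketch (not part of the commit): wiring retriever + HyDE + reranker +
# refine synthesizer with constructor arguments that, to my knowledge, the
# llama-index 0.10.x API accepts. Names below are illustrative stand-ins.
from llama_index.core import PromptTemplate, get_response_synthesizer
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine, TransformQueryEngine


def build_translation_engine(index, template_str):
    """Assembles retriever -> reranker -> refine synthesizer, then wraps it with HyDE."""
    # Reuse the index returned by load_and_index_pdf(); no second VectorStoreIndex is needed.
    retriever = index.as_retriever(similarity_top_k=5)

    # The translation prompt goes to the response synthesizer, not to QueryBundle.
    # template_str is assumed to contain {context_str} and {query_str} placeholders.
    synthesizer = get_response_synthesizer(
        response_mode="refine",
        text_qa_template=PromptTemplate(template_str),
    )

    reranker = SentenceTransformerRerank(top_n=3, model="BAAI/bge-reranker-base")

    base_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=synthesizer,
        node_postprocessors=[reranker],
    )

    # HyDE is attached by wrapping the engine rather than via a RetrieverQueryEngine kwarg.
    hyde = HyDEQueryTransform(include_original=True)
    return TransformQueryEngine(base_engine, query_transform=hyde)


# Usage: query with the raw French string instead of a QueryBundle.
# engine = build_translation_engine(index, template)
# print(engine.query("Ni vosi yipunu").response)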