tonic committed on
Commit
4643020
·
1 Parent(s): 553c8dd

Update app.py

Browse files
Files changed (1) hide show
  1. backend/app.py +54 -7
backend/app.py CHANGED
@@ -69,14 +69,49 @@ Article = {
69
  # "vectorizer": "text2vec-contextionary"
70
  }
71
 
72
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  schema = {
74
  "classes": [Article]
75
  }
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Initialize vectorstore
78
  vectorstore = Weaviate(client, index_name="HereChat", text_key="text")
79
- client.schema.create(schema)
80
  vectorstore._query_attrs = ["text", "title", "url", "views", "lang", "_additional {distance}"]
81
  vectorstore.embedding = CohereEmbeddings(model="embed-multilingual-v2.0", cohere_api_key=cohere_api_key)
82
 
@@ -87,8 +122,16 @@ def embed_pdf(file, collection_name):
87
  # Save the uploaded file
88
  filename = file.name
89
  file_path = os.path.join('./', filename)
 
 
 
 
 
 
 
 
90
  with open(file_path, 'wb') as f:
91
- f.write(file.read())
92
 
93
  # Checking filetype for document parsing
94
  mime_type = mimetypes.guess_type(file_path)[0]
@@ -121,13 +164,15 @@ def retrieve_info(query):
121
  # Rerank the top results
122
  reranked_results = co.rerank(query=query, documents=top_docs, top_n=3, model='rerank-english-v2.0')
123
 
124
- # Format the reranked results
125
  formatted_results = []
126
  for idx, r in enumerate(reranked_results):
127
  formatted_result = {
128
  "Document Rank": idx + 1,
129
- "Document Index": r.index,
130
- "Document": r.document['text'],
 
 
131
  "Relevance Score": f"{r.relevance_score:.2f}"
132
  }
133
  formatted_results.append(formatted_result)
@@ -162,12 +207,14 @@ def retrieve_info(query):
162
 
163
  def combined_interface(query, file, collection_name):
164
  if query:
165
- return retrieve_info(query)
 
166
  elif file is not None and collection_name:
167
  return embed_pdf(file, collection_name)
168
  else:
169
  return "Please enter a query or upload a PDF file."
170
 
 
171
  iface = gr.Interface(
172
  fn=combined_interface,
173
  inputs=[
 
69
  # "vectorizer": "text2vec-contextionary"
70
  }
71
 
72
+ # Function to check if a class exists in the schema
73
+ def class_exists(class_name):
74
+ try:
75
+ existing_schema = client.schema.get()
76
+ existing_classes = [cls["class"] for cls in existing_schema["classes"]]
77
+ return class_name in existing_classes
78
+ except Exception as e:
79
+ print(f"Error checking if class exists: {e}")
80
+ return False
81
+
82
+ # Check if 'Article' class already exists
83
+ if not class_exists("Article"):
84
+ # Create the schema if 'Article' class does not exist
85
+ try:
86
+ client.schema.create(schema)
87
+ except Exception as e:
88
+ print(f"Error creating schema: {e}")
89
+ else:
90
+ print("Class 'Article' already exists in the schema.")
91
+
92
+ # Initialize the schema
93
  schema = {
94
  "classes": [Article]
95
  }
96
 
97
+ # Check if 'Article' class already exists
98
+ if not class_exists("Article"):
99
+ # Create the schema if 'Article' class does not exist
100
+ try:
101
+ client.schema.create(schema)
102
+ except Exception as e:
103
+ print(f"Error creating schema: {e}")
104
+ else:
105
+ # Retrieve the existing schema if 'Article' class exists
106
+ try:
107
+ existing_schema = client.schema.get()
108
+ print("Existing schema retrieved:", existing_schema)
109
+ except Exception as e:
110
+ print(f"Error retrieving existing schema: {e}")
111
+
112
+
113
  # Initialize vectorstore
114
  vectorstore = Weaviate(client, index_name="HereChat", text_key="text")
 
115
  vectorstore._query_attrs = ["text", "title", "url", "views", "lang", "_additional {distance}"]
116
  vectorstore.embedding = CohereEmbeddings(model="embed-multilingual-v2.0", cohere_api_key=cohere_api_key)
117
 
 
122
  # Save the uploaded file
123
  filename = file.name
124
  file_path = os.path.join('./', filename)
125
+
126
+ # Check if the file object has 'read' method
127
+ if hasattr(file, 'read'):
128
+ file_content = file.read()
129
+ else:
130
+ # Handle the case where 'read' method is not available
131
+ file_content = file.getvalue() # Assuming it's a NamedString or similar object
132
+
133
  with open(file_path, 'wb') as f:
134
+ f.write(file_content)
135
 
136
  # Checking filetype for document parsing
137
  mime_type = mimetypes.guess_type(file_path)[0]
 
164
  # Rerank the top results
165
  reranked_results = co.rerank(query=query, documents=top_docs, top_n=3, model='rerank-english-v2.0')
166
 
167
+ # Format the reranked results according to the Article schema
168
  formatted_results = []
169
  for idx, r in enumerate(reranked_results):
170
  formatted_result = {
171
  "Document Rank": idx + 1,
172
+ "Title": r.document['title'],
173
+ "Content": r.document['content'],
174
+ "Author": r.document['author'],
175
+ "Publish Date": r.document['publishDate'],
176
  "Relevance Score": f"{r.relevance_score:.2f}"
177
  }
178
  formatted_results.append(formatted_result)
 
207
 
208
  def combined_interface(query, file, collection_name):
209
  if query:
210
+ article_info = retrieve_info(query)
211
+ return article_info
212
  elif file is not None and collection_name:
213
  return embed_pdf(file, collection_name)
214
  else:
215
  return "Please enter a query or upload a PDF file."
216
 
217
+
218
  iface = gr.Interface(
219
  fn=combined_interface,
220
  inputs=[