MVPilgrim committed on
Commit
cee962e
·
1 Parent(s): 18a3a1b

Don't create objs each time.

Browse files
Files changed (1) hide show
  1. semsearch.py +90 -91
semsearch.py CHANGED
@@ -157,6 +157,7 @@ client = weaviate.WeaviateClient(
157
  grpc_host="localhost",
158
  grpc_port="50051",
159
  grpc_secure=False,
 
160
  ),
161
  # auth_client_secret=weaviate.auth.AuthApiKey("secr3tk3y"),
162
  # additional_headers={
@@ -174,107 +175,105 @@ client.connect()
174
  #wpCollection = createWebpageCollection()
175
  #wpChunkCollection = createChunksCollection()
176
  logger.info("#### createWebpageCollection() entered.")
177
- if client.collections.exists("Documents"):
178
- client.collections.delete("Documents")
179
-
180
- class_obj = {
181
- "class": "Documents",
182
- "description": "For first attempt at loading a Weviate database.",
183
- "vectorizer": "text2vec-transformers",
184
- "moduleConfig": {
185
- "text2vec-transformers": {
186
- "vectorizeClassName": False
187
- }
188
- },
189
- "vectorIndexType": "hnsw",
190
- "vectorIndexConfig": {
191
- "distance": "cosine",
192
- },
193
- "properties": [
194
- {
195
- "name": "title",
196
- "dataType": ["text"],
197
- "description": "HTML doc title.",
198
- "vectorizer": "text2vec-transformers",
199
- "moduleConfig": {
200
- "text2vec-transformers": {
201
- "vectorizePropertyName": True,
202
- "skip": False,
203
- "tokenization": "lowercase"
 
 
 
 
 
 
204
  }
205
  },
206
- "invertedIndexConfig": {
207
- "bm25": {
208
- "b": 0.75,
209
- "k1": 1.2
210
- },
211
- }
212
- },
213
- {
214
- "name": "content",
215
- "dataType": ["text"],
216
- "description": "HTML page content.",
217
- "moduleConfig": {
218
- "text2vec-transformers": {
219
- "vectorizePropertyName": True,
220
- "tokenization": "whitespace"
221
  }
222
  }
223
- }
224
- ]
225
- }
226
- wpCollection = client.collections.create_from_dict(class_obj)
227
 
228
  logger.info("#### createChunksCollection() entered.")
229
- if client.collections.exists("Chunks"):
230
- client.collections.delete("Chunks")
231
-
232
- class_obj = {
233
- "class": "Chunks",
234
- "description": "Collection for document chunks.",
235
- "vectorizer": "text2vec-transformers",
236
- "moduleConfig": {
237
- "text2vec-transformers": {
238
- "vectorizeClassName": True
239
- }
240
- },
241
- "vectorIndexType": "hnsw",
242
- "vectorIndexConfig": {
243
- "distance": "cosine",
244
- },
245
- "properties": [
246
- {
247
- "name": "chunk",
248
- "dataType": ["text"],
249
- "description": "Single webpage chunk.",
250
- "vectorizer": "text2vec-transformers",
251
- "moduleConfig": {
252
- "text2vec-transformers": {
253
- "vectorizePropertyName": False,
254
- "skip": False,
255
- "tokenization": "lowercase"
256
- }
257
  }
258
  },
259
- {
260
- "name": "chunk_index",
261
- "dataType": ["int"]
262
  },
263
- {
264
- "name": "webpage",
265
- "dataType": ["Documents"],
266
- "description": "Webpage content chunks.",
267
-
268
- "invertedIndexConfig": {
269
- "bm25": {
270
- "b": 0.75,
271
- "k1": 1.2
 
 
 
272
  }
273
- }
274
- }
275
- ]
276
- }
277
- wpChunkCollection = client.collections.create_from_dict(class_obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
 
280
  ###########################################################
 
157
  grpc_host="localhost",
158
  grpc_port="50051",
159
  grpc_secure=False,
160
+ log_level="WARNING"
161
  ),
162
  # auth_client_secret=weaviate.auth.AuthApiKey("secr3tk3y"),
163
  # additional_headers={
 
175
  #wpCollection = createWebpageCollection()
176
  #wpChunkCollection = createChunksCollection()
177
  logger.info("#### createWebpageCollection() entered.")
178
+ if not client.collections.exists("Documents"):
179
+ #client.collections.delete("Documents")
180
+ class_obj = {
181
+ "class": "Documents",
182
+ "description": "For first attempt at loading a Weviate database.",
183
+ "vectorizer": "text2vec-transformers",
184
+ "moduleConfig": {
185
+ "text2vec-transformers": {
186
+ "vectorizeClassName": False
187
+ }
188
+ },
189
+ "vectorIndexType": "hnsw",
190
+ "vectorIndexConfig": {
191
+ "distance": "cosine",
192
+ },
193
+ "properties": [
194
+ {
195
+ "name": "title",
196
+ "dataType": ["text"],
197
+ "description": "HTML doc title.",
198
+ "vectorizer": "text2vec-transformers",
199
+ "moduleConfig": {
200
+ "text2vec-transformers": {
201
+ "vectorizePropertyName": True,
202
+ "skip": False,
203
+ "tokenization": "lowercase"
204
+ }
205
+ },
206
+ "invertedIndexConfig": {
207
+ "bm25": {
208
+ "b": 0.75,
209
+ "k1": 1.2
210
+ },
211
  }
212
  },
213
+ {
214
+ "name": "content",
215
+ "dataType": ["text"],
216
+ "description": "HTML page content.",
217
+ "moduleConfig": {
218
+ "text2vec-transformers": {
219
+ "vectorizePropertyName": True,
220
+ "tokenization": "whitespace"
221
+ }
 
 
 
 
 
 
222
  }
223
  }
224
+ ]
225
+ }
226
+ wpCollection = client.collections.create_from_dict(class_obj)
 
227
 
228
  logger.info("#### createChunksCollection() entered.")
229
+ if not client.collections.exists("Chunks"):
230
+ #client.collections.delete("Chunks")
231
+ class_obj = {
232
+ "class": "Chunks",
233
+ "description": "Collection for document chunks.",
234
+ "vectorizer": "text2vec-transformers",
235
+ "moduleConfig": {
236
+ "text2vec-transformers": {
237
+ "vectorizeClassName": True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  }
239
  },
240
+ "vectorIndexType": "hnsw",
241
+ "vectorIndexConfig": {
242
+ "distance": "cosine",
243
  },
244
+ "properties": [
245
+ {
246
+ "name": "chunk",
247
+ "dataType": ["text"],
248
+ "description": "Single webpage chunk.",
249
+ "vectorizer": "text2vec-transformers",
250
+ "moduleConfig": {
251
+ "text2vec-transformers": {
252
+ "vectorizePropertyName": False,
253
+ "skip": False,
254
+ "tokenization": "lowercase"
255
+ }
256
  }
257
+ },
258
+ {
259
+ "name": "chunk_index",
260
+ "dataType": ["int"]
261
+ },
262
+ {
263
+ "name": "webpage",
264
+ "dataType": ["Documents"],
265
+ "description": "Webpage content chunks.",
266
+
267
+ "invertedIndexConfig": {
268
+ "bm25": {
269
+ "b": 0.75,
270
+ "k1": 1.2
271
+ }
272
+ }
273
+ }
274
+ ]
275
+ }
276
+ wpChunkCollection = client.collections.create_from_dict(class_obj)
277
 
278
 
279
  ###########################################################