mszel commited on
Commit
9ac804b
·
1 Parent(s): 7baf2a1

Image search example

Browse files
examples/LynxScribe Image RAG ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "edges": [
3
+ {
4
+ "id": "LynxScribe Image Describer 1 LynxScribe Image RAG Builder 1",
5
+ "source": "LynxScribe Image Describer 1",
6
+ "sourceHandle": "output",
7
+ "target": "LynxScribe Image RAG Builder 1",
8
+ "targetHandle": "image_describer"
9
+ },
10
+ {
11
+ "id": "LynxScribe RAG Vector Store 1 LynxScribe Image RAG Builder 1",
12
+ "source": "LynxScribe RAG Vector Store 1",
13
+ "sourceHandle": "output",
14
+ "target": "LynxScribe Image RAG Builder 1",
15
+ "targetHandle": "rag_graph"
16
+ },
17
+ {
18
+ "id": "GCP Image Loader 1 LynxScribe Image RAG Builder 1",
19
+ "source": "GCP Image Loader 1",
20
+ "sourceHandle": "output",
21
+ "target": "LynxScribe Image RAG Builder 1",
22
+ "targetHandle": "image_urls"
23
+ }
24
+ ],
25
+ "env": "LynxScribe",
26
+ "nodes": [
27
+ {
28
+ "data": {
29
+ "__execution_delay": 0.0,
30
+ "collapsed": false,
31
+ "display": null,
32
+ "error": null,
33
+ "meta": {
34
+ "inputs": {},
35
+ "name": "LynxScribe Image Describer",
36
+ "outputs": {
37
+ "output": {
38
+ "name": "output",
39
+ "position": "top",
40
+ "type": {
41
+ "type": "None"
42
+ }
43
+ }
44
+ },
45
+ "params": {
46
+ "llm_interface": {
47
+ "default": "openai",
48
+ "name": "llm_interface",
49
+ "type": {
50
+ "type": "<class 'str'>"
51
+ }
52
+ },
53
+ "llm_prompt_name": {
54
+ "default": "cot_picture_descriptor",
55
+ "name": "llm_prompt_name",
56
+ "type": {
57
+ "type": "<class 'str'>"
58
+ }
59
+ },
60
+ "llm_prompt_path": {
61
+ "default": "/Users/mszel/git/lynxscribe-demos/component_tutorials/04_image_search/image_description_prompts.yaml",
62
+ "name": "llm_prompt_path",
63
+ "type": {
64
+ "type": "<class 'str'>"
65
+ }
66
+ },
67
+ "llm_visual_model": {
68
+ "default": "gpt-4o",
69
+ "name": "llm_visual_model",
70
+ "type": {
71
+ "type": "<class 'str'>"
72
+ }
73
+ }
74
+ },
75
+ "type": "basic"
76
+ },
77
+ "params": {
78
+ "llm_interface": "openai",
79
+ "llm_prompt_name": "cot_picture_descriptor",
80
+ "llm_prompt_path": "/Users/mszel/git/lynxscribe-demos/component_tutorials/04_image_search/image_description_prompts.yaml",
81
+ "llm_visual_model": "gpt-4o"
82
+ },
83
+ "status": "done",
84
+ "title": "LynxScribe Image Describer"
85
+ },
86
+ "dragHandle": ".bg-primary",
87
+ "height": 358.0,
88
+ "id": "LynxScribe Image Describer 1",
89
+ "position": {
90
+ "x": 97.54029108623294,
91
+ "y": 622.6506477264763
92
+ },
93
+ "type": "basic",
94
+ "width": 376.0
95
+ },
96
+ {
97
+ "data": {
98
+ "display": null,
99
+ "error": null,
100
+ "meta": {
101
+ "inputs": {},
102
+ "name": "GCP Image Loader",
103
+ "outputs": {
104
+ "output": {
105
+ "name": "output",
106
+ "position": "right",
107
+ "type": {
108
+ "type": "None"
109
+ }
110
+ }
111
+ },
112
+ "params": {
113
+ "gcp_bucket": {
114
+ "default": "lynxkite_public_data",
115
+ "name": "gcp_bucket",
116
+ "type": {
117
+ "type": "<class 'str'>"
118
+ }
119
+ },
120
+ "prefix": {
121
+ "default": "lynxscribe-images/image-rag-test",
122
+ "name": "prefix",
123
+ "type": {
124
+ "type": "<class 'str'>"
125
+ }
126
+ }
127
+ },
128
+ "type": "basic"
129
+ },
130
+ "params": {
131
+ "gcp_bucket": "lynxkite_public_data",
132
+ "prefix": "lynxscribe-images/image-rag-test"
133
+ },
134
+ "status": "done",
135
+ "title": "GCP Image Loader"
136
+ },
137
+ "dragHandle": ".bg-primary",
138
+ "height": 225.0,
139
+ "id": "GCP Image Loader 1",
140
+ "position": {
141
+ "x": -311.53709682624634,
142
+ "y": 246.80608993170358
143
+ },
144
+ "type": "basic",
145
+ "width": 282.0
146
+ },
147
+ {
148
+ "data": {
149
+ "__execution_delay": 0.0,
150
+ "collapsed": null,
151
+ "display": null,
152
+ "error": null,
153
+ "meta": {
154
+ "inputs": {},
155
+ "name": "LynxScribe RAG Vector Store",
156
+ "outputs": {
157
+ "output": {
158
+ "name": "output",
159
+ "position": "top",
160
+ "type": {
161
+ "type": "None"
162
+ }
163
+ }
164
+ },
165
+ "params": {
166
+ "collection_name": {
167
+ "default": "lynx",
168
+ "name": "collection_name",
169
+ "type": {
170
+ "type": "<class 'str'>"
171
+ }
172
+ },
173
+ "name": {
174
+ "default": "faiss",
175
+ "name": "name",
176
+ "type": {
177
+ "type": "<class 'str'>"
178
+ }
179
+ },
180
+ "num_dimensions": {
181
+ "default": 3072.0,
182
+ "name": "num_dimensions",
183
+ "type": {
184
+ "type": "<class 'int'>"
185
+ }
186
+ },
187
+ "text_embedder_interface": {
188
+ "default": "openai",
189
+ "name": "text_embedder_interface",
190
+ "type": {
191
+ "type": "<class 'str'>"
192
+ }
193
+ },
194
+ "text_embedder_model_name_or_path": {
195
+ "default": "text-embedding-3-large",
196
+ "name": "text_embedder_model_name_or_path",
197
+ "type": {
198
+ "type": "<class 'str'>"
199
+ }
200
+ }
201
+ },
202
+ "position": {
203
+ "x": 807.0,
204
+ "y": 315.0
205
+ },
206
+ "type": "basic"
207
+ },
208
+ "params": {
209
+ "collection_name": "lynx",
210
+ "name": "faiss",
211
+ "num_dimensions": 3072.0,
212
+ "text_embedder_interface": "openai",
213
+ "text_embedder_model_name_or_path": "text-embedding-3-large"
214
+ },
215
+ "status": "active",
216
+ "title": "LynxScribe RAG Vector Store"
217
+ },
218
+ "dragHandle": ".bg-primary",
219
+ "height": 435.0,
220
+ "id": "LynxScribe RAG Vector Store 1",
221
+ "position": {
222
+ "x": 507.56541832959726,
223
+ "y": 625.9615546166448
224
+ },
225
+ "type": "basic",
226
+ "width": 283.0
227
+ },
228
+ {
229
+ "data": {
230
+ "__execution_delay": 0.0,
231
+ "collapsed": false,
232
+ "display": null,
233
+ "error": null,
234
+ "meta": {
235
+ "inputs": {
236
+ "image_describer": {
237
+ "name": "image_describer",
238
+ "position": "bottom",
239
+ "type": {
240
+ "type": "<class 'inspect._empty'>"
241
+ }
242
+ },
243
+ "image_urls": {
244
+ "name": "image_urls",
245
+ "position": "left",
246
+ "type": {
247
+ "type": "<class 'inspect._empty'>"
248
+ }
249
+ },
250
+ "rag_graph": {
251
+ "name": "rag_graph",
252
+ "position": "bottom",
253
+ "type": {
254
+ "type": "<class 'inspect._empty'>"
255
+ }
256
+ }
257
+ },
258
+ "name": "LynxScribe Image RAG Builder",
259
+ "outputs": {
260
+ "output": {
261
+ "name": "output",
262
+ "position": "right",
263
+ "type": {
264
+ "type": "None"
265
+ }
266
+ }
267
+ },
268
+ "params": {
269
+ "image_rag_out_path": {
270
+ "default": "image_test_rag_graph.pickle",
271
+ "name": "image_rag_out_path",
272
+ "type": {
273
+ "type": "<class 'str'>"
274
+ }
275
+ }
276
+ },
277
+ "position": {
278
+ "x": 979.0,
279
+ "y": 238.0
280
+ },
281
+ "type": "basic"
282
+ },
283
+ "params": {
284
+ "image_rag_out_path": "image_test_rag_graph.pickle"
285
+ },
286
+ "status": "done",
287
+ "title": "LynxScribe Image RAG Builder"
288
+ },
289
+ "dragHandle": ".bg-primary",
290
+ "height": 298.0,
291
+ "id": "LynxScribe Image RAG Builder 1",
292
+ "position": {
293
+ "x": 202.17177613422314,
294
+ "y": 209.6180585281515
295
+ },
296
+ "type": "basic",
297
+ "width": 479.0
298
+ }
299
+ ]
300
+ }
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED
@@ -2,10 +2,17 @@
2
  LynxScribe configuration and testing in LynxKite.
3
  """
4
 
 
 
 
 
 
5
  from lynxscribe.core.llm.base import get_llm_engine
6
  from lynxscribe.core.vector_store.base import get_vector_store
7
  from lynxscribe.common.config import load_config
8
  from lynxscribe.components.text.embedder import TextEmbedder
 
 
9
  from lynxscribe.components.rag.rag_graph import RAGGraph
10
  from lynxscribe.components.rag.knowledge_base_graph import PandasKnowledgeBaseGraph
11
  from lynxscribe.components.rag.rag_chatbot import Scenario, ScenarioSelector, RAGChatbot
@@ -27,6 +34,193 @@ op = ops.op_registration(ENV)
27
  output_on_top = ops.output_position(output="top")
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  @output_on_top
31
  @op("Vector store")
32
  def vector_store(*, name="chromadb", collection_name="lynx"):
@@ -301,3 +495,43 @@ def get_lynxscribe_workspaces():
301
  pass # Ignore files that are not valid workspaces.
302
  workspaces.sort()
303
  return workspaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  LynxScribe configuration and testing in LynxKite.
3
  """
4
 
5
+ from google.cloud import storage
6
+ from copy import deepcopy
7
+ import asyncio
8
+ import pandas as pd
9
+
10
  from lynxscribe.core.llm.base import get_llm_engine
11
  from lynxscribe.core.vector_store.base import get_vector_store
12
  from lynxscribe.common.config import load_config
13
  from lynxscribe.components.text.embedder import TextEmbedder
14
+ from lynxscribe.core.models.embedding import Embedding
15
+
16
  from lynxscribe.components.rag.rag_graph import RAGGraph
17
  from lynxscribe.components.rag.knowledge_base_graph import PandasKnowledgeBaseGraph
18
  from lynxscribe.components.rag.rag_chatbot import Scenario, ScenarioSelector, RAGChatbot
 
34
  output_on_top = ops.output_position(output="top")
35
 
36
 
37
@op("GCP Image Loader")
def gcp_image_loader(
    *,
    gcp_bucket: str = "lynxkite_public_data",
    prefix: str = "lynxscribe-images/image-rag-test",
):
    """
    Gives back the list of URLs of all the images in the GCP storage.

    Parameters:
        gcp_bucket: Name of the GCS bucket to scan.
        prefix: Object-name prefix narrowing the listing to one "folder".

    Returns:
        A dict with key "image_urls" holding the public URLs of every
        .jpg/.jpeg/.png blob found under the prefix.
    """
    client = storage.Client()
    bucket = client.bucket(gcp_bucket)
    blobs = bucket.list_blobs(prefix=prefix)
    image_urls = [
        blob.public_url
        for blob in blobs
        # Compare extensions case-insensitively so uploads like "photo.JPG"
        # or "scan.PNG" are not silently skipped.
        if blob.name.lower().endswith((".jpg", ".jpeg", ".png"))
    ]
    return {"image_urls": image_urls}
56
+
57
+
58
@output_on_top
@op("LynxScribe RAG Vector Store")
def ls_rag_graph(
    *,
    name: str = "faiss",
    num_dimensions: int = 3072,
    collection_name: str = "lynx",
    text_embedder_interface: str = "openai",
    text_embedder_model_name_or_path: str = "text-embedding-3-large",
):
    """
    Returns with a vector store instance.

    Builds a text embedder from the chosen LLM interface, wires it together
    with the requested vector store backend, and wraps both in a RAG graph.
    """
    # Text embedder that will turn documents into vectors.
    embedder = TextEmbedder(
        llm=get_llm_engine(name=text_embedder_interface),
        model=text_embedder_model_name_or_path,
    )

    # Each supported backend takes a different constructor argument.
    if name == "chromadb":
        store = get_vector_store(name=name, collection_name=collection_name)
    elif name == "faiss":
        store = get_vector_store(name=name, num_dimensions=num_dimensions)
    else:
        raise ValueError(f"Vector store name '{name}' is not supported.")

    knowledge_base = PandasKnowledgeBaseGraph(
        vector_store=store, text_embedder=embedder
    )
    return {"rag_graph": RAGGraph(knowledge_base)}
90
+
91
+
92
@output_on_top
@op("LynxScribe Image Describer")
def ls_image_describer(
    *,
    llm_interface: str = "openai",
    llm_visual_model: str = "gpt-4o",
    llm_prompt_path: str = "/Users/mszel/git/lynxscribe-demos/component_tutorials/04_image_search/image_description_prompts.yaml",
    llm_prompt_name: str = "cot_picture_descriptor",
):
    """
    Returns with an image describer instance.
    TODO: adding a relative path to the prompt path + adding model kwargs
    """
    # Look up the named prompt template inside the YAML config file.
    prompt_base = load_config(llm_prompt_path)[llm_prompt_name]

    describer = {
        "llm": get_llm_engine(name=llm_interface),
        "prompt_base": prompt_base,
        "model": llm_visual_model,
    }
    return {"image_describer": describer}
116
+
117
+
118
@ops.input_position(image_describer="bottom", rag_graph="bottom")
@op("LynxScribe Image RAG Builder")
async def ls_image_rag_builder(
    image_urls,
    image_describer,
    rag_graph,
    *,
    image_rag_out_path: str = "image_test_rag_graph.pickle",
):
    """
    Based on an input image folder (currently only supports GCP storage),
    the function builds up an image RAG graph, where the nodes are the
    descriptions of the images (and of all image objects).

    In a later phase, synthetic questions and "named entities" will also
    be added to the graph.

    Parameters:
        image_urls: Output of the image loader; a one-element list whose
            first item is a dict with key "image_urls".
        image_describer: Output of the image describer; a one-element list
            whose first item is a dict with key "image_describer".
        rag_graph: Output of the RAG vector store op; a one-element list
            whose first item is a dict with key "rag_graph".
        image_rag_out_path: Path where the built knowledge base is pickled.

    Returns:
        A dict with key "image_rag_path" holding the output pickle path.
    """
    # handling inputs — each input arrives as a one-element list of dicts
    image_describer = image_describer[0]["image_describer"]
    image_urls = image_urls[0]["image_urls"]
    rag_graph = rag_graph[0]["rag_graph"]

    # generate prompts from inputs: clone the prompt template per image and
    # substitute the image URL into every image_url message part.
    prompt_list = []
    for image in image_urls:
        _prompt = deepcopy(image_describer["prompt_base"])
        for message in _prompt:
            if isinstance(message["content"], list):
                for _message_part in message["content"]:
                    if "image_url" in _message_part:
                        _message_part["image_url"] = {"url": image}

        prompt_list.append(_prompt)
    ch_prompt_list = [
        ChatCompletionPrompt(model=image_describer["model"], messages=prompt)
        for prompt in prompt_list
    ]

    # get the image descriptions (all completions run concurrently)
    llm = image_describer["llm"]
    tasks = [
        llm.acreate_completion(completion_prompt=_prompt) for _prompt in ch_prompt_list
    ]
    out_completions = await asyncio.gather(*tasks)
    results = [
        dictionary_corrector(result.choices[0].message.content)
        for result in out_completions
    ]

    # generate combination of descriptions and embed them
    text_embedder = rag_graph.kg_base.text_embedder

    dict_list_df = []
    for _i, _result in enumerate(results):
        url_res = image_urls[_i]

        if "overall description" in _result:
            dict_list_df.append(
                {
                    "image_url": url_res,
                    "description": _result["overall description"],
                    "source": "overall description",
                }
            )

        if "details" in _result:
            # Fall back to "unknown" so a result that has details but no
            # overall description does not raise a KeyError (the branch
            # above shows the key may legitimately be absent).
            overall = _result.get("overall description", "unknown")
            for dkey in _result["details"].keys():
                text = f"The picture's description is: {overall}\n\nThe description of the {dkey} is: {_result['details'][dkey]}"
                dict_list_df.append(
                    {"image_url": url_res, "description": text, "source": "details"}
                )

    pdf_descriptions = pd.DataFrame(dict_list_df)
    pdf_descriptions["embedding_values"] = await text_embedder.acreate_embedding(
        pdf_descriptions["description"].to_list()
    )
    pdf_descriptions["id"] = "im_" + pdf_descriptions.index.astype(str)

    # adding the embeddings to the RAG graph with metadata
    pdf_descriptions["embedding"] = pdf_descriptions.apply(
        lambda row: Embedding(
            id=row["id"],
            value=row["embedding_values"],
            metadata={
                "image_url": row["image_url"],
                "image_part": row["source"],
                "type": "image_description",
            },
            document=row["description"],
        ),
        axis=1,
    )
    embedding_list = pdf_descriptions["embedding"].tolist()

    # adding the embeddings to the RAG graph
    rag_graph.kg_base.vector_store.upsert(embedding_list)

    # saving the RAG graph
    rag_graph.kg_base.save(image_rag_out_path)

    return {"image_rag_path": image_rag_out_path}  # TODO: do we need an output?
222
+
223
+
224
  @output_on_top
225
  @op("Vector store")
226
  def vector_store(*, name="chromadb", collection_name="lynx"):
 
495
  pass # Ignore files that are not valid workspaces.
496
  workspaces.sort()
497
  return workspaces
498
+
499
+
500
+ def dictionary_corrector(dict_string: str, expected_keys: list | None = None) -> dict:
501
+ """
502
+ Processing LLM outputs: when the LLM returns with a dictionary (in a string format). It optionally
503
+ crosschecks the input with the expected keys and return a dictionary with the expected keys and their
504
+ values ('unknown' if not present). If there is an error during the processing, it will return with
505
+ a dictionary of the expected keys, all with 'error' as a value (or with an empty dictionary).
506
+
507
+ Currently the function does not delete the extra key-value pairs.
508
+ """
509
+
510
+ out_dict = {}
511
+
512
+ if len(dict_string) == 0:
513
+ return out_dict
514
+
515
+ # deleting the optional text before the first and after the last curly brackets
516
+ dstring_prc = dict_string
517
+ if dstring_prc[0] != "{":
518
+ dstring_prc = "{" + "{".join(dstring_prc.split("{")[1:])
519
+ if dstring_prc[-1] != "}":
520
+ dstring_prc = "}".join(dstring_prc.split("}")[:-1]) + "}"
521
+
522
+ try:
523
+ trf_dict = eval(dstring_prc)
524
+ if expected_keys:
525
+ for _key in expected_keys:
526
+ if _key in trf_dict:
527
+ out_dict[_key] = trf_dict[_key]
528
+ else:
529
+ out_dict[_key] = "unknown"
530
+ else:
531
+ out_dict = trf_dict
532
+ except Exception:
533
+ if expected_keys:
534
+ for _key in expected_keys:
535
+ out_dict[_key] = "error"
536
+
537
+ return out_dict