Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

mszel committed on Apr 11

Commit

2577d27

1 Parent(s): ed1dc36

Correcting the Image RAG pipeline

Browse files

Files changed (3) hide show

examples/LynxScribe Image RAG +158 -194
{lynxkite-lynxscribe/promptdb → examples/uploads}/image_description_prompts.yaml +0 -0
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py +108 -59

examples/LynxScribe Image RAG CHANGED Viewed

@@ -1,24 +1,10 @@
 {
   "edges": [
     {
-      "id": "Input chat 1 LynxScribe Image RAG Query 1",
-      "source": "Input chat 1",
-      "sourceHandle": "output",
-      "target": "LynxScribe Image RAG Query 1",
-      "targetHandle": "text"
-    },
-    {
-      "id": "LynxScribe Image RAG Query 1 View image 1",
-      "source": "LynxScribe Image RAG Query 1",
       "sourceHandle": "output",
-      "target": "View image 1",
-      "targetHandle": "embedding_similarities"
-    },
-    {
-      "id": "Cloud-sourced File Loader 1 LynxScribe Image RAG Builder 1",
-      "source": "Cloud-sourced File Loader 1",
-      "sourceHandle": "output",
-      "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "file_urls"
     },
     {
@@ -26,21 +12,28 @@
       "source": "LynxScribe Image Describer 1",
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Builder 1",
-      "targetHandle": "image_describer"
     },
     {
-      "id": "LynxScribe RAG Graph Vector Store 1 LynxScribe Image RAG Builder 1",
-      "source": "LynxScribe RAG Graph Vector Store 1",
       "sourceHandle": "output",
-      "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "rag_graph"
     },
     {
-      "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
-      "source": "LynxScribe Image RAG Builder 1",
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Query 1",
-      "targetHandle": "rag_graph"
     }
   ],
   "env": "LynxScribe",
@@ -75,7 +68,7 @@
           "type": "basic"
         },
         "params": {
-          "chat": "Show me a picture about pills!"
         },
         "status": "done",
         "title": "Input chat"
@@ -84,8 +77,8 @@
       "height": 214.0,
       "id": "Input chat 1",
       "position": {
-        "x": -302.70349900314835,
-        "y": -140.67386384008685
       },
       "type": "basic",
       "width": 387.0
@@ -97,23 +90,8 @@
         "display": null,
         "error": null,
         "meta": {
-          "inputs": {
-            "rag_graph": {
-              "name": "rag_graph",
-              "position": "bottom",
-              "type": {
-                "type": "<class 'inspect._empty'>"
-              }
-            },
-            "text": {
-              "name": "text",
-              "position": "left",
-              "type": {
-                "type": "<class 'inspect._empty'>"
-              }
-            }
-          },
-          "name": "LynxScribe Image RAG Query",
           "outputs": {
             "output": {
               "name": "output",
@@ -124,78 +102,77 @@
             }
           },
           "params": {
-            "top_k": {
-              "default": 3.0,
-              "name": "top_k",
               "type": {
-                "type": "<class 'int'>"
               }
             }
           },
           "type": "basic"
         },
         "params": {
-          "top_k": "3"
         },
         "status": "done",
-        "title": "LynxScribe Image RAG Query"
       },
       "dragHandle": ".bg-primary",
-      "height": 200.0,
-      "id": "LynxScribe Image RAG Query 1",
       "position": {
-        "x": 908.9211080204011,
-        "y": -132.3031800030364
       },
       "type": "basic",
-      "width": 200.0
     },
     {
       "data": {
-        "__execution_delay": null,
-        "collapsed": false,
-        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/capsules-1079838_1280.jpg",
         "error": null,
         "meta": {
           "inputs": {
-            "embedding_similarities": {
-              "name": "embedding_similarities",
               "position": "left",
               "type": {
                 "type": "<class 'inspect._empty'>"
               }
             }
           },
-          "name": "View image",
-          "outputs": {},
-          "params": {},
-          "type": "image"
-        },
-        "params": {},
-        "status": "done",
-        "title": "View image"
-      },
-      "dragHandle": ".bg-primary",
-      "height": 1170.0,
-      "id": "View image 1",
-      "position": {
-        "x": 1426.7020124006506,
-        "y": -293.16229409169125
-      },
-      "type": "image",
-      "width": 750.0
-    },
-    {
-      "data": {
-        "display": null,
-        "error": null,
-        "meta": {
-          "inputs": {},
           "name": "LynxScribe Image Describer",
           "outputs": {
             "output": {
               "name": "output",
-              "position": "top",
               "type": {
                 "type": "None"
               }
@@ -217,7 +194,7 @@
               }
             },
             "llm_prompt_path": {
-              "default": "../lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
               "name": "llm_prompt_path",
               "type": {
                 "type": "<class 'str'>"
@@ -232,108 +209,118 @@
             }
           },
           "position": {
-            "x": 1066.0,
-            "y": 713.0
           },
           "type": "basic"
         },
         "params": {
           "llm_interface": "openai",
           "llm_prompt_name": "cot_picture_descriptor",
-          "llm_prompt_path": "../lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
           "llm_visual_model": "gpt-4o"
         },
         "status": "done",
         "title": "LynxScribe Image Describer"
       },
       "dragHandle": ".bg-primary",
-      "height": 363.0,
       "id": "LynxScribe Image Describer 1",
       "position": {
-        "x": 127.85361236096924,
-        "y": 687.0518781863441
       },
       "type": "basic",
-      "width": 401.0
     },
     {
       "data": {
         "display": null,
         "error": null,
         "meta": {
-          "inputs": {},
-          "name": "LynxScribe RAG Graph Vector Store",
           "outputs": {
             "output": {
               "name": "output",
-              "position": "top",
               "type": {
                 "type": "None"
               }
             }
           },
           "params": {
-            "collection_name": {
-              "default": "lynx",
-              "name": "collection_name",
               "type": {
                 "type": "<class 'str'>"
               }
             },
-            "name": {
-              "default": "faiss",
-              "name": "name",
               "type": {
                 "type": "<class 'str'>"
               }
             },
-            "num_dimensions": {
-              "default": 3072.0,
-              "name": "num_dimensions",
               "type": {
-                "type": "<class 'int'>"
               }
             },
-            "text_embedder_interface": {
-              "default": "openai",
-              "name": "text_embedder_interface",
               "type": {
-                "type": "<class 'str'>"
               }
             },
-            "text_embedder_model_name_or_path": {
-              "default": "text-embedding-3-large",
-              "name": "text_embedder_model_name_or_path",
               "type": {
                 "type": "<class 'str'>"
               }
             }
           },
           "position": {
-            "x": 790.0,
-            "y": 633.0
           },
           "type": "basic"
         },
         "params": {
-          "collection_name": "lynx",
-          "name": "faiss",
-          "num_dimensions": 3072.0,
           "text_embedder_interface": "openai",
-          "text_embedder_model_name_or_path": "text-embedding-3-large"
         },
         "status": "done",
-        "title": "LynxScribe RAG Graph Vector Store"
       },
       "dragHandle": ".bg-primary",
-      "height": 436.0,
-      "id": "LynxScribe RAG Graph Vector Store 1",
       "position": {
-        "x": 595.4558693958389,
-        "y": 688.0989202130276
       },
       "type": "basic",
-      "width": 339.0
     },
     {
       "data": {
@@ -342,8 +329,23 @@
         "display": null,
         "error": null,
         "meta": {
-          "inputs": {},
-          "name": "Cloud-sourced File Loader",
           "outputs": {
             "output": {
               "name": "output",
@@ -354,110 +356,72 @@
             }
           },
           "params": {
-            "accepted_file_types": {
-              "default": ".jpg, .jpeg, .png",
-              "name": "accepted_file_types",
-              "type": {
-                "type": "<class 'str'>"
-              }
-            },
-            "cloud_provider": {
-              "default": "gcp",
-              "name": "cloud_provider",
-              "type": {
-                "type": "<class 'str'>"
-              }
-            },
-            "folder_URL": {
-              "default": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
-              "name": "folder_URL",
               "type": {
-                "type": "<class 'str'>"
               }
             }
           },
           "position": {
-            "x": 248.0,
-            "y": 419.0
           },
           "type": "basic"
         },
         "params": {
-          "accepted_file_types": ".jpg, .jpeg, .png",
-          "cloud_provider": "gcp",
-          "folder_URL": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test"
         },
         "status": "done",
-        "title": "Cloud-sourced File Loader"
       },
       "dragHandle": ".bg-primary",
-      "height": 291.0,
-      "id": "Cloud-sourced File Loader 1",
       "position": {
-        "x": -479.7367372966062,
-        "y": 213.81581567584843
       },
       "type": "basic",
-      "width": 512.0
     },
     {
       "data": {
-        "display": null,
         "error": null,
         "meta": {
           "inputs": {
-            "file_urls": {
-              "name": "file_urls",
               "position": "left",
               "type": {
                 "type": "<class 'inspect._empty'>"
               }
-            },
-            "image_describer": {
-              "name": "image_describer",
-              "position": "bottom",
-              "type": {
-                "type": "<class 'inspect._empty'>"
-              }
-            },
-            "rag_graph": {
-              "name": "rag_graph",
-              "position": "bottom",
-              "type": {
-                "type": "<class 'inspect._empty'>"
-              }
-            }
-          },
-          "name": "LynxScribe Image RAG Builder",
-          "outputs": {
-            "output": {
-              "name": "output",
-              "position": "right",
-              "type": {
-                "type": "None"
-              }
             }
           },
           "params": {},
           "position": {
-            "x": 480.0,
-            "y": 388.0
           },
-          "type": "basic"
         },
         "params": {},
         "status": "done",
-        "title": "LynxScribe Image RAG Builder"
       },
       "dragHandle": ".bg-primary",
-      "height": 313.0,
-      "id": "LynxScribe Image RAG Builder 1",
       "position": {
-        "x": 243.62049392420903,
-        "y": 215.6136303371116
       },
-      "type": "basic",
-      "width": 526.0
     }
   ]
 }

 {
   "edges": [
     {
+      "id": "Cloud-sourced File Listing 1 LynxScribe Image Describer 1",
+      "source": "Cloud-sourced File Listing 1",
       "sourceHandle": "output",
+      "target": "LynxScribe Image Describer 1",
       "targetHandle": "file_urls"
     },
     {
       "source": "LynxScribe Image Describer 1",
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Builder 1",
+      "targetHandle": "image_descriptions"
     },
     {
+      "id": "LynxScribe Image RAG Builder 1 LynxScribe Image RAG Query 1",
+      "source": "LynxScribe Image RAG Builder 1",
       "sourceHandle": "output",
+      "target": "LynxScribe Image RAG Query 1",
       "targetHandle": "rag_graph"
     },
     {
+      "id": "Input chat 1 LynxScribe Image RAG Query 1",
+      "source": "Input chat 1",
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Query 1",
+      "targetHandle": "text"
+    },
+    {
+      "id": "LynxScribe Image RAG Query 1 LynxScribe Image Result Viewer 1",
+      "source": "LynxScribe Image RAG Query 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Image Result Viewer 1",
+      "targetHandle": "embedding_similarities"
     }
   ],
   "env": "LynxScribe",
           "type": "basic"
         },
         "params": {
+          "chat": "show me a picture about 2 doctors"
         },
         "status": "done",
         "title": "Input chat"
       "height": 214.0,
       "id": "Input chat 1",
       "position": {
+        "x": 51.51211115780683,
+        "y": -147.75474103115954
       },
       "type": "basic",
       "width": 387.0
         "display": null,
         "error": null,
         "meta": {
+          "inputs": {},
+          "name": "Cloud-sourced File Listing",
           "outputs": {
             "output": {
               "name": "output",
             }
           },
           "params": {
+            "accepted_file_types": {
+              "default": ".jpg, .jpeg, .png",
+              "name": "accepted_file_types",
               "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "cloud_provider": {
+              "default": "gcp",
+              "name": "cloud_provider",
+              "type": {
+                "enum": [
+                  "GCP",
+                  "AWS",
+                  "AZURE"
+                ]
+              }
+            },
+            "folder_URL": {
+              "default": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
+              "name": "folder_URL",
+              "type": {
+                "type": "<class 'str'>"
               }
             }
           },
+          "position": {
+            "x": 1271.0,
+            "y": 603.0
+          },
           "type": "basic"
         },
         "params": {
+          "accepted_file_types": ".jpg, .jpeg, .png",
+          "cloud_provider": "GCP",
+          "folder_URL": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test"
         },
         "status": "done",
+        "title": "Cloud-sourced File Listing"
       },
       "dragHandle": ".bg-primary",
+      "height": 308.0,
+      "id": "Cloud-sourced File Listing 1",
       "position": {
+        "x": -733.5815993327456,
+        "y": 418.3880816741662
       },
       "type": "basic",
+      "width": 613.0
     },
     {
       "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
         "error": null,
         "meta": {
           "inputs": {
+            "file_urls": {
+              "name": "file_urls",
               "position": "left",
               "type": {
                 "type": "<class 'inspect._empty'>"
               }
             }
           },
           "name": "LynxScribe Image Describer",
           "outputs": {
             "output": {
               "name": "output",
+              "position": "right",
               "type": {
                 "type": "None"
               }
               }
             },
             "llm_prompt_path": {
+              "default": "uploads/image_description_prompts.yaml",
               "name": "llm_prompt_path",
               "type": {
                 "type": "<class 'str'>"
             }
           },
           "position": {
+            "x": 1331.0,
+            "y": 686.0
           },
           "type": "basic"
         },
         "params": {
           "llm_interface": "openai",
           "llm_prompt_name": "cot_picture_descriptor",
+          "llm_prompt_path": "uploads/image_description_prompts.yaml",
           "llm_visual_model": "gpt-4o"
         },
         "status": "done",
         "title": "LynxScribe Image Describer"
       },
       "dragHandle": ".bg-primary",
+      "height": 366.0,
       "id": "LynxScribe Image Describer 1",
       "position": {
+        "x": 94.4350838249984,
+        "y": 389.7616279503166
       },
       "type": "basic",
+      "width": 362.0
     },
     {
       "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
         "display": null,
         "error": null,
         "meta": {
+          "inputs": {
+            "image_descriptions": {
+              "name": "image_descriptions",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "LynxScribe Image RAG Builder",
           "outputs": {
             "output": {
               "name": "output",
+              "position": "right",
               "type": {
                 "type": "None"
               }
             }
           },
           "params": {
+            "text_embedder_interface": {
+              "default": "openai",
+              "name": "text_embedder_interface",
               "type": {
                 "type": "<class 'str'>"
               }
             },
+            "text_embedder_model_name_or_path": {
+              "default": "text-embedding-3-large",
+              "name": "text_embedder_model_name_or_path",
               "type": {
                 "type": "<class 'str'>"
               }
             },
+            "vdb_collection_name": {
+              "default": "lynx",
+              "name": "vdb_collection_name",
               "type": {
+                "type": "<class 'str'>"
               }
             },
+            "vdb_num_dimensions": {
+              "default": 3072.0,
+              "name": "vdb_num_dimensions",
               "type": {
+                "type": "<class 'int'>"
               }
             },
+            "vdb_provider_name": {
+              "default": "faiss",
+              "name": "vdb_provider_name",
               "type": {
                 "type": "<class 'str'>"
               }
             }
           },
           "position": {
+            "x": 1714.0,
+            "y": 740.0
           },
           "type": "basic"
         },
         "params": {
           "text_embedder_interface": "openai",
+          "text_embedder_model_name_or_path": "text-embedding-3-small",
+          "vdb_collection_name": "lynx",
+          "vdb_num_dimensions": "1536",
+          "vdb_provider_name": "faiss"
         },
         "status": "done",
+        "title": "LynxScribe Image RAG Builder"
       },
       "dragHandle": ".bg-primary",
+      "height": 463.0,
+      "id": "LynxScribe Image RAG Builder 1",
       "position": {
+        "x": 634.1082253159385,
+        "y": 341.7237080874875
       },
       "type": "basic",
+      "width": 309.0
     },
     {
       "data": {
         "display": null,
         "error": null,
         "meta": {
+          "inputs": {
+            "rag_graph": {
+              "name": "rag_graph",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "text": {
+              "name": "text",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "LynxScribe Image RAG Query",
           "outputs": {
             "output": {
               "name": "output",
             }
           },
           "params": {
+            "top_k": {
+              "default": 3.0,
+              "name": "top_k",
               "type": {
+                "type": "<class 'int'>"
               }
             }
           },
           "position": {
+            "x": 1865.0,
+            "y": 363.0
           },
           "type": "basic"
         },
         "params": {
+          "top_k": "3"
         },
         "status": "done",
+        "title": "LynxScribe Image RAG Query"
       },
       "dragHandle": ".bg-primary",
+      "height": 205.0,
+      "id": "LynxScribe Image RAG Query 1",
       "position": {
+        "x": 1064.0579569918539,
+        "y": -140.79102876607624
       },
       "type": "basic",
+      "width": 263.0
     },
     {
       "data": {
+        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/surgery-1807541_1280.jpg",
         "error": null,
         "meta": {
           "inputs": {
+            "embedding_similarities": {
+              "name": "embedding_similarities",
               "position": "left",
               "type": {
                 "type": "<class 'inspect._empty'>"
               }
             }
           },
+          "name": "LynxScribe Image Result Viewer",
+          "outputs": {},
           "params": {},
           "position": {
+            "x": 2207.0,
+            "y": 327.0
           },
+          "type": "image"
         },
         "params": {},
         "status": "done",
+        "title": "LynxScribe Image Result Viewer"
       },
       "dragHandle": ".bg-primary",
+      "height": 622.0,
+      "id": "LynxScribe Image Result Viewer 1",
       "position": {
+        "x": 1550.5086064306404,
+        "y": -349.93521115271193
       },
+      "type": "image",
+      "width": 802.0
     }
   ]
 }

{lynxkite-lynxscribe/promptdb → examples/uploads}/image_description_prompts.yaml RENAMED Viewed

File without changes

lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED Viewed

@@ -1,9 +1,11 @@
 """
 LynxScribe configuration and testing in LynxKite.
 """
 from google.cloud import storage
 from copy import deepcopy
 import asyncio
 import pandas as pd
 import joblib
@@ -44,10 +46,17 @@ op = ops.op_registration(ENV)
 output_on_top = ops.output_position(output="top")
-@op("Cloud-sourced File Loader")
 def cloud_file_loader(
     *,
-    cloud_provider: str = "gcp",
     folder_URL: str = "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
     accepted_file_types: str = ".jpg, .jpeg, .png",
 ):
@@ -60,7 +69,7 @@ def cloud_file_loader(
     accepted_file_types = tuple([t.strip() for t in accepted_file_types.split(",")])
-    if cloud_provider == "gcp":
         client = storage.Client()
         url_useful_part = folder_URL.split(".com/")[-1]
         bucket_name = url_useful_part.split("/")[0]
@@ -118,66 +127,41 @@ def ls_rag_graph(
     return {"rag_graph": rag_graph}
-@output_on_top
 @op("LynxScribe Image Describer")
 @mem.cache
-def ls_image_describer(
     *,
     llm_interface: str = "openai",
     llm_visual_model: str = "gpt-4o",
-    llm_prompt_path: str = "../lynxkite-lynxscribe/promptdb/image_description_prompts.yaml",
     llm_prompt_name: str = "cot_picture_descriptor",
     # api_key_name: str = "OPENAI_API_KEY",
 ):
     """
-    Returns with an image describer instance.
-    TODO: adding a relative path to the prompt path + adding model kwargs
     """
     llm_params = {"name": llm_interface}
     # if api_key_name:
     #     llm_params["api_key"] = os.getenv(api_key_name)
     llm = get_llm_engine(**llm_params)
     prompt_base = load_config(llm_prompt_path)[llm_prompt_name]
-    return {
-        "image_describer": {
-            "llm": llm,
-            "prompt_base": prompt_base,
-            "model": llm_visual_model,
-        }
-    }
-@ops.input_position(image_describer="bottom", rag_graph="bottom")
-@op("LynxScribe Image RAG Builder")
-@mem.cache
-async def ls_image_rag_builder(
-    file_urls,
-    image_describer,
-    rag_graph,
-):
-    """
-    Based on an input image folder (currently only supports GCP storage),
-    the function builds up an image RAG graph, where the nodes are the
-    descriptions of the images (and of all image objects).
-    In a later phase, synthetic questions and "named entities" will also
-    be added to the graph.
-    """
-    # handling inputs
-    image_describer = image_describer[0]["image_describer"]
-    image_urls = file_urls["file_urls"]
-    rag_graph = rag_graph[0]["rag_graph"]
-    # generate prompts from inputs
     prompt_list = []
     for i in range(len(image_urls)):
         image = image_urls[i]
-        _prompt = deepcopy(image_describer["prompt_base"])
         for message in _prompt:
             if isinstance(message["content"], list):
                 for _message_part in message["content"]:
@@ -185,13 +169,14 @@ async def ls_image_rag_builder(
                         _message_part["image_url"] = {"url": image}
         prompt_list.append(_prompt)
     ch_prompt_list = [
-        ChatCompletionPrompt(model=image_describer["model"], messages=prompt)
         for prompt in prompt_list
     ]
     # get the image descriptions
-    llm = image_describer["llm"]
     tasks = [
         llm.acreate_completion(completion_prompt=_prompt) for _prompt in ch_prompt_list
     ]
@@ -201,27 +186,86 @@ async def ls_image_rag_builder(
         for result in out_completions
     ]
-    # generate combination of descriptions and embed them
-    text_embedder = rag_graph.kg_base.text_embedder
     dict_list_df = []
-    for _i, _result in enumerate(results):
-        url_res = image_urls[_i]
-        if "overall description" in _result:
             dict_list_df.append(
                 {
-                    "image_url": url_res,
-                    "description": _result["overall description"],
                     "source": "overall description",
                 }
             )
-        if "details" in _result:
-            for dkey in _result["details"].keys():
-                text = f"The picture's description is: {_result['overall description']}\n\nThe description of the {dkey} is: {_result['details'][dkey]}"
                 dict_list_df.append(
-                    {"image_url": url_res, "description": text, "source": "details"}
                 )
     pdf_descriptions = pd.DataFrame(dict_list_df)
@@ -257,7 +301,7 @@ async def ls_image_rag_builder(
 @op("LynxScribe RAG Graph Saver")
 def ls_save_rag_graph(
-    knowledge_base,
     *,
     image_rag_out_path: str = "image_test_rag_graph.pickle",
 ):
@@ -265,7 +309,10 @@ def ls_save_rag_graph(
     Saves the RAG graph to a pickle file.
     """
-    knowledge_base.kg_base.save(image_rag_out_path)
     return None
@@ -294,10 +341,12 @@ async def search_context(rag_graph, text, *, top_k=3):
     return {"embedding_similarities": result_list}
-@op("View image", view="image")
 def view_image(embedding_similarities):
     """
-    Plotting the selected image.
     """
     embedding_similarities = embedding_similarities["embedding_similarities"]
     return embedding_similarities[0]["image_url"]

 """
 LynxScribe configuration and testing in LynxKite.
+TODO: all these outputs should contain metadata, so that the next task can check the input type, etc.
 """
 from google.cloud import storage
 from copy import deepcopy
+from enum import Enum
 import asyncio
 import pandas as pd
 import joblib
 output_on_top = ops.output_position(output="top")
+# defining the cloud provider enum
+class CloudProvider(Enum):
+    GCP = "gcp"
+    AWS = "aws"
+    AZURE = "azure"
+@op("Cloud-sourced File Listing")
 def cloud_file_loader(
     *,
+    cloud_provider: CloudProvider = CloudProvider.GCP,
     folder_URL: str = "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test",
     accepted_file_types: str = ".jpg, .jpeg, .png",
 ):
     accepted_file_types = tuple([t.strip() for t in accepted_file_types.split(",")])
+    if cloud_provider == CloudProvider.GCP:
         client = storage.Client()
         url_useful_part = folder_URL.split(".com/")[-1]
         bucket_name = url_useful_part.split("/")[0]
     return {"rag_graph": rag_graph}
 @op("LynxScribe Image Describer")
 @mem.cache
+async def ls_image_describer(
+    file_urls,
     *,
     llm_interface: str = "openai",
     llm_visual_model: str = "gpt-4o",
+    llm_prompt_path: str = "uploads/image_description_prompts.yaml",
     llm_prompt_name: str = "cot_picture_descriptor",
     # api_key_name: str = "OPENAI_API_KEY",
 ):
     """
+    Returns image descriptions for a list of image URLs.
+    TODO: make the inputs more flexible (e.g. accept file locations, URLs, binaries, etc.).
+          The input dictionary should contain some meta info, e.g., what is in the list.
     """
+    # handling inputs
+    image_urls = file_urls["file_urls"]
+    # loading the LLM
     llm_params = {"name": llm_interface}
     # if api_key_name:
     #     llm_params["api_key"] = os.getenv(api_key_name)
     llm = get_llm_engine(**llm_params)
+    # preparing the prompts
     prompt_base = load_config(llm_prompt_path)[llm_prompt_name]
     prompt_list = []
     for i in range(len(image_urls)):
         image = image_urls[i]
+        _prompt = deepcopy(prompt_base)
         for message in _prompt:
             if isinstance(message["content"], list):
                 for _message_part in message["content"]:
                         _message_part["image_url"] = {"url": image}
         prompt_list.append(_prompt)
+    # creating the prompt objects
     ch_prompt_list = [
+        ChatCompletionPrompt(model=llm_visual_model, messages=prompt)
         for prompt in prompt_list
     ]
     # get the image descriptions
     tasks = [
         llm.acreate_completion(completion_prompt=_prompt) for _prompt in ch_prompt_list
     ]
         for result in out_completions
     ]
+    # getting the image descriptions (list of dictionaries {image_url: URL, description: description})
+    # TODO: some result class could be a better idea (will be developed in LynxScribe)
+    image_descriptions = [
+        {"image_url": image_urls[i], "description": results[i]}
+        for i in range(len(image_urls))
+    ]
+    return {"image_descriptions": image_descriptions}
+@op("LynxScribe Image RAG Builder")
+@mem.cache
+async def ls_image_rag_builder(
+    image_descriptions,
+    *,
+    vdb_provider_name: str = "faiss",
+    vdb_num_dimensions: int = 3072,
+    vdb_collection_name: str = "lynx",
+    text_embedder_interface: str = "openai",
+    text_embedder_model_name_or_path: str = "text-embedding-3-large",
+    # api_key_name: str = "OPENAI_API_KEY",
+):
+    """
+    Based on image descriptions, and embedding/VDB parameters,
+    the function builds up an image RAG graph, where the nodes are the
+    descriptions of the images (and of all image objects).
+    In a later phase, synthetic questions and "named entities" will also
+    be added to the graph.
+    """
+    # handling inputs
+    image_descriptions = image_descriptions["image_descriptions"]
+    # Building up the empty RAG graph
+    # a) Define LLM interface and get a text embedder
+    llm_params = {"name": text_embedder_interface}
+    # if api_key_name:
+    #     llm_params["api_key"] = os.getenv(api_key_name)
+    llm = get_llm_engine(**llm_params)
+    text_embedder = TextEmbedder(llm=llm, model=text_embedder_model_name_or_path)
+    # b) getting the vector store
+    # TODO: vdb_provider_name should be ENUM, and other parameters should appear accordingly
+    if vdb_provider_name == "chromadb":
+        vector_store = get_vector_store(
+            name=vdb_provider_name, collection_name=vdb_collection_name
+        )
+    elif vdb_provider_name == "faiss":
+        vector_store = get_vector_store(
+            name=vdb_provider_name, num_dimensions=vdb_num_dimensions
+        )
+    else:
+        raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
+    # c) building up the RAG graph
+    rag_graph = RAGGraph(
+        PandasKnowledgeBaseGraph(vector_store=vector_store, text_embedder=text_embedder)
+    )
     dict_list_df = []
+    for image_description_tuple in image_descriptions:
+        image_url = image_description_tuple["image_url"]
+        image_description = image_description_tuple["description"]
+        if "overall description" in image_description:
             dict_list_df.append(
                 {
+                    "image_url": image_url,
+                    "description": image_description["overall description"],
                     "source": "overall description",
                 }
             )
+        if "details" in image_description:
+            for dkey in image_description["details"].keys():
+                text = f"The picture's description is: {image_description['overall description']}\n\nThe description of the {dkey} is: {image_description['details'][dkey]}"
                 dict_list_df.append(
+                    {"image_url": image_url, "description": text, "source": "details"}
                 )
     pdf_descriptions = pd.DataFrame(dict_list_df)
 @op("LynxScribe RAG Graph Saver")
 def ls_save_rag_graph(
+    rag_graph,
     *,
     image_rag_out_path: str = "image_test_rag_graph.pickle",
 ):
     Saves the RAG graph to a pickle file.
     """
+    # reading inputs
+    rag_graph = rag_graph[0]["rag_graph"]
+    rag_graph.kg_base.save(image_rag_out_path)
     return None
     return {"embedding_similarities": result_list}
+@op("LynxScribe Image Result Viewer", view="image")
 def view_image(embedding_similarities):
     """
+    Displays the top image (the first entry of the embedding similarities).
+    TODO: later on, the user should be able to scroll through the images and send feedback.
     """
     embedding_similarities = embedding_similarities["embedding_similarities"]
     return embedding_similarities[0]["image_url"]