Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

darabos committed on Apr 23

Commit

a4fa27a

2 Parent(s): 90b31da 23acd42

Merge remote-tracking branch 'origin/main' into darabos-folders

Browse files

Files changed (9) hide show

examples/LynxScribe Data Cleaning.lynxkite.json +536 -0
examples/LynxScribe Image Search.lynxkite.json +51 -57
examples/uploads/task_solver_examples.xlsx +0 -0
lynxkite-app/src/lynxkite_app/crdt.py +1 -0
lynxkite-core/src/lynxkite/core/executors/simple.py +64 -0
lynxkite-core/src/lynxkite/core/ops.py +1 -0
lynxkite-core/src/lynxkite/core/workspace.py +19 -0
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py +138 -2
lynxkite-pillow-example/src/lynxkite_pillow_example/__init__.py +2 -2

examples/LynxScribe Data Cleaning.lynxkite.json ADDED Viewed

	@@ -0,0 +1,536 @@

+{
+  "edges": [
+    {
+      "id": "LynxScribe Message 3 LynxScribe Task Solver 1",
+      "source": "LynxScribe Message 3",
+      "sourceHandle": "output",
+      "target": "LynxScribe Task Solver 1",
+      "targetHandle": "system_prompt"
+    },
+    {
+      "id": "LynxScribe Message 1 LynxScribe Task Solver 1",
+      "source": "LynxScribe Message 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Task Solver 1",
+      "targetHandle": "instruction_prompt"
+    },
+    {
+      "id": "Read Excel 1 LynxScribe Task Solver 1",
+      "source": "Read Excel 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Task Solver 1",
+      "targetHandle": "dataframe"
+    },
+    {
+      "id": "LynxScribe Task Solver 1 View DataFrame 1",
+      "source": "LynxScribe Task Solver 1",
+      "sourceHandle": "output",
+      "target": "View DataFrame 1",
+      "targetHandle": "input"
+    },
+    {
+      "id": "Read Excel 1 View DataFrame 2",
+      "source": "Read Excel 1",
+      "sourceHandle": "output",
+      "target": "View DataFrame 2",
+      "targetHandle": "input"
+    }
+  ],
+  "env": "LynxScribe",
+  "nodes": [
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {},
+          "name": "LynxScribe Message",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "top",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "prompt_content": {
+              "default": null,
+              "name": "prompt_content",
+              "type": {
+                "format": "textarea"
+              }
+            },
+            "prompt_role": {
+              "default": null,
+              "name": "prompt_role",
+              "type": {
+                "enum": [
+                  "SYSTEM",
+                  "USER"
+                ]
+              }
+            }
+          },
+          "position": {
+            "x": 653.0,
+            "y": 954.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "prompt_content": "You are an AI assistant designed to clean and extract structured address information from raw text.\nYour goal is to identify and extract relevant address components while ignoring any unrelated information.\nThe output must be formatted as a structured dictionary.\n\nYour task is to parse an address from raw text and return a dictionary with the following keys:\n - zip_code: The postal or ZIP code.\n - country: The country name.\n - state_or_county: The state, province, or county (if applicable).\n - city: The city or town name.\n - district: The district or borough name (if mentioned).\n - street_type: The type of public space (e.g., street, avenue, boulevard, square).\n - street_name: The name of the public space (e.g., Main, Baker, Champs-\u00c9lys\u00e9es).\n - house_number: The house or building number.\n - floor: The floor number (if mentioned).\n - flat_number: The apartment or unit number (if mentioned).\n - additional_info: Any other useful details, such as building names, or known landmarks.\n\nIf any information is missing from the input, leave the corresponding key as an empty string.\n\nYou must return only a python dictionary with the following keys:\n`zip_code`, `country`, `state_or_county`, `city`, `district`, `street_name`, \n`house_number`, `floor`, `flat_number`, `additional_info`.\n\nDo not include any extra text, comments, or explanations\u2014only return the dictionary.",
+          "prompt_role": "SYSTEM"
+        },
+        "status": "done",
+        "title": "LynxScribe Message"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 354.0,
+      "id": "LynxScribe Message 3",
+      "position": {
+        "x": 36.0,
+        "y": 569.0
+      },
+      "type": "basic",
+      "width": 740.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {},
+          "name": "LynxScribe Message",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "top",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "prompt_content": {
+              "default": null,
+              "name": "prompt_content",
+              "type": {
+                "format": "textarea"
+              }
+            },
+            "prompt_role": {
+              "default": null,
+              "name": "prompt_role",
+              "type": {
+                "enum": [
+                  "SYSTEM",
+                  "USER"
+                ]
+              }
+            }
+          },
+          "position": {
+            "x": 1498.0,
+            "y": 660.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "prompt_content": "Extract structured address information from the following text: {message_parts}",
+          "prompt_role": "USER"
+        },
+        "status": "done",
+        "title": "LynxScribe Message"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 347.0,
+      "id": "LynxScribe Message 1",
+      "position": {
+        "x": 817.0,
+        "y": 566.0
+      },
+      "type": "basic",
+      "width": 498.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {},
+          "name": "Read Excel",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "columns": {
+              "default": "",
+              "name": "columns",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "file_path": {
+              "default": null,
+              "name": "file_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "sheet_name": {
+              "default": "Sheet1",
+              "name": "sheet_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 236.0,
+            "y": 150.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "columns": "",
+          "file_path": "uploads/task_solver_examples.xlsx",
+          "sheet_name": "address_example"
+        },
+        "status": "done",
+        "title": "Read Excel"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 284.0,
+      "id": "Read Excel 1",
+      "position": {
+        "x": 41.0,
+        "y": 168.0
+      },
+      "type": "basic",
+      "width": 332.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {
+            "dataframe": {
+              "name": "dataframe",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "instruction_prompt": {
+              "name": "instruction_prompt",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "system_prompt": {
+              "name": "system_prompt",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "LynxScribe Task Solver",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "llm_interface": {
+              "default": "openai",
+              "name": "llm_interface",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "llm_model_name": {
+              "default": "gpt-4o",
+              "name": "llm_model_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "new_column_names": {
+              "default": "processed_field",
+              "name": "new_column_names",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1511.0,
+            "y": 220.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "llm_interface": "openai",
+          "llm_model_name": "gpt-4o-mini",
+          "new_column_names": "zip_code, country, state_or_county, city, district, street_name, house_number, floor, flat_number, additional_info"
+        },
+        "status": "done",
+        "title": "LynxScribe Task Solver"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 282.0,
+      "id": "LynxScribe Task Solver 1",
+      "position": {
+        "x": 626.0,
+        "y": 170.0
+      },
+      "type": "basic",
+      "width": 272.0
+    },
+    {
+      "data": {
+        "display": {
+          "dataframes": {
+            "df": {
+              "columns": [
+                "message_parts",
+                "zip_code",
+                "country",
+                "state_or_county",
+                "city",
+                "district",
+                "street_name",
+                "house_number",
+                "floor",
+                "flat_number",
+                "additional_info"
+              ],
+              "data": [
+                [
+                  "John's old apartment: 742 Evergreen Terrace, Springfield, IL 62704, USA. Call me at +1-555-1234 or email [email protected].",
+                  "62704",
+                  "USA",
+                  "IL",
+                  "Springfield",
+                  "",
+                  "Evergreen Terrace",
+                  "742",
+                  "",
+                  "",
+                  "John's old apartment"
+                ],
+                [
+                  "Visit our office at 56B Baker Street, Marylebone, London W1U 8ED, UK. (Nearest Tube: Baker Street). Contact: [email protected].",
+                  "W1U 8ED",
+                  "UK",
+                  "",
+                  "London",
+                  "Marylebone",
+                  "Baker",
+                  "56B",
+                  "",
+                  "",
+                  ""
+                ],
+                [
+                  "New residence: 300, 5th Avenue, New York, NY 10001, USA. Floor 12, Apt 1204. My new phone is (212) 555-6789.",
+                  "10001",
+                  "USA",
+                  "NY",
+                  "New York",
+                  "",
+                  "5th Avenue",
+                  "300",
+                  "12",
+                  "1204",
+                  ""
+                ],
+                [
+                  "We just moved to 23 rue de la Paix, 75002 Paris, France. Floor 3, Flat 5. Send mail to my old address instead.",
+                  "75002",
+                  "France",
+                  "",
+                  "Paris",
+                  "",
+                  "rue de la Paix",
+                  "23",
+                  "3",
+                  "5",
+                  ""
+                ],
+                [
+                  "Warehouse location: 1024 Industrial Blvd, Houston, TX 77002, USA. Not open on weekends. Customer support: [email protected].",
+                  "77002",
+                  "USA",
+                  "TX",
+                  "Houston",
+                  "",
+                  "Industrial Blvd",
+                  "1024",
+                  "",
+                  "",
+                  "Warehouse location"
+                ],
+                [
+                  "My grandma lives at 2F, 15-3 Shinjuku, Tokyo 160-0022, Japan. Don't use my old phone number anymore!",
+                  "160-0022",
+                  "Japan",
+                  "",
+                  "Tokyo",
+                  "Shinjuku",
+                  "Shinjuku",
+                  "15-3",
+                  "2F",
+                  "",
+                  ""
+                ],
+                [
+                  "Delivery address: Apt 9, 88 Queen's Road Central, Central, Hong Kong. Landmark: Opposite IFC Mall.",
+                  "",
+                  "Hong Kong",
+                  "",
+                  "Central",
+                  "",
+                  "Queen's Road Central",
+                  "88",
+                  "",
+                  "9",
+                  "Opposite IFC Mall"
+                ]
+              ]
+            }
+          }
+        },
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {
+            "input": {
+              "name": "input",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "View DataFrame",
+          "outputs": {},
+          "params": {},
+          "position": {
+            "x": 1719.0,
+            "y": 332.0
+          },
+          "type": "table_view"
+        },
+        "params": {},
+        "status": "done",
+        "title": "View DataFrame"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 316.0,
+      "id": "View DataFrame 1",
+      "position": {
+        "x": 1139.0,
+        "y": 91.0
+      },
+      "type": "table_view",
+      "width": 1118.0
+    },
+    {
+      "data": {
+        "display": {
+          "dataframes": {
+            "df": {
+              "columns": [
+                "message_parts"
+              ],
+              "data": [
+                [
+                  "John's old apartment: 742 Evergreen Terrace, Springfield, IL 62704, USA. Call me at +1-555-1234 or email [email protected]."
+                ],
+                [
+                  "Visit our office at 56B Baker Street, Marylebone, London W1U 8ED, UK. (Nearest Tube: Baker Street). Contact: [email protected]."
+                ],
+                [
+                  "New residence: 300, 5th Avenue, New York, NY 10001, USA. Floor 12, Apt 1204. My new phone is (212) 555-6789."
+                ],
+                [
+                  "We just moved to 23 rue de la Paix, 75002 Paris, France. Floor 3, Flat 5. Send mail to my old address instead."
+                ],
+                [
+                  "Warehouse location: 1024 Industrial Blvd, Houston, TX 77002, USA. Not open on weekends. Customer support: [email protected]."
+                ],
+                [
+                  "My grandma lives at 2F, 15-3 Shinjuku, Tokyo 160-0022, Japan. Don't use my old phone number anymore!"
+                ],
+                [
+                  "Delivery address: Apt 9, 88 Queen's Road Central, Central, Hong Kong. Landmark: Opposite IFC Mall."
+                ]
+              ]
+            }
+          }
+        },
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {
+            "input": {
+              "name": "input",
+              "position": "bottom",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "View DataFrame",
+          "outputs": {},
+          "params": {},
+          "position": {
+            "x": 1083.0,
+            "y": 134.0
+          },
+          "type": "table_view"
+        },
+        "params": {},
+        "status": "done",
+        "title": "View DataFrame"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "View DataFrame 2",
+      "position": {
+        "x": 515.0,
+        "y": -135.125
+      },
+      "type": "table_view",
+      "width": 200.0
+    }
+  ]
+}

examples/LynxScribe Image Search.lynxkite.json CHANGED Viewed

@@ -14,6 +14,13 @@
       "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "image_descriptions"
     },
     {
       "id": "Input chat 1 LynxScribe Image RAG Query 1",
       "source": "Input chat 1",
@@ -27,13 +34,6 @@
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Query 1",
       "targetHandle": "rag_graph"
-    },
-    {
-      "id": "LynxScribe Image RAG Query 1 LynxScribe Image Result Viewer 1",
-      "source": "LynxScribe Image RAG Query 1",
-      "sourceHandle": "output",
-      "target": "LynxScribe Image Result Viewer 1",
-      "targetHandle": "embedding_similarities"
     }
   ],
   "env": "LynxScribe",
@@ -292,14 +292,10 @@
               }
             }
           },
-          "position": {
-            "x": 1260.0,
-            "y": 166.0
-          },
           "type": "basic"
         },
         "params": {
-          "chat": "Show me a picture about doctors and patients!"
         },
         "status": "done",
         "title": "Input chat"
@@ -316,6 +312,42 @@
     },
     {
       "data": {
         "display": null,
         "error": null,
         "input_metadata": null,
@@ -356,64 +388,26 @@
             }
           },
           "position": {
-            "x": 1987.0,
-            "y": 365.0
           },
           "type": "basic"
         },
         "params": {
-          "top_k": 3.0
         },
         "status": "done",
         "title": "LynxScribe Image RAG Query"
       },
       "dragHandle": ".bg-primary",
-      "height": 207.0,
       "id": "LynxScribe Image RAG Query 1",
       "position": {
-        "x": 1160.0,
-        "y": -40.0
       },
       "type": "basic",
-      "width": 283.0
-    },
-    {
-      "data": {
-        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/bethesda-naval-medical-center-80380_1280.jpg",
-        "error": null,
-        "input_metadata": null,
-        "meta": {
-          "inputs": {
-            "embedding_similarities": {
-              "name": "embedding_similarities",
-              "position": "left",
-              "type": {
-                "type": "<class 'inspect._empty'>"
-              }
-            }
-          },
-          "name": "LynxScribe Image Result Viewer",
-          "outputs": {},
-          "params": {},
-          "position": {
-            "x": 2326.0,
-            "y": 319.0
-          },
-          "type": "image"
-        },
-        "params": {},
-        "status": "done",
-        "title": "LynxScribe Image Result Viewer"
-      },
-      "dragHandle": ".bg-primary",
-      "height": 515.0,
-      "id": "LynxScribe Image Result Viewer 1",
-      "position": {
-        "x": 1657.0,
-        "y": -193.0
-      },
-      "type": "image",
-      "width": 707.0
     }
   ]
 }

       "target": "LynxScribe Image RAG Builder 1",
       "targetHandle": "image_descriptions"
     },
+    {
+      "id": "LynxScribe Image RAG Query 1 LynxScribe Image Result Viewer 1",
+      "source": "LynxScribe Image RAG Query 1",
+      "sourceHandle": "output",
+      "target": "LynxScribe Image Result Viewer 1",
+      "targetHandle": "embedding_similarities"
+    },
     {
       "id": "Input chat 1 LynxScribe Image RAG Query 1",
       "source": "Input chat 1",
       "sourceHandle": "output",
       "target": "LynxScribe Image RAG Query 1",
       "targetHandle": "rag_graph"
     }
   ],
   "env": "LynxScribe",
               }
             }
           },
           "type": "basic"
         },
         "params": {
+          "chat": "Show me a picture about healthy lifestyle"
         },
         "status": "done",
         "title": "Input chat"
     },
     {
       "data": {
+        "display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/food-405521_1280.jpg",
+        "error": null,
+        "input_metadata": null,
+        "meta": {
+          "inputs": {
+            "embedding_similarities": {
+              "name": "embedding_similarities",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "LynxScribe Image Result Viewer",
+          "outputs": {},
+          "params": {},
+          "type": "image"
+        },
+        "params": {},
+        "status": "done",
+        "title": "LynxScribe Image Result Viewer"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 1008.0,
+      "id": "LynxScribe Image Result Viewer 1",
+      "position": {
+        "x": 1674.3708499095837,
+        "y": -254.88365280289335
+      },
+      "type": "image",
+      "width": 677.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
         "display": null,
         "error": null,
         "input_metadata": null,
             }
           },
           "position": {
+            "x": 1611.0,
+            "y": 353.0
           },
           "type": "basic"
         },
         "params": {
+          "top_k": "1"
         },
         "status": "done",
         "title": "LynxScribe Image RAG Query"
       },
       "dragHandle": ".bg-primary",
+      "height": 212.0,
       "id": "LynxScribe Image RAG Query 1",
       "position": {
+        "x": 1106.0332007233271,
+        "y": -44.51280289330922
       },
       "type": "basic",
+      "width": 281.0
     }
   ]
 }

examples/uploads/task_solver_examples.xlsx ADDED Viewed

Binary file (11.3 kB). View file

lynxkite-app/src/lynxkite_app/crdt.py CHANGED Viewed

@@ -273,6 +273,7 @@ async def execute(name: str, ws_crdt: pycrdt.Map, ws_pyd: workspace.Workspace, d
             nc["data"]["status"] = "planned"
             # Nodes get a reference to their CRDT maps, so they can update them as the results come in.
             np._crdt = nc
     await workspace.execute(ws_pyd)
     workspace.save(ws_pyd, path)
     print(f"Finished running {name} in {ws_pyd.env}.")

             nc["data"]["status"] = "planned"
             # Nodes get a reference to their CRDT maps, so they can update them as the results come in.
             np._crdt = nc
+    ws_pyd = ws_pyd.normalize()
     await workspace.execute(ws_pyd)
     workspace.save(ws_pyd, path)
     print(f"Finished running {name} in {ws_pyd.env}.")

lynxkite-core/src/lynxkite/core/executors/simple.py ADDED Viewed

	@@ -0,0 +1,64 @@

+"""A LynxKite executor that simply passes the output of one box to the other."""
+import os
+from .. import ops
+from .. import workspace
+import traceback
+import inspect
+import graphlib
+def register(env: str):
+    """Registers the one-by-one executor."""
+    ops.EXECUTORS[env] = lambda ws: execute(ws, ops.CATALOGS[env])
+async def await_if_needed(obj):
+    if inspect.isawaitable(obj):
+        return await obj
+    return obj
+async def execute(ws: workspace.Workspace, catalog: ops.Catalog):
+    """Runs each node of the workspace once, in dependency order.
+
+    The output of a node is passed to the inputs that are connected to it.
+    A node is looked up in `catalog` by its title. Errors raised while running
+    a node are published on that node and execution continues with the next one.
+
+    Returns a dict that maps (node id, output name) to the output value.
+    """
+    nodes = {n.id: n for n in ws.nodes}
+    # dependencies: node id -> ids of the nodes it reads from.
+    dependencies = {n: [] for n in nodes}
+    # in_edges: node id -> {input handle: (source node id, source handle)}.
+    in_edges = {n: {} for n in nodes}
+    for e in ws.edges:
+        dependencies[e.target].append(e.source)
+        # Each input handle may have at most one incoming edge.
+        assert e.targetHandle not in in_edges[e.target], f"Duplicate input for {e.target}"
+        in_edges[e.target][e.targetHandle] = e.source, e.sourceHandle
+    outputs = {}
+    # Topological order guarantees that sources are visited before their targets.
+    ts = graphlib.TopologicalSorter(dependencies)
+    for node_id in ts.static_order():
+        node = nodes[node_id]
+        op = catalog[node.data.title]
+        # Shallow copy, so the keyword arguments are not the node's own dict.
+        params = {**node.data.params}
+        node.publish_started()
+        try:
+            inputs = []
+            missing = []
+            for i in op.inputs.values():
+                edges = in_edges[node_id]
+                # An input is missing if nothing is connected to it, or if the
+                # connected node has not stored an output (it failed or was skipped).
+                if i.name in edges and edges[i.name] in outputs:
+                    inputs.append(outputs[edges[i.name]])
+                else:
+                    missing.append(i.name)
+            if missing:
+                node.publish_error(f"Missing input: {', '.join(missing)}")
+                continue
+            result = op(*inputs, **params)
+            # The output and the display may be awaitables; resolve them before use.
+            result.output = await await_if_needed(result.output)
+            result.display = await await_if_needed(result.display)
+            if len(op.outputs) == 1:
+                # A single output is stored as is.
+                [output] = list(op.outputs.values())
+                outputs[node_id, output.name] = result.output
+            elif len(op.outputs) > 1:
+                # Multiple outputs are read from the returned dict by output name.
+                assert type(result.output) is dict, "An op with multiple outputs must return a dict"
+                for output in op.outputs.values():
+                    outputs[node_id, output.name] = result.output[output.name]
+            node.publish_result(result)
+        except Exception as e:
+            # The traceback is printed unless LYNXKITE_SUPPRESS_OP_ERRORS is set
+            # to a non-empty value. The error is published on the node either way.
+            if not os.environ.get("LYNXKITE_SUPPRESS_OP_ERRORS"):
+                traceback.print_exc()
+            node.publish_error(e)
+    return outputs

lynxkite-core/src/lynxkite/core/ops.py CHANGED Viewed

@@ -144,6 +144,7 @@ def _param_to_type(name, value, type):
         assert value != "", f"{name} is unset."
         return float(value)
     if isinstance(type, enum.EnumMeta):
         return type[value]
     if isinstance(type, types.UnionType):
         match type.__args__:

         assert value != "", f"{name} is unset."
         return float(value)
     if isinstance(type, enum.EnumMeta):
+        assert value in type.__members__, f"{value} is not an option for {name}."
         return type[value]
     if isinstance(type, types.UnionType):
         match type.__args__:

lynxkite-core/src/lynxkite/core/workspace.py CHANGED Viewed

@@ -97,6 +97,25 @@ class Workspace(BaseConfig):
     edges: list[WorkspaceEdge] = dataclasses.field(default_factory=list)
     _crdt: pycrdt.Map
 async def execute(ws: Workspace):
     if ws.env in ops.EXECUTORS:

     edges: list[WorkspaceEdge] = dataclasses.field(default_factory=list)
     _crdt: pycrdt.Map
+    def normalize(self):
+        if self.env not in ops.CATALOGS:
+            return self
+        catalog = ops.CATALOGS[self.env]
+        _ops = {n.id: catalog[n.data.title] for n in self.nodes if n.data.title in catalog}
+        valid_targets = set(
+            (n.id, h) for n in self.nodes for h in _ops[n.id].inputs if n.id in _ops
+        )
+        valid_sources = set(
+            (n.id, h) for n in self.nodes for h in _ops[n.id].outputs if n.id in _ops
+        )
+        edges = [
+            edge
+            for edge in self.edges
+            if (edge.source, edge.sourceHandle) in valid_sources
+            and (edge.target, edge.targetHandle) in valid_targets
+        ]
+        return self.model_copy(update={"edges": edges})
 async def execute(ws: Workspace):
     if ws.env in ops.EXECUTORS:

lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED Viewed

@@ -28,7 +28,7 @@ from lynxscribe.components.chat.processors import (
     TruncateHistory,
 )
 from lynxscribe.components.chat.api import ChatAPI
-from lynxscribe.core.models.prompts import ChatCompletionPrompt
 from lynxscribe.components.rag.loaders import FAQTemplateLoader
 from lynxkite.core import ops
@@ -56,6 +56,11 @@ class RAGVersion(Enum):
     V2 = "v2"
 class RAGTemplate(BaseModel):
     """
     Model for RAG templates consisting of three tables: they are connected via scenario names.
@@ -365,6 +370,10 @@ def ls_save_rag_graph(
 @ops.input_position(rag_graph="bottom")
 @op("LynxScribe Image RAG Query")
 async def search_context(rag_graph, text, *, top_k=3):
     message = text["text"]
     rag_graph = rag_graph[0]["rag_graph"]
@@ -382,7 +391,9 @@ async def search_context(rag_graph, text, *, top_k=3):
         description = emb_sim.embedding.document
         result_list.append({"image_url": image_url, "score": score, "description": description})
-    return {"embedding_similarities": result_list}
 @op("LynxScribe Image Result Viewer", view="image")
@@ -672,6 +683,116 @@ def chat_processor(processor, *, _ctx: one_by_one.Context):
     return {"chat_processor": chat_processor, **cfg}
 @output_on_top
 @op("Truncate history")
 def truncate_history(*, max_tokens=10000):
@@ -718,6 +839,21 @@ def input_chat(*, chat: str):
     return {"text": chat}
 @op("View", view="table_view")
 def view(input):
     columns = [str(c) for c in input.keys() if not str(c).startswith("_")]

     TruncateHistory,
 )
 from lynxscribe.components.chat.api import ChatAPI
+from lynxscribe.core.models.prompts import ChatCompletionPrompt, Message
 from lynxscribe.components.rag.loaders import FAQTemplateLoader
 from lynxkite.core import ops
     V2 = "v2"
+class MessageRole(Enum):
+    """Role selectable for a prompt message in the "LynxScribe Message" op.
+
+    The member's value is the string passed as ``role`` when the op builds
+    its ``Message``.
+    """
+
+    SYSTEM = "system"
+    USER = "user"
 class RAGTemplate(BaseModel):
     """
     Model for RAG templates consisting of three tables: they are connected via scenario names.
 @ops.input_position(rag_graph="bottom")
 @op("LynxScribe Image RAG Query")
 async def search_context(rag_graph, text, *, top_k=3):
+    """
+    top_k: which results we are showing (TODO: when the image viewer is
+    updated w pager, change back to top k)
+    """
     message = text["text"]
     rag_graph = rag_graph[0]["rag_graph"]
         description = emb_sim.embedding.document
         result_list.append({"image_url": image_url, "score": score, "description": description})
+    real_k = min(top_k, len(result_list) - 1)
+    return {"embedding_similarities": [result_list[real_k]]}
 @op("LynxScribe Image Result Viewer", view="image")
     return {"chat_processor": chat_processor, **cfg}
+@output_on_top
+@op("LynxScribe Message")
+def lynxscribe_message(
+    *, prompt_role: MessageRole = MessageRole.SYSTEM, prompt_content: ops.LongStr
+):
+    return_message = Message(role=prompt_role.value, content=prompt_content.strip())
+    return {"prompt_message": return_message}
+@op("Read Excel")
+def read_excel(*, file_path: str, sheet_name: str = "Sheet1", columns: str = ""):
+    """
+    Reads an Excel file and returns the content of the specified sheet.
+    The columns parameter can be used to specify which columns to include in the output.
+    If not specified, all columns will be included (separate the values by comma).
+    TODO: more general: several input/output versions.
+    """
+    df = pd.read_excel(file_path, sheet_name=sheet_name)
+    if columns:
+        columns = [c.strip() for c in columns.split(",") if c.strip()]
+        columns = [c for c in columns if c in df.columns]
+        if len(columns) == 0:
+            raise ValueError("No valid columns specified.")
+        df = df[columns].copy()
+    return {"dataframe": df}
+@ops.input_position(system_prompt="bottom", instruction_prompt="bottom", dataframe="left")
+@op("LynxScribe Task Solver")
+@mem.cache
+async def ls_task_solver(
+    system_prompt,
+    instruction_prompt,
+    dataframe,
+    *,
+    llm_interface: str = "openai",
+    llm_model_name: str = "gpt-4o",
+    new_column_names: str = "processed_field",
+    # api_key_name: str = "OPENAI_API_KEY",
+):
+    """
+    Solving the described task on a data frame and put the results into a new column.
+    If there are multiple new_column_names provided, the structured dictionary output
+    will be split into multiple columns.
+    """
+    # handling inputs
+    system_message = system_prompt[0]["prompt_message"]
+    instruction_message = instruction_prompt[0]["prompt_message"]
+    df = dataframe["dataframe"]
+    # preparing output
+    out_df = df.copy()
+    # connecting to the LLM
+    llm_params = {"name": llm_interface}
+    # if api_key_name:
+    #     llm_params["api_key"] = os.getenv(api_key_name)
+    llm = get_llm_engine(**llm_params)
+    # getting the list of fieldnames used in the instruction message
+    fieldnames = []
+    for pot_fieldname in df.columns:
+        if "{" + pot_fieldname + "}" in instruction_message.content:
+            fieldnames.append(pot_fieldname)
+    # generate a list of instruction messages (from fieldnames)
+    # each row of the df is a separate instruction message
+    # TODO: make it fast for large dataframes
+    instruction_messages = []
+    for i in range(len(df)):
+        instruction_message_i = deepcopy(instruction_message)
+        for fieldname in fieldnames:
+            instruction_message_i.content = instruction_message_i.content.replace(
+                "{" + fieldname + "}", str(df.iloc[i][fieldname])
+            )
+        instruction_messages.append(instruction_message_i)
+    # generate completition prompt
+    completion_prompts = [
+        ChatCompletionPrompt(
+            model=llm_model_name,
+            messages=[system_message, instruction_message_j],
+        )
+        for instruction_message_j in instruction_messages
+    ]
+    # get the answers
+    tasks = [llm.acreate_completion(completion_prompt=_prompt) for _prompt in completion_prompts]
+    out_completions = await asyncio.gather(*tasks)
+    # answer post-processing: 1 vs more columns
+    col_list = [_c.strip() for _c in new_column_names.split(",") if _c.strip()]
+    if len(col_list) == 0:
+        raise ValueError("No valid column names specified.")
+    elif len(col_list) == 1:
+        out_df[col_list[0]] = [result.choices[0].message.content for result in out_completions]
+    else:
+        answers = [
+            dictionary_corrector(result.choices[0].message.content, expected_keys=col_list)
+            for result in out_completions
+        ]
+        for i, col in enumerate(col_list):
+            out_df[col] = [answer[col] for answer in answers]
+    return {"dataframe": out_df}
 @output_on_top
 @op("Truncate history")
 def truncate_history(*, max_tokens=10000):
     return {"text": chat}
+@ops.input_position(input="bottom")
+@op("View DataFrame", view="table_view")
+def view_df(input):
+    df = input[0]["dataframe"]
+    v = {
+        "dataframes": {
+            "df": {
+                "columns": [str(c) for c in df.columns],
+                "data": df.values.tolist(),
+            }
+        }
+    }
+    return v
 @op("View", view="table_view")
 def view(input):
     columns = [str(c) for c in input.keys() if not str(c).startswith("_")]

lynxkite-pillow-example/src/lynxkite_pillow_example/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Demo for how easily we can provide a UI for popular open-source tools."""
 from lynxkite.core import ops
-from lynxkite.core.executors import one_by_one
 from PIL import Image, ImageFilter
 import base64
 import fsspec
@@ -9,7 +9,7 @@ import io
 ENV = "Pillow"
 op = ops.op_registration(ENV)
-one_by_one.register(ENV, cache=False)
 @op("Open image")

 """Demo for how easily we can provide a UI for popular open-source tools."""
 from lynxkite.core import ops
+from lynxkite.core.executors import simple
 from PIL import Image, ImageFilter
 import base64
 import fsspec
 ENV = "Pillow"
 op = ops.op_registration(ENV)
+simple.register(ENV)
 @op("Open image")