Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

darabos committed 25 days ago

Commit

e05c3b0

1 Parent(s): 6ef6dd5

Make it simpler to declare operations "slow".

Browse files

Files changed (7) hide show

lynxkite-bio/src/lynxkite_bio/nims.py +4 -10
lynxkite-bio/src/lynxkite_bio/rdkit.py +0 -2
lynxkite-core/src/lynxkite/core/ops.py +6 -3
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py +5 -12
lynxkite-graph-analytics/src/lynxkite_graph_analytics/ml_ops.py +2 -6
lynxkite-graph-analytics/src/lynxkite_graph_analytics/networkx_ops.py +1 -1
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py +7 -15

lynxkite-bio/src/lynxkite_bio/nims.py CHANGED Viewed

@@ -2,13 +2,11 @@
 from lynxkite_graph_analytics import Bundle
 from lynxkite.core import ops
-import joblib
 import httpx
 import pandas as pd
 import os
-mem = joblib.Memory(".joblib-cache")
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)
@@ -35,8 +33,7 @@ async def query_bionemo_nim(
         raise ValueError(f"Query failed: {e}")
-@op("MSA-search")
-@mem.cache
 async def msa_search(
     bundle: Bundle,
     *,
@@ -74,8 +71,7 @@ async def msa_search(
     return bundle
-@op("Query OpenFold2")
-@mem.cache
 async def query_openfold2(
     bundle: Bundle,
     *,
@@ -135,8 +131,7 @@ def view_molecule(
     }
-@op("Query GenMol")
-@mem.cache
 async def query_genmol(
     bundle: Bundle,
     *,
@@ -166,8 +161,7 @@ async def query_genmol(
     return bundle
-@op("Query DiffDock")
-@mem.cache
 async def query_diffdock(
     proteins: Bundle,
     ligands: Bundle,

 from lynxkite_graph_analytics import Bundle
 from lynxkite.core import ops
 import httpx
 import pandas as pd
 import os
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)
         raise ValueError(f"Query failed: {e}")
+@op("MSA-search", slow=True)
 async def msa_search(
     bundle: Bundle,
     *,
     return bundle
+@op("Query OpenFold2", slow=True)
 async def query_openfold2(
     bundle: Bundle,
     *,
     }
+@op("Query GenMol", slow=True)
 async def query_genmol(
     bundle: Bundle,
     *,
     return bundle
+@op("Query DiffDock", slow=True)
 async def query_diffdock(
     proteins: Bundle,
     ligands: Bundle,

lynxkite-bio/src/lynxkite_bio/rdkit.py CHANGED Viewed

@@ -2,7 +2,6 @@
 from lynxkite_graph_analytics import Bundle, RelationDefinition
 from lynxkite.core import ops
-import joblib
 import numpy as np
 import pandas as pd
 import rdkit.Chem
@@ -10,7 +9,6 @@ import rdkit.Chem.rdFingerprintGenerator
 import rdkit.Chem.Fingerprints.ClusterMols
 import scipy
-mem = joblib.Memory(".joblib-cache")
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)

 from lynxkite_graph_analytics import Bundle, RelationDefinition
 from lynxkite.core import ops
 import numpy as np
 import pandas as pd
 import rdkit.Chem
 import rdkit.Chem.Fingerprints.ClusterMols
 import scipy
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)

lynxkite-core/src/lynxkite/core/ops.py CHANGED Viewed

@@ -255,7 +255,7 @@ def op(
             func = matplotlib_to_image(func)
         if slow:
             func = mem.cache(func)
-            func = _global_slow(func)
         # Positional arguments are inputs.
         inputs = [
             Input(name=name, type=param.annotation)
@@ -385,9 +385,13 @@ def passive_op_registration(env: str):
     return functools.partial(register_passive_op, env)
-def slow(func):
     """Decorator for slow, blocking operations. Turns them into separate threads."""
     @functools.wraps(func)
     async def wrapper(*args, **kwargs):
         return await asyncio.to_thread(func, *args, **kwargs)
@@ -395,7 +399,6 @@ def slow(func):
     return wrapper
-_global_slow = slow  # For access inside op().
 CATALOGS_SNAPSHOTS: dict[str, Catalogs] = {}

             func = matplotlib_to_image(func)
         if slow:
             func = mem.cache(func)
+            func = make_async(func)
         # Positional arguments are inputs.
         inputs = [
             Input(name=name, type=param.annotation)
     return functools.partial(register_passive_op, env)
def make_async(func):
    """Return an awaitable version of ``func``.

    Slow, blocking callables are wrapped so that every call runs in a separate
    thread through ``asyncio.to_thread``. Callables that are already
    asynchronous are returned unchanged. That covers coroutine functions and
    callable objects whose class defines ``async def __call__`` (for example a
    wrapper object around an ``async def`` function).

    Args:
        func: The callable to make awaitable.

    Returns:
        ``func`` itself when it is already asynchronous; otherwise a coroutine
        function carrying ``func``'s metadata that runs ``func`` in a thread.
    """
    if asyncio.iscoroutinefunction(func):
        # If the function is already a coroutine, return it as is.
        return func
    # Look the attribute up on the type, not the object: for a class passed in
    # directly this avoids mistaking its instances' __call__ for its own.
    if asyncio.iscoroutinefunction(getattr(type(func), "__call__", None)):
        # Calling such an object in a thread would only hand back an
        # un-awaited coroutine, so it is passed through like a coroutine function.
        return func

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        return await asyncio.to_thread(func, *args, **kwargs)

    return wrapper
 CATALOGS_SNAPSHOTS: dict[str, Catalogs] = {}

lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py CHANGED Viewed

@@ -12,7 +12,6 @@ import tarfile
 import os
 from collections import Counter
 from . import core
-import joblib
 import numpy as np
 import torch
 from pathlib import Path
@@ -40,7 +39,6 @@ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
 import scanpy
-mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
 DATA_PATH = Path("/workspace")
@@ -56,8 +54,7 @@ def random_seed(seed: int):
         random.setstate(state)
-@op("BioNeMo > Download CELLxGENE dataset")
-@mem.cache()
 def download_cellxgene_dataset(
     *,
     save_path: str,
@@ -99,8 +96,7 @@ def import_h5ad(*, file_path: str):
     return scanpy.read_h5ad(DATA_PATH / Path(file_path))
-@op("BioNeMo > Download model")
-@mem.cache(verbose=1)
 def download_model(*, model_name: str) -> str:
     """Downloads a model."""
     model_download_parameters = {
@@ -144,8 +140,7 @@ def download_model(*, model_name: str) -> str:
     return model_filename
-@op("BioNeMo > Infer")
-@mem.cache(verbose=1)
 def infer(dataset_path: str, model_path: str | None = None, *, results_path: str) -> str:
     """Infer on a dataset."""
     # This import is slow, so we only import it when we need it.
@@ -218,8 +213,7 @@ def plot_labels(adata):
     return options
-@op("BioNeMo > Run benchmark")
-@mem.cache(verbose=1)
 def run_benchmark(data, labels, *, use_pca: bool = False):
     """
     data - contains the single cell expression (or whatever feature) in each row.
@@ -277,8 +271,7 @@ def run_benchmark(data, labels, *, use_pca: bool = False):
     return results_out, conf_matrix
-@op("BioNeMo > Plot confusion matrix", view="visualization")
-@mem.cache(verbose=1)
 def plot_confusion_matrix(benchmark_output, labels):
     cm = benchmark_output[1]
     labels = labels.classes_

 import os
 from collections import Counter
 from . import core
 import numpy as np
 import torch
 from pathlib import Path
 import scanpy
 op = ops.op_registration(core.ENV)
 DATA_PATH = Path("/workspace")
         random.setstate(state)
+@op("BioNeMo > Download CELLxGENE dataset", slow=True)
 def download_cellxgene_dataset(
     *,
     save_path: str,
     return scanpy.read_h5ad(DATA_PATH / Path(file_path))
+@op("BioNeMo > Download model", slow=True)
 def download_model(*, model_name: str) -> str:
     """Downloads a model."""
     model_download_parameters = {
     return model_filename
+@op("BioNeMo > Infer", slow=True)
 def infer(dataset_path: str, model_path: str | None = None, *, results_path: str) -> str:
     """Infer on a dataset."""
     # This import is slow, so we only import it when we need it.
     return options
+@op("BioNeMo > Run benchmark", slow=True)
 def run_benchmark(data, labels, *, use_pca: bool = False):
     """
     data - contains the single cell expression (or whatever feature) in each row.
     return results_out, conf_matrix
+@op("BioNeMo > Plot confusion matrix", view="visualization", slow=True)
 def plot_confusion_matrix(benchmark_output, labels):
     cm = benchmark_output[1]
     labels = labels.classes_

lynxkite-graph-analytics/src/lynxkite_graph_analytics/ml_ops.py CHANGED Viewed

@@ -8,12 +8,10 @@ from lynxkite.core import workspace
 from .pytorch import pytorch_core
 from lynxkite.core import ops
 from tqdm import tqdm
-import joblib
 import pandas as pd
 import pathlib
-mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
@@ -57,8 +55,7 @@ class ModelOutputMapping(pytorch_core.ModelMapping):
     pass
-@op("Train model")
-@ops.slow
 def train_model(
     bundle: core.Bundle,
     *,
@@ -82,8 +79,7 @@ def train_model(
     return bundle
-@op("Model inference")
-@ops.slow
 def model_inference(
     bundle: core.Bundle,
     *,

 from .pytorch import pytorch_core
 from lynxkite.core import ops
 from tqdm import tqdm
 import pandas as pd
 import pathlib
 op = ops.op_registration(core.ENV)
     pass
+@op("Train model", slow=True)
 def train_model(
     bundle: core.Bundle,
     *,
     return bundle
+@op("Model inference", slow=True)
 def model_inference(
     bundle: core.Bundle,
     *,

lynxkite-graph-analytics/src/lynxkite_graph_analytics/networkx_ops.py CHANGED Viewed

@@ -156,7 +156,7 @@ def wrapped(name: str, func):
         for k, v in kwargs.items():
             if v == "None":
                 kwargs[k] = None
-        res = await ops.slow(func)(*args, **kwargs)
         # Figure out what the returned value is.
         if isinstance(res, nx.Graph):
             return res

         for k, v in kwargs.items():
             if v == "None":
                 kwargs[k] = None
+        res = await ops.run_in_thread(func)(*args, **kwargs)
         # Figure out what the returned value is.
         if isinstance(res, nx.Graph):
             return res

lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py CHANGED Viewed

@@ -8,7 +8,6 @@ from copy import deepcopy
 from enum import Enum
 import asyncio
 import pandas as pd
-import joblib
 from pydantic import BaseModel, ConfigDict
 import pathlib
@@ -39,7 +38,6 @@ DEFAULT_NEGATIVE_ANSWER = "I'm sorry, but the data I've been trained on does not
 ENV = "LynxScribe"
 one_by_one.register(ENV)
-mem = joblib.Memory("joblib-cache")
 op = ops.op_registration(ENV)
 output_on_top = ops.output_position(output="top")
@@ -149,8 +147,7 @@ def cloud_file_loader(
 # @output_on_top
-# @op("LynxScribe RAG Graph Vector Store")
-# @mem.cache
 # def ls_rag_graph(
 #     *,
 #     name: str = "faiss",
@@ -187,8 +184,7 @@ def cloud_file_loader(
 #     return {"rag_graph": rag_graph}
-@op("LynxScribe Image Describer")
-@mem.cache
 async def ls_image_describer(
     file_urls,
     *,
@@ -251,8 +247,7 @@ async def ls_image_describer(
     return {"image_descriptions": image_descriptions}
-@op("LynxScribe Image RAG Builder")
-@mem.cache
 async def ls_image_rag_builder(
     image_descriptions,
     *,
@@ -407,8 +402,7 @@ def view_image(embedding_similarities):
     return embedding_similarities[0]["image_url"]
-@op("LynxScribe Text RAG Loader")
-@mem.cache
 def ls_text_rag_loader(
     file_urls,
     *,
@@ -465,8 +459,7 @@ def ls_text_rag_loader(
     return {"rag_graph": rag_graph}
-@op("LynxScribe FAQ to RAG")
-@mem.cache
 async def ls_faq_to_rag(
     *,
     faq_excel_path: str = "",
@@ -712,8 +705,7 @@ def read_excel(*, file_path: str, sheet_name: str = "Sheet1", columns: str = "")
 @ops.input_position(system_prompt="bottom", instruction_prompt="bottom", dataframe="left")
-@op("LynxScribe Task Solver")
-@mem.cache
 async def ls_task_solver(
     system_prompt,
     instruction_prompt,
@@ -814,7 +806,7 @@ def mask(*, name="", regex="", exceptions="", mask_pattern=""):
 @ops.input_position(chat_api="bottom")
-@op("Test Chat API")
 async def test_chat_api(message, chat_api, *, show_details=False):
     chat_api = chat_api[0]["chat_api"]
     request = ChatCompletionPrompt(

 from enum import Enum
 import asyncio
 import pandas as pd
 from pydantic import BaseModel, ConfigDict
 import pathlib
 ENV = "LynxScribe"
 one_by_one.register(ENV)
 op = ops.op_registration(ENV)
 output_on_top = ops.output_position(output="top")
 # @output_on_top
+# @op("LynxScribe RAG Graph Vector Store", slow=True)
 # def ls_rag_graph(
 #     *,
 #     name: str = "faiss",
 #     return {"rag_graph": rag_graph}
+@op("LynxScribe Image Describer", slow=True)
 async def ls_image_describer(
     file_urls,
     *,
     return {"image_descriptions": image_descriptions}
+@op("LynxScribe Image RAG Builder", slow=True)
 async def ls_image_rag_builder(
     image_descriptions,
     *,
     return embedding_similarities[0]["image_url"]
+@op("LynxScribe Text RAG Loader", slow=True)
 def ls_text_rag_loader(
     file_urls,
     *,
     return {"rag_graph": rag_graph}
+@op("LynxScribe FAQ to RAG", slow=True)
 async def ls_faq_to_rag(
     *,
     faq_excel_path: str = "",
 @ops.input_position(system_prompt="bottom", instruction_prompt="bottom", dataframe="left")
+@op("LynxScribe Task Solver", slow=True)
 async def ls_task_solver(
     system_prompt,
     instruction_prompt,
 @ops.input_position(chat_api="bottom")
+@op("Test Chat API", slow=True)
 async def test_chat_api(message, chat_api, *, show_details=False):
     chat_api = chat_api[0]["chat_api"]
     request = ChatCompletionPrompt(