nbconvert

Sleeping

App Files Files Community

davanstrien HF Staff committed on Mar 10, 2023

Commit

8b5c9f7

1 Parent(s): e3344eb

update'

Browse files

Files changed (4) hide show

.gitmodules +3 -0
app.py +63 -6
nbconvert +1 -0
requirements.txt +2 -1

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "nbconvert"]
+	path = nbconvert
+	url = https://github.com/jupyter/nbconvert

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Literal
 import httpx
 import nbformat
 from nbformat import NotebookNode, ValidationError
@@ -8,9 +9,39 @@ from starlette.exceptions import HTTPException
 from starlette.responses import FileResponse, JSONResponse, HTMLResponse
 from starlette.requests import Request
 from starlette.routing import Route
 client = httpx.AsyncClient()
-html_exporter = HTMLExporter(template_name="lab")
 async def homepage(_):
@@ -21,7 +52,9 @@ async def healthz(_):
     return JSONResponse({"success": True})
-def convert(s: str, theme: Literal["light", "dark"], debug_info: str) -> str:
     # Capture potential validation error:
     try:
         notebook_node: NotebookNode = nbformat.reads(
@@ -43,10 +76,24 @@ def convert(s: str, theme: Literal["light", "dark"], debug_info: str) -> str:
     print(f"Input: nbformat v{notebook_node.nbformat}.{notebook_node.nbformat_minor}")
     html_exporter.theme = theme
-    body, _ = html_exporter.from_notebook_node(notebook_node)
     # TODO(customize or simplify template?)
     # TODO(also check source code for jupyter/nbviewer)
-    return body
 async def convert_from_url(req: Request):
@@ -66,7 +113,17 @@ async def convert_from_url(req: Request):
             400, f"Got an error {r.status_code} when fetching remote file"
         )
-    return HTMLResponse(content=convert(r.text, theme=theme, debug_info=f"url={url}"))
 async def convert_from_upload(req: Request):

+import contextlib
+from typing import Literal, Tuple, Dict, List
 import httpx
 import nbformat
 from nbformat import NotebookNode, ValidationError
 from starlette.responses import FileResponse, JSONResponse, HTMLResponse
 from starlette.requests import Request
 from starlette.routing import Route
+from nbconvert.preprocessors import Preprocessor
+import re
+from traitlets.config import Config
+from huggingface_hub import model_info, dataset_info
+from huggingface_hub.utils import RepositoryNotFoundError
+hub_id_regex = re.compile(r"[^\w]([a-zA-Z\d-]{3,32}\/[\w\-._]{3,64})[^\w/]")
+class HubIDCell(Preprocessor):
+    """Record Hugging Face Hub repo ids referenced in a notebook's code cells.
+
+    Each candidate matched by ``hub_id_regex`` is looked up on the Hub, first
+    as a model and, if no such model exists, as a dataset. Confirmed ids are
+    collected (without duplicates) in ``resources["model_matches"]`` and
+    ``resources["dataset_matches"]``. Cells themselves are never modified.
+    """
+
+    def preprocess(self, nb, resources):
+        """Create both result lists up front, then process every cell."""
+        # Created here so callers can index both keys even when the notebook
+        # has no code cells (or no cells at all).
+        resources.setdefault("dataset_matches", [])
+        resources.setdefault("model_matches", [])
+        return super().preprocess(nb, resources)
+
+    def preprocess_cell(self, cell, resources, index):
+        """Look up the hub-id candidates of one cell.
+
+        Returns the unchanged ``cell`` and the (possibly extended) ``resources``.
+        """
+        # Repeated here so a direct call to preprocess_cell also works.
+        dataset_matches = resources.setdefault("dataset_matches", [])
+        model_matches = resources.setdefault("model_matches", [])
+        if cell["cell_type"] != "code":
+            return cell, resources
+        # finditer: a cell may mention several repos, not only the first one.
+        for match in hub_id_regex.finditer(cell["source"]):
+            hub_id_match = match.group(1)
+            print(hub_id_match)
+            # Already confirmed earlier in this notebook: skip the network
+            # round trip and avoid a duplicate entry. Duplicates would make a
+            # caller that substitutes each id in the HTML rewrite it twice.
+            if hub_id_match in model_matches or hub_id_match in dataset_matches:
+                continue
+            try:
+                model_id = model_info(hub_id_match).modelId
+            except RepositoryNotFoundError:
+                # Not a model; a missing dataset is silently ignored as well.
+                with contextlib.suppress(RepositoryNotFoundError):
+                    dataset_id = dataset_info(hub_id_match).id
+                    if dataset_id not in dataset_matches:
+                        dataset_matches.append(dataset_id)
+            else:
+                if model_id not in model_matches:
+                    model_matches.append(model_id)
+        return cell, resources
+c = Config()
+c.HTMLExporter.preprocessors = [HubIDCell]
 client = httpx.AsyncClient()
+html_exporter = HTMLExporter(config=c)
 async def homepage(_):
     return JSONResponse({"success": True})
+def convert(
+    s: str, theme: Literal["light", "dark"], debug_info: str
+) -> Tuple[str, List[str], List[str]]:
     # Capture potential validation error:
     try:
         notebook_node: NotebookNode = nbformat.reads(
     print(f"Input: nbformat v{notebook_node.nbformat}.{notebook_node.nbformat_minor}")
     html_exporter.theme = theme
+    body, metadata = html_exporter.from_notebook_node(notebook_node)
+    metadata = dict(metadata)
+    model_matches = metadata["model_matches"]
+    dataset_matches = metadata["dataset_matches"]
     # TODO(customize or simplify template?)
     # TODO(also check source code for jupyter/nbviewer)
+    for model_match in model_matches:
+        print(f"updating {model_match}")
+        body = body.replace(
+            model_match,
+            f"""<a href="https://huggingface.co/{model_match}">{model_match} </a>""",
+        )
+    for dataset_match in dataset_matches:
+        body = body.replace(
+            dataset_match,
+            f"""<a href="https://huggingface.co/dataset/{dataset_match}">{dataset_match} </a>""",
+        )
+    return body, metadata["model_matches"], metadata["dataset_matches"]
 async def convert_from_url(req: Request):
             400, f"Got an error {r.status_code} when fetching remote file"
         )
+    # return HTMLResponse(content=convert(r.text, theme=theme, debug_info=f"url={url}"))
+    html_text, model_matches, dataset_matches = convert(
+        r.text, theme=theme, debug_info=f"url={url}"
+    )
+    return JSONResponse(
+        content={
+            "html": html_text,
+            "model_matches": model_matches,
+            "dataset_matches": dataset_matches,
+        }
+    )
 async def convert_from_upload(req: Request):

nbconvert ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit acf41acf6d83cb725f3a2c48686c828eff7b24d8

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 httpx==0.23.3
 starlette==0.23.1
-ipython==8.8.0
 nbconvert==7.2.8
 uvicorn==0.20.0

 httpx==0.23.3
 starlette==0.23.1
+ipython==8.10.0
 nbconvert==7.2.8
 uvicorn==0.20.0
+huggingface_hub