davanstrien HF Staff committed on
Commit
8b5c9f7
·
1 Parent(s): e3344eb
Files changed (4) hide show
  1. .gitmodules +3 -0
  2. app.py +63 -6
  3. nbconvert +1 -0
  4. requirements.txt +2 -1
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "nbconvert"]
2
+ path = nbconvert
3
+ url = https://github.com/jupyter/nbconvert
app.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Literal
 
2
  import httpx
3
  import nbformat
4
  from nbformat import NotebookNode, ValidationError
@@ -8,9 +9,39 @@ from starlette.exceptions import HTTPException
8
  from starlette.responses import FileResponse, JSONResponse, HTMLResponse
9
  from starlette.requests import Request
10
  from starlette.routing import Route
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  client = httpx.AsyncClient()
13
- html_exporter = HTMLExporter(template_name="lab")
14
 
15
 
16
  async def homepage(_):
@@ -21,7 +52,9 @@ async def healthz(_):
21
  return JSONResponse({"success": True})
22
 
23
 
24
- def convert(s: str, theme: Literal["light", "dark"], debug_info: str) -> str:
 
 
25
  # Capture potential validation error:
26
  try:
27
  notebook_node: NotebookNode = nbformat.reads(
@@ -43,10 +76,24 @@ def convert(s: str, theme: Literal["light", "dark"], debug_info: str) -> str:
43
 
44
  print(f"Input: nbformat v{notebook_node.nbformat}.{notebook_node.nbformat_minor}")
45
  html_exporter.theme = theme
46
- body, _ = html_exporter.from_notebook_node(notebook_node)
 
 
 
47
  # TODO(customize or simplify template?)
48
  # TODO(also check source code for jupyter/nbviewer)
49
- return body
 
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
  async def convert_from_url(req: Request):
@@ -66,7 +113,17 @@ async def convert_from_url(req: Request):
66
  400, f"Got an error {r.status_code} when fetching remote file"
67
  )
68
 
69
- return HTMLResponse(content=convert(r.text, theme=theme, debug_info=f"url={url}"))
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  async def convert_from_upload(req: Request):
 
1
+ import contextlib
2
+ from typing import Literal, Tuple, Dict, List
3
  import httpx
4
  import nbformat
5
  from nbformat import NotebookNode, ValidationError
 
9
  from starlette.responses import FileResponse, JSONResponse, HTMLResponse
10
  from starlette.requests import Request
11
  from starlette.routing import Route
12
+ from nbconvert.preprocessors import Preprocessor
13
+ import re
14
+ from traitlets.config import Config
15
+ from huggingface_hub import model_info, dataset_info
16
+ from huggingface_hub.utils import RepositoryNotFoundError
17
+
18
+ hub_id_regex = re.compile(r"[^\w]([a-zA-Z\d-]{3,32}\/[\w\-._]{3,64})[^\w/]")
19
+
20
+
21
class HubIDCell(Preprocessor):
    """Preprocessor that records Hugging Face Hub repo IDs found in code cells.

    For each code cell, the first substring matching ``hub_id_regex`` is looked
    up on the Hub: first as a model, then (if no such model exists) as a
    dataset. Confirmed IDs are collected in ``resources["model_matches"]`` and
    ``resources["dataset_matches"]``; the cell itself is never modified.
    """

    def preprocess_cell(self, cell, resources, index):
        """Record the Hub repo referenced by ``cell``, if any.

        Args:
            cell: Notebook cell; ``cell["cell_type"]`` and ``cell["source"]``
                are read.
            resources: Mapping shared across cells. The keys
                ``"model_matches"`` and ``"dataset_matches"`` are created here
                (as lists) when missing.
            index: Position of the cell in the notebook (unused).

        Returns:
            The unchanged ``cell`` and the (possibly updated) ``resources``.

        Raises:
            Any exception other than ``RepositoryNotFoundError`` raised by the
            Hub lookups propagates to the caller.
        """
        # Create both lists for every cell type, not only code cells, so the
        # keys exist even for notebooks that contain no code cells at all.
        resources.setdefault("dataset_matches", [])
        resources.setdefault("model_matches", [])
        if cell["cell_type"] != "code":
            return cell, resources

        # Only the first candidate in the cell is inspected.
        match = hub_id_regex.search(cell["source"])
        if match is None:
            return cell, resources

        hub_id_match = match.group(1)
        print(hub_id_match)

        model_matches = resources["model_matches"]
        dataset_matches = resources["dataset_matches"]
        # An ID confirmed by an earlier cell needs neither a second Hub
        # round-trip nor a second entry: consumers that substitute every
        # recorded ID into the rendered HTML would otherwise rewrite the ID
        # inside a link they had already inserted.
        if hub_id_match in model_matches or hub_id_match in dataset_matches:
            return cell, resources

        try:
            model = model_info(hub_id_match)
        except RepositoryNotFoundError:
            # Not a model repo: try it as a dataset, and silently give up if
            # it is not a dataset either.
            with contextlib.suppress(RepositoryNotFoundError):
                dataset = dataset_info(hub_id_match)
                if dataset.id not in dataset_matches:
                    dataset_matches.append(dataset.id)
        else:
            # The ID reported by the Hub may differ from the matched text, so
            # de-duplicate on the reported ID as well.
            if model.modelId not in model_matches:
                model_matches.append(model.modelId)
        return cell, resources
38
+
39
+
40
+ c = Config()
41
+ c.HTMLExporter.preprocessors = [HubIDCell]
42
 
43
  client = httpx.AsyncClient()
44
+ html_exporter = HTMLExporter(config=c)
45
 
46
 
47
  async def homepage(_):
 
52
  return JSONResponse({"success": True})
53
 
54
 
55
+ def convert(
56
+ s: str, theme: Literal["light", "dark"], debug_info: str
57
+ ) -> Tuple[str, List[str], List[str]]:
58
  # Capture potential validation error:
59
  try:
60
  notebook_node: NotebookNode = nbformat.reads(
 
76
 
77
  print(f"Input: nbformat v{notebook_node.nbformat}.{notebook_node.nbformat_minor}")
78
  html_exporter.theme = theme
79
+ body, metadata = html_exporter.from_notebook_node(notebook_node)
80
+ metadata = dict(metadata)
81
+ model_matches = metadata["model_matches"]
82
+ dataset_matches = metadata["dataset_matches"]
83
  # TODO(customize or simplify template?)
84
  # TODO(also check source code for jupyter/nbviewer)
85
+ for model_match in model_matches:
86
+ print(f"updating {model_match}")
87
+ body = body.replace(
88
+ model_match,
89
+ f"""<a href="https://huggingface.co/{model_match}">{model_match} </a>""",
90
+ )
91
+ for dataset_match in dataset_matches:
92
+ body = body.replace(
93
+ dataset_match,
94
+ f"""<a href="https://huggingface.co/dataset/{dataset_match}">{dataset_match} </a>""",
95
+ )
96
+ return body, metadata["model_matches"], metadata["dataset_matches"]
97
 
98
 
99
  async def convert_from_url(req: Request):
 
113
  400, f"Got an error {r.status_code} when fetching remote file"
114
  )
115
 
116
+ # return HTMLResponse(content=convert(r.text, theme=theme, debug_info=f"url={url}"))
117
+ html_text, model_matches, dataset_matches = convert(
118
+ r.text, theme=theme, debug_info=f"url={url}"
119
+ )
120
+ return JSONResponse(
121
+ content={
122
+ "html": html_text,
123
+ "model_matches": model_matches,
124
+ "dataset_matches": dataset_matches,
125
+ }
126
+ )
127
 
128
 
129
  async def convert_from_upload(req: Request):
nbconvert ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit acf41acf6d83cb725f3a2c48686c828eff7b24d8
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  httpx==0.23.3
2
  starlette==0.23.1
3
- ipython==8.8.0
4
  nbconvert==7.2.8
5
  uvicorn==0.20.0
 
 
1
  httpx==0.23.3
2
  starlette==0.23.1
3
+ ipython==8.10.0
4
  nbconvert==7.2.8
5
  uvicorn==0.20.0
6
+ huggingface_hub