davanstrien HF Staff commited on
Commit
1ab8a1a
·
1 Parent(s): d2cae6f
Files changed (3) hide show
  1. app.py +119 -0
  2. requirements.in +8 -0
  3. requirements.txt +253 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import get_collection, Collection, CollectionItem
2
+ from toolz import groupby, valmap
3
+ from typing import Dict, List
4
+ import pandas as pd
5
+ from huggingface_hub import model_info
6
+ import gradio as gr
7
+ from functools import lru_cache
8
+
9
+ test_slug = "HF-IA-archiving/models-to-archive-65006a7fdadb8c628f33aac9"
10
+
11
+
12
def group_collection_by_repo_type(
    collection_slug: str,
) -> Dict[str, List[CollectionItem]]:
    """Fetch a Hub collection and bucket its items by repo type.

    Returns a mapping from repo type (e.g. "model" — see load_data, which
    reads the "model" bucket) to the list of collection items of that type,
    preserving collection order within each bucket.
    """
    items_by_type: Dict[str, List[CollectionItem]] = {}
    for item in get_collection(collection_slug).items:
        items_by_type.setdefault(item.repoType, []).append(item)
    return items_by_type
17
+
18
+
19
def render_model_hub_link(hub_id):
    """Return an HTML anchor that links *hub_id* to its Hugging Face Hub page.

    The link opens in a new tab and is styled as a dotted-underline link.
    """
    url = f"https://huggingface.co/{hub_id}"
    style = (
        "color: var(--link-text-color);"
        " text-decoration: underline;text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{url}" style="{style}">{hub_id}</a>'
25
+
26
+
27
def load_to_dataframe(data):
    """Build a pandas DataFrame summarising the given collection items.

    Keeps a fixed subset of each item's attributes, enriches every row with
    metadata fetched from the model card on the Hub, and renders the
    ``item_id`` column as an HTML hub link.
    """
    keep = [
        "item_id",
        "downloads",
        "author",
        "likes",
        "pipeline_tag",
        "lastModified",
    ]
    card_keys = ["language", "tags", "license", "datasets"]

    # Project each item onto the kept attributes (skipping any it lacks).
    rows = []
    for item in data:
        attrs = item.__dict__
        rows.append({name: attrs[name] for name in keep if name in attrs})

    for row in rows:
        try:
            # cardData may be None, in which case .get raises AttributeError.
            card_data = model_info(row["item_id"]).cardData
            for name in card_keys:
                row[name] = card_data.get(name)
        except AttributeError as err:
            print(err)
            # Reset every card key so no partial values survive.
            for name in card_keys:
                row[name] = None

    df = pd.DataFrame(rows)
    df["item_id"] = df["item_id"].apply(render_model_hub_link)
    return df
58
+
59
+
60
def summary_of_na_values(df):
    """Tabulate missing values per column, omitting columns with none.

    Returns a DataFrame with columns ``Metadata``, ``Missing Count`` and
    ``Missing Percent`` (percentage rounded to two decimal places).
    """
    missing = df.isna().sum()
    missing = missing[missing > 0]
    percent = (missing / len(df) * 100).round(2)
    summary = pd.DataFrame(
        {"Missing Count": missing, "Missing Percent": percent}
    )
    summary = summary.rename_axis(index="Metadata")
    return summary.reset_index()
69
+
70
def value_counts(df, column_name):
    """Return the frequency of each distinct value in *column_name* of *df*."""
    column = df[column_name]
    return column.value_counts()
72
+
73
+
74
+
75
@lru_cache(maxsize=10)
def load_data():
    """Fetch, group and tabulate the collection; cached after the first call.

    Returns a 3-tuple of (items grouped by repo type, list of DataFrame
    column names, the models DataFrame).
    """
    grouped = group_collection_by_repo_type(test_slug)
    df = load_to_dataframe(grouped["model"])
    return grouped, list(df.columns), df
82
+
83
+
84
+
85
def generate_markdown_summary_of_collection(
    grouped_collection: Dict[str, List[CollectionItem]]
):
    """Render a markdown bullet list of how many items of each repo type exist.

    Example output::

        This collection contains the following items:
        - 2 models
        - 1 datasets
    """
    lines = ["This collection contains the following items:\n"]
    for repo_type, items in grouped_collection.items():
        lines.append(f"- {len(items)} {repo_type}s\n")
    return "".join(lines)
93
+
94
+
95
# Eagerly fetch (and cache, via load_data's lru_cache) the collection data at
# import time; the Gradio UI below is built from these module-level values.
repos_grouped_by_type, column_names, df = load_data()
96
+
97
+
98
def filter_df(columns_to_show=None):
    """Return the cached models DataFrame, optionally restricted to columns.

    With ``columns_to_show=None`` the full DataFrame is returned; otherwise
    only the requested columns are kept.
    """
    _, _, frame = load_data()
    if columns_to_show is None:
        return frame
    return frame[columns_to_show]
101
+
102
+
103
+ # with gr.Blocks() as demo:
104
+ # gr.Markdown("## Info about models in this collection")
105
+ # gr.Markdown(generate_markdown_summary_of_collection(repos_grouped_by_type))
106
+ # gr.Markdown("### Summary of missing metadata values")
107
+ # gr.DataFrame(summary_of_na_values(df))
108
+ # gr.Markdown("# Models in this collection")
109
+ # with gr.Accordion("Models", open=False):
110
+ # columns_to_show = gr.Dropdown(
111
+ # label="Columns to show",
112
+ # value=column_names,
113
+ # choices=column_names,
114
+ # multiselect=True,
115
+ # )
116
+ # models_df = gr.DataFrame(filter_df, datatype="markdown")
117
+ # columns_to_show.change(filter_df, columns_to_show, models_df)
118
+
119
+ # demo.launch(debug=True)
requirements.in ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio==3.41.2
2
+ httpx[http2]
3
+ git+https://github.com/huggingface/huggingface_hub
4
+ polars
5
+ python-dotenv
6
+ toolz
7
+ tqdm
8
+ rich[jupyter]
requirements.txt ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile
6
+ #
7
+ aiofiles==23.2.1
8
+ # via gradio
9
+ altair==5.1.1
10
+ # via gradio
11
+ annotated-types==0.5.0
12
+ # via pydantic
13
+ anyio==3.7.1
14
+ # via
15
+ # fastapi
16
+ # httpcore
17
+ # starlette
18
+ appnope==0.1.3
19
+ # via ipython
20
+ asttokens==2.4.0
21
+ # via stack-data
22
+ attrs==23.1.0
23
+ # via
24
+ # jsonschema
25
+ # referencing
26
+ backcall==0.2.0
27
+ # via ipython
28
+ certifi==2023.7.22
29
+ # via
30
+ # httpcore
31
+ # httpx
32
+ # requests
33
+ charset-normalizer==3.2.0
34
+ # via requests
35
+ click==8.1.7
36
+ # via uvicorn
37
+ comm==0.1.4
38
+ # via ipywidgets
39
+ contourpy==1.1.0
40
+ # via matplotlib
41
+ cycler==0.11.0
42
+ # via matplotlib
43
+ decorator==5.1.1
44
+ # via ipython
45
+ executing==1.2.0
46
+ # via stack-data
47
+ fastapi==0.103.1
48
+ # via gradio
49
+ ffmpy==0.3.1
50
+ # via gradio
51
+ filelock==3.12.3
52
+ # via huggingface-hub
53
+ fonttools==4.42.1
54
+ # via matplotlib
55
+ fsspec==2023.9.0
56
+ # via
57
+ # gradio-client
58
+ # huggingface-hub
59
+ gradio==3.41.2
60
+ # via -r requirements.in
61
+ gradio-client==0.5.0
62
+ # via gradio
63
+ h11==0.14.0
64
+ # via
65
+ # httpcore
66
+ # uvicorn
67
+ h2==4.1.0
68
+ # via httpx
69
+ hpack==4.0.0
70
+ # via h2
71
+ httpcore==0.18.0
72
+ # via httpx
73
+ httpx[http2]==0.25.0
74
+ # via
75
+ # -r requirements.in
76
+ # gradio
77
+ # gradio-client
78
+ huggingface-hub @ git+https://github.com/huggingface/huggingface_hub
79
+ # via
80
+ # -r requirements.in
81
+ # gradio
82
+ # gradio-client
83
+ hyperframe==6.0.1
84
+ # via h2
85
+ idna==3.4
86
+ # via
87
+ # anyio
88
+ # httpx
89
+ # requests
90
+ importlib-resources==6.0.1
91
+ # via gradio
92
+ ipython==8.15.0
93
+ # via ipywidgets
94
+ ipywidgets==8.1.1
95
+ # via rich
96
+ jedi==0.19.0
97
+ # via ipython
98
+ jinja2==3.1.2
99
+ # via
100
+ # altair
101
+ # gradio
102
+ jsonschema==4.19.0
103
+ # via altair
104
+ jsonschema-specifications==2023.7.1
105
+ # via jsonschema
106
+ jupyterlab-widgets==3.0.9
107
+ # via ipywidgets
108
+ kiwisolver==1.4.5
109
+ # via matplotlib
110
+ markdown-it-py==3.0.0
111
+ # via rich
112
+ markupsafe==2.1.3
113
+ # via
114
+ # gradio
115
+ # jinja2
116
+ matplotlib==3.7.3
117
+ # via gradio
118
+ matplotlib-inline==0.1.6
119
+ # via ipython
120
+ mdurl==0.1.2
121
+ # via markdown-it-py
122
+ numpy==1.25.2
123
+ # via
124
+ # altair
125
+ # contourpy
126
+ # gradio
127
+ # matplotlib
128
+ # pandas
129
+ orjson==3.9.7
130
+ # via gradio
131
+ packaging==23.1
132
+ # via
133
+ # altair
134
+ # gradio
135
+ # gradio-client
136
+ # huggingface-hub
137
+ # matplotlib
138
+ pandas==2.1.0
139
+ # via
140
+ # altair
141
+ # gradio
142
+ parso==0.8.3
143
+ # via jedi
144
+ pexpect==4.8.0
145
+ # via ipython
146
+ pickleshare==0.7.5
147
+ # via ipython
148
+ pillow==10.0.0
149
+ # via
150
+ # gradio
151
+ # matplotlib
152
+ polars==0.19.2
153
+ # via -r requirements.in
154
+ prompt-toolkit==3.0.39
155
+ # via ipython
156
+ ptyprocess==0.7.0
157
+ # via pexpect
158
+ pure-eval==0.2.2
159
+ # via stack-data
160
+ pydantic==2.3.0
161
+ # via
162
+ # fastapi
163
+ # gradio
164
+ pydantic-core==2.6.3
165
+ # via pydantic
166
+ pydub==0.25.1
167
+ # via gradio
168
+ pygments==2.16.1
169
+ # via
170
+ # ipython
171
+ # rich
172
+ pyparsing==3.1.1
173
+ # via matplotlib
174
+ python-dateutil==2.8.2
175
+ # via
176
+ # matplotlib
177
+ # pandas
178
+ python-dotenv==1.0.0
179
+ # via -r requirements.in
180
+ python-multipart==0.0.6
181
+ # via gradio
182
+ pytz==2023.3.post1
183
+ # via pandas
184
+ pyyaml==6.0.1
185
+ # via
186
+ # gradio
187
+ # huggingface-hub
188
+ referencing==0.30.2
189
+ # via
190
+ # jsonschema
191
+ # jsonschema-specifications
192
+ requests==2.31.0
193
+ # via
194
+ # gradio
195
+ # gradio-client
196
+ # huggingface-hub
197
+ rich[jupyter]==13.5.2
198
+ # via -r requirements.in
199
+ rpds-py==0.10.2
200
+ # via
201
+ # jsonschema
202
+ # referencing
203
+ semantic-version==2.10.0
204
+ # via gradio
205
+ six==1.16.0
206
+ # via
207
+ # asttokens
208
+ # python-dateutil
209
+ sniffio==1.3.0
210
+ # via
211
+ # anyio
212
+ # httpcore
213
+ # httpx
214
+ stack-data==0.6.2
215
+ # via ipython
216
+ starlette==0.27.0
217
+ # via fastapi
218
+ toolz==0.12.0
219
+ # via
220
+ # -r requirements.in
221
+ # altair
222
+ tqdm==4.66.1
223
+ # via
224
+ # -r requirements.in
225
+ # huggingface-hub
226
+ traitlets==5.10.0
227
+ # via
228
+ # comm
229
+ # ipython
230
+ # ipywidgets
231
+ # matplotlib-inline
232
+ typing-extensions==4.7.1
233
+ # via
234
+ # fastapi
235
+ # gradio
236
+ # gradio-client
237
+ # huggingface-hub
238
+ # pydantic
239
+ # pydantic-core
240
+ tzdata==2023.3
241
+ # via pandas
242
+ urllib3==2.0.4
243
+ # via requests
244
+ uvicorn==0.23.2
245
+ # via gradio
246
+ wcwidth==0.2.6
247
+ # via prompt-toolkit
248
+ websockets==11.0.3
249
+ # via
250
+ # gradio
251
+ # gradio-client
252
+ widgetsnbextension==4.0.9
253
+ # via ipywidgets