#!/usr/bin/env python3
"""
gradio_app.py
--------------
Gradio application (with MCP support) exposing the functionality of
`extract_readme.py` as an interactive tool.  After launching, the app can be
used via a regular web UI *or* programmatically by any MCP-compatible LLM
client (Cursor, Claude Desktop, etc.).

Run locally:
    python gradio_app.py

This will start both the Gradio web server *and* the MCP endpoint.  The latter
is announced in the terminal when the app starts.
"""

from __future__ import annotations

import os
import re
import time
from typing import List, Sequence, Tuple
from urllib.parse import urlparse

import gradio as gr
import requests
from huggingface_hub import InferenceClient, ModelCard  # type: ignore

# -----------------------------------------------------------------------------
# Core logic (adapted from extract_readme.py)
# -----------------------------------------------------------------------------


def _extract_urls(text: str) -> List[str]:
    """Return a list of unique URLs found inside *text* preserving order."""
    url_pattern = re.compile(r"https?://[^\s\)\]\>'\"`]+")
    urls = url_pattern.findall(text)
    # Preserve insertion order while removing duplicates.
    seen: set[str] = set()
    unique_urls: List[str] = []
    for u in urls:
        if u not in seen:
            unique_urls.append(u)
            seen.add(u)
    return unique_urls


def _summarise_external_urls(urls: Sequence[str]) -> List[Tuple[str, str]]:
    """Return a list of (url, summary) tuples using the r.jina.ai proxy."""
    if not urls:
        return []

    summaries: List[Tuple[str, str]] = []
    url_pattern = re.compile(r"https?://[^\s\)\]\>'\"`]+")

    for idx, original_url in enumerate(urls):
        proxy_url = f"https://r.jina.ai/{original_url}"
        try:
            resp = requests.get(proxy_url, timeout=15)
            resp.raise_for_status()
            cleaned_text = url_pattern.sub("", resp.text)
            summaries.append((original_url, cleaned_text))
        except Exception as err:  # pylint: disable=broad-except
            summaries.append((original_url, f"❌ Failed to fetch summary: {err}"))
        # Respect ~15 req/min rate-limit of r.jina.ai
        if idx < len(urls) - 1:
            time.sleep(4.1)
    return summaries


# -----------------------------------------------------------------------------
# Public MCP-exposed function
# -----------------------------------------------------------------------------


def extract_model_info(
    model_id: str,
    llm_model_id: str = "CohereLabs/c4ai-command-a-03-2025",
) -> str:
    """Fetch a Hugging Face model card, analyse it and optionally summarise it.

    Args:
        model_id: The *repository ID* of the model on Hugging Face (e.g.
            "bert-base-uncased").
        llm_model_id: ID of the LLM used for summarisation via the Inference
            Endpoint.  Defaults to Cohere Command R+.
        open_pr: If *True*, the generated summary will be posted as a **new
            discussion** in the specified model repo.  Requires a valid
            `HF_TOKEN` environment variable with write permissions.

    Returns:
        A single markdown-formatted string containing:
            1. The raw README.
            2. Extracted external URLs.
            3. Brief summaries of the external URLs (via r.jina.ai).
            4. A concise LLM-generated summary of the model card.
    """

    # ------------------------------------------------------------------
    # 1. Load model card
    # ------------------------------------------------------------------
    try:
        card = ModelCard.load(model_id)
    except Exception as err:  # pylint: disable=broad-except
        return f"❌ Failed to load model card for '{model_id}': {err}"

    combined_sections: List[str] = ["=== README markdown ===", card.text]

    # ------------------------------------------------------------------
    # 2. Extract URLs
    # ------------------------------------------------------------------
    unique_urls = _extract_urls(card.text)
    if unique_urls:
        combined_sections.append("\n=== URLs found ===")
        combined_sections.extend(unique_urls)

        EXCLUDED_KEYWORDS = ("colab.research.google.com", "github.com")
        filtered_urls = [
            u for u in unique_urls if not any(k in urlparse(u).netloc for k in EXCLUDED_KEYWORDS)
        ]

        if filtered_urls:
            combined_sections.append("\n=== Summaries via r.jina.ai ===")
            for url, summary in _summarise_external_urls(filtered_urls):
                combined_sections.append(f"\n--- {url} ---\n{summary}")
        else:
            combined_sections.append("\nNo external URLs (after filtering) detected in the model card.")
    else:
        combined_sections.append("\nNo URLs detected in the model card.")

    # ------------------------------------------------------------------
    # 3. Summarise with LLM (if token available)
    # ------------------------------------------------------------------
    hf_token = os.getenv("HF_TOKEN")
    summary_text: str | None = None
    if hf_token:
        client = InferenceClient(provider="auto", api_key=hf_token)
        prompt = (
            "You are given a lot of information about a machine learning model "
            "available on Hugging Face. Create a concise, technical and to the point "
            "summary highlighting the technical details, comparisons and instructions "
            "to run the model (if available). Think of the summary as a gist with all "
            "the information someone should need to know about the model without "
            "overwhelming them. Do not add any text formatting to your output text, "
            "keep it simple and plain text. If you have to then sparingly just use "
            "markdown for Heading and lists. Specifically do not use ** to bold text, "
            "just use # for headings and - for lists. No need to put any contact "
            "information in the summary. The summary is supposed to be insightful and "
            "information dense and should not be more than 200-300 words. Don't "
            "hallucinate and refer only to the content provided to you. Remember to "
            "be concise. Here is the information:\n\n" + "\n".join(combined_sections)
        )
        try:
            completion = client.chat.completions.create(
                model=llm_model_id,
                messages=[{"role": "user", "content": prompt}],
            )
            summary_text = completion.choices[0].message.content
        except Exception as err:  # pylint: disable=broad-except
            return f"❌ Failed to generate summary: {err}"
    else:
        return "⚠️  HF_TOKEN environment variable not set. Please set it to enable summarisation."
    # Return only the summary text if available
    return summary_text or "❌ Summary generation failed for unknown reasons."

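# Quick sanity check without the UI (a minimal sketch; assumes HF_TOKEN is
# exported so the summarisation step can actually run):
#
#     print(extract_model_info("bert-base-uncased"))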

# -----------------------------------------------------------------------------
# Gradio UI & MCP launch
# -----------------------------------------------------------------------------

demo = gr.Interface(
    fn=extract_model_info,
    inputs=[
        gr.Textbox(value="bert-base-uncased", label="Model ID"),
        gr.Textbox(value="CohereLabs/c4ai-command-a-03-2025", label="LLM Model ID"),
    ],
    outputs=gr.Textbox(label="Result", lines=25),
    title="Model Card Inspector & Summariser",
    description=(
        "Fetch a model card from Hugging Face, extract useful links, optionally "
        "summarise it with an LLM and (optionally) open a discussion on the Hub. "
        "This tool is also available via MCP so LLM clients can call it directly."
    ),
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
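
# -----------------------------------------------------------------------------
# Example: calling the tool programmatically (a sketch, not part of the app)
# -----------------------------------------------------------------------------
# Once the server is running, the same function can be invoked from another
# Python process with `gradio_client`.  The endpoint name "/predict" is the
# default that `gr.Interface` registers; adjust it if your Gradio version
# exposes a different name:
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     result = client.predict(
#         "bert-base-uncased",                    # model_id
#         "CohereLabs/c4ai-command-a-03-2025",    # llm_model_id
#         api_name="/predict",
#     )
#     print(result)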