File size: 6,745 Bytes
ba3b7a5
 
 
 
 
 
 
c164275
ba3b7a5
 
e02e83a
c164275
 
e02e83a
ba3b7a5
e02e83a
c164275
ba3b7a5
e02e83a
c164275
 
 
 
 
 
ba3b7a5
e02e83a
ba3b7a5
c164275
 
 
ba3b7a5
 
 
 
c164275
 
 
 
 
 
 
 
 
ba3b7a5
e02e83a
 
c164275
e02e83a
c164275
ba3b7a5
c164275
ba3b7a5
 
 
 
 
 
 
 
 
c164275
ba3b7a5
c164275
 
ba3b7a5
e02e83a
ba3b7a5
 
 
e02e83a
 
 
 
 
 
 
 
 
 
ba3b7a5
 
c164275
 
ba3b7a5
 
 
 
 
 
c164275
ba3b7a5
 
 
e02e83a
ba3b7a5
 
 
 
 
 
 
c164275
ba3b7a5
e02e83a
ba3b7a5
 
 
 
c164275
 
 
ba3b7a5
e02e83a
c164275
 
e02e83a
c164275
 
 
 
 
 
ba3b7a5
 
 
 
c164275
e02e83a
ba3b7a5
 
c164275
 
 
e02e83a
c164275
ba3b7a5
 
e02e83a
ba3b7a5
 
 
e02e83a
ba3b7a5
 
 
 
 
 
 
e02e83a
ba3b7a5
e02e83a
 
ba3b7a5
 
c164275
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import streamlit as st
import pandas as pd
from pypdf import PdfReader
from pyvis.network import Network

from knowledge_graph_maker import (
    GraphMaker, Ontology, Document, OpenAIClient
)

# ── Page setup ──────────────────────────────────────────────────────────────────
st.set_page_config(page_title="Knowledge Graph (OpenRouter)", layout="wide")
# The separator is an em dash, written as an escape so the title survives
# the file being re-saved or transcoded with the wrong encoding.
st.title("Knowledge Graph from Text/PDF \u2014 OpenRouter")
st.caption("Builds a knowledge graph with knowledge-graph-maker via OpenRouter. Paste text or upload a PDF; choose a model.")

# ── Secrets / env ───────────────────────────────────────────────────────────────
# OpenRouter API key, read from the process environment once at startup.
# Falls back to an empty string when the variable is absent.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model ids offered in the sidebar dropdown (append more as needed).
OPENROUTER_MODELS = [
    "openai/gpt-oss-20b:free",
    "moonshotai/kimi-k2:free",
    "google/gemini-2.0-flash-exp:free",
    "google/gemma-3-27b-it:free",
]

# ── Sidebar controls ───────────────────────────────────────────────────────────
with st.sidebar:
    st.subheader("Model & Generation Settings")

    # Model selection: a preset from the dropdown, or a free-form id typed
    # into the text box.
    model_choice = st.selectbox(
        label="OpenRouter model",
        options=OPENROUTER_MODELS,
        index=0,
    )
    custom_model = st.text_input(
        label="Custom model id (optional)",
        placeholder="e.g. meta-llama/llama-3.1-8b-instruct",
    )

    # Sampling parameters handed to the LLM client.
    temperature = st.slider(
        label="Temperature", min_value=0.0, max_value=1.0, value=0.1, step=0.05
    )
    top_p = st.slider(
        label="Top-p", min_value=0.0, max_value=1.0, value=0.5, step=0.05
    )

    # Ontology inputs: both are comma-separated strings.
    st.markdown("### Ontology (labels)")
    labels_text = st.text_area(
        label="Comma-separated labels",
        value="Person, Object, Event, Place, Document, Organisation, Action, Miscellanous",
        height=70,
    )
    relationships_text = st.text_input(
        label="Relationships (comma-separated)",
        value="Relation between any pair of Entities",
    )

# ── Helpers ────────────────────────────────────────────────────────────────────
def parse_labels(text: str):
    """Split a comma-separated string into a list of trimmed labels.

    Blank entries (empty or whitespace-only) are dropped. When no usable
    label remains, the built-in default label set is returned instead.
    """
    cleaned = []
    for raw in text.split(","):
        label = raw.strip()
        if label:
            cleaned.append(label)

    if cleaned:
        return cleaned

    # Nothing usable was supplied: fall back to the default ontology labels.
    return [
        "Person",
        "Object",
        "Event",
        "Place",
        "Document",
        "Organisation",
        "Action",
        "Miscellanous",
    ]

def pdf_to_text(file) -> str:
    """Extract the text of every page of a PDF and join the pages with newlines.

    Args:
        file: Whatever is handed to ``PdfReader`` (in this app, the object
            returned by the Streamlit file uploader).

    Returns:
        The per-page texts joined by a newline. A page that yields no text
        contributes an empty string; a page whose extraction raises is
        skipped (best effort) and reported through the ``logging`` module.
    """
    import logging

    logger = logging.getLogger(__name__)
    reader = PdfReader(file)
    parts = []
    for page_number, page in enumerate(reader.pages, start=1):
        try:
            parts.append(page.extract_text() or "")
        except Exception:
            # Deliberately best-effort: one unreadable page must not abort the
            # whole document, but leave a trace so missing text is explainable.
            logger.warning(
                "Skipping PDF page %d: text extraction failed",
                page_number,
                exc_info=True,
            )
    return "\n".join(parts)

def chunk_text(text: str, chars: int = 3500) -> list[Document]:
    """Cut *text* into consecutive windows of at most *chars* characters.

    Each window is stripped of surrounding whitespace; windows that end up
    empty are dropped. Every surviving window becomes a ``Document`` whose
    ``chunk_id`` metadata is the zero-based index of the window it came from
    (so ids may have gaps where a window was dropped).
    """
    windows = (
        (offset // chars, text[offset:offset + chars].strip())
        for offset in range(0, len(text), chars)
    )
    return [
        Document(text=piece, metadata={"chunk_id": window_index})
        for window_index, piece in windows
        if piece
    ]

def edges_to_pyvis(edges):
    """Build a PyVis ``Network`` from an iterable of graph edges.

    Each edge is expected to expose ``node_1``, ``node_2`` (each with
    ``label`` and ``name``) and ``relationship``. A node is identified by
    "<label>:<name>" and added only the first time it is seen; its name is
    the visible label and its ontology label is the hover title.
    """
    # IMPORTANT: cdn_resources="in_line" prevents PyVis from creating a ./lib folder
    graph = Network(
        height="700px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        notebook=False,
        directed=False,
        cdn_resources="in_line",
    )

    seen_ids = set()
    for edge in edges:
        endpoint_ids = []
        for node in (edge.node_1, edge.node_2):
            node_id = f"{node.label}:{node.name}"
            if node_id not in seen_ids:
                graph.add_node(node_id, label=node.name, title=node.label)
                seen_ids.add(node_id)
            endpoint_ids.append(node_id)

        first_id, second_id = endpoint_ids
        # A missing relationship is shown as an empty hover title.
        graph.add_edge(first_id, second_id, title=edge.relationship or "", value=1)

    graph.toggle_physics(True)
    return graph

# ── Input tabs ─────────────────────────────────────────────────────────────────
# Tab icons (memo and page emoji) are written as escapes so they survive the
# file being re-saved or transcoded with the wrong encoding.
tab_text, tab_pdf = st.tabs(["\U0001F4DD Paste Text", "\U0001F4C4 Upload PDF"])
input_text = ""
with tab_text:
    input_text = st.text_area("Paste your text here", height=220, placeholder="Paste text…")
with tab_pdf:
    pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if pdf_file:
        # An uploaded PDF takes precedence over anything pasted in the text tab.
        input_text = pdf_to_text(pdf_file)

# ── Action ─────────────────────────────────────────────────────────────────────
if st.button("Generate Knowledge Graph", type="primary"):
    # Guard clauses: refuse to run without input text or without an API key.
    if not input_text.strip():
        st.warning("Please provide text or a PDF.")
        st.stop()
    if not OPENROUTER_API_KEY:
        st.error("OPENROUTER_API_KEY is not set in Space Secrets.")
        st.stop()

    # Route OpenAI SDK traffic through OpenRouter (OpenAI-compatible).
    # The headers entry is optional attribution for OpenRouter analytics/ranking.
    os.environ.update({
        "OPENAI_API_KEY": OPENROUTER_API_KEY,
        "OPENAI_BASE_URL": "https://openrouter.ai/api/v1",
        "OPENAI_DEFAULT_HEADERS": (
            '{"HTTP-Referer":"https://huggingface.co/spaces/blazingbunny/rahulnyk_knowledge_graph",'
            '"X-Title":"Knowledge Graph (OpenRouter)"}'
        ),
    })

    # A non-blank custom model id overrides the dropdown choice.
    selected_model = custom_model.strip() or model_choice

    # Ontology: user-supplied relationships, or the generic default when the
    # field contains nothing usable.
    relationship_names = [
        name.strip() for name in relationships_text.split(",") if name.strip()
    ]
    if not relationship_names:
        relationship_names = ["Relation between any pair of Entities"]
    ontology = Ontology(
        labels=parse_labels(labels_text),
        relationships=relationship_names,
    )

    st.info("Chunking input and building graph…")
    docs = chunk_text(input_text)

    # LLM client (OpenRouter via OpenAI client)
    llm = OpenAIClient(model=selected_model, temperature=temperature, top_p=top_p)

    gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
    # delay_s_between can be raised if the provider rate-limits requests.
    edges = gm.from_documents(docs, delay_s_between=0)

    st.success(f"Graph built with {len(edges)} edges.")

    # Edge table: one row per edge, with both endpoints and the relationship.
    rows = []
    for edge in edges:
        rows.append({
            "node_1_label": edge.node_1.label,
            "node_1": edge.node_1.name,
            "node_2_label": edge.node_2.label,
            "node_2": edge.node_2.name,
            "relationship": edge.relationship,
        })
    df = pd.DataFrame(rows)
    st.dataframe(df, use_container_width=True)

    # Render the graph from an in-memory HTML string (no files written;
    # generate_html avoids creating ./lib).
    net = edges_to_pyvis(edges)
    html = net.generate_html()
    st.components.v1.html(html, height=750, scrolling=True)

# ── Footer ─────────────────────────────────────────────────────────────────────
# Markdown "---" separator followed by an attribution caption; emitted on every
# run because it sits outside the button branch.
st.markdown("---")
st.caption("Powered by knowledge-graph-maker via OpenRouter.")