|
import os |
|
import streamlit as st |
|
import pandas as pd |
|
from pypdf import PdfReader |
|
from pyvis.network import Network |
|
|
|
from knowledge_graph_maker import ( |
|
GraphMaker, Ontology, Document, OpenAIClient |
|
) |
|
|
|
|
|
# Page chrome: browser-tab title and full-width layout. Streamlit expects
# set_page_config to be issued before the other st.* calls in the script.
st.set_page_config(page_title="Knowledge Graph (OpenRouter)", layout="wide")

# Visible heading and a one-line description of what the app does.
st.title("Knowledge Graph from Text/PDF — OpenRouter")
st.caption(
    "Builds a knowledge graph with knowledge-graph-maker via OpenRouter. "
    "Paste text or upload a PDF; choose a model."
)
|
|
|
|
|
# OpenRouter credential, read from the process environment when the script
# runs. An empty string means "not configured".
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model ids offered in the model selector, in display order.
OPENROUTER_MODELS = [
    "openai/gpt-oss-20b:free",
    "moonshotai/kimi-k2:free",
    "google/gemini-2.0-flash-exp:free",
    "google/gemma-3-27b-it:free",
]
|
|
|
|
|
# Sidebar controls, attached through the ``st.sidebar`` object notation.

# --- Model selection and sampling parameters ---
st.sidebar.subheader("Model & Generation Settings")
model_choice = st.sidebar.selectbox("OpenRouter model", OPENROUTER_MODELS, index=0)
# Free-form model id; when non-blank it is used instead of the selection above.
custom_model = st.sidebar.text_input(
    "Custom model id (optional)",
    placeholder="e.g. meta-llama/llama-3.1-8b-instruct",
)
temperature = st.sidebar.slider(
    "Temperature", min_value=0.0, max_value=1.0, value=0.1, step=0.05
)
top_p = st.sidebar.slider(
    "Top-p", min_value=0.0, max_value=1.0, value=0.5, step=0.05
)

# --- Ontology: entity labels and relationship descriptions ---
st.sidebar.markdown("### Ontology (labels)")
labels_text = st.sidebar.text_area(
    "Comma-separated labels",
    value="Person, Object, Event, Place, Document, Organisation, Action, Miscellanous",
    height=70,
)
relationships_text = st.sidebar.text_input(
    "Relationships (comma-separated)",
    value="Relation between any pair of Entities",
)
|
|
|
|
|
def parse_labels(text: str):
    """Turn a comma-separated string into a list of ontology labels.

    Each piece is stripped of surrounding whitespace and blank pieces are
    dropped. If nothing usable remains, the built-in default label set is
    returned instead.

    Args:
        text: Raw comma-separated labels, e.g. ``"Person, Place"``.

    Returns:
        A non-empty list of label strings, in input order.
    """
    cleaned = []
    for piece in text.split(","):
        candidate = piece.strip()
        if candidate:
            cleaned.append(candidate)

    if cleaned:
        return cleaned

    # Nothing usable was supplied: fall back to the default ontology labels.
    return [
        "Person",
        "Object",
        "Event",
        "Place",
        "Document",
        "Organisation",
        "Action",
        "Miscellanous",
    ]
|
|
|
def pdf_to_text(file) -> str:
    """Extract the text of every page of a PDF and join it with newlines.

    Extraction is best-effort: a page whose extraction raises is skipped,
    and a page that yields no text contributes an empty string.

    Args:
        file: Whatever ``PdfReader`` accepts as its source (here, the
            uploaded file object).

    Returns:
        The page texts joined by ``"\\n"``.
    """
    page_texts = []
    for pdf_page in PdfReader(file).pages:
        try:
            extracted = pdf_page.extract_text()
        except Exception:
            # Deliberately tolerant: one unreadable page must not abort the
            # whole document.
            continue
        else:
            page_texts.append(extracted or "")
    return "\n".join(page_texts)
|
|
|
def chunk_text(text: str, chars: int = 3500) -> list[Document]:
    """Split *text* into fixed-size character windows wrapped as Documents.

    The text is cut every ``chars`` characters. Each window is stripped of
    surrounding whitespace, and windows that end up empty are dropped.

    Args:
        text: The full input text.
        chars: Window size in characters.

    Returns:
        One ``Document`` per non-empty window. ``metadata["chunk_id"]`` is the
        window's zero-based position in the original text, so ids may have
        gaps where blank windows were dropped.
    """
    # Lazily produce every stripped window, in order, including blank ones so
    # that enumerate() yields the same index as ``offset // chars``.
    windows = (
        text[offset:offset + chars].strip()
        for offset in range(0, len(text), chars)
    )
    return [
        Document(text=body, metadata={"chunk_id": position})
        for position, body in enumerate(windows)
        if body
    ]
|
|
|
def edges_to_pyvis(edges):
    """Build an interactive pyvis network from extracted graph edges.

    Each edge is expected to expose ``node_1``, ``node_2`` (each with
    ``label`` and ``name``) and ``relationship``. Nodes are keyed by
    ``"<label>:<name>"`` so entities sharing a name but not a label stay
    distinct. The node is displayed by its name, with its label as the hover
    title.

    Args:
        edges: Iterable of edge objects as described above.

    Returns:
        A ``Network`` with physics enabled, ready for HTML rendering.
    """
    graph = Network(
        height="700px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        notebook=False,
        directed=False,
        cdn_resources="in_line",
    )

    seen_ids = set()
    for edge in edges:
        endpoint_ids = []
        # Register both endpoints (first node, then second) before linking them.
        for node in (edge.node_1, edge.node_2):
            node_id = f"{node.label}:{node.name}"
            if node_id not in seen_ids:
                graph.add_node(node_id, label=node.name, title=node.label)
                seen_ids.add(node_id)
            endpoint_ids.append(node_id)

        source_id, target_id = endpoint_ids
        # The relationship text becomes the edge's hover title; a missing
        # relationship is shown as an empty string.
        graph.add_edge(source_id, target_id, title=edge.relationship or "", value=1)

    graph.toggle_physics(True)
    return graph
|
|
|
|
|
# Two alternative input sources, one per tab. When both are provided, text
# extracted from the PDF takes precedence over the pasted text.
tab_text, tab_pdf = st.tabs(["📝 Paste Text", "📄 Upload PDF"])
input_text = ""
with tab_text:
    input_text = st.text_area("Paste your text here", height=220, placeholder="Paste text…")
with tab_pdf:
    pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if pdf_file:
        try:
            pdf_text = pdf_to_text(pdf_file)
        except Exception as exc:
            # UI boundary: an unreadable upload should produce a message, not
            # a traceback. Any pasted text stays in effect.
            st.error(f"Could not read the PDF: {exc}")
        else:
            if pdf_text.strip():
                input_text = pdf_text
            else:
                # Do not let a text-less PDF wipe out text pasted in the
                # other tab.
                st.warning("No extractable text was found in this PDF.")
|
|
|
|
|
if st.button("Generate Knowledge Graph", type="primary"):
    # --- Validate inputs before spending any API calls ---
    if not input_text.strip():
        st.warning("Please provide text or a PDF.")
        st.stop()
    if not OPENROUTER_API_KEY:
        st.error("OPENROUTER_API_KEY is not set in Space Secrets.")
        st.stop()

    # --- Point the OpenAI-compatible client at OpenRouter via environment ---
    os.environ["OPENAI_API_KEY"] = OPENROUTER_API_KEY
    os.environ["OPENAI_BASE_URL"] = "https://openrouter.ai/api/v1"
    # Attribution headers intended for OpenRouter.
    # NOTE(review): confirm the client library actually reads
    # OPENAI_DEFAULT_HEADERS; if it does not, this assignment has no effect.
    os.environ["OPENAI_DEFAULT_HEADERS"] = (
        '{"HTTP-Referer":"https://huggingface.co/spaces/blazingbunny/rahulnyk_knowledge_graph",'
        '"X-Title":"Knowledge Graph (OpenRouter)"}'
    )

    # A non-blank custom model id overrides the drop-down selection.
    selected_model = custom_model.strip() or model_choice

    # --- Ontology from the sidebar, with defaults when a field is blank ---
    relationships = [r.strip() for r in relationships_text.split(",") if r.strip()]
    ontology = Ontology(
        labels=parse_labels(labels_text),
        relationships=relationships or ["Relation between any pair of Entities"],
    )

    st.info("Chunking input and building graph…")
    docs = chunk_text(input_text)

    # --- Run the extraction; report failures instead of showing a traceback ---
    try:
        llm = OpenAIClient(model=selected_model, temperature=temperature, top_p=top_p)
        gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
        edges = gm.from_documents(docs, delay_s_between=0)
    except Exception as exc:
        # st.stop() is called from the handler, not from inside the try body,
        # so its control-flow exception is never swallowed here.
        st.error(f"Graph generation failed for model '{selected_model}': {exc}")
        st.stop()

    # An empty result would otherwise render an empty table and a blank graph.
    if not edges:
        st.warning(
            "No relationships were extracted. "
            "Try a different model or adjust the ontology."
        )
        st.stop()

    st.success(f"Graph built with {len(edges)} edges.")

    # --- Tabular view: one row per extracted edge ---
    df = pd.DataFrame(
        [
            {
                "node_1_label": e.node_1.label,
                "node_1": e.node_1.name,
                "node_2_label": e.node_2.label,
                "node_2": e.node_2.name,
                "relationship": e.relationship,
            }
            for e in edges
        ]
    )
    st.dataframe(df, use_container_width=True)

    # --- Interactive view: embed the pyvis HTML in the page ---
    net = edges_to_pyvis(edges)
    html = net.generate_html()
    st.components.v1.html(html, height=750, scrolling=True)
|
|
|
# Footer: a horizontal rule followed by an attribution line.
st.markdown("---")
st.caption("Powered by knowledge-graph-maker via OpenRouter.")
|
|