blazingbunny's picture
Update app.py
e02e83a verified
raw
history blame
6.75 kB
import logging
import os

import pandas as pd
import streamlit as st
from knowledge_graph_maker import (
    Document,
    GraphMaker,
    Ontology,
    OpenAIClient,
)
from pypdf import PdfReader
from pyvis.network import Network
# ── Page setup ──────────────────────────────────────────────────────────────────
# Set the browser-tab title and use the wide layout so the edge table and the
# rendered graph further down get the full page width.
st.set_page_config(page_title="Knowledge Graph (OpenRouter)", layout="wide")
# The separator in the title is an em dash (it had been stored as mojibake).
st.title("Knowledge Graph from Text/PDF — OpenRouter")
st.caption(
    "Builds a knowledge graph with knowledge-graph-maker via OpenRouter. "
    "Paste text or upload a PDF; choose a model."
)
# ── Secrets / env ───────────────────────────────────────────────────────────────
# The API key is read from the process environment; it is an empty string when
# the variable is unset, which the "Generate" action checks before calling out.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Preset OpenRouter models (you can add more). The first entry is the one
# pre-selected in the sidebar dropdown.
OPENROUTER_MODELS = [
    "openai/gpt-oss-20b:free",
    "moonshotai/kimi-k2:free",
    "google/gemini-2.0-flash-exp:free",
    "google/gemma-3-27b-it:free",
]
# ── Sidebar controls ───────────────────────────────────────────────────────────
# Every widget below lives in the sidebar; the values they return are read by
# the "Generate" action further down the script.
with st.sidebar:
    st.subheader("Model & Generation Settings")

    # Model: pick a preset, or type any model id to override the preset.
    model_choice = st.selectbox(
        "OpenRouter model",
        options=OPENROUTER_MODELS,
        index=0,
    )
    custom_model = st.text_input(
        "Custom model id (optional)",
        placeholder="e.g. meta-llama/llama-3.1-8b-instruct",
    )

    # Sampling parameters handed to the LLM client.
    temperature = st.slider(
        "Temperature", min_value=0.0, max_value=1.0, value=0.1, step=0.05
    )
    top_p = st.slider(
        "Top-p", min_value=0.0, max_value=1.0, value=0.5, step=0.05
    )

    # Ontology: entity labels and relationship descriptions, both entered as
    # comma-separated text and parsed when the graph is generated.
    st.markdown("### Ontology (labels)")
    labels_text = st.text_area(
        "Comma-separated labels",
        value="Person, Object, Event, Place, Document, Organisation, Action, Miscellanous",
        height=70,
    )
    relationships_text = st.text_input(
        "Relationships (comma-separated)",
        value="Relation between any pair of Entities",
    )
# ── Helpers ────────────────────────────────────────────────────────────────────
def parse_labels(text: str) -> list[str]:
    """Parse a comma-separated string into a list of ontology labels.

    Each piece is stripped of surrounding whitespace and empty pieces are
    dropped. Repeated labels are collapsed to their first occurrence so the
    ontology never lists the same label twice.

    Args:
        text: Raw comma-separated labels, e.g. ``"Person, Place, Event"``.

    Returns:
        The cleaned labels in first-seen order, or the built-in default label
        set (the same labels the sidebar pre-fills) when ``text`` yields no
        label at all.
    """
    stripped = (piece.strip() for piece in text.split(","))
    # dict.fromkeys removes duplicates while keeping insertion order.
    labels = list(dict.fromkeys(label for label in stripped if label))
    return labels or [
        "Person", "Object", "Event", "Place", "Document", "Organisation", "Action", "Miscellanous"
    ]
def pdf_to_text(file) -> str:
    """Extract the text of a PDF, one page after another, joined by newlines.

    Extraction is best-effort: a page whose text extraction raises is left
    out of the result, and a warning naming the page is logged so the gap is
    not silent. A page that yields no text contributes an empty string.

    Args:
        file: The PDF source handed straight to ``PdfReader`` (in this app,
            the object returned by the file uploader).

    Returns:
        The extracted page texts joined with ``"\\n"``.
    """
    reader = PdfReader(file)
    page_texts = []
    for page_number, page in enumerate(reader.pages, start=1):
        try:
            page_texts.append(page.extract_text() or "")
        except Exception as exc:
            # Deliberately broad: one unreadable page must not abort the
            # whole document, but the skip should be visible in the logs.
            logging.getLogger(__name__).warning(
                "Skipping PDF page %d: text extraction failed (%s)",
                page_number,
                exc,
            )
    return "\n".join(page_texts)
def chunk_text(text: str, chars: int = 3500) -> list[Document]:
    """Split ``text`` into fixed-width windows and wrap each one in a Document.

    Windows are cut every ``chars`` characters with no overlap. Each window is
    stripped of surrounding whitespace, and windows that end up empty are
    skipped. ``chunk_id`` in the metadata is the window's position in the
    original text, so ids may have gaps where empty windows were dropped.

    Args:
        text: The full input text.
        chars: Window width in characters.

    Returns:
        One ``Document`` per non-empty window, in text order.
    """
    documents = []
    for window_index, start in enumerate(range(0, len(text), chars)):
        piece = text[start:start + chars].strip()
        if not piece:
            continue
        documents.append(
            Document(text=piece, metadata={"chunk_id": window_index})
        )
    return documents
def edges_to_pyvis(edges):
    """Build a PyVis network from a sequence of graph edges.

    Each edge is expected to expose ``node_1``, ``node_2`` (each with
    ``label`` and ``name``) and ``relationship``. A node is identified by
    ``"<label>:<name>"`` and added only once; its display label is the name
    and its tooltip is the entity label. The relationship text becomes the
    edge tooltip (empty string when the relationship is falsy).

    Returns:
        The populated ``Network`` with physics enabled.
    """
    # IMPORTANT: cdn_resources="in_line" prevents PyVis from creating a ./lib folder
    graph = Network(
        height="700px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        notebook=False,
        directed=False,
        cdn_resources="in_line",
    )

    seen_ids = set()
    for edge in edges:
        endpoint_ids = []
        for node in (edge.node_1, edge.node_2):
            node_id = f"{node.label}:{node.name}"
            if node_id not in seen_ids:
                graph.add_node(node_id, label=node.name, title=node.label)
                seen_ids.add(node_id)
            endpoint_ids.append(node_id)

        first_id, second_id = endpoint_ids
        graph.add_edge(first_id, second_id, title=edge.relationship or "", value=1)

    graph.toggle_physics(True)
    return graph
# ── Input tabs ─────────────────────────────────────────────────────────────────
# Two ways to supply the source text. The tab labels start with emoji (they had
# been stored as mojibake).
tab_text, tab_pdf = st.tabs(["📝 Paste Text", "📄 Upload PDF"])

input_text = ""
with tab_text:
    input_text = st.text_area("Paste your text here", height=220, placeholder="Paste text…")
with tab_pdf:
    pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if pdf_file:
        # An uploaded PDF takes precedence over whatever was pasted.
        try:
            input_text = pdf_to_text(pdf_file)
        except Exception as exc:
            # UI boundary: a corrupt or unreadable upload should produce a
            # message, not a traceback. The pasted text (if any) stays in
            # effect because input_text is only reassigned on success.
            st.error(f"Could not read the uploaded PDF: {exc}")
# ── Action ─────────────────────────────────────────────────────────────────────
# Runs only on the script pass triggered by clicking the button. Each guard
# below reports the problem and then halts the script with st.stop().
if st.button("Generate Knowledge Graph", type="primary"):
    if not input_text.strip():
        st.warning("Please provide text or a PDF.")
        st.stop()
    if not OPENROUTER_API_KEY:
        st.error("OPENROUTER_API_KEY is not set in Space Secrets.")
        st.stop()

    # Route OpenAI SDK traffic through OpenRouter (OpenAI-compatible)
    os.environ["OPENAI_API_KEY"] = OPENROUTER_API_KEY
    os.environ["OPENAI_BASE_URL"] = "https://openrouter.ai/api/v1"
    # Optional attribution headers for OpenRouter analytics/ranking
    # NOTE(review): confirm that the client library actually reads
    # OPENAI_DEFAULT_HEADERS; if it does not, this assignment has no effect.
    os.environ["OPENAI_DEFAULT_HEADERS"] = (
        '{"HTTP-Referer":"https://huggingface.co/spaces/blazingbunny/rahulnyk_knowledge_graph",'
        '"X-Title":"Knowledge Graph (OpenRouter)"}'
    )

    # A non-blank custom model id overrides the preset chosen in the dropdown.
    selected_model = custom_model.strip() or model_choice

    # Ontology: both fields fall back to their defaults when left blank.
    ontology = Ontology(
        labels=parse_labels(labels_text),
        relationships=[r.strip() for r in relationships_text.split(",") if r.strip()]
        or ["Relation between any pair of Entities"],
    )

    st.info("Chunking input and building graph…")
    docs = chunk_text(input_text)

    # LLM client (OpenRouter via OpenAI client). Client construction and the
    # graph extraction are wrapped so that a failure is reported in the page
    # instead of as a raw traceback.
    try:
        llm = OpenAIClient(model=selected_model, temperature=temperature, top_p=top_p)
        gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
        edges = gm.from_documents(docs, delay_s_between=0)  # tweak delay for rate limits if needed
    except Exception as exc:
        st.error(f"Graph generation failed: {exc}")
        st.stop()

    # Nothing to tabulate or draw when no edges came back.
    if not edges:
        st.warning("No edges were extracted from this input. Try another model or ontology.")
        st.stop()

    st.success(f"Graph built with {len(edges)} edges.")

    # Show edge table
    df = pd.DataFrame([{
        "node_1_label": e.node_1.label, "node_1": e.node_1.name,
        "node_2_label": e.node_2.label, "node_2": e.node_2.name,
        "relationship": e.relationship
    } for e in edges])
    st.dataframe(df, use_container_width=True)

    # Render graph in-memory (no writes)
    net = edges_to_pyvis(edges)
    graph_html = net.generate_html()  # <- avoids creating ./lib
    st.components.v1.html(graph_html, height=750, scrolling=True)

# Footer, drawn on every pass that reaches the end of the script.
st.markdown("---")
st.caption("Powered by knowledge-graph-maker via OpenRouter.")