blazingbunny's picture
Rename main.py to app.py
3bd7402 verified
raw
history blame
5.9 kB
import os
import io
import tempfile
import streamlit as st
import pandas as pd
from pypdf import PdfReader
from pyvis.network import Network
from knowledge_graph_maker import (
GraphMaker, Ontology, Document,
OpenAIClient, GroqClient
)
# Page chrome: wide layout, a title, and a one-line description of the app.
st.set_page_config(
    page_title="Knowledge Graph Maker",
    layout="wide",
)
st.title("Knowledge Graph from Text/PDF (Docker Space)")
st.caption(
    "Uses knowledge-graph-maker with OpenAI or Groq. "
    "Paste text or upload a PDF; view the interactive graph below."
)
# API keys are read from the environment; an unset variable becomes "" so the
# rest of the script can test the keys for truthiness when picking a provider.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Sidebar: sampling parameters, provider/model choice, and the ontology inputs.
with st.sidebar:
    st.subheader("Model Settings")
    temperature = st.slider(
        "Temperature", min_value=0.0, max_value=1.0, value=0.1, step=0.05
    )
    top_p = st.slider(
        "Top-p", min_value=0.0, max_value=1.0, value=0.5, step=0.05
    )

    # Preselect Groq only when it is the sole provider with a key configured;
    # in every other case (OpenAI key present, or no keys at all) use OpenAI.
    if OPENAI_API_KEY or not GROQ_API_KEY:
        default_provider_index = 0
    else:
        default_provider_index = 1
    provider = st.radio(
        "Provider", ["OpenAI", "Groq"], index=default_provider_index
    )

    # Only the model field for the selected provider is rendered (and bound).
    if provider == "OpenAI":
        oai_model = st.text_input("OpenAI model", value="gpt-3.5-turbo")
    else:
        groq_model = st.text_input("Groq model", value="mixtral-8x7b-32768")

    st.markdown("### Ontology (labels)")
    # Entries are either a bare label name or a {name: description} mapping.
    default_labels = [
        {"Person": "Person name without adjectives (may appear as name or pronoun)"},
        {"Object": "Avoid the definite article 'the' in name"},
        {"Event": "Events involving multiple people; no verbs like gives/leaves"},
        "Place",
        "Document",
        "Organisation",
        "Action",
        {"Miscellanous": "Important concept that fits none of the above"},
    ]
    # The text area is pre-filled with the label names only (no descriptions).
    default_label_names = [
        entry if isinstance(entry, str) else next(iter(entry))
        for entry in default_labels
    ]
    labels_text = st.text_area(
        "Labels (JSON or comma-separated)",
        value=", ".join(default_label_names),
        height=80,
    )

    st.markdown("### Relationships focus")
    relationships_text = st.text_input(
        "Relationships (comma-separated)",
        value="Relation between any pair of Entities",
    )
def parse_labels(text):
    """Turn the "Labels" text field into a list of ontology labels.

    The input may be JSON or a plain comma-separated list:

    * A JSON array keeps its string items (stripped) and its non-empty
      object items (``{"Label": "description"}``); other items are dropped.
    * A JSON object is expanded into one single-key dict per entry.
    * Anything else, including text that looks like JSON but does not
      parse, is split on commas with blank items removed.
    * Blank input, or JSON that yields no usable labels, returns the
      built-in default label names.

    Args:
        text: Raw contents of the labels field.

    Returns:
        A list whose items are label names (``str``) or single-label
        ``dict`` objects mapping a name to its description.
    """
    import json

    fallback = [
        "Person", "Object", "Event", "Place",
        "Document", "Organisation", "Action", "Miscellanous",
    ]

    stripped = text.strip()
    if not stripped:
        return fallback

    # Only attempt JSON when the text starts like a JSON container, so plain
    # "A, B, C" input never goes through the JSON parser.
    if stripped[0] in "[{":
        try:
            parsed = json.loads(stripped)
        except ValueError:
            # Not valid JSON after all; handled by the comma split below.
            parsed = None

        if isinstance(parsed, dict):
            parsed = [{name: description} for name, description in parsed.items()]

        if isinstance(parsed, list):
            labels = []
            for item in parsed:
                if isinstance(item, str):
                    if item.strip():
                        labels.append(item.strip())
                elif isinstance(item, dict) and item:
                    labels.append(item)
            return labels or fallback

    return [lbl.strip() for lbl in text.split(",") if lbl.strip()]
def split_pdf(file) -> str:
    """Extract the text of each page of a PDF, joined with newlines.

    Extraction is best-effort: a page whose ``extract_text()`` raises is
    skipped, and a page that yields no text contributes an empty string.
    Skipped pages are reported through the ``logging`` module rather than
    being dropped silently.

    Args:
        file: Whatever ``PdfReader`` accepts as its source; the caller in
            this file passes the Streamlit upload object.

    Returns:
        The per-page texts joined with ``"\\n"``.
    """
    import logging

    reader = PdfReader(file)
    parts = []
    for page_number, page in enumerate(reader.pages, start=1):
        try:
            parts.append(page.extract_text() or "")
        except Exception:
            # Keep going so one bad page does not discard the whole document,
            # but leave a trace of which page was lost and why.
            logging.getLogger(__name__).warning(
                "Skipping PDF page %d: text extraction failed",
                page_number,
                exc_info=True,
            )
    return "\n".join(parts)
def build_graph_documents(text: str, chunk_size: int = 3500) -> list[Document]:
    """Split ``text`` into fixed-width chunks wrapped as ``Document`` objects.

    The default width of 3500 characters follows the original heuristic of
    roughly 900-1000 tokens per chunk (about 3000-4000 characters).

    Args:
        text: The full input text.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        One ``Document`` per non-blank chunk, in order. Each chunk is
        stripped of surrounding whitespace, and its ``metadata["chunk_id"]``
        is the zero-based index of the slice it came from, so ids can have
        gaps where a whitespace-only slice was skipped.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    docs = []
    for start in range(0, len(text), chunk_size):
        chunk = text[start:start + chunk_size].strip()
        if chunk:
            docs.append(
                Document(text=chunk, metadata={"chunk_id": start // chunk_size})
            )
    return docs
def edges_to_pyvis(edges):
    """Build an undirected PyVis ``Network`` from extracted edges.

    Each edge is expected to expose ``node_1`` and ``node_2`` (each with
    ``label`` and ``name``) and ``relationship``. A node is identified by
    ``"<label>:<name>"`` and added once; it is displayed by name with its
    label as the hover title. Every edge carries its relationship text (or
    ``""`` when falsy) as the hover title and a fixed ``value`` of 1.

    Returns:
        The populated ``Network`` with physics enabled.
    """
    graph = Network(
        height="700px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        notebook=False,
        directed=False,
    )

    added_ids = set()
    for edge in edges:
        endpoint_ids = []
        for node in (edge.node_1, edge.node_2):
            node_id = f"{node.label}:{node.name}"
            if node_id not in added_ids:
                graph.add_node(node_id, label=node.name, title=node.label)
                added_ids.add(node_id)
            endpoint_ids.append(node_id)

        first_id, second_id = endpoint_ids
        graph.add_edge(first_id, second_id, title=edge.relationship or "", value=1)

    graph.toggle_physics(True)
    return graph
# Input UI: one tab for pasted text, one for a PDF upload.
tab_text, tab_pdf = st.tabs(["📝 Paste Text", "📄 Upload PDF"])
with tab_text:
    pasted_text = st.text_area("Paste your text here", height=220, placeholder="Paste text…")
with tab_pdf:
    pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])

# When a PDF has been uploaded, its extracted text replaces the pasted text.
input_text = split_pdf(pdf_file) if pdf_file else pasted_text
# Main action: runs only on the script pass triggered by the button click.
if st.button("Generate Knowledge Graph", type="primary"):
    if not input_text.strip():
        st.warning("Please provide text or a PDF.")
        st.stop()

    # Prepare the LLM client for the provider picked in the sidebar, refusing
    # to continue when the matching API key is missing.
    if provider == "OpenAI":
        if not OPENAI_API_KEY:
            st.error("OPENAI_API_KEY is not set in the Space Secrets.")
            st.stop()
        llm = OpenAIClient(model=oai_model, temperature=temperature, top_p=top_p)
    else:
        if not GROQ_API_KEY:
            st.error("GROQ_API_KEY is not set in the Space Secrets.")
            st.stop()
        llm = GroqClient(model=groq_model, temperature=temperature, top_p=top_p)

    # Ontology: labels from the sidebar text, relationships split on commas
    # (falling back to the generic relationship when the field is empty).
    ontology = Ontology(
        labels=parse_labels(labels_text),
        relationships=[r.strip() for r in relationships_text.split(",") if r.strip()]
        or ["Relation between any pair of Entities"],
    )

    st.info("Chunking input and building graph… this may take a bit for longer texts.")
    gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
    docs = build_graph_documents(input_text)
    edges = gm.from_documents(docs, delay_s_between=0)  # tune delay for rate limits
    st.success(f"Graph built with {len(edges)} edges.")

    # Show the extracted edges as a table, one row per edge.
    df = pd.DataFrame([
        {
            "node_1_label": e.node_1.label,
            "node_1": e.node_1.name,
            "node_2_label": e.node_2.label,
            "node_2": e.node_2.name,
            "relationship": e.relationship,
        }
        for e in edges
    ])
    st.dataframe(df, use_container_width=True)

    # Render with PyVis inside Streamlit: write the graph to a temporary HTML
    # file, read it back, and embed the markup as a component.
    net = edges_to_pyvis(edges)
    with tempfile.TemporaryDirectory() as td:
        html_path = os.path.join(td, "graph.html")
        net.save_graph(html_path)
        # Read through a context manager so the handle is closed before the
        # temporary directory is cleaned up.
        with open(html_path, "r", encoding="utf-8") as html_file:
            html_content = html_file.read()
    st.components.v1.html(html_content, height=750, scrolling=True)
# Footer: a horizontal rule followed by a credit link to the library.
st.markdown("---")
st.caption(
    "Built with "
    "[knowledge-graph-maker](https://github.com/rahulnyk/knowledge_graph_maker)."
)