Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import io
 import tempfile
 import streamlit as st
 import pandas as pd
@@ -7,50 +6,49 @@ from pypdf import PdfReader
 from pyvis.network import Network

 from knowledge_graph_maker import (
-    GraphMaker, Ontology, Document,
-    OpenAIClient, GroqClient
+    GraphMaker, Ontology, Document, OpenAIClient
 )

-st.set_page_config(page_title="Knowledge Graph
+st.set_page_config(page_title="Knowledge Graph (OpenRouter)", layout="wide")
+st.title("Knowledge Graph from Text/PDF — OpenRouter")
+st.caption("Uses knowledge-graph-maker via OpenRouter. Paste text or upload a PDF; choose an OpenRouter model.")

+# --- OpenRouter only ---
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")

+# Model choices
+OPENROUTER_MODELS = [
+    "openai/gpt-oss-20b:free",
+    "moonshotai/kimi-k2:free",
+    "google/gemini-2.0-flash-exp:free",
+    "google/gemma-3-27b-it:free",
+]

+# Sidebar controls
 with st.sidebar:
-    st.subheader("Model Settings")
+    st.subheader("Model & Generation Settings")
+    model_choice = st.selectbox("OpenRouter model", OPENROUTER_MODELS, index=0)
+    custom_model = st.text_input("Custom model id (optional)", placeholder="e.g. meta-llama/llama-3.1-8b-instruct")
     temperature = st.slider("Temperature", 0.0, 1.0, 0.1, 0.05)
     top_p = st.slider("Top-p", 0.0, 1.0, 0.5, 0.05)
-    provider = st.radio("Provider", ["OpenAI", "Groq"], index=0 if OPENAI_API_KEY else 1 if GROQ_API_KEY else 0)
-    if provider == "OpenAI":
-        oai_model = st.text_input("OpenAI model", value="gpt-3.5-turbo")
-    else:
-        groq_model = st.text_input("Groq model", value="mixtral-8x7b-32768")

     st.markdown("### Ontology (labels)")
-    ), height=80)
-    st.markdown("### Relationships focus")
-    relationships_text = st.text_input("Relationships (comma-separated)", value="Relation between any pair of Entities")
+    labels_text = st.text_area(
+        "Comma-separated labels",
+        value="Person, Object, Event, Place, Document, Organisation, Action, Miscellanous",
+        height=70,
+    )
+    relationships_text = st.text_input(
+        "Relationships (comma-separated)",
+        value="Relation between any pair of Entities",
+    )

 def parse_labels(text):
-    return [lbl.strip() for lbl in text.split(",") if lbl.strip()]
+    return [lbl.strip() for lbl in text.split(",") if lbl.strip()] or [
+        "Person","Object","Event","Place","Document","Organisation","Action","Miscellanous"
+    ]

-def split_pdf(file) -> str:
+def pdf_to_text(file) -> str:
     reader = PdfReader(file)
     parts = []
     for page in reader.pages:
@@ -60,42 +58,31 @@ def split_pdf(file) -> str:
             continue
     return "\n".join(parts)

-def
-    # Simple chunking: ~900-1000 tokens ≈ ~3000-4000 chars heuristic
-    # Adjust if needed.
-    CHARS = 3500
+def chunk_text(text: str, chars: int = 3500) -> list[Document]:
     docs = []
-    for i in range(0, len(text),
-        chunk = text[i:i+
+    for i in range(0, len(text), chars):
+        chunk = text[i:i+chars].strip()
         if chunk:
-            docs.append(Document(text=chunk, metadata={"chunk_id": i//
+            docs.append(Document(text=chunk, metadata={"chunk_id": i//chars}))
     return docs

 def edges_to_pyvis(edges):
     net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", notebook=False, directed=False)
-    # Simple map to keep unique node IDs
     node_ids = {}
-    def node_key(label, name): return f"{label}:{name}"
     for e in edges:
-        n1 =
-        n2 =
+        n1 = f"{e.node_1.label}:{e.node_1.name}"
+        n2 = f"{e.node_2.label}:{e.node_2.name}"
         if n1 not in node_ids:
             net.add_node(n1, label=e.node_1.name, title=e.node_1.label)
             node_ids[n1] = True
         if n2 not in node_ids:
             net.add_node(n2, label=e.node_2.name, title=e.node_2.label)
             node_ids[n2] = True
-        rel = e.relationship or ""
-        net.add_edge(n1, n2, title=rel, value=1)
+        net.add_edge(n1, n2, title=e.relationship or "", value=1)
     net.toggle_physics(True)
     return net

-# Input
+# Input tabs
 tab_text, tab_pdf = st.tabs(["📝 Paste Text", "📄 Upload PDF"])
 input_text = ""
 with tab_text:
@@ -103,40 +90,46 @@ with tab_text:
 with tab_pdf:
     pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])
     if pdf_file:
-        input_text =
+        input_text = pdf_to_text(pdf_file)

 if st.button("Generate Knowledge Graph", type="primary"):
     if not input_text.strip():
         st.warning("Please provide text or a PDF.")
         st.stop()
+    if not OPENROUTER_API_KEY:
+        st.error("OPENROUTER_API_KEY is not set in Space Secrets.")
+        st.stop()

+    # Point OpenAI SDK at OpenRouter
+    os.environ["OPENAI_API_KEY"] = OPENROUTER_API_KEY
+    os.environ["OPENAI_BASE_URL"] = "https://openrouter.ai/api/v1"
+
+    # Optional: attribution headers for OpenRouter
+    os.environ["OPENAI_DEFAULT_HEADERS"] = (
+        '{"HTTP-Referer":"https://huggingface.co/spaces/blazingbunny/rahulnyk_knowledge_graph",'
+        '"X-Title":"Knowledge Graph (OpenRouter)"}'
+    )
+
+    selected_model = custom_model.strip() if custom_model.strip() else model_choice

     # Ontology
     ontology = Ontology(
         labels=parse_labels(labels_text),
-        relationships=[r.strip() for r in relationships_text.split(",") if r.strip()] or
+        relationships=[r.strip() for r in relationships_text.split(",") if r.strip()] or
+                      ["Relation between any pair of Entities"]
     )

-    st.info("Chunking input and building graph…
+    st.info("Chunking input and building graph…")
+    docs = chunk_text(input_text)
+
+    # LLM client (OpenRouter through OpenAI-compatible client)
+    llm = OpenAIClient(model=selected_model, temperature=temperature, top_p=top_p)

     gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
-    edges = gm.from_documents(docs, delay_s_between=0)  # tune delay for rate limits
+    edges = gm.from_documents(docs, delay_s_between=0)

     st.success(f"Graph built with {len(edges)} edges.")

-    # Show edges table
     df = pd.DataFrame([{
         "node_1_label": e.node_1.label, "node_1": e.node_1.name,
         "node_2_label": e.node_2.label, "node_2": e.node_2.name,
@@ -144,13 +137,13 @@ if st.button("Generate Knowledge Graph", type="primary"):
     } for e in edges])
     st.dataframe(df, use_container_width=True)

-    # Render with PyVis inside Streamlit
     net = edges_to_pyvis(edges)
     with tempfile.TemporaryDirectory() as td:
         html_path = os.path.join(td, "graph.html")
         net.save_graph(html_path)
+        with open(html_path, "r", encoding="utf-8") as f:
+            html = f.read()
+        st.components.v1.html(html, height=750, scrolling=True)

     st.markdown("---")
-    st.caption("
+    st.caption("Powered by knowledge-graph-maker via OpenRouter.")
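
The change drives knowledge-graph-maker's OpenAIClient at OpenRouter purely through environment variables (OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_DEFAULT_HEADERS). Below is a minimal sketch of the equivalent explicit setup with the openai Python SDK (v1+), useful for verifying the key and a model id outside Streamlit. It assumes OPENROUTER_API_KEY is exported; whether OpenAIClient actually honors OPENAI_DEFAULT_HEADERS from the environment is an assumption the app makes, so the attribution headers are passed explicitly here instead.

# Minimal sketch: call OpenRouter through the OpenAI-compatible SDK.
# Assumes the `openai` package (v1+) is installed and OPENROUTER_API_KEY is set;
# the headers and model id mirror the values used in app.py.
import os
from openai import OpenAI

client = OpenAI(
    api_key=os.environ["OPENROUTER_API_KEY"],
    base_url="https://openrouter.ai/api/v1",
    default_headers={
        "HTTP-Referer": "https://huggingface.co/spaces/blazingbunny/rahulnyk_knowledge_graph",
        "X-Title": "Knowledge Graph (OpenRouter)",
    },
)

resp = client.chat.completions.create(
    model="openai/gpt-oss-20b:free",  # any id from OPENROUTER_MODELS
    messages=[{"role": "user", "content": "Reply with the single word: ok"}],
    temperature=0.1,
)
print(resp.choices[0].message.content)

If this returns a completion, the same key and model id should also work inside the Space.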
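The graph-building path in app.py is: chunk the input into Document objects, pass them to GraphMaker.from_documents, and read edges back. A hedged command-line smoke test of that same path, reusing only the calls app.py itself makes (Ontology, Document, OpenAIClient, GraphMaker), might look like the sketch below; the sample sentence, labels, and model id are placeholders, and it assumes the OpenRouter environment variables are exported exactly as the app sets them.

# Hedged smoke test of the same pipeline app.py runs, outside Streamlit.
# Assumes knowledge-graph-maker is installed and OPENAI_API_KEY / OPENAI_BASE_URL
# point at OpenRouter, as in app.py.
from knowledge_graph_maker import GraphMaker, Ontology, Document, OpenAIClient

ontology = Ontology(
    labels=["Person", "Place", "Event"],  # placeholder labels
    relationships=["Relation between any pair of Entities"],
)
llm = OpenAIClient(model="openai/gpt-oss-20b:free", temperature=0.1, top_p=0.5)
gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)

docs = [Document(text="Ada Lovelace met Charles Babbage in London.", metadata={"chunk_id": 0})]
edges = gm.from_documents(docs, delay_s_between=0)
for e in edges:
    print(e.node_1.name, "->", e.node_2.name, "|", e.relationship)

If this prints a few node pairs, the Streamlit UI should render the same edges as a PyVis graph.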