blazingbunny committed on
Commit
c164275
·
verified ·
1 Parent(s): fe7c5f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -72
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import io
3
  import tempfile
4
  import streamlit as st
5
  import pandas as pd
@@ -7,50 +6,49 @@ from pypdf import PdfReader
7
  from pyvis.network import Network
8
 
9
  from knowledge_graph_maker import (
10
- GraphMaker, Ontology, Document,
11
- OpenAIClient, GroqClient
12
  )
13
 
14
- st.set_page_config(page_title="Knowledge Graph Maker", layout="wide")
 
 
15
 
16
- st.title("Knowledge Graph from Text/PDF (Docker Space)")
17
- st.caption("Uses knowledge-graph-maker with OpenAI or Groq. Paste text or upload a PDF; view the interactive graph below.")
18
 
19
- # Choose LLM client based on available env vars
20
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
21
- GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
 
 
 
 
22
 
 
23
  with st.sidebar:
24
- st.subheader("Model Settings")
 
 
25
  temperature = st.slider("Temperature", 0.0, 1.0, 0.1, 0.05)
26
  top_p = st.slider("Top-p", 0.0, 1.0, 0.5, 0.05)
27
- provider = st.radio("Provider", ["OpenAI", "Groq"], index=0 if OPENAI_API_KEY else 1 if GROQ_API_KEY else 0)
28
- if provider == "OpenAI":
29
- oai_model = st.text_input("OpenAI model", value="gpt-3.5-turbo")
30
- else:
31
- groq_model = st.text_input("Groq model", value="mixtral-8x7b-32768")
32
 
33
  st.markdown("### Ontology (labels)")
34
- default_labels = [
35
- {"Person": "Person name without adjectives (may appear as name or pronoun)"},
36
- {"Object": "Avoid the definite article 'the' in name"},
37
- {"Event": "Events involving multiple people; no verbs like gives/leaves"},
38
- "Place", "Document", "Organisation", "Action",
39
- {"Miscellanous": "Important concept that fits none of the above"}
40
- ]
41
- labels_text = st.text_area("Labels (JSON or comma-separated)", value=", ".join(
42
- [lbl if isinstance(lbl, str) else list(lbl.keys())[0] for lbl in default_labels]
43
- ), height=80)
44
- st.markdown("### Relationships focus")
45
- relationships_text = st.text_input("Relationships (comma-separated)", value="Relation between any pair of Entities")
46
 
47
  def parse_labels(text):
48
- # Allow simple "A, B, C" input; fall back to defaults above if empty
49
- if not text.strip():
50
- return [ "Person","Object","Event","Place","Document","Organisation","Action","Miscellanous" ]
51
- return [lbl.strip() for lbl in text.split(",") if lbl.strip()]
52
 
53
- def split_pdf(file) -> str:
54
  reader = PdfReader(file)
55
  parts = []
56
  for page in reader.pages:
@@ -60,42 +58,31 @@ def split_pdf(file) -> str:
60
  continue
61
  return "\n".join(parts)
62
 
63
- def build_graph_documents(text: str) -> list[Document]:
64
- # Simple chunking: ~900-1000 tokens ≈ ~3000-4000 chars heuristic
65
- # Adjust if needed.
66
- CHARS = 3500
67
  docs = []
68
- for i in range(0, len(text), CHARS):
69
- chunk = text[i:i+CHARS].strip()
70
  if chunk:
71
- docs.append(Document(text=chunk, metadata={"chunk_id": i//CHARS}))
72
  return docs
73
 
74
  def edges_to_pyvis(edges):
75
  net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", notebook=False, directed=False)
76
- # Simple map to keep unique node IDs
77
  node_ids = {}
78
-
79
- def node_key(label, name): return f"{label}:{name}"
80
-
81
  for e in edges:
82
- n1 = node_key(e.node_1.label, e.node_1.name)
83
- n2 = node_key(e.node_2.label, e.node_2.name)
84
-
85
  if n1 not in node_ids:
86
  net.add_node(n1, label=e.node_1.name, title=e.node_1.label)
87
  node_ids[n1] = True
88
  if n2 not in node_ids:
89
  net.add_node(n2, label=e.node_2.name, title=e.node_2.label)
90
  node_ids[n2] = True
91
-
92
- rel = e.relationship or ""
93
- net.add_edge(n1, n2, title=rel, value=1)
94
-
95
  net.toggle_physics(True)
96
  return net
97
 
98
- # Input UI
99
  tab_text, tab_pdf = st.tabs(["📝 Paste Text", "📄 Upload PDF"])
100
  input_text = ""
101
  with tab_text:
@@ -103,40 +90,46 @@ with tab_text:
103
  with tab_pdf:
104
  pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])
105
  if pdf_file:
106
- input_text = split_pdf(pdf_file)
107
 
108
  if st.button("Generate Knowledge Graph", type="primary"):
109
  if not input_text.strip():
110
  st.warning("Please provide text or a PDF.")
111
  st.stop()
 
 
 
112
 
113
- # Prepare LLM client
114
- if provider == "OpenAI":
115
- if not OPENAI_API_KEY:
116
- st.error("OPENAI_API_KEY is not set in the Space Secrets.")
117
- st.stop()
118
- llm = OpenAIClient(model=oai_model, temperature=temperature, top_p=top_p)
119
- else:
120
- if not GROQ_API_KEY:
121
- st.error("GROQ_API_KEY is not set in the Space Secrets.")
122
- st.stop()
123
- llm = GroqClient(model=groq_model, temperature=temperature, top_p=top_p)
124
 
125
  # Ontology
126
  ontology = Ontology(
127
  labels=parse_labels(labels_text),
128
- relationships=[r.strip() for r in relationships_text.split(",") if r.strip()] or ["Relation between any pair of Entities"]
 
129
  )
130
 
131
- st.info("Chunking input and building graph… this may take a bit for longer texts.")
 
 
 
 
132
 
133
  gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
134
- docs = build_graph_documents(input_text)
135
 
136
- edges = gm.from_documents(docs, delay_s_between=0) # tune delay for rate limits
137
  st.success(f"Graph built with {len(edges)} edges.")
138
 
139
- # Show edges table
140
  df = pd.DataFrame([{
141
  "node_1_label": e.node_1.label, "node_1": e.node_1.name,
142
  "node_2_label": e.node_2.label, "node_2": e.node_2.name,
@@ -144,13 +137,13 @@ if st.button("Generate Knowledge Graph", type="primary"):
144
  } for e in edges])
145
  st.dataframe(df, use_container_width=True)
146
 
147
- # Render with PyVis inside Streamlit
148
  net = edges_to_pyvis(edges)
149
  with tempfile.TemporaryDirectory() as td:
150
  html_path = os.path.join(td, "graph.html")
151
  net.save_graph(html_path)
152
- html_content = open(html_path, "r", encoding="utf-8").read()
153
- st.components.v1.html(html_content, height=750, scrolling=True)
 
154
 
155
  st.markdown("---")
156
- st.caption("Built with [knowledge-graph-maker](https://github.com/rahulnyk/knowledge_graph_maker).")
 
1
  import os
 
2
  import tempfile
3
  import streamlit as st
4
  import pandas as pd
 
6
  from pyvis.network import Network
7
 
8
  from knowledge_graph_maker import (
9
+ GraphMaker, Ontology, Document, OpenAIClient
 
10
  )
11
 
12
# Page chrome: wide layout plus the title and caption shown above the inputs.
st.set_page_config(layout="wide", page_title="Knowledge Graph (OpenRouter)")
st.title("Knowledge Graph from Text/PDF — OpenRouter")
st.caption("Uses knowledge-graph-maker via OpenRouter. Paste text or upload a PDF; choose an OpenRouter model.")

# OpenRouter is the only provider. The key is read from the environment and
# falls back to an empty string when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model ids offered in the sidebar dropdown.
OPENROUTER_MODELS = [
    "openai/gpt-oss-20b:free",
    "moonshotai/kimi-k2:free",
    "google/gemini-2.0-flash-exp:free",
    "google/gemma-3-27b-it:free",
]

# Sidebar: model selection, sampling settings, and the ontology inputs.
with st.sidebar:
    st.subheader("Model & Generation Settings")
    model_choice = st.selectbox(
        label="OpenRouter model",
        options=OPENROUTER_MODELS,
        index=0,
    )
    # A non-empty custom id is meant to override the dropdown selection.
    custom_model = st.text_input(
        label="Custom model id (optional)",
        placeholder="e.g. meta-llama/llama-3.1-8b-instruct",
    )
    temperature = st.slider(
        label="Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.1,
        step=0.05,
    )
    top_p = st.slider(
        label="Top-p",
        min_value=0.0,
        max_value=1.0,
        value=0.5,
        step=0.05,
    )

    st.markdown("### Ontology (labels)")
    labels_text = st.text_area(
        label="Comma-separated labels",
        value="Person, Object, Event, Place, Document, Organisation, Action, Miscellanous",
        height=70,
    )
    relationships_text = st.text_input(
        label="Relationships (comma-separated)",
        value="Relation between any pair of Entities",
    )
 
 
 
45
 
46
  def parse_labels(text):
47
+ return [lbl.strip() for lbl in text.split(",") if lbl.strip()] or [
48
+ "Person","Object","Event","Place","Document","Organisation","Action","Miscellanous"
49
+ ]
 
50
 
51
+ def pdf_to_text(file) -> str:
52
  reader = PdfReader(file)
53
  parts = []
54
  for page in reader.pages:
 
58
  continue
59
  return "\n".join(parts)
60
 
61
def chunk_text(text: str, chars: int = 3500) -> list[Document]:
    """Split text into fixed-width chunks wrapped as ``Document`` objects.

    The text is cut every ``chars`` characters. Each piece is stripped of
    surrounding whitespace, and pieces that end up empty are skipped. The
    ``chunk_id`` stored in the metadata is the zero-based index of the
    window the piece was cut from, so ids can have gaps where a window was
    blank.

    Args:
        text: The full input text to split.
        chars: Window width in characters; must be at least 1.

    Returns:
        One ``Document`` per non-blank window, in text order.

    Raises:
        ValueError: If ``chars`` is less than 1.
    """
    if chars < 1:
        # Without this guard a zero step makes range() fail with a message
        # that never mentions ``chars``, and a negative step silently yields
        # no chunks at all.
        raise ValueError(f"chars must be a positive integer, got {chars!r}")

    docs = []
    for window_index, start in enumerate(range(0, len(text), chars)):
        chunk = text[start:start + chars].strip()
        if chunk:
            docs.append(Document(text=chunk, metadata={"chunk_id": window_index}))
    return docs
68
 
69
  def edges_to_pyvis(edges):
70
  net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", notebook=False, directed=False)
 
71
  node_ids = {}
 
 
 
72
  for e in edges:
73
+ n1 = f"{e.node_1.label}:{e.node_1.name}"
74
+ n2 = f"{e.node_2.label}:{e.node_2.name}"
 
75
  if n1 not in node_ids:
76
  net.add_node(n1, label=e.node_1.name, title=e.node_1.label)
77
  node_ids[n1] = True
78
  if n2 not in node_ids:
79
  net.add_node(n2, label=e.node_2.name, title=e.node_2.label)
80
  node_ids[n2] = True
81
+ net.add_edge(n1, n2, title=e.relationship or "", value=1)
 
 
 
82
  net.toggle_physics(True)
83
  return net
84
 
85
+ # Input tabs
86
  tab_text, tab_pdf = st.tabs(["📝 Paste Text", "📄 Upload PDF"])
87
  input_text = ""
88
  with tab_text:
 
90
  with tab_pdf:
91
  pdf_file = st.file_uploader("Upload a PDF", type=["pdf"])
92
  if pdf_file:
93
+ input_text = pdf_to_text(pdf_file)
94
 
95
  if st.button("Generate Knowledge Graph", type="primary"):
96
  if not input_text.strip():
97
  st.warning("Please provide text or a PDF.")
98
  st.stop()
99
+ if not OPENROUTER_API_KEY:
100
+ st.error("OPENROUTER_API_KEY is not set in Space Secrets.")
101
+ st.stop()
102
 
103
+ # Point OpenAI SDK at OpenRouter
104
+ os.environ["OPENAI_API_KEY"] = OPENROUTER_API_KEY
105
+ os.environ["OPENAI_BASE_URL"] = "https://openrouter.ai/api/v1"
106
+
107
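+ # Optional: attribution headers for OpenRouter. NOTE(review): confirm the OpenAI SDK actually reads OPENAI_DEFAULT_HEADERS; if it does not, these headers are never sent.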
108
+ os.environ["OPENAI_DEFAULT_HEADERS"] = (
109
+ '{"HTTP-Referer":"https://huggingface.co/spaces/blazingbunny/rahulnyk_knowledge_graph",'
110
+ '"X-Title":"Knowledge Graph (OpenRouter)"}'
111
+ )
112
+
113
+ selected_model = custom_model.strip() if custom_model.strip() else model_choice
114
 
115
  # Ontology
116
  ontology = Ontology(
117
  labels=parse_labels(labels_text),
118
+ relationships=[r.strip() for r in relationships_text.split(",") if r.strip()] or
119
+ ["Relation between any pair of Entities"]
120
  )
121
 
122
+ st.info("Chunking input and building graph…")
123
+ docs = chunk_text(input_text)
124
+
125
+ # LLM client (OpenRouter through OpenAI-compatible client)
126
+ llm = OpenAIClient(model=selected_model, temperature=temperature, top_p=top_p)
127
 
128
  gm = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)
129
+ edges = gm.from_documents(docs, delay_s_between=0)
130
 
 
131
  st.success(f"Graph built with {len(edges)} edges.")
132
 
 
133
  df = pd.DataFrame([{
134
  "node_1_label": e.node_1.label, "node_1": e.node_1.name,
135
  "node_2_label": e.node_2.label, "node_2": e.node_2.name,
 
137
  } for e in edges])
138
  st.dataframe(df, use_container_width=True)
139
 
 
140
  net = edges_to_pyvis(edges)
141
  with tempfile.TemporaryDirectory() as td:
142
  html_path = os.path.join(td, "graph.html")
143
  net.save_graph(html_path)
144
+ with open(html_path, "r", encoding="utf-8") as f:
145
+ html = f.read()
146
+ st.components.v1.html(html, height=750, scrolling=True)
147
 
148
  st.markdown("---")
149
+ st.caption("Powered by knowledge-graph-maker via OpenRouter.")