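# Streamlit app: shows three views of the same sentence side by side:
# spaCy dependency parsing, benepar constituency parsing, and a simulated
# "Abstract Syntax Representation" (ASR) that merges dependency and POS info.
# Requires the streamlit, spacy, benepar, nltk, and graphviz Python packages
# plus the en_core_web_sm spaCy model.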
import streamlit as st
import nltk
import spacy
import benepar
from nltk import Tree
from graphviz import Digraph

# Store NLTK data in a writable temp directory.
nltk_data_path = "/tmp/nltk_data"
nltk.data.path.append(nltk_data_path)
nltk.download('punkt', download_dir=nltk_data_path)

# Load spaCy and attach the benepar constituency parser to the pipeline.
nlp = spacy.load("en_core_web_sm")

if "benepar" not in nlp.pipe_names:
    benepar.download("benepar_en3")
    nlp.add_pipe("benepar", config={"model": "benepar_en3"})
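
# Optional sketch (assumes a Streamlit release that provides st.cache_resource):
# the model setup above re-runs on every rerun of the script. It could be
# wrapped in a cached loader so it only runs once per session, e.g.
#
#     @st.cache_resource
#     def load_pipeline():
#         pipeline = spacy.load("en_core_web_sm")
#         if "benepar" not in pipeline.pipe_names:
#             benepar.download("benepar_en3")
#             pipeline.add_pipe("benepar", config={"model": "benepar_en3"})
#         return pipeline
#
# and then nlp = load_pipeline() called after st.set_page_config().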

st.set_page_config(
    page_title="🌐 Syntax Parser Comparison Tool",
    page_icon="📝",
    layout="wide"
)

st.sidebar.title("ℹ️ About This Tool")
st.sidebar.write("""
Compare **Dependency Parsing**, **Constituency Parsing**,
and a simulated **Abstract Syntax Representation (ASR)**.
""")
st.sidebar.markdown("---")
st.sidebar.info("💡 Enter a sentence in the input box to see all 3 parses.")

st.title("🌐 Syntax Parser Comparison Tool")
st.markdown("""
This tool demonstrates **three parsing styles** side-by-side:
1. **Dependency Parsing** – Shows head–dependent word relationships.
2. **Constituency Parsing** – Displays hierarchical phrase structures.
3. **Abstract Syntax Representation (ASR)** – Merges phrase structure with dependency info.
""")

sentence = st.text_input("✏️ Enter a sentence:", "John eats an apple.")

if sentence:
    doc = nlp(sentence)
    # Only the first sentence of the input is analysed.
    sent = list(doc.sents)[0]

    col1, col2, col3 = st.columns(3)

    with col1:
        st.subheader("🔗 Dependency Parsing")
        dep_graph = Digraph()
        dep_graph.attr(rankdir="TB")
        for token in sent:
            # Use the token index as the node ID so repeated words in a
            # sentence don't collapse into a single node.
            dep_graph.node(str(token.i), f"{token.text}\n({token.dep_})")
            if token.head != token:
                dep_graph.edge(str(token.head.i), str(token.i))
        st.graphviz_chart(dep_graph)
        with st.expander("Raw Dependency Tuples"):
            st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent))
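
    # The hand-built Graphviz chart above could also be replaced by spaCy's
    # built-in displacy visualiser (an alternative sketch, not part of the
    # original layout), e.g.
    #
    #     from spacy import displacy
    #     svg = displacy.render(doc, style="dep", jupyter=False)
    #     st.markdown(svg, unsafe_allow_html=True)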

    with col2:
        st.subheader("🌳 Constituency Parsing")
        # benepar exposes a Penn-Treebank-style bracketed parse per sentence.
        tree_str = sent._.parse_string
        with st.expander("Tree String"):
            st.text(tree_str)
        st.code(Tree.fromstring(tree_str).pformat(), language="text")

    with col3:
        st.subheader("🧩 Simulated ASR Output")
        st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.")
        highlighted_output = []
        for token in sent:
            # en_core_web_sm labels direct objects "dobj" rather than the UD
            # "obj", so both are accepted here.
            if token.dep_ in ("nsubj", "dobj", "obj", "det", "ROOT"):
                highlighted_output.append(
                    f"**[{token.text}]** - {token.dep_} → {token.head.text} ({token.pos_})"
                )
        # Blank line between entries so markdown renders each on its own line.
        st.write("\n\n".join(highlighted_output))
        with st.expander("ASR Encoded String"):
            st.code(
                " ".join(f"[{t.text}: {t.dep_} → {t.head.text}]({t.pos_})" for t in sent)
            )
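
# To try the app locally (filename is illustrative):
#     streamlit run syntax_parser_app.py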