import os

# Opt out of Streamlit's usage telemetry; this must be set before streamlit
# is imported so the config is picked up.
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"

import streamlit as st
import spacy
import benepar
import nltk
from nltk import Tree
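
# Usage (the filename is illustrative; substitute this script's real name):
#   streamlit run parser_comparison.py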

# Build the NLP pipeline once per session. st.cache_resource keeps the loaded
# models in memory across Streamlit reruns, and the nltk/benepar downloads are
# skipped automatically once the data is already present.
@st.cache_resource
def load_pipeline():
    nltk.download('punkt', quiet=True)
    benepar.download('benepar_en3')
    nlp = spacy.load("en_core_web_sm")
    if "benepar" not in nlp.pipe_names:
        nlp.add_pipe("benepar", config={"model": "benepar_en3"})
    return nlp

st.set_page_config(page_title="Syntax Parser Comparison Tool", layout="wide")

# set_page_config must be the first Streamlit call in the script, so the
# (cached) pipeline is only loaded afterwards.
nlp = load_pipeline()

st.title("Syntax Parser Comparison Tool")
st.write("This tool compares Dependency Parsing, Constituency Parsing, and a simulated Abstract Syntax Representation (ASR).")

sentence = st.text_input("Enter a sentence:", "John eats an apple.")

if sentence:
    doc = nlp(sentence)
    # Only the first sentence of the input is analyzed.
    sent = list(doc.sents)[0]

    col1, col2, col3 = st.columns(3)

    with col1:
        st.header("Dependency Parsing")
        for token in sent:
            st.write(f"{token.text} --> {token.dep_} --> {token.head.text}")
        st.code(" ".join(f"({token.text}, {token.dep_}, {token.head.text})" for token in sent))

    with col2:
        st.header("Constituency Parsing")
        tree = sent._.parse_string
        st.text(tree)
        st.code(Tree.fromstring(tree).pformat())
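        # Optional extra: an ASCII drawing of the same tree. A sketch that
        # assumes Tree.pretty_print accepts a `stream` argument (true for
        # recent NLTK releases).
        import io
        buffer = io.StringIO()
        Tree.fromstring(tree).pretty_print(stream=buffer)
        st.text(buffer.getvalue())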

    with col3:
        st.header("Simulated ASR Output")
        st.write("Combining phrase structure with dependency head annotations:")
        for token in sent:
            # spaCy's English models label direct objects "dobj"; "obj" is
            # kept as well for UD-style models that use it instead.
            if token.dep_ in ("nsubj", "dobj", "obj", "det", "ROOT"):
                st.write(f"[{token.text}] - {token.dep_} --> {token.head.text} ({token.pos_})")
        st.markdown("_(ASR is simulated by combining POS tags, dependency heads, and phrase information.)_")
        st.code(" ".join(f"[{token.text}: {token.dep_} → {token.head.text}]({token.pos_})" for token in sent))