import streamlit as st
import nltk
import spacy
import benepar
from nltk import Tree
from graphviz import Digraph

# Configure NLTK to use /tmp for downloads
nltk_data_path = "/tmp/nltk_data"
nltk.data.path.append(nltk_data_path)
nltk.download('punkt', download_dir=nltk_data_path)

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Add benepar if not already added
if "benepar" not in nlp.pipe_names:
    benepar.download("benepar_en3")
    nlp.add_pipe("benepar", config={"model": "benepar_en3"})

# Streamlit page config
st.set_page_config(
    page_title="🌐 Syntax Parser Comparison Tool",
    page_icon="📝",
    layout="wide"
)

# Sidebar info
st.sidebar.title("ℹ️ About This Tool")
st.sidebar.write("""
Compare **Dependency Parsing**, **Constituency Parsing**, and a simulated
**Abstract Syntax Representation (ASR)**.
""")
st.sidebar.markdown("---")
st.sidebar.info("💡 Enter a sentence in the input box to see all 3 parses.")

# Title
st.title("🌐 Syntax Parser Comparison Tool")
st.markdown("""
This tool demonstrates **three parsing styles** side by side:

1. **Dependency Parsing** – Shows head–dependent word relationships.
2. **Constituency Parsing** – Displays hierarchical phrase structures.
3. **Abstract Syntax Representation (ASR)** – Merges phrase structure with dependency info.
""")
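# Performance note: Streamlit re-executes this whole script on every widget
# interaction, so the spaCy/benepar setup above runs on each rerun. A minimal
# caching sketch, assuming Streamlit >= 1.18 (which provides st.cache_resource);
# load_pipeline is a hypothetical helper name, not part of the original tool:
#
# @st.cache_resource
# def load_pipeline():
#     pipeline = spacy.load("en_core_web_sm")
#     if "benepar" not in pipeline.pipe_names:
#         benepar.download("benepar_en3")
#         pipeline.add_pipe("benepar", config={"model": "benepar_en3"})
#     return pipeline
#
# nlp = load_pipeline()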
""") # Input sentence sentence = st.text_input("✏️ Enter a sentence:", "John eats an apple.") if sentence: doc = nlp(sentence) sent = list(doc.sents)[0] col1, col2, col3 = st.columns(3) # 1️⃣ Dependency Parsing with col1: st.subheader("πŸ”— Dependency Parsing") dep_graph = Digraph() dep_graph.attr(rankdir="TB") for token in sent: dep_graph.node(token.text, f"{token.text}\n({token.dep_})") if token.head != token: dep_graph.edge(token.head.text, token.text) st.graphviz_chart(dep_graph) with st.expander("Raw Dependency Tuples"): st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent)) # 2️⃣ Constituency Parsing with col2: st.subheader("🌳 Constituency Parsing") tree_str = sent._.parse_string with st.expander("Tree String"): st.text(tree_str) st.code(Tree.fromstring(tree_str).pformat(), language="text") # 3️⃣ Simulated ASR with col3: st.subheader("🧩 Simulated ASR Output") st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.") highlighted_output = [] for token in sent: if token.dep_ in ("nsubj", "obj", "det", "ROOT"): highlighted_output.append( f"**[{token.text}]** - {token.dep_} β†’ {token.head.text} ({token.pos_})" ) st.write("\n".join(highlighted_output)) with st.expander("ASR Encoded String"): st.code( " ".join(f"[{t.text}: {t.dep_} β†’ {t.head.text}]({t.pos_})" for t in sent) )