import streamlit as st
import nltk
import spacy
import benepar
from nltk import Tree
from graphviz import Digraph
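# Note: "graphviz" here is the Python bindings package. st.graphviz_chart
# renders the generated DOT source in the browser, but calling methods like
# Digraph.render() locally would also need the system Graphviz binaries.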
# Configure NLTK to use /tmp for downloads
nltk_data_path = "/tmp/nltk_data"
nltk.data.path.append(nltk_data_path)
nltk.download('punkt', download_dir=nltk_data_path)
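# (The default NLTK data directory is often read-only on hosted platforms
# such as Streamlit Community Cloud, which is why /tmp is used here.)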
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Add benepar if not already added
if "benepar" not in nlp.pipe_names:
    benepar.download("benepar_en3")
    nlp.add_pipe("benepar", config={"model": "benepar_en3"})
# Streamlit page config
st.set_page_config(
    page_title="🔍 Syntax Parser Comparison Tool",
    page_icon="🔍",
    layout="wide"
)
# Sidebar Info
st.sidebar.title("ℹ️ About This Tool")
st.sidebar.write("""
Compare **Dependency Parsing**, **Constituency Parsing**,
and a simulated **Abstract Syntax Representation (ASR)**.
""")
st.sidebar.markdown("---")
st.sidebar.info("💡 Enter a sentence in the input box to see all 3 parses.")
# Title
st.title("🔍 Syntax Parser Comparison Tool")
st.markdown("""
This tool demonstrates **three parsing styles** side-by-side:
1. **Dependency Parsing** – Shows head-dependent word relationships.
2. **Constituency Parsing** – Displays hierarchical phrase structures.
3. **Abstract Syntax Representation (ASR)** – Merges phrase structure with dependency info.
""")
# Input sentence
sentence = st.text_input("✏️ Enter a sentence:", "John eats an apple.")
if sentence:
    doc = nlp(sentence)
    sent = list(doc.sents)[0]
    col1, col2, col3 = st.columns(3)
    # 1️⃣ Dependency Parsing
    with col1:
        st.subheader("🔗 Dependency Parsing")
        dep_graph = Digraph()
        dep_graph.attr(rankdir="TB")
        for token in sent:
            # Key nodes by token index so repeated words (e.g. two "the"s)
            # don't collapse into a single graph node
            dep_graph.node(str(token.i), f"{token.text}\n({token.dep_})")
            if token.head != token:
                dep_graph.edge(str(token.head.i), str(token.i))
        st.graphviz_chart(dep_graph)
        with st.expander("Raw Dependency Tuples"):
            st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent))
    # 2️⃣ Constituency Parsing
    with col2:
        st.subheader("🌳 Constituency Parsing")
        tree_str = sent._.parse_string
        with st.expander("Tree String"):
            st.text(tree_str)
        st.code(Tree.fromstring(tree_str).pformat(), language="text")
    # 3️⃣ Simulated ASR
    with col3:
        st.subheader("🧩 Simulated ASR Output")
        st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.")
        highlighted_output = []
        for token in sent:
            # en_core_web_sm labels direct objects "dobj" rather than the
            # UD-style "obj", so accept both label schemes here
            if token.dep_ in ("nsubj", "obj", "dobj", "det", "ROOT"):
                highlighted_output.append(
                    f"**[{token.text}]** - {token.dep_} → {token.head.text} ({token.pos_})"
                )
        # Join with blank lines so each entry renders on its own line in Markdown
        st.write("\n\n".join(highlighted_output))
        with st.expander("ASR Encoded String"):
            st.code(
                " ".join(f"[{t.text}: {t.dep_} → {t.head.text}]({t.pos_})" for t in sent)
            )
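# To run locally (assumed setup; "app.py" is a placeholder filename):
#   pip install streamlit spacy benepar nltk graphviz
#   python -m spacy download en_core_web_sm
#   streamlit run app.py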