# Spaces: utkarsh1797/syntax-parser-comparison (status: Sleeping)
# File size: 1,871 Bytes
# Commit hashes from the viewer's per-line history column:
#   991956b, 98ffeff, b16059e, 76a5a4a, 5687e79, b6bbf8d

import streamlit as st
import nltk
import spacy
import benepar
from nltk import Tree

# Configure nltk to use /tmp
nltk_data_path = "/tmp/nltk_data"

# Streamlit re-runs this script on every interaction while the already-imported
# nltk module (and its process-wide search path list) stays alive, so register
# the directory only once instead of appending a duplicate on each rerun.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

# Only contact the download server when the tokenizer data is actually missing;
# nltk.data.find raises LookupError when the resource is not on the search path.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download('punkt', download_dir=nltk_data_path)

# show_spinner=False keeps the cached loader from emitting a UI element, so it
# is safe to call before st.set_page_config further down the script.
@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the spaCy pipeline with the benepar constituency parser attached.

    The result is cached by Streamlit, so the model load and the benepar
    download/registration run once per server process rather than on every
    script rerun triggered by a widget interaction.

    Returns:
        The loaded "en_core_web_sm" pipeline with a "benepar" component added.
    """
    pipeline = spacy.load("en_core_web_sm")

    # Add benepar parser (skipped if the pipeline already carries one)
    if "benepar" not in pipeline.pipe_names:
        benepar.download("benepar_en3")
        pipeline.add_pipe("benepar", config={"model": "benepar_en3"})
    return pipeline


# Load installed spaCy model (shared, cached instance)
nlp = _load_pipeline()
# Page setup: wide layout, title and a one-line description of the tool
st.set_page_config(
    layout="wide",
    page_title="Syntax Parser Comparison",
)
st.title("🌐 Syntax Parser Comparison Tool")
st.write(
    "This tool compares Dependency Parsing, Constituency Parsing, "
    "and a simulated Abstract Syntax Representation (ASR)."
)

# Sentence to analyse, pre-filled with an example
sentence = st.text_input(label="Enter a sentence:", value="John eats an apple.")

# Dependency labels shown in the simulated ASR column. spaCy's English models
# label direct objects "dobj"; "obj" is kept as well so pipelines that emit
# that label still match.
_ASR_DEPS = frozenset({"nsubj", "dobj", "obj", "det", "ROOT"})


def _render_analysis(sent):
    """Render the three parser views for one sentence in side-by-side columns.

    Args:
        sent: A sentence span from the parsed document. Its tokens are read
            for text, dependency label, head and POS tag, and its
            ``_.parse_string`` extension is read for the constituency parse.
    """
    tokens = list(sent)
    col1, col2, col3 = st.columns(3)

    with col1:
        st.header("Dependency Parsing")
        for token in tokens:
            st.write(f"{token.text} --> {token.dep_} --> {token.head.text}")
        st.code(" ".join(f"({token.text}, {token.dep_}, {token.head.text})" for token in tokens))

    with col2:
        st.header("Constituency Parsing")
        # Bracketed parse string; shown raw first, then pretty-printed via nltk.
        parse_string = sent._.parse_string
        st.text(parse_string)
        st.code(Tree.fromstring(parse_string).pformat())

    with col3:
        st.header("Simulated ASR Output")
        st.write("Combining phrase structure with dependency head annotations:")
        for token in tokens:
            if token.dep_ in _ASR_DEPS:
                st.write(f"[{token.text}] - {token.dep_} --> {token.head.text} ({token.pos_})")
        st.markdown("_(ASR is simulated by combining POS tags, dependency heads, and phrase information.)_")
        st.code(" ".join(f"[{token.text}: {token.dep_} → {token.head.text}]({token.pos_})" for token in tokens))


# A whitespace-only string is truthy but contains nothing to parse, so check
# the stripped text rather than the raw widget value.
if sentence and sentence.strip():
    doc = nlp(sentence.strip())
    sentences = list(doc.sents)

    if not sentences:
        # Avoid an IndexError when the pipeline yields no sentence at all.
        st.warning("No sentence could be detected in the input.")
    else:
        sent = sentences[0]
        if len(sentences) > 1:
            # Make the truncation visible instead of silently dropping text.
            st.info("Multiple sentences detected; only the first one is analysed.")
        _render_analysis(sent)