import streamlit as st
import nltk
import spacy
import benepar
from nltk import Tree
from graphviz import Digraph

# Configure NLTK to use /tmp for downloads
nltk_data_path = "/tmp/nltk_data"
nltk.data.path.append(nltk_data_path)
nltk.download('punkt', download_dir=nltk_data_path)
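
# Note (assumption, worth verifying for your benepar version): benepar also
# fetches its models through NLTK's downloader, so pointing nltk.data.path at
# /tmp above keeps the benepar_en3 download writable on read-only hosts.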

# Load spaCy model
nlp = spacy.load("en_core_web_sm")
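
# Optional hardening (a sketch, assumes network access at startup): replace
# the plain load above with a guarded version that fetches the model on
# first run if it is missing from the environment.
# try:
#     nlp = spacy.load("en_core_web_sm")
# except OSError:
#     spacy.cli.download("en_core_web_sm")
#     nlp = spacy.load("en_core_web_sm")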

# Add benepar if not already added
if "benepar" not in nlp.pipe_names:
    benepar.download("benepar_en3")
    nlp.add_pipe("benepar", config={"model": "benepar_en3"})

# Streamlit page config
st.set_page_config(
    page_title="🌐 Syntax Parser Comparison Tool",
    page_icon="📝",
    layout="wide"
)

# Sidebar Info
st.sidebar.title("ℹ️ About This Tool")
st.sidebar.write("""
Compare **Dependency Parsing**, **Constituency Parsing**,  
and a simulated **Abstract Syntax Representation (ASR)**.
""")
st.sidebar.markdown("---")
st.sidebar.info("💡 Enter a sentence in the input box to see all 3 parses.")

# Title
st.title("🌐 Syntax Parser Comparison Tool")
st.markdown("""
This tool demonstrates **three parsing styles** side-by-side:
1. **Dependency Parsing** – Shows head–dependent word relationships.
2. **Constituency Parsing** – Displays hierarchical phrase structures.
3. **Abstract Syntax Representation (ASR)** – Merges phrase structure with dependency info.
""")

# Input sentence
sentence = st.text_input("✏️ Enter a sentence:", "John eats an apple.")

if sentence:
    doc = nlp(sentence)
    sent = list(doc.sents)[0]
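    # Note: only the first sentence of the input is parsed; anything after the
    # first sentence boundary is ignored.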

    col1, col2, col3 = st.columns(3)

    # 1️⃣ Dependency Parsing
    with col1:
        st.subheader("🔗 Dependency Parsing")
        dep_graph = Digraph()
        dep_graph.attr(rankdir="TB")
        for token in sent:
            # Key nodes by token index so repeated words (e.g. two occurrences
            # of "the") get distinct nodes instead of collapsing into one.
            dep_graph.node(str(token.i), f"{token.text}\n({token.dep_})")
            if token.head != token:
                dep_graph.edge(str(token.head.i), str(token.i))
        st.graphviz_chart(dep_graph)
        with st.expander("Raw Dependency Tuples"):
            st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent))
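
        # Alternative renderer (a sketch): spaCy's built-in displaCy can emit
        # the same dependency parse as an SVG, embedded via Streamlit's HTML
        # component; the height value here is an illustrative guess.
        # from spacy import displacy
        # import streamlit.components.v1 as components
        # svg = displacy.render(sent, style="dep", jupyter=False)
        # components.html(svg, height=300, scrolling=True)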

    # 2️⃣ Constituency Parsing
    with col2:
        st.subheader("🌳 Constituency Parsing")
        tree_str = sent._.parse_string
        with st.expander("Tree String"):
            st.text(tree_str)
        st.code(Tree.fromstring(tree_str).pformat(), language="text")
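
        # Optional (a sketch): Tree.pretty_print() draws an ASCII-art tree,
        # which can be easier to scan than the bracketed pformat() output.
        # import io
        # buf = io.StringIO()
        # Tree.fromstring(tree_str).pretty_print(stream=buf)
        # st.code(buf.getvalue(), language="text")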

    # 3️⃣ Simulated ASR
    with col3:
        st.subheader("🧩 Simulated ASR Output")
        st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.")
        highlighted_output = []
        for token in sent:
            # en_core_web_sm emits the ClearNLP-style "dobj" label for direct
            # objects; "obj" is kept for pipelines trained on UD-style labels.
            if token.dep_ in ("nsubj", "obj", "dobj", "det", "ROOT"):
                highlighted_output.append(
                    f"**[{token.text}]** - {token.dep_} → {token.head.text} ({token.pos_})"
                )
        # Join with markdown hard line breaks (two trailing spaces) so each
        # token renders on its own line.
        st.markdown("  \n".join(highlighted_output))
        with st.expander("ASR Encoded String"):
            st.code(
                " ".join(f"[{t.text}: {t.dep_} โ†’ {t.head.text}]({t.pos_})" for t in sent)
            )
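
# Local run sketch (assumes a standard pip environment; the package list is
# inferred from the imports above, and the filename is illustrative):
#   pip install streamlit spacy benepar nltk graphviz
#   python -m spacy download en_core_web_sm
#   streamlit run app.py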