utkarsh1797 committed
Commit 4e662f8 · verified · 1 Parent(s): 3688557

Update src/streamlit_app.py

Files changed (1):
  src/streamlit_app.py  +113 -20
src/streamlit_app.py CHANGED
@@ -1,28 +1,105 @@
+# import streamlit as st
+# import nltk
+# import spacy
+# import benepar
+# from nltk import Tree
+
+# # Configure nltk to use /tmp
+# nltk_data_path = "/tmp/nltk_data"
+# nltk.data.path.append(nltk_data_path)
+# nltk.download('punkt', download_dir=nltk_data_path)
+
+# # Load installed spaCy model
+# nlp = spacy.load("en_core_web_sm")
+
+# # Add benepar parser
+# if "benepar" not in nlp.pipe_names:
+#     benepar.download("benepar_en3")
+#     nlp.add_pipe("benepar", config={"model": "benepar_en3"})
+# # Streamlit UI
+# st.set_page_config(page_title="Syntax Parser Comparison", layout="wide")
+# st.title("🌐 Syntax Parser Comparison Tool")
+# st.write("This tool compares Dependency Parsing, Constituency Parsing, and a simulated Abstract Syntax Representation (ASR).")
+
+# # Input
+# sentence = st.text_input("Enter a sentence:", "John eats an apple.")
+
+# if sentence:
+#     doc = nlp(sentence)
+#     sent = list(doc.sents)[0]
+
+#     col1, col2, col3 = st.columns(3)
+
+#     with col1:
+#         st.header("Dependency Parsing")
+#         for token in sent:
+#             st.write(f"{token.text} --> {token.dep_} --> {token.head.text}")
+#         st.code(" ".join(f"({token.text}, {token.dep_}, {token.head.text})" for token in sent))
+
+#     with col2:
+#         st.header("Constituency Parsing")
+#         tree = sent._.parse_string
+#         st.text(tree)
+#         st.code(Tree.fromstring(tree).pformat())
+
+#     with col3:
+#         st.header("Simulated ASR Output")
+#         st.write("Combining phrase structure with dependency head annotations:")
+#         for token in sent:
+#             if token.dep_ in ("nsubj", "obj", "det", "ROOT"):
+#                 st.write(f"[{token.text}] - {token.dep_} --> {token.head.text} ({token.pos_})")
+#         st.markdown("_(ASR is simulated by combining POS tags, dependency heads, and phrase information.)_")
+#         st.code(" ".join(f"[{token.text}: {token.dep_} → {token.head.text}]({token.pos_})" for token in sent))
+
+
+
 import streamlit as st
 import nltk
 import spacy
 import benepar
 from nltk import Tree
+from graphviz import Digraph
 
-# Configure nltk to use /tmp
+# Configure NLTK to use /tmp for downloads
 nltk_data_path = "/tmp/nltk_data"
 nltk.data.path.append(nltk_data_path)
 nltk.download('punkt', download_dir=nltk_data_path)
 
-# Load installed spaCy model
+# Load spaCy model
 nlp = spacy.load("en_core_web_sm")
 
-# Add benepar parser
+# Add benepar if not already added
 if "benepar" not in nlp.pipe_names:
     benepar.download("benepar_en3")
     nlp.add_pipe("benepar", config={"model": "benepar_en3"})
-# Streamlit UI
-st.set_page_config(page_title="Syntax Parser Comparison", layout="wide")
+
+# Streamlit page config
+st.set_page_config(
+    page_title="🌐 Syntax Parser Comparison Tool",
+    page_icon="📝",
+    layout="wide"
+)
+
+# Sidebar Info
+st.sidebar.title("ℹ️ About This Tool")
+st.sidebar.write("""
+Compare **Dependency Parsing**, **Constituency Parsing**,
+and a simulated **Abstract Syntax Representation (ASR)**.
+""")
+st.sidebar.markdown("---")
+st.sidebar.info("💡 Enter a sentence in the input box to see all 3 parses.")
+
+# Title
 st.title("🌐 Syntax Parser Comparison Tool")
-st.write("This tool compares Dependency Parsing, Constituency Parsing, and a simulated Abstract Syntax Representation (ASR).")
+st.markdown("""
+This tool demonstrates **three parsing styles** side-by-side:
+1. **Dependency Parsing** – Shows head–dependent word relationships.
+2. **Constituency Parsing** – Displays hierarchical phrase structures.
+3. **Abstract Syntax Representation (ASR)** – Merges phrase structure with dependency info.
+""")
 
-# Input
-sentence = st.text_input("Enter a sentence:", "John eats an apple.")
+# Input sentence
+sentence = st.text_input("✏️ Enter a sentence:", "John eats an apple.")
 
 if sentence:
     doc = nlp(sentence)
@@ -30,23 +107,39 @@ if sentence:
 
     col1, col2, col3 = st.columns(3)
 
+    # 1️⃣ Dependency Parsing
     with col1:
-        st.header("Dependency Parsing")
+        st.subheader("🔗 Dependency Parsing")
+        dep_graph = Digraph()
+        dep_graph.attr(rankdir="TB")
         for token in sent:
-            st.write(f"{token.text} --> {token.dep_} --> {token.head.text}")
-        st.code(" ".join(f"({token.text}, {token.dep_}, {token.head.text})" for token in sent))
+            dep_graph.node(token.text, f"{token.text}\n({token.dep_})")
+            if token.head != token:
+                dep_graph.edge(token.head.text, token.text)
+        st.graphviz_chart(dep_graph)
+        with st.expander("Raw Dependency Tuples"):
+            st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent))
 
+    # 2️⃣ Constituency Parsing
     with col2:
-        st.header("Constituency Parsing")
-        tree = sent._.parse_string
-        st.text(tree)
-        st.code(Tree.fromstring(tree).pformat())
+        st.subheader("🌳 Constituency Parsing")
+        tree_str = sent._.parse_string
+        with st.expander("Tree String"):
+            st.text(tree_str)
+        st.code(Tree.fromstring(tree_str).pformat(), language="text")
 
+    # 3️⃣ Simulated ASR
     with col3:
-        st.header("Simulated ASR Output")
-        st.write("Combining phrase structure with dependency head annotations:")
+        st.subheader("🧩 Simulated ASR Output")
+        st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.")
+        highlighted_output = []
        for token in sent:
             if token.dep_ in ("nsubj", "obj", "det", "ROOT"):
-                st.write(f"[{token.text}] - {token.dep_} --> {token.head.text} ({token.pos_})")
-        st.markdown("_(ASR is simulated by combining POS tags, dependency heads, and phrase information.)_")
-        st.code(" ".join(f"[{token.text}: {token.dep_} → {token.head.text}]({token.pos_})" for token in sent))
+                highlighted_output.append(
+                    f"**[{token.text}]** - {token.dep_} {token.head.text} ({token.pos_})"
+                )
+        st.write("\n".join(highlighted_output))
+        with st.expander("ASR Encoded String"):
+            st.code(
+                " ".join(f"[{t.text}: {t.dep_} → {t.head.text}]({t.pos_})" for t in sent)
+            )
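
One caveat worth flagging in the new dependency-graph code: Digraph nodes are keyed by token.text, so a sentence that repeats a word (e.g. "The dog chased the cat.") collapses both occurrences into a single node and draws misleading edges. Below is a minimal collision-safe sketch that keys nodes by spaCy's token.i index instead; this is a suggested variant, not part of this commit.

import spacy
from graphviz import Digraph

nlp = spacy.load("en_core_web_sm")
sent = list(nlp("The dog chased the cat.").sents)[0]

dep_graph = Digraph()
dep_graph.attr(rankdir="TB")
for token in sent:
    # token.i is unique per token, so repeated words stay separate nodes
    dep_graph.node(str(token.i), f"{token.text}\n({token.dep_})")
    if token.head != token:  # the ROOT token is its own head; skip the self-edge
        dep_graph.edge(str(token.head.i), str(token.i))
# In the app this renders the same way via st.graphviz_chart(dep_graph).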