import streamlit as st
import nltk
import spacy
import benepar
from nltk import Tree
from graphviz import Digraph
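# Note: "graphviz" here is the Python bindings package. st.graphviz_chart
# renders the generated DOT source in the browser, but calling methods like
# Digraph.render() locally would also need the system Graphviz binaries.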
# Configure NLTK to use /tmp for downloads
nltk_data_path = "/tmp/nltk_data"
nltk.data.path.append(nltk_data_path)
nltk.download('punkt', download_dir=nltk_data_path)
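# (The default NLTK data directory is often read-only on hosted platforms
# such as Streamlit Community Cloud, which is why /tmp is used here.)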
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Add benepar if not already added
if "benepar" not in nlp.pipe_names:
    benepar.download("benepar_en3")
    nlp.add_pipe("benepar", config={"model": "benepar_en3"})
# Streamlit page config
st.set_page_config(
    page_title="🔍 Syntax Parser Comparison Tool",
    page_icon="🔍",
    layout="wide"
)
# Sidebar Info
st.sidebar.title("ℹ️ About This Tool")
st.sidebar.write("""
Compare **Dependency Parsing**, **Constituency Parsing**,
and a simulated **Abstract Syntax Representation (ASR)**.
""")
st.sidebar.markdown("---")
st.sidebar.info("💡 Enter a sentence in the input box to see all 3 parses.")
# Title
st.title("🔍 Syntax Parser Comparison Tool")
st.markdown("""
This tool demonstrates **three parsing styles** side-by-side:
1. **Dependency Parsing** – Shows head-dependent word relationships.
2. **Constituency Parsing** – Displays hierarchical phrase structures.
3. **Abstract Syntax Representation (ASR)** – Merges phrase structure with dependency info.
""")
# Input sentence
sentence = st.text_input("✏️ Enter a sentence:", "John eats an apple.")
if sentence:
    doc = nlp(sentence)
    sent = list(doc.sents)[0]
    col1, col2, col3 = st.columns(3)
    # 1️⃣ Dependency Parsing
    with col1:
        st.subheader("🔗 Dependency Parsing")
        dep_graph = Digraph()
        dep_graph.attr(rankdir="TB")
        for token in sent:
            # Key nodes by token index so repeated words (e.g. two "the"s)
            # don't collapse into a single graph node
            dep_graph.node(str(token.i), f"{token.text}\n({token.dep_})")
            if token.head != token:
                dep_graph.edge(str(token.head.i), str(token.i))
        st.graphviz_chart(dep_graph)
        with st.expander("Raw Dependency Tuples"):
            st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent))
    # 2️⃣ Constituency Parsing
    with col2:
        st.subheader("🌳 Constituency Parsing")
        tree_str = sent._.parse_string
        with st.expander("Tree String"):
            st.text(tree_str)
        st.code(Tree.fromstring(tree_str).pformat(), language="text")
    # 3️⃣ Simulated ASR
    with col3:
        st.subheader("🧩 Simulated ASR Output")
        st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.")
        highlighted_output = []
        for token in sent:
            # en_core_web_sm labels direct objects "dobj" rather than the
            # UD-style "obj", so accept both label schemes here
            if token.dep_ in ("nsubj", "obj", "dobj", "det", "ROOT"):
                highlighted_output.append(
                    f"**[{token.text}]** - {token.dep_} → {token.head.text} ({token.pos_})"
                )
        # Join with blank lines so each entry renders on its own line in Markdown
        st.write("\n\n".join(highlighted_output))
        with st.expander("ASR Encoded String"):
            st.code(
                " ".join(f"[{t.text}: {t.dep_} → {t.head.text}]({t.pos_})" for t in sent)
            )
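# To run locally (assumed setup; "app.py" is a placeholder filename):
#   pip install streamlit spacy benepar nltk graphviz
#   python -m spacy download en_core_web_sm
#   streamlit run app.py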