# Spaces: Build error  (page-status text captured with the source; not part of the program)
import streamlit as st | |
import os | |
import pandas as pd | |
import json | |
from html import escape | |
import difflib | |
def generate_diff_html_word_level(text1: str, text2: str) -> str:
    """Render a word-level diff of two texts as an HTML fragment.

    Both texts are split on whitespace, so runs of spaces, tabs and newlines
    are collapsed: only words are compared, and the output separates every
    word and every diff segment with a single space.

    Args:
        text1: The original text. Words present only here are wrapped in a
            ``<del>`` element.
        text2: The altered text. Words present only here are wrapped in an
            ``<ins>`` element.

    Returns:
        A ``<pre style="white-space: pre-wrap;">`` element containing the
        HTML-escaped words. A replaced run is rendered as the deleted words
        followed by the inserted words.
    """
    del_open = '<del style="background-color: #fbb6ce;">'
    ins_open = '<ins style="background-color: #b7e4c7;">'

    words1 = text1.split()
    words2 = text2.split()

    # autojunk=False: the default heuristic stops using frequent words as
    # match anchors once the second sequence reaches 200 items, which makes
    # diffs of longer texts coarser than necessary.
    matcher = difflib.SequenceMatcher(None, words1, words2, autojunk=False)

    segments = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            segments.append(escape(' '.join(words1[i1:i2])))
            continue
        # 'replace' emits both halves: the removed words, then the added ones.
        if tag in ('replace', 'delete'):
            segments.append(del_open + escape(' '.join(words1[i1:i2])) + '</del>')
        if tag in ('replace', 'insert'):
            segments.append(ins_open + escape(' '.join(words2[j1:j2])) + '</ins>')

    final_html = ' '.join(segments)
    return f'<pre style="white-space: pre-wrap;">{final_html}</pre>'
# Select the pure-Python protobuf implementation for this process.
# NOTE(review): this runs after `import streamlit` at the top of the file; if
# protobuf has already been imported by then the setting may have no effect.
# Confirm, and move it above the imports if so.
os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})

# Let the page use the full browser width.
st.set_page_config(layout="wide")
def convert_df(df) -> bytes:
    """Serialize ``df`` to CSV text, without the index column, as UTF-8 bytes."""
    csv_text = df.to_csv(index=False, quotechar='"')
    return csv_text.encode('utf-8')
def load_narratives_data():
    """Load every record from ``narratives.jsonl`` into a DataFrame.

    The file is looked up relative to the current working directory and is
    read as JSON Lines: one JSON value per line. Blank lines are skipped.

    Returns:
        A ``pandas.DataFrame`` built from the parsed lines, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the locale's default encoding,
    # which can fail or mis-decode non-ASCII text on some platforms.
    with open("narratives.jsonl", "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
# Load the narratives for this script run.
narratives_df = load_narratives_data()

# Derive a language column from the code embedded between hyphens in each id.
# With a single capture group, expand=False yields a Series for the column.
narratives_df['language'] = narratives_df['id'].str.extract('-(rus|zho|fas)-', expand=False)

# Two columns with a 1:3 width ratio: navigation widgets and content.
col1, col2 = st.columns([1, 3], gap="large")

with st.sidebar:
    st.title("Options")
with col1:
    st.title("Narratives")

    def reset_selection():
        """Return both selectors to the overview when the language changes.

        An index or ID chosen under one filter does not refer to the same
        narrative under another, and the index may be out of range.
        """
        st.session_state.narrative_number = -1
        st.session_state.selectbox_narrative = "Overview"

    # Language filter: "All" keeps every narrative, otherwise only the rows
    # whose language column equals the chosen code.
    selected_language = st.selectbox(
        "Select language",
        ["All", "rus", "zho", "fas"],
        on_change=reset_selection,
    )

    if selected_language != "All":
        filtered_df = narratives_df[narratives_df['language'] == selected_language]
    else:
        filtered_df = narratives_df

    narrative_ids = filtered_df["id"].tolist()

    container_for_nav = st.container()

    def sync_from_drop():
        """Mirror the ID dropdown's selection into the index input."""
        chosen = st.session_state.selectbox_narrative
        index = -1 if chosen == "Overview" else narrative_ids.index(chosen)
        st.session_state.narrative_index = index
        # The content column displays whatever the number input holds, so the
        # dropdown has no visible effect unless this key is updated as well.
        st.session_state.narrative_number = index

    def sync_from_number():
        """Mirror the index input's value into the ID dropdown."""
        index = st.session_state.narrative_number
        st.session_state.narrative_index = index
        if index == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[index]

    # -1 stands for the overview page; 0..len-1 are positions in narrative_ids.
    narrative_number = container_for_nav.number_input(
        min_value=-1, step=1, max_value=len(narrative_ids) - 1,
        on_change=sync_from_number,
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        key="narrative_number"
    )

    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative"
    )

    st.divider()
with col2:
    # The number input's value selects the view: a negative value shows the
    # overview, anything else is a row position in the filtered frame.
    narrative_index = narrative_number

    if narrative_index < 0:
        st.title("Overview")
        st.write(f"Total number of narratives: {len(filtered_df)}")
        if selected_language != "All":
            st.write(f"Selected language: {selected_language}")
        st.write("Select a narrative from the sidebar to view and edit its details.")
    else:
        narrative = filtered_df.iloc[narrative_index]

        st.markdown("<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>", unsafe_allow_html=True)

        container = st.container()
        container.subheader(f"Narrative ID: {narrative['id']}")
        container.divider()

        # Word-level diff between the two English versions, in a bordered box.
        container.subheader("Diff: Original English vs Altered English")
        original_english = narrative['original_english'].strip()
        altered_english = narrative['altered_english'].strip()
        processed_diff = generate_diff_html_word_level(original_english, altered_english)
        diff_box = container.container(border=True)
        with diff_box:
            st.markdown(processed_diff, unsafe_allow_html=True)
        container.divider()

        # Editable copy of the original text; the edited value is only held
        # in original_input here.
        container.subheader("Original Text")
        original_text = narrative['original'].strip()
        original_input = container.text_area("Edit the original text", value=original_text, height=300)