"""Gradio demo for the CxGLearner construction parser.

The app encodes an input sentence, lists the constructions the parser finds
in it, and shows a random handful of stored examplars for the construction
that is currently selected.
"""
import json
import os
import random
import tempfile
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd

from cxglearner.config import Config, DefaultConfigs
from cxglearner.parser import Parser
from cxglearner.utils import init_logger
from cxglearner.utils.utils_cxs import convert_slots_to_str

# Logs and the parser cache both live under the system temp directory.
temp_dir = tempfile.gettempdir()
log_dir = Path(temp_dir) / "logs"
log_dir.mkdir(exist_ok=True)
cahce_dir = Path(temp_dir) / "cache"

config = Config(DefaultConfigs.eng)
config.experiment.log_path = log_dir / "eng.log"
logger = init_logger(config)
parser = Parser(config=config, version="1.1", logger=logger, cache_dir=cahce_dir)

# Sample sentences offered in the "Click an example" dataset widget.
examples = [["she should be more polite with the customers."]]

# Upper bound on the number of examplars shown for one construction.
MAX_EXAMPLAR = 10

# Maps a construction's slot string to its list of examplars.
with open("data/learner_examplar_1.1.json", "r", encoding="utf-8") as fp:
    examplars = json.load(fp)
logger.debug(len(examplars))


def _construction_from_label(label: str) -> str:
    """Extract the construction string from a radio label.

    Labels are built by ``parse_text`` as ``"<index> | <slots> | <range>"``.
    The text between the first and the last ``|`` is returned, so a slot
    string that itself contains ``|`` does not break the parsing.
    """
    _, _, remainder = label.partition("|")
    construction, _, _ = remainder.rpartition("|")
    return construction.strip()


def _sample_examplars(construction: str) -> pd.DataFrame:
    """Return up to ``MAX_EXAMPLAR`` distinct examplars of ``construction``.

    The result is a one-column frame named after the construction, or an
    empty frame when the construction has no stored examplars.
    """
    if construction not in examplars:
        return pd.DataFrame()
    pool = examplars[construction]
    # random.sample draws without replacement, so no examplar is repeated.
    picked = random.sample(pool, k=min(MAX_EXAMPLAR, len(pool)))
    return pd.DataFrame(picked, columns=[construction])


def fill_input_box(example):
    """Return the sentence of a clicked dataset sample for the input box."""
    return example[0]


def parse_text(text):
    """Encode and parse ``text`` and build the three result widgets' values.

    Args:
        text: Sentence typed into the input box.

    Returns:
        A 3-tuple ``(encoding, radio, examplars)``:
        the stacked lexical / upos / xpos rows of the encoded sentence,
        a ``gr.Radio`` listing one label per parsed construction (the first
        one pre-selected), and a frame of examplars for that first
        construction. For empty input, blank components are returned.
    """
    if not text:
        return gr.Dataframe(), gr.update(choices=[], value=None), gr.Dataframe()

    encoded = parser.encoder.encode(text, raw=True)
    # One row per annotation layer, one column per token.
    encoding_table = np.vstack((
        np.array(encoded["lexical"]),
        np.array(encoded["upos"]["spaCy"]),
        np.array(encoded["xpos"]["spaCy"]),
    ))

    # Each parsed entry is indexed as (construction index, start, end); the
    # start is shifted by one for display (presumably to a 1-based position).
    labels = [
        "{} | {} | {}-{}".format(
            cxs[0],
            convert_slots_to_str(parser.cxs_decoder[cxs[0]], parser.encoder, logger),
            cxs[1] + 1,
            cxs[2],
        )
        for cxs in parser.parse(text)[0]
    ]

    if not labels:
        return encoding_table, gr.Radio(label="Constructions", choices=[]), pd.DataFrame()

    radio_display = gr.Radio(
        label="Constructions",
        choices=labels,
        interactive=True,
        value=labels[0],
    )
    cons_df = _sample_examplars(_construction_from_label(labels[0]))
    return encoding_table, radio_display, cons_df


def refresh_examplar(option: str):
    """Return a fresh random examplar table for the selected radio label.

    Args:
        option: The selected label, formatted as produced by ``parse_text``.

    Returns:
        A one-column frame of examplars, or an empty frame when nothing is
        selected or the construction has no stored examplars.
    """
    if not option:
        return pd.DataFrame()
    logger.debug(option)
    # The label comes back from the UI, so it is only ever parsed as text.
    return _sample_examplars(_construction_from_label(option))


def clear_text():
    """Return blank values for the input box and the three result widgets."""
    return "", pd.DataFrame(), gr.Radio(label="Constructions", choices=[]), pd.DataFrame()


with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("## CxGLearner Parser")
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter a sentence here...")
        with gr.Row():
            dataset = gr.Dataset(components=[input_text], samples=examples, label="Click an example")
            clear_buttton = gr.Button("Clear")
            parser_button = gr.Button("Parse")
    with gr.Column():
        gr.Markdown("### Results of Encoding and Parsing")
        enc_display = gr.Dataframe()
        cxs_display = gr.Radio(label="Constructions", choices=[])
    with gr.Column():
        gr.Markdown("### Examplars")
        cons_display = gr.Dataframe()

    parser_button.click(fn=parse_text, inputs=[input_text],
                        outputs=[enc_display, cxs_display, cons_display])
    clear_buttton.click(fn=clear_text, inputs=[],
                        outputs=[input_text, enc_display, cxs_display, cons_display])
    dataset.click(fn=fill_input_box, inputs=dataset, outputs=input_text)
    # Picking another construction re-samples its examplars.
    cxs_display.select(refresh_examplar, inputs=[cxs_display], outputs=cons_display)

demo.launch()