Spaces:
Build error
Build error
| import os | |
| import pickle | |
| import tempfile | |
| import gradio as gr | |
| from tqdm import tqdm | |
| from views.utils import ( | |
| create_input_instruction, | |
| format_prediction_ouptut, | |
| remove_temp_dir, | |
| EXAMPLE_CONVERSATIONS, | |
| ) | |
| from fairseq.data.data_utils import collate_tokens | |
| import sys | |
| sys.path.insert(0, "../") # neccesary to load modules outside of app | |
| from views import roberta, comet, COSMIC_MODEL, cosmic_args | |
| from preprocessing import preprocess | |
| from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader | |
def cosmic_preprocess(input, dir="."):
    """Turn a raw conversation into the pickle files the COSMIC pipeline reads.

    The input is validated with ``preprocess.process_user_input``. The parsed
    data is written to ``epik.csv`` and then converted into ``epik.pkl``
    (with the id splits merged in) and ``epik_sentences.pkl``. Every file is
    created inside ``dir``.

    Args:
        input: Raw conversation handed to ``preprocess.process_user_input``.
        dir: Directory that receives every generated file.

    Returns:
        A tuple ``(path to epik.pkl, path to epik_sentences.pkl)``.

    Raises:
        gr.Error: If the parser result has a falsy ``"success"`` entry; the
            error carries the parser's ``"message"``.
    """
    parsed = preprocess.process_user_input(input)
    if not parsed["success"]:
        raise gr.Error(parsed["message"])
    conversation_data = parsed["data"]

    # All artefacts live side by side in the target directory.
    csv_path = os.path.join(dir, "epik.csv")
    features_pkl = os.path.join(dir, "epik.pkl")
    sentences_pkl = os.path.join(dir, "epik_sentences.pkl")

    # Step 1: dump the parsed conversation to CSV (no labels at inference).
    grouped = preprocess.preapre_csv(conversation_data, csv_path, with_label=False)

    # Step 2: CSV -> pickle of speakers, labels, sentences.
    list_columns = ["Text", "ParticipantRoleEncoded", "LabelNumeric"]
    column_order = ["ParticipantRoleEncoded", "LabelNumeric", "Text"]
    preprocess.convert_to_pickle(
        source=csv_path,
        dest=features_pkl,
        index_col="ConversationId",
        list_type_columns=list_columns,
        order=column_order,
        exclude=["ParticipantRole"],
    )

    # Step 3: write the id split files. The ids are meant to end up in the
    # validation set; the 0, 0, 1 arguments are presumably the
    # train/test/validation shares -- confirm against split_and_save_ids.
    conversation_ids = grouped["ConversationId"].to_list()
    preprocess.split_and_save_ids(conversation_ids, 0, 0, 1, dir=dir)

    # Step 4: attach the id lists to the feature pickle.
    id_files = ["train_set.txt", "test_set.txt", "validation_set.txt"]
    preprocess.merge_pkl_with_ids(
        pickle_src=features_pkl,
        ids_files=id_files,
        dir=dir,
    )

    # Step 5: a second pickle that keeps only the sentences.
    dropped_columns = ["ParticipantRole", "ParticipantRoleEncoded", "LabelNumeric"]
    preprocess.convert_to_pickle(
        source=csv_path,
        dest=sentences_pkl,
        index_col="ConversationId",
        list_type_columns=["Text"],
        exclude=dropped_columns,
    )

    return features_pkl, sentences_pkl
def cosmic_roberta_extract(path, dest_dir="."):
    """Extract RoBERTa features for every conversation stored in ``path``.

    The pickle at ``path`` must unpack into
    ``(speakers, labels, sentences, train_ids, test_ids, valid_ids)``.
    For each conversation id, its sentences are encoded with the
    module-level ``roberta`` model and the first-token vector
    (``[:, 0, :]``) of each of the last four returned hidden states is kept.

    Args:
        path: Pickle file produced by the preprocessing step.
        dest_dir: Directory in which ``epik_features_roberta.pkl`` is written.

    Returns:
        Path of the written feature pickle. It stores, in order: speakers,
        labels, the four feature dicts (last hidden state first), sentences,
        train ids, test ids and validation ids.
    """
    # Context managers make sure the file handles are closed; the original
    # bare open() calls leaked them.
    with open(path, "rb") as src:
        speakers, labels, sentences, train_ids, test_ids, valid_ids = pickle.load(src)

    roberta1, roberta2, roberta3, roberta4 = {}, {}, {}, {}
    # Paired with the hidden-state offset each dict is filled from.
    layer_targets = ((roberta1, -1), (roberta2, -2), (roberta3, -3), (roberta4, -4))

    all_ids = train_ids + test_ids + valid_ids
    for item in tqdm(all_ids):
        # Drop non-ASCII characters before tokenising. The stored
        # ``sentences`` are left untouched.
        sent = [
            s.encode("ascii", errors="ignore").decode("utf-8")
            for s in sentences[item]
        ]
        batch = collate_tokens([roberta.encode(s) for s in sent], pad_idx=1)
        feat = roberta.extract_features(batch, return_all_hiddens=True)

        for target, layer in layer_targets:
            # .cpu() is a no-op for CPU tensors and avoids the TypeError that
            # Tensor.numpy() raises when the features sit on a CUDA device.
            target[item] = list(feat[layer][:, 0, :].detach().cpu().numpy())

    roberta_feature_path = os.path.join(dest_dir, "epik_features_roberta.pkl")
    payload = [
        speakers,
        labels,
        roberta1,
        roberta2,
        roberta3,
        roberta4,
        sentences,
        train_ids,
        test_ids,
        valid_ids,
    ]
    with open(roberta_feature_path, "wb") as dst:
        pickle.dump(payload, dst)

    return roberta_feature_path
def cosmic_comet_extract(path, dir="."):
    """Extract COMET features for the sentences pickled at ``path``.

    Args:
        path: Pickle file holding the sentences to pass to ``comet.extract``.
        dir: Directory in which ``epik_features_comet.pkl`` is written.

    Returns:
        Path of the written COMET feature pickle.
    """
    print("Extracting features in", path)

    # ``with`` closes the handles deterministically; the original bare
    # open() calls left both files open.
    with open(path, "rb") as src:
        sentences = pickle.load(src)

    features = comet.extract(sentences)

    comet_feature_path = os.path.join(dir, "epik_features_comet.pkl")
    with open(comet_feature_path, "wb") as dst:
        pickle.dump(features, dst)

    return comet_feature_path
def cosmic_classifier(input):
    """Run the full COSMIC pipeline on one conversation and format the result.

    A temporary directory is created under the current working directory to
    hold the intermediate files (preprocessed pickles, RoBERTa and COMET
    features). It is removed again before returning, including when any
    step fails.

    Args:
        input: Raw conversation text, forwarded to ``cosmic_preprocess``.

    Returns:
        The value produced by ``format_prediction_ouptut`` for the first
        conversation id returned by ``get_valid_dataloader``.

    Raises:
        Whatever the pipeline steps raise. ``cosmic_preprocess`` raises
        ``gr.Error`` when the input cannot be parsed.
    """
    # create a temporary directory for the input data
    temp_dir = tempfile.mkdtemp(dir=os.getcwd(), prefix="temp")
    try:
        epik_path, epik_sentences_path = cosmic_preprocess(input, temp_dir)
        roberta_path = cosmic_roberta_extract(epik_path, temp_dir)
        comet_path = cosmic_comet_extract(epik_sentences_path, temp_dir)

        # use cosmic model to make predictions
        data_loader, ids = get_valid_dataloader(roberta_path, comet_path)
        predictions = predict(COSMIC_MODEL, data_loader, cosmic_args)

        # Only speakers and sentences are needed to render the output.
        with open(epik_path, "rb") as src:
            speakers, _, sentences, _, _, _ = pickle.load(src)

        # Assuming that there's only one conversation
        conv_id = ids[0]
        return format_prediction_ouptut(
            speakers[conv_id], sentences[conv_id], predictions[0]
        )
    finally:
        # Clean up on failure too; previously an invalid input (gr.Error)
        # or any other exception left the temp directory behind.
        print()
        print("======= Removing Temporary Directory =======")
        remove_temp_dir(temp_dir)
def cosmic_ui():
    """Assemble the Gradio interface for the COSMIC model and return it.

    The layout is a description header, the shared input instructions, and
    a two-column row: example picker, conversation box and submit button on
    the left; the prediction output on the right.

    Returns:
        The ``gr.Blocks`` object containing the whole COSMIC tab.
    """
    not_selected = "-- Not Selected --"

    def on_example_change(input):
        # Fill the text area with the chosen example; anything that is not
        # a known example (such as the placeholder entry) clears it.
        return EXAMPLE_CONVERSATIONS[input] if input in EXAMPLE_CONVERSATIONS else ""

    def clear_output(_):
        # Any edit to the conversation invalidates the shown prediction.
        return ""

    with gr.Blocks() as cosmic_model:
        gr.Markdown(
            """
            # COSMIC
            COSMIC is a popular model for predicting sentiment labels using the entire
            context of the conversation. In other words, it analyzes the previous
            messages to predict the sentiment label for the current message.<br/>
            The model was adopted from this
            [repo](https://github.com/declare-lab/conv-emotion.git), implemented based
            on this research [paper](https://arxiv.org/pdf/2010.02795.pdf).
            ```bash COSMIC: COmmonSense knowledge for eMotion Identification in
            Conversations. D. Ghosal, N. Majumder, A. Gelbukh, R. Mihalcea, & S. Poria. Findings of EMNLP 2020.
            ```
            """
        )
        create_input_instruction()

        with gr.Row():
            # Left column: where the user provides the conversation.
            with gr.Column():
                example_dropdown = gr.Dropdown(
                    choices=[not_selected, *EXAMPLE_CONVERSATIONS.keys()],
                    value=not_selected,
                    label="Select an example",
                )
                gr.Markdown('<p style="text-align: center;color: gray;">--- OR ---</p>')
                conversation_input = gr.TextArea(
                    value="",
                    label="Input you conversation",
                    placeholder="Plese input your conversation here.\n\n\n\nMaximum number of lines: 200",
                    lines=5,
                    max_lines=200,
                )
                example_dropdown.input(
                    on_example_change,
                    inputs=example_dropdown,
                    outputs=conversation_input,
                )
                submit_btn = gr.Button(value="Submit")

            # Right column: where the predictions are rendered.
            with gr.Column():
                gr.Markdown(
                    '</br></br></br></br><h3 style="text-align: center;">Predicted Sentiment Labels for the Conversation</h3></br>'
                )
                output = gr.Markdown(value="", label="Output")

        submit_btn.click(cosmic_classifier, conversation_input, output)
        conversation_input.change(clear_output, conversation_input, output)

    return cosmic_model