Spaces:
Runtime error
Runtime error
| import re | |
| import spacy | |
| import json | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel | |
| import streamlit as st | |
# Whole-word, case-insensitive matchers for third-person pronouns. They are
# consumed when rewriting text about a person into the first person.

# Masculine subject / object / reflexive forms.
he_regex = re.compile(r"\b(he|him|himself)\b", re.I)
# Masculine possessive.
his_regex = re.compile(r"\b(his)\b", re.I)
# Feminine subject / reflexive forms.
she_regex = re.compile(r"\b(she|herself)\b", re.I)
# Feminine "her" (covers both the possessive and the object form).
her_regex = re.compile(r"\b(her)\b", re.I)
def hide_footer():
    """Inject a CSS rule into the Streamlit page that hides ``footer``."""
    footer_css = (
        "\n"
        "<style>\n"
        "footer {visibility: hidden;}\n"
        "</style>\n"
    )
    # unsafe_allow_html=True asks Streamlit to render the tags instead of
    # escaping them, which is what lets the <style> block take effect.
    st.markdown(footer_css, unsafe_allow_html=True)
def get_seq2seq_model(model_id):
    """Load the sequence-to-sequence model identified by ``model_id``.

    Thin wrapper around ``AutoModelForSeq2SeqLM.from_pretrained``; the loaded
    model is returned unchanged.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    return seq2seq_model
def get_causal_model(model_id):
    """Load the causal language model identified by ``model_id``.

    Thin wrapper around ``AutoModelForCausalLM.from_pretrained``.
    """
    # NOTE(security): trust_remote_code=True permits custom modeling code
    # shipped with the model repository to be executed locally, so only
    # model ids from trusted sources should be passed here.
    load_options = {"trust_remote_code": True}
    return AutoModelForCausalLM.from_pretrained(model_id, **load_options)
def get_auto_model(model_id):
    """Load the model identified by ``model_id`` via ``AutoModel``.

    Thin wrapper around ``AutoModel.from_pretrained``; the loaded model is
    returned unchanged.
    """
    base_model = AutoModel.from_pretrained(model_id)
    return base_model
def get_tokenizer(model_id):
    """Load the tokenizer identified by ``model_id``.

    Thin wrapper around ``AutoTokenizer.from_pretrained``; the loaded
    tokenizer is returned unchanged.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tokenizer
def get_celeb_data(fpath):
    """Parse the JSON file at ``fpath`` and return its contents.

    Args:
        fpath: Path of the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8; decode it explicitly so non-ASCII names load the
    # same way regardless of the platform's default locale encoding.
    with open(fpath, encoding="utf-8") as json_file:
        return json.load(json_file)
# First-person replacement for each third-person pronoun form.
_MALE_FIRST_PERSON = {"he": "I", "him": "me", "himself": "myself"}
_FEMALE_FIRST_PERSON = {"she": "I", "herself": "myself"}


def _possessive_regex(name):
    """Compile a pattern matching the possessive form of ``name``."""
    # Straight apostrophe and the typographic one (U+2019). The latter is
    # written as an escape so it survives encoding round-trips of this file.
    apostrophes = "'\u2019"
    escaped = re.escape(name)
    if name.endswith("s"):
        # Accept both "Jones’" and "Jones’s". An apostrophe is not a word
        # character, so a trailing \b would only match when a letter follows
        # it; the negative lookahead expresses "end of the word" correctly.
        return re.compile(rf"(?<!\w){escaped}[{apostrophes}]s?(?!\w)")
    return re.compile(rf"(?<!\w){escaped}[{apostrophes}]s(?!\w)")


def _rewrite_pronouns(gender, text):
    """Replace third-person pronouns for ``gender`` with first-person ones."""
    if gender == "M":
        # Fall back to "I" for any case-insensitive match whose lowercase
        # form is not a key of the table.
        text = he_regex.sub(
            lambda m: _MALE_FIRST_PERSON.get(m.group(0).lower(), "I"), text
        )
        text = his_regex.sub("my", text)
    elif gender == "F":
        text = she_regex.sub(
            lambda m: _FEMALE_FIRST_PERSON.get(m.group(0).lower(), "I"), text
        )
        # "her" is always treated as possessive; the object form ("saw her")
        # cannot be told apart without tagging and also becomes "my".
        text = her_regex.sub("my", text)
    return text


def preprocess_text(name, gender, text, model_id):
    """Rewrite ``text`` about ``name`` into the first person and split it.

    Pronouns matching ``gender`` and mentions of the person's full or last
    name are replaced by "I" / "me" / "myself" / "my"; the result is then
    segmented into sentences with the spaCy pipeline ``model_id``.

    Args:
        name: Full name of the person; the last whitespace-separated token
            is used as the last name.
        gender: "M" or "F". Any other value leaves pronouns untouched.
        text: The third-person text to rewrite.
        model_id: Name or path handed to ``spacy.load``.

    Returns:
        A ``(spacy_model, texts)`` tuple: the loaded pipeline and the list of
        stripped sentence strings.
    """
    text = _rewrite_pronouns(gender, text)

    full_name = name.strip()
    name_parts = full_name.split()
    # An empty name must not produce a pattern: an empty regex would match
    # at every position and scatter replacements through the text.
    last_name = name_parts[-1] if name_parts else ""
    targets = [target for target in (full_name, last_name) if target]

    # Possessives go first so "Obama’s" becomes "my" rather than "I’s";
    # the full name goes before the last name so it is consumed as a unit.
    for target in targets:
        text = _possessive_regex(target).sub("my", text)
    for target in targets:
        # re.escape keeps characters such as "." in a name literal.
        text = re.sub(rf"(?<!\w){re.escape(target)}(?!\w)", "I", text)

    spacy_model = spacy.load(model_id)
    texts = [sentence.text.strip() for sentence in spacy_model(text).sents]
    return spacy_model, texts