Spaces:
Sleeping
Sleeping
| #%% | |
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| from sentence_transformers.util import cos_sim | |
| from sentence_transformers import SentenceTransformer | |
| import gradio as gr | |
| #%% | |
# Resolve the compute device BEFORE touching the model: moving the model to
# "cuda" unconditionally raises on CPU-only hosts.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reference table; loaded at import time (not used by get_recommend in this file).
etalon = pd.read_csv("etalon_prod.csv")

# Training records; only rows flagged as matches are kept.
# NOTE: the boolean filter keeps the original index labels, so rows must be
# addressed positionally (``.iloc``) when using array positions.
df = pd.read_csv("preprocessed_train_classify_rec_spec_filtered_by_etalon.csv")
df = df[df['is_match'] == 1]

# Multilingual sentence encoder used to embed user complaints.
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2').to(device)

unique_complaints = df['Жалобы'].values.tolist()

# Precomputed complaint embeddings.
# NOTE(review): presumably row i of this array is the embedding of
# unique_complaints[i] (i.e. of the i-th row of the filtered df) — confirm
# against the script that produced embeddings.npy.
with open("embeddings.npy", 'rb') as f:
    unique_complaints_embeddings_st = np.load(f)
def get_recommend(user_input,
                  top_k_spec=3,
                  top_k_services=10,
                  treshold=0.8):
    """Recommend specialists and examinations for a free-text complaint.

    The input is embedded with the module-level ``model`` and compared by
    cosine similarity with ``unique_complaints_embeddings_st``. Rows of ``df``
    whose similarity is strictly greater than ``treshold`` are kept, and the
    most frequent values among those rows are reported.

    Args:
        user_input: Complaint text to look up.
        top_k_spec: Maximum number of values returned for each of the two
            specialist columns.
        top_k_services: Maximum number of similar complaints returned, and
            maximum number of examinations returned per diagnostic category.
        treshold: Exclusive lower bound on cosine similarity. The misspelled
            name is kept so existing keyword callers keep working.

    Returns:
        dict with the keys "Специальность врача", "Рекомендуемые специалисты"
        and "Жалобы" (each a list of values, most frequent first) and
        "Рекомендации по обследованию" (a list of single-key dicts, one per
        diagnostic category, mapping the category to its list of
        examinations). Lists are empty when nothing passes the threshold.
    """
    specialist_columns = ["Специальность врача",
                          "Рекомендуемые специалисты"]
    diagnostic_categories = ['Инструментальная диагностика', 'Лабораторная диагностика']

    query_embedding = model.encode(user_input)
    # cos_sim returns a 2-D tensor; row 0 holds the scores for the query.
    # .cpu() keeps the conversion valid even if the tensor lives on a GPU.
    scores = cos_sim(query_embedding, unique_complaints_embeddings_st)[0].detach().cpu().numpy()

    # Positions of the stored embeddings, best match first.
    order = scores.argsort()[::-1]

    # ``order`` holds array positions, not index labels. ``df`` was filtered
    # without resetting its index, so label-based ``.loc`` would raise or pick
    # the wrong rows; positional ``.iloc`` is required here.
    # NOTE(review): assumes embedding i corresponds to the i-th row of df.
    ranked = df.iloc[order].copy()
    ranked['cos_sim'] = scores[order]
    ranked = ranked[ranked['cos_sim'] > treshold]

    result = {
        column: ranked[column].value_counts().iloc[:top_k_spec].index.tolist()
        for column in specialist_columns
    }
    result['Жалобы'] = ranked['Жалобы'].value_counts().iloc[:top_k_services].index.tolist()

    # One {category: [examinations]} entry per diagnostic category, selected
    # through the 'preds' column.
    examinations = []
    for category in diagnostic_categories:
        in_category = ranked[ranked['preds'] == category]
        top_examinations = (
            in_category['Рекомендации по обследованию']
            .value_counts()
            .iloc[:top_k_services]
            .index.tolist()
        )
        examinations.append({category: top_examinations})
    result['Рекомендации по обследованию'] = examinations
    return result
| #%% | |
# Web UI: one text input routed to get_recommend, with the returned dict
# rendered as JSON. A title for the page was drafted but is currently disabled.
gradio_app = gr.Interface(
    fn=get_recommend,
    inputs="text",
    outputs=gr.JSON(label="s"),
    description="Введите услугу:",
)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
| # %% | |