vasilisNt committed on
Commit
2734263
·
verified ·
1 Parent(s): b90d195

Upload 5 files

Browse files
UniProtKB_id_names.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ st.set_page_config(layout="wide")
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ from zipfile import ZipFile
7
+
8
+ import plotly.express as px
9
+ import plotly.graph_objs as go
10
+
11
+ LLR_FILE = 'UniProtKB_human_VESM_llrs.zip'
12
+
13
# The shuffled id/name table is kept in st.session_state so the order is
# drawn once and then reused; the CSV is only read when no shuffled copy
# has been stored yet.
if 'shuffled_df' not in st.session_state:
    st.session_state.shuffled_df = pd.read_csv(
        'UniProtKB_id_names.csv', index_col=0
    ).sample(frac=1)
df = st.session_state.shuffled_df

# ClinVar table; plot_interactive reads its `protein`, `GoldStars`,
# `clinvar_label` and `variant` columns.
clinvar = pd.read_csv('clinvar_0325.csv.gz', index_col=0)

# Logistic-regression parameters used by load_LLR to map LLRs to scores.
# The archive is opened as a context manager so its file handle is closed
# once the two values are extracted; .item() turns each one-element array
# into a plain Python scalar.
with np.load("logreg_params.npz") as f:
    coef, intercept = f["coef"].item(), f["intercept"].item()
21
+
22
def load_LLR(uniprot_id):
    '''Loads the LLRs for a given uniprot id. Returns a 20xL dataframe.

    Rows are indexed by AA change,
    (AAorder=['K','R','H','E','D','N','Q','T','S','C','G','A','V','L','I','M','P','Y','F','W'])
    Columns indexed by WT_AA+position e.g., "G 12".

    When the module-level `sigmoid` flag is truthy, every LLR is passed
    through the logistic function 1 / (1 + exp(-(LLR * coef + intercept)))
    using the module-level `coef` and `intercept`, and the result is rounded
    to 4 decimals; otherwise the raw LLR table is returned.

    Usage: load_LLR('P01116') or load_LLR('P01116-2')
    '''
    with ZipFile(LLR_FILE) as myzip:
        # The first archive entry is used as the path prefix
        # (presumably the top-level directory of the zip -- verify against
        # the archive layout).
        member = myzip.namelist()[0] + 'LLRs/' + uniprot_id + '.csv'
        # Read while the archive is open and close the member handle
        # afterwards instead of leaking it.
        with myzip.open(member) as data:
            LLR = pd.read_csv(data, index_col=0)

    if sigmoid:
        # Element-wise on the 2-D array; no flatten/reshape round-trip needed.
        p = 1 / (1 + np.exp(-(LLR.values * coef + intercept)))
        LLR = pd.DataFrame(p, index=LLR.index, columns=LLR.columns).round(4)
    return LLR
36
+
37
def meltLLR(LLR, gene_prefix=None, ignore_pos=False):
    '''Flatten an LLR matrix into one row per variant.

    LLR is a dataframe whose columns are "WT_AA position" labels
    (e.g. "G 12") and whose index holds the substituted amino acid.

    Returns a dataframe indexed by `variant` (column label with the spaces
    removed, followed by the row label, e.g. "G12K") with a `score` column
    and, unless `ignore_pos` is true, an integer `pos` column parsed from
    the characters between the first and last character of the variant.
    If `gene_prefix` is given, every index entry becomes
    "<gene_prefix>_<variant>".
    '''
    # melt() walks the matrix column by column; ignore_index=False keeps the
    # substituted amino acid as the index of the long table.
    melted = LLR.melt(ignore_index=False)

    variants = [
        column.replace(' ', '') + alt_aa
        for column, alt_aa in zip(melted['variable'], melted.index)
    ]

    # Build the result directly rather than adding and then deleting
    # temporary columns (and without shadowing the builtin `vars`).
    result = pd.DataFrame(
        {'score': melted['value'].to_numpy()},
        index=pd.Index(variants, name='variant'),
    )
    if not ignore_pos:
        result['pos'] = [int(variant[1:-1]) for variant in variants]
    if gene_prefix is not None:
        result.index = gene_prefix + '_' + result.index
    return result
48
+
49
+
50
def plot_interactive(uniprot_id, show_clinvar=False):
    '''Build the interactive heatmap figure for one protein.

    The matrix comes from load_LLR(uniprot_id). Colour range, colour scale
    and colour-bar label depend on the module-level `sigmoid` flag, and the
    figure title is the module-level `selection`. When `show_clinvar` is
    true, rows of the module-level `clinvar` table for this protein with
    GoldStars > 1 are overlaid as markers: clinvar_label == 1.0 in red,
    clinvar_label == 0.0 in white.

    Returns the plotly figure.
    '''
    matrix = load_LLR(uniprot_id)

    if sigmoid:
        zmin, zmax, cmap, color = 0, 1.09, 'rdbu_r', 'score'
    else:
        zmin, zmax, cmap, color = -22, 0, 'Viridis_r', 'LLR'

    fig = px.imshow(
        matrix.values,
        x=matrix.columns,
        y=matrix.index,
        color_continuous_scale=cmap,
        zmax=zmax,
        zmin=zmin,
        labels=dict(y="Amino acid change", x="Protein sequence", color=color),
        template='plotly_white',
        title=selection,
    )

    # Initial x window is [0, 99]; the range slider exposes the rest.
    fig.update_xaxes(
        tickangle=-90, range=[0, 99], rangeslider=dict(visible=True), dtick=1
    )
    fig.update_yaxes(dtick=1)
    fig.update_layout(
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        font={'family': 'Arial', 'size': 11},
        hoverlabel=dict(font=dict(family='Arial', size=14)),
    )
    fig.update_traces(
        hovertemplate="<b>%{x} %{y}</b> (%{z:.2f})" + '<extra></extra>'
    )

    if show_clinvar:
        records = clinvar[clinvar.protein == uniprot_id]
        records = records[records.GoldStars > 1]
        benign = set(records[records.clinvar_label == 0.0].variant.values)
        pathogenic = set(records[records.clinvar_label == 1.0].variant.values)

        # Each hit is (column label, row label, cell value).
        benign_hits, pathogenic_hits = [], []
        for col in matrix.columns:
            # "G 12" -> "G12": drop the separator at position 1.
            wt_pos = col[0] + col[2:]
            for alt_aa in matrix.index:
                variant = wt_pos + alt_aa
                # Benign membership is tested first, as in the original.
                if variant in benign:
                    benign_hits.append((col, alt_aa, matrix.loc[alt_aa, col]))
                elif variant in pathogenic:
                    pathogenic_hits.append((col, alt_aa, matrix.loc[alt_aa, col]))

        def marker_trace(hits, marker_color, label_bg):
            # One scatter trace carrying the cell value as customdata.
            return go.Scatter(
                x=[hit[0] for hit in hits],
                y=[hit[1] for hit in hits],
                customdata=[hit[2] for hit in hits],
                mode='markers',
                marker=dict(size=8, color=marker_color),
                showlegend=False,
                hoverlabel=dict(bgcolor=label_bg, font_color='black'),
                hovertemplate="<b>%{x} %{y}</b> (%{customdata:.2f})<extra></extra>",
            )

        # Pathogenic markers are added first, benign second.
        fig.add_trace(marker_trace(pathogenic_hits, 'red', 'crimson'))
        fig.add_trace(marker_trace(benign_hits, 'white', 'white'))

    # NOTE(review): the source this was recovered from had lost its
    # indentation, so it is unclear whether this call belonged inside the
    # `if show_clinvar:` branch; it is applied unconditionally here -- confirm.
    fig.update_layout(
        hovermode='closest',
        hoverdistance=10,
    )

    return fig
128
+
129
+
130
# Protein picker: preselect P32245 when it is present in the table,
# otherwise fall back to the first entry.
idx = df.index.get_loc('P32245') if 'P32245' in df.index else 0
selection = st.selectbox("uniprot_id:", df, index=idx)
# Map the selected `txt` label back to the index value of its row.
uid = df.index[df.txt == selection][0]

# Two side-by-side toggles. `sigmoid` is read as a module-level flag by
# load_LLR and plot_interactive, so it must be set before they are called.
col1, col2 = st.columns(2)
sigmoid = col1.checkbox(
    "Calibrated VESM predictions (0: benign, 1: pathogenic)",
    value=False,
)
show_clinvar = col2.checkbox(
    "Show ClinVar annotations (red: pathogenic, white: benign)",
    value=False,
)

fig = plot_interactive(uid, show_clinvar=show_clinvar)
fig.update_layout(width=800, height=600, autosize=False)
st.plotly_chart(fig, use_container_width=True)

# Offer the same table in long format (one row per variant) as a CSV.
csv_payload = meltLLR(load_LLR(uid)).to_csv()
st.download_button(
    label="📥 Download as CSV",
    data=csv_payload,
    file_name=f"{selection}.csv",
    mime='text/csv',
)
st.markdown("---")

st.markdown("""
- Bulk download precomputed scores at [VESM Effect Scores](https://huggingface.co/datasets/ntranoslab/vesm_scores) for all UniProt, hg19, and hg38 variants.
- Use VESM locally: Access the source code and installation instructions on [GitHub](https://github.com/ntranoslab/vesm).
""")
clinvar_0325.csv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b692c7298c46bcf3397baaca93b89e8f22d32bc891dab0ac2e3af90ac8944c08
3
+ size 2128878
logreg_params.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6411c5d1bcab64080217d97be085397974a5f68b2a5a680e9081b7959289c81d
3
+ size 776
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ altair
2
+ streamlit
3
+ plotly
4
+ protobuf~=3.19.0