Spaces:
Build error
Build error
Upload 3 files
Browse files- .gitattributes +1 -0
- app.py +1 -1
- earnings_calls_cleaned_metadata.csv +3 -0
- utils.py +10 -8
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
earnings_calls_cleaned_metadata.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -72,7 +72,7 @@ with st.sidebar:
|
|
| 72 |
|
| 73 |
# Choose encoder model
|
| 74 |
|
| 75 |
-
encoder_models_choice = ["
|
| 76 |
with st.sidebar:
|
| 77 |
encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
|
| 78 |
|
|
|
|
| 72 |
|
| 73 |
# Choose encoder model
|
| 74 |
|
| 75 |
+
encoder_models_choice = ["MPNET", "SGPT"]
|
| 76 |
with st.sidebar:
|
| 77 |
encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
|
| 78 |
|
earnings_calls_cleaned_metadata.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c6474da1f710d2a6d2ea65c475baf6821db95a5cb81dd8703eec3c04cd22cbe
|
| 3 |
+
size 18988194
|
utils.py
CHANGED
|
@@ -17,7 +17,7 @@ import streamlit_scrollable_textbox as stx
|
|
| 17 |
|
| 18 |
@st.experimental_singleton
|
| 19 |
def get_data():
|
| 20 |
-
data = pd.read_csv("
|
| 21 |
return data
|
| 22 |
|
| 23 |
|
|
@@ -72,6 +72,7 @@ def query_pinecone(query, top_k, model, index, year, quarter, ticker, threshold=
|
|
| 72 |
"Year": int(year),
|
| 73 |
"Quarter": {"$eq": quarter},
|
| 74 |
"Ticker": {"$eq": ticker},
|
|
|
|
| 75 |
},
|
| 76 |
include_metadata=True,
|
| 77 |
)
|
|
@@ -103,7 +104,7 @@ def sentence_id_combine(data, query_results, lag=2):
|
|
| 103 |
]
|
| 104 |
# Create a list of context sentences by joining the sentences corresponding to the lookup IDs
|
| 105 |
context_list = [
|
| 106 |
-
"
|
| 107 |
]
|
| 108 |
return context_list
|
| 109 |
|
|
@@ -114,11 +115,11 @@ def text_lookup(data, sentence_ids):
|
|
| 114 |
|
| 115 |
|
| 116 |
def generate_prompt(query_text, context_list):
|
| 117 |
-
|
| 118 |
prompt = f"""
|
| 119 |
Context information is below:
|
| 120 |
---------------------
|
| 121 |
-
{
|
| 122 |
---------------------
|
| 123 |
Given the context information and prior knowledge, answer this question:
|
| 124 |
{query_text}
|
|
@@ -148,15 +149,16 @@ def retrieve_transcript(data, year, quarter, ticker):
|
|
| 148 |
(data.Year == int(year))
|
| 149 |
& (data.Quarter == quarter)
|
| 150 |
& (data.Ticker == ticker),
|
| 151 |
-
["
|
| 152 |
]
|
| 153 |
.drop_duplicates()
|
| 154 |
-
.iloc[0]
|
| 155 |
)
|
|
|
|
| 156 |
# convert row to a string and join values with "-"
|
| 157 |
-
row_str = "-".join(row.astype(str)) + ".txt"
|
| 158 |
open_file = open(
|
| 159 |
-
f"Transcripts/{ticker}/{
|
| 160 |
"r",
|
| 161 |
)
|
| 162 |
file_text = open_file.read()
|
|
|
|
| 17 |
|
| 18 |
@st.experimental_singleton
|
| 19 |
def get_data():
|
| 20 |
+
data = pd.read_csv("earnings_calls_cleaned_metadata.csv")
|
| 21 |
return data
|
| 22 |
|
| 23 |
|
|
|
|
| 72 |
"Year": int(year),
|
| 73 |
"Quarter": {"$eq": quarter},
|
| 74 |
"Ticker": {"$eq": ticker},
|
| 75 |
+
"QA_Flag": {"$eq": "Answer"},
|
| 76 |
},
|
| 77 |
include_metadata=True,
|
| 78 |
)
|
|
|
|
| 104 |
]
|
| 105 |
# Create a list of context sentences by joining the sentences corresponding to the lookup IDs
|
| 106 |
context_list = [
|
| 107 |
+
" ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
|
| 108 |
]
|
| 109 |
return context_list
|
| 110 |
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
def generate_prompt(query_text, context_list):
|
| 118 |
+
context = " \n".join(context_list)
|
| 119 |
prompt = f"""
|
| 120 |
Context information is below:
|
| 121 |
---------------------
|
| 122 |
+
{context}
|
| 123 |
---------------------
|
| 124 |
Given the context information and prior knowledge, answer this question:
|
| 125 |
{query_text}
|
|
|
|
| 149 |
(data.Year == int(year))
|
| 150 |
& (data.Quarter == quarter)
|
| 151 |
& (data.Ticker == ticker),
|
| 152 |
+
["File_Name"],
|
| 153 |
]
|
| 154 |
.drop_duplicates()
|
| 155 |
+
.iloc[0,0]
|
| 156 |
)
|
| 157 |
+
print(row)
|
| 158 |
# convert row to a string and join values with "-"
|
| 159 |
+
#row_str = "-".join(row.astype(str)) + ".txt"
|
| 160 |
open_file = open(
|
| 161 |
+
f"Transcripts/{ticker}/{row}",
|
| 162 |
"r",
|
| 163 |
)
|
| 164 |
file_text = open_file.read()
|