Spaces:
Sleeping
Sleeping
cyberosa
committed on
Commit
·
35989d5
1
Parent(s):
ec75e86
data update and based tokens graphs
Browse files- app.py +37 -7
- live_data/markets_live_data.parquet +2 -2
- live_data/markets_live_data_sample.parquet +2 -2
- notebooks/analysis_of_markets_data.ipynb +0 -0
- scripts/live_markets_data.py +13 -7
- scripts/live_traders_data.py +7 -10
- tabs/dist_gap.py +0 -0
- tabs/tokens_votes_dist.py +34 -0
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import pandas as pd
|
|
| 5 |
import seaborn as sns
|
| 6 |
import duckdb
|
| 7 |
import logging
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def get_logger():
|
|
@@ -40,21 +41,50 @@ def prepare_data():
|
|
| 40 |
return df
|
| 41 |
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
demo = gr.Blocks()
|
| 44 |
markets_data = prepare_data()
|
| 45 |
|
| 46 |
with demo:
|
| 47 |
gr.HTML("<h1>Olas Predict Live Markets </h1>")
|
| 48 |
gr.Markdown("This app shows the distributions of predictions on the live markets.")
|
| 49 |
-
|
| 50 |
with gr.Tabs():
|
| 51 |
-
with gr.TabItem("💹Probability distributions"):
|
|
|
|
|
|
|
|
|
|
| 52 |
with gr.Row():
|
| 53 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
with gr.Row():
|
| 55 |
-
#
|
| 56 |
-
print("WIP")
|
| 57 |
-
gr.Markdown("Under construction (WIP)")
|
| 58 |
-
# daily_distributions = plot_daily_market_distributions(markets_data)
|
| 59 |
|
| 60 |
demo.queue(default_concurrency_limit=40).launch()
|
|
|
|
| 5 |
import seaborn as sns
|
| 6 |
import duckdb
|
| 7 |
import logging
|
| 8 |
+
from tabs.tokens_votes_dist import get_based_tokens_distribution
|
| 9 |
|
| 10 |
|
| 11 |
def get_logger():
|
|
|
|
| 41 |
return df
|
| 42 |
|
| 43 |
|
| 44 |
+
def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets according to the dist gap metric.

    The best case is the market with the LOWEST ``dist_gap_perc`` and the worst
    case the one with the HIGHEST (this matches how the app labels them). Only
    markets with more than one collected sample are considered, so their
    evolution over time can actually be plotted.

    :param live_fpmms: dataframe with one row per (market id, sample) pair,
        containing at least the columns ``id``, ``tokens_timestamp`` and
        ``dist_gap_perc``.
    :return: tuple ``(best_market_id, worst_market_id)``.
    """
    # select markets with more than 1 sample
    samples_per_market = (
        live_fpmms[["id", "tokens_timestamp"]].groupby("id").count().reset_index()
    )
    markets_with_multiple_samples = list(
        samples_per_market.loc[samples_per_market["tokens_timestamp"] > 1, "id"].values
    )
    # copy the slice: sorting a .loc view in place raises SettingWithCopyWarning
    # and may silently fail to mutate the underlying frame
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ].copy()
    # ascending order: first row = lowest gap (best case), last row = highest
    # gap (worst case); the previous descending sort returned them swapped
    selected_markets.sort_values(by="dist_gap_perc", ascending=True, inplace=True)
    return selected_markets.iloc[0].id, selected_markets.iloc[-1].id
|
| 58 |
+
|
| 59 |
+
|
| 60 |
# Build the Gradio dashboard; the markets snapshot is loaded once at startup.
demo = gr.Blocks()
markets_data = prepare_data()

with demo:
    gr.HTML("<h1>Olas Predict Live Markets </h1>")
    gr.Markdown("This app shows the distributions of predictions on the live markets.")
    # Pick the two markets to showcase according to the dist gap metric.
    # NOTE(review): the first id is unpacked as "best" and plotted under the
    # "low distribution gap" caption — confirm get_extreme_cases returns the
    # low-gap market first.
    best_market_id, worst_market_id = get_extreme_cases(markets_data)
    with gr.Tabs():
        with gr.TabItem("💹 Probability distributions of live markets"):
            with gr.Row():
                gr.Markdown("# Evolution of outcomes probability based on tokens")

            with gr.Row():
                gr.Markdown("Best case: a market with a low distribution gap metric")
            with gr.Row():
                # Stacked-bar plot of the token-based outcome probabilities
                # over the collected samples for the best market.
                best_market_tokens_dist = get_based_tokens_distribution(
                    best_market_id, markets_data
                )

            with gr.Row():
                gr.Markdown("Worst case: a market with a high distribution gap metric")

            with gr.Row():
                # Same plot for the worst market.
                worst_market_tokens_dist = get_based_tokens_distribution(
                    worst_market_id, markets_data
                )

            with gr.Row():
                # Votes-based section: heading only for now (content under construction).
                gr.Markdown("# Evolution of outcomes probability based on votes")

# Serve the app; queue with a default concurrency limit of 40.
demo.queue(default_concurrency_limit=40).launch()
|
live_data/markets_live_data.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f1aefc2dd441883aca8a95db7715a511b763a5b486307a903dcea30df7ef828
|
| 3 |
+
size 27422
|
live_data/markets_live_data_sample.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e601332794f53f1c65384434aa7bbcad617853f3aa7f89eeb68640f36edc7b14
|
| 3 |
+
size 22201
|
notebooks/analysis_of_markets_data.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/live_markets_data.py
CHANGED
|
@@ -206,13 +206,13 @@ def get_answer(fpmm: pd.Series) -> str:
|
|
| 206 |
def get_first_token_perc(row):
|
| 207 |
if row["total_tokens"] == 0.0:
|
| 208 |
return 0
|
| 209 |
-
return round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
|
| 210 |
|
| 211 |
|
| 212 |
def get_second_token_perc(row):
|
| 213 |
if row["total_tokens"] == 0.0:
|
| 214 |
return 0
|
| 215 |
-
return round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
|
| 216 |
|
| 217 |
|
| 218 |
def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
|
|
@@ -220,13 +220,14 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
|
|
| 220 |
|
| 221 |
# prepare the new ones
|
| 222 |
# Add current timestamp
|
| 223 |
-
fpmms["
|
| 224 |
fpmms["open"] = True
|
| 225 |
fpmms["total_trades"] = 0
|
| 226 |
fpmms["dist_gap_perc"] = 0.0
|
| 227 |
fpmms["votes_first_outcome_perc"] = 0.0
|
| 228 |
fpmms["votes_second_outcome_perc"] = 0.0
|
| 229 |
-
|
|
|
|
| 230 |
# computation of token distributions
|
| 231 |
fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
|
| 232 |
fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
|
|
@@ -236,7 +237,12 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
|
|
| 236 |
fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
|
| 237 |
fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
|
| 238 |
fpmms.drop(
|
| 239 |
-
columns=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
inplace=True,
|
| 241 |
)
|
| 242 |
# previous file to update?
|
|
@@ -263,7 +269,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
|
|
| 263 |
"""Fetch, process, store and return the markets as a Dataframe."""
|
| 264 |
|
| 265 |
logger.info("fetching new markets information")
|
| 266 |
-
current_timestamp = int(datetime.now(UTC).timestamp())
|
| 267 |
fpmms = fetch_fpmms(current_timestamp)
|
| 268 |
logger.debug("New collected data")
|
| 269 |
logger.debug(fpmms.head())
|
|
@@ -276,7 +282,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
|
|
| 276 |
add_trading_info(fpmms, current_timestamp)
|
| 277 |
|
| 278 |
logger.info("saving the data")
|
| 279 |
-
|
| 280 |
if filename:
|
| 281 |
fpmms.to_parquet(DATA_DIR / filename, index=False)
|
| 282 |
|
|
|
|
| 206 |
def get_first_token_perc(row):
    """Probability (%) of the first outcome: complement of the first token's share.

    Returns 0 when the market holds no tokens at all.
    """
    total = row["total_tokens"]
    if total == 0.0:
        return 0
    # fewer tokens on an outcome means a higher implied probability for it
    first_share = round((row["token_first_amount"] / total) * 100, 2)
    return 100.0 - first_share
|
| 210 |
|
| 211 |
|
| 212 |
def get_second_token_perc(row):
    """Probability (%) of the second outcome: complement of the second token's share.

    Returns 0 when the market holds no tokens at all.
    """
    total = row["total_tokens"]
    if total == 0.0:
        return 0
    # fewer tokens on an outcome means a higher implied probability for it
    second_share = round((row["token_second_amount"] / total) * 100, 2)
    return 100.0 - second_share
|
| 216 |
|
| 217 |
|
| 218 |
def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
|
|
|
|
| 220 |
|
| 221 |
# prepare the new ones
|
| 222 |
# Add current timestamp
|
| 223 |
+
fpmms["sample_timestamp"] = current_timestamp
|
| 224 |
fpmms["open"] = True
|
| 225 |
fpmms["total_trades"] = 0
|
| 226 |
fpmms["dist_gap_perc"] = 0.0
|
| 227 |
fpmms["votes_first_outcome_perc"] = 0.0
|
| 228 |
fpmms["votes_second_outcome_perc"] = 0.0
|
| 229 |
+
fpmms["first_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][0])
|
| 230 |
+
fpmms["second_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][1])
|
| 231 |
# computation of token distributions
|
| 232 |
fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
|
| 233 |
fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
|
|
|
|
| 237 |
fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
|
| 238 |
fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
|
| 239 |
fpmms.drop(
|
| 240 |
+
columns=[
|
| 241 |
+
"token_first_amount",
|
| 242 |
+
"token_second_amount",
|
| 243 |
+
"total_tokens",
|
| 244 |
+
"question",
|
| 245 |
+
],
|
| 246 |
inplace=True,
|
| 247 |
)
|
| 248 |
# previous file to update?
|
|
|
|
| 269 |
"""Fetch, process, store and return the markets as a Dataframe."""
|
| 270 |
|
| 271 |
logger.info("fetching new markets information")
|
| 272 |
+
current_timestamp = int(datetime.now(UTC).timestamp()) # seconds
|
| 273 |
fpmms = fetch_fpmms(current_timestamp)
|
| 274 |
logger.debug("New collected data")
|
| 275 |
logger.debug(fpmms.head())
|
|
|
|
| 282 |
add_trading_info(fpmms, current_timestamp)
|
| 283 |
|
| 284 |
logger.info("saving the data")
|
| 285 |
+
logger.debug(fpmms.info())
|
| 286 |
if filename:
|
| 287 |
fpmms.to_parquet(DATA_DIR / filename, index=False)
|
| 288 |
|
scripts/live_traders_data.py
CHANGED
|
@@ -74,8 +74,6 @@ def transform_trades(trades_json: dict) -> pd.DataFrame:
|
|
| 74 |
logger.warning("No trades for this market")
|
| 75 |
return df
|
| 76 |
|
| 77 |
-
# print(df.info())
|
| 78 |
-
|
| 79 |
# convert creator to address
|
| 80 |
df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
|
| 81 |
|
|
@@ -114,21 +112,21 @@ def compute_from_timestamp_value(
|
|
| 114 |
|
| 115 |
def compute_votes_distribution(market_trades: pd.DataFrame):
|
| 116 |
"""Function to compute the distribution of votes for the trades of a market"""
|
|
|
|
| 117 |
total_trades = len(market_trades)
|
| 118 |
-
|
| 119 |
# outcomeIndex is always 1 or 0?
|
| 120 |
sum_outcome_index_1 = sum(market_trades.outcomeIndex)
|
| 121 |
-
|
| 122 |
-
logger.info(f"The total number of votes for index 1 is {sum_outcome_index_1}")
|
| 123 |
percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)
|
| 124 |
return (100 - percentage_index_1), percentage_index_1
|
| 125 |
|
| 126 |
|
| 127 |
def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
|
| 128 |
"""Function to update only the information related with the current timestamp"""
|
| 129 |
-
|
| 130 |
logger.info("Adding votes distribution per market")
|
| 131 |
-
|
| 132 |
for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
|
| 133 |
# update the trades for this market and at this specific current_timestamp
|
| 134 |
logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
|
|
@@ -138,7 +136,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
|
|
| 138 |
continue
|
| 139 |
market_id = fpmm["id"]
|
| 140 |
|
| 141 |
-
logger.info(f"Adding information for the market {market_id}")
|
| 142 |
market_trades_json = _query_omen_xdai_subgraph(
|
| 143 |
fpmm_id=market_id,
|
| 144 |
)
|
|
@@ -146,8 +144,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
|
|
| 146 |
if len(market_trades) == 0:
|
| 147 |
logger.info("No trades for this market")
|
| 148 |
continue
|
| 149 |
-
|
| 150 |
-
logger.info("Computing the votes distribution")
|
| 151 |
fpmms.at[i,"total_trades"] = len(market_trades)
|
| 152 |
first_outcome, second_outcome = compute_votes_distribution(market_trades)
|
| 153 |
logger.info(
|
|
|
|
| 74 |
logger.warning("No trades for this market")
|
| 75 |
return df
|
| 76 |
|
|
|
|
|
|
|
| 77 |
# convert creator to address
|
| 78 |
df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
|
| 79 |
|
|
|
|
| 112 |
|
| 113 |
def compute_votes_distribution(market_trades: pd.DataFrame):
    """Function to compute the distribution of votes for the trades of a market"""
    logger.info("Computing the votes distribution")
    n_trades = len(market_trades)
    logger.info(f"The total number of trades is {n_trades}")
    # outcomeIndex is always 1 or 0?
    # summing the indexes counts the trades that voted for outcome index 1
    votes_index_1 = sum(market_trades.outcomeIndex)
    logger.debug(f"The total number of votes for index 1 is {votes_index_1}")
    index_1_perc = round((votes_index_1 / n_trades) * 100, 2)
    # first element: share for index 0 (the remainder); second: share for index 1
    return (100 - index_1_perc), index_1_perc
|
| 123 |
|
| 124 |
|
| 125 |
def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
|
| 126 |
"""Function to update only the information related with the current timestamp"""
|
| 127 |
+
|
| 128 |
logger.info("Adding votes distribution per market")
|
| 129 |
+
# Iterate over the markets
|
| 130 |
for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
|
| 131 |
# update the trades for this market and at this specific current_timestamp
|
| 132 |
logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
|
|
|
|
| 136 |
continue
|
| 137 |
market_id = fpmm["id"]
|
| 138 |
|
| 139 |
+
logger.info(f"Adding trades information for the market {market_id}")
|
| 140 |
market_trades_json = _query_omen_xdai_subgraph(
|
| 141 |
fpmm_id=market_id,
|
| 142 |
)
|
|
|
|
| 144 |
if len(market_trades) == 0:
|
| 145 |
logger.info("No trades for this market")
|
| 146 |
continue
|
| 147 |
+
|
|
|
|
| 148 |
fpmms.at[i,"total_trades"] = len(market_trades)
|
| 149 |
first_outcome, second_outcome = compute_votes_distribution(market_trades)
|
| 150 |
logger.info(
|
tabs/dist_gap.py
ADDED
|
File without changes
|
tabs/tokens_votes_dist.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import seaborn as sns
|
| 5 |
+
from seaborn import FacetGrid
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
    """Function to paint the evolution of the probability of the outcomes based on the tokens distributions over time"""
    market_df = all_markets.loc[all_markets["id"] == market_id]
    # outcome labels for the legend; identical on every sample of the market
    first_outcome = market_df.iloc[0].first_outcome
    second_outcome = market_df.iloc[0].second_outcome
    # stacked bars: the two token percentages of each sample add up to 100
    ax = market_df.plot(
        x="sample_datetime",
        y=["first_token_perc", "second_token_perc"],
        kind="bar",
        rot=0,
        stacked=True,
    )
    # add overall title
    ax.set_title(
        "Outcomes probability over time based on tokens distributions", fontsize=16
    )
    # add axis titles
    ax.set_xlabel("Sample date")
    ax.set_ylabel("Percentage")
    # anchor the legend outside the axes so it does not cover the bars
    ax.legend(
        bbox_to_anchor=(1, 1.02),
        loc="upper left",
        labels=[first_outcome, second_outcome],
    )
    return gr.Plot(value=ax.figure)
|