Commit 16e2cb0
Parent: 55c033b

updated information of the last week

Files changed:
- data/all_trades_profitability.parquet  +2 -2
- data/fpmmTrades.parquet  +2 -2
- data/fpmms.parquet  +2 -2
- data/invalid_trades.parquet  +2 -2
- data/outliers.parquet  +1 -1
- data/service_map.pkl  +2 -2
- data/summary_profitability.parquet  +2 -2
- data/t_map.pkl  +2 -2
- data/tools.parquet  +2 -2
- data/tools_accuracy.csv  +2 -2
- scripts/get_mech_info.py  +132 -33
- scripts/mech_request_utils.py  +42 -21
- scripts/profitability.py  +21 -13
- scripts/pull_data.py  +70 -17
- scripts/tools.py  +5 -7
- scripts/update_tools_accuracy.py  +21 -8
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:54d1564ac5393d51c6a7218a9d6afabb78c6166f2661afa5c68fe8ec166ba213
+size 2885806

data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6c95bdbd08b0fa875044c53bfc983c0e4a76d861c703da2f4a04cdec20a9f13c
+size 8600942

data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2b4ad4e1780f6dcb812787d397a738141e53db0fc6b588f386586dddabca034e
+size 494787

data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8199109e6224b609408037098e23f11f6e38a2f2526f9376fca895ee2728edd9
+size 77169

data/outliers.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3910fd14580aac1b02c49152dbc5fb7b282aaa52b81e3e634801bf673590c8fb
 size 18274

data/service_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e587b4db61a24ab6787a3e11d7ab3936e8f6ba63614a39ba086db1819915434a
+size 91939

data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2c7253ea40d66ba0f06955bb2b96a589fb20e154eb2cfd95db85872d80d80a4b
+size 71470

data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b0705cfa2166283351129e1dac63aab41a4231e60f6873d3026eb23da5cdbf27
+size 21870626

data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c5594bd432a7db7fc7bf8ccbf5c40a10bd452643e2a884b5ae221a9bfe21271e
+size 342399362

data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9ca0e77ced7ed3d627b0cbc2028af4116629135ed781b4e12c3c410316208a72
+size 1121
scripts/get_mech_info.py CHANGED
@@ -3,6 +3,9 @@ from typing import Any
 from datetime import datetime, timedelta, UTC
 from utils import SUBGRAPH_API_KEY, measure_execution_time, DATA_DIR
 import requests
+import json
+import pandas as pd
+import numpy as np
 from mech_request_utils import (
     collect_all_mech_delivers,
     collect_all_mech_requests,
@@ -11,6 +14,7 @@ from mech_request_utils import (
     merge_requests_delivers,
     get_ipfs_data,
     only_delivers_loop,
+    merge_json_files,
 )
 
 OLD_MECH_SUBGRAPH_URL = (
@@ -109,6 +113,78 @@ def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
     return blocks[0]
 
 
+def update_json_files():
+    merge_json_files("mech_requests.json", "new_mech_requests.json")
+    merge_json_files("mech_delivers.json", "new_mech_delivers.json")
+    merge_json_files("merged_requests.json", "new_merged_requests.json")
+    merge_json_files("tools_info.json", "new_tools_info.json")
+
+
+def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+    # Read old trades parquet file
+    try:
+        old_trades_df = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
+    except Exception as e:
+        print(f"Error reading old trades parquet file {e}")
+        return None
+
+    # merge two dataframes
+    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+
+    # Check for duplicates
+    duplicates = merge_df.duplicated()
+
+    # Print the duplicates
+    print(duplicates)
+
+    # Get the number of duplicates
+    num_duplicates = duplicates.sum()
+    print("Number of duplicates:", num_duplicates)
+
+    # Get the rows with duplicates
+    duplicate_rows = merge_df[duplicates]
+    print("Duplicate rows:\n", duplicate_rows)
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+
+    # save the parquet file
+    merge_df.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
+
+    return merge_df
+
+
+def update_tools_parquet(new_tools_df: pd.DataFrame):
+    try:
+        old_tools_df = pd.read_parquet(DATA_DIR / "tools.parquet")
+    except Exception as e:
+        print(f"Error reading old tools parquet file {e}")
+        return None
+
+    # merge two dataframes
+    merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)
+
+    # Check for duplicates
+    duplicates = merge_df.duplicated()
+
+    # Print the duplicates
+    print(duplicates)
+
+    # Get the number of duplicates
+    num_duplicates = duplicates.sum()
+    print("Number of duplicates:", num_duplicates)
+
+    # Get the rows with duplicates
+    duplicate_rows = merge_df[duplicates]
+    print("Duplicate rows:\n", duplicate_rows)
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+
+    # save the parquet file
+    merge_df.to_parquet(DATA_DIR / "tools.parquet", index=False)
+
+
 def get_mech_info_2024() -> dict[str, Any]:
     """Query the subgraph to get the 2024 information from mech."""
 
@@ -195,38 +271,55 @@ def get_mech_info_last_60_days() -> dict[str, Any]:
     return MECH_TO_INFO
 
 
-    if last_month_block_number.isdigit():
-        last_month_block_number = int(last_month_block_number)
-            "old_mech_abi.json",
-            last_month_block_number,
-        ),
-        # this block number is when this mech was created
-        "0x77af31de935740567cf4ff1986d04b2c964a786a": (
-            "new_mech_abi.json",
-            last_month_block_number,
-        ),
-    }
-    print(f"last 10 days block number {last_month_block_number}")
-    return MECH_TO_INFO
+@measure_execution_time
+def get_mech_events_since_last_run():
+    """Function to download only the new events since the last execution."""
+
+    # Read the latest date from stored data
+    try:
+        all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+        latest_timestamp = max(all_trades.creation_timestamp)
+        print(f"Updating data since {latest_timestamp}")
+    except Exception:
+        print("Error while reading the profitability parquet file")
+        return None
+
+    # Get the block number of lastest date
+    five_seconds = np.timedelta64(5, "s")
+    last_run_block_number = fetch_block_number(
+        int(latest_timestamp.timestamp()),
+        int((latest_timestamp + five_seconds).timestamp()),
+    )
+    last_block_number = get_last_block_number()
+
+    # mech requests
+    requests_dict, duplicatedReqId = collect_all_mech_requests(
+        from_block=last_run_block_number,
+        to_block=last_block_number,
+        filename="new_mech_requests.json",
+    )
+
+    # mech delivers
+    delivers_dict, duplicatedIds = collect_all_mech_delivers(
+        from_block=last_run_block_number,
+        to_block=last_block_number,
+        filename="new_mech_delivers.json",
+    )
+    # clean delivers
+    clean_mech_delivers("new_mech_requests.json", "new_mech_delivers.json")
+
+    # solve duplicated requestIds
+    block_map = fix_duplicate_requestIds(
+        "new_mech_requests.json", "new_mech_delivers.json"
+    )
+    # merge the two files into one source
+    not_found = merge_requests_delivers(
+        "new_mech_requests.json", "new_mech_delivers.json", "new_merged_requests.json"
+    )
+
+    # Add ipfs contents
+    get_ipfs_data("new_merged_requests.json", "new_tools_info.json")
+    return latest_timestamp
 
 
 @measure_execution_time
@@ -235,25 +328,31 @@ def get_mech_events_last_60_days():
     last_block_number = get_last_block_number()
     # mech requests
     requests_dict, duplicatedReqId = collect_all_mech_requests(
-        from_block=earliest_block_number,
+        from_block=earliest_block_number,
+        to_block=last_block_number,
+        filename="mech_requests.json",
     )
 
     # mech delivers
     delivers_dict, duplicatedIds = collect_all_mech_delivers(
-        from_block=earliest_block_number,
+        from_block=earliest_block_number,
+        to_block=last_block_number,
+        filename="mech_delivers.json",
    )
 
     # clean delivers
-    clean_mech_delivers()
+    clean_mech_delivers("mech_requests.json", "mech_delivers.json")
 
     # solve duplicated requestIds
-    block_map = fix_duplicate_requestIds()
+    block_map = fix_duplicate_requestIds("mech_requests.json", "mech_delivers.json")
 
     # merge the two files into one source
-    not_found = merge_requests_delivers(
+    not_found = merge_requests_delivers(
+        "mech_requests.json", "mech_delivers.json", "merged_requests.json"
+    )
 
     # Add ipfs contents
-    get_ipfs_data()
+    get_ipfs_data("merged_requests.json", "tools_info.json")
 
 
 if __name__ == "__main__":
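For reference, the update_trades_parquet / update_tools_parquet helpers introduced above lean on pandas' concat-then-deduplicate behaviour. A minimal standalone sketch (not part of the commit; the id/value columns are made up) shows what that pattern does with overlapping rows:

```python
import pandas as pd

old_df = pd.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})
new_df = pd.DataFrame({"id": [3, 4], "value": ["c", "d"]})  # row id=3 overlaps

# Same pattern as the helpers above: concatenate, inspect, then drop duplicates.
merged = pd.concat([old_df, new_df], ignore_index=True)
duplicates = merged.duplicated()            # True for the second copy of a row
print("Number of duplicates:", duplicates.sum())

merged = merged.drop_duplicates().reset_index(drop=True)
print(merged)  # ids 1, 2, 3, 4 — the overlapping row is kept only once
```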
scripts/mech_request_utils.py CHANGED
@@ -107,7 +107,7 @@ query delivers_query($requestId: BigInt, $blockNumber_gte: BigInt, $blockNumber_
 """
 
 
-def collect_all_mech_requests(from_block: int, to_block: int) -> Tuple:
+def collect_all_mech_requests(from_block: int, to_block: int, filename: str) -> Tuple:
 
     print(f"Fetching all mech requests from {from_block} to {to_block}")
     mech_requests = {}
@@ -146,17 +146,18 @@ def collect_all_mech_requests(from_block: int, to_block: int) -> Tuple:
         print(f"New execution for id_gt = {id_gt}")
         if len(duplicated_reqIds) > 0:
             print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
-        save_json_file(mech_requests,
+        save_json_file(mech_requests, filename)
 
     print(f"Number of requests = {len(mech_requests)}")
     print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
-    save_json_file(mech_requests,
+    save_json_file(mech_requests, filename)
     return mech_requests, duplicated_reqIds
 
 
-def collect_all_mech_delivers(from_block: int, to_block: int) -> Tuple:
+def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) -> Tuple:
 
     print(f"Fetching all mech delivers from {from_block} to {to_block}")
+    # TODO save as new json file, check how to merge later json files
     mech_delivers = {}
     duplicated_requestIds = []
     transport = RequestsHTTPTransport(url=THEGRAPH_ENDPOINT)
@@ -196,10 +197,10 @@ def collect_all_mech_delivers(from_block: int, to_block: int) -> Tuple:
         print(f"New execution for id_gt = {id_gt}")
         if len(duplicated_requestIds) > 0:
             print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
-        save_json_file(mech_delivers,
+        save_json_file(mech_delivers, filename)
     print(f"Number of delivers = {len(mech_delivers)}")
     print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
-    save_json_file(mech_delivers,
+    save_json_file(mech_delivers, filename)
     return mech_delivers, duplicated_requestIds
 
 
@@ -343,10 +344,27 @@ def save_json_file(data: Dict[str, Any], filename: str):
         json.dump(data, file, indent=2)
 
 
-def clean_mech_delivers() -> None:
+def merge_json_files(old_file: str, new_file: str):
+    # read old file
+    with open(JSON_DATA_DIR / old_file, "r") as f:
+        old_data = json.load(f)
+
+    # read the new file
+    with open(JSON_DATA_DIR / new_file, "r") as f:
+        new_data = json.load(f)
+
+    # Merge the two JSON files and remove duplicates
+    old_data.update(new_data)
+
+    # Save the merged JSON file
+    print(f"{old_file} updated")
+    save_json_file(old_data, old_file)
+
+
+def clean_mech_delivers(requests_filename: str, delivers_filename: str) -> None:
     """Function to remove from the delivers json file the request Ids that are not in the mech requests"""
     # read mech requests
-    with open(JSON_DATA_DIR /
+    with open(JSON_DATA_DIR / requests_filename, "r") as file:
         mech_requests = json.load(file)
 
     list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
@@ -354,7 +372,7 @@ def clean_mech_delivers() -> None:
     list_reqIds = list(set(list_reqIds))
 
     # remove requestIds from delivers that are not in this list
-    with open(JSON_DATA_DIR /
+    with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         mech_delivers = json.load(file)
 
     print(f"original size of the file {len(mech_delivers)}")
@@ -366,7 +384,7 @@ def clean_mech_delivers() -> None:
     for r in to_delete:
         mech_delivers.pop(r, None)
     print(f"final size of the file {len(mech_delivers)}")
-    save_json_file(mech_delivers,
+    save_json_file(mech_delivers, delivers_filename)
 
 
 def get_request_block_numbers(
@@ -387,11 +405,11 @@ def update_block_request_map(block_request_id_map: dict) -> None:
         pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
 
 
-def fix_duplicate_requestIds() -> dict:
-    with open(JSON_DATA_DIR /
+def fix_duplicate_requestIds(requests_filename: str, delivers_filename: str) -> dict:
+    with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         data_delivers = json.load(file)
 
-    with open(JSON_DATA_DIR /
+    with open(JSON_DATA_DIR / requests_filename, "r") as file:
         mech_requests = json.load(file)
     list_request_Ids = list(data_delivers.keys())
 
@@ -423,12 +441,15 @@ def fix_duplicate_requestIds() -> dict:
     return block_request_id_map
 
 
-def merge_requests_delivers() -> None:
+def merge_requests_delivers(
+    requests_filename: str, delivers_filename: str, filename: str
+) -> None:
+    # TODO
     """Function to map requests and delivers"""
-    with open(JSON_DATA_DIR /
+    with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         mech_delivers = json.load(file)
 
-    with open(JSON_DATA_DIR /
+    with open(JSON_DATA_DIR / requests_filename, "r") as file:
         mech_requests = json.load(file)
 
     # read the block map for duplicated requestIds
@@ -458,12 +479,12 @@ def merge_requests_delivers() -> None:
 
         # extract the info and append it to the original mech request dictionary
         mech_req["deliver"] = deliver_dict
-    save_json_file(mech_requests,
+    save_json_file(mech_requests, filename)
     return
 
 
-def get_ipfs_data():
-    with open(JSON_DATA_DIR /
+def get_ipfs_data(input_filename: str, output_filename: str):
+    with open(JSON_DATA_DIR / input_filename, "r") as file:
         mech_requests = json.load(file)
 
     total_keys_to_traverse = list(mech_requests.keys())
@@ -491,7 +512,7 @@ def get_ipfs_data():
         partial_dict = future.result()
         updated_mech_requests.update(partial_dict)
 
-    save_json_file(updated_mech_requests,
+    save_json_file(updated_mech_requests, output_filename)
 
     # delivers
     print("UPDATING IPFS CONTENTS OF DELIVERS")
@@ -517,7 +538,7 @@ def get_ipfs_data():
         partial_dict = future.result()
         final_tools_content.update(partial_dict)
 
-    save_json_file(final_tools_content,
+    save_json_file(final_tools_content, output_filename)
 
 
 def only_delivers_loop():
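The new merge_json_files helper above is a thin wrapper around dict.update: keys present in both files take the value from the new file, keys only in the old file are preserved. A toy sketch of that behaviour (not part of the commit; the request keys are invented):

```python
# dict.update semantics used by merge_json_files to fold new events into the old file.
old_data = {"req-1": {"block": 100}, "req-2": {"block": 101}}
new_data = {"req-2": {"block": 102}, "req-3": {"block": 103}}

old_data.update(new_data)
print(old_data)
# {'req-1': {'block': 100}, 'req-2': {'block': 102}, 'req-3': {'block': 103}}
```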
scripts/profitability.py CHANGED
@@ -29,7 +29,11 @@ from tqdm import tqdm
 import numpy as np
 import os
 from pathlib import Path
-from get_mech_info import
+from get_mech_info import (
+    DATETIME_60_DAYS_AGO,
+    update_trades_parquet,
+    update_tools_parquet,
+)
 from utils import SUBGRAPH_API_KEY, wei_to_unit, convert_hex_to_int, _to_content
 from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
 from staking import label_trades_by_staking
@@ -318,9 +322,9 @@ def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
 
 def prepare_profitalibity_data(
     rpc: str,
-    tools_filename: str
-    trades_filename: str
-    from_timestamp: float
+    tools_filename: str,
+    trades_filename: str,
+    from_timestamp: float,
 ):
     """Prepare data for profitalibity analysis."""
 
@@ -343,13 +347,12 @@ def prepare_profitalibity_data(
         return
 
     # Check if fpmmTrades.parquet is in the same directory
+    print("Generating the trades file")
     try:
-        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
-        print(f"{trades_filename} loaded")
-    except FileNotFoundError:
-        print("fpmmTrades.parquet not found. Creating fpmmTrades.parquet...")
         fpmmTrades = create_fpmmTrades(rpc, from_timestamp=from_timestamp)
-        fpmmTrades.to_parquet(DATA_DIR /
+        fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
+    except FileNotFoundError:
+        print(f"Error creating {trades_filename} file .")
 
     # make sure trader_address is in the columns
     assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
@@ -547,9 +550,10 @@ def summary_analyse(df):
 
 def run_profitability_analysis(
     rpc: str,
-    tools_filename: str
-    trades_filename: str
-    from_timestamp: float
+    tools_filename: str,
+    trades_filename: str,
+    from_timestamp: float,
+    merge: bool = False,
 ):
     """Create all trades analysis."""
 
@@ -558,12 +562,16 @@ def run_profitability_analysis(
     fpmmTrades, tools = prepare_profitalibity_data(
         rpc, tools_filename, trades_filename, from_timestamp
     )
-    tools["trader_address"] = tools["trader_address"].str.lower()
 
     # all trades profitability df
     print("Analysing trades...")
     all_trades_df = analyse_all_traders(fpmmTrades, tools)
 
+    # TODO merge previous files if requested
+    if merge:
+        all_trades_df = update_trades_parquet(all_trades_df)
+        update_tools_parquet(tools_filename)
+
     # filter invalid markets. Condition: "is_invalid" is True
     invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
     invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
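With the extended signature above, a call to run_profitability_analysis now spells out the tools and trades filenames explicitly. A hypothetical invocation (not part of the commit; the RPC URL is a placeholder):

```python
# Sketch of calling the new signature; RPC_URL is a placeholder, the filenames
# match the defaults used elsewhere in this commit.
from profitability import run_profitability_analysis, DEFAULT_60_DAYS_AGO_TIMESTAMP

RPC_URL = "https://rpc.gnosischain.com"  # placeholder Gnosis RPC endpoint

run_profitability_analysis(
    rpc=RPC_URL,
    tools_filename="tools.parquet",
    trades_filename="fpmmTrades.parquet",
    from_timestamp=DEFAULT_60_DAYS_AGO_TIMESTAMP,
    merge=False,  # True folds the fresh results into the existing parquet files
)
```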
scripts/pull_data.py CHANGED
@@ -12,14 +12,13 @@ from markets import (
     etl as mkt_etl,
     DEFAULT_FILENAME as MARKETS_FILENAME,
 )
-from tools import
-    generate_tools_file,
-)
-from profitability import run_profitability_analysis
+from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
+from profitability import run_profitability_analysis, DEFAULT_60_DAYS_AGO_TIMESTAMP
 from utils import get_question, current_answer, RPC
 from get_mech_info import (
     get_mech_events_last_60_days,
+    get_mech_events_since_last_run,
+    update_json_files,
 )
 from update_tools_accuracy import compute_tools_accuracy
 import gc
@@ -48,10 +47,10 @@ def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> li
     return results
 
 
-def add_current_answer():
+def add_current_answer(tools_filename: str):
     # Get currentAnswer from FPMMS
     fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
-    tools = pd.read_parquet(DATA_DIR /
+    tools = pd.read_parquet(DATA_DIR / tools_filename)
 
     # Get the question from the tools
     logging.info("Getting the question and current answer for the tools")
@@ -61,14 +60,14 @@ def add_current_answer():
     tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
     tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")
     # Save the tools data after the updates on the content
-    tools.to_parquet(DATA_DIR /
+    tools.to_parquet(DATA_DIR / tools_filename, index=False)
     del fpmms
 
 
-def updating_timestamps(rpc: str):
+def updating_timestamps(rpc: str, tools_filename: str):
     web3 = Web3(Web3.HTTPProvider(rpc))
 
-    tools = pd.read_parquet(DATA_DIR /
+    tools = pd.read_parquet(DATA_DIR / tools_filename)
 
     # Convert block number to timestamp
     logging.info("Converting block number to timestamp")
@@ -100,7 +99,7 @@ def updating_timestamps(rpc: str):
     )
 
     # Save the tools data after the updates on the content
-    tools.to_parquet(DATA_DIR /
+    tools.to_parquet(DATA_DIR / tools_filename, index=False)
 
     # Update t_map with new timestamps
     new_timestamps = (
@@ -120,6 +119,57 @@ def updating_timestamps(rpc: str):
     gc.collect()
 
 
+def only_new_weekly_analysis():
+    """Run weekly analysis for the FPMMS project."""
+    rpc = RPC
+    # Run markets ETL
+    logging.info("Running markets ETL")
+    mkt_etl(MARKETS_FILENAME)
+    logging.info("Markets ETL completed")
+
+    # New tools ETL
+    logging.info("Generating the mech json files")
+    # get only new data
+    latest_timestamp = get_mech_events_since_last_run()
+    logging.info(f"Finished generating the mech json files from {latest_timestamp}")
+
+    # Run tools ETL
+    logging.info("Generate and parse the tools content")
+    # generate only new file
+    generate_tools_file("new_tools_info.json", "new_tools.parquet")
+    logging.info("Tools ETL completed")
+
+    add_current_answer("new_tools.parquet")
+
+    # Run profitability analysis
+    logging.info("Running profitability analysis")
+
+    run_profitability_analysis(
+        rpc=rpc,
+        tools_filename="new_tools.parquet",
+        trades_filename="new_fpmmTrades.parquet",
+        from_timestamp=int(latest_timestamp.timestamp()),
+        merge=True,
+    )
+
+    logging.info("Profitability analysis completed")
+
+    # merge new json files with old json files
+    update_json_files()
+
+    # TODO move new parquet files to a tmp folder
+
+    try:
+        updating_timestamps(rpc, TOOLS_FILENAME)
+    except Exception as e:
+        logging.error("Error while updating timestamps of tools")
+        print(e)
+
+    compute_tools_accuracy()
+
+    logging.info("Weekly analysis files generated and saved")
+
+
 def weekly_analysis():
     """Run weekly analysis for the FPMMS project."""
     rpc = RPC
@@ -130,26 +180,29 @@ def weekly_analysis():
 
     # New tools ETL
     logging.info("Generating the mech json files")
+
     get_mech_events_last_60_days()
     logging.info("Finished generating the mech json files")
 
     # Run tools ETL
     logging.info("Generate and parse the tools content")
+
+    generate_tools_file("tools_info.json", TOOLS_FILENAME)
     logging.info("Tools ETL completed")
+    add_current_answer(TOOLS_FILENAME)
 
     # Run profitability analysis
     logging.info("Running profitability analysis")
-    if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
-        os.remove(DATA_DIR / "fpmmTrades.parquet")
     run_profitability_analysis(
         rpc=rpc,
+        tools_filename=TOOLS_FILENAME,
+        trades_filename="fpmmTrades.parquet",
+        from_timestamp=DEFAULT_60_DAYS_AGO_TIMESTAMP,
     )
     logging.info("Profitability analysis completed")
 
     try:
-        updating_timestamps(rpc)
+        updating_timestamps(rpc, TOOLS_FILENAME)
     except Exception as e:
         logging.error("Error while updating timestamps of tools")
         print(e)
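The incremental path added here is driven by only_new_weekly_analysis, while the original weekly_analysis still performs the full 60-day rebuild. A hypothetical driver (not part of the commit; it assumes scripts/ is importable and that all_trades_profitability.parquet already exists, since the incremental run reads its latest creation_timestamp):

```python
# Sketch of choosing between the two entry points added/kept in this commit.
from pull_data import only_new_weekly_analysis, weekly_analysis

if __name__ == "__main__":
    only_new_weekly_analysis()  # incremental: only events since the last run
    # weekly_analysis()         # full refresh over the last 60 days
```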
scripts/tools.py CHANGED
@@ -560,9 +560,7 @@ def etl(
     return tools
 
 
-def parse_store_json_events_parallel(
-    json_events: Dict[str, Any], filename: str = DEFAULT_FILENAME
-):
+def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
     total_nr_events = len(json_events)
     ids_to_traverse = list(json_events.keys())
     print(f"Parsing {total_nr_events} events")
@@ -596,19 +594,19 @@ def parse_store_json_events_parallel(
     try:
         if "result" in tools.columns:
             tools = tools.drop(columns=["result"])
-        tools.to_parquet(DATA_DIR /
+        tools.to_parquet(DATA_DIR / output_filename, index=False)
     except Exception as e:
         print(f"Failed to write tools data: {e}")
 
     return tools
 
 
-def generate_tools_file():
+def generate_tools_file(input_filename: str, output_filename: str):
     """Function to parse the json mech events and generate the parquet tools file"""
     try:
-        with open(JSON_DATA_DIR /
+        with open(JSON_DATA_DIR / input_filename, "r") as file:
             file_contents = json.load(file)
-        parse_store_json_events_parallel(file_contents)
+        parse_store_json_events_parallel(file_contents, output_filename)
     except Exception as e:
         print(f"An Exception happened while parsing the json events {e}")
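For completeness, the parameterised generate_tools_file is called in two ways elsewhere in this commit, roughly as sketched below (assuming the scripts are importable as modules):

```python
from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file

generate_tools_file("tools_info.json", TOOLS_FILENAME)           # full rebuild
generate_tools_file("new_tools_info.json", "new_tools.parquet")  # incremental run
```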
scripts/update_tools_accuracy.py CHANGED
@@ -66,19 +66,32 @@ def update_tools_accuracy(
     print(tools_to_update)
     existing_tools = list(tools_acc["tool"].values)
     for tool in tools_to_update:
+        new_accuracy = acc_info[acc_info["tool"] == tool]["tool_accuracy"].values[0]
+        new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
+        if no_timeline_info:
+            new_min_timeline = None
+            new_max_timeline = None
+        else:
+            new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
+            new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
         if tool in existing_tools:
-            new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
-            if no_timeline_info:
-                new_min_timeline = None
-                new_max_timeline = None
-            else:
-                new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
-                new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
             tools_acc.loc[tools_acc["tool"] == tool, "tool_accuracy"] = new_accuracy
             tools_acc.loc[tools_acc["tool"] == tool, "total_requests"] = new_volume
             tools_acc.loc[tools_acc["tool"] == tool, "min"] = new_min_timeline
             tools_acc.loc[tools_acc["tool"] == tool, "max"] = new_max_timeline
+        else:
+            # new tool to add to the file
+            # tool,tool_accuracy,total_requests,min,max
+            new_row = {
+                "tool": tool,
+                "tool_accuracy": new_accuracy,
+                "total_requests": new_volume,
+                "min": new_min_timeline,
+                "max": new_max_timeline,
+            }
+            tools_acc = pd.concat([tools_acc, pd.DataFrame(new_row)], ignore_index=True)
+
     print(tools_acc)
     return tools_acc
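The else branch above appends a brand-new tool row via pd.concat. A standalone sketch of that append pattern (not part of the commit; tool names and numbers are made up) — note that pandas needs the dict wrapped in a list, or given an explicit index, when all of its values are scalars:

```python
import pandas as pd

# Existing accuracy table with the same columns as tools_accuracy.csv.
tools_acc = pd.DataFrame(
    {
        "tool": ["example-prediction-offline"],  # made-up tool name
        "tool_accuracy": [0.60],
        "total_requests": [120],
        "min": [None],
        "max": [None],
    }
)

new_row = {
    "tool": "example-prediction-online",  # made-up tool name
    "tool_accuracy": 0.55,
    "total_requests": 80,
    "min": None,
    "max": None,
}

# Wrap the dict in a list so pandas builds a one-row frame from scalar values.
tools_acc = pd.concat([tools_acc, pd.DataFrame([new_row])], ignore_index=True)
print(tools_acc)
```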