Yago Bolivar
fix: update file paths for downloaded files and saved questions to ensure proper directory structure
b9cda5d
import requests | |
import os | |
import json | |
from dotenv import load_dotenv | |
''' This script fetches all questions from the API and downloads their associated files.
It saves the questions to a JSON file and downloads the files to the local
directory "data/downloaded_files".
Questions are saved in "data/question_set/new_gaia_questions.json".
'''
load_dotenv() | |
BASE_URL = os.getenv("BASE_URL") | |
def download_file(task_id, file_name, download_dir="data/downloaded_files"):
    """Download the file attached to a task and store it on disk.

    Sends a GET request to ``{BASE_URL}/files/{task_id}`` and, when the
    server answers with status 200, writes the response body to
    ``download_dir/file_name``.

    Args:
        task_id: Identifier interpolated into the file endpoint URL.
        file_name: Name under which the downloaded content is stored.
        download_dir: Directory that receives the file; it is created when
            missing. Defaults to ``"data/downloaded_files"``.

    Returns:
        True when the file was downloaded and written, False when the
        server answered with any status code other than 200.
    """
    file_endpoint = f"{BASE_URL}/files/{task_id}"
    file_response = requests.get(file_endpoint)
    if file_response.status_code != 200:
        print(f"Failed to download file for task_id {task_id}. Status code: {file_response.status_code}")
        return False

    # Create and write into the SAME directory. Previously the directory
    # created ("data/downloaded_files") differed from the one written to
    # ("downloaded_files"), so open() failed on a fresh checkout.
    os.makedirs(download_dir, exist_ok=True)
    file_path = os.path.join(download_dir, file_name)
    with open(file_path, "wb") as f:
        f.write(file_response.content)
    print(f"Downloaded file for task_id {task_id} to {file_path}")
    return True
def get_all_questions():
    """Fetch every question from the API and download any attached files.

    Requests ``{BASE_URL}/questions``; for each returned entry that has a
    ``task_id`` and a non-empty ``file_name``, the attachment is fetched
    through ``download_file``. The number of successful downloads is printed.

    Returns:
        The decoded JSON payload of the questions endpoint.

    Raises:
        Exception: If the questions endpoint answers with a status code
            other than 200.
    """
    response = requests.get(f"{BASE_URL}/questions")
    downloaded_file_counter = 0

    # Bail out early so the happy path below stays un-nested.
    if response.status_code != 200:
        raise Exception(f"API request failed with status code {response.status_code}")

    all_questions = response.json()
    for entry in all_questions:
        has_attachment = (
            "task_id" in entry
            and "file_name" in entry
            and entry["file_name"]
        )
        if not has_attachment:
            continue
        # Only successful downloads count towards the total.
        if download_file(entry["task_id"], entry["file_name"]):
            downloaded_file_counter += 1

    print(f"Total downloaded files: {downloaded_file_counter}")
    return all_questions
# Script entry: fetch all questions (downloading their attachments along the
# way) and persist the question list as pretty-printed JSON.
questions = get_all_questions()
print(f"Total questions retrieved: {len(questions)}")

# Make sure the output directory exists before writing; otherwise open()
# raises FileNotFoundError after all the downloads have already completed.
os.makedirs("data/question_set", exist_ok=True)
with open("data/question_set/new_gaia_questions.json", "w") as file:
    json.dump(questions, file, indent=4)
print("Questions successfully saved to new_gaia_questions.json")