"""Gradio app for hand-labelling satellite images as "Cool" / "Not Cool".

Images are streamed from the "satellogic" dataset via ``earthview``, shown in
shuffled chunks, and the collected ratings can be exported to a Parquet file.
"""

import json
import os
import random
from itertools import islice

import gradio as gr
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

import earthview as ev
import utils

# Configuration
chunk_size = 100  # Number of streamed samples shuffled together at a time
LABELS_PATH = "labeled_data.parquet"  # File written by save_labels()
EXHAUSTED_MESSAGE = "Dataset exhausted"  # Placeholder image id when no image is left

# Load the Satellogic dataset (streaming)
dataset = ev.load_dataset("satellogic", streaming=True)
data_iter = iter(dataset)

shuffled_chunk = []  # Samples of the chunk currently being served
chunk_iter = None  # Iterator over shuffled_chunk; None when a new chunk is needed

# Labels collected so far (one dict per rated image) and the ids they cover.
labels_list = []
labeled_ids = set()


def get_next_image():
    """Return the next image that has not been labelled yet.

    Samples are pulled from the stream ``chunk_size`` at a time and shuffled
    within each chunk. When the stream runs dry it is reloaded once per call;
    if a complete pass after that reload still yields no unlabelled image the
    placeholder result is returned instead of looping forever.

    Returns:
        A tuple ``(image, image_id, bounds, google_maps_link)``. ``image_id``
        is ``str(bounds)``. When nothing is left to show the tuple is
        ``(None, "Dataset exhausted", None, None)``.
    """
    global data_iter, shuffled_chunk, chunk_iter

    restarted = False  # Whether this call has already reloaded the stream
    while True:
        # If we don't have a current chunk or it's exhausted, get a new one.
        if not shuffled_chunk or chunk_iter is None:
            chunk = list(islice(data_iter, chunk_size))
            if not chunk:
                if restarted:
                    # One full pass after a reload found nothing to show.
                    print("Still no data after reset.")
                    return None, EXHAUSTED_MESSAGE, None, None
                print("Dataset exhausted, resetting iterator.")
                data_iter = iter(ev.load_dataset("satellogic", streaming=True))
                restarted = True
                continue
            random.shuffle(chunk)
            shuffled_chunk = chunk
            chunk_iter = iter(shuffled_chunk)

        try:
            sample = next(chunk_iter)
        except StopIteration:
            # Current chunk is used up; fetch a new one on the next iteration.
            shuffled_chunk = []
            chunk_iter = None
            continue

        sample = ev.item_to_images("satellogic", sample)
        bounds = sample["metadata"]["bounds"]
        image_id = str(bounds)

        # Skip images that were already labelled (identified by image_id).
        if image_id in labeled_ids:
            continue

        image = sample["rgb"][0]
        google_maps_link = utils.get_google_map_link(sample, "satellogic")
        return image, image_id, bounds, google_maps_link


def rate_image(image_id, bounds, rating, google_maps_link=""):
    """Record a rating for the displayed image and fetch the next one.

    Args:
        image_id: Identifier of the image being rated.
        bounds: Bounds of the image as shown in the UI.
        rating: The selected rating value.
        google_maps_link: Link shown for the image; stored with the label so
            it ends up in the downloaded data.

    Returns:
        The ``(image, image_id, bounds, google_maps_link)`` tuple produced by
        ``get_next_image()`` for the next image to display.
    """
    # Do not record a label when no real image is being shown (empty id or
    # the "Dataset exhausted" placeholder).
    if image_id and image_id != EXHAUSTED_MESSAGE:
        labels_list.append(
            {
                "image_id": image_id,
                "bounds": bounds,
                "rating": rating,
                "google_maps_link": google_maps_link or "",
            }
        )
        labeled_ids.add(image_id)

    return get_next_image()


def save_labels():
    """Write all collected labels to a Parquet file and return its path."""
    table = pa.Table.from_pylist(labels_list)
    pq.write_table(table, LABELS_PATH)
    return LABELS_PATH


# Get the first image and its details before building the UI so they can be
# passed as the components' initial values.
initial_image, initial_image_id, initial_bounds, initial_google_maps_link = (
    get_next_image()
)

# Gradio interface
with gr.Blocks() as iface:
    with gr.Row():
        with gr.Column():
            image_out = gr.Image(label="Satellite Image", value=initial_image)
            image_id_out = gr.Textbox(
                label="Image ID", visible=False, value=initial_image_id
            )
            bounds_out = gr.Textbox(
                label="Bounds",
                visible=False,
                value=None if initial_bounds is None else str(initial_bounds),
            )
            google_maps_link_out = gr.Textbox(
                label="Google Maps Link",
                visible=True,
                value=initial_google_maps_link,
            )
        with gr.Column():
            rating_radio = gr.Radio(["Cool", "Not Cool"], label="Rating")
            submit_button = gr.Button("Submit Rating")
            download_button = gr.Button("Download Labels")
            download_file = gr.File(label="Download")

    submit_button.click(
        rate_image,
        # The link textbox is passed along so the label keeps the real link.
        inputs=[image_id_out, bounds_out, rating_radio, google_maps_link_out],
        outputs=[image_out, image_id_out, bounds_out, google_maps_link_out],
    )

    download_button.click(
        save_labels,
        inputs=None,
        outputs=[download_file],
    )

iface.launch(share=True)