Spaces:

solfedge
/

Geologist_AI

Sleeping

File size: 2,459 Bytes

71c32d5


import os
import requests
from PIL import Image
from io import BytesIO
import time
from config import IMAGE_DIR, DATASET_SOURCES

class DataCollector:
    """Download sample geological images into a local directory.

    ``image_dir`` (where images are written) and ``sources`` (returned
    unchanged by :meth:`get_dataset_info`) are read from the project's
    ``config`` module when an instance is created.
    """

    # Example URLs only - in practice you'd scrape or use APIs.
    SAMPLE_URLS = (
        "https://c7.alamy.com/comp/3AJ86J0/gold-on-quartz-bradshaw-mountains-arizona-gold-on-quartz-from-the-bradshaw-mountains-arizona-is-a-classic-and-highly-sought-after-mineral-associa-3AJ86J0.jpg",
        "https://www.nuggetsbygrant.com/cdn/shop/products/243A0948.jpg?v=1670014792&width=1080",
        "https://news.rice.edu/sites/g/files/bxs2656/files/inline-images/BIF5-0524_540_1.jpeg",
        "https://c7.alamy.com/comp/2FNKTF3/copper-bearing-rock-against-a-gravel-ground-surface-2FNKTF3.jpg",
        "https://www.shutterstock.com/shutterstock/photos/2618131965/display_1500/stock-photo-close-up-of-a-rough-weathered-copper-ore-stone-with-natural-crystal-formations-2618131965.jpg",
        "https://geologyistheway.com/wp-content/uploads/2021/06/118-milky-quartz.jpg",
        "https://geologyistheway.com/wp-content/uploads/2021/06/201210-4-1024x726.jpg",
    )

    # Pixel modes the JPEG writer accepts as-is; anything else (RGBA, P, LA,
    # ...) is converted to RGB before saving under a ``.jpg`` name.
    _JPEG_MODES = frozenset({"L", "RGB", "CMYK"})

    def __init__(self):
        self.image_dir = IMAGE_DIR
        self.sources = DATASET_SOURCES

    def collect_sample_images(self, urls=None):
        """Collect sample images from public sources.

        Each URL is fetched and saved in ``self.image_dir`` as
        ``sample_core_<n>.jpg``, where ``n`` is the 1-based position of the
        URL in the list. A URL that fails to download, decode, or save is
        reported on stdout and skipped; the remaining URLs are still tried.

        Args:
            urls: Optional iterable of image URLs. Defaults to
                ``SAMPLE_URLS`` when ``None``; an empty iterable downloads
                nothing.
        """
        targets = self.SAMPLE_URLS if urls is None else urls

        # Without this, every save (and the final directory listing) fails
        # when the configured directory has not been created yet.
        os.makedirs(self.image_dir, exist_ok=True)

        print("Collecting sample drill core images...")
        for index, url in enumerate(targets, start=1):
            filename = f"sample_core_{index}.jpg"
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()

                with Image.open(BytesIO(response.content)) as img:
                    # JPEG cannot store alpha or palette data, so normalise
                    # unsupported modes instead of failing the download.
                    if img.mode in self._JPEG_MODES:
                        writable = img
                    else:
                        writable = img.convert("RGB")
                    writable.save(os.path.join(self.image_dir, filename))

                print(f"Downloaded: {filename}")
                time.sleep(0.5)  # Be respectful to servers
            except Exception as e:
                # Best-effort boundary: one bad URL must not abort the rest.
                print(f"Failed to download {url}: {e}")

        # Counts every entry in the directory, not only this run's downloads.
        print(f"Collected {len(os.listdir(self.image_dir))} images")

    def get_dataset_info(self):
        """Return information about available datasets."""
        return self.sources

if __name__ == "__main__":
    # Script entry point: download the sample images, then list each
    # dataset entry exposed by the collector.
    data_collector = DataCollector()
    data_collector.collect_sample_images()

    print("\nAvailable geological datasets:")
    for entry in data_collector.get_dataset_info():
        summary_line = f"- {entry['name']}: {entry['description']}"
        print(summary_line)
        url_line = f"  URL: {entry['url']}\n"
        print(url_line)