Spaces:
Running
Running
File size: 5,854 Bytes
d4ca384 cb3900a d4ca384 cb3900a d4ca384 cb3900a d4ca384 fd1374f d4ca384 cb3900a d4ca384 cb3900a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import io
import boto3
import requests
import numpy as np
import polars as pl
from PIL import Image
from botocore.config import Config
import logging
# Module-level logger named after this module
logger = logging.getLogger(__name__)
# S3 configuration and client used to fetch sample images.
# NOTE: the client is created at import time and is used by get_sample to
# presign download URLs for the sample images.
my_config = Config(
    region_name='us-east-1'
)
s3_client = boto3.client('s3', config=my_config)
# Base URLs for EOL and GBIF reference pages; a page/taxon ID is appended
# to these to form the link shown alongside a sample image
EOL_URL = "https://eol.org/pages/"
GBIF_URL = "https://gbif.org/species/"
# Taxonomic ranks ordered from broadest to most specific; the functions in
# this module take an integer index into this list as their `rank` argument
RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
def _build_ref_page(pred_taxon, full_name, gbif_taxon_id, eol_page_id, is_exact):
    '''
    Helper that builds the HTML snippet linking to the GBIF and/or EOL page of a sample.

    Parameters:
    -----------
    pred_taxon : str
        Predicted taxon of the uploaded image.
    full_name : str
        Full taxonomic name of the selected sample (shown when the match is not exact).
    gbif_taxon_id : str or None
        GBIF taxon ID of the sample; falsy when unavailable.
    eol_page_id : str or None
        EOL page ID of the sample; falsy when unavailable.
    is_exact : bool
        Whether the sample is labeled at exactly the predicted rank.

    Returns:
    --------
    ref_page : str
        HTML paragraph with links to whichever reference pages are available.
    '''
    gbif_url = GBIF_URL + gbif_taxon_id if gbif_taxon_id else None
    eol_url = EOL_URL + eol_page_id if eol_page_id else None

    if gbif_url and eol_url:
        if is_exact:
            return f"<p>Check out the <a href={eol_url} target='_blank'>EOL</a> or <a href={gbif_url} target='_blank'>GBIF</a> entry for {pred_taxon} to learn more.</p>"
        return f"<p>Check out an example entry within {pred_taxon} to learn more: {full_name} at <a href={eol_url} target='_blank'>EOL</a> or <a href={gbif_url} target='_blank'>GBIF</a>.</p>"

    if gbif_url:
        if is_exact:
            return f"<p>Check out the <a href={gbif_url} target='_blank'>GBIF</a> entry for {pred_taxon} to learn more.</p>"
        return f"<p>Check out an example GBIF entry within {pred_taxon} to learn more: <a href={gbif_url} target='_blank'>{full_name}</a>.</p>"

    if eol_url:
        if is_exact:
            return f"<p>Check out the <a href={eol_url} target='_blank'>EOL</a> entry for {pred_taxon} to learn more.</p>"
        return f"<p>Check out an example EOL entry within {pred_taxon} to learn more: <a href={eol_url} target='_blank'>{full_name}</a>.</p>"

    # Neither ID is available: still let the caller show the image instead of
    # failing while concatenating a URL with a missing ID.
    return f"<p>Sorry, we do not have a GBIF or EOL page for {pred_taxon}.</p>"


def get_sample(df, pred_taxon, rank):
    '''
    Function to retrieve a sample image of the predicted taxon and GBIF or EOL page link for more info.

    Parameters:
    -----------
    df : DataFrame
        DataFrame with all sample images listed and their filepaths (in "file_path" column).
    pred_taxon : str
        Predicted taxon of the uploaded image.
    rank : int
        Index of rank in RANKS chosen for prediction.

    Returns:
    --------
    img : PIL.Image or None
        Sample image of predicted taxon for display; None when no sample exists
        or when retrieval failed.
    ref_page : str
        HTML snippet linking to the GBIF and/or EOL page for the taxon (may be a
        lower rank, e.g., species sample), or a plain message explaining why no
        image is available.
    '''
    logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
    try:
        sample = get_sample_data(df, pred_taxon, rank)
    except Exception as e:
        logger.error(f"Error retrieving sample data: {e}")
        return None, f"We encountered the following error trying to retrieve a sample image: {e}."

    # Inspect the file path before unpacking the remaining fields so that a
    # "no match" result is reported as such, however many placeholder values
    # accompany it.
    filepath = sample[0]
    if filepath is None:
        logger.warning(f"No sample image found for taxon: {pred_taxon}")
        return None, f"Sorry, our GBIF and EOL images do not include {pred_taxon}."

    # Get sample image of selected individual
    try:
        _, gbif_taxon_id, eol_page_id, full_name, is_exact = sample
        img_src = s3_client.generate_presigned_url(
            'get_object',
            Params={'Bucket': 'treeoflife-200m-sample-images',
                    'Key': filepath},
        )
        # Bound the wait so a stalled download cannot hang the request forever.
        img_resp = requests.get(img_src, timeout=30)
        # Surface HTTP failures directly rather than as an image-decoding error.
        img_resp.raise_for_status()
        img = Image.open(io.BytesIO(img_resp.content))
        ref_page = _build_ref_page(pred_taxon, full_name, gbif_taxon_id, eol_page_id, is_exact)
        logger.info(f"Successfully retrieved sample image and page for {pred_taxon}")
        return img, ref_page
    except Exception as e:
        logger.error(f"Error retrieving sample image: {e}")
        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
def get_sample_data(df, pred_taxon, rank):
    '''
    Function to randomly select a sample individual of the given taxon and provide its reference page IDs.

    Samples labeled at exactly the predicted rank (all lower ranks null or empty)
    are preferred; if none exist, any sample within the predicted taxon is used.

    Parameters:
    -----------
    df : DataFrame
        DataFrame with all sample images listed and their filepaths (in "file_path" column).
    pred_taxon : str
        Predicted taxon of the uploaded image, as space-separated rank names
        ordered as in RANKS.
    rank : int
        Index of rank in RANKS chosen for prediction.

    Returns:
    --------
    filepath : str or None
        Filepath of selected sample image for predicted taxon; None when no
        sample matches.
    gbif_taxon_id : str or None
        GBIF page ID associated with the selected sample for more information.
    eol_page_id : str or None
        EOL page ID associated with the selected sample for more information.
    full_name : str
        Full taxonomic name of the selected sample ("" when no sample matches).
    is_exact : bool
        Flag indicating if the match is exact (i.e., with empty lower ranks).
    '''
    taxon_parts = pred_taxon.split(" ")
    for idx in range(rank + 1):
        df = df.filter(pl.col(RANKS[idx]) == taxon_parts[idx])

    if df.shape[0] == 0:
        # Always return all five documented fields so callers can unpack safely.
        return None, None, None, "", False

    # First, try to find entries with empty lower ranks
    exact_df = df
    for lower_rank in RANKS[rank + 1:]:
        exact_df = exact_df.filter((pl.col(lower_rank).is_null()) | (pl.col(lower_rank) == ""))

    # If no exact matches, fall back to any entry within the specified taxon
    is_exact = exact_df.shape[0] > 0
    df_filtered = (exact_df if is_exact else df).sample()

    # An exact match is named down to the predicted rank; otherwise the name
    # includes the lower ranks of the chosen sample as well.
    name_ranks = RANKS[:rank + 1] if is_exact else RANKS
    # Skip null/empty ranks: joining None raises TypeError and empty strings
    # would leave stray spaces in the name.
    full_name = " ".join(part for part in df_filtered.select(name_ranks).row(0) if part)

    return (
        df_filtered["file_path"][0],
        df_filtered["gbif_taxon_id"].cast(pl.String)[0],
        df_filtered["eol_page_id"].cast(pl.String)[0],
        full_name,
        is_exact,
    )
|