Spaces:
Sleeping
Sleeping
import gradio as gr | |
from PIL import Image, ImageDraw, ImageFont | |
import io | |
import base64 | |
import requests | |
import json | |
import threading | |
import uuid | |
import os | |
from datetime import datetime | |
import urllib.parse | |
# import openai | |
from dotenv import load_dotenv | |
load_dotenv() | |
hugging_face_api_key = os.getenv("HUGGING_FACE_API_KEY") | |
openai_api_key = os.getenv("OPENAI_API_KEY") | |
print (hugging_face_api_key) | |
print (openai_api_key) | |
# Set the API key for OpenAI | |
# openai.api_key = openai_api_key | |
import requests | |
openai_key = openai_api_key | |
user_prompt_examples =[ | |
"Find me the stars in the hayedes cluster.", | |
"Find me the stars in the orion cluster.", | |
"Find me the stars in the andromeda cluster.", | |
"Find me the stars in the milky way.", | |
"Find me the stars in the virgo cluster.", | |
"Find me the stars in the bootes cluster.", | |
"Find me the stars in the perseus cluster.", | |
"Find me the stars in the hydra cluster.", | |
"Find me the stars in the Pleiades cluster.", | |
"Retrieve stars within 50 light-years of Earth.", | |
"List all stars in the Orion Nebula.", | |
"Get data on stars with a radial velocity greater than 100 km/s.", | |
"Fetch all known white dwarfs in the Sirius star system.", | |
"Identify stars in the globular cluster Omega Centauri.", | |
"Display stars from the Andromeda Galaxy that are visible from Earth.", | |
"Find stars in the Scorpius constellation with a magnitude brighter than 5.", | |
"Search for stars with high proper motion in the Ursa Major group." | |
] | |
def talk_to_llm(user_prompt): | |
headers = { | |
"Content-Type": "application/json", | |
"Authorization": f"Bearer {openai_key}" | |
} | |
prompt_text = f''' | |
# As StarGateVR, your role is specialized in customizing ADQL (Astronomical Data Query Language) queries for astronomers. Your focus is particularly on integrating specific 'WHERE' clauses into a standard query template. We will put your WHERE clause into the completed query template. In some cases, you may need to add additional parameters to the SELECT clause. | |
# The query includes essential fields like source_id, positional data (RA, DEC), motion data (pmra, pmdec), light parameters, and placeholders for additional parameters. | |
# Customizing 'WHERE' Clause: Your primary task is to adapt the 'WHERE' clause to fit the user's specific astronomical requirements. This often involves filtering stars based on various criteria such as distance, location in the sky, brightness, etc. Always include, at a minimum, the SELECT and FROM clauses exactly as given in the template. Note that the WHERE clause must reference variables by the field name and not the "AS" name. There is a special case for the part of the SELECT that is " 'Gaia DR3 ' || source_id as source_id", in the WHERE clause this field should always be referred to by "source_id". | |
# Note that any parameter used in the WHERE clause should also be included in the SELECT clause. | |
# Here is an example of the WHERE clause: | |
# ``` | |
# WHERE (parallax >= 11.11 AND parallax_over_error>=20 AND | |
# astrometric_excess_noise<=2) | |
# ``` | |
# Bounds on Parallax: Always include bounds on parallax in the 'WHERE' clause. This is important as it helps in retrieving stars within a specified 3D region of space. | |
As StarGateVR, your role is specialized in customizing ADQL (Astronomical Data Query Language) queries for astronomers. Your focus is particularly on integrating specific 'WHERE' clauses into a standard query template, while maintaining the 'SELECT' and 'FROM' clauses exactly as provided. Here are the key features and instructions that define your functionality. | |
Standard Query Template: You always use one of the two "Query Template" (SampleGaiaArchiveQuery or StarGateVR MocaDB Query) attached, for your queries. If it isn't clear to you, ask the user if they want to access the Gaia or Moca database. This template includes essential fields like source_id, positional data (RA, DEC), motion data (pmra, pmdec), light parameters, and placeholders for additional parameters. You must ensure that all SELECT fields from this template are included in the query. | |
Customizing 'WHERE' Clause: Your primary task is to adapt the 'WHERE' clause to fit the user's specific astronomical requirements. This often involves filtering stars based on various criteria such as distance, location in the sky, brightness, etc. Always include the SELECT and FROM clauses exactly as given in the template. Note that the WHERE clause must reference variables by the field name and not the "AS" name. There is a special case for the part of the SELECT that is " '#' || source_id as source_id", in the WHERE clause this field should always be referred to by "source_id". | |
Bounds on Parallax: Always include bounds on parallax in the 'WHERE' clause. This is important as it helps in retrieving stars within a specified 3D region of space. | |
Gaia TAP service: Create a Gaia TAP URL encoding of the query. Include it your response in a text window. This means the URL should begin with "https://gea.esac.esa.int/tap-server/tap/sync?REQUEST=doQuery&LANG=ADQL&FORMAT=csv&QUERY=". Use a method of link creation that ensures strict adherence to URL encoding standards so that the embedded link is clickable. Notify the user if you see that the link may not work properly on certain browsers (Chrome, Safari, Edge, etc.). To formulate the GAIA TAP ADQL query, you encode it so it can be included in responses to web clients. This involves converting spaces to '+', ensuring the inclusion of '%27%23%27' for the 3 characters in the '#' entry in the query, and maintaining the overall integrity of the query in a URL format. Include the query encoding that it should return a CSV formatted file. It's crucial to note that all requests should be synchronous (sync), not asynchronous (async), to ensure real-time processing and immediate feedback. Always use the secure https URL, not http. When encoding ADQL queries for the GAIA TAP service, it's important to distinguish between actual plus signs ('+') in the query, which require URL encoding, and spaces, which should be replaced with a '+' character. This distinction is crucial for the correct interpretation of the query by the service. | |
Ensure Correct Database Schema References: Queries must use accurate and existing column names and table names as per the Gaia Archive database schema. Verify these details against the Gaia Archive documentation or schema browser to construct effective queries. In some cases data fields may be requested from the Hipparcos tables in the Gaia Archive, so also check for proper field names, as they vary between Gaia and Hipparcos. For example "ra" is "ra", but "dec" is "de". | |
Note that some users may refer to the database server call Moca or MocaDB. In that case use the StarGateVR MocaDB Query Template to model the query structure. In this case don't try to create the URL TAP version of the Query. | |
Query Validation Checklist: | |
1. Confirm that all fields from the SELECT section of the SampleGaiaArchiveQuery template are included. | |
2. Ensure the FROM clause specifies the correct data source as per the template. | |
3. Review the WHERE clause to include necessary conditions for data quality and relevance to the query objectives. | |
4. Verify URL encoding for the Gaia TAP service link to ensure all characters and spaces are correctly formatted. | |
5. Double-check that the query returns results in CSV format and is compatible with synchronous requests as required by the Gaia TAP service. | |
User-Friendly Communication: You are designed to interact with astronomers at the undergraduate level and higher, also with data analysts with varying levels of expertise in ADQL. This means you effectively communicate to ensure that the queries are accurate and clear to users of different skill levels. | |
Target Audience: Your role is to make the process of querying astronomical data more accessible and tailored to the specific needs of astronomers and researchers, ensuring that they can efficiently retrieve the data they require for their studies. | |
The json structure to return is | |
{{ | |
"reasoning": "<Think through what the user is asking for, and what you know about the GAIA DB and astrsonomy to create their request.>", | |
"where_clause": "<a properly formatted ADQL query that will return the stars the user is asking for>" | |
}} | |
The users prompt is "{user_prompt}" | |
''' | |
data = { | |
"model": "gpt-4-turbo", | |
"response_format": { "type": "json_object" }, | |
"messages": [ | |
{"role": "system", "content": prompt_text}, | |
], | |
"temperature": .6 | |
} | |
print ("PROMPT TEXT:\n") | |
print (prompt_text) | |
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=data) | |
response_json = response.json() | |
print(response_json) | |
output_json = json.loads(response_json['choices'][0]['message']['content']) | |
try: | |
where_clause = output_json['where_clause'] | |
reasoning = output_json['reasoning'] | |
print(f"dialog: {reasoning}") | |
print(f"query: {where_clause}") | |
# Update theme on the server | |
return reasoning, where_clause | |
except KeyError as e: | |
print(f"Key error: {e}") | |
return "Failed to generate query." | |
except json.JSONDecodeError: | |
print("JSON decoding failed") | |
return "Invalid response query." | |
def complete_query(partial_query): | |
query_template = f''' | |
SELECT TOP 300000 | |
-- IMPORTANT NOTE: Parameters that are in units of Magnitude must have an "as" name that ends in "_mag" | |
--Required parameters | |
-- ID - force a leading hash symbol to stop Excel from reading the ID number as a float | |
'Gaia DR3 ' || source_id as source_id, | |
-- Measured Position | |
ra, | |
dec, | |
(1 / parallax)*1000 as dist, | |
-- Measured Motion | |
pmra, | |
pmdec, | |
radial_velocity as rv, | |
--Key source light params for HR diagram | |
phot_g_mean_mag, | |
bp_rp as bp_rp_mag, | |
--Optional plot parameters (you can add anything you want here, just give good unique "as" names - it will show up in the .cvs and hence in StarGate | |
phot_rp_mean_mag, | |
phot_bp_mean_mag, | |
g_rp as g_rp_mag, | |
bp_g as bp_g_mag, | |
radial_velocity_error as rv_error, | |
parallax_error, | |
-- Additional parameters that appear in the WHERE clause should be added here | |
-- Example: MH_GSPPhot | |
-- Note: No comma after this last SELECT item | |
parallax_over_error | |
-- Use DR3 | |
FROM gaiadr3.gaia_source | |
{partial_query} | |
''' | |
print ("completed query") | |
print (query_template) | |
print (" ") | |
return query_template | |
def download_url_from_query(query, user_prompt): | |
# Create the TAP URL | |
tap_url = "https://gea.esac.esa.int/tap-server/tap/sync?REQUEST=doQuery&LANG=ADQL&FORMAT=csv&QUERY=" | |
query = urllib.parse.quote_plus(query) | |
tap_url = tap_url + query | |
print ("downloading from tap") | |
filename = user_prompt.replace(' ', '_').replace('.', '') + ".csv" | |
print (filename) | |
download_from_tap(tap_url, filename) | |
def create_markdown_url_from_query(query): | |
# Create the TAP URL | |
tap_url = "https://gea.esac.esa.int/tap-server/tap/sync?REQUEST=doQuery&LANG=ADQL&FORMAT=csv&QUERY=" | |
query = urllib.parse.quote_plus(query) | |
tap_url = tap_url + query | |
print (tap_url) | |
download_from_tap(tap_url, "output_file.csv") | |
markdown_link = download_data(tap_url) | |
return markdown_link | |
def download_from_tap(url, output_path): | |
try: | |
response = requests.get(url) | |
response.raise_for_status() # Raises an HTTPError for bad responses (4xx, 5xx) | |
with open(output_path, 'wb') as f: | |
f.write(response.content) | |
print(f"Data successfully downloaded to {output_path}.") | |
except requests.exceptions.HTTPError as err: | |
print(f"HTTP error occurred: {err}") # Handle specific HTTP errors | |
except Exception as err: | |
print(f"An error occurred: {err}") # Handle other possible errors | |
def download_data(tap_url): | |
return f"[Download Data]({tap_url})" | |
# Main function to process all queries | |
def process_queries(): | |
results = {} | |
for prompt in user_prompt_examples: | |
print (f"Processing prompt: {prompt}") | |
reasoning, where_clause = talk_to_llm(prompt) | |
query = complete_query(where_clause) | |
download_url_from_query(query, prompt) | |
return results | |
with gr.Blocks() as demo: | |
with gr.Row(): | |
user_prompt = gr.Textbox(label="Enter your query for the LLM", value="Find me the stars in the hayedes cluster.") | |
submit_btn = gr.Button("Ask LLM") | |
reasoning_output = gr.Textbox(label="Reasoning") | |
where_clause_output = gr.Textbox(label="Where Clause") | |
make_query_complete_bt = gr.Button("Complete Query") | |
complete_query_output = gr.Textbox(label="Complete Query") | |
make_query_complete_bt.click(fn=complete_query, inputs=where_clause_output, outputs=[complete_query_output]) | |
submit_btn.click(fn=talk_to_llm, inputs=user_prompt, outputs=[reasoning_output, where_clause_output]) | |
create_tap_url_bt = gr.Button("Create TAP URL") | |
download_data_output = gr.Markdown() | |
create_tap_url_bt.click(fn=create_markdown_url_from_query, inputs=complete_query_output, outputs=[download_data_output]) | |
#demo.launch(server_name="0.0.0.0", server_port=7861, share=True, debug=True) | |
demo.launch() | |
#process_queries() |