# Hugging Face Spaces page banner captured with this source (Space status: Sleeping).
import os
from datetime import datetime

import chromadb
import streamlit as st

from patentwiz import preprocess_data, qa_agent
# Fail fast when the OpenAI credential is absent: report it in the UI and
# halt this script run before any patent processing is attempted.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    st.error(
        "OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets."
    )
    st.stop()

# Reset ChromaDB's shared system cache (workaround for the tenant issue).
chromadb.api.client.SharedSystemClient.clear_system_cache()
# Extraction instructions handed to qa_agent.call_QA_to_json for every patent.
# The text describes the RF-related measurements and patent metadata to pull
# out, and the JSON structure the answer is expected to follow. The literal
# must contain only the prompt itself: any stray characters at line ends would
# be sent to the model verbatim.
PROMPT = """
Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:
1. **Physical Measurements**:
- Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
- For each measurement, provide the following details:
- Substance or component being measured.
- Specific value or range of the measurement.
- Unit of measurement (if provided).
- Measurement type or context (e.g., frequency, impedance, gain, etc.).
2. **Patent Metadata**:
- Title of the patent.
- Abstract summarizing the technical focus.
- Metadata, including:
- Patent number.
- Filing date.
- Inventors.
- Assignee (if applicable).
### Output Format:
The response should be formatted as a structured JSON object, as shown below:
{
"Patent_Title": "Title",
"Patent_Abstract": "Abstract",
"Patent_Metadata": {
"Patent_Number": "Number",
"Filing_Date": "Date",
"Inventors": ["Name1", "Name2"],
"Assignee": "Assignee Name"
},
"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
}
// Additional measurements
]
}
### Example:
If the patent discusses "A novel RF power amplifier operating at 2.4 GHz with a bandwidth of 20 MHz and an output power of 30 dBm," the output should be:
{
"Patent_Title": "High-Efficiency RF Power Amplifier",
"Patent_Abstract": "A novel RF power amplifier with improved impedance matching for wireless communication devices.",
"Patent_Metadata": {
"Patent_Number": "US12345678B2",
"Filing_Date": "2024-06-20",
"Inventors": ["Jane Doe", "John Smith"],
"Assignee": "TechWave Inc."
},
"Content": [
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "2.4",
"Measured_unit": "GHz",
"measurement_type": "operating frequency"
},
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "20",
"Measured_unit": "MHz",
"measurement_type": "bandwidth"
},
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "30",
"Measured_unit": "dBm",
"measurement_type": "output power"
}
]
}
### Key Considerations:
- Extract as much detailed information as possible based on the text.
- Retain accuracy: Avoid inferring data not explicitly mentioned.
- Follow the structured JSON format strictly for consistency.
- Exclude any irrelevant or redundant information. Focus only on RF and related hardware technical data.
"""
# Page heading followed by a short description of what the app does.
st.title("Technical Measurements Extractor for Patents")
st.write(
    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
    "Provide a date range to download patents and analyze them using GPT models."
)

# Input widgets. The values collected here are read by the analysis step
# further down, so the variable names are part of this script's contract.
st.header("Enter Date Range for Patent Analysis")
start_date_input = st.text_input(
    "Enter the start date (YYYY-MM-DD):",
    value="2024-06-20",
)
end_date_input = st.text_input(
    "Enter the end date (YYYY-MM-DD):",
    value="2024-06-27",
)
num_patents_to_analyze = st.number_input(
    "Number of patents to analyze:",
    min_value=1,
    value=3,
    step=1,
    help="Specify how many patents you want to analyze.",
)
model_choice = st.selectbox(
    "Select a model for analysis:",
    ["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)
logging_enabled = st.checkbox(
    "Enable logging?",
    value=False,
    help="Toggle logging for debugging purposes.",
)
# Run Analysis Button
# When clicked: validate the date range, download/preprocess the patents for
# that range, run the QA agent on the first N of them, then show the total
# cost and each JSON result. Validation failures show an error and end the
# script run via st.stop().
if st.button("Analyze Patents"):
    # Ignore surrounding whitespace so an otherwise valid date such as
    # "2024-06-20 " is not rejected by strptime.
    start_text = start_date_input.strip()
    end_text = end_date_input.strip()
    if not start_text or not end_text:
        st.error("Please enter both start and end dates!")
        st.stop()

    # Only the parsing itself is guarded by `except ValueError`: a ValueError
    # raised later by the download or analysis code must not be reported to
    # the user as a date-format problem.
    try:
        start_date = datetime.strptime(start_text, "%Y-%m-%d")
        end_date = datetime.strptime(end_text, "%Y-%m-%d")
    except ValueError as ve:
        st.error(f"Invalid date format: {ve}")
        st.stop()

    # Validate date range
    if start_date > end_date:
        st.error("End date must be after start date!")
        st.stop()

    # NOTE(review): the st.stop() call inside this try relies on Streamlit's
    # stop signal not being an `Exception` subclass, as the original code
    # did -- confirm against the installed Streamlit version.
    try:
        # Step 1: Download and preprocess patents
        with st.spinner("Downloading and extracting patents..."):
            saved_patent_names = preprocess_data.parse_and_save_patents(
                start_date, end_date, logging_enabled
            )
        if not saved_patent_names:
            st.error("No patents found for the given date range.")
            st.stop()
        st.success(f"{len(saved_patent_names)} patents found and processed!")

        # Step 2: Analyze patents using GPT
        # The agent receives the full name list plus an index into it, so
        # iterate over indices of the first N saved patents (fewer when the
        # date range produced fewer than N).
        patent_count = min(int(num_patents_to_analyze), len(saved_patent_names))
        total_cost = 0
        results = []
        st.write("Starting patent analysis...")
        for patent_index in range(patent_count):
            cost, output = qa_agent.call_QA_to_json(
                PROMPT,
                start_date.year,  # Pass start_date year, month, and day
                start_date.month,
                start_date.day,
                saved_patent_names,
                patent_index,
                logging_enabled,
                model_choice,
            )
            total_cost += cost
            results.append(output)

        # Step 3: Display results
        st.write(f"**Total Cost:** ${total_cost:.4f}")
        st.write("### Analysis Results:")
        for idx, result in enumerate(results):
            st.subheader(f"Patent {idx + 1}")
            st.json(result)
    except Exception as e:
        # Top-level UI boundary: surface any failure from the download or
        # analysis steps to the user instead of crashing the app.
        st.error(f"An unexpected error occurred: {e}")