Spaces:

Arxived
/

search-patents-datewise

Sleeping

App Files Files Community

search-patents-datewise / app.py

DrishtiSharma

Create app.py

dfbdb14 verified 8 months ago

raw

history blame

6.38 kB

	import os
	import chromadb
	from datetime import datetime
	import streamlit as st
	from patentwiz import preprocess_data, qa_agent

	# Stop the app with a visible error when the OpenAI key is missing or empty.
	api_key = os.environ.get("OPENAI_API_KEY")
	if api_key is None or api_key == "":
	    st.error("OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets.")
	    st.stop()

	# Reset ChromaDB's shared system cache (workaround for the tenant issue).
	chromadb.api.client.SharedSystemClient.clear_system_cache()

	# Instruction text handed to qa_agent.call_QA_to_json as its first argument.
	# It asks the model to pull RF-related physical measurements and basic patent
	# metadata out of a patent and to answer with a JSON object of the shape shown
	# in the template and in the worked example below.
	# NOTE: the "// Additional measurements" line inside the template is a
	# placeholder for the model's benefit; it is not valid JSON by itself.
	# The text is sent verbatim, so edits here change model behaviour.
	PROMPT = """
	Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:
	1. Physical Measurements:
	- Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
	- For each measurement, provide the following details:
	- Substance or component being measured.
	- Specific value or range of the measurement.
	- Unit of measurement (if provided).
	- Measurement type or context (e.g., frequency, impedance, gain, etc.).
	2. Patent Metadata:
	- Title of the patent.
	- Abstract summarizing the technical focus.
	- Metadata, including:
	- Patent number.
	- Filing date.
	- Inventors.
	- Assignee (if applicable).
	### Output Format:
	The response should be formatted as a structured JSON object, as shown below:
	{
	"Patent_Title": "Title",
	"Patent_Abstract": "Abstract",
	"Patent_Metadata": {
	"Patent_Number": "Number",
	"Filing_Date": "Date",
	"Inventors": ["Name1", "Name2"],
	"Assignee": "Assignee Name"
	},
	"Content": [
	{
	"Measurement_substance": "substance",
	"Measured_value": "value",
	"Measured_unit": "unit",
	"measurement_type": "type"
	}
	// Additional measurements
	]
	}
	### Example:
	If the patent discusses "A novel RF power amplifier operating at 2.4 GHz with a bandwidth of 20 MHz and an output power of 30 dBm," the output should be:
	{
	"Patent_Title": "High-Efficiency RF Power Amplifier",
	"Patent_Abstract": "A novel RF power amplifier with improved impedance matching for wireless communication devices.",
	"Patent_Metadata": {
	"Patent_Number": "US12345678B2",
	"Filing_Date": "2024-06-20",
	"Inventors": ["Jane Doe", "John Smith"],
	"Assignee": "TechWave Inc."
	},
	"Content": [
	{
	"Measurement_substance": "RF power amplifier",
	"Measured_value": "2.4",
	"Measured_unit": "GHz",
	"measurement_type": "operating frequency"
	},
	{
	"Measurement_substance": "RF power amplifier",
	"Measured_value": "20",
	"Measured_unit": "MHz",
	"measurement_type": "bandwidth"
	},
	{
	"Measurement_substance": "RF power amplifier",
	"Measured_value": "30",
	"Measured_unit": "dBm",
	"measurement_type": "output power"
	}
	]
	}
	### Key Considerations:
	- Extract as much detailed information as possible based on the text.
	- Retain accuracy: Avoid inferring data not explicitly mentioned.
	- Follow the structured JSON format strictly for consistency.
	- Exclude any irrelevant or redundant information. Focus only on RF and related hardware technical data.
	"""


	# Page heading followed by a short explanation of what the app does.
	st.title("Technical Measurements Extractor for Patents")
	app_description = (
	    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
	    + "Provide a date range to download patents and analyze them using GPT models."
	)
	st.write(app_description)

	# Date range, typed as text and parsed later when the button is pressed.
	st.header("Enter Date Range for Patent Analysis")
	start_date_input = st.text_input(
	    label="Enter the start date (YYYY-MM-DD):",
	    value="2024-06-20",
	)
	end_date_input = st.text_input(
	    label="Enter the end date (YYYY-MM-DD):",
	    value="2024-06-27",
	)

	# Upper bound on how many of the downloaded patents get analyzed.
	num_patents_to_analyze = st.number_input(
	    label="Number of patents to analyze:",
	    min_value=1,
	    value=3,
	    step=1,
	    help="Specify how many patents you want to analyze.",
	)

	# OpenAI model used for the analysis step.
	available_models = ["gpt-3.5-turbo", "gpt-4"]
	model_choice = st.selectbox(
	    label="Select a model for analysis:",
	    options=available_models,
	    help="Choose the OpenAI GPT model for the analysis.",
	)

	# Flag forwarded to the download and analysis helpers.
	logging_enabled = st.checkbox(
	    label="Enable logging?",
	    value=False,
	    help="Toggle logging for debugging purposes.",
	)

	# Run Analysis Button
	# Run Analysis Button
	# The body runs only on the script rerun triggered by clicking the button.
	# Flow: validate the typed dates -> download/preprocess patents -> run the
	# QA agent on the first N saved patents -> display cost and JSON results.
	if st.button("Analyze Patents"):
	    # Tolerate stray whitespace around the typed dates; strptime rejects it.
	    start_text = start_date_input.strip()
	    end_text = end_date_input.strip()

	    if not start_text or not end_text:
	        st.error("Please enter both start and end dates!")
	    else:
	        try:
	            # This try covers date parsing only, so that a ValueError raised
	            # later by the download/analysis helpers is not misreported as
	            # an invalid date format.
	            start_date = datetime.strptime(start_text, "%Y-%m-%d")
	            end_date = datetime.strptime(end_text, "%Y-%m-%d")
	        except ValueError as ve:
	            st.error(f"Invalid date format: {ve}")
	        else:
	            # Validate date range (identical start and end dates are accepted).
	            if start_date > end_date:
	                st.error("End date must be after start date!")
	                st.stop()

	            try:
	                # Step 1: Download and preprocess patents
	                with st.spinner("Downloading and extracting patents..."):
	                    saved_patent_names = preprocess_data.parse_and_save_patents(
	                        start_date, end_date, logging_enabled
	                    )
	                if not saved_patent_names:
	                    st.error("No patents found for the given date range.")
	                    st.stop()
	                st.success(f"{len(saved_patent_names)} patents found and processed!")

	                # Step 2: Analyze patents using GPT
	                # Only the first `num_patents_to_analyze` saved patents are used.
	                selected_patents = saved_patent_names[:num_patents_to_analyze]
	                total_cost = 0
	                results = []

	                st.write("Starting patent analysis...")
	                # call_QA_to_json receives the full list plus an index into it,
	                # so iterate over positions rather than over the file names.
	                for patent_index in range(len(selected_patents)):
	                    cost, output = qa_agent.call_QA_to_json(
	                        PROMPT,
	                        start_date.year,  # Pass start_date year, month, and day
	                        start_date.month,
	                        start_date.day,
	                        saved_patent_names,
	                        patent_index,
	                        logging_enabled,
	                        model_choice,
	                    )
	                    total_cost += cost
	                    results.append(output)

	                # Step 3: Display results
	                st.write(f"Total Cost: ${total_cost:.4f}")
	                st.write("### Analysis Results:")
	                for position, result in enumerate(results, start=1):
	                    st.subheader(f"Patent {position}")
	                    st.json(result)
	            except Exception as e:
	                # Top-level boundary of the app: report any pipeline failure
	                # in the UI rather than letting it propagate.
	                st.error(f"An unexpected error occurred: {e}")