Vela
Created a PdfExtraction application with basic functionality
f7d4608
# from pydantic import BaseModel
# from openai import OpenAI
# from typing import List, Dict, Optional, Union
# client = OpenAI()
# class GHGParameter(BaseModel):
# parameter: str
# data_type: str
# synonyms: Optional[List[str]] = None
# uom: Optional[str] = None
# description: Optional[str] = None
# value: Union[int, str, None]
# class GHGCategory(BaseModel):
# category: str
# parameters: List[GHGParameter]
# SCHEMA = """{
# "Gas (GHG)": {
# "Total GHG Emissions": {
# "data_type": "Numeric",
# "synonyms": ["Carbon Footprint"],
# "uom": "Metric Tons CO₂e",
# "description": "Total greenhouse gases emitted by the organization.",
# "value": null
# }
# }
# }"""
# PROMPT = (f"""You are a PDF parsing agent.
# Fetch the following data from the PDF: {SCHEMA}"""
# )
# def extract_emissions_data_as_json(api, model, file_input):
# if api.lower() == "openai":
# file = client.files.create(
# file=("uploaded.pdf", file_input),
# purpose="assistants"
# )
# completion = client.beta.chat.completions.parse(
# model="gpt-4o-2024-08-06",
# messages=[
# {
# "role": "user",
# "content": [
# {
# "type": "file",
# "file": {
# "file_id": file.id,
# }
# },
# {
# "type": "text",
# "text":PROMPT,
# },
# ]
# }
# ],
# response_format=GHGCategory,
# )
# research_paper = completion.choices[0].message.parsed
# return research_paper
# from pydantic import BaseModel
# from openai import OpenAI
# client = OpenAI()
# class CalendarEvent(BaseModel):
# name: str
# date: str
# participants: list[str]
# def extract_emissions_data_as_json(api, model, file_input):
# if api.lower() == "openai":
# file = client.files.create(
# file=("uploaded.pdf", file_input),
# purpose="assistants"
# )
# completion = client.beta.chat.completions.parse(
# model="gpt-4o-2024-08-06",
# messages=[
# {
# "role": "user",
# "content": [
# {
# "type": "file",
# "file": {
# "file_id": file.id,
# }
# },
# {
# "type": "text",
# "text":PROMPT,
# },
# ]
# }
# ],
# response_format=GHGCategory,
# )
# event = completion.choices[0].message.parsed
# response = client.chat.completions.create(
# model="gpt-4o-2024-08-06",
# messages=[
# {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step."},
# {"role": "user", "content": "how can I solve 8x + 7 = -23"}
# ],
# response_format={
# "type": "json_schema",
# "json_schema": {
# "name": "GHGCategory",
# "schema": {
# "type": "object",
# "properties": {
# "steps": {
# "type": "array",
# "items": {
# "type": "object",
# "properties": {
# "explanation": {"type": "string"},
# "output": {"type": "string"}
# },
# "required": ["explanation", "output"],
# "additionalProperties": False
# }
# },
# "final_answer": {"type": "string"}
# },
# "required": ["steps", "final_answer"],
# "additionalProperties": False
# },
# "strict": True
# }
# }
# )
# print(response.choices[0].message.content)
# response = await async_client.responses.create(
# model="gpt-4o",
# input=[
# {
# "role": "user",
# "content": [
# {
# "type": "input_file",
# "file_id": uploaded_file.id,
# },
# {
# "type": "input_text",
# "text": """
# You are an intelligent PDF data extractor designed to extract structured information from Brand Books. A Brand Book contains guidelines and details about a brand's identity, including its logo, colors, typography, messaging, and more.
# Ensure the extracted data follows this schema strictly.
# Return the extracted brand information in JSON format with no explanation.
# For brand_logo and favicon, always provide a direct URL to the image instead of just the image name or a placeholder. If no valid URLs are found, return an empty array. """
# }
# ]
# }
# ],
# text={
# "format": {
# "type": "json_schema",
# "name": "BrandBook",
# "strict": True,
# "schema": {
# "type": "object",
# "properties": {
# "brand_url": {
# "type": "string",
# "description": "The URL associated with the brand."
# },
# "brand_name": {
# "type": "string",
# "description": "The name of the brand."
# },
# "brand_category": {
# "type": "array",
# "description": "A list of categories that the brand belongs to.",
# "items": {
# "type": "string"
# }
# },
# "brand_description": {
# "type": "string",
# "description": "A brief description of the brand."
# },
# "brand_colors": {
# "type": "array",
# "description": "A list of colors associated with the brand.",
# "items": {
# "type": "string"
# }
# },
# "brand_fonts": {
# "type": "array",
# "description": "A list of fonts used by the brand.",
# "items": {
# "type": "string"
# }
# },
# "brand_logo": {
# "type": "array",
# "description": "A list of logo urls associated with the brand.",
# "items": {
# "type": "string"
# }
# },
# "target_audience": {
# "type": "string",
# "description": "The target audience for the brand."
# },
# "competitors": {
# "type": "string",
# "description": "The competitors of the brand."
# },
# "aspirational_brands": {
# "type": "string",
# "description": "Brands that the brand aspires to be like."
# },
# "favicon": {
# "type": "array",
# "description": "A list of favicon URLs associated with the brand.",
# "items": {
# "type": "string"
# }
# }
# },
# "required": [
# "brand_url",
# "brand_name",
# "brand_category",
# "brand_description",
# "brand_colors",
# "brand_fonts",
# "brand_logo",
# "target_audience",
# "competitors",
# "aspirational_brands",
# "favicon"
# ],
# "additionalProperties": False
# }
# }
# }
# )