# ocrgpt/main.py
import base64
import os
import io
import json

from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import fitz  # PyMuPDF
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables and fail fast if the API key is missing.
# os.environ["OPENAI_API_KEY"] would raise a KeyError before the check
# below could ever run, so use .get() instead.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Missing OpenAI API key. Please set OPENAI_API_KEY in the environment variables.")
client = OpenAI(api_key=api_key)
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def vision(file_content):
    """OCR a PDF by rendering each page to a PNG and sending the images to a vision model."""
    pdf_document = fitz.open(stream=file_content, filetype="pdf")
    vision_data = [
        {
            "type": "text",
            "text": "Extract all the text from these images.",
        }
    ]
    # Convert each PDF page to a base64-encoded PNG. pix.tobytes("png") already
    # returns encoded PNG bytes, so no PIL round-trip is needed. For sharper OCR
    # on dense pages, page.get_pixmap(matrix=fitz.Matrix(2, 2)) renders at 2x.
    for page_num in range(len(pdf_document)):
        page = pdf_document.load_page(page_num)
        pix = page.get_pixmap()
        img_base64 = base64.b64encode(pix.tobytes("png")).decode("utf-8")
        vision_data.append(
            {
                "type": "image_url",
                # The payload is PNG, so the data URL must say image/png, not image/jpeg
                "image_url": {"url": f"data:image/png;base64,{img_base64}"},
            }
        )
print("PDF pages converted to images successfully!")
    # Send the page images to the vision model and return the extracted text
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": vision_data,
                }
            ],
        )
        print(response.choices[0].message.content)
        return response.choices[0].message.content
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
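
# Usage sketch for vision(): pass raw PDF bytes and get the model's OCR text
# back. "scanned.pdf" is a placeholder path for illustration, not a file that
# ships with this app.
#
#   with open("scanned.pdf", "rb") as f:
#       print(vision(f.read()))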
@app.post("/get_ocr_data/")
def get_data(input_file: UploadFile = File(...)):
    try:
        # Read the uploaded file
        file_content = input_file.file.read()
        file_type = input_file.content_type
        text = ""
        if file_type == "application/pdf":
            pdf_reader = PdfReader(io.BytesIO(file_content))
            for page in pdf_reader.pages:
                # extract_text() can return None for image-only pages
                text += page.extract_text() or ""
            if not text.strip():  # If PDF text extraction is insufficient, fall back to OCR
                print("\nvision running..........................\n")
                text = vision(file_content)
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")
        print(text.strip())
        # Call GPT-4o to process the extracted text into structured JSON.
        # Literal braces in the example must be doubled ({{ }}) inside the f-string.
        prompt = f"""This is CV data: {text.strip()}.
IMPORTANT: The output must be a valid JSON array of one object. If a field is not found, fill it with "none". Do not add any extra explanatory text; return only the JSON.
Example output:
```json
[
    {{
        "firstname": "firstname",
        "lastname": "lastname",
        "email": "email",
        "contact_number": "contact number",
        "home_address": "full home address",
        "home_town": "home town or city",
        "total_years_of_experience": "total years of experience",
        "education": "Institution Name, Country, Degree Name, Graduation Year; Institution Name, Country, Degree Name, Graduation Year",
        "LinkedIn_link": "LinkedIn link",
        "experience": "experience",
        "industry": "industry of work",
        "skills": "skills (identify and list specific skills mentioned in the skills section and inferred from the experience section), formatted as: Skill 1, Skill 2, Skill 3, Skill 4, Skill 5",
        "positions": ["Job title 1", "Job title 2", "Job title 3"]
    }}
]
```
"""
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an assistant processing CV data and formatting it into structured JSON."},
                {"role": "user", "content": prompt},
            ],
        )
        # Strip an optional ```json ... ``` fence instead of blindly replacing
        # "json" everywhere, which would corrupt any CV text containing that word
        raw = response.choices[0].message.content.strip()
        if raw.startswith("```"):
            raw = raw.strip("`").removeprefix("json").strip()
        print(raw)
        data = json.loads(raw)
        return {"data": data}
    except HTTPException:
        # Let deliberate HTTP errors (400 unsupported type, vision failures) pass through
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
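
# Minimal local runner: a sketch that assumes uvicorn is installed in the
# environment (it is not a declared dependency above). Hugging Face Spaces
# normally supplies its own start command, so this is only for local testing.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example client call (sketch, assuming the `requests` package is available):
# the multipart field name must match the endpoint parameter "input_file";
# "cv.pdf" is a placeholder filename.
#
#   import requests
#   with open("cv.pdf", "rb") as f:
#       r = requests.post(
#           "http://localhost:7860/get_ocr_data/",
#           files={"input_file": ("cv.pdf", f, "application/pdf")},
#       )
#   print(r.json())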