import base64
import io
import json
import os

from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
from PIL import Image
import fitz  # PyMuPDF
from dotenv import load_dotenv
import openai

# Load environment variables from a local .env file (if present).
load_dotenv()

# Use .get() so a missing variable reaches the explicit RuntimeError below
# instead of raising a bare KeyError on the os.environ lookup.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Missing OpenAI API key. Please set OPENAI_API_KEY in the environment variables.")

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def vision(file_content):
    """OCR a PDF by rendering each page to a PNG and asking a vision-capable
    chat model to extract all visible text.

    Parameters
    ----------
    file_content : bytes
        Raw bytes of the uploaded PDF.

    Returns
    -------
    str
        The text the model extracted from the page images.

    Raises
    ------
    HTTPException
        500 if the OpenAI call fails.
    """
    pdf_document = fitz.open("pdf", file_content)
    vision_data = [
        {
            "type": "text",
            "text": "extract the all text from this images",
        }
    ]

    # Render each PDF page and attach it as a base64 data URL.
    for page_num in range(len(pdf_document)):
        page = pdf_document.load_page(page_num)
        pix = page.get_pixmap()
        # pix.tobytes("png") already yields a complete PNG image; the
        # original PIL open/re-save round-trip re-encoded the same bytes
        # for no benefit, so encode directly.
        img_base64 = base64.b64encode(pix.tobytes("png")).decode("utf-8")
        vision_data.append(
            {
                "type": "image_url",
                # BUGFIX: the payload is PNG, so declare image/png (the
                # original mislabeled it image/jpeg).
                "image_url": {"url": f"data:image/png;base64,{img_base64}"},
            }
        )
    print("PDF pages converted to images successfully!")

    # Ask the vision model to transcribe the page images.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": vision_data,
                }
            ],
        )
        content = response.choices[0]["message"]["content"]
        print(content)
        return content
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")


@app.post("/get_ocr_data/")
def get_data(input_file: UploadFile = File(...)):
    """Extract text from an uploaded CV (PDF) and return it as structured JSON.

    Text is first pulled with PyPDF2; if the PDF has no selectable text
    (e.g. a scanned document), it falls back to the vision OCR path.

    Raises
    ------
    HTTPException
        400 for non-PDF uploads, 500 if the model returns unparsable JSON.
    """
    file_content = input_file.file.read()
    file_type = input_file.content_type
    text = ""
    if file_type == "application/pdf":
        pdf_reader = PdfReader(io.BytesIO(file_content))
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages; guard
            # against a TypeError on concatenation.
            text += page.extract_text() or ""
        # BUGFIX: the original condition was inverted -- it invoked the
        # expensive vision OCR only when text extraction *succeeded*.
        # Fall back to vision OCR only when no selectable text was found.
        if not text.strip():
            print("\nvision running..........................\n")
            text = vision(file_content)
    else:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    print(text.strip())

    # Prompt the model to reshape the raw CV text into structured JSON.
    # (The example output is a JSON array containing one object -- the
    # original example omitted the object braces and was itself invalid
    # JSON, teaching the model the wrong format.)
    prompt = f"""This is CV data: {text.strip()}. IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.if you not found data then fill with "none" don't add any extra explaition text need only json Example Output: ```json [ {{ "firstname": "firstname", "lastname": "lastname", "email": "email", "contact_number": "contact number", "home_address": "full home address", "home_town": "home town or city", "total_years_of_experience": "total years of experience", "education": "Institution Name, Country, Degree Name, Graduation Year; Institution Name, Country, Degree Name, Graduation Year", "LinkedIn_link": "LinkedIn link", "experience": "experience", "industry": "industry of work", "skills": "skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section), formatted as: Skill 1, Skill 2, Skill 3, Skill 4, Skill 5", "positions": ["Job title 1, Job title 2, Job title 3"] }} ] ``` """

    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": """You are an assistant processing CV data and formatting it into structured JSON."""
             },
            {"role": "user", "content": prompt}
        ]
    )

    raw = response["choices"][0]["message"]["content"]
    # BUGFIX: the original .replace("json", "") stripped the substring
    # "json" from anywhere in the payload (corrupting field values that
    # happen to contain it). Peel markdown code fences off the edges only.
    data_str = raw.strip()
    if data_str.startswith("```"):
        data_str = data_str.strip("`").strip()
        if data_str.lower().startswith("json"):
            data_str = data_str[4:]
    print(data_str)
    try:
        data = json.loads(data_str)
    except json.JSONDecodeError as e:
        # Surface a clear 500 instead of an unhandled traceback when the
        # model ignores the format instructions.
        raise HTTPException(status_code=500, detail=f"Model returned invalid JSON: {e}")
    return {"data": data}