Spaces:

Agents-MCP-Hackathon
/

LogosAI

Running

File size: 2,970 Bytes

from mistralai import Mistral
from mistralai.models import OCRResponse


OCR_MODEL = "mistral-ocr-latest"
CHAT_MODEL = "mistral-large-latest"


def ocr_from_file(file_path, api_key: str, mode="image"):

	if not api_key:
		raise ValueError("Mistral API Key is required.")

	try:
		client = Mistral(api_key=api_key)
	except Exception as e:
		raise ValueError("API invalid.")

	uploaded_image = client.files.upload(
		file={
			"file_name": file_path,
			"content": open(file_path, "rb"),
		},
		purpose="ocr"
	)
	signed_url = client.files.get_signed_url(file_id=uploaded_image.id)

	if mode == "image":
		ocr_response = client.ocr.process(
			model=OCR_MODEL,
			document={
				"type": "image_url",
				"image_url": signed_url.url,
			},
			include_image_base64=True
		)
	elif mode == "pdf":
		ocr_response = client.ocr.process(
			model=OCR_MODEL,
			document={
				"type": "document_url",
				"document_url": signed_url.url,
			},
			include_image_base64=True
		)

	return ocr_response


def get_combined_markdown(ocr_response: OCRResponse) -> str:

	markdowns: list[str] = []
	for page in ocr_response.pages:
		markdowns.append(page.markdown)

	return "\n\n".join(markdowns)


def correct_text_with_ai(text: str, api_key: str) -> str:

	if not api_key:
		raise ValueError("Mistral API Key is required.")

	try:
		client = Mistral(api_key=api_key)
	except Exception as e:
		return f"ERROR: {str(e)}"

	response = client.chat.complete(
		model=CHAT_MODEL,
		messages=[
			{
				"role": "system",
				"content":
					"""You are an expert proofreader specializing in Markdown formatting and OCR error correction. Your task is to meticulously review provided Markdown text that has been generated via OCR.
					Your primary goal is to identify and correct **typographical errors, spelling mistakes, and redundant symbols** that are clearly a result of the OCR process.
					Additionally, you must correct any illogical or jumbled line breaks to ensure proper Markdown paragraph formatting.

					**Crucially, you must NOT alter the original meaning or content of the text.** Your corrections should be limited to:
					* Obvious OCR-induced spelling errors
					* Erroneous or redundant symbols
					* Markdown formatting errors
					* Jumbled or incorrect line breaks for proper paragraphing

					After your thorough review, output the carefully corrected Markdown text. JUST the text."""
				},
			{
				"role": "user",
				"content": text
				},
		],
		temperature=0.1,
	)
	return(response.choices[0].message.content)


def perform_raw_ocr(input_file, api_key: str):
	if input_file != None:
		file_ext = input_file.name.split('.')[-1].lower()
	else:
		return "File/Text not found"

	if file_ext == "txt":
		with open(input_file, "r", encoding="utf-8") as f:
			return f.read()
	elif file_ext == "pdf":
		file_type = "pdf"
	else:
		file_type = "image"
	response = ocr_from_file(input_file, api_key, file_type)
	res_text = get_combined_markdown(response)
	return res_text