Spaces:

Agents-MCP-Hackathon
/

LogosAI

Running

App Files Files Community

LogosAI / process /ocr.py

IvanMiao

fix: typo, error handle

01dc73d 6 days ago

raw

history blame

2.97 kB

	from mistralai import Mistral
	from mistralai.models import OCRResponse


	OCR_MODEL = "mistral-ocr-latest"
	CHAT_MODEL = "mistral-large-latest"


	def ocr_from_file(file_path, api_key: str, mode="image"):

	if not api_key:
	raise ValueError("Mistral API Key is required.")

	try:
	client = Mistral(api_key=api_key)
	except Exception as e:
	raise ValueError("API invalid.")

	uploaded_image = client.files.upload(
	file={
	"file_name": file_path,
	"content": open(file_path, "rb"),
	},
	purpose="ocr"
	)
	signed_url = client.files.get_signed_url(file_id=uploaded_image.id)

	if mode == "image":
	ocr_response = client.ocr.process(
	model=OCR_MODEL,
	document={
	"type": "image_url",
	"image_url": signed_url.url,
	},
	include_image_base64=True
	)
	elif mode == "pdf":
	ocr_response = client.ocr.process(
	model=OCR_MODEL,
	document={
	"type": "document_url",
	"document_url": signed_url.url,
	},
	include_image_base64=True
	)

	return ocr_response


	def get_combined_markdown(ocr_response: OCRResponse) -> str:

	markdowns: list[str] = []
	for page in ocr_response.pages:
	markdowns.append(page.markdown)

	return "\n\n".join(markdowns)


	def correct_text_with_ai(text: str, api_key: str) -> str:

	if not api_key:
	raise ValueError("Mistral API Key is required.")

	try:
	client = Mistral(api_key=api_key)
	except Exception as e:
	return f"ERROR: {str(e)}"

	response = client.chat.complete(
	model=CHAT_MODEL,
	messages=[
	{
	"role": "system",
	"content":
	"""You are an expert proofreader specializing in Markdown formatting and OCR error correction. Your task is to meticulously review provided Markdown text that has been generated via OCR.
	Your primary goal is to identify and correct typographical errors, spelling mistakes, and redundant symbols that are clearly a result of the OCR process.
	Additionally, you must correct any illogical or jumbled line breaks to ensure proper Markdown paragraph formatting.

	Crucially, you must NOT alter the original meaning or content of the text. Your corrections should be limited to:
	* Obvious OCR-induced spelling errors
	* Erroneous or redundant symbols
	* Markdown formatting errors
	* Jumbled or incorrect line breaks for proper paragraphing

	After your thorough review, output the carefully corrected Markdown text. JUST the text."""
	},
	{
	"role": "user",
	"content": text
	},
	],
	temperature=0.1,
	)
	return(response.choices[0].message.content)


	def perform_raw_ocr(input_file, api_key: str):
	if input_file != None:
	file_ext = input_file.name.split('.')[-1].lower()
	else:
	return "File/Text not found"

	if file_ext == "txt":
	with open(input_file, "r", encoding="utf-8") as f:
	return f.read()
	elif file_ext == "pdf":
	file_type = "pdf"
	else:
	file_type = "image"
	response = ocr_from_file(input_file, api_key, file_type)
	res_text = get_combined_markdown(response)
	return res_text