profchaos committed on
Commit 40398bd · verified · 1 Parent(s): a56e05c

Delete ocr_using_qwenvl_by_ps.py

Files changed (1)
  1. ocr_using_qwenvl_by_ps.py +0 -119
ocr_using_qwenvl_by_ps.py DELETED
@@ -1,119 +0,0 @@
- # -*- coding: utf-8 -*-
- """OCR_using_QwenVL_by_PS.ipynb
-
- Automatically generated by Colab.
-
- Original file is located at
-     https://colab.research.google.com/drive/1lNLVl8FzVRrSv4dMd9vXqnz8SYtKoebf
- """
-
- # Import libraries
- import cv2
- from PIL import Image
- from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
- import torch
- from byaldi import RAGMultiModalModel
- from google.colab import files
- from IPython.display import display, HTML
- import os
- import re
-
- # Detect CUDA (GPU) if available, otherwise fall back to CPU
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- print("Using device:", device)
-
- # Load models: the ColPali retriever (via byaldi) and Qwen2-VL for OCR
- RAG = RAGMultiModalModel.from_pretrained("vidore/colpali", verbose=0)
- model = Qwen2VLForConditionalGeneration.from_pretrained(
-     "Qwen/Qwen2-VL-2B-Instruct",
-     torch_dtype=torch.float16,
-     device_map="auto"
- )
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
-
- torch.cuda.empty_cache()
-
- # Upload image (Colab helper, left commented out)
- # def upload_image():
- #     uploaded = files.upload()
- #     for filename in uploaded.keys():
- #         print(f'Uploaded file: {filename}')
- #     return filename
-
- # image_path = upload_image()
-
- # Preprocessing using OpenCV: grayscale, resize, blur, threshold, denoise
- def preprocess_image(image_path):
-     image = cv2.imread(image_path)
-     if image is None:
-         raise FileNotFoundError(f"Image not found at the path: {image_path}")
-
-     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-
-     # Resize so the longer side is 1024 px, maintaining aspect ratio
-     height, width = gray.shape
-     if height > width:
-         new_height = 1024
-         new_width = int((width / height) * new_height)
-     else:
-         new_width = 1024
-         new_height = int((height / width) * new_width)
-
-     resized_image = cv2.resize(gray, (new_width, new_height))
-
-     blurred = cv2.GaussianBlur(resized_image, (5, 5), 0)
-     thresholded = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
-     denoised = cv2.fastNlMeansDenoising(thresholded, h=30)
-     pil_image = Image.fromarray(denoised)
-
-     return pil_image
-
- # Call the function and store the result
- # pil_image = preprocess_image(image_path)
-
- # display(pil_image)  # Now pil_image is accessible here
-
- # Extract the text with Qwen2-VL
- def extract_text(image_path):
-     try:
-         processed_image = preprocess_image(image_path)
-         messages = [
-             {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Please extract both the Hindi and English text as they appear in the image."}]}
-         ]
-         text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-         inputs = processor(text=[text_prompt], images=[processed_image], padding=True, return_tensors="pt").to(device)
-         output_ids = model.generate(**inputs, max_new_tokens=1042)
-         # Strip the prompt tokens so only the newly generated text is decoded
-         generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, output_ids)]
-         extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]
-         return extracted_text
-     except Exception as e:
-         return f"An error occurred during text extraction: {e}"
-
- # Keyword searching: wrap case-insensitive matches in a red <span>
- def keyword_search(extracted_text, keywords):
-     if not keywords:
-         # No keywords given: return the text unchanged, prefixed with a hint
-         # (returning a single string keeps the return type consistent)
-         return "Please enter a keyword to search and highlight.\n\n" + extracted_text
-     keywords = [keyword.strip() for keyword in keywords.split(",") if keyword.strip()]
-
-     highlighted_text = ""
-
-     lines = extracted_text.split('\n')
-     for line in lines:
-         for keyword in keywords:
-             pattern = re.compile(re.escape(keyword), re.IGNORECASE)
-             line = pattern.sub(lambda m: f'<span style="color: red;">{m.group()}</span>', line)
-         highlighted_text += line + '\n'
-     return highlighted_text
-
- # OCR and keyword-search interface handlers
- def ocr_interface(image):
-     image_path = "temp_image.png"
-     image.save(image_path)
-     extracted_text = extract_text(image_path)
-     os.remove(image_path)
-
-     # Return the extracted text and an empty string to clear the highlight box
-     return extracted_text, ""
-
- def keyword_interface(extracted_text, keywords):
-     highlighted_text = keyword_search(extracted_text, keywords)
-     return highlighted_text
-
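
For context on what was removed: the deleted script's functions compose into a simple preprocess-OCR-highlight run. A minimal sketch, not part of the file, assuming the imports and model setup above have executed; "sample.png" and the keyword list are illustrative assumptions.

# End-to-end sketch using the deleted script's functions
from PIL import Image

img = Image.open("sample.png")                    # any PIL-loadable image
text, _ = ocr_interface(img)                      # writes a temp PNG, runs Qwen2-VL OCR
html = keyword_interface(text, "invoice, total")  # comma-separated keywords
print(html)                                       # text with <span style="color: red;"> highlights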