artificialguybr committed on
Commit
dbb32ab
·
verified ·
1 Parent(s): 9aaed47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -56
app.py CHANGED
@@ -1,78 +1,108 @@
1
  import gradio as gr
2
  import json
 
3
  from PIL import Image
4
- # Assuming these imports work as expected, but you might need to adjust based on your actual package structure
5
- from surya.ocr import run_ocr
6
- from surya.detection import batch_detection
7
- from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
8
- from surya.model.recognition.model import load_model as load_rec_model
9
- from surya.model.recognition.processor import load_processor as load_rec_processor
10
- from surya.postprocessing.heatmap import draw_polys_on_image
11
 
12
- # Load models and processors with print statements to confirm loading
13
- print("Loading models and processors...")
14
- det_model, det_processor = load_det_model(), load_det_processor()
15
- rec_model, rec_processor = load_rec_model(), load_rec_processor()
16
- print("Models and processors loaded successfully.")
 
17
 
18
- # Load language codes
19
- print("Loading language codes...")
20
- with open("languages.json", "r") as file:
21
- languages = json.load(file)
22
- language_dict = {name: code for name, code in languages.items()}
23
- print(f"Loaded languages: {list(language_dict.keys())}")
24
-
25
- def ocr_function(img, lang_name):
26
- print(f"OCR Function Called with lang_name: {lang_name}")
27
- lang_code = language_dict[lang_name]
28
- print(f"Language Code: {lang_code}")
29
- # Ensure langs is a list of language codes, not a list of lists
30
- predictions = run_ocr([img], [lang_code], det_model, det_processor, rec_model, rec_processor) # Corrected
31
- print(f"Predictions: {predictions}")
32
- if predictions:
33
- img_with_text = draw_polys_on_image(predictions[0]["polys"], img)
34
- return img_with_text, predictions[0]["text"]
35
  else:
36
- return img, "No text detected"
37
-
38
-
39
- def text_line_detection_function(img):
40
- print("Text Line Detection Function Called")
41
- preds = batch_detection([img], det_model, det_processor)[0] # Assuming this returns a DetectionResult object
42
- print(f"Detection Predictions: {preds}")
43
 
44
- # Check if preds has an attribute 'bboxes' and use it
45
- if hasattr(preds, 'bboxes'):
46
- # Assuming draw_polys_on_image can work with the format of bboxes directly or you adapt it accordingly
47
- img_with_lines = draw_polys_on_image([bbox.polygon for bbox in preds.bboxes], img)
48
- return img_with_lines, preds
 
49
  else:
50
- raise AttributeError("DetectionResult object does not have 'bboxes' attribute")
51
-
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
54
  with gr.Blocks() as app:
55
- gr.Markdown("# Surya OCR and Text Line Detection")
56
  with gr.Tab("OCR"):
57
  with gr.Column():
58
- ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
59
- ocr_language_selector = gr.Dropdown(label="Select Language for OCR", choices=list(language_dict.keys()), value="English")
60
- ocr_run_button = gr.Button("Run OCR")
61
  with gr.Column():
62
- ocr_output_image = gr.Image(label="OCR Output Image", type="pil", interactive=False)
63
- ocr_text_output = gr.TextArea(label="Recognized Text")
64
 
65
- ocr_run_button.click(fn=ocr_function, inputs=[ocr_input_image, ocr_language_selector], outputs=[ocr_output_image, ocr_text_output])
66
 
67
- with gr.Tab("Text Line Detection"):
68
  with gr.Column():
69
- detection_input_image = gr.Image(label="Input Image for Detection", type="pil")
70
- detection_run_button = gr.Button("Run Text Line Detection")
71
  with gr.Column():
72
- detection_output_image = gr.Image(label="Detection Output Image", type="pil", interactive=False)
73
- detection_json_output = gr.JSON(label="Detection JSON Output")
74
 
75
- detection_run_button.click(fn=text_line_detection_function, inputs=detection_input_image, outputs=[detection_output_image, detection_json_output])
76
 
77
  if __name__ == "__main__":
78
  app.launch()
 
1
  import gradio as gr
2
  import json
3
+ import subprocess
4
  from PIL import Image
5
+ import os
6
+ import tempfile
 
 
 
 
 
7
 
8
def save_temp_image(img):
    """Save *img* into a fresh temporary directory and return where it went.

    Args:
        img: Object exposing a ``save(path)`` method (the Gradio image inputs
            in this file are declared with ``type="pil"``).

    Returns:
        A ``(img_path, temp_dir)`` tuple: the path of the written
        ``input_image.png`` and the directory that contains it. The caller
        owns the directory and is responsible for deleting it.

    Raises:
        Whatever ``img.save`` raises (for example ``AttributeError`` when
        ``img`` is ``None``). The temporary directory is removed before the
        exception propagates, so a failed save leaves nothing behind.
    """
    import shutil  # local import keeps this fix self-contained

    temp_dir = tempfile.mkdtemp()
    img_path = os.path.join(temp_dir, "input_image.png")
    try:
        img.save(img_path)
    except BaseException:
        # The caller never receives temp_dir on failure, so it must be
        # cleaned up here or it would be leaked.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return img_path, temp_dir
14
 
15
def ocr_function_cli(img, lang_name):
    """Run the ``surya_ocr`` command-line tool on *img* and collect its output.

    The image is written to a temporary directory, the CLI is invoked with
    that directory as ``--results_dir``, and the rendered image plus the JSON
    results are read back before the directory is deleted.

    Args:
        img: Image object to process (saved via ``save_temp_image``). May be
            ``None`` -- presumably when the button is clicked with no image
            uploaded; TODO confirm against Gradio's behaviour.
        lang_name: Value forwarded verbatim to the ``--langs`` option.
            NOTE(review): the UI passes display names such as "English";
            confirm the CLI accepts names rather than language codes.

    Returns:
        A ``(image, text)`` tuple. ``image`` is the rendered result if the
        expected file exists, otherwise the original ``img``. ``text`` is the
        string form of each top-level value in the results JSON joined by
        newlines, or ``"No text detected"`` if the results file is missing.

    Raises:
        subprocess.CalledProcessError: If the CLI exits with a non-zero code.
    """
    import shutil  # local import keeps this fix self-contained

    if img is None:
        return None, "No image provided"

    img_path, temp_dir = save_temp_image(img)
    try:
        # Argument list + no shell: paths or language values containing
        # spaces or shell metacharacters can no longer alter the command.
        command = [
            "surya_ocr",
            img_path,
            "--langs",
            lang_name,
            "--images",
            "--results_dir",
            temp_dir,
        ]
        subprocess.run(command, check=True)

        # NOTE(review): these file names are placeholders carried over from
        # the original code; adjust them to match what the command writes.
        result_img_path = os.path.join(temp_dir, "image_with_text.png")
        result_text_path = os.path.join(temp_dir, "results.json")

        if os.path.exists(result_img_path):
            # Image.open reads lazily, so copy the pixels into memory before
            # the temporary directory is removed below.
            with Image.open(result_img_path) as rendered:
                result_img = rendered.copy()
        else:
            # Fall back to the input image when no rendered output is found.
            result_img = img

        if os.path.exists(result_text_path):
            with open(result_text_path, "r", encoding="utf-8") as file:
                result_text = json.load(file)
            # NOTE(review): adjust the text extraction to the actual JSON
            # layout; for now each top-level value is simply stringified.
            text_output = "\n".join(str(page) for page in result_text.values())
        else:
            text_output = "No text detected"
    finally:
        # Remove the input image and everything the CLI wrote, even when the
        # command or the parsing above failed.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return result_img, text_output
49
 
50
def text_line_detection_function_cli(img):
    """Run the ``surya_detect`` command-line tool on *img* and collect its output.

    The image is written to a temporary directory, the CLI is invoked with
    that directory as ``--results_dir``, and the rendered image plus the JSON
    results are read back before the directory is deleted.

    Args:
        img: Image object to process (saved via ``save_temp_image``). May be
            ``None`` -- presumably when the button is clicked with no image
            uploaded; TODO confirm against Gradio's behaviour.

    Returns:
        A ``(image, results)`` tuple. ``image`` is the rendered result if the
        expected file exists, otherwise the original ``img``. ``results`` is
        the parsed results JSON, or an ``{"error": ...}`` dict when the
        results file is missing or no image was supplied.

    Raises:
        subprocess.CalledProcessError: If the CLI exits with a non-zero code.
    """
    import shutil  # local import keeps this fix self-contained

    if img is None:
        return None, {"error": "No image provided"}

    img_path, temp_dir = save_temp_image(img)
    try:
        # Argument list + no shell: a temp path containing spaces or shell
        # metacharacters can no longer alter the command.
        command = ["surya_detect", img_path, "--images", "--results_dir", temp_dir]
        subprocess.run(command, check=True)

        # NOTE(review): these file names are placeholders carried over from
        # the original code; adjust them to match what the command writes.
        result_img_path = os.path.join(temp_dir, "image_with_lines.png")
        result_json_path = os.path.join(temp_dir, "results.json")

        if os.path.exists(result_img_path):
            # Image.open reads lazily, so copy the pixels into memory before
            # the temporary directory is removed below.
            with Image.open(result_img_path) as rendered:
                result_img = rendered.copy()
        else:
            # Fall back to the input image when no rendered output is found.
            result_img = img

        if os.path.exists(result_json_path):
            with open(result_json_path, "r", encoding="utf-8") as file:
                result_json = json.load(file)
        else:
            result_json = {"error": "No detection results found"}
    finally:
        # Remove the input image and everything the CLI wrote, even when the
        # command or the parsing above failed.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return result_img, result_json
82
 
83
# Gradio interface: two tabs, one per CLI-backed function defined above.
# Components are created in layout order, so statement order matters here.
with gr.Blocks() as app:
    gr.Markdown("# Surya OCR e Detecção de Linhas de Texto via CLI")
    with gr.Tab("OCR"):
        # Input column: image, language choice and the trigger button.
        with gr.Column():
            ocr_input_image = gr.Image(label="Imagem de Entrada para OCR", type="pil")
            # The selected value is passed as-is to ocr_function_cli as lang_name.
            ocr_language_selector = gr.Dropdown(label="Selecione o Idioma para OCR", choices=["English", "Portuguese"], value="English")
            ocr_run_button = gr.Button("Executar OCR")
        # Output column: rendered image and recognised text.
        with gr.Column():
            ocr_output_image = gr.Image(label="Imagem de Saída do OCR", type="pil", interactive=False)
            ocr_text_output = gr.TextArea(label="Texto Reconhecido")

        # ocr_function_cli returns (image, text), matching the two outputs in order.
        ocr_run_button.click(fn=ocr_function_cli, inputs=[ocr_input_image, ocr_language_selector], outputs=[ocr_output_image, ocr_text_output])

    with gr.Tab("Detecção de Linhas de Texto"):
        # Input column: image and the trigger button.
        with gr.Column():
            detection_input_image = gr.Image(label="Imagem de Entrada para Detecção", type="pil")
            detection_run_button = gr.Button("Executar Detecção de Linhas de Texto")
        # Output column: rendered image and the detection results as JSON.
        with gr.Column():
            detection_output_image = gr.Image(label="Imagem de Saída da Detecção", type="pil", interactive=False)
            detection_json_output = gr.JSON(label="Saída JSON da Detecção")

        # text_line_detection_function_cli returns (image, json), matching the outputs in order.
        detection_run_button.click(fn=text_line_detection_function_cli, inputs=detection_input_image, outputs=[detection_output_image, detection_json_output])

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()