Added Tesseract checks and Debian install
Browse files
- create_interest_areas_from_image.py +13 -4
- packages.txt +4 -4
create_interest_areas_from_image.py
CHANGED
@@ -3,19 +3,28 @@ import pandas as pd
|
|
3 |
import io
|
4 |
import csv
|
5 |
import os
|
|
|
6 |
|
7 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name == 'nt':
|
8 |
-
os.environ['TESSDATA_PREFIX'] = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
9 |
tessdata_prefix = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
|
|
|
|
|
|
|
|
10 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name != 'nt':
|
11 |
-
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata'
|
12 |
tessdata_prefix = '/usr/share/tesseract-ocr/4.00/tessdata'
|
|
|
|
|
|
|
|
|
13 |
|
14 |
import pytesseract
|
15 |
if os.name == 'nt':
|
16 |
-
|
|
|
17 |
else:
|
18 |
-
|
|
|
19 |
|
20 |
def recognize_text(image_path, tesseract_config='--psm 6 -l spa'):
|
21 |
"""
|
|
|
3 |
import io
|
4 |
import csv
|
5 |
import os
|
6 |
+
from pathlib import Path
|
7 |
|
8 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name == 'nt':
|
|
|
9 |
tessdata_prefix = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
10 |
+
if Path(tessdata_prefix).exists():
|
11 |
+
os.environ['TESSDATA_PREFIX'] = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
12 |
+
else:
|
13 |
+
tessdata_prefix = None
|
14 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name != 'nt':
|
|
|
15 |
tessdata_prefix = '/usr/share/tesseract-ocr/4.00/tessdata'
|
16 |
+
if Path(tessdata_prefix).exists():
|
17 |
+
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata'
|
18 |
+
else:
|
19 |
+
tessdata_prefix = None
|
20 |
|
21 |
import pytesseract
|
22 |
if os.name == 'nt':
|
23 |
+
if Path(r'c:/Program Files/Tesseract-OCR/tesseract.exe').exists():
|
24 |
+
pytesseract.pytesseract.tesseract_cmd = r'c:/Program Files/Tesseract-OCR/tesseract.exe'
|
25 |
else:
|
26 |
+
if Path(r'/usr/bin/tesseract').exists():
|
27 |
+
pytesseract.pytesseract.tesseract_cmd =r'/usr/bin/tesseract'
|
28 |
|
29 |
def recognize_text(image_path, tesseract_config='--psm 6 -l spa'):
|
30 |
"""
|
packages.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
build-essential
|
2 |
-
curl
|
3 |
-
software-properties-common
|
4 |
libcairo2-dev
|
5 |
tesseract-ocr
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
libcairo2-dev
|
2 |
tesseract-ocr
|
3 |
+
libtesseract-dev
|
4 |
+
tesseract-ocr-eng
|
5 |
+
tesseract-ocr-spa
|
6 |
+
tesseract-ocr-script-latn
|