hugpv committed on
Commit
5f3a1bb
·
1 Parent(s): 2383dd4

added tesseract checks and debian install

Browse files
create_interest_areas_from_image.py CHANGED
@@ -3,19 +3,28 @@ import pandas as pd
3
  import io
4
  import csv
5
  import os
 
6
 
7
  if os.environ.get('TESSDATA_PREFIX') is None and os.name == 'nt':
8
- os.environ['TESSDATA_PREFIX'] = 'C:/Program Files/Tesseract-OCR/tessdata/'
9
  tessdata_prefix = 'C:/Program Files/Tesseract-OCR/tessdata/'
 
 
 
 
10
  if os.environ.get('TESSDATA_PREFIX') is None and os.name != 'nt':
11
- os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata'
12
  tessdata_prefix = '/usr/share/tesseract-ocr/4.00/tessdata'
 
 
 
 
13
 
14
  import pytesseract
15
  if os.name == 'nt':
16
- pytesseract.pytesseract.tesseract_cmd = r'c:/Program Files/Tesseract-OCR/tesseract.exe'
 
17
  else:
18
- pytesseract.pytesseract.tesseract_cmd =r'/usr/bin/tesseract'
 
19
 
20
  def recognize_text(image_path, tesseract_config='--psm 6 -l spa'):
21
  """
 
3
  import io
4
  import csv
5
  import os
6
+ from pathlib import Path
7
 
8
  if os.environ.get('TESSDATA_PREFIX') is None and os.name == 'nt':
 
9
  tessdata_prefix = 'C:/Program Files/Tesseract-OCR/tessdata/'
10
+ if Path(tessdata_prefix).exists():
11
+ os.environ['TESSDATA_PREFIX'] = 'C:/Program Files/Tesseract-OCR/tessdata/'
12
+ else:
13
+ tessdata_prefix = None
14
  if os.environ.get('TESSDATA_PREFIX') is None and os.name != 'nt':
 
15
  tessdata_prefix = '/usr/share/tesseract-ocr/4.00/tessdata'
16
+ if Path(tessdata_prefix).exists():
17
+ os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata'
18
+ else:
19
+ tessdata_prefix = None
20
 
21
  import pytesseract
22
  if os.name == 'nt':
23
+ if Path(r'c:/Program Files/Tesseract-OCR/tesseract.exe').exists():
24
+ pytesseract.pytesseract.tesseract_cmd = r'c:/Program Files/Tesseract-OCR/tesseract.exe'
25
  else:
26
+ if Path(r'/usr/bin/tesseract').exists():
27
+ pytesseract.pytesseract.tesseract_cmd =r'/usr/bin/tesseract'
28
 
29
  def recognize_text(image_path, tesseract_config='--psm 6 -l spa'):
30
  """
packages.txt CHANGED
@@ -1,6 +1,6 @@
1
- build-essential
2
- curl
3
- software-properties-common
4
  libcairo2-dev
5
  tesseract-ocr
6
- tesseract-ocr-spa
 
 
 
 
 
 
 
1
  libcairo2-dev
2
  tesseract-ocr
3
+ libtesseract-dev
4
+ tesseract-ocr-eng
5
+ tesseract-ocr-spa
6
+ tesseract-ocr-script-latn