Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
🚨
Browse files
Signed-off-by: peter szemraj <[email protected]>
- pdf2text.py +1 -4
pdf2text.py
CHANGED
|
@@ -4,11 +4,10 @@ pdf2text.py - convert pdf files to text files using OCR
|
|
| 4 |
"""
|
| 5 |
import logging
|
| 6 |
import os
|
| 7 |
-
import pprint as pp
|
| 8 |
import re
|
| 9 |
import shutil
|
| 10 |
import time
|
| 11 |
-
from datetime import date
|
| 12 |
from os.path import basename, dirname, join
|
| 13 |
from pathlib import Path
|
| 14 |
|
|
@@ -24,9 +23,7 @@ os.environ["USE_TORCH"] = "1"
|
|
| 24 |
from cleantext import clean
|
| 25 |
from doctr.io import DocumentFile
|
| 26 |
from doctr.models import ocr_predictor
|
| 27 |
-
from libretranslatepy import LibreTranslateAPI
|
| 28 |
from spellchecker import SpellChecker
|
| 29 |
-
from tqdm.auto import tqdm
|
| 30 |
|
| 31 |
|
| 32 |
def simple_rename(filepath, target_ext=".txt"):
|
|
|
|
| 4 |
"""
|
| 5 |
import logging
|
| 6 |
import os
|
|
|
|
| 7 |
import re
|
| 8 |
import shutil
|
| 9 |
import time
|
| 10 |
+
from datetime import date
|
| 11 |
from os.path import basename, dirname, join
|
| 12 |
from pathlib import Path
|
| 13 |
|
|
|
|
| 23 |
from cleantext import clean
|
| 24 |
from doctr.io import DocumentFile
|
| 25 |
from doctr.models import ocr_predictor
|
|
|
|
| 26 |
from spellchecker import SpellChecker
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def simple_rename(filepath, target_ext=".txt"):
|