# Dockerfile for a Hugging Face Space (page status: Running; file size: 2,489 bytes).
# Revisions shown in the page's blame column: 18f84ea, 3083224, 8c38015.
# Base image: OpenJDK 17 JDK, slim variant.
# NOTE(review): confirm this tag is still maintained upstream before relying on it.
FROM openjdk:17-jdk-slim
# Working directory for the instructions below; the built jar is copied to
# /app/stirling-pdf.jar and started from there.
WORKDIR /app
# Install all required tools and dependencies: OS packages via apt, then the
# Python packages via pip. The apt package lists are dropped in the same layer.
RUN apt-get update \
    && apt-get install -y \
        curl \
        git \
        gradle \
        libreoffice \
        maven \
        poppler-utils \
        python3 \
        python3-pip \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
    && pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow \
    && rm -rf /var/lib/apt/lists/*
# Download the Tesseract language packs into /usr/share/tessdata.
# One <lang>.traineddata file is fetched per language code, in the order listed;
# the first failed download aborts the build.
RUN mkdir -p /usr/share/tessdata \
    && for lang in chi_sim chi_tra eng jpn kor rus fra deu spa ita por vie tha; do \
        wget -q -P /usr/share/tessdata \
            "https://github.com/tesseract-ocr/tessdata/raw/main/${lang}.traineddata" \
        || exit 1; \
    done
# Clone and build the project in a temporary directory, keeping only the jar.
#
# --depth 1 fetches just the latest commit: the build needs the current source
# tree, not the repository history.
#
# NOTE(review): the cp source path is relative to the repository root, so it reads
# the build output of a `stirling-pdf` sub-directory inside the clone; cp fails if
# the glob matches more than one jar. Confirm against the upstream module layout.
#
# NOTE(review): git, gradle and maven were installed by an earlier layer, so
# removing them here hides them from the final filesystem but cannot shrink the
# image; a multi-stage build would be required for that.
RUN cd /tmp && \
    git clone --depth 1 https://github.com/Stirling-Tools/Stirling-PDF.git stirling-pdf && \
    cd stirling-pdf && \
    ./gradlew build && \
    cp stirling-pdf/build/libs/*.jar /app/stirling-pdf.jar && \
    cd / && \
    rm -rf /tmp/stirling-pdf ~/.gradle /tmp/* && \
    apt-get remove -y git gradle maven && \
    apt-get autoremove -y && \
    apt-get clean
# Create the runtime directories and a dedicated unprivileged account.
# Hugging Face Spaces run containers as UID 1000, so the account is created with
# that UID and given ownership of /app (including the directories created here)
# and of the tessdata directory, instead of making those paths world-writable.
RUN useradd --create-home --uid 1000 user && \
    mkdir -p /app/logs /app/configs /app/customFiles && \
    chown -R user:user /app /usr/share/tessdata

# Runtime environment: JVM heap limit as a percentage of available RAM, and the
# directory holding the downloaded Tesseract language data.
ENV JAVA_TOOL_OPTIONS="-XX:MaxRAMPercentage=75" \
    TESSDATA_PREFIX="/usr/share/tessdata"

# Expose port 7860 (the default port for Hugging Face Spaces).
EXPOSE 7860

# Run the application as the unprivileged user rather than root.
USER user

# Start the application. Exec (JSON array) form so the JVM runs as PID 1 and
# receives stop signals directly; the array must be the only content on the line.
CMD ["java", "-Dserver.port=7860", "-Dserver.address=0.0.0.0", "-Dfile.encoding=UTF-8", "-jar", "/app/stirling-pdf.jar"]