# Hugging Face Space (status: Running)
# Base image: slim OpenJDK 17 JDK. A full JDK is used because the application
# is compiled from source with Gradle further down in this file.
# NOTE(review): the Docker Hub "openjdk" official image is marked deprecated;
# consider migrating to a maintained JDK 17 image after checking that the apt
# packages installed below are still available there — TODO confirm.
FROM openjdk:17-jdk-slim

# Working directory for the application; the built jar and its runtime
# directories are placed under /app.
WORKDIR /app
# Install all required tools and dependencies in a single layer:
#   * build tooling        : git, gradle, maven
#   * download helpers     : curl, wget
#   * document/OCR tooling : libreoffice, poppler-utils, tesseract-ocr (+ English data)
#   * Python runtime + pip : used for the pip packages installed right after
# The apt package lists are deleted in the same layer so they do not end up in
# the image.
# NOTE(review): git/gradle/maven are removed again by a later RUN; removing
# packages in a later layer does not reduce the image size. A multi-stage build
# would avoid shipping them at all.
RUN apt-get update && apt-get install -y \
        curl \
        git \
        gradle \
        libreoffice \
        maven \
        poppler-utils \
        python3 \
        python3-pip \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget && \
    pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow && \
    rm -rf /var/lib/apt/lists/*
# Download the Tesseract language packs into /usr/share/tessdata.
# Languages: Simplified/Traditional Chinese, English, Japanese, Korean,
# Russian, French, German, Spanish, Italian, Portuguese, Vietnamese, Thai.
# The loop aborts the build on the first failed download (|| exit 1) so a
# missing language file cannot go unnoticed.
# NOTE(review): the files are fetched from the "main" branch, so the exact
# content is not pinned; consider pinning a tag/commit and verifying checksums.
RUN mkdir -p /usr/share/tessdata && \
    for lang in chi_sim chi_tra eng jpn kor rus fra deu spa ita por vie tha; do \
        wget -q -O "/usr/share/tessdata/${lang}.traineddata" \
            "https://github.com/tesseract-ocr/tessdata/raw/main/${lang}.traineddata" \
        || exit 1; \
    done
# Clone Stirling-PDF into a temporary directory, build it with the project's
# Gradle wrapper and copy the resulting jar to /app/stirling-pdf.jar.
# The source tree, the Gradle cache in the home directory and everything else
# in /tmp are deleted in the same layer, so they are not stored in the image.
# Finally the build-only packages are uninstalled and the apt cache is cleaned.
# NOTE(review): the clone is not pinned to a tag or commit, so each build takes
# whatever the default branch currently contains — builds are not reproducible.
# NOTE(review): the cp glob assumes build/libs of the "stirling-pdf" module
# contains exactly one jar; with several matches cp fails because the target
# is a file — verify against the upstream project layout.
# NOTE(review): git/gradle/maven were installed in an earlier layer, so
# removing them here does not shrink the image.
RUN cd /tmp && \
    git clone https://github.com/Stirling-Tools/Stirling-PDF.git stirling-pdf && \
    cd stirling-pdf && \
    ./gradlew build && \
    cp stirling-pdf/build/libs/*.jar /app/stirling-pdf.jar && \
    cd / && \
    rm -rf /tmp/stirling-pdf ~/.gradle /tmp/* && \
    apt-get remove -y git gradle maven && \
    apt-get autoremove -y && \
    apt-get clean
# Create the runtime directories (logs, configs, custom files) and make them,
# together with the Tesseract data directory, writable by any user.
# NOTE(review): mode 777 is presumably needed because the hosting platform
# runs the container under a non-root UID — TODO confirm; otherwise prefer a
# dedicated user with chown and tighter permissions.
RUN mkdir -p /app/logs /app/configs /app/customFiles && \
    chmod -R 777 /app/logs /app/configs /app/customFiles /usr/share/tessdata
# Runtime environment:
#   JAVA_TOOL_OPTIONS - lets the JVM use up to 75% of the available RAM for the heap.
#   TESSDATA_PREFIX   - directory holding the Tesseract *.traineddata files.
ENV JAVA_TOOL_OPTIONS="-XX:MaxRAMPercentage=75" \
    TESSDATA_PREFIX="/usr/share/tessdata"
# Expose port 7860 (the default port for Hugging Face Spaces); the application
# is started with -Dserver.port=7860 to match.
EXPOSE 7860
# Start the application. Exec (JSON array) form is used so the JVM is launched
# directly rather than through a shell. The server listens on all interfaces
# on port 7860 and uses UTF-8 as the default file encoding.
CMD ["java", "-Dserver.port=7860", "-Dserver.address=0.0.0.0", "-Dfile.encoding=UTF-8", "-jar", "/app/stirling-pdf.jar"]