takatorury committed on
Commit
18f84ea
·
verified ·
1 Parent(s): 3083224

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +39 -77
Dockerfile CHANGED
@@ -1,96 +1,58 @@
1
- # FROM openjdk:17-jdk-slim AS builder
2
-
3
- # WORKDIR /app
4
-
5
- # # 安装必要的构建工具
6
- # RUN apt-get update && apt-get install -y \
7
- # git \
8
- # curl \
9
- # gradle \
10
- # maven
11
-
12
- # # 克隆项目代码
13
- # RUN git clone https://github.com/Stirling-Tools/Stirling-PDF.git .
14
-
15
- # # 构建项目
16
- # RUN ./gradlew build
17
-
18
- # FROM openjdk:17-jdk-slim
19
-
20
- # WORKDIR /app
21
-
22
- # COPY --from=builder /app/build/libs/*.jar /app/stirling-pdf.jar
23
-
24
- # RUN mkdir -p /app/logs /app/configs /app/customFiles && \
25
- # chmod -R 777 /app/logs /app/configs /app/customFiles
26
-
27
- # # 安装运行时依赖
28
- # RUN apt-get update && apt-get install -y \
29
- # libreoffice \
30
- # poppler-utils \
31
- # tesseract-ocr \
32
- # tesseract-ocr-eng \
33
- # python3 \
34
- # python3-pip && \
35
- # pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow && \
36
- # rm -rf /var/lib/apt/lists/*
37
-
38
- # # 设置环境变量
39
- # ENV JAVA_TOOL_OPTIONS="-XX:MaxRAMPercentage=75"
40
-
41
- # # 暴露端口 7860
42
- # EXPOSE 7860
43
-
44
- # # 启动应用程序
45
- # CMD ["java", "-Dserver.port=7860", "-Dserver.address=0.0.0.0", "-Dfile.encoding=UTF-8", "-jar", "/app/stirling-pdf.jar"]
46
- FROM openjdk:17-jdk-slim AS builder
47
 
48
  WORKDIR /app
49
 
 
50
  RUN apt-get update && apt-get install -y \
51
  git \
52
  curl \
 
53
  gradle \
54
- maven
55
-
56
- RUN git clone https://github.com/Stirling-Tools/Stirling-PDF.git .
57
-
58
- RUN ./gradlew build
59
-
60
- FROM openjdk:17-jdk-slim
61
-
62
- WORKDIR /app
63
-
64
- COPY --from=builder /app/build/libs/*.jar /app/stirling-pdf.jar
65
-
66
- RUN mkdir -p /app/logs /app/configs /app/customFiles /usr/share/tessdata && \
67
- chmod -R 777 /app/logs /app/configs /app/customFiles /usr/share/tessdata
68
-
69
- RUN apt-get update && apt-get install -y \
70
  libreoffice \
71
  poppler-utils \
72
  tesseract-ocr \
73
  tesseract-ocr-eng \
74
- wget \
75
  python3 \
76
  python3-pip && \
77
  pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow && \
78
- cd /usr/share/tessdata && \
79
- wget https://github.com/tesseract-ocr/tessdata/raw/main/chi_sim.traineddata && \
80
- wget https://github.com/tesseract-ocr/tessdata/raw/main/chi_tra.traineddata && \
81
- wget https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata && \
82
- wget https://github.com/tesseract-ocr/tessdata/raw/main/jpn.traineddata && \
83
- wget https://github.com/tesseract-ocr/tessdata/raw/main/kor.traineddata && \
84
- wget https://github.com/tesseract-ocr/tessdata/raw/main/rus.traineddata && \
85
- wget https://github.com/tesseract-ocr/tessdata/raw/main/fra.traineddata && \
86
- wget https://github.com/tesseract-ocr/tessdata/raw/main/deu.traineddata && \
87
- wget https://github.com/tesseract-ocr/tessdata/raw/main/spa.traineddata && \
88
- wget https://github.com/tesseract-ocr/tessdata/raw/main/ita.traineddata && \
89
- wget https://github.com/tesseract-ocr/tessdata/raw/main/por.traineddata && \
90
- wget https://github.com/tesseract-ocr/tessdata/raw/main/vie.traineddata && \
91
- wget https://github.com/tesseract-ocr/tessdata/raw/main/tha.traineddata && \
92
  rm -rf /var/lib/apt/lists/*
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  EXPOSE 7860
95
 
 
96
  CMD ["java", "-Dserver.port=7860", "-Dserver.address=0.0.0.0", "-Dfile.encoding=UTF-8", "-jar", "/app/stirling-pdf.jar"]
 
1
+ FROM openjdk:17-jdk-slim
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  WORKDIR /app
4
 
5
+ # 安装所有必要的工具和依赖
6
  RUN apt-get update && apt-get install -y \
7
  git \
8
  curl \
9
+ wget \
10
  gradle \
11
+ maven \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  libreoffice \
13
  poppler-utils \
14
  tesseract-ocr \
15
  tesseract-ocr-eng \
 
16
  python3 \
17
  python3-pip && \
18
  pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow && \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  rm -rf /var/lib/apt/lists/*
20
 
21
+ # 创建必要的目录
22
+ RUN mkdir -p /app/logs /app/configs /app/customFiles /usr/share/tessdata && \
23
+ chmod -R 777 /app/logs /app/configs /app/customFiles /usr/share/tessdata
24
+
25
+ # 下载Tesseract语言包
26
+ RUN cd /usr/share/tessdata && \
27
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/chi_sim.traineddata && \
28
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/chi_tra.traineddata && \
29
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata && \
30
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/jpn.traineddata && \
31
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/kor.traineddata && \
32
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/rus.traineddata && \
33
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/fra.traineddata && \
34
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/deu.traineddata && \
35
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/spa.traineddata && \
36
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/ita.traineddata && \
37
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/por.traineddata && \
38
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/vie.traineddata && \
39
+ wget -q https://github.com/tesseract-ocr/tessdata/raw/main/tha.traineddata
40
+
41
+ # Clone and build in a scratch dir: git refuses to clone into a non-empty directory, and /app already holds logs/, configs/ and customFiles/. The scratch tree is removed by the /tmp cleanup below.
42
+ RUN git clone https://github.com/Stirling-Tools/Stirling-PDF.git /tmp/stirling-pdf-src && \
43
+ (cd /tmp/stirling-pdf-src && ./gradlew build) && \
44
+ cp /tmp/stirling-pdf-src/stirling-pdf/build/libs/*.jar /app/stirling-pdf.jar && \
45
+ rm -rf ~/.gradle /tmp/* && \
46
+ apt-get remove -y git gradle maven && \
47
+ apt-get autoremove -y && \
48
+ apt-get clean
49
+
50
+ # 设置环境变量
51
+ ENV JAVA_TOOL_OPTIONS="-XX:MaxRAMPercentage=75"
52
+ ENV TESSDATA_PREFIX="/usr/share/tessdata"
53
+
54
+ # 暴露端口7860(Hugging Face Spaces默认端口)
55
  EXPOSE 7860
56
 
57
+ # 启动应用程序
58
  CMD ["java", "-Dserver.port=7860", "-Dserver.address=0.0.0.0", "-Dfile.encoding=UTF-8", "-jar", "/app/stirling-pdf.jar"]