devanghingu committed on
Commit
7fcd17e
·
verified ·
1 Parent(s): 2c88cb5

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +227 -0
  2. packages.txt +1 -0
  3. requirements.txt +9 -0
  4. s3_uploads.py +27 -0
app.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import copy
3
+ import os
4
+ import re
5
+ import subprocess
6
+ import tempfile
7
+ import base64
8
+ from pathlib import Path
9
+ import fitz
10
+ import gradio as gr
11
+ import time
12
+ import html
13
+ from openai import OpenAI
14
+ from s3_uploads import upload_to_s3
15
+ from environs import env
16
+
17
+
18
# Module-level cancellation flag. It is polled by stream_from_vllm and by
# GLM4VModel.stream_generate, and reset to False at the start of every
# stream_generate call. Nothing in the visible code ever sets it to True.
+ stop_generation = False
19
+
20
def stream_from_vllm(messages):
    """Stream chat-completion deltas for ``messages`` from the Hugging Face router.

    A new OpenAI-compatible client is created on every call, authenticated
    with the ``HF_API_KEY`` environment variable.

    Args:
        messages: Chat messages in the OpenAI chat-completions format.

    Yields:
        The ``delta`` object of the first choice of every streamed chunk that
        has one.

    The loop stops early once the module-level ``stop_generation`` flag is
    truthy.
    """
    client = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=env.str("HF_API_KEY"),
    )

    response = client.chat.completions.create(
        model="THUDM/GLM-4.1V-9B-Thinking:novita",
        messages=messages,
        temperature=0.01,
        stream=True,
        max_tokens=8000,
    )

    try:
        for chunk in response:
            # The flag is only read here, so no ``global`` statement is needed.
            if stop_generation:
                break

            if chunk.choices and chunk.choices[0].delta:
                yield chunk.choices[0].delta
    finally:
        # Release the HTTP connection even when the stream is cut short by
        # the stop flag, an error, or the consumer abandoning the generator.
        response.close()
43
+
44
+
45
class GLM4VModel:
    """Build multimodal chat messages from uploaded files and stream replies.

    The class keeps no instance state. Its helpers convert files into
    OpenAI-style content parts, assemble the message list, and format the
    streamed output as Markdown.
    """

    # Extensions routed to a "video_url" content part.
    _VIDEO_EXTS = (".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".webm", ".mpeg", ".m4v")
    # Extensions routed to an "image_url" content part. ".tif" is included so
    # this list agrees with the types _image_to_base64 recognises.
    _IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif", ".webp")
    # MIME type per image extension; anything else falls back to JPEG.
    _MIME_BY_EXT = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".bmp": "image/bmp",
        ".tiff": "image/tiff",
        ".tif": "image/tiff",
        ".webp": "image/webp",
    }

    def _strip_html(self, text: str) -> str:
        """Return ``text`` with every ``<...>`` tag removed and the ends trimmed."""
        return re.sub(r"<[^>]+>", "", text).strip()

    def _wrap_text(self, text: str):
        """Wrap ``text`` in a single-element list of text content parts."""
        return [{"type": "text", "text": text}]

    def _image_to_base64(self, image_path):
        """Return the image at ``image_path`` as a ``data:`` URL.

        The MIME type is chosen from the file extension (case-insensitive);
        unknown extensions are labelled ``image/jpeg``.
        """
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
        mime_type = self._MIME_BY_EXT.get(Path(image_path).suffix.lower(), "image/jpeg")
        return f"data:{mime_type};base64,{encoded_string}"

    def _pdf_to_imgs(self, pdf_path):
        """Render every page of ``pdf_path`` to a PNG at 180 dpi.

        Returns:
            The list of PNG paths, one per page, in page order.
        """
        # A fresh directory per call keeps two uploads that share a file name
        # from overwriting each other's page images.
        out_dir = tempfile.mkdtemp()
        stem = Path(pdf_path).stem
        imgs = []
        doc = fitz.open(pdf_path)
        try:
            for i in range(doc.page_count):
                pix = doc.load_page(i).get_pixmap(dpi=180)
                img_p = os.path.join(out_dir, f"{stem}_{i}.png")
                pix.save(img_p)
                imgs.append(img_p)
        finally:
            # Close the document even when rendering a page fails.
            doc.close()
        return imgs

    def _ppt_to_imgs(self, ppt_path):
        """Convert a presentation to PDF with LibreOffice, then to page images.

        Raises:
            subprocess.CalledProcessError: If LibreOffice exits with a
                non-zero status.
        """
        tmp = tempfile.mkdtemp()
        subprocess.run(
            ["libreoffice", "--headless", "--convert-to", "pdf", "--outdir", tmp, ppt_path],
            check=True,
        )
        pdf_path = os.path.join(tmp, Path(ppt_path).stem + ".pdf")
        return self._pdf_to_imgs(pdf_path)

    def _files_to_content(self, media):
        """Upload ``media`` files and return their content parts.

        Videos become ``video_url`` parts and images become ``image_url``
        parts. PDFs and presentations are rendered page by page, and every
        page image becomes an ``image_url`` part. Files with any other
        extension are skipped.

        Args:
            media: Iterable of file paths, or ``None``.

        Returns:
            A list of content-part dicts whose URLs come from ``upload_to_s3``.
        """
        out = []
        for f in media or []:
            ext = Path(f).suffix.lower()
            if ext in self._VIDEO_EXTS:
                out.append({"type": "video_url", "video_url": {"url": upload_to_s3(f)}})
            elif ext in self._IMAGE_EXTS:
                out.append({"type": "image_url", "image_url": {"url": upload_to_s3(f)}})
            elif ext in (".ppt", ".pptx"):
                for p in self._ppt_to_imgs(f):
                    out.append({"type": "image_url", "image_url": {"url": upload_to_s3(p)}})
            elif ext == ".pdf":
                for p in self._pdf_to_imgs(f):
                    out.append({"type": "image_url", "image_url": {"url": upload_to_s3(p)}})
        return out

    def _stream_fragment(self, reasoning_content: str = "", content: str = "", skip_think: bool = True):
        """Format the accumulated reasoning and answer text as Markdown.

        Args:
            reasoning_content: Accumulated reasoning text.
            content: Accumulated answer text.
            skip_think: When true (the default) the reasoning is left out.

        Returns:
            The optional "Thinking" ``<details>`` section, two newlines, then
            the stripped answer.
        """
        think_html = ""
        answer_md = ""

        if reasoning_content and not skip_think:
            reasoning_content_clean = reasoning_content.strip()
            think_html = (
                "### 💭 Thinking\n"
                "<details open>\n"
                "<summary>Click to expand</summary>\n\n"
                f"{reasoning_content_clean}\n"
                "</details>\n"
            )

        if content:
            answer_md = content.strip()

        return think_html + "\n\n" + answer_md

    def _build_messages(self, raw_hist, sys_prompt):
        """Turn UI history into the message list sent to the model.

        Args:
            raw_hist: List of ``{"role", "content"}`` dicts.
            sys_prompt: System prompt text; ``None`` or blank adds no system
                message.

        Returns:
            The message list. User content is passed through untouched.
            Assistant content has ``<details>`` sections and HTML tags
            removed, and is dropped if nothing remains.
        """
        msgs = []
        system_text = (sys_prompt or "").strip()
        if system_text:
            msgs.append({"role": "system", "content": [{"type": "text", "text": system_text}]})
        for h in raw_hist:
            if h["role"] == "user":
                msgs.append({"role": "user", "content": h["content"]})
            else:
                raw = re.sub(r"<details.*?</details>", "", h["content"], flags=re.DOTALL)
                clean_content = self._strip_html(raw).strip()
                if clean_content:
                    msgs.append({"role": "assistant", "content": self._wrap_text(clean_content)})
        return msgs

    def _extract_delta(self, delta):
        """Return ``(reasoning_text, content_text)`` carried by one stream delta.

        Accepts both attribute-style objects and plain dicts; a missing or
        empty field is returned as ``""``.
        """
        if isinstance(delta, dict):
            reasoning = delta.get("reasoning_content")
            content = delta.get("content")
        else:
            reasoning = getattr(delta, "reasoning_content", None)
            content = getattr(delta, "content", None)
        return reasoning or "", content or ""

    def stream_generate(self, raw_hist, sys_prompt: str, *, skip_special_tokens: bool = False):
        """Stream the model's reply, yielding the formatted text so far.

        Args:
            raw_hist: Conversation history, see ``_build_messages``.
            sys_prompt: System prompt text.
            skip_special_tokens: Accepted for interface compatibility; not
                used by this implementation.

        Yields:
            The output of ``_stream_fragment`` after every received delta.
            If streaming raises, one final fragment carrying the error
            message is yielded instead of propagating the exception.
        """
        global stop_generation
        stop_generation = False
        msgs = self._build_messages(raw_hist, sys_prompt)
        reasoning_buffer = ""
        content_buffer = ""

        try:
            for delta in stream_from_vllm(msgs):
                if stop_generation:
                    break
                # Both fields are read independently, so a delta carrying
                # reasoning and answer text at once loses neither.
                reasoning_piece, content_piece = self._extract_delta(delta)
                reasoning_buffer += reasoning_piece
                content_buffer += content_piece

                yield self._stream_fragment(reasoning_buffer, content_buffer)

        except Exception as e:
            error_msg = f"Error during streaming: {str(e)}"
            yield self._stream_fragment("", error_msg)
172
+
173
+
174
# Shared GLM4VModel instance used by extract_table_from_file.
+ glm4v = GLM4VModel()
175
+
176
# System prompt that extract_table_from_file passes to glm4v.stream_generate.
# NOTE(review): the spelling inside the string ("METERIAL", "colums") is part
# of the text sent to the model; confirm whether it mirrors the wording on the
# source drawings before correcting it.
+ sys_prompt = """Instructions:
177
+ Extract only "BILL OF METERIAL" table containing columns same as it is!
178
+ colums: (POSITION, DESCRIPTION, N PIECES, MATERIAL (like SA 516 Gr.70N or SA 105 N), DIMENSIONS(like 1700 I.D. X 2045H 50 THK.), WT.Kgs
179
+
180
+ Ignore title blocks, revision notes, drawing numbers, and general annotations outside the "BILL OF METERIAL".
181
+ If a page contains multiple tables, extract only those explicitly related to BILL OF METERIAL.
182
+
183
+ Preserve the row and column structure as files.
184
+ Do not include any surrounding decorative lines or borders—only clean tabular data.
185
+ output format: markdown table format with following columns (POSITION, DESCRIPTION, N PIECES, MATERIAL, DIMENSIONS(like 1700 I.D. X 2045H 50 THK.) and WT.Kgs)"""
186
+
187
def extract_table_from_file(file):
    """Stream the table extracted from an uploaded file as Markdown/HTML.

    This is a generator: every yielded string replaces the content of the
    output component.

    Args:
        file: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path, or ``None`` when nothing was
            uploaded.

    Yields:
        A prompt to upload a file, or a "Processing" banner followed by the
        progressively longer model answer, or a red error message.
    """
    if file is None:
        # In a generator a plain ``return "text"`` never reaches the caller,
        # so the hint has to be yielded.
        yield "Please upload a file."
        return

    yield "<h2>🌀 Processing...</h2>\n"
    try:
        # Accept a plain path as well as an object exposing ``.name``.
        file_path = getattr(file, "name", file)
        # Conversion and upload run inside the ``try`` so their failures are
        # reported in the UI as well.
        payload = glm4v._files_to_content([file_path])
        if not payload:
            # _files_to_content skips extensions it does not recognise; an
            # empty user message must not be sent to the model.
            yield "<div style='color: red;'>Error: unsupported file type.</div>"
            return

        raw_hist = [{"role": "user", "content": payload}]
        for chunk in glm4v.stream_generate(raw_hist, sys_prompt):
            yield chunk
    except Exception as e:
        yield f"<div style='color: red;'>Error: {html.escape(str(e))}</div>"
202
+
203
# Gradio UI: a file upload and submit button on the left, the streamed
# extraction result on the right.
theme = gr.themes.Ocean(
    primary_hue="gray",
)

with gr.Blocks(title="demo", theme=theme) as demo:
    gr.Markdown(
        # The closing tag was missing its ">" ("</div"), leaving broken markup.
        "<div style='text-align:center; margin-bottom:20px;'><h1> PDF Extraction Demo</h1></div>"
    )
    with gr.Row():
        with gr.Column():
            up = gr.File(label="Upload File", type="filepath")
            # NOTE(review): this selector is shown but not passed to the click
            # handler below, so the chosen format currently has no effect.
            format_selector = gr.Radio(choices=["CSV", "JSON"], label="Output Format", value="CSV")
            submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            output_markdown = gr.Markdown(label="Extracted Table")

    # extract_table_from_file is a generator, so the output updates as it yields.
    submit_btn.click(
        extract_table_from_file,
        inputs=[up],
        outputs=[output_markdown],
    )

if __name__ == "__main__":
    demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libreoffice
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.25.0
2
+ spaces>=0.37.1
3
+ PyMuPDF>=1.26.1
4
+ torchvision==0.20.1
5
+ torch==2.5.1
6
+ av>=14.4.0
7
+ openai
8
+ boto3
9
+ environs
s3_uploads.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import boto3
2
+ import uuid
3
+ from environs import env
4
+
5
# Bucket settings and credentials come from the environment. Every variable
# except AWS_USER is read without a default.
AWS_SECRET_KEY = env.str("AWS_SECRET_KEY")
AWS_ACCESS_KEY = env.str("AWS_ACCESS_KEY")
BUCKET_NAME = env.str("BUCKET_NAME")
AWS_REGION = env.str("AWS_REGION")

# Read with a fallback value; not referenced elsewhere in the visible code.
AWS_USER = env.str("AWS_USER", default="default_user")

# Module-level S3 client used by upload_to_s3.
s3 = boto3.client(
    "s3",
    region_name=AWS_REGION,
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
)
17
+
18
def upload_to_s3(file_path):
    """Upload ``file_path`` to the configured bucket and return its URL.

    The object key is the file's base name with ``_<uuid4>`` inserted before
    the last ``.`` (or appended when the name has no ``.``), so repeated
    uploads of the same file never overwrite each other.

    Args:
        file_path: Path of the local file to upload.

    Returns:
        The ``https://<bucket>.s3.<region>.amazonaws.com/<key>`` URL of the
        uploaded object, with the key percent-encoded.
    """
    import os
    from urllib.parse import quote

    # basename handles the platform's path separator instead of assuming "/".
    file_name = os.path.basename(file_path)
    unique = str(uuid.uuid4())

    # rpartition keeps names without any "." working; indexing the result of
    # split(".") raised IndexError for them.
    stem, dot, extension = file_name.rpartition(".")
    if dot:
        s3_key = f"{stem}_{unique}.{extension}"
    else:
        s3_key = f"{file_name}_{unique}"

    s3.upload_file(file_path, BUCKET_NAME, s3_key)

    # Percent-encode the key so names with spaces or other reserved
    # characters still yield a valid URL.
    return f"https://{BUCKET_NAME}.s3.{AWS_REGION}.amazonaws.com/{quote(s3_key)}"