Pavan147 committed on
Commit
dbef28c
·
verified ·
1 Parent(s): b85af28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -55
app.py CHANGED
@@ -1,69 +1,69 @@
1
 
2
- import re
3
- import gradio as gr
4
- from transformers import AutoProcessor, AutoModelForImageTextToText
5
- from PIL import Image
6
 
7
- # Load model & processor once at startup
8
- processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
9
- model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
10
 
11
- def smoldocling_readimage(image, prompt_text="Convert to docling"):
12
- messages = [
13
- {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
14
- ]
15
- prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
16
- inputs = processor(text=prompt, images=[image], return_tensors="pt")
17
- outputs = model.generate(**inputs, max_new_tokens=1024)
18
- prompt_length = inputs.input_ids.shape[1]
19
- generated = outputs[:, prompt_length:]
20
- result = processor.batch_decode(generated, skip_special_tokens=False)[0]
21
- return result.replace("<end_of_utterance>", "").strip()
22
 
23
- def extract_numbers(docling_text):
24
- # Extract all floating numbers from the docling text using regex
25
- numbers = re.findall(r"[-+]?\d*\.\d+|\d+", docling_text)
26
- return list(map(float, numbers))
27
 
28
- def compare_outputs(img1, img2):
29
- # Extract docling text from both images
30
- output1 = smoldocling_readimage(img1)
31
- output2 = smoldocling_readimage(img2)
32
 
33
- # Extract numbers from both outputs
34
- nums1 = extract_numbers(output1)
35
- nums2 = extract_numbers(output2)
36
 
37
- # Compare numbers — find matching count based on position
38
- length = min(len(nums1), len(nums2))
39
- matches = sum(1 for i in range(length) if abs(nums1[i] - nums2[i]) < 1e-3)
40
 
41
- # Calculate similarity accuracy percentage
42
- total = max(len(nums1), len(nums2))
43
- accuracy = (matches / total) * 100 if total > 0 else 0
44
 
45
- # Prepare result text
46
- result_text = (
47
- f"Output for Image 1:\n{output1}\n\n"
48
- f"Output for Image 2:\n{output2}\n\n"
49
- f"Similarity Accuracy: {accuracy:.2f}%\n"
50
- f"Matching Values: {matches} out of {total}"
51
- )
52
- return result_text
53
 
54
- # Gradio UI: take 2 images, output similarity report
55
- demo = gr.Interface(
56
- fn=compare_outputs,
57
- inputs=[
58
- gr.Image(type="pil", label="Upload Image 1"),
59
- gr.Image(type="pil", label="Upload Image 2"),
60
- ],
61
- outputs="text",
62
- title="SmolDocling Image Comparison",
63
- description="Upload two document images. This app extracts data from both and compares similarity."
64
- )
65
 
66
- demo.launch()
67
 
68
 
69
  import re
 
1
 
2
+ # import re
3
+ # import gradio as gr
4
+ # from transformers import AutoProcessor, AutoModelForImageTextToText
5
+ # from PIL import Image
6
 
7
+ # # Load model & processor once at startup
8
+ # processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
9
+ # model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
10
 
11
+ # def smoldocling_readimage(image, prompt_text="Convert to docling"):
12
+ # messages = [
13
+ # {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
14
+ # ]
15
+ # prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
16
+ # inputs = processor(text=prompt, images=[image], return_tensors="pt")
17
+ # outputs = model.generate(**inputs, max_new_tokens=1024)
18
+ # prompt_length = inputs.input_ids.shape[1]
19
+ # generated = outputs[:, prompt_length:]
20
+ # result = processor.batch_decode(generated, skip_special_tokens=False)[0]
21
+ # return result.replace("<end_of_utterance>", "").strip()
22
 
23
+ # def extract_numbers(docling_text):
24
+ # # Extract all floating numbers from the docling text using regex
25
+ # numbers = re.findall(r"[-+]?\d*\.\d+|\d+", docling_text)
26
+ # return list(map(float, numbers))
27
 
28
+ # def compare_outputs(img1, img2):
29
+ # # Extract docling text from both images
30
+ # output1 = smoldocling_readimage(img1)
31
+ # output2 = smoldocling_readimage(img2)
32
 
33
+ # # Extract numbers from both outputs
34
+ # nums1 = extract_numbers(output1)
35
+ # nums2 = extract_numbers(output2)
36
 
37
+ # # Compare numbers — find matching count based on position
38
+ # length = min(len(nums1), len(nums2))
39
+ # matches = sum(1 for i in range(length) if abs(nums1[i] - nums2[i]) < 1e-3)
40
 
41
+ # # Calculate similarity accuracy percentage
42
+ # total = max(len(nums1), len(nums2))
43
+ # accuracy = (matches / total) * 100 if total > 0 else 0
44
 
45
+ # # Prepare result text
46
+ # result_text = (
47
+ # f"Output for Image 1:\n{output1}\n\n"
48
+ # f"Output for Image 2:\n{output2}\n\n"
49
+ # f"Similarity Accuracy: {accuracy:.2f}%\n"
50
+ # f"Matching Values: {matches} out of {total}"
51
+ # )
52
+ # return result_text
53
 
54
+ # # Gradio UI: take 2 images, output similarity report
55
+ # demo = gr.Interface(
56
+ # fn=compare_outputs,
57
+ # inputs=[
58
+ # gr.Image(type="pil", label="Upload Image 1"),
59
+ # gr.Image(type="pil", label="Upload Image 2"),
60
+ # ],
61
+ # outputs="text",
62
+ # title="SmolDocling Image Comparison",
63
+ # description="Upload two document images. This app extracts data from both and compares similarity."
64
+ # )
65
 
66
+ # demo.launch()
67
 
68
 
69
  import re