File size: 3,241 Bytes
3d1c35c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
import cv2
import io
import pandas as pd

from LSBSteg import LSBSteg


def convert(file):
    """Extract the dataframe hidden inside an image file.

    The image is loaded with OpenCV, the embedded binary payload is pulled
    out with ``LSBSteg.decode_binary`` and the payload is parsed as Parquet.

    Args:
        file: Path of the image file to decode.

    Returns:
        The ``pandas.DataFrame`` read from the embedded Parquet payload.

    Raises:
        ValueError: If no file was given, or if OpenCV could not read the
            file as an image.
    """
    print(f"Converting file {file}")
    if not file:
        raise ValueError("No input image was provided.")

    in_img = cv2.imread(file, cv2.IMREAD_UNCHANGED)
    if in_img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so fail here with a message that names the real problem.
        raise ValueError(f"Could not read an image from {file!r}.")

    lsbsteg = LSBSteg(in_img)
    data = lsbsteg.decode_binary()
    # Named `buffer` (not `bytes`) so the builtin type is not shadowed.
    buffer = io.BytesIO(data)
    return pd.read_parquet(buffer)


# Build the Gradio UI: every component created inside this context belongs to `demo`.
with gr.Blocks() as demo:
    # Introductory text shown to the user, explaining how the data is hidden in the image.
    gr.Markdown("""
        ## Non-Suspicious image decoder

        This tool shows the extraction a dataframe hidden inside an image.

        There are a few ways to hide data into a PNG file, notably:
         * adding it after the end of the file (after the PNG IEND chunk), so that it gets
           ignored by image viewers
         * adding it as comments in the PNG file (tEXt chunks)

        These methods are kind of easy to spot! Also, a lot of software, browsers, image upload
         websites etc often just strip them.

        So, here, we have a different, more thoughtful (and arguably cooler) method.

        This class hides the data using a basic kind of **[steganography](https://en.wikipedia.org/wiki/Steganography)**:
         it hides it in the
         *least significant bits* of the raw (uncompressed) picture: tiny differences in the red, green and blue
         channel of the image encodes the data we're interested in.

        This means the resulting picture
         looks **very close to the original image**; and for the data we hide here, it is **inperceptible
         to the naked eye**.

        The resulting PNG file will probably get a little bit bigger as a result, since PNG uses compression,
         which will have a harder time when we have our stolen data injected into the image. This is
         not that much of a problem since it stays <100Ko, so it's not that noticeable.

        """)
    with gr.Row():
        # Input image component; it is the single input passed to `convert` below.
        # NOTE(review): type="filepath" presumably makes gradio hand over a path — confirm
        # against the gradio version in use.
        im = gr.Image(label="Input image file", type="filepath")

        def preprocess(encoding: str) -> str:
            """Write a base64 data URI to a temporary PNG file and return its path.

            We do our own preprocessing because gradio's deletes PNG metadata :(

            Args:
                encoding: Data URI of the form ``data:<mediatype>;base64,<payload>``.

            Returns:
                Path of a temporary file with the ``.input.png`` suffix that holds
                the decoded bytes. The file is created with ``delete=False``, so
                it is not removed automatically.

            Raises:
                IndexError: If ``encoding`` contains no comma.
            """
            import tempfile
            import base64

            # The payload of a data URI is everything after the first comma.
            image_encoded = encoding.split(",", 1)[1]
            png_content = base64.b64decode(image_encoded)
            # Close the handle before returning so the bytes are flushed to disk
            # by the time the caller opens the file by name.
            with tempfile.NamedTemporaryFile(
                delete=False,
                suffix=".input.png",
            ) as file_obj:
                file_obj.write(png_content)
            return file_obj.name

        # Override the image component's preprocessing with the custom `preprocess` function.
        im.preprocess = preprocess
        # Table component that receives the dataframe returned by `convert`.
        df_out = gr.Dataframe(
            label="Output dataframe", max_rows=20, overflow_row_behaviour="paginate"
        )
        # file_out = gr.File(label="Full output CSV file")
    btn = gr.Button(value="Extract")
    gr.Markdown("Click on the example below to get the data from the associated colab notebook :)")
    # Example input wired to the same function and output table as the button.
    # NOTE(review): cache_examples=True presumably runs `convert` on the example ahead of
    # time — confirm against the gradio version in use.
    gr.Examples(
        examples=["sample-picture.png"],
        inputs=[im],
        outputs=[df_out],
        fn=convert,
        cache_examples=True,
    )
    # demo = gr.Interface(convert, im, im_2)
    # Clicking the button calls `convert` with the image input and shows the result in the table.
    btn.click(convert, inputs=[im], outputs=[df_out])

    # example_img = os.path.join(os.path.dirname(__file__), "example-picture.png")

# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()