File size: 1,666 Bytes
8d9306e
 
 
 
 
 
 
 
 
 
f884ea7
 
9711ed9
 
 
f884ea7
 
 
fd6cb9f
 
 
8d9306e
d28411b
8d9306e
 
 
 
f705683
8d9306e
f705683
 
9a6a97f
f705683
8d9306e
3568832
9a6a97f
8d9306e
f705683
 
f4148f7
8d9306e
 
 
 
686f21e
8d9306e
d28411b
9a6a97f
310a439
 
8d9306e
686f21e
310a439
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os

import numpy as np
import streamlit as st
from PIL import Image


# Page header: app title, a spacer, and a short description of the model.
st.title("French Image Caption App")
# Spacer line below the title.
st.write('\n')

# User-facing description (rendered as Markdown). The trailing "\n" escape on
# the first line, together with the literal line break, separates the intro
# sentence from the training details with a blank line.
st.markdown(
    """
    An image caption model built by combining the ViT model and a French GPT2 model. [Part of the [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/).]\n
    The pretrained weights of both models are loaded, with a set of randomly initialized cross-attention weights.
    The model is trained on 65000 images from the COCO dataset for about 1500 steps, with the original English captions translated to French for training purposes.
    """
)

#image = Image.open('samples/val_000000039769.jpg')
#show = st.image(image, use_column_width=True)
#show.image(image, 'Preloaded Image', use_column_width=True)

# The `model` import is deliberately placed inside the spinner so the UI shows
# a progress message while the import runs (presumably this is where the
# ViT-GPT2 weights are loaded and compiled -- confirm against model.py).
with st.spinner('Loading and compiling ViT-GPT2 model ...'):

    # Import exactly the names this script uses rather than `import *`, so it
    # is clear where they come from and nothing else is silently shadowed.
    from model import predict, sample_dir, sample_fns
    st.sidebar.write('Vit-GPT2 model loaded :)')

# Sidebar: let the user pick one of the sample images listed in `sample_fns`.
st.sidebar.title("Select a sample image")

sample_name = st.sidebar.selectbox(
    "Please choose a sample image",
    sample_fns
)

# Turn the selected entry into the file name used on disk: drop the ".jpg"
# extension, left-pad the remainder with zeros to 12 characters, and wrap it
# in the "COCO_val2014_<id>.jpg" pattern.
sample_name = f"COCO_val2014_{sample_name.replace('.jpg', '').zfill(12)}.jpg"
sample_path = os.path.join(sample_dir, sample_name)

# The image stays open here because it is passed to `predict` further down;
# it is closed after the caption has been generated.
image = Image.open(sample_path)
# Render the image once, with its caption, instead of drawing it and then
# immediately replacing the same element with a captioned copy.
show = st.image(image, '\n\nSelected Image', use_column_width=True)

# Spacer line in the sidebar.
st.sidebar.write('\n')

# Generate the caption for the selected image and show it in the main area.
with st.spinner('Generating image caption ...'):

    try:
        caption = predict(image)
    finally:
        # Always release the image, even when prediction raises, so the
        # underlying file is not left open.
        image.close()
    st.header(f'{caption}')

# Repeat the prediction in the sidebar under its own heading.
st.sidebar.header("ViT-GPT2 predicts:")
st.sidebar.write(f"{caption}", '\n')