Spaces:
Running
Running
init
Browse files
app.py
CHANGED
@@ -1,8 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
from utils.demo import load_video, ctc_decode
|
4 |
from utils.two_stream_infer import load_model
|
5 |
-
import os
|
6 |
from scripts.extract_lip_coordinates import generate_lip_coordinates
|
7 |
import options as opt
|
8 |
|
@@ -16,46 +78,59 @@ st.info(
|
|
16 |
"The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU ",
|
17 |
icon="ℹ️",
|
18 |
)
|
|
|
19 |
# Generating a list of options or videos
|
20 |
-
options = os.listdir(os.path.join("app_input"))
|
21 |
selected_video = st.selectbox("Choose video", options)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
23 |
col1, col2 = st.columns(2)
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
|
|
26 |
with col1:
|
27 |
file_path = os.path.join("app_input", selected_video)
|
28 |
video_name = selected_video.split(".")[0]
|
29 |
-
|
30 |
-
|
31 |
-
# Rendering inside of the app
|
32 |
-
video = open(f"{video_name}.mp4", "rb")
|
33 |
-
video_bytes = video.read()
|
34 |
-
st.video(video_bytes)
|
35 |
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
|
|
|
38 |
video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
|
39 |
-
prediction_video = video
|
40 |
-
st.markdown(f"Frames Generated:\n{files}")
|
41 |
-
frames_generated = True
|
42 |
-
with col1, st.spinner("Generating Lip Landmark Coordinates"):
|
43 |
coordinates = generate_lip_coordinates(f"{video_name}_samples")
|
44 |
-
|
45 |
-
|
46 |
coordinates_generated = True
|
|
|
|
|
|
|
47 |
|
48 |
with col2:
|
49 |
st.info("Ready to make prediction!")
|
50 |
generate = st.button("Generate")
|
51 |
if generate:
|
52 |
-
with
|
53 |
y = model(
|
54 |
-
|
55 |
-
|
56 |
)
|
57 |
txt = ctc_decode(y[0])
|
58 |
st.text(txt[-1])
|
59 |
-
|
60 |
-
st.info("Author ©️ :
|
61 |
-
st.info("Made with ❤️
|
|
|
1 |
+
# import streamlit as st
|
2 |
+
# import os
|
3 |
+
# from utils.demo import load_video, ctc_decode
|
4 |
+
# from utils.two_stream_infer import load_model
|
5 |
+
# import os
|
6 |
+
# from scripts.extract_lip_coordinates import generate_lip_coordinates
|
7 |
+
# import options as opt
|
8 |
+
|
9 |
+
# st.set_page_config(layout="wide")
|
10 |
+
|
11 |
+
# model = load_model()
|
12 |
+
|
13 |
+
# st.title("Lipreading final year project Demo")
|
14 |
+
|
15 |
+
# st.info(
|
16 |
+
# "The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU ",
|
17 |
+
# icon="ℹ️",
|
18 |
+
# )
|
19 |
+
# # Generating a list of options or videos
|
20 |
+
# options = os.listdir(os.path.join("app_input"))
|
21 |
+
# selected_video = st.selectbox("Choose video", options)
|
22 |
+
|
23 |
+
# col1, col2 = st.columns(2)
|
24 |
+
|
25 |
+
|
26 |
+
# with col1:
|
27 |
+
# file_path = os.path.join("app_input", selected_video)
|
28 |
+
# video_name = selected_video.split(".")[0]
|
29 |
+
# os.system(f"ffmpeg -i {file_path} -vcodec libx264 {video_name}.mp4 -y")
|
30 |
+
|
31 |
+
# # Rendering inside of the app
|
32 |
+
# video = open(f"{video_name}.mp4", "rb")
|
33 |
+
# video_bytes = video.read()
|
34 |
+
# st.video(video_bytes)
|
35 |
+
|
36 |
+
|
37 |
+
# with col1, st.spinner("Splitting video into frames"):
|
38 |
+
# video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
|
39 |
+
# prediction_video = video
|
40 |
+
# st.markdown(f"Frames Generated:\n{files}")
|
41 |
+
# frames_generated = True
|
42 |
+
# with col1, st.spinner("Generating Lip Landmark Coordinates"):
|
43 |
+
# coordinates = generate_lip_coordinates(f"{video_name}_samples")
|
44 |
+
# prediction_coordinates = coordinates
|
45 |
+
# st.markdown(f"Coordinates Generated:\n{coordinates}")
|
46 |
+
# coordinates_generated = True
|
47 |
+
|
48 |
+
# with col2:
|
49 |
+
# st.info("Ready to make prediction!")
|
50 |
+
# generate = st.button("Generate")
|
51 |
+
# if generate:
|
52 |
+
# with col2, st.spinner("Generating..."):
|
53 |
+
# y = model(
|
54 |
+
# prediction_video[None, ...].to(opt.device),
|
55 |
+
# prediction_coordinates[None, ...].to(opt.device),
|
56 |
+
# )
|
57 |
+
# txt = ctc_decode(y[0])
|
58 |
+
# st.text(txt[-1])
|
59 |
+
|
60 |
+
# st.info("Author ©️ : wissem karous ")
|
61 |
+
# st.info("Made with ❤️ ")
|
62 |
+
|
63 |
+
|
64 |
import streamlit as st
|
65 |
import os
|
66 |
from utils.demo import load_video, ctc_decode
|
67 |
from utils.two_stream_infer import load_model
|
|
|
68 |
from scripts.extract_lip_coordinates import generate_lip_coordinates
|
69 |
import options as opt
|
70 |
|
|
|
78 |
"The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU ",
|
79 |
icon="ℹ️",
|
80 |
)
|
81 |
+
|
82 |
# Generating a list of options or videos
|
83 |
+
# Sorted so that the "next video" computed below is deterministic across reruns.
options = sorted(os.listdir("app_input"))
if not options:
    # With nothing to choose from, the index lookup and the modulo below
    # would raise (selectbox is documented to return None when no option
    # is selected), so report the problem and end this script run instead.
    st.error("No videos found in the 'app_input' directory.")
    st.stop()
selected_video = st.selectbox("Choose video", options)

# The companion clip is the entry after the selected one; the modulo wraps
# the last entry back to the first.
selected_index = options.index(selected_video)
next_video_index = (selected_index + 1) % len(options)
next_video = options[next_video_index]
|
90 |
+
|
91 |
col1, col2 = st.columns(2)
|
92 |
|
93 |
+
# Function to display video in a column
|
94 |
+
def display_video(column, video_path, video_name):
    """Transcode a clip to H.264 with ffmpeg and render it in ``column``.

    Args:
        column: Object exposing a ``video(data)`` method; its ``video`` is
            called once with the bytes of the transcoded file.
        video_path: Path of the input clip handed to ffmpeg.
        video_name: Output stem; ffmpeg writes ``{video_name}.mp4``.

    Raises:
        OSError: If ``{video_name}.mp4`` cannot be opened afterwards (for
            example because the transcode failed and no earlier output
            exists).
    """
    import subprocess  # local import so the block does not rely on file-level imports

    output_path = f"{video_name}.mp4"
    try:
        # An argument list (no shell) passes file names containing spaces or
        # shell metacharacters through verbatim. "-y" (overwrite) is a global
        # option, so it is placed before the input as ffmpeg recommends.
        subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-vcodec", "libx264", output_path],
            check=False,  # a failed transcode was never fatal here
        )
    except OSError:
        # ffmpeg is missing or not executable. The shell call this replaces
        # carried on in that case, so keep doing so: open() below either
        # finds an existing output file or raises.
        pass

    with open(output_path, "rb") as video_file:
        column.video(video_file.read())
|
99 |
|
100 |
+
# Displaying the selected video in the first column
|
101 |
# video_name (text before the first dot of the file name) is reused further
# down as the stem of the transcoded "<video_name>.mp4" file.
video_name = selected_video.partition(".")[0]
file_path = os.path.join("app_input", selected_video)
with col1:
    display_video(col1, file_path, video_name)

# Right-hand column: the neighbouring clip picked above.
next_video_name = next_video.partition(".")[0]
next_file_path = os.path.join("app_input", next_video)
with col2:
    display_video(col2, next_file_path, next_video_name)
|
111 |
|
112 |
+
# Assuming further processing (like generating predictions) is only intended for the first (selected) video
|
113 |
+
with col1, st.spinner("Processing video..."):
|
114 |
video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
|
|
|
|
|
|
|
|
|
115 |
coordinates = generate_lip_coordinates(f"{video_name}_samples")
|
116 |
+
# Assuming 'frames_generated' and 'coordinates_generated' are used for control flow or further processing
|
117 |
+
frames_generated = True
|
118 |
coordinates_generated = True
|
119 |
+
if frames_generated and coordinates_generated:
|
120 |
+
st.markdown(f"Frames Generated for {video_name}:\n{files}")
|
121 |
+
st.markdown(f"Coordinates Generated for {video_name}:\n{coordinates}")
|
122 |
|
123 |
with col2:
    st.info("Ready to make prediction!")
    generate = st.button("Generate")
    if generate:
        # Inference only runs on the rerun triggered by the button press.
        with st.spinner("Generating..."):
            # [None, ...] prepends an axis to each input (presumably the
            # batch dimension the model expects; confirm against load_model).
            batched_video = video[None, ...].to(opt.device)
            batched_coordinates = coordinates[None, ...].to(opt.device)
            y = model(batched_video, batched_coordinates)
            # Decode the first item of the output and show the last decoded entry.
            txt = ctc_decode(y[0])
            st.text(txt[-1])
|
134 |
+
|
135 |
+
st.info("Author ©️ : Wissem Karous ")
|
136 |
+
st.info("Made with ❤️")
|