arjunanand13 committed (verified)
Commit 4303813 · 1 parent: b35172a

Update app.py

Write the extracted audio track under results/, caption frames from in-memory JPEG buffers (new io and PIL imports) instead of raw OpenCV frames, and remove dead timing code.

Files changed (1): app.py (+21, -34)
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 import os
 import whisper
 import cv2
+import io
+from PIL import Image
 import json
 import tempfile
 import torch
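The two new imports exist to re-encode OpenCV frames as in-memory JPEGs before captioning (see the frame loop below). A self-contained round-trip sketch of that conversion, using a dummy frame in place of real video.read() output:

import io
import cv2
import numpy as np
from PIL import Image

# Dummy BGR frame standing in for a real video.read() result.
frame = np.zeros((64, 64, 3), dtype=np.uint8)
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)   # OpenCV is BGR; PIL expects RGB
buf = io.BytesIO()
Image.fromarray(rgb).save(buf, format="JPEG")  # encode to an in-memory JPEG
buf.seek(0)
assert Image.open(buf).size == (64, 64)        # the buffer decodes back to an image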
@@ -87,13 +89,13 @@ class VideoClassifier:
         start = time.time()
         mp4_file = video_input
         video_name = mp4_file.split("/")[-1]
-        wav_file = "audiotrack.wav"
+        wav_file = "results/audiotrack.wav"
         video_clip = VideoFileClip(mp4_file)
         audioclip = video_clip.audio
         wav_file = audioclip.write_audiofile(wav_file)
         audioclip.close()
         video_clip.close()
-        audiotrack = "audiotrack.wav"
+        audiotrack = "results/audiotrack.wav"
         result = self.whisper_model.transcribe(audiotrack, fp16=False)
         transcript = result["text"]
         print("TRANSCRIPT",transcript)
@@ -102,35 +104,33 @@ class VideoClassifier:
         time_taken_1 = round(end - start, 3)
         # print("Time taken from video to transcript:", time_taken_1)
 
-
         video = cv2.VideoCapture(video_input)
         length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-        # print(f'There are {length} frames in this video')
-        # args = self.parser.parse_args()
         no_of_frame = int(self.no_of_frames)
-        temp_div = length//(no_of_frame)
+        temp_div = length // no_of_frame
         currentframe = 50
         caption_text = []
+
         for i in range(no_of_frame):
-            # reading from frame
             video.set(cv2.CAP_PROP_POS_FRAMES, currentframe)
-            ret,frame = video.read()
+            ret, frame = video.read()
             if ret:
-                content = self.img_cap.predict_step([frame])
-                print("content",content)
+
+                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                image = Image.fromarray(frame)
+                img_byte_arr = io.BytesIO()
+                image.save(img_byte_arr, format='JPEG')  # Save as JPEG or any other format your model supports
+                img_byte_arr.seek(0)
+
+                content = self.img_cap.predict_from_memory([img_byte_arr])
+                print("content", content)
                 caption_text.append(content[0])
-                currentframe += temp_div-1
+                currentframe += temp_div - 1
             else:
                 break
-        def listToString(s):
-            str1 = ", "
-            return (str1.join(s))
-        captions = listToString(caption_text)
-        print("CAPTIONS",captions)
 
-        end1 = time.time()
-        time_taken_2 = round(end1 - end, 3)
-        # print("Time taken from transcript to image_caption:", time_taken_2)
+        captions = ", ".join(caption_text)
+        print("CAPTIONS", captions)
         video.release()
         cv2.destroyAllWindows()
 
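The loop now converts each BGR frame to RGB, encodes it as an in-memory JPEG, and hands the buffer to the captioner, replacing the old predict_step call that took raw frames. The predict_from_memory method itself is not shown in this diff; a minimal sketch of what it could look like, assuming a Hugging Face image-to-text pipeline rather than the repo's actual captioning model:

from PIL import Image
from transformers import pipeline

class ImageCaptioner:
    # Hypothetical stand-in for the img_cap helper; the real class may differ.
    def __init__(self):
        # Checkpoint choice is an assumption; any image-to-text model works.
        self.pipe = pipeline("image-to-text",
                             model="nlpconnect/vit-gpt2-image-captioning")

    def predict_from_memory(self, buffers):
        # Each buffer is an io.BytesIO holding one JPEG-encoded frame.
        images = [Image.open(buf).convert("RGB") for buf in buffers]
        return [self.pipe(img)[0]["generated_text"] for img in images]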
@@ -142,23 +142,18 @@ class VideoClassifier:
 
 
         template1 = '''Given below are the different types of main video classes
-
         {main_categories}
-
         You are a text classifier that categorises the transcript and captions into the one main class whose context matches best; generate only the main class name, with no sub-class or explanation.
         Give more importance to the Transcript while classifying.
         Transcript: {transcript}
-
         Captions: {captions}
-
         Return only the answer chosen from the list and nothing else
         Main-class => '''
 
         prompt1 = PromptTemplate(template=template1, input_variables=['main_categories', 'transcript', 'captions'])
         print("PROMPT 1",prompt1)
         prompt_text = template1.format(main_categories=main_categories, transcript=transcript, captions=captions)
-        start2 = time.time()
-        time_taken_3 = round(start2 - end1, 3)
+
         response = self.genai_model.generate_content(contents=prompt_text)
         main_class = response.text
 
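Note that prompt1 is constructed but never used: the raw template string is formatted directly on the next line. If the LangChain object is meant to do the formatting, the equivalent call would be:

# Equivalent formatting through the LangChain PromptTemplate (sketch):
prompt_text = prompt1.format(main_categories=main_categories,
                             transcript=transcript,
                             captions=captions)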
@@ -203,15 +198,11 @@ class VideoClassifier:
         sub_categories = Path("sub_categories.txt").read_text()
 
         template2 = '''Given below are the sub-classes of {main_class}.
-
         {sub_categories}
-
         You are a text classifier that categorises the transcript and captions into the one sub-class whose context matches best; generate only the sub-class name and do not give an explanation.
         Give more importance to the Transcript while classifying.
         Transcript: {transcript}
-
         Captions: {captions}
-
         Return only the Sub-class answer chosen from the list and nothing else
         Answer in the format:
         Main-class => {main_class}
@@ -220,14 +211,10 @@ class VideoClassifier:
 
         prompt2 = PromptTemplate(template=template2, input_variables=['sub_categories', 'transcript', 'captions','main_class'])
         prompt_text2 = template1.format(main_categories=main_categories, transcript=transcript, captions=captions)
-        start2 = time.time()
-        time_taken_3 = round(start2 - end1, 3)
         response = self.genai_model.generate_content(contents=prompt_text2)
         sub_class = response.text
         print("Preprocess Answer",sub_class)
-        end2 = time.time()
-        time_taken_predict = round(end2 - start2, 3)
-        time_taken_total = round(end2 - start, 3)
+
         # print("Time taken by model to predict:", time_taken_predict)
         # print("Total time taken:", time_taken_total)
 