reab5555 committed on
Commit
0dfd8b5
·
verified ·
1 Parent(s): 3b48e31

Update processing.py

Browse files
Files changed (1) hide show
  1. processing.py +3 -10
processing.py CHANGED
@@ -2,10 +2,9 @@ import os
2
  import torch
3
  import math
4
  import time
5
- import shutil
6
  from moviepy.editor import VideoFileClip
7
  from pyannote.audio import Pipeline
8
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoTokenizer
9
  import librosa
10
  import datetime
11
  from collections import defaultdict
@@ -305,24 +304,18 @@ def process_input(input_file, progress=None):
305
  if file_extension == '.txt':
306
  with open(input_file.name, 'r', encoding='utf-8') as file:
307
  content = file.read()
308
- words, tokens = count_words_and_tokens(content)
309
- input_info = f"Text file processed. Words: {words}, Tokens: {tokens}"
310
  elif file_extension == '.pdf':
311
  loader = PyPDFLoader(input_file.name)
312
  pages = loader.load_and_split()
313
  content = '\n'.join([page.page_content for page in pages])
314
- words, tokens = count_words_and_tokens(content)
315
- input_info = f"PDF file processed. Words: {words}, Tokens: {tokens}"
316
  elif file_extension in ['.mp4', '.avi', '.mov']:
317
  safe_progress(0.2, desc="Processing video...")
318
  srt_path = process_video(input_file.name, os.environ.get('hf_secret'), "en")
319
  with open(srt_path, 'r', encoding='utf-8') as file:
320
  content = file.read()
321
- words, tokens = count_words_and_tokens(content)
322
- input_info = f"Video processed. Words: {words}, Tokens: {tokens}"
323
  os.remove(srt_path)
324
  else:
325
- return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None, None
326
 
327
  detected_language = detect_language(content)
328
 
@@ -337,5 +330,5 @@ def process_input(input_file, progress=None):
337
 
338
  safe_progress(1.0, desc="Analysis complete!")
339
 
340
- return ("Analysis complete!", execution_info, detected_language, input_info,
341
  attachments_answer, bigfive_answer, personalities_answer)
 
2
  import torch
3
  import math
4
  import time
 
5
  from moviepy.editor import VideoFileClip
6
  from pyannote.audio import Pipeline
7
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoTokenizer, AutoModelForCausalLM
8
  import librosa
9
  import datetime
10
  from collections import defaultdict
 
304
  if file_extension == '.txt':
305
  with open(input_file.name, 'r', encoding='utf-8') as file:
306
  content = file.read()
 
 
307
  elif file_extension == '.pdf':
308
  loader = PyPDFLoader(input_file.name)
309
  pages = loader.load_and_split()
310
  content = '\n'.join([page.page_content for page in pages])
 
 
311
  elif file_extension in ['.mp4', '.avi', '.mov']:
312
  safe_progress(0.2, desc="Processing video...")
313
  srt_path = process_video(input_file.name, os.environ.get('hf_secret'), "en")
314
  with open(srt_path, 'r', encoding='utf-8') as file:
315
  content = file.read()
 
 
316
  os.remove(srt_path)
317
  else:
318
+ return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None
319
 
320
  detected_language = detect_language(content)
321
 
 
330
 
331
  safe_progress(1.0, desc="Analysis complete!")
332
 
333
+ return ("Analysis complete!", execution_info, detected_language,
334
  attachments_answer, bigfive_answer, personalities_answer)