Michael Hu committed on
Commit
2477bc4
·
1 Parent(s): ee54430

update logging level

Browse files
Files changed (4) hide show
  1. app.py +7 -7
  2. utils/stt.py +5 -5
  3. utils/translation.py +3 -3
  4. utils/tts.py +3 -3
app.py CHANGED
@@ -26,12 +26,12 @@ from utils.tts_dummy import generate_speech
26
  # Hugging Face Spaces Setup Automation
27
  def setup_huggingface_space():
28
  """Automatically configure Hugging Face Space requirements"""
29
- logger.debug("Running Hugging Face space setup")
30
  st.sidebar.header("Space Configuration")
31
 
32
  try:
33
  subprocess.run(["espeak-ng", "--version"], check=True, capture_output=True)
34
- logger.debug("espeak-ng verification successful")
35
  except (FileNotFoundError, subprocess.CalledProcessError):
36
  logger.error("Missing espeak-ng dependency")
37
  st.sidebar.error("""
@@ -64,7 +64,7 @@ os.makedirs("temp/outputs", exist_ok=True)
64
 
65
  def configure_page():
66
  """Set up Streamlit page configuration"""
67
- logger.debug("Configuring Streamlit page")
68
  st.set_page_config(
69
  page_title="Audio Translator",
70
  page_icon="🎧",
@@ -93,7 +93,7 @@ def handle_file_processing(upload_path):
93
 
94
  try:
95
  # STT Phase
96
- logger.debug("Beginning STT processing")
97
  status_text.markdown("🔍 **Performing Speech Recognition...**")
98
  with st.spinner("Initializing Whisper model..."):
99
  english_text = transcribe_audio(upload_path)
@@ -101,7 +101,7 @@ def handle_file_processing(upload_path):
101
  logger.info(f"STT completed. Text length: {len(english_text)} characters")
102
 
103
  # Translation Phase
104
- logger.debug("Beginning translation")
105
  status_text.markdown("🌐 **Translating Content...**")
106
  with st.spinner("Loading translation model..."):
107
  chinese_text = translate_text(english_text)
@@ -109,7 +109,7 @@ def handle_file_processing(upload_path):
109
  logger.info(f"Translation completed. Translated length: {len(chinese_text)} characters")
110
 
111
  # TTS Phase
112
- logger.debug("Beginning TTS generation")
113
  status_text.markdown("🎡 **Generating Chinese Speech...**")
114
  with st.spinner("Initializing TTS engine..."):
115
  output_path = generate_speech(chinese_text, language="zh")
@@ -131,7 +131,7 @@ def handle_file_processing(upload_path):
131
 
132
  def render_results(english_text, chinese_text, output_path):
133
  """Display processing results in organized columns"""
134
- logger.debug("Rendering results")
135
  st.divider()
136
 
137
  col1, col2 = st.columns([2, 1])
 
26
  # Hugging Face Spaces Setup Automation
27
  def setup_huggingface_space():
28
  """Automatically configure Hugging Face Space requirements"""
29
+ logger.info("Running Hugging Face space setup")
30
  st.sidebar.header("Space Configuration")
31
 
32
  try:
33
  subprocess.run(["espeak-ng", "--version"], check=True, capture_output=True)
34
+ logger.info("espeak-ng verification successful")
35
  except (FileNotFoundError, subprocess.CalledProcessError):
36
  logger.error("Missing espeak-ng dependency")
37
  st.sidebar.error("""
 
64
 
65
  def configure_page():
66
  """Set up Streamlit page configuration"""
67
+ logger.info("Configuring Streamlit page")
68
  st.set_page_config(
69
  page_title="Audio Translator",
70
  page_icon="🎧",
 
93
 
94
  try:
95
  # STT Phase
96
+ logger.info("Beginning STT processing")
97
  status_text.markdown("🔍 **Performing Speech Recognition...**")
98
  with st.spinner("Initializing Whisper model..."):
99
  english_text = transcribe_audio(upload_path)
 
101
  logger.info(f"STT completed. Text length: {len(english_text)} characters")
102
 
103
  # Translation Phase
104
+ logger.info("Beginning translation")
105
  status_text.markdown("🌐 **Translating Content...**")
106
  with st.spinner("Loading translation model..."):
107
  chinese_text = translate_text(english_text)
 
109
  logger.info(f"Translation completed. Translated length: {len(chinese_text)} characters")
110
 
111
  # TTS Phase
112
+ logger.info("Beginning TTS generation")
113
  status_text.markdown("🎡 **Generating Chinese Speech...**")
114
  with st.spinner("Initializing TTS engine..."):
115
  output_path = generate_speech(chinese_text, language="zh")
 
131
 
132
  def render_results(english_text, chinese_text, output_path):
133
  """Display processing results in organized columns"""
134
+ logger.info("Rendering results")
135
  st.divider()
136
 
137
  col1, col2 = st.columns([2, 1])
utils/stt.py CHANGED
@@ -22,17 +22,17 @@ def transcribe_audio(audio_path):
22
 
23
  try:
24
  # Audio conversion
25
- logger.debug("Converting audio format")
26
  audio = AudioSegment.from_file(audio_path)
27
  processed_audio = audio.set_frame_rate(16000).set_channels(1)
28
  wav_path = audio_path.replace(".mp3", ".wav")
29
  processed_audio.export(wav_path, format="wav")
30
- logger.debug(f"Audio converted to: {wav_path}")
31
 
32
  # Model initialization
33
  logger.info("Loading Whisper model")
34
  device = "cuda" if torch.cuda.is_available() else "cpu"
35
- logger.debug(f"Using device: {device}")
36
 
37
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
38
  "openai/whisper-large-v3",
@@ -42,10 +42,10 @@ def transcribe_audio(audio_path):
42
  ).to(device)
43
 
44
  processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
45
- logger.debug("Model loaded successfully")
46
 
47
  # Processing
48
- logger.debug("Processing audio input")
49
  inputs = processor(
50
  wav_path,
51
  sampling_rate=16000,
 
22
 
23
  try:
24
  # Audio conversion
25
+ logger.info("Converting audio format")
26
  audio = AudioSegment.from_file(audio_path)
27
  processed_audio = audio.set_frame_rate(16000).set_channels(1)
28
  wav_path = audio_path.replace(".mp3", ".wav")
29
  processed_audio.export(wav_path, format="wav")
30
+ logger.info(f"Audio converted to: {wav_path}")
31
 
32
  # Model initialization
33
  logger.info("Loading Whisper model")
34
  device = "cuda" if torch.cuda.is_available() else "cpu"
35
+ logger.info(f"Using device: {device}")
36
 
37
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
38
  "openai/whisper-large-v3",
 
42
  ).to(device)
43
 
44
  processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
45
+ logger.info("Model loaded successfully")
46
 
47
  # Processing
48
+ logger.info("Processing audio input")
49
  inputs = processor(
50
  wav_path,
51
  sampling_rate=16000,
utils/translation.py CHANGED
@@ -23,7 +23,7 @@ def translate_text(text):
23
  logger.info("Loading NLLB model")
24
  tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
25
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
26
- logger.debug("Translation model loaded")
27
 
28
  # Text processing
29
  max_chunk_length = 1000
@@ -32,7 +32,7 @@ def translate_text(text):
32
 
33
  translated_chunks = []
34
  for i, chunk in enumerate(text_chunks):
35
- logger.debug(f"Processing chunk {i+1}/{len(text_chunks)}")
36
  inputs = tokenizer(
37
  chunk,
38
  return_tensors="pt",
@@ -47,7 +47,7 @@ def translate_text(text):
47
  )
48
  translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
  translated_chunks.append(translated)
50
- logger.debug(f"Chunk {i+1} translated successfully")
51
 
52
  result = "".join(translated_chunks)
53
  logger.info(f"Translation completed. Total length: {len(result)}")
 
23
  logger.info("Loading NLLB model")
24
  tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
25
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
26
+ logger.info("Translation model loaded")
27
 
28
  # Text processing
29
  max_chunk_length = 1000
 
32
 
33
  translated_chunks = []
34
  for i, chunk in enumerate(text_chunks):
35
+ logger.info(f"Processing chunk {i+1}/{len(text_chunks)}")
36
  inputs = tokenizer(
37
  chunk,
38
  return_tensors="pt",
 
47
  )
48
  translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
  translated_chunks.append(translated)
50
+ logger.info(f"Chunk {i+1} translated successfully")
51
 
52
  result = "".join(translated_chunks)
53
  logger.info(f"Translation completed. Total length: {len(result)}")
utils/tts.py CHANGED
@@ -19,7 +19,7 @@ class TTSEngine:
19
  def __init__(self):
20
  logger.info("Initializing TTS Engine")
21
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
22
- logger.debug(f"Using device: {self.device}")
23
  self._verify_model_files()
24
  logger.info("Loading Kokoro model")
25
  self.model = build_model(f"{MODEL_DIR}/kokoro-v0_19.pth", self.device)
@@ -56,7 +56,7 @@ class TTSEngine:
56
  logger.warning(f"Truncating long text ({len(text)} characters)")
57
  text = text[:495] + "[TRUNCATED]"
58
 
59
- logger.debug("Starting audio generation")
60
  audio, _ = generate_full(
61
  self.model,
62
  text,
@@ -66,7 +66,7 @@ class TTSEngine:
66
  )
67
 
68
  output_path = f"temp/outputs/output_{int(time.time())}.wav"
69
- logger.debug(f"Saving audio to {output_path}")
70
  AudioSegment(
71
  audio.numpy().tobytes(),
72
  frame_rate=24000,
 
19
  def __init__(self):
20
  logger.info("Initializing TTS Engine")
21
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
22
+ logger.info(f"Using device: {self.device}")
23
  self._verify_model_files()
24
  logger.info("Loading Kokoro model")
25
  self.model = build_model(f"{MODEL_DIR}/kokoro-v0_19.pth", self.device)
 
56
  logger.warning(f"Truncating long text ({len(text)} characters)")
57
  text = text[:495] + "[TRUNCATED]"
58
 
59
+ logger.info("Starting audio generation")
60
  audio, _ = generate_full(
61
  self.model,
62
  text,
 
66
  )
67
 
68
  output_path = f"temp/outputs/output_{int(time.time())}.wav"
69
+ logger.info(f"Saving audio to {output_path}")
70
  AudioSegment(
71
  audio.numpy().tobytes(),
72
  frame_rate=24000,