sungo-ganpare committed on
Commit
b0b7186
·
1 Parent(s): d206e43

デモの説明文を更新し、長時間音声対応の最適化を強調。日本語に翻訳し、主な特長を明確化。

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. app_wsl.py +14 -13
app.py CHANGED
@@ -528,7 +528,8 @@ def write_lrc(segments, path):
528
 
529
  article = (
530
  "<p style='font-size: 1.1em;'>"
531
- "This demo showcases <code><a href='https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2'>parakeet-tdt-0.6b-v2</a></code>, a 600-million-parameter model designed for high-quality English speech recognition."
 
532
  "</p>"
533
  "<p><strong style='color: red; font-size: 1.2em;'>Key Features:</strong></p>"
534
  "<ul style='font-size: 1.1em;'>"
@@ -565,7 +566,7 @@ nvidia_theme = gr_themes.Default(
565
 
566
  with gr.Blocks(theme=nvidia_theme) as demo:
567
  model_display_name = MODEL_NAME.split('/')[-1] if '/' in MODEL_NAME else MODEL_NAME
568
- gr.Markdown(f"<h1 style='text-align: center; margin: 0 auto;'>Speech Transcription with {model_display_name}</h1>")
569
  gr.HTML(article)
570
 
571
  current_audio_path_state = gr.State(None)
 
528
 
529
  article = (
530
  "<p style='font-size: 1.1em;'>"
531
+ "This demo showcases <code><a href='https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2'>parakeet-tdt-0.6b-v2</a></code>, a 600M-parameter model for high-quality English ASR.<br>"
532
+ "<em>Now optimised for long recordings (hours) with automatic chunking & memory control.</em>"
533
  "</p>"
534
  "<p><strong style='color: red; font-size: 1.2em;'>Key Features:</strong></p>"
535
  "<ul style='font-size: 1.1em;'>"
 
566
 
567
  with gr.Blocks(theme=nvidia_theme) as demo:
568
  model_display_name = MODEL_NAME.split('/')[-1] if '/' in MODEL_NAME else MODEL_NAME
569
+ gr.Markdown(f"<h1 style='text-align: center; margin: 0 auto;'>Speech Transcription&nbsp;with&nbsp;{model_display_name} <span style='font-size:0.6em;'>(Long-audio&nbsp;ready)</span></h1>")
570
  gr.HTML(article)
571
 
572
  current_audio_path_state = gr.State(None)
app_wsl.py CHANGED
@@ -554,24 +554,25 @@ def write_lrc(segments, path):
554
 
555
  article = (
556
  "<p style='font-size: 1.1em;'>"
557
- "This demo showcases <code><a href='https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2'>parakeet-tdt-0.6b-v2</a></code>, a 600-million-parameter model designed for high-quality English speech recognition."
 
558
  "</p>"
559
- "<p><strong style='color: red; font-size: 1.2em;'>Key Features:</strong></p>"
560
  "<ul style='font-size: 1.1em;'>"
561
- " <li>Automatic punctuation and capitalization</li>"
562
- " <li>Accurate word-level timestamps (click on a segment in the table below to play it!)</li>"
563
- " <li>Character-level timestamps now available in the 'Character View' tab.</li>"
564
- " <li>Efficiently transcribes long audio segments (<strong>updated to support upto 3 hours</strong>) <small>(For even longer audios, see <a href='https://github.com/NVIDIA/NeMo/blob/main/examples/asr/asr_chunked_inference/rnnt/speech_to_text_buffered_infer_rnnt.py' target='_blank'>this script</a>)</small></li>"
565
- " <li>Robust performance on spoken numbers, and song lyrics transcription </li>"
566
  "</ul>"
567
  "<p style='font-size: 1.1em;'>"
568
- "This model is <strong>available for commercial and non-commercial use</strong>."
569
  "</p>"
570
  "<p style='text-align: center;'>"
571
- "<a href='https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2' target='_blank'>🎙️ Learn more about the Model</a> | "
572
- "<a href='https://arxiv.org/abs/2305.05084' target='_blank'>📄 Fast Conformer paper</a> | "
573
- "<a href='https://arxiv.org/abs/2304.06795' target='_blank'>📚 TDT paper</a> | "
574
- "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 NeMo Repository</a>"
575
  "</p>"
576
  )
577
 
@@ -591,7 +592,7 @@ nvidia_theme = gr_themes.Default(
591
 
592
  with gr.Blocks(theme=nvidia_theme) as demo:
593
  model_display_name = MODEL_NAME.split('/')[-1] if '/' in MODEL_NAME else MODEL_NAME
594
- gr.Markdown(f"<h1 style='text-align: center; margin: 0 auto;'>Speech Transcription with {model_display_name}</h1>")
595
  gr.HTML(article)
596
 
597
  current_audio_path_state = gr.State(None)
 
554
 
555
  article = (
556
  "<p style='font-size: 1.1em;'>"
557
+ "このデモは <code><a href='https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2' target='_blank'>parakeet-tdt-0.6b-v2</a></code> "
558
+ "(約6億パラメータ)を用いた高精度な英語音声文字起こしを実演します。"
559
  "</p>"
560
+ "<p><strong style='color: red; font-size: 1.2em;'>主な特長:</strong></p>"
561
  "<ul style='font-size: 1.1em;'>"
562
+ " <li>自動句読点・大文字化</li>"
563
+ " <li>単語レベルのタイムスタンプ(下表クリックで該当区間を再生)</li>"
564
+ " <li>文字レベルのタイムスタンプ表示にも対応</li>"
565
+ " <li><strong>最長3時間</strong> の長時間音声を自動チャンク処理で効率的に文字起こし</li>"
566
+ " <li>数字や歌詞など発話の多様なケースに高いロバスト性</li>"
567
  "</ul>"
568
  "<p style='font-size: 1.1em;'>"
569
+ "商用・非商用ともに <strong>ライセンス制限なく利用可能</strong> です。"
570
  "</p>"
571
  "<p style='text-align: center;'>"
572
+ "<a href='https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2' target='_blank'>🎙️ モデル詳細</a> | "
573
+ "<a href='https://arxiv.org/abs/2305.05084' target='_blank'>📄 Fast&nbsp;Conformer 論文</a> | "
574
+ "<a href='https://arxiv.org/abs/2304.06795' target='_blank'>📚 TDT 論文</a> | "
575
+ "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 NeMo リポジトリ</a>"
576
  "</p>"
577
  )
578
 
 
592
 
593
  with gr.Blocks(theme=nvidia_theme) as demo:
594
  model_display_name = MODEL_NAME.split('/')[-1] if '/' in MODEL_NAME else MODEL_NAME
595
+ gr.Markdown(f"<h1 style='text-align: center; margin: 0 auto;'>長時間対応 音声文字起こし ({model_display_name})</h1>")
596
  gr.HTML(article)
597
 
598
  current_audio_path_state = gr.State(None)