hackergeek98 committed on
Commit 4f15486 · verified · 1 Parent(s): 9bf4b27

Update README.md

Files changed (1)
  1. README.md +71 -1
README.md CHANGED
@@ -12,4 +12,74 @@ pipeline_tag: automatic-speech-recognition
  library_name: transformers
  tags:
  - medical
- ---
+ ---
+ ## How to use the model in Colab
+
+ # Install required packages (google.colab and ffmpeg are already available in Colab)
+ !pip install torch torchaudio transformers pydub
+
+ import os
+
+ import torch
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+ from pydub import AudioSegment
+ from google.colab import files
+
+ # Load the model and processor
+ model_id = "hackergeek98/whisper-fa-tinyyy"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ # Create the ASR pipeline
+ whisper_pipe = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     device=0 if torch.cuda.is_available() else -1,
+ )
+
+ # Convert any input audio to WAV format
+ def convert_to_wav(audio_path):
+     audio = AudioSegment.from_file(audio_path)
+     wav_path = "converted_audio.wav"
+     audio.export(wav_path, format="wav")
+     return wav_path
+
+ # Split long audio into chunks
+ def split_audio(audio_path, chunk_length_ms=30000):  # Default: 30 seconds per chunk
+     audio = AudioSegment.from_wav(audio_path)
+     chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
+     chunk_paths = []
+
+     for i, chunk in enumerate(chunks):
+         chunk_path = f"chunk_{i}.wav"
+         chunk.export(chunk_path, format="wav")
+         chunk_paths.append(chunk_path)
+
+     return chunk_paths
+
+ # Transcribe a long audio file chunk by chunk
+ def transcribe_long_audio(audio_path):
+     wav_path = convert_to_wav(audio_path)
+     chunk_paths = split_audio(wav_path)
+     transcription = ""
+
+     for chunk in chunk_paths:
+         result = whisper_pipe(chunk)
+         transcription += result["text"] + "\n"
+         os.remove(chunk)  # Remove the processed chunk
+
+     os.remove(wav_path)  # Clean up the converted file
+
+     # Save the transcription to a text file (UTF-8 so Persian text is preserved)
+     text_path = "transcription.txt"
+     with open(text_path, "w", encoding="utf-8") as f:
+         f.write(transcription)
+
+     return text_path
+
+ # Upload and process audio in Colab
+ uploaded = files.upload()
+ audio_file = list(uploaded.keys())[0]
+ transcription_file = transcribe_long_audio(audio_file)
+
+ # Download the transcription file
+ files.download(transcription_file)