naman1102 committed
Commit 09b1a3d · 1 Parent(s): 7fb0070
Files changed (2)
  1. requirements.txt +1 -2
  2. tools.py +23 -34
requirements.txt CHANGED
@@ -8,5 +8,4 @@ openai
 pandas
 langchain_openai
 langchain_community
-pydub
-whisper
+openai
tools.py CHANGED
@@ -86,55 +86,44 @@ import os
 from pydub import AudioSegment
 from pydub.utils import make_chunks
 
-_whisper_model = whisper.load_model("base")
 
 
+import os
+import openai
+from state import AgentState
+
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
-    LangGraph tool for transcribing audio via Whisper.
-    Expects: state["audio_path"] to be a path to a .wav/.mp3/.m4a file.
+    LangGraph tool for transcribing audio via OpenAI’s hosted Whisper API.
+    Expects: state["audio_path"] to be a valid path to a .wav/.mp3/.m4a file.
     Returns:
-      {
-        "audio_path": None,
-        "transcript": "<full transcribed text>"
-      }
-    If no valid audio_path is found, returns {} to signal "no-op."
+      {
+        "audio_path": None,
+        "transcript": "<transcribed text or error message>"
+      }
+    If no valid audio_path is provided, returns {}.
     """
     path = state.get("audio_path", "")
     if not path or not os.path.exists(path):
         return {}
 
     try:
-        # Whisper API has a ~25 MB limit per request. If file is small, transcribe directly.
-        max_bytes = 25 * 1024 * 1024
-        if os.path.getsize(path) <= max_bytes:
-            result = _whisper_model.transcribe(path)
-            text = result["text"].strip()
-        else:
-            # For large files, split into 2-minute (120 s) chunks
-            audio = AudioSegment.from_file(path)
-            chunk_length_ms = 120 * 1000
-            chunks = make_chunks(audio, chunk_length_ms)
-
-            transcripts = []
-            for i, chunk in enumerate(chunks):
-                chunk_name = f"temp_chunk_{i}.wav"
-                chunk.export(chunk_name, format="wav")
-                res = _whisper_model.transcribe(chunk_name)
-                transcripts.append(res["text"].strip())
-                os.remove(chunk_name)
-            text = "\n".join(transcripts)
+        openai.api_key = os.getenv("OPENAI_API_KEY")
+        if not openai.api_key:
+            raise RuntimeError("OPENAI_API_KEY is not set in environment.")
+
+        with open(path, "rb") as audio_file:
+            # For OpenAI Python library v0.27.0+:
+            response = openai.Audio.transcribe("whisper-1", audio_file)
+            # If using an older OpenAI library, use:
+            # response = openai.Audio.create_transcription(file=audio_file, model="whisper-1")
+
+        text = response["text"].strip()
 
     except Exception as e:
         text = f"Error during transcription: {e}"
 
     return {
         "audio_path": None,
         "transcript": text
-    }
-
-
-
-
-
-
+    }
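Note: the committed tools.py calls openai.Audio.transcribe, the pre-1.0 interface of the openai Python package, which was removed in openai 1.x, while the openai entry added to requirements.txt is unpinned. Below is a minimal sketch of the equivalent call against the 1.x client, assuming the same audio_path contract as the tool; the helper name transcribe_with_v1_client is illustrative and not part of this commit.

import os
from openai import OpenAI  # openai>=1.0 client

def transcribe_with_v1_client(path: str) -> str:
    """Transcribe an audio file with the openai>=1.0 SDK and the whisper-1 model."""
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # raises at construction if no key is available
    with open(path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return result.text.strip()

With a client along these lines, the rest of audio_transcriber_tool (the audio_path check and the returned {"audio_path": None, "transcript": ...} dict) can stay as committed.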