# -*- coding: utf-8 -*- """OpenAI Whisper from Hugging Face Transformers with Microsoft PHI 3 Integration""" import gradio as gr from transformers import pipeline import torch from huggingface_hub import InferenceClient import os import librosa # Fetch the token from Hugging Face Secrets HF_API_TOKEN = os.getenv("HF_API_TOKEN", "") client = InferenceClient( "microsoft/phi-4", token=HF_API_TOKEN ) # Check if a GPU is available and use it if possible device = 'cuda' if torch.cuda.is_available() else 'cpu' # Initialize the Whisper pipeline whisper = pipeline('automatic-speech-recognition', model='openai/whisper-tiny', device=device) # Instructions (can be set through Hugging Face Secrets or hardcoded) instructions = os.getenv("INST", "Your default instructions here.") def query_phi(prompt): print("Sending request to PHI 3 API...") response = "" try: for message in client.chat_completion( messages=[{"role": "user", "content": f"{instructions}\n{prompt}"}], max_tokens=500, stream=True, ): response += message.choices[0].delta.content except Exception as e: print("Error in PHI 3 API:", e) return "PHI 3 API Error: " + str(e) return response def transcribe_and_query(audio): try: # Load the audio file as waveform audio_data, sr = librosa.load(audio, sr=16000) # Transcribe using Whisper transcription = whisper(audio_data)["text"] transcription = "Prompt : " + transcription # Query Microsoft PHI 3 with the transcribed text phi_response = query_phi(transcription) return transcription, phi_response except Exception as e: return f"Error processing audio: {str(e)}", "No response from PHI 3" # Create Gradio interface iface = gr.Interface( fn=transcribe_and_query, inputs=gr.Audio(type="filepath"), outputs=["text", "text"], title="Scam Call Detector with BEEP", description="Upload your recorded call to see if it is a scam or not.\n Stay Safe, Stay Secure." ) # Launch the interface iface.launch(share=True)