Commit d1f9a9c · AshDavid12 committed (parent: b7e558a)

hf home change

Files changed:
- Dockerfile: +3 -8
- infer.py: +23 -80
- requirements.txt: +1 -3
Dockerfile
CHANGED
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a base image
-
+FROM python:3.11.1-buster
 
 # Set the working directory
 WORKDIR /app
@@ -10,7 +10,6 @@ RUN mkdir -p /app/hf_cache && chmod -R 777 /app/hf_cache
 # Set the environment variable for the Hugging Face cache
 ENV HF_HOME=/app/hf_cache
 
-
 # Copy the requirements.txt file and install the dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
@@ -18,9 +17,5 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the current directory contents into the container at /app
 COPY . .
 
-#
-
-
-# Command to run the transcription script or API server on Hugging Face
-CMD ["uvicorn", "infer:app", "--host", "0.0.0.0", "--port", "7860"]
-
+# Command to run the Python transcription script directly
+CMD ["python", "whisper_test.py"]
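The substantive changes: the base image is now pinned as FROM python:3.11.1-buster, and the container runs a one-shot script (CMD ["python", "whisper_test.py"], a file not touched by this commit) instead of serving the FastAPI app on port 7860. The ENV HF_HOME=/app/hf_cache line, together with the pre-created, world-writable /app/hf_cache directory, is what the commit message refers to: it moves the Hugging Face cache away from the default ~/.cache/huggingface, which is typically not writable by the non-root user a Space container runs as.

A minimal sketch to confirm the cache path is picked up inside the image; the env-var fallback is illustrative, and note that HF_HOME must be set before transformers/huggingface_hub are imported, since the cache location is read once at import time:

import os
os.environ.setdefault("HF_HOME", "/app/hf_cache")  # no-op in this image, where the Dockerfile already sets it

from transformers import WhisperProcessor

WhisperProcessor.from_pretrained("openai/whisper-base")
print(os.listdir(os.environ["HF_HOME"]))  # should now include a "hub" directory holding the cached model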
infer.py
CHANGED
@@ -1,95 +1,38 @@
 import torch
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
-import soundfile as sf
-from fastapi import FastAPI, File, UploadFile, Form
-import uvicorn
 import requests
-import io
-
-
-# Initialize FastAPI app
-app = FastAPI()
-
-# Print initialization of the application
-print("FastAPI application started.")
+import soundfile as sf
+import io
 
-# Load the Whisper model and processor
+# Load the Whisper model and processor from Hugging Face Model Hub
 model_name = "openai/whisper-base"
-
-
-try:
-    processor = WhisperProcessor.from_pretrained(model_name)
-    model = WhisperForConditionalGeneration.from_pretrained(model_name)
-    print(f"Model {model_name} successfully loaded.")
-except Exception as e:
-    print(f"Error loading the model: {e}")
-    raise e
+processor = WhisperProcessor.from_pretrained(model_name)
+model = WhisperForConditionalGeneration.from_pretrained(model_name)
 
-#
+# Use GPU if available, otherwise use CPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-print(f"Model is using device: {device}")
-
-
-@app.post("/transcribe/")
-def transcribe_audio_url(audio_url: str = Form(...)):
-    # Download the audio file from the provided URL
-    try:
-        response = requests.get(audio_url)
-        if response.status_code != 200:
-            return {"error": f"Failed to download audio from URL. Status code: {response.status_code}"}
-        print(f"Successfully downloaded audio from URL: {audio_url}")
-        audio_data = io.BytesIO(response.content)  # Store audio data in memory
-    except Exception as e:
-        print(f"Error downloading the audio file: {e}")
-        return {"error": f"Error downloading the audio file: {e}"}
-
-    # Process the audio
-    try:
-        audio_input, _ = sf.read(audio_data)  # Read the audio from the in-memory BytesIO
-        print(f"Audio file from URL successfully read.")
-    except Exception as e:
-        print(f"Error reading the audio file: {e}")
-        return {"error": f"Error reading the audio file: {e}"}
-
-    # Preprocess the audio for Whisper
-    try:
-        inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
-        print(f"Audio file preprocessed for transcription.")
-    except Exception as e:
-        print(f"Error processing the audio file: {e}")
-        return {"error": f"Error processing the audio file: {e}"}
 
-
-
-        print("Inputs moved to the appropriate device.")
+# URL of the audio file
+audio_url = "https://www.signalogic.com/melp/EngSamples/Orig/male.wav"
 
-
-
-
-        predicted_ids = model.generate(inputs["input_features"])
-        print("Transcription successfully generated.")
-    except Exception as e:
-        print(f"Error during transcription generation: {e}")
-        return {"error": f"Error during transcription generation: {e}"}
+# Download the audio file
+response = requests.get(audio_url)
+audio_data = io.BytesIO(response.content)
 
-
-
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        print("Transcription successfully decoded.")
-    except Exception as e:
-        print(f"Error decoding the transcription: {e}")
-        return {"error": f"Error decoding the transcription: {e}"}
+# Read the audio using soundfile
+audio_input, _ = sf.read(audio_data)
 
-
+# Preprocess the audio for Whisper
+inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
+inputs = {key: value.to(device) for key, value in inputs.items()}
 
-
-
-
+# Generate the transcription
+with torch.no_grad():
+    predicted_ids = model.generate(inputs["input_features"])
 
-
-
-print("Starting FastAPI server with Uvicorn...")
+# Decode the transcription
+transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
 
-
-
+# Print the transcription result
+print("Transcription:", transcription)
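One caveat in the rewritten script: sf.read returns samples at the file's native rate, which the script discards as _, yet the processor is told sampling_rate=16000. Whisper models expect 16 kHz mono audio, so a stereo or differently-sampled WAV would degrade transcription silently. A hedged sketch of a guard, dropped in where sf.read is called; the linear-interpolation resample is a rough stand-in (librosa or torchaudio resampling would be the quality choice), and numpy is already pulled in as a torch/transformers dependency:

import numpy as np

audio_input, sample_rate = sf.read(audio_data)  # keep the real rate instead of discarding it

if audio_input.ndim > 1:
    audio_input = audio_input.mean(axis=1)  # downmix stereo to mono

if sample_rate != 16000:
    # Naive linear-interpolation resample to Whisper's expected 16 kHz.
    target_len = int(round(len(audio_input) * 16000 / sample_rate))
    audio_input = np.interp(
        np.linspace(0.0, len(audio_input) - 1, num=target_len),
        np.arange(len(audio_input)),
        audio_input,
    )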
requirements.txt
CHANGED
@@ -1,8 +1,6 @@
-fastapi
-uvicorn
 torch
 whisper
-python-multipart
 requests
 transformers
 soundfile
+
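Two notes on the trimmed requirements. First, the whisper line that survives is likely unused: the new script imports only torch, transformers, requests, soundfile, and io, and the package published on PyPI under the bare name whisper is a different project (OpenAI's Whisper ships as openai-whisper). Second, with just torch and transformers (plus ffmpeg available in the image for audio decoding), the same transcription can be expressed through the higher-level pipeline API, which downloads and resamples the audio itself. A sketch, reusing the audio URL from infer.py:

from transformers import pipeline

# The ASR pipeline accepts a local path or a public URL and handles
# resampling/downmixing internally (ffmpeg must be installed for decoding).
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base")
result = asr("https://www.signalogic.com/melp/EngSamples/Orig/male.wav")
print("Transcription:", result["text"])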