Spaces:

Detomo
/

naomi-app-api

Runtime error

App Files Files Community

vumichien committed on Feb 16, 2023

Commit

61fe542

1 Parent(s): 24c223a

Update main.py

Browse files

Files changed (1) hide show

main.py +11 -23

main.py CHANGED Viewed

@@ -2,6 +2,7 @@ from fastapi import FastAPI, File, Form
 import datetime
 import time
 import torch
 import os
 import numpy as np
@@ -40,78 +41,65 @@ quantized_pitch_model.load_state_dict(torch.load(QUANTIZED_PITCH_MODEL_PATH))
 app = FastAPI()
 @app.get("/")
 def read_root():
     """Answer GET "/" with a fixed message saying the application has started."""
     payload = {"Message": "Application startup complete"}
     return payload
 @app.post("/naomi_api_score/")
 async def predict(
-                file: bytes = File(...),
-                word: str = Form(...),
-                pitch: str = Form("None"),
-                temperature: int = Form(...),
-                 ):
     """ Transform input audio, get text and pitch from Huggingface api and calculate score by Levenshtein Distance Score
         Parameters:
          ----------
         file : bytes
             input audio file
         word : strings
             true hiragana word to calculate word score
         pitch : strings
             true pitch to calculate pitch score
         temperature: integer
             the difficulty of AI model
         Returns:
         -------
         timestamp: strings
             current time Year-Month-Day-Hours:Minutes:Second
         running_time : strings
             running time second
         error message : strings
             error message from api
         audio duration: integer
             durations of source audio
         target : integer
             durations of target audio
         method : string
             method applied to transform source audio
         word predict : strings
             text from api
         pitch predict : strings
             pitch from api
         wrong word index: strings (ex: 100)
             wrong word compare to target word
         wrong pitch index: strings (ex: 100)
             wrong pitch compare to target pitch
         score: integer
             Levenshtein Distance Score from pitch and word
     """
     upload_audio = ffmpeg_read(file, sampling_rate=16000)
     audio_duration = len(upload_audio) / 16000
     current_time = datetime.datetime.now().strftime("%Y-%h-%d-%H:%M:%S")
     start_time = time.time()
-    error_message, score , pitch_preds = None, None, None
-    if len(word) != len(pitch) and pitch is not None:
-        error_message = "Length of word and pitch input is not equal"
     word_preds = query_raw(upload_audio, word, processor, processor_with_lm, quantized_model, temperature=temperature)
     if pitch is not None:
         pitch_preds = query_dummy(upload_audio, processor_pitch, quantized_pitch_model)
     # find best word

 import datetime
 import time
 import torch
+from typing import Optional
 import os
 import numpy as np
 app = FastAPI()
 @app.get("/")
 def read_root():
     """Answer GET "/" with a fixed message saying the application has started."""
     payload = {"Message": "Application startup complete"}
     return payload
 @app.post("/naomi_api_score/")
 async def predict(
+        file: bytes = File(...),
+        word: str = Form(...),
+        pitch: Optional[str] = Form(None),
+        temperature: int = Form(...),
+):
     """ Transform input audio, get text and pitch from Huggingface api and calculate score by Levenshtein Distance Score
         Parameters:
          ----------
         file : bytes
             input audio file
         word : strings
             true hiragana word to calculate word score
         pitch : strings
             true pitch to calculate pitch score
         temperature: integer
             the difficulty of AI model
         Returns:
         -------
         timestamp: strings
             current time Year-Month-Day-Hours:Minutes:Second
         running_time : strings
             running time second
         error message : strings
             error message from api
         audio duration: integer
             durations of source audio
         target : integer
             durations of target audio
         method : string
             method applied to transform source audio
         word predict : strings
             text from api
         pitch predict : strings
             pitch from api
         wrong word index: strings (ex: 100)
             wrong word compare to target word
         wrong pitch index: strings (ex: 100)
             wrong pitch compare to target pitch
         score: integer
             Levenshtein Distance Score from pitch and word
     """
     upload_audio = ffmpeg_read(file, sampling_rate=16000)
     audio_duration = len(upload_audio) / 16000
     current_time = datetime.datetime.now().strftime("%Y-%h-%d-%H:%M:%S")
     start_time = time.time()
+    error_message, score, word_preds, pitch_preds = None, None, None, None
     word_preds = query_raw(upload_audio, word, processor, processor_with_lm, quantized_model, temperature=temperature)
     if pitch is not None:
+        if len(word) != len(pitch):
+            error_message = "Length of word and pitch input is not equal"
         pitch_preds = query_dummy(upload_audio, processor_pitch, quantized_pitch_model)
     # find best word