vumichien committed on
Commit
61fe542
·
1 Parent(s): 24c223a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -23
main.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, File, Form
2
  import datetime
3
  import time
4
  import torch
 
5
 
6
  import os
7
  import numpy as np
@@ -40,78 +41,65 @@ quantized_pitch_model.load_state_dict(torch.load(QUANTIZED_PITCH_MODEL_PATH))
40
 
41
  app = FastAPI()
42
 
 
43
  @app.get("/")
44
  def read_root():
45
  # Root endpoint: answers GET "/" with a fixed startup-status message.
  return {"Message": "Application startup complete"}
46
 
 
47
  @app.post("/naomi_api_score/")
48
  async def predict(
49
- file: bytes = File(...),
50
- word: str = Form(...),
51
- pitch: str = Form("None"),
52
- temperature: int = Form(...),
53
- ):
54
  """ Transform input audio, get text and pitch from Huggingface api and calculate score by Levenshtein Distance Score
55
  Parameters:
56
  ----------
57
  file : bytes
58
  input audio file
59
-
60
  word : strings
61
  true hiragana word to calculate word score
62
-
63
  pitch : strings
64
  true pitch to calculate pitch score
65
-
66
  temperature: integer
67
  the difficulty of AI model
68
-
69
  Returns:
70
  -------
71
  timestamp: strings
72
  current time Year-Month-Day-Hours:Minutes:Second
73
-
74
  running_time : strings
75
  running time second
76
-
77
  error message : strings
78
  error message from api
79
-
80
  audio duration: integer
81
  durations of source audio
82
-
83
  target : integer
84
  durations of target audio
85
-
86
  method : string
87
  method applied to transform source audio
88
-
89
  word predict : strings
90
  text from api
91
-
92
  pitch predict : strings
93
  pitch from api
94
-
95
  wrong word index: strings (ex: 100)
96
  wrong word compare to target word
97
-
98
  wrong pitch index: strings (ex: 100)
99
  wrong word compare to target word
100
-
101
  score: integer
102
  Levenshtein Distance Score from pitch and word
103
-
104
  """
105
  upload_audio = ffmpeg_read(file, sampling_rate=16000)
106
  audio_duration = len(upload_audio) / 16000
107
  current_time = datetime.datetime.now().strftime("%Y-%h-%d-%H:%M:%S")
108
  start_time = time.time()
109
- error_message, score , pitch_preds = None, None, None
110
 
111
- if len(word) != len(pitch) and pitch is not None:
112
- error_message = "Length of word and pitch input is not equal"
113
  word_preds = query_raw(upload_audio, word, processor, processor_with_lm, quantized_model, temperature=temperature)
114
  if pitch is not None:
 
 
115
  pitch_preds = query_dummy(upload_audio, processor_pitch, quantized_pitch_model)
116
 
117
  # find best word
 
2
  import datetime
3
  import time
4
  import torch
5
+ from typing import Optional
6
 
7
  import os
8
  import numpy as np
 
41
 
42
  app = FastAPI()
43
 
44
+
45
  @app.get("/")
46
  def read_root():
47
  # Root endpoint: answers GET "/" with a fixed startup-status message.
  return {"Message": "Application startup complete"}
48
 
49
+
50
  @app.post("/naomi_api_score/")
51
  async def predict(
52
+ file: bytes = File(...),
53
+ word: str = Form(...),
54
+ pitch: Optional[str] = Form(None),
55
+ temperature: int = Form(...),
56
+ ):
57
  """ Transform input audio, get text and pitch from Huggingface api and calculate score by Levenshtein Distance Score
58
  Parameters:
59
  ----------
60
  file : bytes
61
  input audio file
 
62
  word : strings
63
  true hiragana word to calculate word score
 
64
  pitch : strings
65
  true pitch to calculate pitch score
 
66
  temperature: integer
67
  the difficulty of AI model
 
68
  Returns:
69
  -------
70
  timestamp: strings
71
  current time Year-Month-Day-Hours:Minutes:Second
 
72
  running_time : strings
73
  running time second
 
74
  error message : strings
75
  error message from api
 
76
  audio duration: integer
77
  durations of source audio
 
78
  target : integer
79
  durations of target audio
 
80
  method : string
81
  method applied to transform source audio
 
82
  word predict : strings
83
  text from api
 
84
  pitch predict : strings
85
  pitch from api
 
86
  wrong word index: strings (ex: 100)
87
  wrong word compare to target word
 
88
  wrong pitch index: strings (ex: 100)
89
  wrong word compare to target word
 
90
  score: integer
91
  Levenshtein Distance Score from pitch and word
 
92
  """
93
  upload_audio = ffmpeg_read(file, sampling_rate=16000)
94
  audio_duration = len(upload_audio) / 16000
95
  current_time = datetime.datetime.now().strftime("%Y-%h-%d-%H:%M:%S")
96
  start_time = time.time()
97
+ error_message, score, word_preds, pitch_preds = None, None, None, None
98
 
 
 
99
  word_preds = query_raw(upload_audio, word, processor, processor_with_lm, quantized_model, temperature=temperature)
100
  if pitch is not None:
101
+ if len(word) != len(pitch):
102
+ error_message = "Length of word and pitch input is not equal"
103
  pitch_preds = query_dummy(upload_audio, processor_pitch, quantized_pitch_model)
104
 
105
  # find best word