import {
  ClapProject,
  ClapSegment,
  getClapAssetSourceType,
  filterSegments,
  ClapSegmentFilteringMode,
  ClapSegmentCategory
} from "@aitube/clap"
import { ClapCompletionMode } from "@aitube/client"
import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"

import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"

/**
 * Generates the missing dialogue audio for a single shot.
 *
 * The segments belonging to the shot are looked up in `existingClap`, and the
 * first one whose category is DIALOGUE is considered. If that segment has no
 * `assetUrl` yet, speech is generated from its prompt and stored on the
 * segment (which is mutated in place), along with its asset source type.
 * When the generated media has audio and lasts more than one second, the
 * measured duration is propagated to the shot and to the shot's segments.
 *
 * @param shotSegment - the segment describing the shot to process
 * @param existingClap - the project the shot belongs to; its segments are mutated in place
 * @param newerClap - the project receiving the updated dialogue segment when `mode` is not FULL
 * @param mode - completion mode; anything other than FULL causes the dialogue segment to be pushed into `newerClap`
 * @param turbo - accepted for signature compatibility; not read by this function
 * @throws rethrows any error raised while generating or analyzing the audio
 */
export async function processShot({
  shotSegment,
  existingClap,
  newerClap,
  mode,
  turbo,
}: {
  shotSegment: ClapSegment
  existingClap: ClapProject
  newerClap: ClapProject
  mode: ClapCompletionMode
  turbo: boolean
}): Promise<void> {

  const shotSegments: ClapSegment[] = filterSegments(
    ClapSegmentFilteringMode.BOTH,
    shotSegment,
    existingClap.segments
  )

  const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
    s.category === ClapSegmentCategory.DIALOGUE
  )

  // only the first dialogue segment of the shot is handled
  const shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)

  console.log(`[api/edit/dialogues] processShot: shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`)

  // nothing to generate: the shot has no dialogue at all
  if (!shotDialogueSegment) {
    console.log(`[api/edit/dialogues] processShot: there is no dialogue segment in this shot, skipping audio generation`)
    return
  }

  // nothing to generate: the dialogue already has an asset
  if (shotDialogueSegment.assetUrl) {
    console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment.assetUrl?.slice?.(0, 50)}...`)
    return
  }

  try {
    // this generates a mp3
    shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
      text: shotDialogueSegment.prompt,
      audioId: getSpeechBackgroundAudioPrompt(
        shotSegments,
        existingClap.entityIndex,
        // TODO: use the entity description if it exists
        ["high quality", "crisp", "detailed"]
      ),
      debug: true,
    })
    shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)

    const { durationInMs, hasAudio } = await getMediaInfo(shotDialogueSegment.assetUrl)

    // durations are only updated when the media has audio and lasts more than one second
    if (hasAudio && durationInMs > 1000) {
      shotDialogueSegment.assetDurationInMs = durationInMs
      shotSegment.assetDurationInMs = durationInMs

      // we update the duration of all the segments for this shot, and only
      // this shot: segments belonging to other shots must keep their own duration
      for (const segment of shotSegments) {
        segment.assetDurationInMs = durationInMs
      }
    }

  } catch (err) {
    console.log(`[api/edit/dialogues] processShot: failed to generate audio: ${err}`)
    throw err
  }

  console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment.assetUrl?.slice?.(0, 50)}...`)

  // if it's partial, we need to manually add it
  if (mode !== ClapCompletionMode.FULL) {
    newerClap.segments.push(shotDialogueSegment)
  }
}