Spaces:

jonmatthis
/

skellychat

Sleeping

App Files Files Community

skellychat / utilities /html_stuff.py

jonmatthis

push for posterity

dc2726e about 2 years ago

raw

history blame contribute delete

4.12 kB

	import io
	import os
	from contextlib import closing

	import boto3
	import gradio as gr
	import requests

	from config.config import TALKING_HEAD_WIDTH, LOOPING_TALKING_HEAD_VIDEO_PATH
	from utilities.audio import AZURE_VOICE_DATA, POLLY_VOICE_DATA
	from utilities.polly_utils import NEURAL_ENGINE


	def create_html_video(file_name, width):
	    """Return HTML for a muted, auto-playing, looping video.

	    The video path is wrapped in a hidden ``gr.File`` component and the
	    markup points at ``"/file="`` followed by that component's
	    ``value['name']``.

	    Args:
	        file_name: Path of the video to embed. When falsy,
	            ``LOOPING_TALKING_HEAD_VIDEO_PATH`` is used instead.
	        width: Value used for both the ``width`` and ``height`` attributes.

	    Returns:
	        str: The ``<video>`` markup.
	    """
	    import html  # local import: only needed here for attribute escaping

	    # Honor the caller's path; fall back to the default looping clip.
	    video_path = file_name or LOOPING_TALKING_HEAD_VIDEO_PATH
	    hidden_file = gr.File(video_path, visible=False)
	    # Escape and quote attribute values so paths containing spaces or
	    # quotes cannot break the markup.
	    video_url = html.escape("/file=" + hidden_file.value['name'], quote=True)
	    size = html.escape(str(width), quote=True)
	    # NOTE(review): ``poster`` sits on <source>, where browsers ignore it;
	    # left in place to avoid changing what is rendered - confirm intent.
	    return (
	        f'<video width="{size}" height="{size}" autoplay muted loop>'
	        f'<source src="{video_url}" type="video/mp4" poster="Masahiro.png"></video>'
	    )

	def update_talking_head(widget, state):
	    """Derive the new talking-head state and its HTML from the toggle value.

	    Args:
	        widget: Toggle value. When truthy it becomes the new state.
	        state: Previous state; it is never read, only replaced.

	    Returns:
	        tuple: ``(widget, looping_video_html)`` when ``widget`` is truthy,
	        otherwise ``(None, "<pre></pre>")``.
	    """
	    # Guard clause: talking head switched off -> clear state, empty markup.
	    if not widget:
	        return None, "<pre></pre>"
	    looping_video_html = create_html_video(
	        LOOPING_TALKING_HEAD_VIDEO_PATH, TALKING_HEAD_WIDTH
	    )
	    return widget, looping_video_html



	def do_html_audio_speak(words_to_speak, polly_language):
	    """Synthesize speech with Amazon Polly and return auto-playing audio HTML.

	    The MP3 stream returned by Polly is written to
	    ``assets/audios/tempfile.mp3`` (the directory is created when missing),
	    wrapped in a ``gr.File`` component, and referenced from an
	    ``<audio autoplay>`` element.

	    Args:
	        words_to_speak: Text sent to Polly as ``Text``.
	        polly_language: Language key handed to
	            ``POLLY_VOICE_DATA.get_voice`` together with ``"Male"``.

	    Returns:
	        tuple: ``(html_audio, "assets/audios/tempfile.mp3")`` on success, or
	        ``(None, None)`` when the response carries no ``AudioStream`` or the
	        file could not be written.

	    Raises:
	        KeyError: If one of the AWS environment variables is not set.
	    """
	    audio_path = "assets/audios/tempfile.mp3"
	    polly_client = boto3.Session(
	        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
	        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
	        region_name=os.environ["AWS_DEFAULT_REGION"]
	    ).client('polly')
	    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
	    if not voice_id:
	        # No voice found for this language: fall back to "Matthew" / en-US.
	        voice_id = "Matthew"
	        language_code = "en-US"
	        engine = NEURAL_ENGINE
	    response = polly_client.synthesize_speech(
	        Text=words_to_speak,
	        OutputFormat='mp3',
	        VoiceId=voice_id,
	        LanguageCode=language_code,
	        Engine=engine
	    )
	    if "AudioStream" not in response:
	        # The response didn't contain audio data, exit gracefully
	        print("Could not stream audio")
	        return None, None
	    with closing(response["AudioStream"]) as stream:
	        try:
	            # Without the directory the write always fails and the caller
	            # silently gets no audio; create it on demand.
	            os.makedirs(os.path.dirname(audio_path), exist_ok=True)
	            with open(audio_path, 'wb') as f:
	                f.write(stream.read())
	            temp_aud_file = gr.File(audio_path)
	            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
	            html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
	        except IOError as error:
	            # Could not write to file, exit gracefully
	            print(error)
	            return None, None
	    return html_audio, audio_path




	def do_html_video_speak(words_to_speak, azure_language):
	    """Request a lip-synced video for the text and return auto-playing video HTML.

	    Posts the text and voice settings to the ``generate_lipsync`` endpoint.
	    On a successful, non-empty response the returned bytes are written to
	    ``videos/tempfile.mp4`` (the directory is created when missing),
	    wrapped in a ``gr.File`` component and embedded in a ``<video autoplay>``
	    element.

	    Args:
	        words_to_speak: Text sent as ``bot_response``.
	        azure_language: Language key handed to
	            ``AZURE_VOICE_DATA.get_voice`` together with ``"Male"``; when no
	            voice is found ``"en-US-ChristopherNeural"`` is used.

	    Returns:
	        tuple: ``(html_video, "videos/tempfile.mp4")``. ``html_video`` is
	        ``'<pre>no video</pre>'`` when the request failed or returned no
	        data. The path is returned in either case.

	    Raises:
	        KeyError: If ``EXHUMAN_API_KEY`` is not set in the environment.
	    """
	    video_path = "videos/tempfile.mp4"
	    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
	    if not azure_voice:
	        azure_voice = "en-US-ChristopherNeural"
	    headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
	    body = {
	        'bot_name': 'Masahiro',
	        'bot_response': words_to_speak,
	        'azure_voice': azure_voice,
	        'azure_style': 'friendly',
	        'animation_pipeline': 'high_speed',
	    }
	    api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
	    # NOTE(review): no timeout is passed, so this call can block
	    # indefinitely if the service stalls - consider adding one.
	    res = requests.post(api_endpoint, json=body, headers=headers)
	    print("res.status_code: ", res.status_code)
	    html_video = '<pre>no video</pre>'
	    # ``res.content`` is always ``bytes``, so a type check cannot detect
	    # failure; gate on HTTP success and a non-empty payload so an error
	    # body is never saved and served as an mp4.
	    if res.ok and res.content:
	        print("len(res.content)): ", len(res.content))
	        os.makedirs(os.path.dirname(video_path), exist_ok=True)
	        with open(video_path, 'wb') as f:
	            f.write(res.content)
	        temp_file = gr.File(video_path)
	        temp_file_url = "/file=" + temp_file.value['name']
	        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
	    else:
	        print('video url unknown')
	    return html_video, video_path