import os
from pathlib import Path
import streamlit as st
from transformers import pipeline
from dotenv import load_dotenv
from langchain import PromptTemplate, LLMChain, OpenAI
import requests

# Load settings from a local .env file, but only when one exists in the
# current working directory.
if Path(".env").is_file():
    load_dotenv(".env")
# NOTE(review): page configuration is kept ahead of every other st.* call in
# this file; Streamlit has historically required that ordering — confirm
# before moving this line.
st.set_page_config(layout="wide")
# Hugging Face API token read from the environment (None when the variable is unset).
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

def img2Text(url):
    """Caption an image and return the generated caption text.

    Builds an "image-to-text" pipeline backed by the
    ``Salesforce/blip-image-captioning-large`` model, runs it on ``url`` and
    returns the ``"generated_text"`` field of the first result. The caption
    is also printed to stdout.

    Args:
        url: Image reference handed straight to the pipeline (``main`` in
            this file passes a local file path).

    Returns:
        The generated caption.
    """
    # The pipeline is constructed on every call, exactly as before.
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    predictions = captioner(url)
    caption = predictions[0]["generated_text"]
    print(caption)
    return caption

# LLM: turn a scenario description into a short story.
def generate_story(scenario):
    """Generate a very short story from a scenario description.

    Fills a story-teller prompt with ``scenario``, sends it through an
    ``LLMChain`` backed by the ``gpt-3.5-turbo`` model (temperature 1) and
    returns the model's answer. The story is also printed to stdout.

    Args:
        scenario: Short narrative used as the story's context.

    Returns:
        The text produced by the chain's ``predict`` call.
    """
    # Prompt text is sent to the model verbatim; the typo "momre" was fixed so
    # the 20-word limit is stated unambiguously.
    template = """
    You are a story teller;
    You can generate a short story based on a simple narrative, the story should be no more than 20 words;
    CONTEXT: {scenario}
    STORY:
    """

    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    story_llm = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=1),
        prompt=prompt,
        verbose=True,
    )

    story = story_llm.predict(scenario=scenario)
    print(story)
    return story

# Text-to-speech: synthesize the story into an audio file.

def text2Speech(story):
    """Synthesize ``story`` to speech and save the result as ``audio.flac``.

    Posts the text to the Hugging Face Inference API endpoint for the
    ``espnet/kan-bayashi_ljspeech_vits`` model and writes the response body
    to ``audio.flac`` in the current working directory.

    Args:
        story: Text to be spoken.

    Raises:
        requests.HTTPError: If the API answers with an error status. Nothing
            is written in that case, so an error payload is never saved as
            audio.
    """
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    # Must be an f-string: a plain string would send the literal text
    # "{HF_TOKEN}" instead of the actual token.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    response = requests.post(API_URL, headers=headers, json={"inputs": story})
    # Fail loudly on HTTP errors rather than saving the error body to disk.
    response.raise_for_status()

    # The response body is treated as raw audio bytes (not JSON), so it is
    # written as-is.
    with open("audio.flac", "wb") as file:
        file.write(response.content)


def main():
    """Render the Streamlit page and run the image-to-audio-story flow.

    Once a JPG has been uploaded, the image is saved to the current working
    directory, captioned with ``img2Text``, turned into a story with
    ``generate_story`` and synthesized with ``text2Speech``; the caption,
    the story and the resulting ``audio.flac`` are shown on the page.
    """
    st.header("Turn img into Audio Story")
    uploaded_file = st.file_uploader("Choose an image(jpg type)", type="jpg")
    if uploaded_file is None:
        # Nothing uploaded yet: only the header and the uploader are shown.
        return

    print(uploaded_file)

    # The file name is supplied by the client; keep only its final path
    # component so the write below cannot land outside the working directory.
    image_path = Path(uploaded_file.name).name
    with open(image_path, "wb") as file:
        file.write(uploaded_file.getvalue())

    st.image(uploaded_file, caption='Uploaded Image.',
             use_column_width=True)

    scenario = img2Text(image_path)
    with st.expander("scenario"):
        st.write(scenario)

    story = generate_story(scenario)
    with st.expander("story"):
        st.write(story)

    # text2Speech writes audio.flac, which is then played back on the page.
    text2Speech(story)
    st.audio("audio.flac")


# Run the app only when this file is executed as the main module, not when
# it is imported.
if __name__ == "__main__":
    main()