File size: 2,239 Bytes
d3a1278
 
 
5e250bb
d3a1278
79081f5
0eea78e
d3a1278
 
 
 
0eea78e
5e250bb
30fc2e2
5e250bb
0eea78e
2382d71
5e250bb
118351d
 
 
 
 
 
 
 
 
 
 
79081f5
 
c0b4d8a
79081f5
2382d71
f4bce7b
 
48f0586
 
 
2c49025
48f0586
 
 
038de77
8362c2f
 
038de77
48f0586
 
 
 
dc96563
48f0586
41984dc
2382d71
e3c115f
2382d71
 
 
 
 
 
 
 
b426c95
2382d71
fdca0f8
b426c95
2382d71
dc96563
2382d71
 
 
067e028
2382d71
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
from pathlib import Path
import streamlit as st
from transformers import pipeline
from dotenv import load_dotenv
from langchain import PromptTemplate, LLMChain, OpenAI
import requests

# Load settings from a local .env file when one exists in the working directory.
_dotenv_file = Path(".env")
if _dotenv_file.is_file():
    load_dotenv(".env")

# Use the wide page layout for the Streamlit UI.
st.set_page_config(layout="wide")

# Hugging Face API token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

def img2Text(url):
    """Generate a text caption for an image.

    Builds an "image-to-text" pipeline with the
    ``Salesforce/blip-image-captioning-large`` model, runs it on ``url`` and
    returns the ``generated_text`` of the first result. The caption is also
    printed to stdout.

    Args:
        url: Image reference handed straight to the pipeline (``main`` passes
            the path of the file it just saved).

    Returns:
        The generated caption string.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    predictions = captioner(url)
    caption = predictions[0]["generated_text"]
    print(caption)
    return caption

#llm
def generate_story(scenario):
    """Turn a short scenario description into a very short story.

    Fills a fixed prompt template with ``scenario`` and sends it through an
    ``LLMChain`` backed by the ``gpt-3.5-turbo`` model at temperature 1.
    The resulting story is printed to stdout and returned.

    Args:
        scenario: Text describing the scene the story should be based on.

    Returns:
        The story text produced by the chain.
    """
    # Prompt sent to the model; {scenario} is substituted by PromptTemplate.
    template = """
    You are a story teller;
    You can generate a short story based on a simple narrative, the story should be no more than 20 words;
    CONTEXT: {scenario}
    STORY:
    """

    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    story_llm = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=1),
        prompt=prompt,
        verbose=True,
    )

    story = story_llm.predict(scenario=scenario)
    print(story)
    return story

#textToSpeech

def text2Speech(story) :
    """Convert ``story`` to speech and save the audio as ``audio.flac``.

    Posts the text to the Hugging Face Inference API endpoint for the
    ``espnet/kan-bayashi_ljspeech_vits`` model, authenticating with the
    module-level ``HF_TOKEN``, and writes the response body to ``audio.flac``
    in the current working directory.

    Args:
        story: The text to synthesize.

    Raises:
        requests.HTTPError: If the API responds with an error status. Raising
            here keeps an error payload from being written to ``audio.flac``
            and later played back as if it were audio.
    """
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    # Must be an f-string: a plain string would send the literal text
    # "{HF_TOKEN}" instead of the actual token.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    response = requests.post(API_URL, headers=headers, json={"inputs": story})
    # Fail loudly on 4xx/5xx instead of saving the error body as audio.
    response.raise_for_status()

    # The successful response body is the raw audio, so it is written as bytes
    # and never parsed as JSON.
    with open('audio.flac', 'wb') as file:
        file.write(response.content)


def main() :
    """Render the Streamlit page: image upload -> caption -> story -> audio.

    Once a JPG has been uploaded, the file is saved to disk under its
    uploaded name, displayed, captioned with ``img2Text``, turned into a story
    with ``generate_story`` and voiced with ``text2Speech``. The caption and
    story are shown in expanders and ``audio.flac`` is offered for playback.
    """
    st.header("Turn img into Audio Story")
    upload = st.file_uploader("Choose an image(jpg type)", type="jpg")

    # Nothing to do until the user has provided an image.
    if upload is None:
        return

    print(upload)

    # Persist the upload so the captioning step can read it from a file path.
    image_bytes = upload.getvalue()
    image_path = upload.name
    with open(image_path, "wb") as image_file:
        image_file.write(image_bytes)
    st.image(upload, caption='Uploaded Image.', use_column_width=True)

    # Step 1: image -> caption.
    scenario = img2Text(image_path)
    with st.expander("scenario"):
        st.write(scenario)

    # Step 2: caption -> short story.
    story = generate_story(scenario)
    with st.expander("story"):
        st.write(story)

    # Step 3: story -> speech, then play the saved audio file.
    text2Speech(story)
    st.audio("audio.flac")


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()