File size: 2,272 Bytes
d3a1278
 
 
5e250bb
d3a1278
79081f5
0eea78e
d3a1278
 
 
 
0eea78e
5e250bb
30fc2e2
5e250bb
0eea78e
2382d71
5e250bb
118351d
 
 
 
 
 
 
 
 
 
 
79081f5
 
c0b4d8a
79081f5
2382d71
f4bce7b
 
48f0586
 
 
 
 
 
 
038de77
 
48f0586
 
 
 
 
 
 
2382d71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
from pathlib import Path
import streamlit as st
from transformers import pipeline
from dotenv import load_dotenv
from langchain import PromptTemplate, LLMChain, OpenAI
import requests

# Pull settings from a local .env file when one sits next to the script;
# otherwise rely on whatever the process environment already provides.
if Path(".env").is_file():
    load_dotenv(dotenv_path=".env")

# Let the page use the full browser width.
st.set_page_config(layout="wide")

# Hugging Face API token read from the environment; None when unset.
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

def img2Text(url):
    """Return a generated caption for the image referenced by *url*.

    Builds a Hugging Face ``image-to-text`` pipeline backed by the
    ``Salesforce/blip-image-captioning-large`` model, runs it on *url*
    (``main`` passes a local file path), prints the caption and returns it.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    predictions = captioner(url)
    # The caption is read from the first prediction's "generated_text" entry.
    caption = predictions[0]["generated_text"]
    print(caption)
    return caption

#llm
def generate_story(scenario):
    """Generate a very short story from an image caption.

    Args:
        scenario: Text describing the scene; substituted into the prompt
            as the ``CONTEXT``.

    Returns:
        The text produced by the LLM chain for the filled-in prompt.
    """
    # Fixed typo in the length constraint ("momre" -> "more") so the model
    # receives a clean instruction.
    template = """
    You are a story teller;
    You can generate a short story based on a simple narrative, the story should be no more than 20 words;
    CONTEXT: {scenario}
    STORY:
    """

    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    story_llm = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=1),
        prompt=prompt,
        verbose=True,
    )

    story = story_llm.predict(scenario=scenario)
    print(story)
    return story

#textToSpeech

def text2Speech(story):
    """Synthesize *story* to speech and save it as ``audio.flac``.

    Posts the text to the hosted ``microsoft/speecht5_tts`` inference
    endpoint and writes the raw response bytes to ``audio.flac`` in the
    current working directory.

    Args:
        story: Text to convert to speech.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    API_URL = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"
    # Must be an f-string: the original sent the literal text "{HF_TOKEN}".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    response = requests.post(API_URL, headers=headers, json={"inputs": story})
    # Fail loudly instead of saving an error body as if it were audio.
    response.raise_for_status()

    # Write the raw response bytes; decoding the body as JSON (as the
    # original helper did) leaves no ``.content`` to write.
    with open("audio.flac", "wb") as file:
        file.write(response.content)

def main():
    """Run the Streamlit page: upload an image, caption it, narrate a story.

    Once a JPG is uploaded, the file is saved to the working directory,
    captioned with ``img2Text``, turned into a story with ``generate_story``
    and voiced with ``text2Speech``. The caption, story and resulting
    ``audio.flac`` are then shown on the page.
    """
    # NOTE(review): the module also calls st.set_page_config at import time;
    # some Streamlit releases reject a second call - confirm against the
    # installed version and merge the two calls if needed.
    st.set_page_config(page_title="Image to Short Story", page_icon="")
    st.header("Turn img into Audio Story")
    # The API is st.file_uploader; ``st.file.uploader`` does not exist.
    uploaded_file = st.file_uploader("Choose an image(jpg type)", type="jpg")
    if uploaded_file is not None:
        print(uploaded_file)
        bytes_data = uploaded_file.getvalue()
        # The upload name is client-supplied; keep only its final component
        # so the write cannot escape the working directory.
        image_path = Path(uploaded_file.name).name
        with open(image_path, "wb") as file:
            file.write(bytes_data)
        st.image(uploaded_file, caption='Uploaded Image.',
                 use_column_width=True)
        scenario = img2Text(image_path)
        story = generate_story(scenario)
        text2Speech(story)

        with st.expander("scenario"):
            st.write(scenario)
        with st.expander("story"):
            st.write(story)

        st.audio("audio.flac")


# Run the app only when this file is executed as a script. The dunder names
# need double underscores and the statement must end with a colon.
if __name__ == '__main__':
    main()