File size: 2,280 Bytes
d3a1278
 
 
5e250bb
d3a1278
79081f5
0eea78e
d3a1278
 
 
 
0eea78e
5e250bb
30fc2e2
5e250bb
0eea78e
2382d71
5e250bb
118351d
 
 
 
 
 
 
 
 
 
 
79081f5
 
c0b4d8a
79081f5
2382d71
f4bce7b
 
48f0586
 
 
 
 
 
 
038de77
 
48f0586
 
 
 
 
d25a720
48f0586
41984dc
2382d71
 
 
 
 
 
 
 
 
 
 
 
 
 
b426c95
2382d71
b426c95
2382d71
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
from pathlib import Path
import streamlit as st
from transformers import pipeline
from dotenv import load_dotenv
from langchain import PromptTemplate, LLMChain, OpenAI
import requests

# Load variables from a .env file in the current working directory when one
# exists; otherwise the process environment is used unchanged.
if Path(".env").is_file():
    load_dotenv(".env")

# Streamlit documents set_page_config as a once-per-page call that must be the
# first Streamlit command, so every page setting (title, icon, layout) is
# declared together in this single call.
st.set_page_config(
    page_title="Image to Short Story",
    page_icon="",
    layout="wide",
)

# Hugging Face Inference API token; os.getenv returns None when it is unset.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

def img2Text(url):
    """Caption an image with the BLIP large image-captioning model.

    Args:
        url: Image reference handed unchanged to the ``image-to-text``
            pipeline.

    Returns:
        The ``generated_text`` field of the first result the pipeline
        returns.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    results = captioner(url)
    caption = results[0]["generated_text"]
    # Echo the caption to stdout so it shows up in the server log.
    print(caption)
    return caption

# LLM step: turn the image caption into a short story.
def generate_story(scenario):
    """Generate a very short story from a scenario description.

    The scenario is substituted into a fixed story-teller prompt and sent
    through a LangChain ``LLMChain`` backed by ``OpenAI`` configured with
    ``model_name="gpt-3.5-turbo"`` and ``temperature=1``.

    Args:
        scenario: Text placed in the prompt's ``CONTEXT`` slot.

    Returns:
        The value returned by the chain's ``predict`` call.
    """
    # The prompt asks for at most 20 words; the wording is part of what the
    # model sees, so the earlier "momre" typo is corrected to "more".
    template = """
    You are a story teller;
    You can generate a short story based on a simple narrative, the story should be no more than 20 words;
    CONTEXT: {scenario}
    STORY:
    """

    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    story_llm = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=1),
        prompt=prompt,
        verbose=True,
    )

    story = story_llm.predict(scenario=scenario)
    # Echo the story to stdout so it shows up in the server log.
    print(story)
    return story

# Text-to-speech step: synthesize the story as audio via the Hugging Face Inference API.

def text2Speech(story, output_path="audio.flac"):
    """Synthesize speech for ``story`` and save the audio to disk.

    Posts the text to the hosted ``microsoft/speecht5_tts`` model on the
    Hugging Face Inference API and writes the raw response body to
    ``output_path``.

    Args:
        story: Text to convert to speech.
        output_path: File the audio bytes are written to. Defaults to
            ``"audio.flac"``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    api_url = "https://api-inference.huggingface.co/models/microsoft/speecht5_tts"
    # f-string so the token value is sent, not the literal text "{HF_TOKEN}".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    # requests applies no timeout by default; bound the wait so a stalled
    # request cannot hang the app indefinitely.
    response = requests.post(
        api_url,
        headers=headers,
        json={"inputs": story},
        timeout=120,
    )
    # Fail loudly on an error status instead of saving an error payload as
    # if it were audio.
    response.raise_for_status()

    # The audio arrives as the raw response body, so write the bytes directly
    # rather than decoding the response as JSON.
    with open(output_path, "wb") as file:
        file.write(response.content)

def main():
    """Render the page: upload a JPG, caption it, tell a story, play it.

    Once a file is uploaded it is saved to the working directory, captioned
    with ``img2Text``, expanded into a story with ``generate_story`` and
    voiced with ``text2Speech``. The caption and the story are shown in
    expanders, followed by an audio player for ``audio.flac``.
    """
    # Page configuration is applied by the module-level st.set_page_config
    # call. Streamlit documents it as a once-per-page, first-command call, so
    # it is not repeated here.
    st.header("Turn img into Audio Story")
    # The widget is st.file_uploader; st has no `file` attribute.
    uploaded_file = st.file_uploader("Choose an image(jpg type)", type="jpg")
    if uploaded_file is None:
        return

    print(uploaded_file)
    # Keep only the final path component of the client-supplied name so the
    # copy is always written inside the working directory.
    image_path = Path(uploaded_file.name).name
    with open(image_path, "wb") as file:
        file.write(uploaded_file.getvalue())
    st.image(uploaded_file, caption='Uploaded Image.',
             use_column_width=True)

    scenario = img2Text(image_path)
    story = generate_story(scenario)
    text2Speech(story)

    with st.expander("scenario"):
        st.write(scenario)
    with st.expander("story"):
        st.write(story)

    # text2Speech writes its output to this default file name.
    st.audio("audio.flac")


# Script entry point: build the page only when the file is executed as the
# main script, not when it is imported.
if __name__ == "__main__":
    main()