flobbit committed on
Commit
015e873
·
0 Parent(s):

first commit

Browse files
Files changed (18) hide show
  1. .env.template +2 -0
  2. .gitignore +2 -0
  3. README.md +24 -0
  4. airplane.jpg +0 -0
  5. app.py +201 -0
  6. car.jpg +0 -0
  7. carolina.jpg +0 -0
  8. cats2.jpg +0 -0
  9. cows2.jpg +0 -0
  10. dogs.jpg +0 -0
  11. house.jpg +0 -0
  12. lady.jpg +0 -0
  13. mountains.jpg +0 -0
  14. punnypix.jpg +0 -0
  15. requirements.txt +7 -0
  16. sd1.png +0 -0
  17. swimming.jpg +0 -0
  18. viceroy.jpg +0 -0
.env.template ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ OPENAI_API_KEY = "sk-..."
2
+ CAPTION_PROMPT = "..."
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .vscode/launch.json
README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📷 PunnyPix 📸
2
+
3
+ 📷 Generate "funny" photo captions from images 📸
4
+
5
+ ![text](punnypix.jpg)
6
+
7
+ Select a photo, wait for the caption to appear, edit the caption as needed, and hit submit to generate a "funny" photo caption. Several sample photos are provided for convenience.
8
+
9
+ ## Running
10
+
11
+ - Install requirements.txt, fill in .env, and run app.py, or
12
+
13
+ - Try it at [huggingface.co/spaces/flobbit/punnypix](https://huggingface.co/spaces/flobbit/punnypix)
14
+
15
+ Note: for now, the Hugging Face version uses free OpenAI credits, so it is rate limited and sometimes takes a very long time to return a response. To avoid delays, enter your own OpenAI API key.
16
+
17
+ ## About
18
+ Uses Langchain, Hugging Face transformers, OpenAI, Python.
19
+
20
+ ## Limitations
21
+ This app is provided for entertainment purposes only. Sometimes the image-to-caption process doesn't produce a correct caption, so edit as needed. Sometimes the "funny" caption isn't so funny, but what can you expect for free? 😄
22
+
23
+ ## Credits
24
+ A piece of the chat portion of the system comes from https://github.com/hwchase17/langchain-gradio-template with changes for this particular use case, and changes in langchain.
airplane.jpg ADDED
app.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional, Tuple
3
+
4
+ import gradio as gr
5
+ from threading import Lock
6
+
7
+ # possibly needed for loading the environment variables locally.
8
+ # not needed when hosted on hugging face if using HF secrets
9
+ #from dotenv import load_dotenv
10
+ #load_dotenv()
11
+
12
+ from langchain.prompts.chat import (
13
+ ChatPromptTemplate,
14
+ SystemMessagePromptTemplate,
15
+ HumanMessagePromptTemplate)
16
+ from langchain.chains import LLMChain
17
+ from langchain.chat_models import ChatOpenAI
18
+
19
+ # initialize the LLM as part of the conversation chain
20
+ # temperature of 0.2 produces more creativity
21
def load_chain():
    """Build the LLM chain that rewrites a plain caption.

    The system prompt is read from the ``CAPTION_PROMPT`` environment
    variable; the human message is the caption itself, substituted through
    the ``{text}`` placeholder.

    Returns:
        An ``LLMChain`` combining the chat prompt with a ``gpt-3.5-turbo``
        ``ChatOpenAI`` model at temperature 0.2.

    Raises:
        RuntimeError: If ``CAPTION_PROMPT`` is not set.
    """
    template = os.getenv("CAPTION_PROMPT")
    if template is None:
        # Fail early with an actionable message instead of handing None to
        # the prompt template.
        raise RuntimeError(
            "CAPTION_PROMPT environment variable is not set; it must contain "
            "the system prompt used to generate captions."
        )

    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
    return LLMChain(llm=llm, prompt=chat_prompt)
36
+
37
+ # set the api key and load conversation chain once when the api key changes in input box
38
def set_openai_api_key(api_key: str):
    """Build a chain using ``api_key`` and return it.

    The key is exported through ``OPENAI_API_KEY`` only for the duration of
    ``load_chain()`` and is blanked afterwards, even if building the chain
    fails.

    Args:
        api_key: The OpenAI API key to use. May be empty or ``None``.

    Returns:
        The chain returned by ``load_chain()``, or ``None`` when no key was
        supplied.
    """
    if not api_key:
        return None

    os.environ["OPENAI_API_KEY"] = api_key
    try:
        return load_chain()
    finally:
        # Always clear the key so a failure in load_chain() cannot leave a
        # user's key behind in the process environment.
        os.environ["OPENAI_API_KEY"] = ""
48
+
49
+ # load the hugging face image to text captioner
50
+ from transformers import pipeline
51
+ captioner = pipeline("image-to-text",model="Salesforce/blip-image-captioning-base")
52
+
53
+ import PIL
54
+ import numpy
55
+ # an image has been selected. it comes to this fn as a numpy ndarray
56
+ # convert it to a PIL image and feed to the captioner
57
+ # return the resulting caption
58
def image_supplied(img: numpy.ndarray):
    """Return an automatic caption for the selected image.

    Args:
        img: The image as a numpy array, or ``None`` when no image is
            selected.

    Returns:
        The ``generated_text`` of the captioner's first result, or ``None``
        when ``img`` is ``None`` or contains only zeros.
    """
    # An all-zero array is treated the same as "no image", as before.
    if img is None or not img.any():
        return None

    # `import PIL` alone does not load the Image submodule; import it
    # explicitly rather than relying on another library having done so.
    from PIL import Image

    pil_image = Image.fromarray(img)
    outputs = captioner(pil_image, max_new_tokens=20)
    return outputs[0]['generated_text']
65
+
66
+ # class wrapping the chat
67
class ChatWrapper:
    """Callable that runs the caption chain, one request at a time."""

    def __init__(self):
        # Serializes calls so only one request runs the chain at a time.
        self.lock = Lock()

    def __call__(self, api_key: str, inp: str, chain: Optional[LLMChain]):
        """Execute the chat functionality.

        Args:
            api_key: The OpenAI API key assigned to ``openai.api_key``.
            inp: The caption text passed to the chain.
            chain: A previously built chain, or ``None`` if none is
                available yet.

        Returns:
            The chain's output for ``inp``, or a message asking for an
            OpenAI key when no chain could be built.
        """
        # The context manager releases the lock on every exit path,
        # including exceptions, which propagate to the caller unchanged.
        with self.lock:
            if chain is None:
                # No chain yet: try to build one from the key textbox's
                # value (the default, rate-limited key).
                chain = set_openai_api_key(openai_api_key_textbox.value)

            # Still no chain: no usable key was available.
            if chain is None:
                return "Please paste your OpenAI key to use"

            # Set OpenAI key
            import openai
            openai.api_key = api_key
            openai.api_type = 'open_ai'
            openai.api_base = 'https://api.openai.com/v1'

            return chain.run(inp)
110
+
111
chat = ChatWrapper()

# Custom CSS: page background image and a hidden Gradio footer.
css = """
.gradio-container {background-color: lightgray; background: url('file=./sd1.png'); background-size: cover}
footer {visibility: hidden}
"""

font_name = "Kalam"
block = gr.Blocks(
    title="📷 PunnyPix 📸",
    css=css,
    theme=gr.themes.Default(
        text_size='lg',
        font=[gr.themes.GoogleFont(font_name), "Arial", "sans-serif"],
        spacing_size="sm",
        radius_size="sm",
    ),
)

# Create the app layout.
# NOTE(review): the nesting of the Row/Column containers below is inferred
# from the component order and the `scale` values; confirm it against the
# rendered page.
with block:
    with gr.Row():
        with gr.Column():
            gr.Markdown("<h2><center>📷 PunnyPix 📸</center></h2>")
            gr.Markdown("<h4><center>Load image. Edit automated caption. Click 'Submit' to get a funny (hopefully) caption.</center></h4>")

            # NOTE(review): the default key is used as the textbox's initial
            # value, which presumably sends it to the browser; confirm, and
            # consider keeping the default key server-side only.
            openai_api_key_textbox = gr.Textbox(
                label="🔑 Default key is rate limited. Paste your OpenAI API key (sk-...)",
                placeholder="Paste your OpenAI API key (sk-...)",
                value=os.getenv("OPENAI_API_KEY"),  # default to rate limited key
                lines=1,
                type="password",
            )

    with gr.Row():
        with gr.Column():
            image_box = gr.Image(show_label=False)

            with gr.Row():
                result_box = gr.Textbox(label="Original caption 🗨️", value="", interactive=True, lines=1, scale=3)
                # Button is scaled to 1/3 the size of each of the two text boxes.
                submit = gr.Button(value="Submit", variant="secondary", size='sm', scale=1)
                caption_box = gr.Textbox(
                    label="Converted caption 🗯️",
                    value="",
                    lines=1,
                    interactive=False,
                    scale=3,
                )

            gr.Examples(
                label="Sample images",
                examples=[
                    'carolina.jpg',
                    'house.jpg',
                    'viceroy.jpg',
                    'airplane.jpg',
                    'swimming.jpg',
                    'cats2.jpg',
                    'car.jpg',
                    'dogs.jpg',
                    'cows2.jpg',
                    'mountains.jpg',
                ],
                inputs=image_box,
            )

    gr.HTML(
        "<center><a style='color: white' href='https://github.com/flobbit1/punnypix'>Powered by LangChain 🦜️🔗, Hugging Face transformers, OpenAI</a></center>"
    )

    # Holds the chain built by set_openai_api_key.
    agent_state = gr.State()

    # Once the caption has been confirmed (Enter in the box or "Submit"),
    # pass it to the chat and put the result into caption_box.
    submit.click(chat, inputs=[openai_api_key_textbox, result_box, agent_state], outputs=[caption_box])
    result_box.submit(chat, inputs=[openai_api_key_textbox, result_box, agent_state], outputs=[caption_box])

    # If the image has changed, feed it to image_supplied and pass the
    # result to result_box.
    image_box.change(
        image_supplied,
        inputs=[image_box],
        outputs=[result_box],
    )

    # If the API key in the input box has changed, rebuild the chain.
    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox],
        outputs=[agent_state],
    )

block.launch(debug=True)
car.jpg ADDED
carolina.jpg ADDED
cats2.jpg ADDED
cows2.jpg ADDED
dogs.jpg ADDED
house.jpg ADDED
lady.jpg ADDED
mountains.jpg ADDED
punnypix.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ langchain
4
+ transformers
5
+ torch
6
+
7
+ #setuptools
sd1.png ADDED
swimming.jpg ADDED
viceroy.jpg ADDED