Rooni committed on
Commit
99ad768
·
1 Parent(s): 18b43ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -86
app.py CHANGED
@@ -1,117 +1,83 @@
1
- # Import the necessary libraries
2
  import gradio as gr
3
- import openai
 
4
  import base64
 
5
  import io
6
- import os
7
- import requests
8
-
9
 
10
def encode_image_to_base64(image):
    """Return *image* serialized as JPEG and encoded as a base64 string.

    Args:
        image: A PIL-style image object exposing ``mode``, ``convert`` and
            ``save``.

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded as UTF-8 text.
    """
    # JPEG cannot store alpha or palette data; saving such an image (a typical
    # uploaded PNG is RGBA or P) raises an error, so convert to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if getattr(image, "mode", "RGB") not in jpeg_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
16
 
17
-
18
def ask_openai_with_image(instruction, json_prompt, low_quality_mode, image):
    """Send an image and a prompt to the OpenAI vision model and return its reply.

    Args:
        instruction: Prompt text. When blank, the default prompt advertised by
            the "Instructions" textbox is used instead.
        json_prompt: Optional list of attributes; when non-blank the model is
            asked to answer in JSON with those attributes.
        low_quality_mode: When true, request ``"low"`` image detail, otherwise
            ``"high"``.
        image: PIL image to analyse, or ``None`` when nothing was uploaded.

    Returns:
        The model's reply text, or a string starting with ``"Error"`` that
        describes what went wrong.
    """
    default_instruction = (
        "I've uploaded an image and I'd like to know what it depicts "
        "and any interesting details you can provide."
    )

    # Gradio passes None when the user submits without uploading an image.
    if image is None:
        return "Error: no image was provided. Please upload an image."

    api_key = os.getenv("API_KEY")
    if not api_key:
        # Fail early instead of sending "Bearer None" to the API.
        return "Error: the API_KEY environment variable is not set."
    openai.api_key = api_key

    base64_image = encode_image_to_base64(image)

    # The UI promises a default prompt when the field is left blank.
    instruction = (instruction or "").strip() or default_instruction

    json_prompt = (json_prompt or "").strip()
    if json_prompt:
        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt}"

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json=payload,
            timeout=120,  # never hang the UI forever on a stalled connection
        )
    except requests.RequestException as e:
        return f"Error: request to OpenAI failed: {e}"

    if response.status_code != 200:
        return f"Error: {response.text}"

    try:
        response_json = response.json()
    except ValueError as e:
        print("Error decoding JSON response:", e)
        return "Error processing the image response."

    print("Response JSON:", response_json)  # Print the raw response JSON
    try:
        return response_json["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # The response did not have the expected structure.
        print("Error in JSON structure:", e)
        print("Full JSON response:", response_json)
        return "Error processing the image response."
77
-
78
-
79
# Optional attribute list; when filled in, the model is asked to reply in JSON.
json_schema = gr.Textbox(
    lines=3,
    label="JSON Attributes",
    info="Define a list of attributes to force the model to respond in valid json format. Leave blank to disable json formatting.",
    placeholder=(
        "Example:\n"
        "- name: Name of the object\n"
        "- color: Color of the object\n"
    ),
)

# Free-form prompt for the vision model.
instructions = gr.Textbox(
    lines=2,
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use default.",
    placeholder=(
        "Default:\n"
        "I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide."
    ),
)

# Toggle between low- and high-detail image processing.
low_quality_mode = gr.Checkbox(
    label="Low Quality Mode",
    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
)

# Wire the inputs to the request function; the reply is rendered as Markdown.
vision_playground = gr.Interface(
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
    fn=ask_openai_with_image,
    inputs=[
        instructions,
        json_schema,
        low_quality_mode,
        gr.Image(type="pil", label="Image"),
    ],
    outputs=[gr.Markdown()],
)

# Start the web app.
vision_playground.launch()
 
 
1
  import gradio as gr
2
+ import requests
3
+ import os
4
  import base64
5
+ from PIL import Image
6
  import io
 
 
 
7
 
8
def encode_image_to_base64(image):
    """Return *image* serialized as JPEG and encoded as a base64 string.

    Args:
        image: A PIL-style image object exposing ``mode``, ``convert`` and
            ``save``.

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded as UTF-8 text.
    """
    # JPEG cannot store alpha or palette data; saving such an image (a typical
    # uploaded PNG is RGBA or P) raises an error, so convert to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if getattr(image, "mode", "RGB") not in jpeg_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
14
 
15
def ask_openai_with_image(instruction, image):
    """Send an instruction and an image to the OpenAI vision model and return its reply.

    Args:
        instruction: Prompt text for the model; surrounding whitespace is
            stripped before sending.
        image: PIL image to analyse, or ``None`` when nothing was uploaded.

    Returns:
        The model's reply text, or a string starting with ``"Error"`` that
        describes what went wrong.
    """
    # Gradio passes None when the user submits without uploading an image.
    if image is None:
        return "Error: no image was provided. Please upload an image."

    # API key for OpenAI; fail early instead of sending "Bearer None".
    api_key = os.getenv("API_KEY")
    if not api_key:
        return "Error: the API_KEY environment variable is not set."

    # Encode the uploaded image to base64.
    base64_image = encode_image_to_base64(image)

    # Strip leading and trailing whitespace from the instruction.
    instruction = (instruction or "").strip()

    # The image must travel as an "image_url" part of the user message; a
    # plain string in a "system" message is treated as (very long) text and
    # the model never sees the picture.
    content = []
    if instruction:
        content.append({"type": "text", "text": instruction})
    content.append(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
        }
    )

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": content,
            }
        ],
        # Must stay within the model's completion-token limit, otherwise the
        # API rejects the request.
        "max_tokens": 4095,
    }

    # Request headers.
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }

    # OpenAI chat completions endpoint.
    url = "https://api.openai.com/v1/chat/completions"

    try:
        # A timeout keeps the UI from hanging forever on a stalled connection.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
    except requests.RequestException as e:
        return f"Error: request to OpenAI failed: {e}"

    # On failure, return the status code and body reported by the API.
    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        # Try to extract the reply text from the response.
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # The body was not JSON or did not have the expected structure.
        return f"Error processing the image response: {e}"
66
+
67
# Build the Gradio interface: inputs in the left column, model reply in the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            instructions = gr.Textbox(
                label="Instructions",
                placeholder="Enter the instructions here...",
            )
            image_input = gr.Image(label="Upload an image", type="pil")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_markdown = gr.Markdown(label="AI Response")

    # Clicking "Submit" sends the instruction and image to the model and
    # renders the returned text as Markdown.
    submit_button.click(
        fn=ask_openai_with_image,
        inputs=[instructions, image_input],
        outputs=[output_markdown],
    )

# Start the web app.
demo.launch()