# -*- coding: utf-8 -*-
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
import os
from langchain.chat_models import AzureChatOpenAI
#llm_fy = OpenAI(model_name="text-davinci-003", max_tokens=1024)  # formerly used for translation; replaced by the LLMChain helpers below
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
#llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name='gpt-3.5-turbo',openai_api_base=OPENAI_API_BASE)
llm = AzureChatOpenAI(
    deployment_name="bitservice_chat_35",
    openai_api_base=OPENAI_API_BASE,
    openai_api_key=OPENAI_API_KEY,
    openai_api_version="2023-03-15-preview",
    model_name="gpt-3.5-turbo",
)
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
# BLIP image-captioning model: generates a short English caption for an image.
image_to_text_model = "Salesforce/blip-image-captioning-large"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)
import requests
from PIL import Image
def describeImageByUrl(image_url):
    """Download an image from a URL and return a BLIP-generated caption."""
    image_object = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    describe = processor.decode(outputs[0], skip_special_tokens=True)
    return describe
def describeImageByPath(image_path):
    """Open a local image file and return a BLIP-generated caption."""
    image_object = Image.open(image_path).convert('RGB')
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    describe = processor.decode(outputs[0], skip_special_tokens=True)
    return describe
#description = describeImageByUrl('https://img0.baidu.com/it/u=4190066402,1916608022&fm=253&fmt=auto&app=120&f=JPEG?w=1280&h=800')
#description
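# A matching local-file sketch for describeImageByPath; the path below is a
# hypothetical placeholder, not a file that ships with this script:
#caption = describeImageByPath('/path/to/local_image.jpg')
#print(caption)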
from langchain.tools import BaseTool
class DescribeImageTool(BaseTool):
    name: str = "Describe Image Tool"
    description: str = "Use this tool to describe an image."
    def _run(self, url: str):
        # The agent passes a local path here; swap in describeImageByUrl for URLs.
        #description = describeImageByUrl(url)
        description = describeImageByPath(url)
        return description
    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")
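# Note: a tool's name and description are injected into the agent's system
# prompt; the LLM selects the tool by name and passes the image path (or URL)
# as the action input string that _run receives.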
tools = [DescribeImageTool()]
agent = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True
    )
)
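# This agent type runs a conversational ReAct loop: the chat model decides
# whether to call DescribeImageTool, observes the returned caption, and then
# composes the final answer. ConversationBufferWindowMemory keeps only the
# last k=5 exchanges under the 'chat_history' key this agent type expects.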
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
def enToChinese(english):
    """Translate an English sentence to Chinese via an LLMChain."""
    pp = "Please translate the following sentence from English to Chinese:{english}"
    prompt = PromptTemplate(
        input_variables=["english"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(english)
def chToEnglish(chinese):
    """Translate a Chinese sentence to English via an LLMChain."""
    pp = "Please translate the following sentence from Chinese to English:{chinese}"
    prompt = PromptTemplate(
        input_variables=["chinese"],
        template=pp
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(chinese)
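# Hypothetical spot-check for the two translation helpers above; the sample
# sentences are illustrations only:
#print(enToChinese("A dog is running on the grass."))
#print(chToEnglish("草地上有一只奔跑的狗。"))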
#image_url = 'https://img0.baidu.com/it/u=4190066402,1916608022&fm=253&fmt=auto&app=120&f=JPEG?w=1280&h=800'
#agent(f"Describe the following image:\n{image_url}")
#en_result = agent(f"描述下面这张图片:\n{image_url}")['output']  # Chinese: "Describe the image below"
#print(enToChinese(en_result))
#agent(f"What is the brand of car in the following image:\n{image_url}")
#en_result = agent(f"下面这张图片的汽车品牌是什么:\n{image_url}")['output']  # Chinese: "What brand of car is in the image below?"
#print(enToChinese(en_result))
def imageAnalyse(image_path, question):
    """Answer a question about an image; the final answer is returned in Chinese."""
    question = question.strip()
    if len(question) == 0:
        question = "请描述这张图片"  # default prompt: "Please describe this image"
    print("question:" + question)
    en_result = agent(f"{question}:\n{image_path}")['output']
    print("en_result:" + en_result)
    ch_result = enToChinese(en_result)
    print("ch_result:" + ch_result)
    return ch_result
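# Minimal entry-point sketch; the image path is a hypothetical placeholder,
# and an empty question falls back to the default describe-this-image prompt:
#if __name__ == '__main__':
#    print(imageAnalyse('/path/to/local_image.jpg', ''))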