# -*- coding: utf-8 -*-
"""Image question answering with a BLIP captioner behind a LangChain agent.

At import time this module:

* builds an Azure OpenAI chat model from the ``OPENAI_API_KEY`` and
  ``OPENAI_API_BASE`` environment variables,
* loads the ``Salesforce/blip-image-captioning-large`` processor and model
  (on CUDA when available, otherwise on CPU),
* creates a conversational agent whose only tool captions an image.

``imageAnalyse`` is the main entry point.
"""
import io
import os

import requests
import torch
from PIL import Image
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.chains import LLMChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chat_models import AzureChatOpenAI
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from transformers import BlipForConditionalGeneration, BlipProcessor
from transformers.models.oneformer.modeling_oneformer import OneFormerModelOutput

# NOTE: a completion model (text-davinci-003) was previously used for
# translation; it has been replaced by the LLMChain-based helpers below.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")

# Alternative (non-Azure) chat model:
# llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0,
#                  model_name='gpt-3.5-turbo', openai_api_base=OPENAI_API_BASE)
llm = AzureChatOpenAI(
    deployment_name="bitservice_chat_35",
    openai_api_base=OPENAI_API_BASE,
    openai_api_key=OPENAI_API_KEY,
    openai_api_version="2023-03-15-preview",
    model_name="gpt-3.5-turbo",
)

image_to_text_model = "Salesforce/blip-image-captioning-large"
device = 'cuda' if torch.cuda.is_available() else 'cpu'

processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model).to(device)


def _caption_image(image_object):
    """Return the BLIP-generated caption for an already opened RGB image."""
    inputs = processor(image_object, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)


def describeImageByUrl(image_url, timeout=30):
    """Download the image at *image_url* and return its generated caption.

    Args:
        image_url: HTTP(S) URL of the image.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(image_url, timeout=timeout)
    # Fail on HTTP errors here instead of handing an error page to PIL.
    response.raise_for_status()
    # ``response.content`` is already content-decoded, unlike ``response.raw``.
    image_object = Image.open(io.BytesIO(response.content)).convert('RGB')
    return _caption_image(image_object)


def describeImageByPath(image_path):
    """Open the image file at *image_path* and return its generated caption."""
    # ``convert`` returns a new, fully loaded image, so the file handle can be
    # released as soon as the ``with`` block exits.
    with Image.open(image_path) as source_image:
        image_object = source_image.convert('RGB')
    return _caption_image(image_object)


# Example:
# description = describeImageByUrl('https://img0.baidu.com/it/u=4190066402,1916608022&fm=253&fmt=auto&app=120&f=JPEG?w=1280&h=800')


class DescribeImageTool(BaseTool):
    """Agent tool that captions an image given its local file path."""

    name: str = "Describe Image Tool"
    description: str = 'use this tool to describe an image.'

    def _run(self, url: str):
        # Despite the parameter name, the value is treated as a local path;
        # switch to describeImageByUrl(url) to caption remote images instead.
        description = describeImageByPath(url)
        return description

    def _arun(self, query: str):
        raise NotImplementedError("Async operation not supported yet")


tools = [DescribeImageTool()]

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True
    )
)


def _run_prompt(template, variable, text):
    """Fill *text* into the single-variable *template* and run it through ``llm``."""
    prompt = PromptTemplate(
        input_variables=[variable],
        template=template
    )
    llchain = LLMChain(llm=llm, prompt=prompt)
    return llchain.run(text)


def enToChinese(english):
    """Ask the chat model to translate *english* into Chinese; return its reply."""
    pp = "Please translate the following sentence from English to Chinese:{english}"
    return _run_prompt(pp, "english", english)


def chToEnglish(chinese):
    """Ask the chat model to translate *chinese* into English; return its reply."""
    pp = "Please translate the following sentence from Chinese to English:{chinese}"
    return _run_prompt(pp, "chinese", chinese)


# Example usage of the agent with a remote image (requires the tool to use
# describeImageByUrl):
# image_url = 'https://img0.baidu.com/it/u=4190066402,1916608022&fm=253&fmt=auto&app=120&f=JPEG?w=1280&h=800'
# agent(f"Describe the following image:\n{image_url}")
# agent(f"What is the brand of car in the following image:\n{image_url}")
# print(enToChinese(agent(f"Describe the following image:\n{image_url}")['output']))


def imageAnalyse(image_path, question):
    """Answer *question* about the image at *image_path*.

    The question and the image path are sent to the agent; the agent's
    ``output`` is then passed through ``enToChinese`` and that result is
    returned. The question, the agent output and the translated result are
    printed along the way.

    Args:
        image_path: Location of the image, forwarded to the agent verbatim.
        question: Question about the image. ``None``, an empty string or a
            whitespace-only string falls back to a default "describe this
            image" request.

    Returns:
        The string returned by ``enToChinese`` for the agent's answer.
    """
    # Tolerate None as well as blank input.
    question = (question or "").strip()
    if not question:
        question = "请描述这张图片"
    print("question:"+question)
    en_result = agent(f"{question}:\n{image_path}")['output']
    print("en_result:"+en_result)
    ch_result = enToChinese(en_result)
    print("ch_result:"+ch_result)
    return ch_result