# Author: guillaumefrd — commit 809f87e
# "langgraph with openai + separate llamaindex with subfolder"
from huggingface_hub import InferenceClient
from llama_index.core.tools import FunctionTool
# --- Functions --- #
def query_image(query: str, image_url: str) -> str:
    """Ask anything about an image using a Vision Language Model.

    Args:
        query (str): the query about the image, e.g. how many persons are on the image?
        image_url (str): the URL to the image

    Returns:
        str: the model's textual answer, or an error string if the call failed.
    """
    client = InferenceClient(provider="nebius")
    try:
        completion = client.chat.completions.create(
            # model="google/gemma-3-27b-it",
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": query
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_url
                            }
                        }
                    ]
                }
            ],
            max_tokens=512,
        )
        # Bug fix: return the message *content* (a string), not the message
        # object itself — the declared return type is str, and downstream
        # agents expect plain text from a tool call.
        return completion.choices[0].message.content
    except Exception as e:
        # Best-effort tool: report the failure as a string instead of raising,
        # so the agent can see and react to the error.
        return f"query_image failed: {e}"
def automatic_speech_recognition(file_url: str) -> str:
    """Transcribe an audio file to text.

    Args:
        file_url (str): the URL to the audio file

    Returns:
        str: the transcription text, or an error string if the call failed.
    """
    client = InferenceClient(provider="fal-ai")
    try:
        # The client returns an AutomaticSpeechRecognitionOutput object;
        # extract its .text field so the declared str return type holds
        # and the agent receives plain text rather than a repr of the object.
        output = client.automatic_speech_recognition(file_url, model="openai/whisper-large-v3")
        return output.text
    except Exception as e:
        # Best-effort tool: surface the failure as a string instead of raising.
        return f"automatic_speech_recognition failed: {e}"
### --- Tool instance ---
# Wrap the plain functions as LlamaIndex tools so an agent can invoke them;
# from_defaults derives name/description from the function and its docstring.
query_image_tool = FunctionTool.from_defaults(fn=query_image)
automatic_speech_recognition_tool = FunctionTool.from_defaults(fn=automatic_speech_recognition)