|
import gradio as gr |
|
from duckduckgo_search import DDGS |
|
from transformers import pipeline |
|
from PIL import Image |
|
import requests |
|
from bs4 import BeautifulSoup |
|
import re |
|
import torch |
|
from io import BytesIO |
|
|
|
|
|
# Hugging Face pipelines, built once at import time and shared by the
# handler functions in this module.

# Extractive question answering (not referenced elsewhere in the visible code).
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)

# Image captioning; called by explain_image().
caption_pipeline = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)

# Text summarization; called by summarize_url().
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
|
|
|
|
|
def search_web(query, max_results=3):
    """Search DuckDuckGo and return the top hits as a Markdown string.

    Args:
        query: Search phrase to look up.
        max_results: Maximum number of hits to include.

    Returns:
        One entry per hit (bold title, snippet, link) separated by blank
        lines; a short notice when there are no hits or the search fails.
    """
    try:
        with DDGS() as ddgs:
            # Materialise inside the context manager: a lazily evaluated
            # result must be consumed while the session is still open.
            hits = list(ddgs.text(query, max_results=max_results) or [])
    except Exception as e:
        # Report the failure as text, the same way extract_text_from_url
        # does, so a search outage does not abort the whole request.
        return f"Search failed: {e}"

    if not hits:
        return "No results found."

    # .get() keeps one incomplete hit from raising KeyError for all of them.
    return "\n\n".join(
        f"**{hit.get('title', '')}**\n{hit.get('body', '')}\n{hit.get('href', '')}"
        for hit in hits
    )
|
|
|
def explain_image(img):
    """Return the caption the image-to-text pipeline generates for *img*."""
    captions = caption_pipeline(img)
    # The pipeline returns a list of results; only the first one is used.
    first_caption = captions[0]
    return first_caption['generated_text']
|
|
|
def extract_text_from_url(url):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch.

    Returns:
        Up to 3000 characters of whitespace-normalised page text, or a
        message beginning with "Failed to extract text:" when the page
        cannot be fetched or parsed.
    """
    try:
        res = requests.get(url, timeout=5)
        # Treat 4xx/5xx responses as failures instead of extracting the
        # text of the server's error page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Script and style bodies are not visible text; remove them first.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator=' ')
        # Collapse every run of whitespace to a single space.
        clean_text = re.sub(r'\s+', ' ', text).strip()
        return clean_text[:3000]
    except Exception as e:
        # Deliberate best-effort boundary: callers expect a string back,
        # never an exception.
        return f"Failed to extract text: {e}"
|
|
|
def summarize_url(url):
    """Summarise the text content of the page at *url*.

    Args:
        url: Address of the page to summarise.

    Returns:
        The model-generated summary; the extractor's failure message when
        the page could not be read; or a notice when the page has too
        little text to summarise.
    """
    text = extract_text_from_url(url)

    # extract_text_from_url reports errors as a string with this prefix.
    # Surface it directly instead of feeding the error text to the model
    # (long messages) or hiding it behind "Not enough text" (short ones).
    if text.startswith("Failed to extract text:"):
        return text

    if len(text) <= 100:
        return "Not enough text to summarize."

    # Only the first 1024 characters are passed to the summarizer.
    return summarizer(text[:1024])[0]['summary_text']
|
|
|
|
|
def ai_agent(input_text, image=None, url=None):
    """Run every applicable tool on the inputs and merge the results.

    Args:
        input_text: Question or search phrase; may be empty or None.
        image: Image to caption, or None when no image was supplied.
        url: Web page address to summarise; may be empty or None.

    Returns:
        Markdown with one section per supplied input, separated by
        horizontal rules, or a short prompt when nothing was supplied.
    """
    # Blank or whitespace-only text boxes count as "not provided".
    query = (input_text or "").strip()
    link = (url or "").strip()

    sections = []

    # Compare against None explicitly rather than relying on the image
    # object's truthiness.
    if image is not None:
        sections.append("🖼️ **Image Explanation:**\n" + explain_image(image))

    if link:
        # Video links get a notice, then the ordinary page summary.
        if "youtube.com" in link or "youtu.be" in link:
            sections.append("📹 **Video URL detected.** Currently only summaries of page content are available.")
        sections.append("🌐 **Webpage Summary:**\n" + summarize_url(link))

    if query:
        # Both cases run a web search; only the heading differs. Inputs
        # longer than ten words are labelled as search results, shorter
        # ones as an answer.
        if len(query.split()) > 10:
            heading = "🔍 **Web Search Results:**\n"
        else:
            heading = "🧠 **Answer:**\n"
        sections.append(heading + search_web(query))

    if not sections:
        return "Please provide a question, an image, or a URL."

    return "\n\n---\n\n".join(sections)
|
|
|
|
|
# Gradio UI: three optional inputs feed ai_agent; its Markdown result is
# rendered below the button.
with gr.Blocks() as demo:
    gr.Markdown("## 🌐🧠 Multi-Modal AI Agent (Web + Image + URL)")

    with gr.Row():
        input_text = gr.Textbox(label="Ask a Question", lines=2, placeholder="E.g. What are the latest AI trends?")
        image = gr.Image(type="pil", label="Upload an Image (optional)")
        url = gr.Textbox(label="Provide a URL (optional)", placeholder="https://example.com")

    submit = gr.Button("Get Answer")
    output = gr.Markdown()

    # The order of `inputs` matches ai_agent's positional parameters.
    submit.click(fn=ai_agent, inputs=[input_text, image, url], outputs=output)

demo.launch()