from smolagents import VisitWebpageTool, InferenceClientModel, CodeAgent, WebSearchTool, tool
from flask import Flask, request, jsonify
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from system_prompt import get_system_prompt
import xml.etree.ElementTree as ET

app = Flask(__name__)

model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct", provider="together")
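# Note: InferenceClientModel routes requests through Hugging Face's inference
# providers (here "together"), which generally requires a valid Hugging Face
# token (e.g. an HF_TOKEN environment variable) to be available at runtime.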

crawler_config = CrawlerRunConfig(
    deep_crawl_strategy=BFSDeepCrawlStrategy(
        max_depth=0,
        include_external=False,
    ),
    scraping_strategy=LXMLWebScrapingStrategy(),
    verbose=True,
)
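# With max_depth=0 the BFS deep-crawl strategy should stay on the start URL
# itself rather than following links, while LXMLWebScrapingStrategy handles the
# HTML-to-content extraction. verbose=True makes crawl4ai log its progress.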


def extract_vehicle_info_as_string(adf_xml):
    """Parse an ADF/XML lead and return (first_name, vehicle_info) as strings."""
    root = ET.fromstring(adf_xml)

    # Default to an empty string so the function never returns an undefined value
    # when the lead has no <vehicle> element.
    vehicle_info = ""
    vehicle = root.find('.//vehicle')
    if vehicle is not None:
        year = vehicle.findtext('year', default="") or ""
        make = vehicle.findtext('make', default="") or ""
        model_name = vehicle.findtext('model', default="") or ""
        vehicle_info = f"{year} {make} {model_name}".strip()

    first_name = ""
    name_element = root.find('.//name[@part="first"]')
    if name_element is not None:
        first_name = name_element.text.strip() if name_element.text else ""

    return first_name, vehicle_info
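# Rough sanity check (a sketch, using the sample ADF lead defined further below):
#   extract_vehicle_info_as_string(adf_lead)  # -> ("Test", "2016 Toyota Corolla")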


def safe_get_attr(obj, attr, default=None):
    """Safely read an attribute, returning `default` if it is missing or raises."""
    try:
        return getattr(obj, attr, default)
    except (AttributeError, TypeError):
        return default


@tool
def custom_site_crawler(website_url: str) -> str:
    """
    Crawl the dealership's CarGurus listing page or the dealership's own website and
    return the page content as markdown. Use this when a question about the car or the
    dealership cannot be answered from the ADF lead alone.

    Args:
        website_url: The URL of the website (CarGurus page or dealership website) to crawl.

    Returns:
        The markdown content of the website as a string.
    """
    import asyncio

    async def _crawl_site(url):
        async with AsyncWebCrawler() as crawler:
            try:
                result_container = await crawler.arun(url, config=crawler_config)

                # With a deep-crawl strategy configured, arun() can return a list of
                # results; otherwise it returns a single result object.
                if hasattr(result_container, '__iter__') and not isinstance(result_container, str):
                    try:
                        result = next(iter(result_container))
                    except (StopIteration, TypeError):
                        result = result_container
                else:
                    result = result_container

                markdown_result = safe_get_attr(result, 'markdown', None)
                if markdown_result:
                    raw_markdown = safe_get_attr(markdown_result, 'raw_markdown', '')
                    return raw_markdown if raw_markdown else ""
                return ""
            except Exception as e:
                print(f"Error crawling {url}: {e}")
                return ""

    return asyncio.run(_crawl_site(website_url))
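# Because of the @tool decorator, custom_site_crawler is a smolagents Tool
# instance rather than a plain function, but it can still be called directly
# (the /crawl endpoint below relies on this), e.g.:
#   markdown = custom_site_crawler("https://www.Ohiocars.com")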


agent = CodeAgent(
    tools=[VisitWebpageTool(), WebSearchTool(), custom_site_crawler],
    model=model,
    additional_authorized_imports=["xml.etree.ElementTree"],
)
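# additional_authorized_imports lets the code the agent writes import
# xml.etree.ElementTree, e.g. to parse the ADF lead embedded in its system prompt.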

dealership_phone = "(513) 800-0805"
car_gurus_site = "https://www.cargurus.com/Cars/m-Ohio-Cars-sp458596"
car_site = "https://www.Ohiocars.com"

# Sample ADF/XML lead used to seed the agent's system prompt.
adf_lead = "<?xml version=\"1.0\"?><?ADF version=\"1.0\"?><adf><prospect><requestdate>2025-05-12T13:59:30</requestdate><vehicle status=\"used\"><id source=\"CarsForSale.com\">16f3114e-825f-4eb0-8165-ce43fe5143b6</id><year>2016</year><make>Toyota</make><model>Corolla</model><vin>5YFBURHE4GP511115</vin><stock></stock><comments>DP</comments><colorcombination><exteriorcolor>Super White</exteriorcolor></colorcombination><miles>131024.0</miles><price type=\"asking\">9950</price></vehicle><customer><contact><name part=\"first\">Test</name><name part=\"last\">Lead</name><name part=\"full\">Test Lead</name><email>[email protected]</email><phone>2582584568</phone><address><city></city><state></state><postalcode></postalcode></address></contact><comments><![CDATA[I'm interested and want to know more about the 2016 Toyota Corolla S Plus you have listed for $9,950 on Cars For Sale.]]></comments><timeframe><description></description></timeframe></customer><provider><id>19971</id><name part=\"full\">Carsforsale.com</name><service>Carsforsale.com</service><phone>866-388-9778</phone></provider><vendor><id>114483</id><vendorname>Ohio Cars</vendorname></vendor></prospect></adf>"

first_name, vehicle_info = extract_vehicle_info_as_string(adf_lead)

agent.prompt_templates["system_prompt"] += get_system_prompt(car_gurus_site, car_site, adf_lead, dealership_phone)


@app.route('/chat', methods=['POST'])
def chat():
    """
    Main chat endpoint to interact with the agent.
    Expects JSON payload with 'message' field.
    """
    try:
        data = request.get_json()
        if not data or 'message' not in data:
            return jsonify({'error': 'Missing message field in request'}), 400

        message = data['message']
        response = agent.run(message)

        return jsonify({
            'response': response,
            'status': 'success'
        })

    except Exception as e:
        return jsonify({
            'error': str(e),
            'status': 'error'
        }), 500
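# Example request (assuming the app is running on localhost:4000 as configured below):
#   curl -X POST http://localhost:4000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Is the 2016 Toyota Corolla still available?"}'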


@app.route('/crawl', methods=['POST'])
def crawl_website():
    """
    Endpoint to directly crawl a website using the custom site crawler.
    Expects JSON payload with 'url' field.
    """
    try:
        data = request.get_json()
        if not data or 'url' not in data:
            return jsonify({'error': 'Missing url field in request'}), 400

        url = data['url']
        content = custom_site_crawler(url)

        return jsonify({
            'content': content,
            'url': url,
            'status': 'success'
        })

    except Exception as e:
        return jsonify({
            'error': str(e),
            'status': 'error'
        }), 500
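# Example request (assuming the app is running on localhost:4000):
#   curl -X POST http://localhost:4000/crawl \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://www.cargurus.com/Cars/m-Ohio-Cars-sp458596"}'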


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint."""
    return jsonify({'status': 'healthy'})
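# Example: curl http://localhost:4000/health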


if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0', port=4000)