File size: 4,714 Bytes
8afb52b
c18be72
 
8a760ab
c18be72
 
 
8afb52b
c18be72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8afb52b
 
ee1ba96
 
 
 
 
 
 
 
 
8afb52b
c18be72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8afb52b
c18be72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8afb52b
 
c18be72
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from huggingface_hub import login
from smolagents import OpenAIServerModel, WikipediaSearchTool,VisitWebpageTool, ToolCallingAgent,CodeAgent, WebSearchTool
from smolagents import tool,load_tool
import os
import pandas as pd
import mdpd
from io import StringIO


# Connection settings for the chat model that drives the agent.
# NOTE(review): the localhost URL and the "EMPTY" key suggest a locally hosted
# server that does not validate API keys -- confirm against the deployment.
_inference_model_settings = {
    "model_id": "meta-llama/Llama-3.1-70B-Instruct",
    "api_base": "http://localhost:8188/v1",
    "api_key": "EMPTY",
}
inference_model = OpenAIServerModel(**_inference_model_settings)
@tool
def youtube_transcript(video_url: str) -> str:
    """This is a tool that retrieves the transcript of a YouTube video.

    Args:
        video_url (str): The URL of the YouTube video. Watch URLs (with or without extra query parameters such as a timestamp or playlist), youtu.be short links, /embed/, /shorts/ and /live/ URLs, and bare video IDs are accepted.

    Returns:
        str: The transcript of the video: a "Transcript:" header line followed by one "Unknown Speaker: <text>" line per transcript snippet.

    Raises:
        ValueError: If no video ID can be extracted from video_url.
    """
    # Imported locally, matching the existing function-scope import below.
    from urllib.parse import parse_qs, urlparse

    from youtube_transcript_api import YouTubeTranscriptApi

    # Splitting on "v=" kept trailing parameters ("&t=10s", "&list=...") in the
    # ID and returned the whole URL for links without a "v" parameter, so parse
    # the URL properly instead.
    parsed_url = urlparse(video_url.strip())
    query_ids = parse_qs(parsed_url.query).get("v")
    path_segments = [segment for segment in parsed_url.path.split("/") if segment]
    if query_ids:
        # Standard watch URL: .../watch?v=<id>&...
        video_id = query_ids[0]
    elif path_segments:
        # youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, or a bare ID:
        # in all of these the ID is the last path segment.
        video_id = path_segments[-1]
    else:
        raise ValueError(f"Could not extract a YouTube video ID from {video_url!r}")

    snippets = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    speaker_prefix = "Unknown Speaker: "
    body = ("\n" + speaker_prefix).join(snippet["text"] for snippet in snippets)
    return "Transcript:\n" + speaker_prefix + body

@tool 
def parse_md_table(md_table: str) -> pd.DataFrame:
    """This is a tool that converts a markdown table into a pandas DataFrame.

    Args:
        md_table (str): The markdown table as a string.

    Returns:
        pd.DataFrame: The DataFrame that mdpd builds from the table.
    """
    # All parsing is delegated to mdpd; the result is handed back unchanged.
    return mdpd.from_md(md_table)


@tool
def reverse_string(input_string: str) -> str:
    """This is a tool that takes a string and returns it with its characters in reverse order.

    Args:
        input_string (str): The string to reverse.

    Returns:
        str: The reversed string.
    """
    # A slice with step -1 walks the sequence from its last element to its first.
    back_to_front = slice(None, None, -1)
    return input_string[back_to_front]

@tool 
def read_excel_file(file_path: str) -> str:
    """This is a tool that reads an Excel file and returns its content as a markdown table.

    Args:
        file_path (str): The path to the Excel file.

    Returns:
        str: The content of the Excel file rendered as a markdown table.
    """
    # The DataFrame is rendered to markdown text before returning, so the
    # declared return type is str rather than pd.DataFrame.
    sheet = pd.read_excel(file_path)
    return sheet.to_markdown()

@tool
def sum_numbers(numbers: str) -> str:
    """This is a tool that sums a list of numbers provided as a comma-separated string.

    Args:
        numbers (str): A comma-separated string of numbers, e.g. "1, 2.5, 3". Blank entries (such as the one left by a trailing comma) are ignored.

    Returns:
        str: The sum of the numbers formatted with two decimal places, e.g. "6.50". An input containing no numbers yields "0.00".

    Raises:
        ValueError: If an entry is not a valid number.
    """
    # Skip blank entries so "1,2," or an empty string does not fail in float("").
    # float() itself tolerates surrounding whitespace.
    values = [float(entry) for entry in numbers.split(",") if entry.strip()]
    # The result is returned as text with two decimal places, hence the str return type.
    return f"{sum(values):.2f}"


# agent = ToolCallingAgent(
#     tools=[DuckDuckGoSearchTool(),reverse_string,VisitWebpageTool(),youtube_transcript,WikipediaSearchTool(),sum_numbers,read_excel_file], 
#     model=inference_model,
#     planning_interval=3,
#     max_steps=10,
#     instructions="You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
# )
# Tools handed to the agent. sum_numbers and read_excel_file are defined in
# this file but are not part of this list.
_agent_tools = [
    WebSearchTool(max_results=5),
    reverse_string,
    VisitWebpageTool(),
    youtube_transcript,
    parse_md_table,
    WikipediaSearchTool(),
]

# Instructions passed to the agent; they fix the "FINAL ANSWER: ..." reply format.
_agent_instructions = "You are a general AI assistant. I will ask you a question. YOU CAN NOT INTERACT WITH WEBSITES JUST READ THEM. A horse doctor is also a synonym of a equine veterinarian. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."

# NOTE(review): add_base_tools=True may register default tools alongside the
# ones listed above -- confirm how name clashes are resolved in smolagents.
agent = CodeAgent(
    tools=_agent_tools,
    model=inference_model,
    planning_interval=2,
    max_steps=10,
    add_base_tools=True,
    instructions=_agent_instructions,
    additional_authorized_imports=("pandas", "numpy", "io"),
)

if __name__ == "__main__":
    # Manual smoke test: fetch and print the transcript of one sample video.
    sample_video_url = "https://www.youtube.com/watch?v=1htKBjuUWec"
    print(youtube_transcript(sample_video_url))