Commit
·
82e5cca
1
Parent(s):
26aec96
get_youtube_transcript + use o4-mini
Browse files
- langgraph_dir/agent.py +8 -2
- langgraph_dir/config.py +3 -5
- langgraph_dir/custom_tools.py +24 -1
- requirements.txt +4 -1
langgraph_dir/agent.py
CHANGED
|
@@ -11,7 +11,8 @@ from langchain_community.tools import BraveSearch
|
|
| 11 |
|
| 12 |
from .prompt import system_prompt
|
| 13 |
from .custom_tools import (multiply, add, subtract, divide, modulus, power,
|
| 14 |
-
query_image, automatic_speech_recognition, get_webpage_content, python_repl_tool
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class LangGraphAgent:
|
|
@@ -21,7 +22,11 @@ class LangGraphAgent:
|
|
| 21 |
show_prompt=True):
|
| 22 |
|
| 23 |
# =========== LLM definition ===========
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
print(f"LangGraphAgent initialized with model \"{model_name}\"")
|
| 26 |
|
| 27 |
# =========== Augment the LLM with tools ===========
|
|
@@ -36,6 +41,7 @@ class LangGraphAgent:
|
|
| 36 |
automatic_speech_recognition, # Transcribe an audio file to text
|
| 37 |
get_webpage_content, # Load a web page and return it to markdown
|
| 38 |
python_repl_tool, # Python code interpreter
|
|
|
|
| 39 |
]
|
| 40 |
|
| 41 |
tools = community_tools + custom_tools
|
|
|
|
| 11 |
|
| 12 |
from .prompt import system_prompt
|
| 13 |
from .custom_tools import (multiply, add, subtract, divide, modulus, power,
|
| 14 |
+
query_image, automatic_speech_recognition, get_webpage_content, python_repl_tool,
|
| 15 |
+
get_youtube_transcript)
|
| 16 |
|
| 17 |
|
| 18 |
class LangGraphAgent:
|
|
|
|
| 22 |
show_prompt=True):
|
| 23 |
|
| 24 |
# =========== LLM definition ===========
|
| 25 |
+
if model_name.startswith('o'):
|
| 26 |
+
# reasoning model (no temperature setting)
|
| 27 |
+
llm = ChatOpenAI(model=model_name) # needs OPENAI_API_KEY in env
|
| 28 |
+
else:
|
| 29 |
+
llm = ChatOpenAI(model=model_name, temperature=0)
|
| 30 |
print(f"LangGraphAgent initialized with model \"{model_name}\"")
|
| 31 |
|
| 32 |
# =========== Augment the LLM with tools ===========
|
|
|
|
| 41 |
automatic_speech_recognition, # Transcribe an audio file to text
|
| 42 |
get_webpage_content, # Load a web page and return it to markdown
|
| 43 |
python_repl_tool, # Python code interpreter
|
| 44 |
+
get_youtube_transcript, # Get the transcript of a YouTube video
|
| 45 |
]
|
| 46 |
|
| 47 |
tools = community_tools + custom_tools
|
langgraph_dir/config.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
-
# OPENAI_MODEL_NAME = "gpt-4.1-nano"
|
| 2 |
-
OPENAI_MODEL_NAME = "gpt-4.1-mini"
|
| 3 |
# OPENAI_MODEL_NAME = "gpt-4.1"
|
| 4 |
-
|
| 5 |
-
# QUERY_IMAGE_MODEL_NAME = "gpt-4.1-mini"
|
| 6 |
-
QUERY_IMAGE_MODEL_NAME = "o4-mini"
|
|
|
|
| 1 |
+
# OPENAI_MODEL_NAME = "gpt-4.1-nano"
|
| 2 |
+
# OPENAI_MODEL_NAME = "gpt-4.1-mini"
|
| 3 |
# OPENAI_MODEL_NAME = "gpt-4.1"
|
| 4 |
+
OPENAI_MODEL_NAME = "o4-mini"
|
|
|
|
|
|
langgraph_dir/custom_tools.py
CHANGED
|
@@ -8,6 +8,8 @@ from langchain_core.tools import tool, Tool
|
|
| 8 |
from langchain_experimental.utilities import PythonREPL
|
| 9 |
from pypdf import PdfReader
|
| 10 |
from io import BytesIO
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
# --- Basic operations --- #
|
|
@@ -241,4 +243,25 @@ You are allowed to download files from URLs.
|
|
| 241 |
Do NOT send commands that block indefinitely (e.g., `input()`).""",
|
| 242 |
func=python_repl.run,
|
| 243 |
args_schema=PythonREPLInput
|
| 244 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from langchain_experimental.utilities import PythonREPL
|
| 9 |
from pypdf import PdfReader
|
| 10 |
from io import BytesIO
|
| 11 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 12 |
+
from pytube import extract
|
| 13 |
|
| 14 |
|
| 15 |
# --- Basic operations --- #
|
|
|
|
| 243 |
Do NOT send commands that block indefinitely (e.g., `input()`).""",
|
| 244 |
func=python_repl.run,
|
| 245 |
args_schema=PythonREPLInput
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
@tool
|
| 249 |
+
def get_youtube_transcript(page_url: str) -> str:
|
| 250 |
+
"""Get the transcript of a YouTube video
|
| 251 |
+
|
| 252 |
+
Args:
|
| 253 |
+
page_url (str): YouTube URL of the video
|
| 254 |
+
"""
|
| 255 |
+
try:
|
| 256 |
+
# get video ID from URL
|
| 257 |
+
video_id = extract.video_id(page_url)
|
| 258 |
+
|
| 259 |
+
# get transcript
|
| 260 |
+
ytt_api = YouTubeTranscriptApi()
|
| 261 |
+
transcript = ytt_api.fetch(video_id)
|
| 262 |
+
|
| 263 |
+
# keep only text
|
| 264 |
+
txt = '\n'.join([s.text for s in transcript.snippets])
|
| 265 |
+
return txt
|
| 266 |
+
except Exception as e:
|
| 267 |
+
return f"get_youtube_transcript failed: {e}"
|
requirements.txt
CHANGED
|
@@ -12,4 +12,7 @@ langchain-community
|
|
| 12 |
duckduckgo-search
|
| 13 |
markdownify
|
| 14 |
beautifulsoup4
|
| 15 |
-
langchain_experimental
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
duckduckgo-search
|
| 13 |
markdownify
|
| 14 |
beautifulsoup4
|
| 15 |
+
langchain_experimental
|
| 16 |
+
pypdf
|
| 17 |
+
youtube-transcript-api
|
| 18 |
+
pytube
|