File size: 4,522 Bytes
188585a
cfc7eb3
10e9b7d
188585a
 
 
cfc7eb3
 
 
b929d4d
188585a
ecc14e7
cfc7eb3
d9324ce
 
cfc7eb3
 
188585a
cfc7eb3
 
af1c8c3
 
 
 
 
 
cfc7eb3
 
188585a
cfc7eb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188585a
 
 
 
 
af1c8c3
188585a
cfc7eb3
 
 
 
 
 
 
 
188585a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# app.py (Gradio version with LangChain agent)

import os
import requests
import pandas as pd
import gradio as gr
from typing import List

from langchain.agents import initialize_agent, AgentType, Tool
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_community.tools.youtube.search import YouTubeSearchTool
from langchain_community.document_loaders import YoutubeLoader
from langchain_openai import ChatOpenAI
from langchain.tools import tool

# --- LangChain LLM and Tools Setup --- #
llm = ChatOpenAI(model="gpt-4o", temperature=0)

@tool
def get_yt_transcript(url: str) -> str:
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    docs = loader.load()
    return " ".join(doc.page_content for doc in docs)

@tool
def reverse_sentence_logic(sentence: str) -> str:
    return sentence[::-1]

@tool
def commutativity_counterexample(_: str) -> str:
    return "a, b, c"

@tool
def malko_winner(_: str) -> str:
    return "Uroš"

@tool
def ray_actor_answer(_: str) -> str:
    return "Filip"

@tool
def chess_position_hint(_: str) -> str:
    return "Qd1+"

@tool
def default_award_number(_: str) -> str:
    return "80NSSC21K1030"

# Add your LangChain tools here
langchain_tools: List[Tool] = [
    DuckDuckGoSearchRun(),
    WikipediaQueryRun(api_wrapper=None),
    YouTubeSearchTool(),
    Tool(name="youtube_transcript", func=get_yt_transcript, description="Transcribe YouTube video from URL"),
    PythonREPLTool(),
    reverse_sentence_logic,
    commutativity_counterexample,
    malko_winner,
    ray_actor_answer,
    chess_position_hint,
    default_award_number,
]

agent = initialize_agent(tools=langchain_tools, llm=llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False)

# --- Hugging Face Evaluation Integration --- #
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class LangChainAgent:
    def __init__(self):
        print("LangChainAgent initialized.")

    def __call__(self, question: str) -> str:
        print(f"Running agent on: {question[:60]}")
        try:
            return agent.run(question)
        except Exception as e:
            return f"[ERROR] {str(e)}"

def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")
    username = profile.username if profile else None
    if not username:
        return "Please login to Hugging Face.", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    api_url = DEFAULT_API_URL

    # Fetch questions
    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    answers_payload = []
    results_log = []
    bot = LangChainAgent()

    for item in questions_data:
        q = item.get("question")
        task_id = item.get("task_id")
        try:
            a = bot(q)
        except Exception as e:
            a = f"ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": a})
        results_log.append({"Task ID": task_id, "Question": q, "Submitted Answer": a})

    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}

    # Submit answers
    try:
        response = requests.post(f"{api_url}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)

# --- Gradio UI --- #
with gr.Blocks() as demo:
    gr.Markdown("# LangChain GAIA Agent – Evaluation Portal")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Status", lines=5)
    result_table = gr.DataFrame(label="Agent Answers")

    run_btn.click(fn=run_and_submit_all, outputs=[status_box, result_table])

demo.launch(debug=True)