# tools.py

import pandas as pd

from pathlib import Path
import requests
import regex as re
import time
import os
from duckduckgo_search import DDGS
from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# Removed complex safety wrapper - keeping things simple

def _download_file_for_task(task_id: str, ext: str) -> str:
    """
    Helper: attempt to GET the remote file for a given task_id.
    Saves under ./hf_files/{task_id}.{ext}. Returns the local path if successful,
    or an empty string if no file / download failed.
    """
    
    print("reached _download_file_for_task")
    os.makedirs("hf_files", exist_ok=True)
    local_path = os.path.join("hf_files", f"{task_id}.{ext}")
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    
    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code == 200 and resp.content:
            print(f"Downloaded file from {url} to {local_path}")
            with open(local_path, "wb") as f:
                f.write(resp.content)
            return local_path
    except Exception as e:
        print(f"Error downloading file from {url}: {e}")

    # If we get here, either 404 or download error
    return ""

@tool
def image_tool(task_id: str) -> str:
    """
    Expects: task_id (str) — a valid image task ID.
    Returns: image caption from Hugging Face API or error message.
    """

    # Try downloading image with one of the allowed extensions
    for ext in ("png", "jpg", "jpeg"):
        file_path = _download_file_for_task(task_id, ext)
        if file_path and os.path.exists(file_path):
            break
    else:
        return f"Error: Image file for task_id '{task_id}' not found."

    # Read the image bytes
    try:
        with open(file_path, "rb") as f:
            image_bytes = f.read()
    except Exception as e:
        return f"Error reading image: {str(e)}"

    # Load HF token
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return "Error: HF_TOKEN not set in environment."

    # Use a single reliable model
    model = "Salesforce/blip-image-captioning-base"
    headers = {"Authorization": f"Bearer {hf_token}"}

    try:
        # The serverless Inference API expects the raw image bytes as the request body
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=headers,
            data=image_bytes,
            timeout=30
        )
    except Exception as e:
        return f"Error calling HuggingFace API: {e}"

    # Parse response
    if response.status_code != 200:
        return f"Error from model ({model}): {response.status_code} - {response.text}"

    try:
        result = response.json()
        if isinstance(result, list) and result:
            caption = result[0].get("generated_text", "").strip()
        elif isinstance(result, dict):
            caption = result.get("generated_text", "").strip()
        else:
            caption = ""
    except Exception as e:
        return f"Error parsing response: {e}"

    if not caption:
        return "No caption generated by model."

    return f"Image Caption:\n{caption}"
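
# Usage sketch (illustrative only): the @tool decorator wraps each function in
# a LangChain StructuredTool, so a tool can be exercised directly; the task ID
# below is a hypothetical placeholder, not a real one from the scoring API.
#
#     caption = image_tool.invoke({"task_id": "<task-id>"})
#     print(caption)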




@tool
def excel_tool(task_id: str) -> str:
    """
    Downloads <task_id>.xlsx (if any) and returns a stringified list of
    records from "Sheet1" (or the first sheet if "Sheet1" is absent).
    No fallback to user-supplied tables.
    Expects: task_id (str), required; used to download the file.
    Returns: stringified list of records, or an error message.
    """
    print("reached excel_tool")
    sheet   = "Sheet1"

    local_xlsx = _download_file_for_task(task_id, "xlsx")
    if not local_xlsx or not os.path.exists(local_xlsx):
        return "Error: Excel file not found for this task."

    try:
        xls = pd.ExcelFile(local_xlsx)
        df  = pd.read_excel(
            xls,
            sheet_name=sheet if sheet and sheet in xls.sheet_names else xls.sheet_names[0]
        )
        print(f"Excel file read successfully: {str(df.to_dict(orient='records'))}")
        return str(df.to_dict(orient="records"))
    except Exception as e:
        return f"Error reading Excel file: {e}"
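
# Note: reading .xlsx files with pandas requires the "openpyxl" engine to be
# installed. Illustrative output only: for a hypothetical sheet with columns
# "item" and "price", excel_tool returns a string such as
# "[{'item': 'apple', 'price': 1.5}, {'item': 'bread', 'price': 2.0}]".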
    

import openai
@tool
def audio_transcriber_tool(task_id: str) -> str:
    """
    LangGraph tool for transcribing audio via OpenAI's Whisper API.
    Expects: task_id (str), a valid audio task ID.
    Returns: the transcribed text or an error message.
    Always attempts to download the file for the given task ID.
    """
    print("reached audio_transcriber_tool")


    # Always attempt to download the file, regardless of local existence
    local_audio = ""
    for ext in ("mp3", "wav", "m4a"):
        candidate = _download_file_for_task(task_id, ext)
        if candidate:
            local_audio = candidate
            break

    if not local_audio or not os.path.exists(local_audio):
        return "Error: No audio file found (download failed)."
        

    # Send to OpenAI Whisper
    try:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            raise RuntimeError("OPENAI_API_KEY is not set in environment.")

        with open(local_audio, "rb") as audio_file:
            print("reached openai.audio.transcriptions.create")
            response = openai.audio.transcriptions.create(
                model="whisper-1", 
                file=audio_file,
            )
            print("reached response")
        text = response.text.strip()
    except Exception as e:
        text = f"Error during transcription: {e}"
    print(f"Transcription result: {text}")
    return text
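
# Note (assumption): the module-level openai.audio.transcriptions.create call
# above relies on the openai>=1.x SDK. An equivalent explicit-client sketch:
#
#     from openai import OpenAI
#     client = OpenAI()  # reads OPENAI_API_KEY from the environment
#     with open(local_audio, "rb") as audio_file:
#         text = client.audio.transcriptions.create(
#             model="whisper-1", file=audio_file
#         ).text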

@tool
def wikipedia_search_tool(wiki_query: str) -> str:
    """
    Searches Wikipedia for the given query and returns summaries of up to
    two matching pages.
    Expects: wiki_query is a non-empty string.
    Returns: text summaries of the matching pages, or an error message.
    """
    print(f"DEBUG: reached wikipedia_search_tool with query: {wiki_query}")
    try:
        docs = WikipediaLoader(query=wiki_query, load_max_docs=3).load()  # Reduced from 5 to 3
        print(f"DEBUG: WikipediaLoader returned {len(docs)} documents")
        
        result = ""
        counter = 1
        for doc in docs:
            print(f"DEBUG: Processing Wikipedia document {counter}")
            print(f"DEBUG: Document metadata: {doc.metadata}")
            print(f"DEBUG: Document content length: {len(doc.page_content)}")
            
            # Handle different metadata structures
            title = "Unknown Title"
            if hasattr(doc, 'metadata') and doc.metadata:
                # Try different possible title keys
                if 'title' in doc.metadata:
                    title = doc.metadata['title']
                elif 'Title' in doc.metadata:
                    title = doc.metadata['Title']
                elif 'source' in doc.metadata:
                    title = doc.metadata['source']
                else:
                    # Use first available key as title
                    if doc.metadata:
                        first_key = list(doc.metadata.keys())[0]
                        title = f"Wikipedia: {doc.metadata[first_key]}"
            
            print(f"DEBUG: Using Wikipedia title: {title}")
            
            # Trim content to key information only (reduced from 2000 to 800 characters)
            content = doc.page_content[:800] if len(doc.page_content) > 800 else doc.page_content
            
            # Add document but keep it concise
            result += f"\n\nWikipedia Result {counter}: {title}\nSummary: {content}..."
            counter += 1
            
            # Stop after 2 documents to keep response manageable
            if counter > 2:
                break
        
        if not result.strip():
            return "No Wikipedia results found for the given query. [END_OF_SEARCH]"
        
        # Add clear end marker
        result += "\n\n[END_OF_SEARCH] - Wikipedia search complete. Use this information to answer the question."
        
        print(f"DEBUG: Final Wikipedia result length: {len(result)}")
        return result
        
    except Exception as e:
        error_msg = f"Error during Wikipedia search: {str(e)} [END_OF_SEARCH]"
        print(f"DEBUG: {error_msg}")
        return error_msg
    
@tool
def arxiv_search_tool(arxiv_query: str) -> str:
    """
    Searches ArXiv for the given query and returns summaries of up to
    two matching papers.
    Expects: arxiv_query is a non-empty string.
    Returns: text summaries of the matching papers, or an error message.
    """
    print(f"DEBUG: reached arxiv_search_tool with query: {arxiv_query}")
    try:
        docs = ArxivLoader(query=arxiv_query, load_max_docs=3).load()  # Reduced from 5 to 3
        print(f"DEBUG: ArxivLoader returned {len(docs)} documents")
        
        result = ""
        counter = 1
        for doc in docs:
            print(f"DEBUG: Processing document {counter}")
            print(f"DEBUG: Document metadata: {doc.metadata}")
            print(f"DEBUG: Document content length: {len(doc.page_content)}")
            
            # Handle different metadata structures
            title = "Unknown Title"
            if hasattr(doc, 'metadata') and doc.metadata:
                # Try different possible title keys
                if 'title' in doc.metadata:
                    title = doc.metadata['title']
                elif 'Title' in doc.metadata:
                    title = doc.metadata['Title']
                elif 'entry_id' in doc.metadata:
                    title = doc.metadata['entry_id']
                elif 'summary' in doc.metadata:
                    title = f"ArXiv Paper {counter}"
                else:
                    # Use first available key as title
                    if doc.metadata:
                        first_key = list(doc.metadata.keys())[0]
                        title = f"{first_key}: {doc.metadata[first_key]}"
            
            print(f"DEBUG: Using title: {title}")
            
            # Trim content to key information only (reduced from 2000 to 800 characters)
            content = doc.page_content[:800] if len(doc.page_content) > 800 else doc.page_content
            
            # Add document but keep it concise
            result += f"\n\nArXiv Result {counter}: {title}\nAbstract/Summary: {content}..."
            counter += 1
            
            # Stop after 2 documents to keep response manageable
            if counter > 2:
                break
        
        if not result.strip():
            return "No ArXiv results found for the given query. [END_OF_SEARCH]"
        
        # Add clear end marker
        result += "\n\n[END_OF_SEARCH] - ArXiv search complete. Use this information to answer the question."
        
        print(f"DEBUG: Final ArXiv result length: {len(result)}")
        return result
        
    except Exception as e:
        error_msg = f"Error during Arxiv search: {str(e)} [END_OF_SEARCH]"
        print(f"DEBUG: {error_msg}")
        return error_msg


from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
LLM = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0.2)

@tool
def analyze_code_tool(task_id: str) -> str:
    """
    Expects: task_id (str), used to download the matching .py file.
    Reads the code (max 400 lines / 10 kB) and asks the LLM for:
        • plain-language summary
        • list of key functions/classes
        • obvious bugs or style smells
    Returns that analysis as a string.
    """
    print("reached analyze_code_tool")
    code_txt = ""
    if not task_id:
        code_txt = "No code provided."
    else:
        path = _download_file_for_task(task_id, "py")
        if not path:
            return "Error: .py file not found for this task."
        code_txt = Path(path).read_text(encoding="utf-8", errors="ignore")

    # Truncate for safety
    lines = code_txt.splitlines()[:400]
    code_sample = "\n".join(lines)[:10_000]

    prompt = [
        SystemMessage(content="You are a senior Python code reviewer."),
        HumanMessage(content=(
            "Please analyse the following code. "
            "Summarise what it does, list key functions/classes, "
            "and point out any obvious bugs, performance issues or style problems.\n\n"
            f"```python\n{code_sample}\n```\n"
            "If you can determine the output of the code, include it in your analysis."
        ))
    ]
    return LLM.invoke(prompt).content.strip()
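
# A minimal wiring sketch (assumption: the agent graph that consumes this
# module is defined elsewhere). TOOLS and LLM_WITH_TOOLS are illustrative
# names, not referenced by the rest of this file; bind_tools lets the chat
# model emit structured calls to these tools.
TOOLS = [
    image_tool,
    excel_tool,
    audio_transcriber_tool,
    wikipedia_search_tool,
    arxiv_search_tool,
    analyze_code_tool,
]
LLM_WITH_TOOLS = LLM.bind_tools(TOOLS)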


# def web_search_tool(state: AgentState) -> AgentState:
#     """
#     Expects: state["web_search_query"] is a non‐empty string.
#     Returns: {"web_search_query": None, "web_search_result": <string>}.
#     Retries up to 5 times on either a DuckDuckGo "202 Ratelimit" response or any exception (e.g. timeout).
#     """
#     print("reached web_search_tool")
#     query = state.get("web_search_query", "")
#     if not query:
#         return {}  # nothing to do

#     ddg = DDGS()
#     max_retries = 5
#     result_text = ""

#     for attempt in range(1, max_retries + 1):
#         try:
#             result_text = str(ddg.text(query, max_results=5))
#         except Exception as e:
#             # Network error or timeout—retry up to max_retries
#             if attempt < max_retries:
#                 print(f"web_search_tool: exception '{e}', retrying in 4 seconds ({attempt}/{max_retries})")
#                 time.sleep(4)
#                 continue
#             else:
#                 # Final attempt failed
#                 return {
#                     "web_search_query": None,
#                     "web_search_result": f"Error during DuckDuckGo search: {e}"
#                 }

#         # Check for DuckDuckGo rate‐limit indicator
#         if "202 Ratelimit" in result_text:
#             if attempt < max_retries:
#                 print(f"web_search_tool: received '202 Ratelimit', retrying in 4 seconds ({attempt}/{max_retries})")
#                 time.sleep(4)
#                 continue
#             else:
#                 # Final attempt still rate‐limited
#                 break

#         # Successful response (no exception and no rate‐limit text)
#         break

#     return {
#         "web_search_query": None,
#         "web_search_result": result_text
#     }
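

# A hedged sketch of an active replacement for the commented-out
# web_search_tool above: a plain @tool wrapper around DDGS().text with the
# same retry-on-rate-limit idea. Illustrative, not part of the original
# module's tool set.
@tool
def web_search_tool(web_query: str) -> str:
    """
    Searches DuckDuckGo for the given query.
    Expects: web_query is a non-empty string.
    Returns: stringified search results or an error message.
    """
    if not web_query:
        return "Error: empty web search query."

    max_retries = 5
    result_text = ""
    for attempt in range(1, max_retries + 1):
        try:
            result_text = str(DDGS().text(web_query, max_results=5))
        except Exception as e:
            if attempt < max_retries:
                time.sleep(4)
                continue
            return f"Error during DuckDuckGo search: {e}"

        # Retry on DuckDuckGo's rate-limit marker, otherwise accept the result
        if "202 Ratelimit" in result_text and attempt < max_retries:
            time.sleep(4)
            continue
        break

    return result_text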