wt002 committed on
Commit
2c66cb4
·
verified ·
1 Parent(s): 73ff364

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -15
app.py CHANGED
@@ -1,34 +1,189 @@
1
  import os
2
  import gradio as gr
3
- from dotenv import load_dotenv
4
- import inspect
5
  import requests
 
6
  import pandas as pd
7
- from langchain_core.messages import HumanMessage
8
- from agent import build_graph
 
 
 
 
 
9
 
10
- load_dotenv()
11
 
12
  # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
- # --- Basic Agent Definition ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
class BasicAgent:
    """Agent that answers a question by running it through the graph from `build_graph()`."""

    def __init__(self):
        print("BasicAgent initialized.")
        # The graph is built once and reused for every question.
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Send `question` through the graph and return the last message's text, minus its first 14 characters."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # The graph takes the question wrapped as a single HumanMessage.
        initial_state = {"messages": [HumanMessage(content=question)]}
        final_state = self.graph.invoke(initial_state)
        last_message = final_state['messages'][-1]
        # NOTE(review): the slice drops a fixed 14 characters -- presumably the
        # "FINAL ANSWER: " prefix emitted by the graph; confirm against agent.py.
        return last_message.content[14:]
31
-
32
 
33
  def run_and_submit_all( profile: gr.OAuthProfile | None):
34
  """
 
1
  import os
2
  import gradio as gr
 
 
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
+ from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, WikipediaSearchTool, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
7
+ from dotenv import load_dotenv
8
+ import heapq
9
+ from collections import Counter
10
+ import re
11
+ from io import BytesIO
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
 
 
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+ # Load environment variables (e.g. HF_API_TOKEN) from a local .env file, if one exists
20
+ load_dotenv()
21
+
class StringReverseTool(Tool):
    """Tool that returns its input string reversed, for text that was written backwards."""

    name = "reverse_message"
    description = "When you received a strange text, try to reverse it and perform action described in reversed message."
    inputs = {
        "message": {
            "type": "string",
            "description": "A message, which looks like strange and can be reversed to get actions to execute."
        }
    }
    output_type = "string"

    def __init__(self):
        # Delegate to the base class so that whatever state `Tool` sets up in
        # its own initialiser exists on this instance; the previous override
        # returned immediately and skipped that step.
        super().__init__()

    def forward(self, message: str) -> str:
        """Return `message` with its characters in reverse order.

        Args:
            message: Text to reverse.

        Returns:
            The reversed text.
        """
        return message[::-1]
38
+
class KeywordsExtractorTool(Tool):
    """Extracts top 5 keywords from a given text based on frequency."""

    name = "keywords_extractor"
    description = "This tool returns the 5 most frequent keywords occur in provided block of text."

    inputs = {
        "text": {
            "type": "string",
            "description": "Text to analyze for keywords.",
        }
    }
    output_type = "string"

    def forward(self, text: str) -> str:
        """Return the five most frequent non-trivial words in `text`.

        Words are matched case-insensitively and a small set of very common
        English words is ignored.

        Args:
            text: Text to analyze.

        Returns:
            A comma-separated string such as ``"cat (3), dog (2)"`` ordered by
            descending frequency (empty when no keyword is found), or an error
            message when `text` cannot be processed.
        """
        ignored_words = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
        top_k = 5
        try:
            all_words = re.findall(r'\b\w+\b', text.lower())
        except (AttributeError, TypeError) as e:
            # Non-string input: report it to the agent instead of crashing the run.
            return f"Error during extracting most common words: {e}"

        word_counts = Counter(w for w in all_words if w not in ignored_words)
        # The declared output_type is "string", so format the (word, count)
        # pairs as text rather than returning the raw list of tuples.
        return ", ".join(
            f"{word} ({count})" for word, count in word_counts.most_common(top_k)
        )
66
+
@tool
def parse_excel_to_json(task_id: str) -> dict:
    """
    For a given task_id fetch the attached Excel file and return its parsed content as structured JSON-like data.
    Fully empty rows are dropped and only the first 20 remaining rows of each sheet are returned. Nothing is written to disk.

    Args:
        task_id: ID of the task whose attached Excel file should be fetched.

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success", or a message describing the HTTP or parsing failure
        }
    """
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

    try:
        response = requests.get(url, timeout=100)
        if response.status_code != 200:
            # Report the HTTP status to the agent instead of raising.
            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}

        workbook = pd.ExcelFile(BytesIO(response.content))
        json_sheets = {}

        for sheet in workbook.sheet_names:
            df = workbook.parse(sheet)
            df = df.dropna(how="all")
            # Cap the number of rows so the result stays small enough to hand to the model.
            json_sheets[sheet] = df.head(20).to_dict(orient="records")

        return {
            "task_id": task_id,
            "sheets": json_sheets,
            "status": "Success"
        }

    except Exception as e:
        # Best-effort tool: any download or parsing failure is returned as a
        # status message so the agent run can continue.
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error in parsing Excel file: {str(e)}"
        }
112
+
class VideoTranscriptionTool(Tool):
    """Fetch transcripts from YouTube videos"""
    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
    }
    output_type = "string"

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        """Return the transcript of a YouTube video as text.

        Args:
            url: A ``youtube.com/watch?...v=<id>`` URL, a ``youtu.be/<id>`` URL,
                or a bare 11-character video ID.
            include_timestamps: When truthy, each transcript part is put on its
                own line prefixed with ``[m:ss]``; otherwise all parts are
                joined with single spaces.

        Returns:
            The transcript text, or an error message when the URL/ID is not
            recognised or the transcript cannot be fetched.
        """
        candidate = url.strip()
        # Only accept `v` as a real query parameter (directly after `?` or `&`)
        # so that a watch URL without it is reported as invalid instead of
        # raising IndexError; the lazy `??` prefers the first `v=` in the query.
        watch_match = re.search(r"youtube\.com/watch\?(?:[^#]*?&)??v=([^&#]+)", candidate)
        short_match = re.search(r"youtu\.be/([^?&#/]+)", candidate)

        if watch_match:
            video_id = watch_match.group(1)
        elif short_match:
            video_id = short_match.group(1)
        elif len(candidate) == 11:  # Direct ID
            video_id = candidate
        else:
            return f"YouTube URL or ID: {url} is invalid!"

        try:
            # NOTE(review): newer youtube-transcript-api releases appear to replace
            # this static call with an instance-based fetch(); confirm the pinned version.
            transcription = YouTubeTranscriptApi.get_transcript(video_id)

            if include_timestamps:
                formatted_transcription = []
                for part in transcription:
                    # `start` is divided into whole minutes and zero-padded seconds.
                    minutes = int(part['start'] // 60)
                    seconds = int(part['start'] % 60)
                    formatted_transcription.append(f"[{minutes}:{seconds:02d}] {part['text']}")
                return "\n".join(formatted_transcription)

            return " ".join(part['text'] for part in transcription)

        except Exception as e:
            # Best-effort tool: surface any fetch failure to the agent as text.
            return f"Error in extracting YouTube transcript: {str(e)}"
148
 
class BasicAgent:
    """Question-answering agent built on a smolagents `CodeAgent` with search, web, audio, Excel and YouTube tools."""

    def __init__(self):
        """Create the model and tools, then build the agent and extend its system prompt."""
        token = os.environ.get("HF_API_TOKEN")
        model = HfApiModel(
            temperature=0.1,
            token=token
        )

        tools = [
            DuckDuckGoSearchTool(),
            WikipediaSearchTool(),
            StringReverseTool(),
            KeywordsExtractorTool(),
            SpeechToTextTool(),
            VisitWebpageTool(),
            FinalAnswerTool(),
            parse_excel_to_json,
            VideoTranscriptionTool(),
        ]

        # Plain string: the text has no placeholders, so no f-string is needed.
        system_prompt = """
        You are my general AI assistant. Your task is to answer the question I asked.
        First, provide an explanation of your reasoning, step by step, to arrive at the answer.
        Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
        [YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
        If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
        If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
        If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
        """
        self.agent = CodeAgent(
            model=model,
            tools=tools,
            add_base_tools=True
        )
        # Append the answer-format instructions to the agent's default system prompt.
        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt

    def __call__(self, question: str) -> str:
        """Run the agent on `question` and return the bare final answer as a string.

        Args:
            question: The question text to answer.

        Returns:
            The agent's answer converted to `str`, with any leading reasoning
            and the "FINAL ANSWER:" marker removed.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self._extract_final_answer(self.agent.run(question))
        print(f"Agent returning answer: {answer}")
        return answer

    @staticmethod
    def _extract_final_answer(raw_answer) -> str:
        """Normalise whatever the agent returned into the bare answer text."""
        if raw_answer is None:
            return ""
        # The run result is not guaranteed to be a string, but callers are promised `str`.
        text = str(raw_answer).strip()
        # The system prompt asks for 'FINAL ANSWER: ...'; keep only what follows
        # the last marker so the prefix and any reasoning are not returned.
        return re.split(r"final answer:", text, flags=re.IGNORECASE)[-1].strip()
 
 
 
187
 
188
  def run_and_submit_all( profile: gr.OAuthProfile | None):
189
  """