Final_Assignment_Project

Sleeping

App Files Community

wt002 committed 29 days ago

Commit

c93c36d

verified ·

1 Parent(s): 2234779

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -41

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.utilities import WikipediaAPIWrapper
 from langchain_community.document_loaders import ArxivLoader
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -25,7 +26,8 @@ import io
 import contextlib
 import traceback
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-# from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel # These are already imported above
 class CodeLlamaTool(Tool):
     name = "code_llama_tool"
@@ -54,14 +56,13 @@ class CodeLlamaTool(Tool):
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            max_new_tokens=256, # 512
-            temperature=0.0,
             truncation=True
         )
     def forward(self, question: str) -> str:
-        # Corrected: Use self.prompt and then pass it to the pipeline
-        self.prompt = f"""You are an AI that uses Python code to answer questions.
 Question: {question}
 Instructions:
 - If solving requires code, use a block like <tool>code</tool>.
@@ -76,7 +77,7 @@ print(5 * math.sqrt(36))
 <final>30.0</final>
 Answer:"""
-        response = self.pipeline(self.prompt)[0]["generated_text"] # Pass self.prompt
         return self.parse_and_execute(response)
     def parse_and_execute(self, response: str) -> str:
@@ -106,6 +107,11 @@ Answer:"""
         except Exception:
             return f"Error executing code:\n{traceback.format_exc()}"
 class WikiSearchTool(Tool):
     name = "wiki_search"
     description = "Search Wikipedia for a query and return up to 2 results."
@@ -125,6 +131,9 @@ class WikiSearchTool(Tool):
         )
         return formatted_search_docs
 class StringReverseTool(Tool):
     name = "reverse_message"
     description = "When you received a strange text, try to reverse it and perform action described in reversed message."
@@ -163,10 +172,10 @@ class KeywordsExtractorTool(Tool):
             filtered_words = []
             for w in all_words:
                 if w not in conjunctions:
-                    filtered_words.append(w) # Corrected: Use append instead of push
             word_counts = Counter(filtered_words)
             k = 5
-            return str(heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])) # Corrected: Convert list of tuples to string
         except Exception as e:
             return f"Error during extracting most common words: {e}"
@@ -216,6 +225,8 @@ def parse_excel_to_json(task_id: str) -> dict:
             "status": f"Error in parsing Excel file: {str(e)}"
         }
 class VideoTranscriptionTool(Tool):
     """Fetch transcripts from YouTube videos"""
     name = "transcript_video"
@@ -228,17 +239,14 @@ class VideoTranscriptionTool(Tool):
     def forward(self, url: str, include_timestamps: bool = False) -> str:
-        # Corrected: Handle various YouTube URL formats
-        video_id = None
-        if "youtube.com/watch?v=" in url:
             video_id = url.split("v=")[1].split("&")[0]
         elif "youtu.be/" in url:
             video_id = url.split("youtu.be/")[1].split("?")[0]
-        elif len(url.strip()) == 11 and not ("http://" in url or "https://" in url):  # Direct ID
             video_id = url.strip()
-        if not video_id:
-            return f"YouTube URL or ID: {url} is invalid or not supported!"
         try:
             transcription = YouTubeTranscriptApi.get_transcript(video_id)
@@ -257,16 +265,9 @@ class VideoTranscriptionTool(Tool):
 class BasicAgent:
     def __init__(self):
-        token = os.environ.get("HF_TOKEN") # Corrected: Use HF_TOKEN
-        # Initialize tokenizer
-        self.model_id = "codellama/CodeLlama-7b-Instruct-hf"
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=token)
-        # Model (e.g., HfApiModel or other)
-        self.model = HfApiModel(
-            model=self.model_id,
-            temperature=0.0,
             token=token
         )
@@ -280,39 +281,35 @@ class BasicAgent:
         final_answer_tool = FinalAnswerTool()
         video_transcription_tool = VideoTranscriptionTool()
-        # New Llama Tool
         code_llama_tool = CodeLlamaTool()
-        self.system_prompt = f"""
 You are my general AI assistant. Your task is to answer the question I asked.
-First, provide reasoning. Then return: FINAL ANSWER: [your answer].
-Answer should be a short string, number, or comma-separated list. Keep it brief.
 """
         self.agent = CodeAgent(
-            model=self.model,
             tools=[
                 search_tool, wiki_search_tool, str_reverse_tool,
                 keywords_extract_tool, speech_to_text_tool,
                 visit_webpage_tool, final_answer_tool,
                 parse_excel_to_json, video_transcription_tool,
-                code_llama_tool
             ],
             add_base_tools=True
         )
-        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + self.system_prompt
-    def _build_safe_prompt(self, history: str, question: str, max_total_tokens=32768, reserve_for_output=2048):
-        max_input_tokens = max_total_tokens - reserve_for_output
-        full_prompt = f"{self.system_prompt}\n{history}\nQuestion: {question}"
-        tokenized = self.tokenizer(full_prompt, truncation=True, max_length=max_input_tokens, return_tensors="pt")
-        return self.tokenizer.decode(tokenized["input_ids"][0])
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        history = ""  # could be conversation history, if available
-        safe_prompt = self._build_safe_prompt(history, question)
-        answer = self.agent.run(safe_prompt)
         print(f"Agent returning answer: {answer}")
         return answer

 from langchain_community.utilities import WikipediaAPIWrapper
 from langchain_community.document_loaders import ArxivLoader
+# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 import contextlib
 import traceback
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from smolagents import Tool, CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, HfApiModel
 class CodeLlamaTool(Tool):
     name = "code_llama_tool"
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
+            max_new_tokens=512,
+            temperature=0.2,
             truncation=True
         )
     def forward(self, question: str) -> str:
+        prompt = f"""You are an AI that uses Python code to answer questions.
 Question: {question}
 Instructions:
 - If solving requires code, use a block like <tool>code</tool>.
 <final>30.0</final>
 Answer:"""
+        response = self.pipeline(prompt)[0]["generated_text"]
         return self.parse_and_execute(response)
     def parse_and_execute(self, response: str) -> str:
         except Exception:
             return f"Error executing code:\n{traceback.format_exc()}"
+#from smolagents import Tool
+#from langchain_community.document_loaders import WikipediaLoader
 class WikiSearchTool(Tool):
     name = "wiki_search"
     description = "Search Wikipedia for a query and return up to 2 results."
         )
         return formatted_search_docs
 class StringReverseTool(Tool):
     name = "reverse_message"
     description = "When you received a strange text, try to reverse it and perform action described in reversed message."
             filtered_words = []
             for w in all_words:
                 if w not in conjunctions:
+                    filtered_words.push(w)
             word_counts = Counter(filtered_words)
             k = 5
+            return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
         except Exception as e:
             return f"Error during extracting most common words: {e}"
             "status": f"Error in parsing Excel file: {str(e)}"
         }
 class VideoTranscriptionTool(Tool):
     """Fetch transcripts from YouTube videos"""
     name = "transcript_video"
     def forward(self, url: str, include_timestamps: bool = False) -> str:
+        if "youtube.com/watch" in url:
             video_id = url.split("v=")[1].split("&")[0]
         elif "youtu.be/" in url:
             video_id = url.split("youtu.be/")[1].split("?")[0]
+        elif len(url.strip()) == 11:  # Direct ID
             video_id = url.strip()
+        else:
+            return f"YouTube URL or ID: {url} is invalid!"
         try:
             transcription = YouTubeTranscriptApi.get_transcript(video_id)
 class BasicAgent:
     def __init__(self):
+        token = os.environ.get("HF_API_TOKEN")
+        model = HfApiModel(
+            temperature=0.1,
             token=token
         )
         final_answer_tool = FinalAnswerTool()
         video_transcription_tool = VideoTranscriptionTool()
+        # ✅ New Llama Tool
         code_llama_tool = CodeLlamaTool()
+        system_prompt = f"""
 You are my general AI assistant. Your task is to answer the question I asked.
+First, provide an explanation of your reasoning, step by step, to arrive at the answer.
+Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
 """
         self.agent = CodeAgent(
+            model=model,
             tools=[
                 search_tool, wiki_search_tool, str_reverse_tool,
                 keywords_extract_tool, speech_to_text_tool,
                 visit_webpage_tool, final_answer_tool,
                 parse_excel_to_json, video_transcription_tool,
+                code_llama_tool  # 🔧 Add here
             ],
             add_base_tools=True
         )
+        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        answer = self.agent.run(question)
         print(f"Agent returning answer: {answer}")
         return answer