Spaces:

siyah1
/

Z-Agent

Build error

App Files Files

siyah1 committed on May 9

Commit

569dd78

verified ·

1 Parent(s): 52d8119

Update app.py

Browse files

Files changed (1) hide show

app.py +203 -270

app.py CHANGED Viewed

@@ -1,33 +1,75 @@
 import streamlit as st
-import numpy as np
-import pandas as pd
-from smolagents import CodeAgent, tool
-from typing import Union, List, Dict, Optional
-import matplotlib.pyplot as plt
-import seaborn as sns
-import os
 from groq import Groq
-from dataclasses import dataclass
-import tempfile
-import base64
-import io
 class GroqLLM:
-    """Compatible LLM interface for smolagents CodeAgent"""
     def __init__(self, model_name="llama-3.1-8B-Instant"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
-        """Make the class callable as required by smolagents"""
         try:
-            # Handle different prompt formats
-            if isinstance(prompt, (dict, list)):
-                prompt_str = str(prompt)
-            else:
-                prompt_str = str(prompt)
-            # Create a properly formatted message
             completion = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=[{
@@ -40,274 +82,165 @@ class GroqLLM:
             )
             return completion.choices[0].message.content if completion.choices else "Error: No response generated"
         except Exception as e:
             error_msg = f"Error generating response: {str(e)}"
-            print(error_msg)
             return error_msg
-    def generate(self, prompt: Union[str, dict, List[Dict]], **kwargs) -> object:
-        """Add generate method to make compatible with smolagents CodeAgent
-        Args:
-            prompt: The prompt to send to the model
-            **kwargs: Additional keyword arguments to support CodeAgent API
-                     (stop_sequences, etc.) - these are ignored in the Groq implementation
-        Returns:
-            An object with a 'content' attribute containing the response text
-        """
-        response_text = self.__call__(prompt)
-        # Create a simple object with a content attribute
-        class Response:
-            def __init__(self, content):
-                self.content = content
-        return Response(response_text)
-class DataAnalysisAgent(CodeAgent):
-    """Extended CodeAgent with dataset awareness"""
-    def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self._dataset = dataset
-    @property
-    def dataset(self) -> pd.DataFrame:
-        """Access the stored dataset"""
-        return self._dataset
-    def run(self, prompt: str) -> str:
-        """Override run method to include dataset context"""
-        dataset_info = f"""
-        Dataset Shape: {self.dataset.shape}
-        Columns: {', '.join(self.dataset.columns)}
-        Data Types: {self.dataset.dtypes.to_dict()}
-        """
-        enhanced_prompt = f"""
-        Analyze the following dataset:
-        {dataset_info}
-        Task: {prompt}
-        Use the provided tools to analyze this specific dataset and return detailed results.
-        """
-        return super().run(enhanced_prompt)
-@tool
-def analyze_basic_stats(data: pd.DataFrame) -> str:
-    """Calculate basic statistical measures for numerical columns in the dataset.
-    This function computes fundamental statistical metrics including mean, median,
-    standard deviation, skewness, and counts of missing values for all numerical
-    columns in the provided DataFrame.
-    Args:
-        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
-            should contain at least one numerical column for meaningful analysis.
-    Returns:
-        str: A string containing formatted basic statistics for each numerical column,
-            including mean, median, standard deviation, skewness, and missing value counts.
     """
-    # Access dataset from agent if no data provided
-    if data is None:
-        data = tool.agent.dataset
-    stats = {}
-    numeric_cols = data.select_dtypes(include=[np.number]).columns
-    for col in numeric_cols:
-        stats[col] = {
-            'mean': float(data[col].mean()),
-            'median': float(data[col].median()),
-            'std': float(data[col].std()),
-            'skew': float(data[col].skew()),
-            'missing': int(data[col].isnull().sum())
-        }
-    return str(stats)
-@tool
-def generate_correlation_matrix(data: pd.DataFrame) -> str:
-    """Generate a visual correlation matrix for numerical columns in the dataset.
-    This function creates a heatmap visualization showing the correlations between
-    all numerical columns in the dataset. The correlation values are displayed
-    using a color-coded matrix for easy interpretation.
-    Args:
-        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
-            should contain at least two numerical columns for correlation analysis.
-    Returns:
-        str: A base64 encoded string representing the correlation matrix plot image,
-            which can be displayed in a web interface or saved as an image file.
     """
-    # Access dataset from agent if no data provided
-    if data is None:
-        data = tool.agent.dataset
-    numeric_data = data.select_dtypes(include=[np.number])
-    plt.figure(figsize=(10, 8))
-    sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
-    plt.title('Correlation Matrix')
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    plt.close()
-    return base64.b64encode(buf.getvalue()).decode()
-@tool
-def analyze_categorical_columns(data: pd.DataFrame) -> str:
-    """Analyze categorical columns in the dataset for distribution and frequencies.
-    This function examines categorical columns to identify unique values, top categories,
-    and missing value counts, providing insights into the categorical data distribution.
-    Args:
-        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
-            should contain at least one categorical column for meaningful analysis.
-    Returns:
-        str: A string containing formatted analysis results for each categorical column,
-            including unique value counts, top categories, and missing value counts.
-    """
-    # Access dataset from agent if no data provided
-    if data is None:
-        data = tool.agent.dataset
-    categorical_cols = data.select_dtypes(include=['object', 'category']).columns
-    analysis = {}
-    for col in categorical_cols:
-        analysis[col] = {
-            'unique_values': int(data[col].nunique()),
-            'top_categories': data[col].value_counts().head(5).to_dict(),
-            'missing': int(data[col].isnull().sum())
-        }
-    return str(analysis)
-@tool
-def suggest_features(data: pd.DataFrame) -> str:
-    """Suggest potential feature engineering steps based on data characteristics.
-    This function analyzes the dataset's structure and statistical properties to
-    recommend possible feature engineering steps that could improve model performance.
-    Args:
-        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
-            can contain both numerical and categorical columns.
-    Returns:
-        str: A string containing suggestions for feature engineering based on
-            the characteristics of the input data.
     """
-    # Access dataset from agent if no data provided
-    if data is None:
-        data = tool.agent.dataset
-    suggestions = []
-    numeric_cols = data.select_dtypes(include=[np.number]).columns
-    categorical_cols = data.select_dtypes(include=['object', 'category']).columns
-    if len(numeric_cols) >= 2:
-        suggestions.append("Consider creating interaction terms between numerical features")
-    if len(categorical_cols) > 0:
-        suggestions.append("Consider one-hot encoding for categorical variables")
-    for col in numeric_cols:
-        if data[col].skew() > 1 or data[col].skew() < -1:
-            suggestions.append(f"Consider log transformation for {col} due to skewness")
-    return '\n'.join(suggestions)
-def main():
-    st.title("Data Analysis Assistant")
-    st.write("Upload your dataset and get automated analysis with natural language interaction.")
-    # Initialize session state
-    if 'data' not in st.session_state:
-        st.session_state['data'] = None
-    if 'agent' not in st.session_state:
-        st.session_state['agent'] = None
-    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
-    try:
-        if uploaded_file is not None:
-            with st.spinner('Loading and processing your data...'):
-                # Load the dataset
-                data = pd.read_csv(uploaded_file)
-                st.session_state['data'] = data
-                # Initialize the agent with the dataset
-                st.session_state['agent'] = DataAnalysisAgent(
-                    dataset=data,
-                    tools=[analyze_basic_stats, generate_correlation_matrix,
-                           analyze_categorical_columns, suggest_features],
-                    model=GroqLLM(),
-                    additional_authorized_imports=["pandas", "numpy", "matplotlib", "seaborn"]
-                )
-                st.success(f'Successfully loaded dataset with {data.shape[0]} rows and {data.shape[1]} columns')
-                st.subheader("Data Preview")
-                st.dataframe(data.head())
-        if st.session_state['data'] is not None:
-            analysis_type = st.selectbox(
-                "Choose analysis type",
-                ["Basic Statistics", "Correlation Analysis", "Categorical Analysis",
-                 "Feature Engineering", "Custom Question"]
-            )
-            if analysis_type == "Basic Statistics":
-                with st.spinner('Analyzing basic statistics...'):
-                    result = st.session_state['agent'].run(
-                        "Use the analyze_basic_stats tool to analyze this dataset and "
-                        "provide insights about the numerical distributions."
-                    )
-                    st.write(result)
-            elif analysis_type == "Correlation Analysis":
-                with st.spinner('Generating correlation matrix...'):
-                    result = st.session_state['agent'].run(
-                        "Use the generate_correlation_matrix tool to analyze correlations "
-                        "and explain any strong relationships found."
-                    )
-                    if isinstance(result, str) and result.startswith('data:image') or ',' in result:
-                        st.image(f"data:image/png;base64,{result.split(',')[-1]}")
-                    else:
-                        st.write(result)
-            elif analysis_type == "Categorical Analysis":
-                with st.spinner('Analyzing categorical columns...'):
-                    result = st.session_state['agent'].run(
-                        "Use the analyze_categorical_columns tool to examine the "
-                        "categorical variables and explain the distributions."
-                    )
-                    st.write(result)
-            elif analysis_type == "Feature Engineering":
-                with st.spinner('Generating feature suggestions...'):
-                    result = st.session_state['agent'].run(
-                        "Use the suggest_features tool to recommend potential "
-                        "feature engineering steps for this dataset."
                     )
-                    st.write(result)
-            elif analysis_type == "Custom Question":
-                question = st.text_input("What would you like to know about your data?")
-                if question:
-                    with st.spinner('Analyzing...'):
-                        result = st.session_state['agent'].run(question)
-                        st.write(result)
-    except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
-if __name__ == "__main__":
-    main()

 import streamlit as st
+import pandas as pd
+from typing import Union, List, Dict
 from groq import Groq
+import os
+from duckduckgo_search import DDGS
+class DuckDuckGoSearch:
+    """
+    News search helper built on the duckduckgo_search ``DDGS`` client.
+
+    Calling an instance runs ``DDGS.news`` for the query and returns the hits
+    as one human-readable string. Failures never propagate to the caller: an
+    empty result set yields a message starting with "No results" and any
+    exception yields a message starting with "Search error", so both cases
+    can be detected by prefix.
+    """
+    def __init__(self):
+        # One client is created up front and reused for every query.
+        self.ddgs = DDGS()
+    @staticmethod
+    def _first_value(result: dict, keys: tuple, fallback: str):
+        """Return the first non-empty value found under ``keys``, else ``fallback``."""
+        for key in keys:
+            value = result.get(key)
+            # A key that is present but None/'' must not shadow the fallback.
+            if value:
+                return value
+        return fallback
+    def __call__(self, query: str, max_results: int = 5) -> str:
+        """
+        Search news for ``query`` and format up to ``max_results`` hits.
+
+        Args:
+            query: Search text passed to ``DDGS.news``.
+            max_results: Upper bound on the number of hits requested.
+
+        Returns:
+            A numbered, newline-separated listing (title, date, source,
+            summary, URL per hit), or a "No results..." / "Search error..."
+            message when nothing could be retrieved.
+        """
+        try:
+            # ``or ()`` keeps a None return from the client on the
+            # "No results" path instead of surfacing it as a TypeError.
+            hits = list(self.ddgs.news(
+                query,
+                max_results=max_results,
+                region='wt-wt',  # Worldwide results
+                safesearch='on'
+            ) or ())
+            if not hits:
+                return "No results found. Try modifying your search query."
+            formatted_results = []
+            for idx, result in enumerate(hits, 1):
+                title = self._first_value(result, ('title',), 'No title available')
+                snippet = self._first_value(result, ('body', 'snippet'), 'No description available')
+                source = self._first_value(result, ('source',), 'Unknown source')
+                url = self._first_value(result, ('url', 'link'), 'No link available')
+                date = self._first_value(result, ('date',), 'Date not available')
+                formatted_results.append(
+                    f"{idx}. Title: {title}\n"
+                    f"   Date: {date}\n"
+                    f"   Source: {source}\n"
+                    f"   Summary: {snippet}\n"
+                    f"   URL: {url}\n"
+                )
+            return "\n".join(formatted_results)
+        except Exception as e:
+            # Deliberate best-effort boundary: the failure is reported as text
+            # so the UI can show it instead of crashing.
+            error_msg = f"Search error: {str(e)}\nTry again with a different search term or check your internet connection."
+            print(f"DuckDuckGo search error: {str(e)}")  # For logging
+            return error_msg
 class GroqLLM:
+    """
+    LLM interface using Groq's LLama model.
+    Handles API communication and response processing.
+    """
     def __init__(self, model_name="llama-3.1-8B-Instant"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
         try:
+            # Convert prompt to string if it's a complex structure
+            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
+            # Make API call to Groq
             completion = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=[{
             )
             return completion.choices[0].message.content if completion.choices else "Error: No response generated"
         except Exception as e:
             error_msg = f"Error generating response: {str(e)}"
+            print(error_msg)  # For logging
             return error_msg
+def create_analysis_prompt(topic: str, search_results: str) -> str:
+    """
+    Build the LLM prompt for a news analysis of ``topic``.
+
+    The search results are embedded verbatim, followed by a fixed five-part
+    outline (key points, stakeholders, impact, perspectives, fact check)
+    and a request for journalistic formatting with section headers.
+    """
+    # Every line after the first carries a four-space indent that is part of
+    # the prompt text itself; sub-bullets are indented by seven spaces.
+    prompt_lines = (
+        f"Analyze the following news information about {topic}.",
+        f"    Search Results: {search_results}",
+        "    Please provide a comprehensive analysis including:",
+        "    1. Key Points Summary:",
+        "       - Main events and developments",
+        "       - Critical updates and changes",
+        "    2. Stakeholder Analysis:",
+        "       - Primary parties involved",
+        "       - Their roles and positions",
+        "    3. Impact Assessment:",
+        "       - Immediate implications",
+        "       - Potential long-term effects",
+        "       - Broader context and significance",
+        "    4. Multiple Perspectives:",
+        "       - Different viewpoints on the issue",
+        "       - Areas of agreement and contention",
+        "    5. Fact Check & Reliability:",
+        "       - Verification of major claims",
+        "       - Consistency across sources",
+        "       - Source credibility assessment",
+        "    Please format the analysis in a clear, journalistic style with section headers.",
+    )
+    return "\n".join(prompt_lines)
+def log_agent_activity(prompt: str, result: str, agent_name: str):
+    """
+    Render a collapsible activity log in the Streamlit page.
+
+    Inside an expander titled "View Agent Activity Log" this writes a heading
+    naming ``agent_name``, then shows ``prompt`` and ``result`` as plain-text
+    code blocks under their own labels.
+    """
+    activity_panel = st.expander("View Agent Activity Log")
+    with activity_panel:
+        st.write(f"### Agent Activity ({agent_name}):")
+        # Prompt first, then output; each is a label followed by a text block.
+        for heading, payload in (
+            ("**Input Prompt:**", prompt),
+            ("**Analysis Output:**", result),
+        ):
+            st.write(heading)
+            st.code(payload, language="text")
+# ---------------------------------------------------------------------------
+# Streamlit page script: collects a news topic, runs a DuckDuckGo news search,
+# asks the Groq LLM for an analysis of the hits, and renders the outcome.
+# ---------------------------------------------------------------------------
+# Initialize Streamlit app
+st.set_page_config(page_title="News Analysis Tool", layout="wide")
+# Title and description
+st.title("🔍 AI News Analysis Tool")
+st.write("""
+This tool combines the power of Groq's LLama 3.1 8B Instant model with DuckDuckGo
+search to provide in-depth news analysis. Get comprehensive insights and multiple
+perspectives on any news topic.
+""")
+# Extra instruction appended to the prompt for each "Analysis Type" choice, so
+# the selector actually changes what the model is asked to produce.
+# "Comprehensive" adds nothing because the base prompt already asks for that.
+ANALYSIS_STYLE_INSTRUCTIONS = {
+    "Comprehensive": "",
+    "Quick Summary": "Keep the analysis brief: at most two short bullet points per section.",
+    "Technical": "Write for a specialist audience: use precise terminology and include concrete figures where the sources provide them.",
+    "Simplified": "Write for a general audience: use plain language and avoid jargon.",
+}
+# Prefixes of the failure strings that GroqLLM.__call__ returns instead of raising.
+LLM_ERROR_PREFIXES = ("Error generating response", "Error: No response generated")
+# Initialize the components
+try:
+    # Initialize LLM and search tool
+    llm = GroqLLM()
+    search_tool = DuckDuckGoSearch()
+    # Input section
+    news_topic = st.text_input(
+        "Enter News Topic or Query:",
+        placeholder="E.g., Recent developments in renewable energy"
+    )
+    # Analysis options
+    col1, col2 = st.columns(2)
+    with col1:
+        search_depth = st.slider(
+            "Search Depth (number of results)",
+            min_value=3,
+            max_value=10,
+            value=5
+        )
+    with col2:
+        analysis_type = st.selectbox(
+            "Analysis Type",
+            ["Comprehensive", "Quick Summary", "Technical", "Simplified"]
+        )
+    # Generate analysis button
+    if st.button("Analyze News"):
+        # Whitespace-only input is treated the same as an empty topic.
+        topic = news_topic.strip()
+        if topic:
+            with st.spinner("Gathering information and analyzing..."):
+                # Created before the try so the finally clause can always clear it.
+                search_placeholder = st.empty()
+                try:
+                    # Show search progress
+                    search_placeholder.info("Searching for recent news...")
+                    # Perform search
+                    search_results = search_tool(
+                        f"Latest news about {topic} last 7 days",
+                        max_results=search_depth
+                    )
+                    # The search tool reports failures as text with these prefixes.
+                    if search_results.startswith(("Search error", "No results")):
+                        search_placeholder.empty()
+                        st.error(search_results)
+                    else:
+                        # Update progress
+                        search_placeholder.info("Analyzing search results...")
+                        # Create analysis prompt, tailored to the selected analysis type
+                        analysis_prompt = create_analysis_prompt(topic, search_results)
+                        style_instruction = ANALYSIS_STYLE_INSTRUCTIONS.get(analysis_type, "")
+                        if style_instruction:
+                            analysis_prompt = f"{analysis_prompt}\n{style_instruction}"
+                        # Get analysis from LLM
+                        analysis_result = llm(analysis_prompt)
+                        # Clear progress messages
+                        search_placeholder.empty()
+                        if not analysis_result or analysis_result.startswith(LLM_ERROR_PREFIXES):
+                            # The LLM wrapper returns its errors as text; show them
+                            # as errors rather than as a successful analysis.
+                            st.error(analysis_result or "Error: No response generated")
+                        else:
+                            # Display results
+                            st.subheader("📊 Analysis Results")
+                            st.markdown(analysis_result)
+                            # Log the activity
+                            log_agent_activity(
+                                analysis_prompt,
+                                analysis_result,
+                                "News Analysis Agent"
+                            )
+                except Exception as e:
+                    st.error(f"An error occurred during analysis: {str(e)}")
+                finally:
+                    # Never leave a stale progress message behind, even on failure.
+                    search_placeholder.empty()
+        else:
+            st.warning("Please enter a news topic to analyze.")
+    # Add helpful tips
+    with st.expander("💡 Tips for Better Results"):
+        st.write("""
+        - Be specific with your topic for more focused analysis
+        - Use keywords related to recent events for timely information
+        - Consider including timeframes in your query
+        - Try different analysis types for various perspectives
+        - For complex topics, start with a broader search and then narrow down
+        """)
+except Exception as e:
+    st.error(f"""
+    Failed to initialize the application: {str(e)}
+    Please ensure:
+    1. Your GROQ_API_KEY is properly set in environment variables
+    2. All required packages are installed:
+       - pip install streamlit groq duckduckgo-search
+    3. You have internet connectivity for DuckDuckGo searches
+    """)
+# Footer
+st.markdown("---")
+st.caption(
+    "Powered by Groq LLama 3.1 8B Instant, DuckDuckGo, and Streamlit | "
+    "Created for news analysis and research purposes"
+)