Spaces:

Agents-MCP-Hackathon
/

HF_RepoSense

Running

App Files Community

naman1102 committed 13 days ago

Commit

3330689

1 Parent(s): 6a5d12d

ui_fix

Browse files

Files changed (2) hide show

analyzer.py +27 -8
app.py +53 -15

analyzer.py CHANGED Viewed

@@ -14,10 +14,11 @@ def analyze_code(code: str) -> str:
     system_prompt = (
         "You are a highly precise and strict JSON generator. Analyze the code given to you. "
         "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
         "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
         "If you cannot answer, still return a valid JSON with empty strings for each key. "
         "Example of the ONLY valid output:\n"
-        "{\n  'strength': '...', \n  'weaknesses': '...', \n  'speciality': '...', \n  'relevance rating': '...'\n}"
     )
     response = client.chat.completions.create(
         model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",  # Updated model
@@ -108,22 +109,31 @@ def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo
         out_f.write("\n".join(combined_content))
     return output_file
-def analyze_code_chunk(code: str) -> str:
     """
     Analyzes a code chunk and returns a JSON summary for that chunk.
     """
     from openai import OpenAI
     client = OpenAI(api_key=os.getenv("modal_api"))
     client.base_url = os.getenv("base_url")
     chunk_prompt = (
         "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
         "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
         "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
         "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
         "If you cannot answer, still return a valid JSON with empty strings for each key. "
         "Example of the ONLY valid output:\n"
-        '{\n  "strength": "...", \n  "weaknesses": "...", \n  "speciality": "...", \n  "relevance rating": "..."\n}'
     )
     response = client.chat.completions.create(
         model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
         messages=[
@@ -135,21 +145,29 @@ def analyze_code_chunk(code: str) -> str:
     )
     return response.choices[0].message.content
-def aggregate_chunk_analyses(chunk_jsons: list) -> str:
     """
     Aggregates a list of chunk JSONs into a single JSON summary using the LLM.
     """
     from openai import OpenAI
     client = OpenAI(api_key=os.getenv("modal_api"))
     client.base_url = os.getenv("base_url")
     aggregation_prompt = (
         "You are a highly precise and strict, code analyzer and JSON generator. You are given a list of JSON analyses of code chunks. "
         "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
         "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
         "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
         "If a key is missing in all chunks, use an empty string. "
         "Example of the ONLY valid output:\n"
-        '{\n  "strength": "...", \n  "weaknesses": "...", \n  "speciality": "...", \n  "relevance rating": "..."\n}'
     )
     user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
     response = client.chat.completions.create(
@@ -163,9 +181,10 @@ def aggregate_chunk_analyses(chunk_jsons: list) -> str:
     )
     return response.choices[0].message.content
-def analyze_combined_file(output_file="combined_repo.txt"):
     """
     Reads the combined file, splits it into 500-line chunks, analyzes each chunk, and aggregates the LLM's output into a final summary.
     Returns the chunk JSONs (for debugging) and the aggregated analysis as a string.
     """
     try:
@@ -175,9 +194,9 @@ def analyze_combined_file(output_file="combined_repo.txt"):
         chunk_jsons = []
         for i in range(0, len(lines), chunk_size):
             chunk = "".join(lines[i:i+chunk_size])
-            analysis = analyze_code_chunk(chunk)
             chunk_jsons.append(analysis)
-        final_summary = aggregate_chunk_analyses(chunk_jsons)
         debug_output = (
             "==== Chunk JSON Outputs ===="
             + "\n\n".join([f"Chunk {i+1} JSON:\n{chunk_jsons[i]}" for i in range(len(chunk_jsons))])

     system_prompt = (
         "You are a highly precise and strict JSON generator. Analyze the code given to you. "
         "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
+        "For 'relevance rating', you MUST use ONLY one of these exact values: 'very low', 'low', 'high', 'very high'. "
         "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
         "If you cannot answer, still return a valid JSON with empty strings for each key. "
         "Example of the ONLY valid output:\n"
+        "{\n  'strength': '...', \n  'weaknesses': '...', \n  'speciality': '...', \n  'relevance rating': 'high'\n}"
     )
     response = client.chat.completions.create(
         model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",  # Updated model
         out_f.write("\n".join(combined_content))
     return output_file
+def analyze_code_chunk(code: str, user_requirements: str = "") -> str:
     """
     Analyzes a code chunk and returns a JSON summary for that chunk.
     """
     from openai import OpenAI
     client = OpenAI(api_key=os.getenv("modal_api"))
     client.base_url = os.getenv("base_url")
+    # Build the user requirements section
+    requirements_section = ""
+    if user_requirements.strip():
+        requirements_section = f"\n\nUSER REQUIREMENTS:\n{user_requirements}\n\nWhen rating relevance, consider how well this code matches the user's stated requirements."
     chunk_prompt = (
         "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
         "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
         "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
+        "For 'relevance rating', you MUST use ONLY one of these exact values: 'very low', 'low', 'high', 'very high'. "
         "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
         "If you cannot answer, still return a valid JSON with empty strings for each key. "
+        f"{requirements_section}"
         "Example of the ONLY valid output:\n"
+        '{\n  "strength": "...", \n  "weaknesses": "...", \n  "speciality": "...", \n  "relevance rating": "high"\n}'
     )
     response = client.chat.completions.create(
         model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
         messages=[
     )
     return response.choices[0].message.content
+def aggregate_chunk_analyses(chunk_jsons: list, user_requirements: str = "") -> str:
     """
     Aggregates a list of chunk JSONs into a single JSON summary using the LLM.
     """
     from openai import OpenAI
     client = OpenAI(api_key=os.getenv("modal_api"))
     client.base_url = os.getenv("base_url")
+    # Build the user requirements section
+    requirements_section = ""
+    if user_requirements.strip():
+        requirements_section = f"\n\nUSER REQUIREMENTS:\n{user_requirements}\n\nWhen aggregating the relevance rating, consider how well the overall repository matches the user's stated requirements."
     aggregation_prompt = (
         "You are a highly precise and strict, code analyzer and JSON generator. You are given a list of JSON analyses of code chunks. "
         "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
         "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
+        "For 'relevance rating', you MUST use ONLY one of these exact values: 'very low', 'low', 'high', 'very high'. "
         "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
         "If a key is missing in all chunks, use an empty string. "
+        f"{requirements_section}"
         "Example of the ONLY valid output:\n"
+        '{\n  "strength": "...", \n  "weaknesses": "...", \n  "speciality": "...", \n  "relevance rating": "high"\n}'
     )
     user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
     response = client.chat.completions.create(
     )
     return response.choices[0].message.content
+def analyze_combined_file(output_file="combined_repo.txt", user_requirements: str = ""):
     """
     Reads the combined file, splits it into 500-line chunks, analyzes each chunk, and aggregates the LLM's output into a final summary.
+    Now includes user requirements for better relevance rating.
     Returns the chunk JSONs (for debugging) and the aggregated analysis as a string.
     """
     try:
         chunk_jsons = []
         for i in range(0, len(lines), chunk_size):
             chunk = "".join(lines[i:i+chunk_size])
+            analysis = analyze_code_chunk(chunk, user_requirements)
             chunk_jsons.append(analysis)
+        final_summary = aggregate_chunk_analyses(chunk_jsons, user_requirements)
         debug_output = (
             "==== Chunk JSON Outputs ===="
             + "\n\n".join([f"Chunk {i+1} JSON:\n{chunk_jsons[i]}" for i in range(len(chunk_jsons))])

app.py CHANGED Viewed

@@ -48,9 +48,10 @@ def read_csv_to_dataframe() -> pd.DataFrame:
         logger.error(f"Error reading CSV: {e}")
         return pd.DataFrame()
-def analyze_and_update_single_repo(repo_id: str) -> Tuple[str, str, pd.DataFrame]:
     """
     Downloads, analyzes a single repo, updates the CSV, and returns results.
     This function combines the logic of downloading, analyzing, and updating the CSV for one repo.
     """
     try:
@@ -61,7 +62,7 @@ def analyze_and_update_single_repo(repo_id: str) -> Tuple[str, str, pd.DataFrame
         with open(txt_path, "r", encoding="utf-8") as f:
             combined_content = f.read()
-        llm_output = analyze_combined_file(txt_path)
         last_start = llm_output.rfind('{')
         last_end = llm_output.rfind('}')
@@ -73,7 +74,8 @@ def analyze_and_update_single_repo(repo_id: str) -> Tuple[str, str, pd.DataFrame
         if isinstance(llm_json, dict) and "error" not in llm_json:
             strengths = llm_json.get("strength", "N/A")
             weaknesses = llm_json.get("weaknesses", "N/A")
-            summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
         else:
             summary = f"JSON extraction: FAILED\nRaw response might not be valid JSON."
@@ -128,7 +130,7 @@ def create_ui() -> gr.Blocks:
     /* Modern sleek design */
     .gradio-container {
         font-family: 'Inter', 'system-ui', sans-serif;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
         min-height: 100vh;
     }
@@ -239,6 +241,7 @@ def create_ui() -> gr.Blocks:
         # Using simple, separate state objects for robustness.
         repo_ids_state = gr.State([])
         current_repo_idx_state = gr.State(0)
         gr.Markdown(
             """
@@ -284,6 +287,15 @@ def create_ui() -> gr.Blocks:
             with gr.TabItem("🔬 Analysis", id="analysis_tab"):
                 gr.Markdown("### 🧪 Repository Analysis Engine")
                 with gr.Row():
                     analyze_next_btn = gr.Button("⚡ Analyze Next Repository", variant="primary", size="lg", scale=2)
                     with gr.Column(scale=3):
@@ -396,7 +408,24 @@ def create_ui() -> gr.Blocks:
             status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
             return unique_repo_ids, 0, df, status, gr.update(selected="analysis_tab")
-        def handle_analyze_next(repo_ids: List[str], current_idx: int) -> Tuple[str, str, pd.DataFrame, int, str]:
             """Analyzes the next repository in the list."""
             if not repo_ids:
                 return "", "", pd.DataFrame(), 0, "Status: No repositories to analyze. Please submit repo IDs first."
@@ -405,8 +434,10 @@ def create_ui() -> gr.Blocks:
             repo_id_to_analyze = repo_ids[current_idx]
             status = f"Status: Analyzing repository {current_idx + 1}/{len(repo_ids)}: {repo_id_to_analyze}"
-            content, summary, df = analyze_and_update_single_repo(repo_id_to_analyze)
             next_idx = current_idx + 1
             if next_idx >= len(repo_ids):
@@ -437,15 +468,15 @@ def create_ui() -> gr.Blocks:
             history.append({"role": "assistant", "content": response})
             return history
-        def handle_end_chat(history: List[Dict[str, str]]) -> Tuple[str, str]:
-            """Ends the chat, extracts and sanitizes keywords from the conversation."""
             if not history:
-                return "", "Status: Chat is empty, nothing to analyze."
             # Convert the full, valid history for the extraction logic
             tuple_history = convert_messages_to_tuples(history)
             if not tuple_history:
-                return "", "Status: No completed conversations to analyze."
             # Get raw keywords string from the LLM
             raw_keywords_str = extract_keywords_from_conversation(tuple_history)
@@ -458,13 +489,16 @@ def create_ui() -> gr.Blocks:
             cleaned_keywords = [kw.strip() for kw in cleaned_keywords if kw.strip()]
             if not cleaned_keywords:
-                return "", f"Status: Could not extract valid keywords. Raw LLM output: '{raw_keywords_str}'"
             # Join them into a clean, comma-separated string for the search tool
             final_keywords_str = ", ".join(cleaned_keywords)
-            status = "Status: Keywords extracted. You can now use them to search."
-            return final_keywords_str, status
         # --- Component Event Wiring ---
@@ -489,7 +523,7 @@ def create_ui() -> gr.Blocks:
         # Analysis Tab
         analyze_next_btn.click(
             fn=handle_analyze_next,
-            inputs=[repo_ids_state, current_repo_idx_state],
             outputs=[content_output, summary_output, df_output, current_repo_idx_state, status_box_analysis]
         )
@@ -515,7 +549,11 @@ def create_ui() -> gr.Blocks:
         end_chat_btn.click(
             fn=handle_end_chat,
             inputs=[chatbot],
-            outputs=[extracted_keywords_output, status_box_chatbot]
         )
         use_keywords_btn.click(
             fn=handle_keyword_search,

         logger.error(f"Error reading CSV: {e}")
         return pd.DataFrame()
+def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") -> Tuple[str, str, pd.DataFrame]:
     """
     Downloads, analyzes a single repo, updates the CSV, and returns results.
+    Now includes user requirements for better relevance rating.
     This function combines the logic of downloading, analyzing, and updating the CSV for one repo.
     """
     try:
         with open(txt_path, "r", encoding="utf-8") as f:
             combined_content = f.read()
+        llm_output = analyze_combined_file(txt_path, user_requirements)
         last_start = llm_output.rfind('{')
         last_end = llm_output.rfind('}')
         if isinstance(llm_json, dict) and "error" not in llm_json:
             strengths = llm_json.get("strength", "N/A")
             weaknesses = llm_json.get("weaknesses", "N/A")
+            relevance = llm_json.get("relevance rating", "N/A")
+            summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}\n\nRelevance: {relevance}"
         else:
             summary = f"JSON extraction: FAILED\nRaw response might not be valid JSON."
     /* Modern sleek design */
     .gradio-container {
         font-family: 'Inter', 'system-ui', sans-serif;
+        background: linear-gradient(135deg, #0a0a0a 0%, #1a1a1a 100%);
         min-height: 100vh;
     }
         # Using simple, separate state objects for robustness.
         repo_ids_state = gr.State([])
         current_repo_idx_state = gr.State(0)
+        user_requirements_state = gr.State("")  # Store user requirements from chatbot
         gr.Markdown(
             """
             with gr.TabItem("🔬 Analysis", id="analysis_tab"):
                 gr.Markdown("### 🧪 Repository Analysis Engine")
+                # Display current user requirements
+                with gr.Row():
+                    current_requirements_display = gr.Textbox(
+                        label="📋 Current User Requirements",
+                        interactive=False,
+                        lines=3,
+                        info="Requirements extracted from AI chat conversation for relevance rating"
+                    )
                 with gr.Row():
                     analyze_next_btn = gr.Button("⚡ Analyze Next Repository", variant="primary", size="lg", scale=2)
                     with gr.Column(scale=3):
             status = f"Status: Found {len(unique_repo_ids)} repositories. Ready for analysis."
             return unique_repo_ids, 0, df, status, gr.update(selected="analysis_tab")
+        def extract_user_requirements_from_chat(history: List[Dict[str, str]]) -> str:
+            """Extract user requirements from chatbot conversation."""
+            if not history:
+                return ""
+            user_messages = []
+            for msg in history:
+                if msg.get('role') == 'user':
+                    user_messages.append(msg.get('content', ''))
+            if not user_messages:
+                return ""
+            # Combine all user messages as requirements
+            requirements = "\n".join([f"- {msg}" for msg in user_messages if msg.strip()])
+            return requirements
+        def handle_analyze_next(repo_ids: List[str], current_idx: int, user_requirements: str) -> Tuple[str, str, pd.DataFrame, int, str]:
             """Analyzes the next repository in the list."""
             if not repo_ids:
                 return "", "", pd.DataFrame(), 0, "Status: No repositories to analyze. Please submit repo IDs first."
             repo_id_to_analyze = repo_ids[current_idx]
             status = f"Status: Analyzing repository {current_idx + 1}/{len(repo_ids)}: {repo_id_to_analyze}"
+            if user_requirements.strip():
+                status += f"\nUsing user requirements for relevance rating."
+            content, summary, df = analyze_and_update_single_repo(repo_id_to_analyze, user_requirements)
             next_idx = current_idx + 1
             if next_idx >= len(repo_ids):
             history.append({"role": "assistant", "content": response})
             return history
+        def handle_end_chat(history: List[Dict[str, str]]) -> Tuple[str, str, str]:
+            """Ends the chat, extracts and sanitizes keywords from the conversation, and extracts user requirements."""
             if not history:
+                return "", "Status: Chat is empty, nothing to analyze.", ""
             # Convert the full, valid history for the extraction logic
             tuple_history = convert_messages_to_tuples(history)
             if not tuple_history:
+                return "", "Status: No completed conversations to analyze.", ""
             # Get raw keywords string from the LLM
             raw_keywords_str = extract_keywords_from_conversation(tuple_history)
             cleaned_keywords = [kw.strip() for kw in cleaned_keywords if kw.strip()]
             if not cleaned_keywords:
+                return "", f"Status: Could not extract valid keywords. Raw LLM output: '{raw_keywords_str}'", ""
             # Join them into a clean, comma-separated string for the search tool
             final_keywords_str = ", ".join(cleaned_keywords)
+            # Extract user requirements for analysis
+            user_requirements = extract_user_requirements_from_chat(history)
+            status = "Status: Keywords extracted. User requirements saved for analysis."
+            return final_keywords_str, status, user_requirements
         # --- Component Event Wiring ---
         # Analysis Tab
         analyze_next_btn.click(
             fn=handle_analyze_next,
+            inputs=[repo_ids_state, current_repo_idx_state, user_requirements_state],
             outputs=[content_output, summary_output, df_output, current_repo_idx_state, status_box_analysis]
         )
         end_chat_btn.click(
             fn=handle_end_chat,
             inputs=[chatbot],
+            outputs=[extracted_keywords_output, status_box_chatbot, user_requirements_state]
+        ).then(
+            fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
+            inputs=[user_requirements_state],
+            outputs=[current_requirements_display]
         )
         use_keywords_btn.click(
             fn=handle_keyword_search,