Update app.py
app.py  CHANGED
@@ -3,22 +3,21 @@ import json
 import base64
 import requests
 import torch
+import uvicorn
 import nest_asyncio
-from fastapi import HTTPException
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from sentence_transformers import SentenceTransformer, models
 import gradio as gr
 
+############################################
+# Configuration
+############################################
 
-#
-
-
-import os
-
-HF_TOKEN = os.environ.get("HF_TOKEN")
-GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
-
+# Replace with your actual tokens.
+HF_TOKEN = "YOUR_HF_TOKEN"
+GITHUB_TOKEN = "YOUR_GITHUB_TOKEN"
 
 ############################################
 # GitHub API Functions
@@ -44,9 +43,7 @@ def get_repo_tree(owner: str, repo: str, branch: str):
     headers = {'Authorization': f'token {GITHUB_TOKEN}'}
     tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
     response = requests.get(tree_url, headers=headers)
-    data = response.json()
-    print("Repo Tree Data:", json.dumps(data, indent=2))
-    return data
+    return response.json()
 
 def get_file_content(owner: str, repo: str, file_path: str):
     headers = {'Authorization': f'token {GITHUB_TOKEN}'}
@@ -69,8 +66,7 @@ def preprocess_text(text: str) -> str:
 
 def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
     transformer_model = models.Transformer(model_name)
-    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
-                                   pooling_mode_mean_tokens=True)
+    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode_mean_tokens=True)
     model = SentenceTransformer(modules=[transformer_model, pooling_model])
     return model
 
@@ -109,7 +105,6 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-
 
     torch.cuda.empty_cache()
 
-    # Load tokenizer and model with authentication using the 'token' parameter.
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HF_TOKEN)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
@@ -132,6 +127,43 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-
 
     return answer
 
+############################################
+# Gradio Interface Functions
+############################################
+
+# For file content retrieval, we now use the file path directly.
+def get_file_content_for_choice(github_url: str, file_path: str):
+    try:
+        owner, repo = extract_repo_info(github_url)
+    except Exception as e:
+        return str(e)
+    content = get_file_content(owner, repo, file_path)
+    return content, file_path
+
+def chat_with_file(github_url: str, file_path: str, user_query: str):
+    result = get_file_content_for_choice(github_url, file_path)
+    if isinstance(result, str):
+        return result  # Error message
+    file_content, selected_file = result
+    preprocessed = preprocess_text(file_content)
+    context_snippet = preprocessed[:1000]  # use first 1000 characters as context
+    prompt = generate_prompt(user_query, [context_snippet])
+    llm_response = get_llm_response(prompt)
+    return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
+
+def load_repo_contents_backend(github_url: str):
+    try:
+        owner, repo = extract_repo_info(github_url)
+    except Exception as e:
+        return f"Error: {str(e)}"
+    repo_data = get_repo_metadata(owner, repo)
+    default_branch = repo_data.get("default_branch", "main")
+    tree_data = get_repo_tree(owner, repo, default_branch)
+    if "tree" not in tree_data:
+        return "Error: Could not fetch repository tree."
+    file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
+    return file_list
+
 ############################################
 # Gradio Interface Setup
 ############################################
@@ -144,7 +176,7 @@ with gr.Blocks() as demo:
             gr.Markdown("### Repository Information")
             github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
             load_repo_btn = gr.Button("Load Repository Contents")
-            #
+            # Dropdown with choices as file paths; default value is empty.
            file_dropdown = gr.Dropdown(label="Select a File", interactive=True, value="", choices=[])
             repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=10)
         with gr.Column(scale=2):
@@ -153,59 +185,32 @@
             chat_output = gr.Textbox(label="Chatbot Response", interactive=False, lines=10)
             chat_btn = gr.Button("Send Query")
 
-    #
-    def load_repo_contents_backend(github_url: str):
-        try:
-            owner, repo = extract_repo_info(github_url)
-        except Exception as e:
-            return f"Error: {str(e)}"
-        repo_data = get_repo_metadata(owner, repo)
-        default_branch = repo_data.get("default_branch", "main")
-        tree_data = get_repo_tree(owner, repo, default_branch)
-        if "tree" not in tree_data:
-            return "Error: Could not fetch repository tree."
-        file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
-        return file_list
-
-    # Callback to update the file dropdown.
+    # Callback: Update file dropdown choices.
     def update_file_dropdown(github_url):
         files = load_repo_contents_backend(github_url)
         if isinstance(files, str):  # Error message
             print("Error loading files:", files)
             return gr.update(choices=[], value="")
         print("Files loaded:", files)
-        #
+        # Do not pre-select any file (empty value)
        return gr.update(choices=files, value="")
 
     load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
 
-    # Callback
+    # Callback: Update repository content when a file is selected.
     def update_repo_content(github_url, file_choice):
         if not file_choice:
             return "No file selected."
-        try:
-            file_index = int(file_choice)
-        except Exception as e:
-            print("Error converting file choice:", str(e))
-            return "Invalid file selection."
-        content_tuple = get_file_content_for_choice(github_url, file_index)
-        if isinstance(content_tuple, str):
-            # Return error message if one occurred.
-            return content_tuple
-        content, _ = content_tuple
+        content, _ = get_file_content_for_choice(github_url, file_choice)
         return content
 
     file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
 
-    # Callback
+    # Callback: Process chat query.
     def process_chat(github_url, file_choice, chat_query):
         if not file_choice:
             return "Please select a file first."
-        try:
-            file_index = int(file_choice)
-        except Exception as e:
-            return "Invalid file selection."
-        return chat_with_file(github_url, file_index, chat_query)
+        return chat_with_file(github_url, file_choice, chat_query)
 
     chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input], outputs=[chat_output])
 
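For reference, `load_repo_contents_backend` filters the JSON returned by the Git Trees endpoint (`/repos/{owner}/{repo}/git/trees/{branch}?recursive=1`), keeping only `blob` entries (files) and dropping `tree` entries (directories). A minimal sketch with a hand-made payload, illustrative only and not part of the commit:

    # The GitHub REST API returns a "tree" list whose entries carry
    # "path" and "type" fields; "blob" entries are files.
    tree_data = {
        "sha": "abc123",
        "truncated": False,
        "tree": [
            {"path": "app.py", "type": "blob"},
            {"path": "docs", "type": "tree"},
            {"path": "docs/guide.md", "type": "blob"},
        ],
    }
    file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
    assert file_list == ["app.py", "docs/guide.md"]

Note that for very large repositories the API may report `"truncated": true`, in which case some paths are missing from the listing.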
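`load_embedding_model` pairs a `models.Transformer` module with mean-token pooling, so the resulting `SentenceTransformer` maps an entire code snippet to a single vector. A usage sketch, assuming the default model downloads successfully; this call pattern is standard sentence-transformers usage, not code from the commit:

    from sentence_transformers import util

    embedder = load_embedding_model()  # huggingface/CodeBERTa-small-v1 by default
    vectors = embedder.encode([
        "def add(a, b): return a + b",
        "def sub(a, b): return a - b",
    ])
    # Cosine similarity between the two snippet embeddings.
    print(util.cos_sim(vectors[0], vectors[1]))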
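The new `import uvicorn` and `from fastapi import FastAPI, HTTPException` suggest the app is served through FastAPI rather than a bare `demo.launch()`. The actual wiring is outside the visible hunks, so the following is only a sketch of the usual pattern, with the `app` name assumed:

    # Hypothetical wiring, assuming the Blocks object `demo` defined above.
    app = FastAPI()
    app = gr.mount_gradio_app(app, demo, path="/")

    nest_asyncio.apply()  # allow uvicorn to start inside an already-running event loop
    uvicorn.run(app, host="0.0.0.0", port=7860)

Port 7860 is the conventional Gradio/Spaces port; adjust it to the deployment target.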