Update app.py
app.py  CHANGED
@@ -3,22 +3,21 @@ import json
 import base64
 import requests
 import torch
+import uvicorn
 import nest_asyncio
-from fastapi import HTTPException
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from sentence_transformers import SentenceTransformer, models
 import gradio as gr
 
+############################################
+# Configuration
+############################################
 
-#
-
-
-import os
-
-HF_TOKEN = os.environ.get("HF_TOKEN")
-GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
-
+# Replace with your actual tokens.
+HF_TOKEN = "YOUR_HF_TOKEN"
+GITHUB_TOKEN = "YOUR_GITHUB_TOKEN"
 
 ############################################
 # GitHub API Functions
@@ -44,9 +43,7 @@ def get_repo_tree(owner: str, repo: str, branch: str):
     headers = {'Authorization': f'token {GITHUB_TOKEN}'}
     tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
     response = requests.get(tree_url, headers=headers)
-    data = response.json()
-    print("Repo Tree Data:", json.dumps(data, indent=2))
-    return data
+    return response.json()
 
 def get_file_content(owner: str, repo: str, file_path: str):
     headers = {'Authorization': f'token {GITHUB_TOKEN}'}
@@ -69,8 +66,7 @@ def preprocess_text(text: str) -> str:
 
 def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
     transformer_model = models.Transformer(model_name)
-    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
-                                   pooling_mode_mean_tokens=True)
+    pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode_mean_tokens=True)
     model = SentenceTransformer(modules=[transformer_model, pooling_model])
     return model
 
@@ -109,7 +105,6 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-
 
     torch.cuda.empty_cache()
 
-    # Load tokenizer and model with authentication using the 'token' parameter.
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HF_TOKEN)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
@@ -132,6 +127,43 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-
 
     return answer
 
+############################################
+# Gradio Interface Functions
+############################################
+
+# For file content retrieval, we now use the file path directly.
+def get_file_content_for_choice(github_url: str, file_path: str):
+    try:
+        owner, repo = extract_repo_info(github_url)
+    except Exception as e:
+        return str(e)
+    content = get_file_content(owner, repo, file_path)
+    return content, file_path
+
+def chat_with_file(github_url: str, file_path: str, user_query: str):
+    result = get_file_content_for_choice(github_url, file_path)
+    if isinstance(result, str):
+        return result  # Error message
+    file_content, selected_file = result
+    preprocessed = preprocess_text(file_content)
+    context_snippet = preprocessed[:1000]  # use first 1000 characters as context
+    prompt = generate_prompt(user_query, [context_snippet])
+    llm_response = get_llm_response(prompt)
+    return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
+
+def load_repo_contents_backend(github_url: str):
+    try:
+        owner, repo = extract_repo_info(github_url)
+    except Exception as e:
+        return f"Error: {str(e)}"
+    repo_data = get_repo_metadata(owner, repo)
+    default_branch = repo_data.get("default_branch", "main")
+    tree_data = get_repo_tree(owner, repo, default_branch)
+    if "tree" not in tree_data:
+        return "Error: Could not fetch repository tree."
+    file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
+    return file_list
+
 ############################################
 # Gradio Interface Setup
 ############################################
@@ -144,7 +176,7 @@ with gr.Blocks() as demo:
             gr.Markdown("### Repository Information")
             github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
             load_repo_btn = gr.Button("Load Repository Contents")
-            #
+            # Dropdown with choices as file paths; default value is empty.
            file_dropdown = gr.Dropdown(label="Select a File", interactive=True, value="", choices=[])
             repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=10)
         with gr.Column(scale=2):
@@ -153,59 +185,32 @@
             chat_output = gr.Textbox(label="Chatbot Response", interactive=False, lines=10)
             chat_btn = gr.Button("Send Query")
 
-    #
-    def load_repo_contents_backend(github_url: str):
-        try:
-            owner, repo = extract_repo_info(github_url)
-        except Exception as e:
-            return f"Error: {str(e)}"
-        repo_data = get_repo_metadata(owner, repo)
-        default_branch = repo_data.get("default_branch", "main")
-        tree_data = get_repo_tree(owner, repo, default_branch)
-        if "tree" not in tree_data:
-            return "Error: Could not fetch repository tree."
-        file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
-        return file_list
-
-    # Callback to update the file dropdown.
+    # Callback: Update file dropdown choices.
     def update_file_dropdown(github_url):
         files = load_repo_contents_backend(github_url)
         if isinstance(files, str):  # Error message
             print("Error loading files:", files)
             return gr.update(choices=[], value="")
         print("Files loaded:", files)
-        #
+        # Do not pre-select any file (empty value)
        return gr.update(choices=files, value="")
 
     load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
 
-    # Callback
+    # Callback: Update repository content when a file is selected.
     def update_repo_content(github_url, file_choice):
         if not file_choice:
             return "No file selected."
-        try:
-            file_index = int(file_choice)
-        except Exception as e:
-            print("Error converting file choice:", str(e))
-            return "Invalid file selection."
-        content_tuple = get_file_content_for_choice(github_url, file_index)
-        if isinstance(content_tuple, str):
-            # Return error message if one occurred.
-            return content_tuple
-        content, _ = content_tuple
+        content, _ = get_file_content_for_choice(github_url, file_choice)
         return content
 
     file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
 
-    # Callback
+    # Callback: Process chat query.
     def process_chat(github_url, file_choice, chat_query):
         if not file_choice:
             return "Please select a file first."
-        try:
-            file_index = int(file_choice)
-        except Exception as e:
-            return "Invalid file selection."
-        return chat_with_file(github_url, file_index, chat_query)
+        return chat_with_file(github_url, file_choice, chat_query)
 
     chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input], outputs=[chat_output])
 
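For reference, `load_repo_contents_backend` filters the JSON returned by the Git Trees endpoint (`/repos/{owner}/{repo}/git/trees/{branch}?recursive=1`), keeping only `blob` entries (files) and dropping `tree` entries (directories). A minimal sketch with a hand-made payload, illustrative only and not part of the commit:

    # The GitHub REST API returns a "tree" list whose entries carry
    # "path" and "type" fields; "blob" entries are files.
    tree_data = {
        "sha": "abc123",
        "truncated": False,
        "tree": [
            {"path": "app.py", "type": "blob"},
            {"path": "docs", "type": "tree"},
            {"path": "docs/guide.md", "type": "blob"},
        ],
    }
    file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
    assert file_list == ["app.py", "docs/guide.md"]

Note that for very large repositories the API may report `"truncated": true`, in which case some paths are missing from the listing.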
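`load_embedding_model` pairs a `models.Transformer` module with mean-token pooling, so the resulting `SentenceTransformer` maps an entire code snippet to a single vector. A usage sketch, assuming the default model downloads successfully; this call pattern is standard sentence-transformers usage, not code from the commit:

    from sentence_transformers import util

    embedder = load_embedding_model()  # huggingface/CodeBERTa-small-v1 by default
    vectors = embedder.encode([
        "def add(a, b): return a + b",
        "def sub(a, b): return a - b",
    ])
    # Cosine similarity between the two snippet embeddings.
    print(util.cos_sim(vectors[0], vectors[1]))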
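The new `import uvicorn` and `from fastapi import FastAPI, HTTPException` suggest the app is served through FastAPI rather than a bare `demo.launch()`. The actual wiring is outside the visible hunks, so the following is only a sketch of the usual pattern, with the `app` name assumed:

    # Hypothetical wiring, assuming the Blocks object `demo` defined above.
    app = FastAPI()
    app = gr.mount_gradio_app(app, demo, path="/")

    nest_asyncio.apply()  # allow uvicorn to start inside an already-running event loop
    uvicorn.run(app, host="0.0.0.0", port=7860)

Port 7860 is the conventional Gradio/Spaces port; adjust it to the deployment target.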