jyo01 committed on
Commit
30a49e8
·
verified ·
1 Parent(s): c143ab3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -52
app.py CHANGED
@@ -3,22 +3,21 @@ import json
3
  import base64
4
  import requests
5
  import torch
 
6
  import nest_asyncio
7
- from fastapi import HTTPException
8
  from pydantic import BaseModel
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
  from sentence_transformers import SentenceTransformer, models
11
  import gradio as gr
12
 
 
 
 
13
 
14
- # Apply nest_asyncio to allow async operations in the notebook/Spaces
15
- nest_asyncio.apply()
16
-
17
- import os
18
-
19
- HF_TOKEN = os.environ.get("HF_TOKEN")
20
- GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
21
-
22
 
23
  ############################################
24
  # GitHub API Functions
@@ -44,9 +43,7 @@ def get_repo_tree(owner: str, repo: str, branch: str):
44
  headers = {'Authorization': f'token {GITHUB_TOKEN}'}
45
  tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
46
  response = requests.get(tree_url, headers=headers)
47
- data = response.json()
48
- print("Repo Tree Data:", json.dumps(data, indent=2))
49
- return data
50
 
51
  def get_file_content(owner: str, repo: str, file_path: str):
52
  headers = {'Authorization': f'token {GITHUB_TOKEN}'}
@@ -69,8 +66,7 @@ def preprocess_text(text: str) -> str:
69
 
70
  def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
71
  transformer_model = models.Transformer(model_name)
72
- pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
73
- pooling_mode_mean_tokens=True)
74
  model = SentenceTransformer(modules=[transformer_model, pooling_model])
75
  return model
76
 
@@ -109,7 +105,6 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-
109
 
110
  torch.cuda.empty_cache()
111
 
112
- # Load tokenizer and model with authentication using the 'token' parameter.
113
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HF_TOKEN)
114
  model = AutoModelForCausalLM.from_pretrained(
115
  model_name,
@@ -132,6 +127,43 @@ def get_llm_response(prompt: str, model_name: str = "meta-llama/Llama-2-7b-chat-
132
 
133
  return answer
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  ############################################
136
  # Gradio Interface Setup
137
  ############################################
@@ -144,7 +176,7 @@ with gr.Blocks() as demo:
144
  gr.Markdown("### Repository Information")
145
  github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
146
  load_repo_btn = gr.Button("Load Repository Contents")
147
- # Initialize dropdown with empty choices and empty default value.
148
  file_dropdown = gr.Dropdown(label="Select a File", interactive=True, value="", choices=[])
149
  repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=10)
150
  with gr.Column(scale=2):
@@ -153,59 +185,32 @@ with gr.Blocks() as demo:
153
  chat_output = gr.Textbox(label="Chatbot Response", interactive=False, lines=10)
154
  chat_btn = gr.Button("Send Query")
155
 
156
- # Function to load repository contents from GitHub.
157
- def load_repo_contents_backend(github_url: str):
158
- try:
159
- owner, repo = extract_repo_info(github_url)
160
- except Exception as e:
161
- return f"Error: {str(e)}"
162
- repo_data = get_repo_metadata(owner, repo)
163
- default_branch = repo_data.get("default_branch", "main")
164
- tree_data = get_repo_tree(owner, repo, default_branch)
165
- if "tree" not in tree_data:
166
- return "Error: Could not fetch repository tree."
167
- file_list = [item["path"] for item in tree_data["tree"] if item["type"] == "blob"]
168
- return file_list
169
-
170
- # Callback to update the file dropdown.
171
  def update_file_dropdown(github_url):
172
  files = load_repo_contents_backend(github_url)
173
- if isinstance(files, str): # Error message case.
174
  print("Error loading files:", files)
175
  return gr.update(choices=[], value="")
176
  print("Files loaded:", files)
177
- # Return the updated dropdown with choices but with no default value selected.
178
  return gr.update(choices=files, value="")
179
-
180
  load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
181
 
182
- # Callback to update the repository content display based on the selected file.
183
  def update_repo_content(github_url, file_choice):
184
  if not file_choice:
185
  return "No file selected."
186
- try:
187
- file_index = int(file_choice)
188
- except Exception as e:
189
- print("Error converting file choice:", str(e))
190
- return "Invalid file selection."
191
- content_tuple = get_file_content_for_choice(github_url, file_index)
192
- if isinstance(content_tuple, str):
193
- # Return error message if one occurred.
194
- return content_tuple
195
- content, _ = content_tuple
196
  return content
197
-
198
  file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
199
 
200
- # Callback to process the chat query.
201
  def process_chat(github_url, file_choice, chat_query):
202
  if not file_choice:
203
  return "Please select a file first."
204
- try:
205
- file_index = int(file_choice)
206
- except Exception as e:
207
- return "Invalid file selection."
208
- return chat_with_file(github_url, file_index, chat_query)
209
 
210
  chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input], outputs=[chat_output])
211
 
 
3
  import base64
4
  import requests
5
  import torch
6
+ import uvicorn
7
  import nest_asyncio
8
+ from fastapi import FastAPI, HTTPException
9
  from pydantic import BaseModel
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
11
  from sentence_transformers import SentenceTransformer, models
12
  import gradio as gr
13
 
14
+ ############################################
15
+ # Configuration
16
+ ############################################
17
 
18
# Credentials are read from the environment so that real tokens never need to
# be written into this file. The placeholder strings are kept only as a
# fallback for when a variable is unset or empty.
import os

HF_TOKEN = os.environ.get("HF_TOKEN") or "YOUR_HF_TOKEN"
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") or "YOUR_GITHUB_TOKEN"
 
 
 
 
 
21
 
22
  ############################################
23
  # GitHub API Functions
 
43
  headers = {'Authorization': f'token {GITHUB_TOKEN}'}
44
  tree_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
45
  response = requests.get(tree_url, headers=headers)
46
+ return response.json()
 
 
47
 
48
  def get_file_content(owner: str, repo: str, file_path: str):
49
  headers = {'Authorization': f'token {GITHUB_TOKEN}'}
 
66
 
67
  def load_embedding_model(model_name: str = 'huggingface/CodeBERTa-small-v1') -> SentenceTransformer:
68
  transformer_model = models.Transformer(model_name)
69
+ pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(), pooling_mode_mean_tokens=True)
 
70
  model = SentenceTransformer(modules=[transformer_model, pooling_model])
71
  return model
72
 
 
105
 
106
  torch.cuda.empty_cache()
107
 
 
108
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HF_TOKEN)
109
  model = AutoModelForCausalLM.from_pretrained(
110
  model_name,
 
127
 
128
  return answer
129
 
130
+ ############################################
131
+ # Gradio Interface Functions
132
+ ############################################
133
+
134
+ # For file content retrieval, we now use the file path directly.
135
def get_file_content_for_choice(github_url: str, file_path: str):
    """Fetch the content of ``file_path`` from the repository at ``github_url``.

    Args:
        github_url: URL of the GitHub repository; ``extract_repo_info`` turns
            it into an owner and a repository name.
        file_path: Path of the file inside the repository.

    Returns:
        A ``(content, file_path)`` tuple on success, where ``content`` is
        whatever ``get_file_content`` returns. If ``extract_repo_info``
        raises, the exception text is returned as a plain ``str`` instead,
        so callers must check the result type before unpacking it.
    """
    try:
        repo_owner, repo_name = extract_repo_info(github_url)
    except Exception as exc:
        # Report the URL-parsing failure as text rather than propagating it.
        return str(exc)
    return get_file_content(repo_owner, repo_name, file_path), file_path
142
+
143
def chat_with_file(github_url: str, file_path: str, user_query: str,
                   max_context_chars: int = 1000):
    """Answer ``user_query`` using the start of a repository file as context.

    Args:
        github_url: URL of the GitHub repository that contains the file.
        file_path: Path of the file inside the repository.
        user_query: Question to put to the language model.
        max_context_chars: Number of leading characters of the preprocessed
            file passed to the prompt. Defaults to 1000, the limit that was
            previously hard-coded; negative values are treated as 0.

    Returns:
        A string naming the file followed by the model's response, or the
        error text produced by ``get_file_content_for_choice`` when the file
        could not be resolved.
    """
    result = get_file_content_for_choice(github_url, file_path)
    if isinstance(result, str):
        # The helper signals failure by returning error text, not a tuple.
        return result
    file_content, selected_file = result
    preprocessed = preprocess_text(file_content)
    # Clamp so a negative limit cannot turn into a "trim from the end" slice.
    context_snippet = preprocessed[:max(0, max_context_chars)]
    prompt = generate_prompt(user_query, [context_snippet])
    llm_response = get_llm_response(prompt)
    return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
153
+
154
def load_repo_contents_backend(github_url: str):
    """List the file paths in a repository's default branch.

    Args:
        github_url: URL of the GitHub repository.

    Returns:
        A list of paths for every tree entry of type ``"blob"``, or an
        ``"Error: ..."`` string when the URL cannot be parsed or the tree
        cannot be fetched. Callers tell the two apart by type.
    """
    try:
        owner, repo = extract_repo_info(github_url)
    except Exception as e:
        return f"Error: {str(e)}"
    repo_data = get_repo_metadata(owner, repo)
    # Fall back to "main" when the metadata payload is not a mapping or does
    # not carry a usable default branch.
    default_branch = "main"
    if isinstance(repo_data, dict):
        default_branch = repo_data.get("default_branch") or "main"
    tree_data = get_repo_tree(owner, repo, default_branch)
    if not isinstance(tree_data, dict) or "tree" not in tree_data:
        return "Error: Could not fetch repository tree."
    # Skip malformed entries instead of failing on a missing key.
    return [
        item["path"]
        for item in tree_data["tree"]
        if isinstance(item, dict) and item.get("type") == "blob" and "path" in item
    ]
166
+
167
  ############################################
168
  # Gradio Interface Setup
169
  ############################################
 
176
  gr.Markdown("### Repository Information")
177
  github_url_input = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repository")
178
  load_repo_btn = gr.Button("Load Repository Contents")
179
+ # Dropdown with choices as file paths; default value is empty.
180
  file_dropdown = gr.Dropdown(label="Select a File", interactive=True, value="", choices=[])
181
  repo_content_output = gr.Textbox(label="File Content", interactive=False, lines=10)
182
  with gr.Column(scale=2):
 
185
  chat_output = gr.Textbox(label="Chatbot Response", interactive=False, lines=10)
186
  chat_btn = gr.Button("Send Query")
187
 
188
+ # Callback: Update file dropdown choices.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def update_file_dropdown(github_url):
190
  files = load_repo_contents_backend(github_url)
191
+ if isinstance(files, str): # Error message
192
  print("Error loading files:", files)
193
  return gr.update(choices=[], value="")
194
  print("Files loaded:", files)
195
+ # Do not pre-select any file (empty value)
196
  return gr.update(choices=files, value="")
197
+
198
  load_repo_btn.click(fn=update_file_dropdown, inputs=[github_url_input], outputs=[file_dropdown])
199
 
200
+ # Callback: Update repository content when a file is selected.
201
  def update_repo_content(github_url, file_choice):
202
  if not file_choice:
203
  return "No file selected."
204
+ content, _ = get_file_content_for_choice(github_url, file_choice)
 
 
 
 
 
 
 
 
 
205
  return content
206
+
207
  file_dropdown.change(fn=update_repo_content, inputs=[github_url_input, file_dropdown], outputs=[repo_content_output])
208
 
209
+ # Callback: Process chat query.
210
  def process_chat(github_url, file_choice, chat_query):
211
  if not file_choice:
212
  return "Please select a file first."
213
+ return chat_with_file(github_url, file_choice, chat_query)
 
 
 
 
214
 
215
  chat_btn.click(fn=process_chat, inputs=[github_url_input, file_dropdown, chat_query_input], outputs=[chat_output])
216