naman1102 committed on
Commit
48d3c35
·
1 Parent(s): 72a1c3a
Files changed (4) hide show
  1. analyzer.py +4 -4
  2. app.py +1 -1
  3. hf_utils.py +35 -7
  4. repo_explorer.py +10 -4
analyzer.py CHANGED
@@ -73,13 +73,13 @@ def parse_llm_json_response(response: str):
73
 
74
  def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
75
  """
76
- Combines all .py and .md files in the given directory (recursively) into a single text file.
77
  Returns the path to the combined file.
78
  """
79
  combined_content = []
80
  seen_files = set()
81
  # Priority files
82
- priority_files = ["app.py", "README.md"]
83
  for pf in priority_files:
84
  pf_path = os.path.join(repo_dir, pf)
85
  if os.path.isfile(pf_path):
@@ -90,10 +90,10 @@ def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo
90
  seen_files.add(os.path.abspath(pf_path))
91
  except Exception as e:
92
  combined_content.append(f"\n# Could not read {pf_path}: {e}\n")
93
- # All other .py and .md files
94
  for root, _, files in os.walk(repo_dir):
95
  for file in files:
96
- if file.endswith(".py") or file.endswith(".md"):
97
  file_path = os.path.join(root, file)
98
  abs_path = os.path.abspath(file_path)
99
  if abs_path in seen_files:
 
73
 
74
  def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
75
  """
76
+ Combines all .py, .md, and .txt files in the given directory (recursively) into a single text file.
77
  Returns the path to the combined file.
78
  """
79
  combined_content = []
80
  seen_files = set()
81
  # Priority files
82
+ priority_files = ["app.py", "README.md", "requirements.txt"]
83
  for pf in priority_files:
84
  pf_path = os.path.join(repo_dir, pf)
85
  if os.path.isfile(pf_path):
 
90
  seen_files.add(os.path.abspath(pf_path))
91
  except Exception as e:
92
  combined_content.append(f"\n# Could not read {pf_path}: {e}\n")
93
+ # All other .py, .md, and .txt files
94
  for root, _, files in os.walk(repo_dir):
95
  for file in files:
96
+ if file.endswith(".py") or file.endswith(".md") or file.endswith(".txt"):
97
  file_path = os.path.join(root, file)
98
  abs_path = os.path.abspath(file_path)
99
  if abs_path in seen_files:
app.py CHANGED
@@ -196,7 +196,7 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
196
  """
197
  try:
198
  logger.info(f"Starting analysis for repo: {repo_id}")
199
- download_space_repo(repo_id, local_dir="repo_files")
200
  txt_path = combine_repo_files_for_llm()
201
 
202
  with open(txt_path, "r", encoding="utf-8") as f:
 
196
  """
197
  try:
198
  logger.info(f"Starting analysis for repo: {repo_id}")
199
+ download_space_repo(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
200
  txt_path = combine_repo_files_for_llm()
201
 
202
  with open(txt_path, "r", encoding="utf-8") as f:
hf_utils.py CHANGED
@@ -2,15 +2,20 @@ from huggingface_hub import snapshot_download
2
  import os
3
  import shutil
4
 
5
- def download_space_repo(space_id: str, local_dir: str = "repo_files"):
6
  """
7
- Downloads all files from a Hugging Face Space repository.
8
 
9
  Args:
10
  space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
11
  local_dir (str): Local directory to store the downloaded files.
 
 
12
  """
13
  print(f"Downloading Space '{space_id}'...")
 
 
 
14
 
15
  # Download the snapshot of the space repo
16
  repo_path = snapshot_download(repo_id=space_id, repo_type="space")
@@ -19,13 +24,36 @@ def download_space_repo(space_id: str, local_dir: str = "repo_files"):
19
  if os.path.exists(local_dir):
20
  shutil.rmtree(local_dir)
21
 
22
- # Copy contents to target directory
23
- shutil.copytree(repo_path, local_dir)
24
-
25
- print(f"All files from Space '{space_id}' downloaded to: {local_dir}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Example usage
28
- # download_space_repo("finegrain/finegrain-image-enhancer")
 
29
 
30
  from huggingface_hub import list_spaces
31
 
 
2
  import os
3
  import shutil
4
 
5
+ def download_space_repo(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
6
  """
7
+ Downloads files from a Hugging Face Space repository, optionally filtering by file extensions.
8
 
9
  Args:
10
  space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
11
  local_dir (str): Local directory to store the downloaded files.
12
+ file_extensions (list): Optional list of file extensions to download (e.g., ['.py', '.md']).
13
+ If None, downloads all files.
14
  """
15
  print(f"Downloading Space '{space_id}'...")
16
+
17
+ if file_extensions:
18
+ print(f"Filtering for file types: {', '.join(file_extensions)}")
19
 
20
  # Download the snapshot of the space repo
21
  repo_path = snapshot_download(repo_id=space_id, repo_type="space")
 
24
  if os.path.exists(local_dir):
25
  shutil.rmtree(local_dir)
26
 
27
+ if file_extensions is None:
28
+ # Download all files (original behavior)
29
+ shutil.copytree(repo_path, local_dir)
30
+ print(f"All files from Space '{space_id}' downloaded to: {local_dir}")
31
+ else:
32
+ # Filter and copy only specified file types
33
+ os.makedirs(local_dir, exist_ok=True)
34
+ copied_files = 0
35
+
36
+ for root, dirs, files in os.walk(repo_path):
37
+ for file in files:
38
+ # Check if file has one of the desired extensions
39
+ if any(file.lower().endswith(ext.lower()) for ext in file_extensions):
40
+ source_path = os.path.join(root, file)
41
+ # Maintain directory structure
42
+ relative_path = os.path.relpath(source_path, repo_path)
43
+ dest_path = os.path.join(local_dir, relative_path)
44
+
45
+ # Create destination directory if it doesn't exist
46
+ os.makedirs(os.path.dirname(dest_path), exist_ok=True)
47
+
48
+ # Copy the file
49
+ shutil.copy2(source_path, dest_path)
50
+ copied_files += 1
51
+
52
+ print(f"Filtered download complete: {copied_files} files with extensions {file_extensions} from Space '{space_id}' downloaded to: {local_dir}")
53
 
54
  # Example usage
55
+ # download_space_repo("finegrain/finegrain-image-enhancer") # Downloads all files
56
+ # download_space_repo("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
57
 
58
  from huggingface_hub import list_spaces
59
 
repo_explorer.py CHANGED
@@ -206,11 +206,17 @@ def handle_load_repository(repo_id: str) -> Tuple[str, str]:
206
  try:
207
  logger.info(f"Loading repository for exploration: {repo_id}")
208
 
209
- # Download and combine repository files
210
- download_space_repo(repo_id, local_dir="repo_files")
211
- txt_path = combine_repo_files_for_llm()
 
 
 
 
 
 
212
 
213
- with open(txt_path, "r", encoding="utf-8") as f:
214
  repo_content = f.read()
215
 
216
  status = f"βœ… Repository '{repo_id}' loaded successfully!\nπŸ“ Files processed and ready for exploration.\nπŸ”„ Analyzing repository in chunks for comprehensive context...\nπŸ’¬ You can now ask questions about this repository."
 
206
  try:
207
  logger.info(f"Loading repository for exploration: {repo_id}")
208
 
209
+ # Download and process the repository
210
+ try:
211
+ download_space_repo(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
212
+ combined_text_path = combine_repo_files_for_llm()
213
+
214
+ except Exception as e:
215
+ logger.error(f"Error downloading repository {repo_id}: {e}")
216
+ error_status = f"❌ Error downloading repository: {e}"
217
+ return error_status, ""
218
 
219
+ with open(combined_text_path, "r", encoding="utf-8") as f:
220
  repo_content = f.read()
221
 
222
  status = f"βœ… Repository '{repo_id}' loaded successfully!\nπŸ“ Files processed and ready for exploration.\nπŸ”„ Analyzing repository in chunks for comprehensive context...\nπŸ’¬ You can now ask questions about this repository."