naman1102 commited on
Commit
adcb6a8
·
1 Parent(s): 48d3c35
Files changed (4) hide show
  1. app.py +2 -2
  2. app_old.py +4 -4
  3. hf_utils.py +26 -38
  4. repo_explorer.py +2 -2
app.py CHANGED
@@ -9,7 +9,7 @@ import time
9
 
10
  # Import core logic from other modules, as in app_old.py
11
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
12
- from hf_utils import download_space_repo, search_top_spaces
13
  from chatbot_page import chat_with_user, extract_keywords_from_conversation
14
  from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
15
 
@@ -196,7 +196,7 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
196
  """
197
  try:
198
  logger.info(f"Starting analysis for repo: {repo_id}")
199
- download_space_repo(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
200
  txt_path = combine_repo_files_for_llm()
201
 
202
  with open(txt_path, "r", encoding="utf-8") as f:
 
9
 
10
  # Import core logic from other modules, as in app_old.py
11
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
12
+ from hf_utils import download_filtered_space_files, search_top_spaces
13
  from chatbot_page import chat_with_user, extract_keywords_from_conversation
14
  from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
15
 
 
196
  """
197
  try:
198
  logger.info(f"Starting analysis for repo: {repo_id}")
199
+ download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
200
  txt_path = combine_repo_files_for_llm()
201
 
202
  with open(txt_path, "r", encoding="utf-8") as f:
app_old.py CHANGED
@@ -3,7 +3,7 @@ import regex as re
3
  import csv
4
  import pandas as pd
5
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
6
- from hf_utils import download_space_repo, search_top_spaces
7
  from chatbot_page import chat_with_user, extract_keywords_from_conversation
8
  # Import chatbot logic
9
  from analyzer import analyze_code
@@ -98,7 +98,7 @@ def show_combined_repo_and_llm():
98
  return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
99
  repo_id = last_repo_ids[current_repo_idx]
100
  try:
101
- download_space_repo(repo_id, local_dir="repo_files")
102
  except Exception as e:
103
  return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
104
  txt_path = combine_repo_files_for_llm()
@@ -221,7 +221,7 @@ def batch_analyze_and_select_top():
221
  for idx, row in df.iterrows():
222
  repo_id = row["repo id"]
223
  try:
224
- download_space_repo(repo_id, local_dir="repo_files")
225
  txt_path = combine_repo_files_for_llm()
226
  llm_output = analyze_combined_file(txt_path)
227
  last_start = llm_output.rfind('{')
@@ -277,7 +277,7 @@ def batch_analyze_and_select_top_for_chat(state):
277
  for idx, row in df.iterrows():
278
  repo_id = row["repo id"]
279
  try:
280
- download_space_repo(repo_id, local_dir="repo_files")
281
  txt_path = combine_repo_files_for_llm()
282
  llm_output = analyze_combined_file(txt_path)
283
  last_start = llm_output.rfind('{')
 
3
  import csv
4
  import pandas as pd
5
  from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
6
+ from hf_utils import download_filtered_space_files, search_top_spaces
7
  from chatbot_page import chat_with_user, extract_keywords_from_conversation
8
  # Import chatbot logic
9
  from analyzer import analyze_code
 
98
  return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
99
  repo_id = last_repo_ids[current_repo_idx]
100
  try:
101
+ download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
102
  except Exception as e:
103
  return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
104
  txt_path = combine_repo_files_for_llm()
 
221
  for idx, row in df.iterrows():
222
  repo_id = row["repo id"]
223
  try:
224
+ download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
225
  txt_path = combine_repo_files_for_llm()
226
  llm_output = analyze_combined_file(txt_path)
227
  last_start = llm_output.rfind('{')
 
277
  for idx, row in df.iterrows():
278
  repo_id = row["repo id"]
279
  try:
280
+ download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
281
  txt_path = combine_repo_files_for_llm()
282
  llm_output = analyze_combined_file(txt_path)
283
  last_start = llm_output.rfind('{')
hf_utils.py CHANGED
@@ -2,58 +2,46 @@ from huggingface_hub import snapshot_download
2
  import os
3
  import shutil
4
 
5
- def download_space_repo(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
6
  """
7
- Downloads files from a Hugging Face Space repository, optionally filtering by file extensions.
8
 
9
  Args:
10
  space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
11
  local_dir (str): Local directory to store the downloaded files.
12
- file_extensions (list): Optional list of file extensions to download (e.g., ['.py', '.md']).
13
- If None, downloads all files.
14
  """
15
- print(f"Downloading Space '{space_id}'...")
16
-
17
- if file_extensions:
18
- print(f"Filtering for file types: {', '.join(file_extensions)}")
19
 
20
- # Download the snapshot of the space repo
21
  repo_path = snapshot_download(repo_id=space_id, repo_type="space")
22
 
23
- # Remove existing directory if it exists
24
  if os.path.exists(local_dir):
25
  shutil.rmtree(local_dir)
26
 
27
- if file_extensions is None:
28
- # Download all files (original behavior)
29
- shutil.copytree(repo_path, local_dir)
30
- print(f"All files from Space '{space_id}' downloaded to: {local_dir}")
31
- else:
32
- # Filter and copy only specified file types
33
- os.makedirs(local_dir, exist_ok=True)
34
- copied_files = 0
35
-
36
- for root, dirs, files in os.walk(repo_path):
37
- for file in files:
38
- # Check if file has one of the desired extensions
39
- if any(file.lower().endswith(ext.lower()) for ext in file_extensions):
40
- source_path = os.path.join(root, file)
41
- # Maintain directory structure
42
- relative_path = os.path.relpath(source_path, repo_path)
43
- dest_path = os.path.join(local_dir, relative_path)
44
-
45
- # Create destination directory if it doesn't exist
46
- os.makedirs(os.path.dirname(dest_path), exist_ok=True)
47
-
48
- # Copy the file
49
- shutil.copy2(source_path, dest_path)
50
- copied_files += 1
51
-
52
- print(f"Filtered download complete: {copied_files} files with extensions {file_extensions} from Space '{space_id}' downloaded to: {local_dir}")
53
 
54
  # Example usage
55
- # download_space_repo("finegrain/finegrain-image-enhancer") # Downloads all files
56
- # download_space_repo("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
57
 
58
  from huggingface_hub import list_spaces
59
 
 
2
  import os
3
  import shutil
4
 
5
def download_filtered_space_files(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
    """
    Downloads only files with specified extensions from a Hugging Face Space repository.

    The full Space snapshot is fetched into the huggingface_hub cache, then only
    the files whose extension matches (case-insensitively) are copied into
    ``local_dir``, preserving the repo's directory structure.

    Args:
        space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
        file_extensions (list): List of file extensions to include (e.g., ['.py', '.md']).
            Must be a non-empty list.

    Raises:
        ValueError: If file_extensions is None or empty.
    """
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")

    # Download the full snapshot into the hub cache; returns the local cache path.
    repo_path = snapshot_download(repo_id=space_id, repo_type="space")

    # Start from a clean destination so files from a previously analyzed repo
    # don't linger and pollute the combined output.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)
    os.makedirs(local_dir, exist_ok=True)

    copied_files = 0

    # Walk the snapshot and copy only files with the desired extensions.
    # Matching is case-insensitive (e.g. 'README.MD' matches '.md'), mirroring
    # the behavior of the previous download helper.
    for root, _, files in os.walk(repo_path):
        for file in files:
            if any(file.lower().endswith(ext.lower()) for ext in file_extensions):
                src_file = os.path.join(root, file)
                # Preserve the repo-relative directory layout under local_dir.
                rel_path = os.path.relpath(src_file, repo_path)
                dest_file = os.path.join(local_dir, rel_path)
                os.makedirs(os.path.dirname(dest_file), exist_ok=True)
                shutil.copy2(src_file, dest_file)
                copied_files += 1

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # Example usage
44
+ # download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
 
45
 
46
  from huggingface_hub import list_spaces
47
 
repo_explorer.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import logging
4
  from typing import List, Dict, Tuple
5
  from analyzer import combine_repo_files_for_llm
6
- from hf_utils import download_space_repo
7
 
8
  # Setup logger
9
  logger = logging.getLogger(__name__)
@@ -208,7 +208,7 @@ def handle_load_repository(repo_id: str) -> Tuple[str, str]:
208
 
209
  # Download and process the repository
210
  try:
211
- download_space_repo(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
212
  combined_text_path = combine_repo_files_for_llm()
213
 
214
  except Exception as e:
 
3
  import logging
4
  from typing import List, Dict, Tuple
5
  from analyzer import combine_repo_files_for_llm
6
+ from hf_utils import download_filtered_space_files
7
 
8
  # Setup logger
9
  logger = logging.getLogger(__name__)
 
208
 
209
  # Download and process the repository
210
  try:
211
+ download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
212
  combined_text_path = combine_repo_files_for_llm()
213
 
214
  except Exception as e: