new
Browse files- app.py +2 -2
- app_old.py +4 -4
- hf_utils.py +26 -38
- repo_explorer.py +2 -2
app.py
CHANGED
@@ -9,7 +9,7 @@ import time
|
|
9 |
|
10 |
# Import core logic from other modules, as in app_old.py
|
11 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
12 |
-
from hf_utils import
|
13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
14 |
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
15 |
|
@@ -196,7 +196,7 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
|
|
196 |
"""
|
197 |
try:
|
198 |
logger.info(f"Starting analysis for repo: {repo_id}")
|
199 |
-
|
200 |
txt_path = combine_repo_files_for_llm()
|
201 |
|
202 |
with open(txt_path, "r", encoding="utf-8") as f:
|
|
|
9 |
|
10 |
# Import core logic from other modules, as in app_old.py
|
11 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
12 |
+
from hf_utils import download_filtered_space_files, search_top_spaces
|
13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
14 |
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
15 |
|
|
|
196 |
"""
|
197 |
try:
|
198 |
logger.info(f"Starting analysis for repo: {repo_id}")
|
199 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
200 |
txt_path = combine_repo_files_for_llm()
|
201 |
|
202 |
with open(txt_path, "r", encoding="utf-8") as f:
|
app_old.py
CHANGED
@@ -3,7 +3,7 @@ import regex as re
|
|
3 |
import csv
|
4 |
import pandas as pd
|
5 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
6 |
-
from hf_utils import
|
7 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
8 |
# Import chatbot logic
|
9 |
from analyzer import analyze_code
|
@@ -98,7 +98,7 @@ def show_combined_repo_and_llm():
|
|
98 |
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
99 |
repo_id = last_repo_ids[current_repo_idx]
|
100 |
try:
|
101 |
-
|
102 |
except Exception as e:
|
103 |
return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
|
104 |
txt_path = combine_repo_files_for_llm()
|
@@ -221,7 +221,7 @@ def batch_analyze_and_select_top():
|
|
221 |
for idx, row in df.iterrows():
|
222 |
repo_id = row["repo id"]
|
223 |
try:
|
224 |
-
|
225 |
txt_path = combine_repo_files_for_llm()
|
226 |
llm_output = analyze_combined_file(txt_path)
|
227 |
last_start = llm_output.rfind('{')
|
@@ -277,7 +277,7 @@ def batch_analyze_and_select_top_for_chat(state):
|
|
277 |
for idx, row in df.iterrows():
|
278 |
repo_id = row["repo id"]
|
279 |
try:
|
280 |
-
|
281 |
txt_path = combine_repo_files_for_llm()
|
282 |
llm_output = analyze_combined_file(txt_path)
|
283 |
last_start = llm_output.rfind('{')
|
|
|
3 |
import csv
|
4 |
import pandas as pd
|
5 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
6 |
+
from hf_utils import download_filtered_space_files, search_top_spaces
|
7 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
8 |
# Import chatbot logic
|
9 |
from analyzer import analyze_code
|
|
|
98 |
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
99 |
repo_id = last_repo_ids[current_repo_idx]
|
100 |
try:
|
101 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
102 |
except Exception as e:
|
103 |
return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
|
104 |
txt_path = combine_repo_files_for_llm()
|
|
|
221 |
for idx, row in df.iterrows():
|
222 |
repo_id = row["repo id"]
|
223 |
try:
|
224 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
225 |
txt_path = combine_repo_files_for_llm()
|
226 |
llm_output = analyze_combined_file(txt_path)
|
227 |
last_start = llm_output.rfind('{')
|
|
|
277 |
for idx, row in df.iterrows():
|
278 |
repo_id = row["repo id"]
|
279 |
try:
|
280 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
281 |
txt_path = combine_repo_files_for_llm()
|
282 |
llm_output = analyze_combined_file(txt_path)
|
283 |
last_start = llm_output.rfind('{')
|
hf_utils.py
CHANGED
@@ -2,58 +2,46 @@ from huggingface_hub import snapshot_download
|
|
2 |
import os
|
3 |
import shutil
|
4 |
|
5 |
-
def
|
6 |
"""
|
7 |
-
Downloads files from a Hugging Face Space repository
|
8 |
|
9 |
Args:
|
10 |
space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
|
11 |
local_dir (str): Local directory to store the downloaded files.
|
12 |
-
file_extensions (list):
|
13 |
-
|
14 |
"""
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
|
20 |
-
# Download the snapshot
|
21 |
repo_path = snapshot_download(repo_id=space_id, repo_type="space")
|
22 |
|
23 |
-
#
|
24 |
if os.path.exists(local_dir):
|
25 |
shutil.rmtree(local_dir)
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
relative_path = os.path.relpath(source_path, repo_path)
|
43 |
-
dest_path = os.path.join(local_dir, relative_path)
|
44 |
-
|
45 |
-
# Create destination directory if it doesn't exist
|
46 |
-
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
47 |
-
|
48 |
-
# Copy the file
|
49 |
-
shutil.copy2(source_path, dest_path)
|
50 |
-
copied_files += 1
|
51 |
-
|
52 |
-
print(f"Filtered download complete: {copied_files} files with extensions {file_extensions} from Space '{space_id}' downloaded to: {local_dir}")
|
53 |
|
54 |
# Example usage
|
55 |
-
#
|
56 |
-
# download_space_repo("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
|
57 |
|
58 |
from huggingface_hub import list_spaces
|
59 |
|
|
|
2 |
import os
|
3 |
import shutil
|
4 |
|
5 |
+
def download_filtered_space_files(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
    """
    Download only files with the given extensions from a Hugging Face Space.

    Fetches the full Space snapshot via ``snapshot_download``, then copies
    only the matching files into ``local_dir``, preserving each file's path
    relative to the snapshot root. Any existing ``local_dir`` is removed
    first so the result reflects exactly the current snapshot.

    Args:
        space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
        file_extensions (list): List of file extensions to include
            (e.g., ['.py', '.md']). Required — see Raises.

    Raises:
        ValueError: If ``file_extensions`` is None or empty.
    """
    if not file_extensions:
        # Fail fast: without a filter the caller's intent is ambiguous
        # (copy nothing vs. copy everything), so require it explicitly.
        raise ValueError("You must specify a list of file extensions to filter by.")

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")

    # Download the full snapshot; filtering happens locally below.
    repo_path = snapshot_download(repo_id=space_id, repo_type="space")

    # Clear out local_dir if it already exists so no stale files linger.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)
    os.makedirs(local_dir, exist_ok=True)

    # str.endswith accepts a tuple of suffixes: one call tests every extension.
    wanted = tuple(file_extensions)
    copied_files = 0

    # Walk the snapshot and copy only matching files, keeping relative layout.
    for root, _, files in os.walk(repo_path):
        for filename in files:
            if filename.endswith(wanted):
                src_file = os.path.join(root, filename)
                rel_path = os.path.relpath(src_file, repo_path)
                dest_file = os.path.join(local_dir, rel_path)
                os.makedirs(os.path.dirname(dest_file), exist_ok=True)
                shutil.copy2(src_file, dest_file)
                copied_files += 1

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
# Example usage
|
44 |
+
# download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
|
|
|
45 |
|
46 |
from huggingface_hub import list_spaces
|
47 |
|
repo_explorer.py
CHANGED
@@ -3,7 +3,7 @@ import os
|
|
3 |
import logging
|
4 |
from typing import List, Dict, Tuple
|
5 |
from analyzer import combine_repo_files_for_llm
|
6 |
-
from hf_utils import
|
7 |
|
8 |
# Setup logger
|
9 |
logger = logging.getLogger(__name__)
|
@@ -208,7 +208,7 @@ def handle_load_repository(repo_id: str) -> Tuple[str, str]:
|
|
208 |
|
209 |
# Download and process the repository
|
210 |
try:
|
211 |
-
|
212 |
combined_text_path = combine_repo_files_for_llm()
|
213 |
|
214 |
except Exception as e:
|
|
|
3 |
import logging
|
4 |
from typing import List, Dict, Tuple
|
5 |
from analyzer import combine_repo_files_for_llm
|
6 |
+
from hf_utils import download_filtered_space_files
|
7 |
|
8 |
# Setup logger
|
9 |
logger = logging.getLogger(__name__)
|
|
|
208 |
|
209 |
# Download and process the repository
|
210 |
try:
|
211 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
212 |
combined_text_path = combine_repo_files_for_llm()
|
213 |
|
214 |
except Exception as e:
|