Update app.py
Browse files
app.py
CHANGED
@@ -1,29 +1,37 @@
|
|
1 |
-
import
|
2 |
-
import
|
|
|
|
|
3 |
import requests
|
4 |
import base64
|
5 |
import json
|
|
|
6 |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
|
|
|
|
|
|
9 |
try:
|
10 |
-
# Parse the
|
11 |
-
parts =
|
12 |
owner = parts[3]
|
13 |
repo = parts[4].split('.git')[0]
|
14 |
branch = 'main' # You might want to make this configurable
|
15 |
|
16 |
# List of common dependency files to look for
|
17 |
dependency_files = [
|
18 |
-
'requirements.txt',
|
19 |
-
'
|
20 |
-
'Gemfile',
|
21 |
-
'pom.xml',
|
22 |
-
'build.gradle',
|
23 |
-
'composer.json',
|
24 |
-
'Cargo.toml',
|
25 |
-
'go.mod',
|
26 |
-
'Pipfile'
|
27 |
]
|
28 |
|
29 |
all_content = ""
|
@@ -34,31 +42,42 @@ def fetch_github_files(github_url, personal_access_token):
|
|
34 |
"Accept": "application/vnd.github.v3+json"
|
35 |
}
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
for file_path in dependency_files:
|
38 |
-
# Construct the API URL
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
# Make the API request
|
42 |
response = requests.get(api_url, headers=headers)
|
43 |
|
44 |
if response.status_code == 200:
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
49 |
all_content += f"\n\n--- {file_path} ---\n{file_content}"
|
50 |
-
else:
|
51 |
-
# This is a directory or something else, skip it
|
52 |
-
continue
|
53 |
|
54 |
if not all_content:
|
55 |
return "Error: No dependency files found in the repository."
|
56 |
|
57 |
return all_content
|
58 |
except requests.exceptions.RequestException as e:
|
59 |
-
return f"Error accessing
|
60 |
except json.JSONDecodeError:
|
61 |
-
return f"Error: Unable to parse
|
62 |
|
63 |
def process_chunk_with_gemini(chunk, gemini_api_key):
|
64 |
genai.configure(api_key=gemini_api_key)
|
@@ -86,7 +105,6 @@ def process_chunk_with_gemini(chunk, gemini_api_key):
|
|
86 |
|
87 |
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
|
88 |
def process_with_gemini(file_content, gemini_api_key):
|
89 |
-
# Split the content into smaller chunks
|
90 |
chunk_size = 2000
|
91 |
chunks = [file_content[i:i+chunk_size] for i in range(0, len(file_content), chunk_size)]
|
92 |
|
@@ -99,38 +117,100 @@ def process_with_gemini(file_content, gemini_api_key):
|
|
99 |
print(f"Error processing chunk: {str(e)}")
|
100 |
results.append(f"Error processing chunk: {str(e)}")
|
101 |
|
102 |
-
# Combine the results
|
103 |
combined_result = "\n\n".join(results)
|
104 |
return combined_result
|
105 |
|
106 |
-
def process_input(
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
109 |
if not personal_access_token.strip():
|
110 |
return "Error: Personal Access Token is empty. Please provide a valid token."
|
111 |
|
112 |
-
file_content =
|
113 |
if file_content.startswith("Error:"):
|
114 |
return file_content
|
115 |
|
116 |
try:
|
117 |
# Process the file content with Gemini
|
118 |
-
analysis = process_with_gemini(file_content,
|
119 |
-
|
|
|
120 |
except Exception as e:
|
121 |
return f"Error processing the files: {str(e)}"
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
-
if __name__ ==
|
136 |
-
|
|
|
|
|
|
1 |
+
import dash
|
2 |
+
from dash import dcc, html, Input, Output, State
|
3 |
+
import dash_bootstrap_components as dbc
|
4 |
+
from dash.exceptions import PreventUpdate
|
5 |
import requests
|
6 |
import base64
|
7 |
import json
|
8 |
+
import google.generativeai as genai
|
9 |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
10 |
+
import threading
|
11 |
+
import os
|
12 |
+
from io import BytesIO
|
13 |
+
|
14 |
+
# Credentials are read from the process environment (e.g. Hugging Face
# Space secrets); each is None when the variable is not set.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# The Dash application, styled with the stock Bootstrap theme.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Module-level slot holding the bytes of the most recent analysis;
# None until an analysis has completed successfully.
generated_file = None
|
22 |
+
|
23 |
+
def fetch_git_files(git_url, personal_access_token, git_provider):
|
24 |
try:
|
25 |
+
# Parse the Git URL
|
26 |
+
parts = git_url.split('/')
|
27 |
owner = parts[3]
|
28 |
repo = parts[4].split('.git')[0]
|
29 |
branch = 'main' # You might want to make this configurable
|
30 |
|
31 |
# List of common dependency files to look for
|
32 |
dependency_files = [
|
33 |
+
'requirements.txt', 'package.json', 'Gemfile', 'pom.xml',
|
34 |
+
'build.gradle', 'composer.json', 'Cargo.toml', 'go.mod', 'Pipfile'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
]
|
36 |
|
37 |
all_content = ""
|
|
|
42 |
"Accept": "application/vnd.github.v3+json"
|
43 |
}
|
44 |
|
45 |
+
base_url = {
|
46 |
+
'GitHub': 'https://api.github.com',
|
47 |
+
'GitLab': 'https://gitlab.com/api/v4',
|
48 |
+
'Gitea': 'https://gitea.com/api/v1' # Adjust this URL for your Gitea instance
|
49 |
+
}.get(git_provider)
|
50 |
+
|
51 |
for file_path in dependency_files:
|
52 |
+
# Construct the API URL based on the git provider
|
53 |
+
if git_provider == 'GitHub':
|
54 |
+
api_url = f"{base_url}/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"
|
55 |
+
elif git_provider == 'GitLab':
|
56 |
+
api_url = f"{base_url}/projects/{owner}%2F{repo}/repository/files/{file_path}/raw?ref={branch}"
|
57 |
+
elif git_provider == 'Gitea':
|
58 |
+
api_url = f"{base_url}/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"
|
59 |
|
60 |
# Make the API request
|
61 |
response = requests.get(api_url, headers=headers)
|
62 |
|
63 |
if response.status_code == 200:
|
64 |
+
if git_provider == 'GitHub' or git_provider == 'Gitea':
|
65 |
+
content = response.json()
|
66 |
+
if isinstance(content, dict) and 'content' in content:
|
67 |
+
file_content = base64.b64decode(content['content']).decode('utf-8')
|
68 |
+
all_content += f"\n\n--- {file_path} ---\n{file_content}"
|
69 |
+
elif git_provider == 'GitLab':
|
70 |
+
file_content = response.text
|
71 |
all_content += f"\n\n--- {file_path} ---\n{file_content}"
|
|
|
|
|
|
|
72 |
|
73 |
if not all_content:
|
74 |
return "Error: No dependency files found in the repository."
|
75 |
|
76 |
return all_content
|
77 |
except requests.exceptions.RequestException as e:
|
78 |
+
return f"Error accessing {git_provider}: {str(e)}"
|
79 |
except json.JSONDecodeError:
|
80 |
+
return f"Error: Unable to parse {git_provider} API response for {file_path}"
|
81 |
|
82 |
def process_chunk_with_gemini(chunk, gemini_api_key):
|
83 |
genai.configure(api_key=gemini_api_key)
|
|
|
105 |
|
106 |
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
|
107 |
def process_with_gemini(file_content, gemini_api_key):
|
|
|
108 |
chunk_size = 2000
|
109 |
chunks = [file_content[i:i+chunk_size] for i in range(0, len(file_content), chunk_size)]
|
110 |
|
|
|
117 |
print(f"Error processing chunk: {str(e)}")
|
118 |
results.append(f"Error processing chunk: {str(e)}")
|
119 |
|
|
|
120 |
combined_result = "\n\n".join(results)
|
121 |
return combined_result
|
122 |
|
123 |
+
def process_input(git_url, personal_access_token, git_provider):
    """Validate the form inputs, fetch dependency files and analyse them.

    Args:
        git_url: Repository URL entered by the user. May be ``None`` when
            the input field was left empty.
        personal_access_token: Access token for the Git provider. May be
            ``None`` when the input field was left empty.
        git_provider: Provider name selected in the dropdown ('GitHub',
            'GitLab' or 'Gitea'). May be ``None`` when the dropdown was
            cleared.

    Returns:
        A status message for display: either a string starting with
        "Error" or the success message.

    Side effects:
        Resets the module-level ``generated_file`` to ``None`` on entry and
        stores the encoded analysis there on success.
    """
    global generated_file
    generated_file = None

    # Empty form fields arrive as None, so guard before calling str methods.
    if not git_provider:
        return "Error: No Git provider selected. Please choose a provider."

    git_url = (git_url or "").strip()
    url_prefix = f"https://{git_provider.lower()}.com/"
    invalid_url_message = f"Error: Invalid {git_provider} URL. Please use the format: https://{git_provider.lower()}.com/username/repository.git"
    if not git_url.startswith(url_prefix):
        return invalid_url_message

    # The fetcher indexes the owner and repository path segments, so reject
    # URLs that do not contain both instead of letting an IndexError escape.
    path_segments = git_url[len(url_prefix):].split('/')
    if len(path_segments) < 2 or not path_segments[0] or not path_segments[1]:
        return invalid_url_message

    if not (personal_access_token or "").strip():
        return "Error: Personal Access Token is empty. Please provide a valid token."

    file_content = fetch_git_files(git_url, personal_access_token, git_provider)
    # fetch_git_files reports failures both as "Error: ..." and as
    # "Error accessing <provider>: ..."; neither must be sent on for analysis.
    if file_content.startswith(("Error:", "Error accessing")):
        return file_content

    try:
        # Process the file content with Gemini
        analysis = process_with_gemini(file_content, GEMINI_API_KEY)
        generated_file = analysis.encode()
        return "Analysis complete. Click the download button to get the results."
    except Exception as e:
        # UI boundary: report any failure as a message instead of raising.
        return f"Error processing the files: {str(e)}"
|
143 |
|
144 |
+
# Page layout: a heading, a short description and a single card holding the
# provider selector, the two inputs, the action buttons and the output areas.
_provider_choices = [
    {'label': 'GitHub', 'value': 'GitHub'},
    {'label': 'GitLab', 'value': 'GitLab'},
    {'label': 'Gitea', 'value': 'Gitea'}
]

_form_children = [
    dcc.Dropdown(
        id='git-provider',
        options=_provider_choices,
        value='GitHub',
        className="mb-3"
    ),
    dbc.Input(id="git-url", placeholder="Enter Git Repository URL", type="text", className="mb-3"),
    dbc.Input(id="personal-access-token", placeholder="Enter Git Personal Access Token", type="password", className="mb-3"),
    dbc.Button("Analyze", id="analyze-button", color="primary", className="mb-3"),
    # Starts disabled; the analyze callback controls this property.
    dbc.Button("Download Results", id="download-button", color="secondary", className="mb-3 ml-2", disabled=True),
    dcc.Download(id="download-analysis"),
    html.Div(id="output", className="mt-3"),
    dcc.Loading(
        id="loading",
        type="dot",
        children=[html.Div(id="loading-output")]
    )
]

_form_card = dbc.Card([
    dbc.CardBody([
        dbc.Row([
            dbc.Col(_form_children)
        ])
    ])
])

app.layout = dbc.Container(
    [
        html.H1("Open Source License Extractor", className="my-4"),
        html.P("Provide a Git repository URL to analyze open-source licenses from dependency files.", className="mb-4"),
        _form_card
    ],
    fluid=True
)
|
177 |
+
|
178 |
+
@app.callback(
    [Output("output", "children"),
     Output("download-button", "disabled"),
     Output("loading-output", "children")],
    [Input("analyze-button", "n_clicks")],
    [State("git-url", "value"),
     State("personal-access-token", "value"),
     State("git-provider", "value")],
    prevent_initial_call=True
)
def update_output(n_clicks, git_url, personal_access_token, git_provider):
    """Run the analysis when the Analyze button is clicked.

    Args:
        n_clicks: Click count of the Analyze button; ``None`` before any click.
        git_url: Current value of the repository URL input.
        personal_access_token: Current value of the token input.
        git_provider: Current value of the provider dropdown.

    Returns:
        A 3-tuple of (status message, whether the download button is
        disabled, content for the loading placeholder).

    Raises:
        PreventUpdate: If the callback fires without a click.
    """
    if n_clicks is None:
        raise PreventUpdate

    status_message = process_input(git_url, personal_access_token, git_provider)

    # The button must be disabled exactly when there is nothing to download;
    # process_input leaves generated_file as None on any failure.
    download_disabled = generated_file is None
    return status_message, download_disabled, ""
|
198 |
+
|
199 |
+
@app.callback(
    Output("download-analysis", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_analysis(n_clicks):
    """Serve the stored analysis as a text-file download.

    Args:
        n_clicks: Click count of the download button; ``None`` before any click.

    Returns:
        The download payload for "license_analysis.txt", or
        ``dash.no_update`` when no analysis is stored.

    Raises:
        PreventUpdate: If the callback fires without a click.
    """
    if n_clicks is None:
        raise PreventUpdate

    payload = generated_file
    if payload is not None:
        return dcc.send_bytes(payload, "license_analysis.txt")

    # Nothing has been generated yet, so leave the Download component untouched.
    return dash.no_update
|
212 |
|
213 |
+
if __name__ == '__main__':
    # The server binds to every interface, so debug mode (dev tools, code
    # reloading, in-browser tracebacks) must be opt-in instead of always on.
    # Set DASH_DEBUG=1 (or "true") in the environment to enable it locally.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in ('1', 'true')

    print("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")
|