bluenevus committed on
Commit
108be36
·
verified ·
1 Parent(s): 4ada003

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -45
app.py CHANGED
@@ -1,29 +1,37 @@
1
- import gradio as gr
2
- import google.generativeai as genai
 
 
3
  import requests
4
  import base64
5
  import json
 
6
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
 
 
 
 
 
 
 
 
 
7
 
8
- def fetch_github_files(github_url, personal_access_token):
 
 
 
9
  try:
10
- # Parse the GitHub URL
11
- parts = github_url.split('/')
12
  owner = parts[3]
13
  repo = parts[4].split('.git')[0]
14
  branch = 'main' # You might want to make this configurable
15
 
16
  # List of common dependency files to look for
17
  dependency_files = [
18
- 'requirements.txt',
19
- 'package.json',
20
- 'Gemfile',
21
- 'pom.xml',
22
- 'build.gradle',
23
- 'composer.json',
24
- 'Cargo.toml',
25
- 'go.mod',
26
- 'Pipfile'
27
  ]
28
 
29
  all_content = ""
@@ -34,31 +42,42 @@ def fetch_github_files(github_url, personal_access_token):
34
  "Accept": "application/vnd.github.v3+json"
35
  }
36
 
 
 
 
 
 
 
37
  for file_path in dependency_files:
38
- # Construct the API URL
39
- api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"
 
 
 
 
 
40
 
41
  # Make the API request
42
  response = requests.get(api_url, headers=headers)
43
 
44
  if response.status_code == 200:
45
- content = response.json()
46
- if isinstance(content, dict) and 'content' in content:
47
- # This is a file
48
- file_content = base64.b64decode(content['content']).decode('utf-8')
 
 
 
49
  all_content += f"\n\n--- {file_path} ---\n{file_content}"
50
- else:
51
- # This is a directory or something else, skip it
52
- continue
53
 
54
  if not all_content:
55
  return "Error: No dependency files found in the repository."
56
 
57
  return all_content
58
  except requests.exceptions.RequestException as e:
59
- return f"Error accessing GitHub: {str(e)}"
60
  except json.JSONDecodeError:
61
- return f"Error: Unable to parse GitHub API response for {file_path}"
62
 
63
  def process_chunk_with_gemini(chunk, gemini_api_key):
64
  genai.configure(api_key=gemini_api_key)
@@ -86,7 +105,6 @@ def process_chunk_with_gemini(chunk, gemini_api_key):
86
 
87
  @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
88
  def process_with_gemini(file_content, gemini_api_key):
89
- # Split the content into smaller chunks
90
  chunk_size = 2000
91
  chunks = [file_content[i:i+chunk_size] for i in range(0, len(file_content), chunk_size)]
92
 
@@ -99,38 +117,100 @@ def process_with_gemini(file_content, gemini_api_key):
99
  print(f"Error processing chunk: {str(e)}")
100
  results.append(f"Error processing chunk: {str(e)}")
101
 
102
- # Combine the results
103
  combined_result = "\n\n".join(results)
104
  return combined_result
105
 
106
- def process_input(github_url, personal_access_token, gemini_api_key):
107
- if not github_url.startswith("https://github.com/"):
108
- return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"
 
 
 
109
  if not personal_access_token.strip():
110
  return "Error: Personal Access Token is empty. Please provide a valid token."
111
 
112
- file_content = fetch_github_files(github_url, personal_access_token)
113
  if file_content.startswith("Error:"):
114
  return file_content
115
 
116
  try:
117
  # Process the file content with Gemini
118
- analysis = process_with_gemini(file_content, gemini_api_key)
119
- return analysis
 
120
  except Exception as e:
121
  return f"Error processing the files: {str(e)}"
122
 
123
- iface = gr.Interface(
124
- fn=process_input,
125
- inputs=[
126
- gr.Textbox(label="GitHub Repository URL (format: https://github.com/username/repository.git)"),
127
- gr.Textbox(label="GitHub Personal Access Token", type="password"),
128
- gr.Textbox(label="Gemini API Key", type="password"),
129
- ],
130
- outputs=gr.Textbox(label="License Information and Analysis"),
131
- title="Open Source License Extractor",
132
- description="Provide a GitHub repository URL to analyze open-source licenses from dependency files.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- if __name__ == "__main__":
136
- iface.launch()
 
 
 
1
+ import dash
2
+ from dash import dcc, html, Input, Output, State
3
+ import dash_bootstrap_components as dbc
4
+ from dash.exceptions import PreventUpdate
5
  import requests
6
  import base64
7
  import json
8
+ import google.generativeai as genai
9
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
10
+ import threading
11
+ import os
12
+ from io import BytesIO
13
+
14
# Credentials are read from the environment (the original comment calls these
# "Hugging Face variables"). Either value is None when the variable is unset.
# NOTE(review): GITHUB_TOKEN is not referenced by any code visible in this
# diff - confirm whether it is still needed.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# The Dash application object; the callbacks below register themselves on it.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level slot holding the most recent analysis as bytes, or None when no
# analysis is available. Written by process_input, read by the download callback.
generated_file = None
22
+
23
+ def fetch_git_files(git_url, personal_access_token, git_provider):
24
  try:
25
+ # Parse the Git URL
26
+ parts = git_url.split('/')
27
  owner = parts[3]
28
  repo = parts[4].split('.git')[0]
29
  branch = 'main' # You might want to make this configurable
30
 
31
  # List of common dependency files to look for
32
  dependency_files = [
33
+ 'requirements.txt', 'package.json', 'Gemfile', 'pom.xml',
34
+ 'build.gradle', 'composer.json', 'Cargo.toml', 'go.mod', 'Pipfile'
 
 
 
 
 
 
 
35
  ]
36
 
37
  all_content = ""
 
42
  "Accept": "application/vnd.github.v3+json"
43
  }
44
 
45
+ base_url = {
46
+ 'GitHub': 'https://api.github.com',
47
+ 'GitLab': 'https://gitlab.com/api/v4',
48
+ 'Gitea': 'https://gitea.com/api/v1' # Adjust this URL for your Gitea instance
49
+ }.get(git_provider)
50
+
51
  for file_path in dependency_files:
52
+ # Construct the API URL based on the git provider
53
+ if git_provider == 'GitHub':
54
+ api_url = f"{base_url}/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"
55
+ elif git_provider == 'GitLab':
56
+ api_url = f"{base_url}/projects/{owner}%2F{repo}/repository/files/{file_path}/raw?ref={branch}"
57
+ elif git_provider == 'Gitea':
58
+ api_url = f"{base_url}/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"
59
 
60
  # Make the API request
61
  response = requests.get(api_url, headers=headers)
62
 
63
  if response.status_code == 200:
64
+ if git_provider == 'GitHub' or git_provider == 'Gitea':
65
+ content = response.json()
66
+ if isinstance(content, dict) and 'content' in content:
67
+ file_content = base64.b64decode(content['content']).decode('utf-8')
68
+ all_content += f"\n\n--- {file_path} ---\n{file_content}"
69
+ elif git_provider == 'GitLab':
70
+ file_content = response.text
71
  all_content += f"\n\n--- {file_path} ---\n{file_content}"
 
 
 
72
 
73
  if not all_content:
74
  return "Error: No dependency files found in the repository."
75
 
76
  return all_content
77
  except requests.exceptions.RequestException as e:
78
+ return f"Error accessing {git_provider}: {str(e)}"
79
  except json.JSONDecodeError:
80
+ return f"Error: Unable to parse {git_provider} API response for {file_path}"
81
 
82
  def process_chunk_with_gemini(chunk, gemini_api_key):
83
  genai.configure(api_key=gemini_api_key)
 
105
 
106
  @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
107
  def process_with_gemini(file_content, gemini_api_key):
 
108
  chunk_size = 2000
109
  chunks = [file_content[i:i+chunk_size] for i in range(0, len(file_content), chunk_size)]
110
 
 
117
  print(f"Error processing chunk: {str(e)}")
118
  results.append(f"Error processing chunk: {str(e)}")
119
 
 
120
  combined_result = "\n\n".join(results)
121
  return combined_result
122
 
123
def process_input(git_url, personal_access_token, git_provider):
    """Validate the form inputs, fetch dependency files and analyse them.

    Args:
        git_url: Repository URL, expected to look like
            ``https://<provider>.com/<owner>/<repository>[.git]``. May be
            ``None`` or blank (e.g. a form field that was never filled in).
        personal_access_token: Access token forwarded to ``fetch_git_files``.
            May be ``None`` or blank.
        git_provider: Provider name such as ``'GitHub'``, ``'GitLab'`` or
            ``'Gitea'``; its lower-cased form is used to build the expected
            URL prefix.

    Returns:
        A human-readable status string. Every failure message starts with
        ``"Error"``; on success a fixed "Analysis complete" message is
        returned.

    Side effects:
        Resets the module-level ``generated_file`` to ``None`` on entry and,
        on success, stores the UTF-8 encoded analysis in it.
    """
    global generated_file
    generated_file = None

    # Normalise missing values so that validation reports a friendly error
    # instead of raising AttributeError on None.
    git_url = (git_url or "").strip()
    personal_access_token = (personal_access_token or "").strip()
    if not git_provider:
        return "Error: No Git provider selected. Please choose a provider."

    host_prefix = f"https://{git_provider.lower()}.com/"
    invalid_url_message = (
        f"Error: Invalid {git_provider} URL. Please use the format: "
        f"https://{git_provider.lower()}.com/username/repository.git"
    )
    if not git_url.startswith(host_prefix):
        return invalid_url_message

    # fetch_git_files indexes the owner and repository segments of the URL
    # directly, so reject URLs that lack either one before calling it.
    path_segments = git_url[len(host_prefix):].split('/')
    if len(path_segments) < 2 or not path_segments[0] or not path_segments[1]:
        return invalid_url_message

    if not personal_access_token:
        return "Error: Personal Access Token is empty. Please provide a valid token."

    file_content = fetch_git_files(git_url, personal_access_token, git_provider)
    # fetch_git_files reports failures both as "Error: ..." and as
    # "Error accessing <provider>: ...", so match on the common prefix.
    # Real content always begins with a "--- <file> ---" banner.
    if file_content.startswith("Error"):
        return file_content

    try:
        # Process the file content with Gemini
        analysis = process_with_gemini(file_content, GEMINI_API_KEY)
        generated_file = analysis.encode()
        return "Analysis complete. Click the download button to get the results."
    except Exception as e:
        return f"Error processing the files: {str(e)}"
143
 
144
# Page layout: a heading and description followed by a single card that holds
# the provider selector, the URL and token inputs, the action buttons, the
# download target, the status output and a loading indicator.
_form_controls = [
    dcc.Dropdown(
        id='git-provider',
        options=[
            {'label': provider, 'value': provider}
            for provider in ('GitHub', 'GitLab', 'Gitea')
        ],
        value='GitHub',
        className="mb-3"
    ),
    dbc.Input(id="git-url", placeholder="Enter Git Repository URL", type="text", className="mb-3"),
    dbc.Input(id="personal-access-token", placeholder="Enter Git Personal Access Token", type="password", className="mb-3"),
    dbc.Button("Analyze", id="analyze-button", color="primary", className="mb-3"),
    # Starts disabled; the analyze callback controls this property afterwards.
    dbc.Button("Download Results", id="download-button", color="secondary", className="mb-3 ml-2", disabled=True),
    dcc.Download(id="download-analysis"),
    html.Div(id="output", className="mt-3"),
    dcc.Loading(
        id="loading",
        type="dot",
        children=[html.Div(id="loading-output")]
    ),
]

app.layout = dbc.Container(
    [
        html.H1("Open Source License Extractor", className="my-4"),
        html.P("Provide a Git repository URL to analyze open-source licenses from dependency files.", className="mb-4"),
        dbc.Card([dbc.CardBody([dbc.Row([dbc.Col(_form_controls)])])]),
    ],
    fluid=True,
)
177
+
178
@app.callback(
    [Output("output", "children"),
     Output("download-button", "disabled"),
     Output("loading-output", "children")],
    [Input("analyze-button", "n_clicks")],
    [State("git-url", "value"),
     State("personal-access-token", "value"),
     State("git-provider", "value")],
    prevent_initial_call=True
)
def update_output(n_clicks, git_url, personal_access_token, git_provider):
    """Run the analysis when the Analyze button is clicked.

    Args:
        n_clicks: Click count of the Analyze button; ``None`` means it has
            not been clicked.
        git_url: Current value of the repository URL input.
        personal_access_token: Current value of the token input.
        git_provider: Currently selected provider.

    Returns:
        A 3-tuple matching the declared outputs: the status message for the
        ``output`` div, the ``disabled`` flag for the download button, and an
        empty string for the loading placeholder.

    Raises:
        PreventUpdate: if the button has not been clicked.
    """
    if n_clicks is None:
        raise PreventUpdate

    status_message = process_input(git_url, personal_access_token, git_provider)
    # The second output feeds the button's `disabled` property, so the button
    # must be disabled only while there is nothing to download.
    download_disabled = generated_file is None
    return status_message, download_disabled, ""
198
+
199
@app.callback(
    Output("download-analysis", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_analysis(n_clicks):
    """Send the stored analysis to the browser as ``license_analysis.txt``.

    Raises PreventUpdate when the button has not been clicked, and leaves the
    download component untouched when no analysis has been stored.
    """
    if n_clicks is None:
        raise PreventUpdate

    payload = generated_file
    if payload is not None:
        return dcc.send_bytes(payload, "license_analysis.txt")
    return dash.no_update
212
 
213
if __name__ == '__main__':
    # Script entry point: announce start-up, serve the app, announce shutdown.
    print("Starting the Dash application...")
    # NOTE(review): debug mode is enabled while binding to all interfaces -
    # confirm this is intended for the deployed environment.
    server_options = {"debug": True, "host": '0.0.0.0', "port": 7860}
    app.run(**server_options)
    print("Dash application has finished running.")