bluenevus's picture
Update app.py
108be36 verified
raw
history blame
8.55 kB
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate
import requests
import base64
import json
import google.generativeai as genai
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import threading
import os
from io import BytesIO
# Hugging Face variables, supplied through the process environment.
# Either value is None when the corresponding variable is not set.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# The Dash application, styled with the stock Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-wide slot for the most recent analysis result (bytes).
# Reset to None whenever a new analysis starts and filled on success.
generated_file = None
def fetch_git_files(git_url, personal_access_token, git_provider, *, branch='main', timeout=30):
    """Fetch the common dependency manifests of a hosted Git repository.

    Each file name in a fixed list of well-known dependency files is
    requested from the provider's REST API. Files that exist are
    concatenated into one string, each preceded by a ``--- <name> ---``
    header line.

    Args:
        git_url: Repository URL of the form
            ``https://<host>/<owner>/<repo>[.git]``.
        personal_access_token: Token sent with every API request.
        git_provider: One of ``'GitHub'``, ``'GitLab'`` or ``'Gitea'``.
        branch: Branch (ref) to read the files from.
        timeout: Per-request timeout in seconds.

    Returns:
        The concatenated file contents, or a human-readable message that
        starts with ``"Error:"`` when nothing could be fetched.
    """
    api_roots = {
        'GitHub': 'https://api.github.com',
        'GitLab': 'https://gitlab.com/api/v4',
        'Gitea': 'https://gitea.com/api/v1'  # Adjust this URL for your Gitea instance
    }
    base_url = api_roots.get(git_provider)
    if base_url is None:
        return f"Error: Unsupported Git provider: {git_provider}"

    # Parse the Git URL: ['https:', '', host, owner, repo, ...]
    parts = (git_url or '').rstrip('/').split('/')
    if len(parts) < 5 or not parts[3] or not parts[4]:
        return "Error: Unable to determine the repository owner and name from the URL."
    owner = parts[3]
    repo = parts[4]
    # Strip only a trailing ".git"; splitting on ".git" would truncate
    # names such as "me.github.io".
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        return "Error: Unable to determine the repository owner and name from the URL."

    # List of common dependency files to look for
    dependency_files = [
        'requirements.txt', 'package.json', 'Gemfile', 'pom.xml',
        'build.gradle', 'composer.json', 'Cargo.toml', 'go.mod', 'Pipfile'
    ]

    # GitLab expects personal access tokens in the PRIVATE-TOKEN header;
    # GitHub and Gitea accept the "token <value>" Authorization scheme.
    if git_provider == 'GitLab':
        headers = {"PRIVATE-TOKEN": personal_access_token}
    else:
        headers = {
            "Authorization": f"token {personal_access_token}",
            "Accept": "application/vnd.github.v3+json"
        }

    sections = []
    file_path = None
    try:
        for file_path in dependency_files:
            # Construct the API URL based on the git provider
            if git_provider == 'GitLab':
                api_url = f"{base_url}/projects/{owner}%2F{repo}/repository/files/{file_path}/raw?ref={branch}"
            else:
                api_url = f"{base_url}/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"

            response = requests.get(api_url, headers=headers, timeout=timeout)

            # A rejected token fails identically for every file, so report
            # it instead of ending with "no dependency files found".
            if response.status_code in (401, 403):
                return (
                    f"Error: {git_provider} rejected the request "
                    f"(HTTP {response.status_code}). Check the personal access token and its permissions."
                )
            if response.status_code != 200:
                # Most commonly 404: this manifest is simply not present.
                continue

            if git_provider == 'GitLab':
                # The /raw endpoint returns the file body directly.
                file_content = response.text
            else:
                payload = response.json()
                encoded = payload.get('content') if isinstance(payload, dict) else None
                if not isinstance(encoded, str):
                    continue
                file_content = base64.b64decode(encoded).decode('utf-8')
            sections.append(f"\n\n--- {file_path} ---\n{file_content}")
    except requests.exceptions.RequestException as e:
        # Prefixed with "Error:" like every other failure message returned here.
        return f"Error: Failed to access {git_provider}: {str(e)}"
    except ValueError:
        # Covers malformed JSON, invalid base64 and non-UTF-8 file bodies.
        return f"Error: Unable to parse {git_provider} API response for {file_path}"

    if not sections:
        return "Error: No dependency files found in the repository."
    return "".join(sections)
def process_chunk_with_gemini(chunk, gemini_api_key):
    """Ask Gemini to catalogue the open-source licenses mentioned in *chunk*.

    Args:
        chunk: A piece of dependency-file text to analyse.
        gemini_api_key: API key handed to ``genai.configure``.

    Returns:
        The text of the model's reply. If anything raises while generating,
        the failure is printed and returned as an
        ``"Error processing chunk: ..."`` string instead of propagating.
    """
    # The prompt body is kept flush-left so no indentation leaks into the text.
    prompt = f"""
Analyze the following file content for open-source license information:
{chunk}
Please provide:
1. A numbered list with the name dependency and version as the title
2. 1st bullet under title has a brief summary of what the dependency does
3. 2nd bullet under title has the license name
4. 3rd bullet under title has a hyperlink to the license file
5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
"""
    genai.configure(api_key=gemini_api_key)
    llm = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
    try:
        reply = llm.generate_content(prompt)
        return reply.text
    except Exception as e:
        failure = f"Error processing chunk: {str(e)}"
        print(failure)
        return failure
def _split_on_line_boundaries(text, chunk_size):
    """Split *text* into pieces of at most *chunk_size* characters, breaking at line ends where possible.

    *chunk_size* must be positive. Joining the returned pieces reproduces
    *text* exactly.
    """
    pieces = []
    current = ""
    for line in text.splitlines(keepends=True):
        # Flush the running piece before it would exceed the limit.
        if current and len(current) + len(line) > chunk_size:
            pieces.append(current)
            current = ""
        # A single line longer than the limit (e.g. minified JSON) cannot be
        # kept whole, so it is hard-split.
        while len(line) > chunk_size:
            pieces.append(line[:chunk_size])
            line = line[chunk_size:]
        current += line
    if current:
        pieces.append(current)
    return pieces


@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
def process_with_gemini(file_content, gemini_api_key):
    """Analyse *file_content* with Gemini chunk by chunk and join the answers.

    The text is cut into chunks of at most 2000 characters. Chunks end on
    line boundaries whenever possible so that a dependency entry is not
    split across two prompts.

    Args:
        file_content: Concatenated dependency-file text.
        gemini_api_key: API key forwarded to ``process_chunk_with_gemini``.

    Returns:
        The per-chunk results joined by blank lines. A chunk that fails
        contributes an ``"Error processing chunk: ..."`` entry instead.
    """
    chunk_size = 2000
    chunks = _split_on_line_boundaries(file_content, chunk_size)
    results = []
    for chunk in chunks:
        # Failures are recorded per chunk rather than raised, so the retry
        # decorator above is not triggered by errors caught here.
        try:
            result = process_chunk_with_gemini(chunk, gemini_api_key)
            results.append(result)
        except Exception as e:
            print(f"Error processing chunk: {str(e)}")
            results.append(f"Error processing chunk: {str(e)}")
    return "\n\n".join(results)
def process_input(git_url, personal_access_token, git_provider):
    """Validate the form values, fetch dependency files and analyse them.

    Resets the module-level ``generated_file`` to ``None`` and, on success,
    stores the UTF-8 encoded analysis there for the download callback.

    Args:
        git_url: Repository URL typed by the user; may be ``None``.
        personal_access_token: Access token typed by the user; may be ``None``.
        git_provider: Selected provider name (``'GitHub'``, ``'GitLab'`` or
            ``'Gitea'``); may be ``None`` if no provider is selected.

    Returns:
        A status message for display: either a success notice or a string
        beginning with ``"Error"``.
    """
    global generated_file
    generated_file = None

    if not git_provider:
        return "Error: Please select a Git provider."

    # Form values may be None (e.g. a field that was never filled in);
    # normalise them so the string checks below cannot raise.
    git_url = (git_url or "").strip()
    personal_access_token = (personal_access_token or "").strip()

    expected_prefix = f"https://{git_provider.lower()}.com/"
    if not git_url.startswith(expected_prefix):
        return f"Error: Invalid {git_provider} URL. Please use the format: {expected_prefix}username/repository.git"
    if not personal_access_token:
        return "Error: Personal Access Token is empty. Please provide a valid token."

    file_content = fetch_git_files(git_url, personal_access_token, git_provider)
    # Fetched content always begins with a "--- <file> ---" header block,
    # so any result starting with "Error" is a failure message. Matching
    # without the colon also catches "Error accessing ..." messages.
    if file_content.startswith("Error"):
        return file_content

    try:
        # Process the file content with Gemini
        analysis = process_with_gemini(file_content, GEMINI_API_KEY)
        generated_file = analysis.encode()
        return "Analysis complete. Click the download button to get the results."
    except Exception as e:
        return f"Error processing the files: {str(e)}"
def _build_layout():
    """Assemble the page: a title, an intro line and one card holding the form."""
    heading = html.H1("Open Source License Extractor", className="my-4")
    intro = html.P(
        "Provide a Git repository URL to analyze open-source licenses from dependency files.",
        className="mb-4",
    )

    provider_dropdown = dcc.Dropdown(
        id='git-provider',
        options=[{'label': name, 'value': name} for name in ('GitHub', 'GitLab', 'Gitea')],
        value='GitHub',
        className="mb-3",
    )
    url_input = dbc.Input(
        id="git-url",
        placeholder="Enter Git Repository URL",
        type="text",
        className="mb-3",
    )
    token_input = dbc.Input(
        id="personal-access-token",
        placeholder="Enter Git Personal Access Token",
        type="password",
        className="mb-3",
    )
    analyze_button = dbc.Button("Analyze", id="analyze-button", color="primary", className="mb-3")
    # The download button starts out disabled.
    download_button = dbc.Button(
        "Download Results",
        id="download-button",
        color="secondary",
        className="mb-3 ml-2",
        disabled=True,
    )
    spinner = dcc.Loading(
        id="loading",
        type="dot",
        children=[html.Div(id="loading-output")],
    )

    form_column = dbc.Col([
        provider_dropdown,
        url_input,
        token_input,
        analyze_button,
        download_button,
        dcc.Download(id="download-analysis"),
        html.Div(id="output", className="mt-3"),
        spinner,
    ])
    card = dbc.Card([dbc.CardBody([dbc.Row([form_column])])])
    return dbc.Container([heading, intro, card], fluid=True)


app.layout = _build_layout()
@app.callback(
    [Output("output", "children"),
     Output("download-button", "disabled"),
     Output("loading-output", "children")],
    [Input("analyze-button", "n_clicks")],
    [State("git-url", "value"),
     State("personal-access-token", "value"),
     State("git-provider", "value")],
    prevent_initial_call=True
)
def update_output(n_clicks, git_url, personal_access_token, git_provider):
    """Run the analysis when "Analyze" is clicked and refresh the page state.

    Returns:
        A 3-tuple matching the declared outputs: the status message, whether
        the download button should be disabled, and an empty string for the
        loading placeholder.
    """
    if n_clicks is None:
        raise PreventUpdate

    result = process_input(git_url, personal_access_token, git_provider)
    # process_input leaves generated_file as None unless the analysis
    # succeeded, so the button must be disabled exactly when it is None.
    download_disabled = generated_file is None
    return result, download_disabled, ""
@app.callback(
    Output("download-analysis", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_analysis(n_clicks):
    """Send the stored analysis to the browser as ``license_analysis.txt``.

    Raises ``PreventUpdate`` when the button has not been clicked, and
    returns ``dash.no_update`` when no analysis result is stored.
    """
    if n_clicks is None:
        raise PreventUpdate
    payload = generated_file
    return dash.no_update if payload is None else dcc.send_bytes(payload, "license_analysis.txt")
if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0), so debug mode and its
    # interactive debugger must not be on by default. Enable it explicitly
    # with DASH_DEBUG=1 (or "true"/"yes") for local development.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    print("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")