bluenevus's picture
Update app.py
aca813e verified
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate
import requests
import base64
import json
import google.generativeai as genai
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import threading
import os
from io import BytesIO
from docx import Document
import markdown
from github import Github
from github import GithubException
# Hugging Face variables: both secrets are read from the process environment
# and are None when the corresponding variable is not set.
GIT_TOKEN = os.getenv('GIT_TOKEN')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# The Dash application, styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variable to store the generated file: holds the latest analysis as
# bytes, or None while no analysis is available for download.
generated_file = None
def _split_owner_repo(git_url):
    """Return ``(owner, repo_name)`` parsed from a repository URL, or ``None``.

    The URL is expected to look like ``https://host/owner/repo[.git]``; a
    trailing slash and surrounding whitespace are tolerated.
    """
    if not git_url:
        return None
    parts = git_url.strip().rstrip('/').split('/')
    # ['https:', '', 'host', 'owner', 'repo', ...] -> at least five pieces.
    if len(parts) < 5:
        return None
    owner, repo_name = parts[3], parts[4]
    # Strip only a trailing ".git"; names such as "user.github.io" contain
    # ".git" in the middle and must be kept intact.
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not owner or not repo_name:
        return None
    return owner, repo_name


def fetch_git_files(git_url, git_provider):
    """Collect the contents of well-known dependency files from a repository.

    The repository is always looked up through the PyGithub client using
    ``GIT_TOKEN``; ``git_provider`` is only used to word the error message.

    Args:
        git_url: Repository URL of the form ``https://host/owner/repo[.git]``.
        git_provider: Display name of the provider (e.g. ``"GitHub"``).

    Returns:
        One string in which each dependency file found in the repository root
        is preceded by a ``--- <file name> ---`` header. On any failure a
        message starting with ``"Error:"`` is returned instead; callers
        detect failures by that prefix.
    """
    parsed = _split_owner_repo(git_url)
    if parsed is None:
        return "Error: Could not determine the repository owner and name from the URL."
    owner, repo_name = parsed

    # List of common dependency files to look for
    dependency_files = (
        'requirements.txt', 'package.json', 'Gemfile', 'pom.xml',
        'build.gradle', 'composer.json', 'Cargo.toml', 'go.mod', 'Pipfile',
    )

    try:
        # Initialize PyGitHub with the token and get the repository
        client = Github(GIT_TOKEN)
        repository = client.get_repo(f"{owner}/{repo_name}")

        sections = []
        for file_path in dependency_files:
            try:
                file_content = repository.get_contents(file_path)
                sections.append(
                    f"\n\n--- {file_path} ---\n{file_content.decoded_content.decode('utf-8')}"
                )
            except GithubException:
                # File not found, skip to next file
                continue
    except GithubException as e:
        # Keep the "Error:" prefix so the caller recognises this as a failure
        # instead of passing the message on as if it were file content.
        return f"Error: Unable to access {git_provider}: {str(e)}"

    if not sections:
        return "Error: No dependency files found in the repository."
    return "".join(sections)
def process_chunk_with_gemini(chunk, gemini_api_key):
    """Ask Gemini to catalogue the open-source licenses in one piece of text.

    Args:
        chunk: Text taken from the collected dependency files.
        gemini_api_key: API key used to configure the ``genai`` client.

    Returns:
        The text of the model's response. If anything raises while the
        request is made or the response text is read, the failure is printed
        and returned as an ``"Error processing chunk: ..."`` string instead
        of being propagated.
    """
    genai.configure(api_key=gemini_api_key)
    license_model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')

    # The prompt lines stay flush-left so the text sent to the model carries
    # no extra leading whitespace.
    request_text = f"""
Analyze the following file content for open-source license information:
{chunk}
Please provide:
1. A numbered list with the name dependency and version as the title
2. 1st bullet under title has a brief summary of what the dependency does
3. 2nd bullet under title has the license name
4. 3rd bullet under title has a hyperlink to the license file
5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
"""

    try:
        reply = license_model.generate_content(request_text)
        return reply.text
    except Exception as error:
        failure = f"Error processing chunk: {str(error)}"
        print(failure)
        return failure
def _split_on_line_boundaries(text, chunk_size):
    """Split *text* into pieces of at most *chunk_size* characters.

    Pieces end at line breaks whenever possible so that a single dependency
    entry is never divided between two pieces; only a line that is itself
    longer than *chunk_size* is cut mid-line. Joining the pieces reproduces
    *text* exactly.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    pieces = []
    current = ""
    for line in text.splitlines(keepends=True):
        # Close the current piece when this line would push it over the limit.
        if current and len(current) + len(line) > chunk_size:
            pieces.append(current)
            current = ""
        # Oversized single line: fall back to fixed-width slices.
        while len(line) > chunk_size:
            pieces.append(line[:chunk_size])
            line = line[chunk_size:]
        current += line
    if current:
        pieces.append(current)
    return pieces


@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
def process_with_gemini(file_content, gemini_api_key):
    """Analyze the collected dependency files with Gemini, chunk by chunk.

    The text is divided into chunks of at most 2000 characters, split on line
    boundaries so a dependency name or version is not cut in half between two
    prompts. Each chunk is analyzed independently.

    Args:
        file_content: Concatenated dependency-file text to analyze.
        gemini_api_key: API key forwarded to ``process_chunk_with_gemini``.

    Returns:
        The per-chunk results joined with blank lines. An empty input yields
        an empty string.

    Note:
        An exception raised while processing a chunk is recorded in the
        output as an ``"Error processing chunk: ..."`` entry rather than
        propagated, so such failures do not trigger the retry decorator.
    """
    chunk_size = 2000
    results = []
    for chunk in _split_on_line_boundaries(file_content, chunk_size):
        try:
            results.append(process_chunk_with_gemini(chunk, gemini_api_key))
        except Exception as e:
            # Best effort: keep going so one bad chunk does not discard the
            # results already gathered for the others.
            print(f"Error processing chunk: {str(e)}")
            results.append(f"Error processing chunk: {str(e)}")
    return "\n\n".join(results)
def process_input(git_url, git_provider):
    """Validate the request, fetch dependency files and run the analysis.

    Resets the module-level ``generated_file`` to ``None`` and, on success,
    stores the encoded analysis there for the download callbacks.

    Args:
        git_url: Repository URL entered by the user; may be ``None`` or empty
            when the field was left blank.
        git_provider: Selected provider name (e.g. ``"GitHub"``); may be
            ``None`` when the dropdown was cleared.

    Returns:
        A ``(message, analysis)`` tuple. ``analysis`` is the analysis text on
        success and ``None`` on any failure, in which case ``message``
        describes the problem.
    """
    global generated_file
    generated_file = None

    if not git_provider:
        return "Error: Please select a Git provider.", None

    expected_prefix = f"https://{git_provider.lower()}.com/"
    # A blank field arrives as None; treat it like any other invalid URL
    # instead of failing on None.startswith.
    git_url = (git_url or "").strip()
    if not git_url.startswith(expected_prefix):
        return f"Error: Invalid {git_provider} URL. Please use the format: {expected_prefix}username/repository.git", None

    file_content = fetch_git_files(git_url, git_provider)
    # Failures come back as text starting with "Error" ("Error: ..." as well
    # as "Error accessing ..."); real content starts with a file header, so
    # matching on the bare word catches every failure without false hits.
    if file_content.startswith("Error"):
        return file_content, None

    try:
        # Process the file content with Gemini
        analysis = process_with_gemini(file_content, GEMINI_API_KEY)
        generated_file = analysis.encode()
        return "Analysis complete. You can now preview the results and download them.", analysis
    except Exception as e:
        return f"Error processing the files: {str(e)}", None
def _build_layout():
    """Assemble the page: a title row above an input card and a results card."""
    title_row = dbc.Row([
        dbc.Col(html.H1("Open Source License Extractor", className="my-4"), width=12)
    ])

    # Left column: provider selection, repository URL and the Analyze button.
    provider_dropdown = dcc.Dropdown(
        id='git-provider',
        options=[
            {'label': provider, 'value': provider}
            for provider in ('GitHub', 'GitLab', 'Gitea')
        ],
        value='GitHub',
        className="mb-3"
    )
    loading_indicator = dcc.Loading(
        id="loading",
        type="dot",
        children=[html.Div(id="loading-output")]
    )
    input_body = dbc.CardBody([
        html.P("Provide a Git repository URL to analyze open-source licenses from dependency files.", className="mb-4"),
        provider_dropdown,
        dbc.Input(id="git-url", placeholder="Enter Git Repository URL", type="text", className="mb-3"),
        dbc.Button("Analyze", id="analyze-button", color="primary", className="mb-3"),
        loading_indicator,
    ])
    input_column = dbc.Col([dbc.Card([input_body])], md=6)

    # Right column: analysis output plus the download buttons, which start
    # out disabled, and their download targets.
    results_body = dbc.CardBody([
        html.H4("Analysis Results", className="mb-3"),
        html.Div(id="output", className="mb-3"),
        dbc.Button("Download as Word", id="download-word-button", color="secondary", className="mb-3 me-2", disabled=True),
        dbc.Button("Download as Markdown", id="download-markdown-button", color="secondary", className="mb-3", disabled=True),
        dcc.Download(id="download-word"),
        dcc.Download(id="download-markdown"),
    ])
    results_column = dbc.Col([dbc.Card([results_body])], md=6)

    return dbc.Container(
        [title_row, dbc.Row([input_column, results_column])],
        fluid=True,
    )


app.layout = _build_layout()
@app.callback(
    [
        Output("output", "children"),
        Output("download-word-button", "disabled"),
        Output("download-markdown-button", "disabled"),
        Output("loading-output", "children"),
    ],
    [Input("analyze-button", "n_clicks")],
    [
        State("git-url", "value"),
        State("git-provider", "value"),
    ],
    prevent_initial_call=True,
)
def update_output(n_clicks, git_url, git_provider):
    """Run the analysis when Analyze is clicked and refresh the results card.

    Returns, in output order: the text to display (the analysis when there is
    one, otherwise the status or error message), the ``disabled`` flag for
    each of the two download buttons, and an empty string for the loading
    placeholder.
    """
    if n_clicks is None:
        raise PreventUpdate

    status_message, analysis_text = process_input(git_url, git_provider)
    displayed = analysis_text if analysis_text else status_message
    # Downloads stay disabled while the module-level generated_file is None.
    downloads_disabled = generated_file is None
    return displayed, downloads_disabled, downloads_disabled, ""
@app.callback(
    Output("download-word", "data"),
    Input("download-word-button", "n_clicks"),
    prevent_initial_call=True,
)
def download_word(n_clicks):
    """Send the stored analysis as ``license_analysis.docx``.

    The decoded analysis text is placed in a single paragraph of a new Word
    document. Nothing is sent when the button has not been clicked or no
    analysis is stored.
    """
    nothing_to_send = n_clicks is None or generated_file is None
    if nothing_to_send:
        raise PreventUpdate

    word_doc = Document()
    word_doc.add_paragraph(generated_file.decode())

    # Serialise in memory; getvalue() returns the whole buffer regardless of
    # the current stream position.
    with BytesIO() as stream:
        word_doc.save(stream)
        payload = stream.getvalue()
    return dcc.send_bytes(payload, "license_analysis.docx")
@app.callback(
    Output("download-markdown", "data"),
    Input("download-markdown-button", "n_clicks"),
    prevent_initial_call=True,
)
def download_markdown(n_clicks):
    """Send the stored analysis text as ``license_analysis.md``.

    Nothing is sent when the button has not been clicked or no analysis is
    stored.
    """
    if n_clicks is not None and generated_file is not None:
        return dcc.send_string(generated_file.decode(), "license_analysis.md")
    raise PreventUpdate
if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0), so debug mode, which
    # turns on the Dash dev tools and the interactive debugger, must not be
    # on unconditionally. It is enabled only when DASH_DEBUG is set to a
    # truthy value (1/true/yes/on).
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    print("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")