bluenevus's picture
Update app.py
13bae25 verified
raw
history blame
4.28 kB
import gradio as gr
import google.generativeai as genai
import requests
import base64
import json
def fetch_github_files(github_url, personal_access_token, branch='main'):
    """Fetch the text of common dependency manifests from a GitHub repository.

    Each manifest name in the built-in list is requested from the GitHub
    "repository contents" API at the repository root. Every file found is
    appended to the result under a ``--- <file name> ---`` header.

    Args:
        github_url: Repository URL of the form
            ``https://github.com/<owner>/<repo>`` (an optional ``.git``
            suffix and trailing path segments are tolerated).
        personal_access_token: GitHub token sent in the ``Authorization``
            header.
        branch: Branch, tag or commit to read from. Defaults to ``'main'``.

    Returns:
        The concatenated manifest contents, or a string beginning with
        ``"Error:"`` describing why nothing could be returned. Callers detect
        failure by that prefix, so every failure path uses it.
    """
    # Expected shape after splitting: ['https:', '', 'github.com', owner, repo, ...]
    parts = github_url.strip().rstrip('/').split('/')
    if len(parts) < 5 or not parts[3] or not parts[4]:
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"
    owner = parts[3]
    repo = parts[4]
    # Remove only a *trailing* ".git". Splitting on ".git" anywhere would
    # truncate names such as "user.github.io" to "user".
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"

    # List of common dependency files to look for
    dependency_files = (
        'requirements.txt',
        'package.json',
        'Gemfile',
        'pom.xml',
        'build.gradle',
        'composer.json',
        'Cargo.toml',
        'go.mod',
        'Pipfile',
    )

    # Set up headers with the personal access token
    headers = {
        "Authorization": f"token {personal_access_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    sections = []
    file_path = None  # kept in scope for the parse-error message below
    try:
        for file_path in dependency_files:
            api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
            # A timeout keeps a stalled connection from hanging the UI forever.
            response = requests.get(
                api_url,
                headers=headers,
                params={"ref": branch},
                timeout=30,
            )

            # Authentication/authorization failures affect every file, so
            # report them instead of claiming that no manifests exist.
            if response.status_code in (401, 403):
                return (
                    f"Error: GitHub rejected the request (HTTP {response.status_code}). "
                    "Check the personal access token, its permissions, and API rate limits."
                )
            if response.status_code != 200:
                # Most commonly 404: this manifest is simply not present.
                continue

            payload = response.json()
            if not (isinstance(payload, dict) and 'content' in payload):
                # This is a directory or something else, skip it
                continue

            raw = base64.b64decode(payload['content'])
            # Replace undecodable bytes rather than aborting the whole scan.
            text = raw.decode('utf-8', errors='replace')
            sections.append(f"\n\n--- {file_path} ---\n{text}")
    except json.JSONDecodeError:
        # Checked before RequestException so that a malformed body gets the
        # specific parse message even if the raised error is also a
        # RequestException subclass (as in recent requests releases).
        return f"Error: Unable to parse GitHub API response for {file_path}"
    except requests.exceptions.RequestException as e:
        # Uses the "Error:" prefix so callers recognise this as a failure.
        return f"Error: Unable to access GitHub: {str(e)}"

    if not sections:
        return "Error: No dependency files found in the repository."
    return "".join(sections)
def process_with_gemini(file_content, gemini_api_key):
    """Ask Gemini to catalogue the licenses of the dependencies in *file_content*.

    Args:
        file_content: Concatenated dependency-manifest text to analyse.
        gemini_api_key: API key used to configure the ``google.generativeai``
            client.

    Returns:
        The text of the model's response.

    Raises:
        Whatever the Gemini client raises on failure; the caller is expected
        to handle it.
    """
    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')

    # Output-format instructions. The numbering is sequential and the wording
    # is unambiguous so the model is not misled (the license name is given as
    # an example, not as a fact about every dependency).
    instructions = (
        "Please provide:\n"
        "1. A numbered list with the dependency name and version as the title\n"
        "2. 1st bullet under title has a brief summary of what the dependency does\n"
        "3. 2nd bullet under title has the license name (for example, Apache 2.0)\n"
        "4. 3rd bullet under title has a hyperlink to the license file\n"
        "5. New title called potential license conflicts describes issues between licenses that may cause legal problems\n"
        "6. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.\n"
    )
    prompt = (
        "\nAnalyze the following file content for open-source license information:\n"
        f"{file_content}\n"
        f"{instructions}"
    )

    # generate_content() takes no ``timeout`` keyword; per-request transport
    # settings such as the timeout (in seconds) go through ``request_options``.
    response = model.generate_content(prompt, request_options={"timeout": 120})
    return response.text
def process_input(github_url, personal_access_token, gemini_api_key):
    """Validate the form inputs, fetch dependency files and analyse them.

    Args:
        github_url: Repository URL; must start with ``https://github.com/``.
        personal_access_token: GitHub personal access token (required).
        gemini_api_key: Gemini API key (required).

    Returns:
        The license analysis text on success, otherwise a human-readable
        message beginning with ``"Error"``.
    """
    # Normalise once: stray whitespace (e.g. a trailing newline from a pasted
    # token) would otherwise fail validation or corrupt the request.
    github_url = (github_url or "").strip()
    personal_access_token = (personal_access_token or "").strip()
    gemini_api_key = (gemini_api_key or "").strip()

    if not github_url.startswith("https://github.com/"):
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"
    if not personal_access_token:
        return "Error: Personal Access Token is empty. Please provide a valid token."
    # Fail fast before any network call when the analysis step cannot run.
    if not gemini_api_key:
        return "Error: Gemini API Key is empty. Please provide a valid key."

    file_content = fetch_github_files(github_url, personal_access_token)
    # Match on "Error" rather than "Error:" so that every failure message from
    # the fetcher (including "Error accessing GitHub: ...") is returned to the
    # user instead of being sent to the model as if it were file content.
    # Successful results start with a "--- <file> ---" header, never "Error".
    if file_content.startswith("Error"):
        return file_content

    try:
        # Process the file content with Gemini
        return process_with_gemini(file_content, gemini_api_key)
    except Exception as e:
        # UI boundary: surface any failure as text rather than a stack trace.
        return f"Error processing the files: {str(e)}"
# Input widgets, in the same positional order as process_input's parameters:
# repository URL, GitHub token, Gemini key. The two secrets are masked.
_input_fields = [
    gr.Textbox(label="GitHub Repository URL (format: https://github.com/username/repository.git)"),
    gr.Textbox(label="GitHub Personal Access Token", type="password"),
    gr.Textbox(label="Gemini API Key", type="password"),
]

# Single text area that shows either the analysis or an error message.
_output_field = gr.Textbox(label="License Information and Analysis")

# Gradio interface wiring the form to process_input.
iface = gr.Interface(
    fn=process_input,
    inputs=_input_fields,
    outputs=_output_field,
    title="Open Source License Extractor",
    description="Provide a GitHub repository URL to analyze open-source licenses from dependency files.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()