# app.py -- uploaded by bluenevus
# Last commit: "Update app.py" (d4e593e, verified); file size 4.15 kB
import gradio as gr
import google.generativeai as genai
import requests
import base64
import json
def fetch_github_files(github_url, personal_access_token, branch="main", timeout=10):
    """Fetch well-known dependency manifests from a GitHub repository.

    Each file in a fixed list of common manifest names (requirements.txt,
    package.json, ...) is requested from the repository root through the
    GitHub contents API. The decoded text of every file found is returned
    as one string, each file preceded by a ``--- <name> ---`` header.

    Args:
        github_url: Repository URL of the form
            ``https://github.com/<owner>/<repo>[.git]``.
        personal_access_token: GitHub token sent in the Authorization header.
        branch: Branch, tag or commit to read from. Defaults to ``"main"``.
        timeout: Per-request timeout in seconds, so an unresponsive server
            cannot hang the caller indefinitely.

    Returns:
        The concatenated file contents on success. On any failure, a
        human-readable message that always starts with ``"Error:"`` so
        callers can detect it with a single prefix check.
    """
    invalid_url_message = (
        "Error: Invalid GitHub URL. Please use the format: "
        "https://github.com/username/repository.git"
    )

    # Validate the URL shape up front: indexing the split parts blindly would
    # raise IndexError for inputs such as "https://github.com/owner".
    parts = (github_url or "").strip().rstrip('/').split('/')
    if len(parts) < 5 or not parts[3] or not parts[4]:
        return invalid_url_message
    owner = parts[3]
    repo = parts[4]
    # Strip only a trailing ".git". Splitting on ".git" anywhere in the name
    # would truncate repositories such as "user.github.io" to "user".
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        return invalid_url_message

    # Common dependency manifests, looked up at the repository root.
    dependency_files = (
        'requirements.txt',
        'package.json',
        'Gemfile',
        'pom.xml',
        'build.gradle',
        'composer.json',
        'Cargo.toml',
        'go.mod',
        'Pipfile',
    )
    headers = {
        "Authorization": f"token {personal_access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    contents_url = f"https://api.github.com/repos/{owner}/{repo}/contents"

    sections = []
    try:
        for file_path in dependency_files:
            response = requests.get(
                f"{contents_url}/{file_path}",
                headers=headers,
                params={"ref": branch},
                timeout=timeout,
            )
            # A rejected token or exhausted rate limit affects every request;
            # report it instead of misleadingly claiming no files exist.
            if response.status_code in (401, 403):
                return (
                    f"Error: GitHub rejected the request (HTTP {response.status_code}). "
                    "Check the personal access token, its permissions and the API rate limit."
                )
            if response.status_code != 200:
                # Typically 404: this manifest is not present in the repo.
                continue
            try:
                payload = response.json()
            except ValueError:
                # JSON decode errors (stdlib and requests) derive from ValueError.
                return f"Error: Unable to parse GitHub API response for {file_path}"
            if not (isinstance(payload, dict) and 'content' in payload):
                # A directory listing or something else that is not a file.
                continue
            # The payload carries the file body base64-encoded; undecodable
            # bytes are replaced rather than aborting the whole scan.
            file_content = base64.b64decode(payload['content']).decode('utf-8', errors='replace')
            sections.append(f"\n\n--- {file_path} ---\n{file_content}")
    except requests.exceptions.RequestException as e:
        return f"Error: Unable to access GitHub: {str(e)}"

    if not sections:
        return "Error: No dependency files found in the repository."
    return "".join(sections)
def process_with_gemini(file_content, gemini_api_key, model_name='gemini-2.5-pro-preview-03-25'):
    """Ask a Gemini model to catalogue the licenses of the listed dependencies.

    Args:
        file_content: Text of one or more dependency manifests; it is embedded
            verbatim in the prompt.
        gemini_api_key: API key used to configure the ``genai`` client.
        model_name: Identifier of the Gemini model to use. Defaults to the
            model this app was originally written against.

    Returns:
        The text of the model's response.

    Note:
        Errors raised by the client library are not handled here; the caller
        is expected to catch them. (Presumably ``response.text`` can also
        raise when the response has no text -- confirm against the library
        docs.)
    """
    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel(model_name)
    # "Apache 2.0" is given as an example only: stating it as *the* license
    # name would bias the model towards reporting it for every dependency.
    prompt = f"""
Analyze the following file content for open-source license information:
{file_content}
Please provide:
1. A numbered list with the dependency name and version as the title
2. 1st bullet under title has a brief summary of what the dependency does
3. 2nd bullet under title has the license name (for example, Apache 2.0)
4. 3rd bullet under title has a hyperlink to the license file
5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
"""
    response = model.generate_content(prompt)
    return response.text
def process_input(github_url, personal_access_token, gemini_api_key):
    """Validate the form inputs, fetch dependency files and analyse them.

    This is the callback behind the Gradio interface. It never raises:
    every failure is reported as a string beginning with ``"Error"``.

    Args:
        github_url: Repository URL; must start with ``https://github.com/``.
        personal_access_token: GitHub personal access token.
        gemini_api_key: API key for the Gemini model.

    Returns:
        The license analysis produced by the model, or an error message.
    """
    # Pasted values often carry stray whitespace or newlines; treat a missing
    # value (None) the same as an empty one.
    github_url = (github_url or "").strip()
    personal_access_token = (personal_access_token or "").strip()
    gemini_api_key = (gemini_api_key or "").strip()

    if not github_url.startswith("https://github.com/"):
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"
    if not personal_access_token:
        return "Error: Personal Access Token is empty. Please provide a valid token."
    # Check the Gemini key before any network call so a missing key fails
    # fast instead of after the GitHub requests have been made.
    if not gemini_api_key:
        return "Error: Gemini API Key is empty. Please provide a valid key."

    try:
        file_content = fetch_github_files(github_url, personal_access_token)
        # Match on "Error" without the colon: the fetch step may also report
        # "Error accessing GitHub: ...", which must not be sent to the model
        # as if it were file content.
        if file_content.startswith("Error"):
            return file_content
        # Process the file content with Gemini
        return process_with_gemini(file_content, gemini_api_key)
    except Exception as e:
        # UI boundary: surface any unexpected failure as a message.
        return f"Error processing the files: {str(e)}"
# Gradio front end: three text inputs (repository URL plus two masked
# credential fields) feed process_input; its string result is shown in a
# single output box.
_repo_url_box = gr.Textbox(
    label="GitHub Repository URL (format: https://github.com/username/repository.git)"
)
_github_token_box = gr.Textbox(label="GitHub Personal Access Token", type="password")
_gemini_key_box = gr.Textbox(label="Gemini API Key", type="password")
_analysis_box = gr.Textbox(label="License Information and Analysis")

iface = gr.Interface(
    fn=process_input,
    inputs=[_repo_url_box, _github_token_box, _gemini_key_box],
    outputs=_analysis_box,
    title="Open Source License Extractor",
    description="Provide a GitHub repository URL to analyze open-source licenses from dependency files.",
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()