bluenevus's picture
Update app.py
ec4142d verified
raw
history blame
6.34 kB
import gradio as gr
import requests
import json
import re
from github import Github, GithubException
import google.generativeai as genai
import paramiko
import base64
import os
def extract_licenses(file_content, github_url, ssh_private_key, gemini_api_key):
    """Run the full license-analysis pipeline on a dependency file's text.

    Steps, in order: parse the dependencies, look up their license data,
    guess the ecosystem/frameworks in use, and ask Gemini for a summary.

    Args:
        file_content: Text of the dependency file.
        github_url: Accepted for interface compatibility; not used here.
        ssh_private_key: Accepted for interface compatibility; not used here.
        gemini_api_key: API key forwarded to ``enrich_with_gemini``.

    Returns:
        Whatever ``enrich_with_gemini`` returns for the collected data.
    """
    parsed = parse_dependency_file(file_content)
    license_report = fetch_license_info(parsed)
    ecosystem_hints = determine_probable_packages(file_content, parsed)
    return enrich_with_gemini(license_report, ecosystem_hints, gemini_api_key)
# A version specifier/separator: "=", "==", "===", ">=", "<=", "~=", "!=",
# or the "@" / ":" used by npm-style and JSON-style entries.  The whole
# operator is consumed so "pkg==1.0" no longer yields an empty version.
_SPECIFIER_RE = re.compile(r'[<>~!]?=+|[@:]')
# A line that is nothing but a plausible package name (e.g. "numpy").
_BARE_NAME_RE = re.compile(r'[A-Za-z0-9][A-Za-z0-9._-]*')
# Characters trimmed from both ends of a name/version token (JSON quoting,
# trailing commas, stray whitespace).
_TOKEN_PADDING = ' \t\r"\','


def parse_dependency_file(file_content):
    """Extract ``(package, version)`` pairs from dependency-file text.

    Each line is split at its first version separator (``=``, ``==``,
    ``>=``, ``<=``, ``~=``, ``!=``, ``@`` or ``:``).  Blank lines and ``#``
    comments are ignored, surrounding quotes/commas are stripped, and a line
    consisting only of a package name is reported with version ``"latest"``.

    Args:
        file_content: Text of the dependency file.

    Returns:
        A list of ``(package, version)`` string tuples in file order.
    """
    dependencies = []
    for raw_line in file_content.splitlines():
        # Drop trailing " # comment" text, then surrounding whitespace.
        line = raw_line.split(' #', 1)[0].strip()
        if not line or line.startswith('#'):
            continue

        parts = _SPECIFIER_RE.split(line, maxsplit=1)
        if len(parts) == 1:
            # No separator: accept only something that looks like a package
            # name so structural lines such as "{" or "}" are not reported.
            if _BARE_NAME_RE.fullmatch(line):
                dependencies.append((line, "latest"))
            continue

        package = parts[0].strip(_TOKEN_PADDING)
        if not package:
            continue
        version = parts[1].strip(_TOKEN_PADDING) or "latest"
        dependencies.append((package, version))
    return dependencies
def fetch_license_info(dependencies):
    """Look up license and summary metadata on PyPI for each dependency.

    Lookups are best-effort: any network, HTTP, or payload problem for a
    package produces an "Unknown" entry instead of aborting the whole run.

    Args:
        dependencies: Iterable of ``(package, version)`` string pairs.  A
            version of ``"latest"`` (or an empty one) queries the project's
            current release rather than a specific version.

    Returns:
        One text block per dependency, joined by blank lines; an empty
        string when ``dependencies`` is empty.
    """
    from urllib.parse import quote

    reports = []
    for package, version in dependencies:
        # Names/versions come from an uploaded file, so escape them before
        # placing them in the URL path.
        project = quote(package, safe='')
        if version and version != "latest":
            url = f"https://pypi.org/pypi/{project}/{quote(version, safe='')}/json"
        else:
            # "latest" is not a real release identifier; use the project URL.
            url = f"https://pypi.org/pypi/{project}/json"

        try:
            # A timeout keeps one unresponsive request from hanging the app.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            info = response.json()['info']
            # PyPI may report null/empty fields; fall back to the defaults.
            license_name = info.get('license') or 'Unknown'
            description = info.get('summary') or 'No description available'
        except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError):
            reports.append(f"Package: {package}\nVersion: {version}\nLicense: Unknown\nDescription: Unable to fetch information\n")
        else:
            reports.append(f"Package: {package}\nVersion: {version}\nLicense: {license_name}\nDescription: {description}\n")
    return "\n".join(reports)
def determine_probable_packages(file_content, dependencies):
    """Guess the package manager and well-known frameworks in use.

    The package manager is inferred from a manifest file name appearing in
    the text itself (case-insensitive); only the first matching name counts.
    Frameworks are inferred from dependency names found in a small lookup
    table.

    Args:
        file_content: Text of the dependency file.
        dependencies: Iterable of ``(package, version)`` pairs.

    Returns:
        The guessed labels, one per line; an empty string if nothing matched.
    """
    lowered = file_content.lower()

    # Ordered by priority: the first marker found in the text wins.
    manifest_markers = (
        ("package.json", "npm (Node Package Manager)"),
        ("gemfile", "Bundler (Ruby)"),
        ("requirements.txt", "pip (Python Package Installer)"),
    )
    guesses = [label for marker, label in manifest_markers if marker in lowered][:1]

    # Frameworks recognised by their dependency name.
    known_frameworks = {
        "react": "React (JavaScript library)",
        "django": "Django (Python web framework)",
        "rails": "Ruby on Rails (Web application framework)",
    }
    guesses.extend(
        known_frameworks[name.lower()]
        for name, _version in dependencies
        if name.lower() in known_frameworks
    )
    return "\n".join(guesses)
def enrich_with_gemini(licenses, probable_packages, api_key):
    """Ask a Gemini model to summarise the project from its dependencies.

    Configures the ``genai`` client with ``api_key``, embeds the license
    report and the probable-package list into a fixed prompt, and sends it
    to the model.

    Args:
        licenses: License report text to embed in the prompt.
        probable_packages: Probable-package text to embed in the prompt.
        api_key: Gemini API key.

    Returns:
        The ``text`` attribute of the model's response.
    """
    genai.configure(api_key=api_key)
    gemini = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
    # The prompt body is kept flush-left so no indentation leaks into it.
    analysis_request = f"""
Analyze the following open-source license information and probable packages:
License Information:
{licenses}
Probable Packages:
{probable_packages}
Please provide a summary of the project based on these dependencies, including:
1. The likely type of project (e.g., web application, data science, etc.)
2. Any potential license conflicts or considerations
3. Suggestions for best practices in open-source license management for this project
"""
    return gemini.generate_content(analysis_request).text
def fetch_github_info(github_url, ssh_private_key):
    """Fetch one file's text from a GitHub repository over SSH.

    Args:
        github_url: File URL of the form
            ``https://github.com/<owner>/<repo>/blob/<branch>/<path>``.
            The branch is taken to be a single path segment, so branch
            names containing ``/`` are not supported.
        ssh_private_key: Private key text used to authenticate as ``git``.

    Returns:
        The file's text on success.  On failure, a message starting with
        ``"Error:"`` (bad URL, empty result) or ``"Error accessing GitHub:"``
        (any exception raised while connecting or fetching).
    """
    import shlex
    import tempfile

    # Validate the URL shape up front instead of surfacing an IndexError.
    parts = github_url.split('/')
    if len(parts) < 8 or not (parts[3] and parts[4] and parts[6] and parts[7]):
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository/blob/branch/path/to/file"
    owner, repo, branch = parts[3], parts[4], parts[6]
    file_path = '/'.join(parts[7:])

    ssh = None
    key_path = None
    try:
        # mkstemp gives a unique, owner-only file, so concurrent requests do
        # not overwrite each other's key and other users cannot read it.
        key_fd, key_path = tempfile.mkstemp(prefix='ssh_key_')
        with os.fdopen(key_fd, 'w') as key_file:
            key_file.write(ssh_private_key)

        ssh = paramiko.SSHClient()
        # SECURITY NOTE(review): AutoAddPolicy accepts any host key without
        # verification; consider pinning GitHub's published host keys.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect('github.com', username='git', key_filename=key_path)

        # URL components are user-supplied, so quote them for the remote shell.
        # NOTE(review): the remote spec was mangled by e-mail obfuscation in
        # the reviewed copy ("[email protected]"); it is reconstructed here as
        # --remote=git@github.com:<owner>/<repo>.git. Confirm against the
        # original file, and confirm GitHub's SSH endpoint accepts this command.
        remote = shlex.quote(f"git@github.com:{owner}/{repo}.git")
        command = (
            f"git archive --remote={remote} "
            f"{shlex.quote(branch)} {shlex.quote(file_path)} | tar -xO"
        )
        _stdin, stdout, _stderr = ssh.exec_command(command)
        file_content = stdout.read().decode('utf-8')

        if not file_content:
            return "Error: File not found or empty."
        return file_content
    except Exception as e:
        return f"Error accessing GitHub: {str(e)}"
    finally:
        # Always release the connection and remove the key from disk, even
        # when connecting or fetching failed.
        if ssh is not None:
            ssh.close()
        if key_path is not None and os.path.exists(key_path):
            os.remove(key_path)
def _read_uploaded_file(file):
    """Return the UTF-8 text of an upload given as bytes, a path, or a file wrapper."""
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode('utf-8')
    # Otherwise treat the value as a filesystem path, or as an object whose
    # ``name`` attribute holds one (e.g. a temporary-file wrapper).
    path = getattr(file, 'name', file)
    with open(path, encoding='utf-8') as handle:
        return handle.read()


def process_input(file, github_url, ssh_private_key, gemini_api_key):
    """Validate the form inputs and run the license analysis.

    Exactly one source must be given: an uploaded file, or a GitHub file URL
    together with an SSH private key.

    Args:
        file: Uploaded dependency file (raw bytes, a path, or an object with
            a ``name`` path attribute), or ``None``.
        github_url: GitHub file URL, or an empty string.
        ssh_private_key: SSH private key text; required with ``github_url``.
        gemini_api_key: API key forwarded to the analysis step.

    Returns:
        The analysis text, or a human-readable message starting with
        ``"Error"`` when the inputs are invalid or a step fails.
    """
    if file is not None and github_url:
        return "Error: Please either upload a file OR provide a GitHub URL, not both."

    if file is not None:
        try:
            file_content = _read_uploaded_file(file)
        except (OSError, UnicodeDecodeError, TypeError) as e:
            # Report unreadable/non-UTF-8 uploads instead of crashing the UI.
            return f"Error: Unable to read the uploaded file: {e}"
    elif github_url and ssh_private_key:
        if not github_url.startswith("https://github.com/"):
            return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository/blob/branch/path/to/file"
        if not ssh_private_key.strip():
            return "Error: SSH Private Key is empty. Please provide a valid key."
        file_content = fetch_github_info(github_url, ssh_private_key)
        # fetch_github_info reports failures with either of these prefixes;
        # checking only "Error:" let the second kind through as file content.
        if file_content.startswith(("Error:", "Error accessing GitHub:")):
            return file_content
    else:
        return "Error: Please either upload a file OR provide both GitHub URL and SSH Private Key."

    try:
        return extract_licenses(file_content, github_url, ssh_private_key, gemini_api_key)
    except Exception as e:
        return f"Error processing the file: {str(e)}"
# Gradio UI: four inputs feeding process_input, one text output.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        # type="binary" makes Gradio pass the upload as raw bytes, which is
        # what process_input decodes; the default passes a path/file wrapper.
        gr.File(
            label="Upload dependency file (e.g., requirements.txt, package.json, Gemfile)",
            type="binary",
        ),
        gr.Textbox(label="GitHub File URL (optional)"),
        # NOTE(review): a password field is single-line; confirm that a
        # multi-line PEM key survives being pasted into it.
        gr.Textbox(label="SSH Private Key (required if using GitHub URL)", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Upload a dependency file OR provide a GitHub file URL to extract and analyze open-source license information.",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()