|
import gradio as gr |
|
import requests |
|
import json |
|
import re |
|
from github import Github, GithubException |
|
import google.generativeai as genai |
|
import paramiko |
|
import base64 |
|
import os |
|
|
|
def extract_licenses(file_content, github_url, ssh_private_key, gemini_api_key):
    """Run the whole analysis pipeline on the text of a dependency file.

    The text is parsed into ``(package, version)`` pairs, license details
    are looked up for those pairs, a list of probable package managers and
    frameworks is derived, and all of it is handed to Gemini for a written
    summary.

    ``github_url`` and ``ssh_private_key`` are accepted for interface
    compatibility but are not used inside this function.

    Returns:
        Whatever ``enrich_with_gemini`` returns for the collected data.
    """
    parsed = parse_dependency_file(file_content)
    license_report = fetch_license_info(parsed)
    package_hints = determine_probable_packages(file_content, parsed)
    return enrich_with_gemini(license_report, package_hints, gemini_api_key)
|
|
|
def parse_dependency_file(file_content):
    """Extract ``(package, version)`` pairs from dependency-file text.

    Each line is handled independently. Recognised shapes include
    ``name==1.0``, ``name>=1.0``, ``name~=1.0``, ``name = "1.0"``,
    ``"name": "^1.0",``, ``name@1.0`` and a bare ``name``.

    Args:
        file_content: Full text of the dependency file.

    Returns:
        A list of ``(package, version)`` tuples in file order. ``version``
        is ``"latest"`` when the line names a package without a version.
        Blank lines, ``#`` comments, option lines starting with ``-`` and
        lines from which no package name can be extracted are skipped.
    """
    # One run of operator characters separates name and version, so that
    # "==" or ">=" is consumed as a single separator instead of leaving an
    # empty version behind.
    separator = re.compile(r"\s*[=@:<>!~]+\s*")
    # Shape a line must have to count as a package when no version is given;
    # keeps structural lines such as "{" or "}" out of the result.
    bare_name = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")

    dependencies = []
    for raw_line in file_content.splitlines():
        # Drop trailing comments and environment markers ("; python_version...").
        line = raw_line.split(" #", 1)[0].split(";", 1)[0]
        # Remove JSON/TOML quoting and the trailing comma of JSON entries.
        line = line.replace('"', "").replace("'", "").strip().rstrip(",").strip()
        if not line or line.startswith(("#", "-")):
            continue

        # A leading "@" belongs to a scoped package name (e.g. "@scope/pkg"),
        # so start looking for the separator after it.
        search_from = 1 if line.startswith("@") else 0
        match = separator.search(line, search_from)
        if match:
            package = line[:match.start()]
            version = line[match.end():].strip()
        else:
            package, version = line, ""

        # "name[extra]" -> "name": extras are not part of the package name.
        package = package.split("[", 1)[0].strip()
        if not package:
            continue
        if match is None and not bare_name.fullmatch(package):
            continue

        dependencies.append((package, version or "latest"))
    return dependencies
|
|
|
def fetch_license_info(dependencies):
    """Look up license and summary metadata on PyPI for each dependency.

    Args:
        dependencies: Iterable of ``(package, version)`` pairs. An empty
            version or the placeholder ``"latest"`` queries the package's
            current release instead of a specific one.

    Returns:
        One text entry per dependency (package, version, license,
        description), joined with newlines. A lookup that fails is reported
        with an "Unknown" license instead of raising. An empty input yields
        an empty string.
    """
    from urllib.parse import quote

    entries = []
    for package, version in dependencies:
        # Names and versions come from user-supplied text; quote them so they
        # cannot alter the request path.
        url = f"https://pypi.org/pypi/{quote(package, safe='')}"
        if version and version != "latest":
            url += f"/{quote(version, safe='')}"
        url += "/json"

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            info = response.json()["info"]
            # Fields may be present but null/empty, so fall back with `or`.
            license_name = (
                info.get("license_expression") or info.get("license") or "Unknown"
            )
            description = info.get("summary") or "No description available"
        except (
            requests.RequestException,
            ValueError,
            KeyError,
            TypeError,
            AttributeError,
        ):
            # Best effort: network errors, HTTP errors and unexpected payload
            # shapes all degrade to an "Unknown" entry.
            entries.append(f"Package: {package}\nVersion: {version}\nLicense: Unknown\nDescription: Unable to fetch information\n")
        else:
            entries.append(f"Package: {package}\nVersion: {version}\nLicense: {license_name}\nDescription: {description}\n")

    return "\n".join(entries)
|
|
|
def determine_probable_packages(file_content, dependencies):
    """Guess the package manager and well-known frameworks in use.

    Args:
        file_content: Text of the dependency file. It is searched
            case-insensitively for the file names ``package.json``,
            ``gemfile`` and ``requirements.txt``; only the first one found
            (in that order) contributes a package-manager entry.
        dependencies: Iterable of ``(package, version)`` pairs. Package
            names are compared case-insensitively, ignoring surrounding
            whitespace and quotes, against a small table of known
            frameworks.

    Returns:
        The matching descriptions joined by newlines, each listed once, or
        an empty string when nothing matched.
    """
    manager_markers = (
        ("package.json", "npm (Node Package Manager)"),
        ("gemfile", "Bundler (Ruby)"),
        ("requirements.txt", "pip (Python Package Installer)"),
    )
    common_packages = {
        "react": "React (JavaScript library)",
        "django": "Django (Python web framework)",
        "rails": "Ruby on Rails (Web application framework)",
    }

    probable_packages = []

    lowered_content = file_content.lower()
    for marker, description in manager_markers:
        if marker in lowered_content:
            probable_packages.append(description)
            break  # first match wins

    for package, _ in dependencies:
        # Names taken from JSON-style files may still carry their quotes.
        normalized = package.strip().strip("\"'").lower()
        description = common_packages.get(normalized)
        if description and description not in probable_packages:
            probable_packages.append(description)

    return "\n".join(probable_packages)
|
|
|
def enrich_with_gemini(licenses, probable_packages, api_key,
                       model_name='gemini-2.5-pro-preview-03-25'):
    """Ask Gemini for a project summary based on the collected license data.

    Args:
        licenses: Text describing the license of each dependency.
        probable_packages: Text listing probable package managers/frameworks.
        api_key: Gemini API key; must be a non-blank string.
        model_name: Name of the Gemini model to query.

    Returns:
        The text of the model's response. If the response carries no text,
        a short notice followed by the raw license information is returned
        instead.

    Raises:
        ValueError: If ``api_key`` is missing or blank.
    """
    if not api_key or not api_key.strip():
        raise ValueError("A Gemini API key is required to analyze the license information.")

    genai.configure(api_key=api_key.strip())
    model = genai.GenerativeModel(model_name)

    prompt = f"""
    Analyze the following open-source license information and probable packages:

    License Information:
    {licenses}

    Probable Packages:
    {probable_packages}

    Please provide a summary of the project based on these dependencies, including:
    1. The likely type of project (e.g., web application, data science, etc.)
    2. Any potential license conflicts or considerations
    3. Suggestions for best practices in open-source license management for this project
    """

    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError as exc:
        # NOTE(review): the SDK documents `response.text` as raising
        # ValueError when the response has no text part (e.g. a blocked
        # prompt) - confirm against the installed google-generativeai version.
        return (
            f"Gemini did not return a text analysis ({exc}).\n\n"
            f"License Information:\n{licenses}"
        )
|
|
|
def fetch_github_info(github_url, ssh_private_key):
    """Fetch the text of a single file from a GitHub repository over SSH.

    Args:
        github_url: URL of the form
            ``https://github.com/<owner>/<repo>/blob/<branch>/<path/to/file>``.
        ssh_private_key: Private key text used to authenticate as ``git``.

    Returns:
        The file content as text. On any failure a message starting with
        ``"Error:"`` is returned instead of raising, so callers can detect
        problems with a prefix check.
    """
    import shlex
    import tempfile
    from urllib.parse import unquote, urlparse

    # Path layout: /<owner>/<repo>/<kind>/<branch>/<path...>
    segments = [
        unquote(part)
        for part in urlparse(github_url or "").path.split("/")
        if part
    ]
    if len(segments) < 5:
        return (
            "Error: Invalid GitHub URL. Please use the format: "
            "https://github.com/username/repository/blob/branch/path/to/file"
        )
    owner, repo, branch = segments[0], segments[1], segments[3]
    file_path = "/".join(segments[4:])

    ssh = None
    key_path = None
    try:
        ssh = paramiko.SSHClient()
        # NOTE(security): AutoAddPolicy accepts any host key on first contact,
        # so the connection is not protected against a spoofed host.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # mkstemp creates the file readable/writable by the current user
        # only, under a unique name, so the key is neither exposed to other
        # users nor clobbered by a concurrent request.
        fd, key_path = tempfile.mkstemp(prefix="github_key_")
        with os.fdopen(fd, "w") as key_file:
            key_file.write(ssh_private_key)

        ssh.connect('github.com', username='git', key_filename=key_path, timeout=30)

        # URL parts are user input: quote them before building a shell command.
        # NOTE(review): GitHub's SSH endpoint may not permit
        # `git archive --remote`; confirm this command is accepted.
        remote = shlex.quote(f"git@github.com:{owner}/{repo}.git")
        command = (
            f"git archive --remote={remote} "
            f"{shlex.quote(branch)} {shlex.quote(file_path)} | tar -xO"
        )
        stdin, stdout, stderr = ssh.exec_command(command)
        file_content = stdout.read().decode('utf-8')
        error_output = stderr.read().decode('utf-8', errors='replace').strip()
    except Exception as e:
        return f"Error: Could not access GitHub: {e}"
    finally:
        # Always release the connection and delete the key, even on failure.
        if ssh is not None:
            ssh.close()
        if key_path is not None:
            try:
                os.remove(key_path)
            except OSError:
                pass

    if not file_content:
        detail = f" ({error_output})" if error_output else ""
        return f"Error: File not found or empty.{detail}"

    return file_content
|
|
|
def _read_uploaded_file(file):
    """Return the UTF-8 text of an upload given as bytes, a path, or an object with ``.name``."""
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode('utf-8')
    if isinstance(file, (str, os.PathLike)):
        path = file
    else:
        path = getattr(file, "name", None)
    if path is None:
        raise TypeError(f"unsupported upload type: {type(file).__name__}")
    with open(path, encoding='utf-8') as handle:
        return handle.read()


def process_input(file, github_url, ssh_private_key, gemini_api_key):
    """Validate the form inputs, obtain the dependency file and analyze it.

    Exactly one source must be given: an uploaded file, or a GitHub file URL
    together with an SSH private key.

    Args:
        file: The upload, as raw bytes, a file path, or an object exposing
            the path as ``.name``; ``None`` when nothing was uploaded.
        github_url: ``https://github.com/...`` URL of the file, or empty.
        ssh_private_key: Private key text used for the GitHub fetch.
        gemini_api_key: API key passed on to the analysis step.

    Returns:
        The analysis text, or a message starting with ``"Error"`` describing
        what went wrong. This function does not raise for invalid input.
    """
    if file is not None and github_url:
        return "Error: Please either upload a file OR provide a GitHub URL, not both."

    if file is None:
        if not (github_url and ssh_private_key):
            return "Error: Please either upload a file OR provide both GitHub URL and SSH Private Key."
        if not github_url.startswith("https://github.com/"):
            return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository/blob/branch/path/to/file"
        if not ssh_private_key.strip():
            return "Error: SSH Private Key is empty. Please provide a valid key."

    # Check the key before reading or fetching anything, so a missing key
    # fails fast instead of after a network round trip.
    if not gemini_api_key or not gemini_api_key.strip():
        return "Error: Gemini API Key is required."

    if file is not None:
        try:
            file_content = _read_uploaded_file(file)
        except (OSError, UnicodeDecodeError, TypeError) as e:
            return f"Error: Could not read the uploaded file: {e}"
    else:
        file_content = fetch_github_info(github_url, ssh_private_key)
        # Cover both error prefixes the fetch step may produce.
        if file_content.startswith(("Error:", "Error accessing GitHub:")):
            return file_content

    try:
        return extract_licenses(file_content, github_url, ssh_private_key, gemini_api_key)
    except Exception as e:
        return f"Error processing the file: {str(e)}"
|
|
|
# Gradio front end: one form whose four inputs map positionally onto the
# parameters of `process_input`, with a single text output.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        # type="binary" makes Gradio pass the upload as raw bytes, which is
        # what the handler decodes as UTF-8.
        gr.File(
            label="Upload dependency file (e.g., requirements.txt, package.json, Gemfile)",
            type="binary",
        ),
        gr.Textbox(label="GitHub File URL (optional)"),
        gr.Textbox(label="SSH Private Key (required if using GitHub URL)", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Upload a dependency file OR provide a GitHub file URL to extract and analyze open-source license information.",
)


if __name__ == "__main__":
    # Start the web UI only when run as a script, not on import.
    iface.launch()