import json
import re
from urllib.parse import quote

import google.generativeai as genai
import gradio as gr
import requests
from github import Github, GithubException, Auth


def extract_licenses(file_content, github_url, github_token, gemini_api_key):
    """Run the whole analysis pipeline on the text of a dependency file.

    The text is parsed into ``(package, version)`` pairs, license metadata is
    looked up for each pair, well-known tooling/frameworks are guessed, and the
    combined information is handed to Gemini for a written summary.

    Args:
        file_content: Text of the dependency file to analyze.
        github_url: Accepted for interface compatibility; not used here.
        github_token: Accepted for interface compatibility; not used here.
        gemini_api_key: API key forwarded to ``enrich_with_gemini``.

    Returns:
        Whatever ``enrich_with_gemini`` returns for the collected data.
    """
    parsed = parse_dependency_file(file_content)
    license_report = fetch_license_info(parsed)
    likely_packages = determine_probable_packages(file_content, parsed)
    return enrich_with_gemini(license_report, likely_packages, gemini_api_key)


def parse_dependency_file(file_content):
    """Extract ``(package, version)`` pairs from dependency-file text.

    A line contributes an entry only when it contains a name/version
    separator: ``=``, ``@``, ``:``, ``<``, ``>`` or a multi-character
    comparison such as ``==``, ``>=``, ``<=``, ``~=``, ``!=``.  The package is
    the text before the first separator and the version is the text between
    the first and second separator.

    Blank lines and ``#`` comments are ignored, and surrounding quotes and
    commas (as found in JSON-style files) are removed from both fields.

    Args:
        file_content: Full text of the dependency file.

    Returns:
        A list of ``(package, version)`` tuples in file order.  ``version`` is
        ``"latest"`` when nothing follows the separator.
    """
    # A comparison operator is ONE separator.  Splitting on each "=" on its
    # own would turn "pkg==1.0" into ["pkg", "", "1.0"] and lose the version.
    separator = re.compile(r"\s*(?:[=<>!~]=+|[=@:<>])\s*")
    # "#" starts a comment at line start or after whitespace.
    comment = re.compile(r"(?:^|\s)#.*")
    wrapping = " \t\"',"

    dependencies = []
    for raw_line in file_content.splitlines():
        line = comment.sub("", raw_line).strip()
        if not line:
            continue

        entry = line.lstrip(" \t\"'")
        # A leading "@" is an npm scope ("@scope/name"), not a separator.
        scope = "@" if entry.startswith("@") else ""
        parts = separator.split(entry[len(scope):])
        if len(parts) < 2:
            continue  # no separator: not a "name <sep> version" line

        name = parts[0].strip(wrapping)
        if not name:
            continue

        version = parts[1].strip(wrapping) or "latest"
        dependencies.append((scope + name, version))
    return dependencies


def fetch_license_info(dependencies):
    """Look up license and summary metadata on PyPI for each dependency.

    Lookups are best-effort: any network, HTTP or payload problem for one
    package is reported in that package's entry and does not stop the others.

    Args:
        dependencies: Iterable of ``(package, version)`` pairs.  An empty
            version or the placeholder ``"latest"`` means "not pinned".

    Returns:
        One text block per dependency (``Package``/``Version``/``License``/
        ``Description`` lines), joined with blank lines.  An empty string when
        there are no dependencies.
    """
    timeout_seconds = 10  # never let one slow lookup hang the whole request

    entries = []
    for package, version in dependencies:
        # Names and versions come from user-supplied text; escape them so they
        # cannot alter the URL path.
        project = quote(package, safe="")
        if version and version != "latest":
            url = f"https://pypi.org/pypi/{project}/{quote(version, safe='')}/json"
        else:
            # The release-less endpoint describes the newest release.
            url = f"https://pypi.org/pypi/{project}/json"

        try:
            response = requests.get(url, timeout=timeout_seconds)
            response.raise_for_status()
            info = response.json()["info"]
            # PyPI may return null/empty values; fall back instead of
            # printing "None".
            license_name = info.get("license") or "Unknown"
            description = info.get("summary") or "No description available"
        except (requests.RequestException, ValueError, KeyError,
                TypeError, AttributeError):
            license_name = "Unknown"
            description = "Unable to fetch information"

        entries.append(
            f"Package: {package}\nVersion: {version}\n"
            f"License: {license_name}\nDescription: {description}\n"
        )
    return "\n".join(entries)


def determine_probable_packages(file_content, dependencies):
    """Guess the package manager and notable frameworks behind a dependency file.

    The package manager is guessed from a file name mentioned in the text
    (``package.json``, ``Gemfile`` or ``requirements.txt``, case-insensitive;
    first match wins).  Frameworks are recognized by dependency name.

    Args:
        file_content: Text of the dependency file.
        dependencies: Iterable of ``(package, version)`` pairs.

    Returns:
        The recognized items, one per line, without duplicates.  An empty
        string when nothing is recognized.
    """
    content = file_content.lower()
    probable_packages = []

    # NOTE(review): this only fires when the file's own text mentions its file
    # name; the real file name is not available to this function.
    if "package.json" in content:
        probable_packages.append("npm (Node Package Manager)")
    elif "gemfile" in content:
        probable_packages.append("Bundler (Ruby)")
    elif "requirements.txt" in content:
        probable_packages.append("pip (Python Package Installer)")

    common_packages = {
        "react": "React (JavaScript library)",
        "django": "Django (Python web framework)",
        "rails": "Ruby on Rails (Web application framework)",
    }

    for package, _ in dependencies:
        # Names taken from JSON-style lines may still carry quotes or commas
        # (e.g. '"react"'); normalize before the lookup.
        label = common_packages.get(package.strip(" \t\"',").lower())
        if label is not None and label not in probable_packages:
            probable_packages.append(label)

    return "\n".join(probable_packages)


def enrich_with_gemini(licenses, probable_packages, api_key,
                       model_name='gemini-2.5-pro-preview-03-25'):
    """Ask Gemini for a project summary based on the collected license data.

    Args:
        licenses: License report text to embed in the prompt.
        probable_packages: Recognized tooling/framework text to embed in the
            prompt.
        api_key: Gemini API key.
        model_name: Identifier of the Gemini model to query.  Defaults to the
            model this function has always used.

    Returns:
        The text of the model's response.

    Raises:
        ValueError: If ``api_key`` is missing or blank.
    """
    # Fail early with a clear message instead of an opaque SDK error later.
    if not api_key or not api_key.strip():
        raise ValueError("Gemini API key is empty. Please provide a valid key.")

    genai.configure(api_key=api_key.strip())
    model = genai.GenerativeModel(model_name)

    prompt = f"""
    Analyze the following open-source license information and probable packages:

    License Information:
    {licenses}

    Probable Packages:
    {probable_packages}

    Please provide a summary of the project based on these dependencies, including:
    1. The likely type of project (e.g., web application, data science, etc.)
    2. Any potential license conflicts or considerations
    3. Suggestions for best practices in open-source license management for this project
    """

    response = model.generate_content(prompt)
    return response.text


def fetch_github_info(github_url, github_token):
    """Download the text of a file referenced by a GitHub "blob" URL.

    The URL must look like
    ``https://github.com/<owner>/<repo>/blob/<branch>/<path/to/file>``.
    The branch segment is passed to GitHub as the ref to read from and is not
    treated as part of the file path.

    Args:
        github_url: URL of the file on github.com.
        github_token: Personal access token used to authenticate.

    Returns:
        The file's UTF-8 decoded content on success.  On failure, a message
        starting with ``"Error:"``, ``"Error accessing GitHub:"`` or
        ``"Unexpected error:"`` is returned instead of raising.
    """
    # Drop query string / fragment (e.g. "?plain=1", "#L10") before splitting.
    cleaned = (github_url or "").strip().split("?", 1)[0].split("#", 1)[0]
    # Expected: ['https:', '', 'github.com', owner, repo, 'blob', branch, *path]
    segments = cleaned.rstrip("/").split("/")
    if len(segments) < 8 or not all(segments[3:]):
        return ("Error: Invalid GitHub URL. Please use the format: "
                "https://github.com/username/repository/blob/branch/path/to/file")

    owner, repo_slug, ref = segments[3], segments[4], segments[6]
    repo_name = f"{owner}/{repo_slug}"
    # NOTE(review): a branch name containing "/" cannot be told apart from the
    # path here; the first segment after "blob" is taken as the ref.
    file_path = '/'.join(segments[7:])

    try:
        auth = Auth.Token(github_token)
        g = Github(auth=auth)

        print(f"Attempting to access file: {file_path} in repository: {repo_name}")

        repo = g.get_repo(repo_name)
        contents = repo.get_contents(file_path, ref=ref)
        # get_contents returns a list when the path is a directory.
        if isinstance(contents, list):
            return f"Error: {file_path} is a directory, not a file. Please provide the URL of a single file."

        file_content = contents.decoded_content.decode('utf-8')
        print(f"Successfully retrieved {file_path}")

        return file_content
    except GithubException as e:
        if e.status == 404:
            return f"Error: File or repository not found. Please check the URL and ensure you have the correct access permissions. Details: {str(e)}"
        else:
            return f"Error accessing GitHub: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"


def _read_uploaded_file(file):
    """Return the UTF-8 text of an upload given as bytes, a path, or an object with ``.name``."""
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode('utf-8')
    # Depending on the upload component's ``type`` setting, Gradio hands over
    # a path string or a temp-file wrapper exposing its path as ``.name``.
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not isinstance(path, str):
        raise TypeError(f"unsupported upload value of type {type(file).__name__}")
    with open(path, encoding='utf-8') as handle:
        return handle.read()


def process_input(file, github_url, github_token, gemini_api_key):
    """Validate the form inputs, obtain the dependency file text and analyze it.

    Exactly one source must be given: an uploaded file, or a GitHub file URL
    together with an access token.

    Args:
        file: Uploaded file (raw bytes, a file path, or an object whose
            ``.name`` is the file path), or ``None``.
        github_url: URL of a dependency file on github.com, or empty.
        github_token: GitHub personal access token, or empty.
        gemini_api_key: Gemini API key forwarded to the analysis.

    Returns:
        The analysis text, or a human-readable error message.  This function
        reports problems as text for the UI instead of raising.
    """
    github_url = (github_url or "").strip()

    if file is not None and github_url:
        return "Error: Please either upload a file OR provide a GitHub URL, not both."

    if file is not None:
        try:
            file_content = _read_uploaded_file(file)
        except (OSError, UnicodeDecodeError, TypeError) as e:
            return f"Error reading the uploaded file: {str(e)}"
    elif github_url and github_token:
        if not github_url.startswith("https://github.com/"):
            return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository/blob/branch/path/to/file"
        if not github_token.strip():
            return "Error: GitHub Personal Access Token is empty. Please provide a valid token."
        file_content = fetch_github_info(github_url, github_token)
        # fetch_github_info reports failures as text with one of these
        # prefixes; all of them must stop processing, not only "Error:".
        if file_content.startswith(("Error:", "Error accessing GitHub:", "Unexpected error:")):
            return file_content
    else:
        return "Error: Please either upload a file OR provide both GitHub URL and access token."

    try:
        return extract_licenses(file_content, github_url, github_token, gemini_api_key)
    except Exception as e:
        return f"Error processing the file: {str(e)}"


# Gradio form: one optional file upload, one optional GitHub URL with its
# token, and the Gemini API key; the result is shown in a single text box.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.File(
            label="Upload dependency file (e.g., requirements.txt, package.json, Gemfile)",
            # Deliver the upload as raw bytes, which process_input decodes as
            # UTF-8; the default setting passes a file path/wrapper instead.
            type="binary",
        ),
        gr.Textbox(label="GitHub File URL (optional)"),
        gr.Textbox(label="GitHub Personal Access Token (required if using GitHub URL)", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Upload a dependency file OR provide a GitHub file URL to extract and analyze open-source license information.",
)


# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()