"""Gradio app that extracts and analyzes open-source license information.

The user either uploads a dependency file or points at one on GitHub. Each
dependency is looked up on PyPI for its license and summary, and the
collected information is sent to Gemini for a project-level analysis.
"""

import json
import re
from urllib.parse import quote, unquote, urlparse

import gradio as gr
import requests
from github import Github, GithubException, Auth
import google.generativeai as genai

# Seconds to wait for a single PyPI metadata request before giving up.
PYPI_TIMEOUT_SECONDS = 10

GEMINI_MODEL_NAME = 'gemini-2.5-pro-preview-03-25'

GITHUB_URL_FORMAT_HINT = (
    "https://github.com/username/repository/blob/branch/path/to/file"
)

# Well-known dependency names mapped to a human-readable description.
COMMON_PACKAGES = {
    "react": "React (JavaScript library)",
    "django": "Django (Python web framework)",
    "rails": "Ruby on Rails (Web application framework)",
}

# Separator between a package name and its version: a two-character (or
# longer) comparison operator such as "==", ">=", "~=", "!=", or one of the
# single characters "=", "@", ":". The multi-character form is tried first so
# that "pkg==1.0" splits into ("pkg", "1.0") rather than ("pkg", "").
_SPEC_SEPARATOR = re.compile(r"\s*(?:[=<>!~]=+|[=@:])\s*")

# A line holding nothing but a plausible package name (no version given).
_BARE_NAME = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")

# Extras such as the "[security]" in "requests[security]".
_EXTRAS = re.compile(r"\[.*?\]")

# A token that can be used as an exact release in the PyPI JSON API URL.
_EXACT_VERSION = re.compile(r"[0-9][A-Za-z0-9.+!_-]*")

# Characters stripped from both ends of a parsed name or version
# (whitespace plus the quotes and commas found in JSON-style files).
_TOKEN_PADDING = " \t\"',"


class _InvalidGithubUrlError(ValueError):
    """Raised when a GitHub file URL does not have the expected layout."""


def extract_licenses(file_content, github_url, github_token, gemini_api_key):
    """Run the full pipeline on the text of a dependency file.

    Args:
        file_content: Text of the dependency file.
        github_url: Unused here; kept so existing callers keep working.
        github_token: Unused here; kept so existing callers keep working.
        gemini_api_key: API key passed to the Gemini client.

    Returns:
        The text of the Gemini analysis.
    """
    # Parse the dependency file
    dependencies = parse_dependency_file(file_content)

    # Fetch license information
    licenses = fetch_license_info(dependencies)

    # Determine probable packages
    probable_packages = determine_probable_packages(file_content, dependencies)

    # Enrich information using Gemini
    return enrich_with_gemini(licenses, probable_packages, gemini_api_key)


def parse_dependency_file(file_content):
    """Extract ``(package, version)`` pairs from a dependency file.

    This is a line-based heuristic, not a full parser for any one format.
    Inline ``#`` comments, ``;`` environment markers, extras in square
    brackets, and surrounding quotes/commas are removed. A line that is just
    a package name, or whose version part is empty, gets the version
    ``"latest"``. Lines with no separator that do not look like a bare
    package name are skipped.

    Args:
        file_content: Text of the dependency file.

    Returns:
        A list of ``(package, version)`` string tuples in file order.
    """
    dependencies = []
    for raw_line in file_content.splitlines():
        line = raw_line.split("#", 1)[0].split(";", 1)[0].strip()
        if not line:
            continue

        parts = _SPEC_SEPARATOR.split(line, maxsplit=1)
        package = _EXTRAS.sub("", parts[0]).strip(_TOKEN_PADDING)
        if not package:
            continue

        if len(parts) > 1:
            # Keep only the first constraint of "pkg>=1.0,<2.0".
            version = parts[1].split(",", 1)[0].strip(_TOKEN_PADDING)
            version = version or "latest"
        elif _BARE_NAME.fullmatch(package):
            version = "latest"
        else:
            continue

        dependencies.append((package, version))
    return dependencies


def _pypi_metadata_url(package, version):
    """Build the PyPI JSON API URL for a package and optional exact version."""
    project = quote(package, safe="")
    if _EXACT_VERSION.fullmatch(version):
        release = quote(version, safe="")
        return f"https://pypi.org/pypi/{project}/{release}/json"
    # "latest", ranges, or anything else that is not an exact release:
    # the unversioned endpoint describes the newest release.
    return f"https://pypi.org/pypi/{project}/json"


def fetch_license_info(dependencies):
    """Look up license and summary for each dependency on PyPI.

    A dependency whose metadata cannot be retrieved (network failure,
    non-success HTTP status, malformed response) is still reported, with
    ``License: Unknown`` and ``Description: Unable to fetch information``.

    Args:
        dependencies: Iterable of ``(package, version)`` tuples.

    Returns:
        One text block per dependency, joined with newlines.
    """
    entries = []
    for package, version in dependencies:
        license_name = "Unknown"
        description = "Unable to fetch information"
        try:
            response = requests.get(
                _pypi_metadata_url(package, version),
                timeout=PYPI_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            info = response.json()["info"]
            fetched_license = info.get("license")
            fetched_summary = info.get("summary")
        except (requests.RequestException, ValueError, KeyError,
                TypeError, AttributeError):
            # Best effort: keep the "unable to fetch" placeholders above.
            pass
        else:
            # PyPI may return null or an empty string for either field.
            license_name = fetched_license or "Unknown"
            description = fetched_summary or "No description available"

        entries.append(
            f"Package: {package}\nVersion: {version}\n"
            f"License: {license_name}\nDescription: {description}\n"
        )
    return "\n".join(entries)


def determine_probable_packages(file_content, dependencies):
    """Guess the package manager and notable frameworks in use.

    Args:
        file_content: Text of the dependency file; searched
            case-insensitively for a well-known file name.
        dependencies: Iterable of ``(package, version)`` tuples.

    Returns:
        Newline-joined descriptions; empty string when nothing matched.
    """
    probable_packages = []
    lowered_content = file_content.lower()
    if "package.json" in lowered_content:
        probable_packages.append("npm (Node Package Manager)")
    elif "gemfile" in lowered_content:
        probable_packages.append("Bundler (Ruby)")
    elif "requirements.txt" in lowered_content:
        probable_packages.append("pip (Python Package Installer)")

    # Add more probable packages based on common dependencies
    for package, _ in dependencies:
        description = COMMON_PACKAGES.get(package.lower())
        if description is not None:
            probable_packages.append(description)

    return "\n".join(probable_packages)


def enrich_with_gemini(licenses, probable_packages, api_key):
    """Ask Gemini to summarize the project and its license situation.

    Args:
        licenses: Text produced by ``fetch_license_info``.
        probable_packages: Text produced by ``determine_probable_packages``.
        api_key: Gemini API key.

    Returns:
        The text of the model's response.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(GEMINI_MODEL_NAME)

    prompt = f"""
    Analyze the following open-source license information and probable packages:

    License Information:
    {licenses}

    Probable Packages:
    {probable_packages}

    Please provide a summary of the project based on these dependencies, including:
    1. The likely type of project (e.g., web application, data science, etc.)
    2. Any potential license conflicts or considerations
    3. Suggestions for best practices in open-source license management for this project
    """

    response = model.generate_content(prompt)
    return response.text


def _split_github_blob_url(github_url):
    """Split a GitHub file URL into ``(owner/repo, ref, file_path)``.

    Expects ``https://github.com/<owner>/<repo>/<marker>/<ref>/<path...>``.
    The first segment after the marker is taken as the ref, so branch names
    that themselves contain ``/`` cannot be told apart from the path.

    Raises:
        _InvalidGithubUrlError: If the URL has too few path segments.
    """
    segments = [
        unquote(segment)
        for segment in urlparse(github_url).path.split("/")
        if segment
    ]
    if len(segments) < 5:
        raise _InvalidGithubUrlError(
            "Invalid GitHub URL. Please use the format: "
            + GITHUB_URL_FORMAT_HINT
        )
    owner, repo, _marker, ref, *path_parts = segments
    return f"{owner}/{repo}", ref, "/".join(path_parts)


def _download_github_file(github_url, github_token):
    """Return the decoded text of the file at ``github_url``; raise on failure."""
    # Create a Github instance with token authentication
    g = Github(auth=Auth.Token(github_token))

    repo_name, ref, file_path = _split_github_blob_url(github_url)
    print(f"Attempting to access file: {file_path} in repository: {repo_name}")

    repo = g.get_repo(repo_name)
    # The ref must be passed explicitly; otherwise the default branch is used.
    contents = repo.get_contents(file_path, ref=ref)
    if isinstance(contents, list):
        # get_contents returns a list when the path is a directory.
        raise _InvalidGithubUrlError(
            "The GitHub URL points to a directory, not a file."
        )

    file_content = contents.decoded_content.decode('utf-8')
    print(f"Successfully retrieved {file_path}")
    return file_content


def _describe_github_error(error):
    """Turn an exception from ``_download_github_file`` into a user message."""
    if isinstance(error, _InvalidGithubUrlError):
        return f"Error: {error}"
    if isinstance(error, GithubException):
        if error.status == 404:
            return (
                "Error: File or repository not found. Please check the URL "
                "and ensure you have the correct access permissions. "
                f"Details: {str(error)}"
            )
        return f"Error accessing GitHub: {str(error)}"
    return f"Unexpected error: {str(error)}"


def fetch_github_info(github_url, github_token):
    """Fetch a file from GitHub and return its text.

    Args:
        github_url: URL of the form
            ``https://github.com/<owner>/<repo>/blob/<branch>/<path>``.
        github_token: GitHub personal access token.

    Returns:
        The file's text on success. On failure, a message beginning with
        ``"Error:"``, ``"Error accessing GitHub:"`` or ``"Unexpected error:"``.
    """
    try:
        return _download_github_file(github_url, github_token)
    except Exception as e:
        return _describe_github_error(e)


def _read_uploaded_file(file):
    """Return the UTF-8 text of an uploaded file.

    Accepts raw bytes, a filesystem path string, or an object exposing the
    path as ``.name`` (e.g. a temporary-file wrapper).
    """
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode('utf-8')

    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not isinstance(path, str):
        raise TypeError(
            f"Unsupported upload type: {type(file).__name__}"
        )
    with open(path, encoding='utf-8') as handle:
        return handle.read()


def process_input(file, github_url, github_token, gemini_api_key):
    """Gradio callback: validate the inputs and run the license analysis.

    Exactly one source must be supplied: an uploaded file, or a GitHub URL
    together with an access token.

    Returns:
        The analysis text, or a human-readable error message.
    """
    github_url = (github_url or "").strip()
    github_token = github_token or ""

    if file is not None and github_url:
        return "Error: Please either upload a file OR provide a GitHub URL, not both."

    if file is not None:
        try:
            file_content = _read_uploaded_file(file)
        except (OSError, UnicodeDecodeError, TypeError) as e:
            return f"Error processing the file: {str(e)}"
    elif github_url and github_token:
        if not github_url.startswith("https://github.com/"):
            return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository/blob/branch/path/to/file"
        if not github_token.strip():
            return "Error: GitHub Personal Access Token is empty. Please provide a valid token."
        # Call the raising helper directly so every failure is reported,
        # instead of inspecting the returned text for an error prefix.
        try:
            file_content = _download_github_file(
                github_url, github_token.strip()
            )
        except Exception as e:
            return _describe_github_error(e)
    else:
        return "Error: Please either upload a file OR provide both GitHub URL and access token."

    try:
        return extract_licenses(file_content, github_url, github_token, gemini_api_key)
    except Exception as e:
        return f"Error processing the file: {str(e)}"


iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.File(label="Upload dependency file (e.g., requirements.txt, package.json, Gemfile)"),
        gr.Textbox(label="GitHub File URL (optional)"),
        gr.Textbox(label="GitHub Personal Access Token (required if using GitHub URL)", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Upload a dependency file OR provide a GitHub file URL to extract and analyze open-source license information.",
)

if __name__ == "__main__":
    iface.launch()