"""Gradio app that catalogs open-source licenses of a GitHub repository.

The app downloads well-known dependency manifests from a repository through
the GitHub REST API and asks a Gemini model to summarize the license of every
dependency it finds.
"""

import base64
import json
from typing import Optional, Tuple
from urllib.parse import quote, urlparse

import google.generativeai as genai
import gradio as gr
import requests

# Manifest files that commonly declare a project's third-party dependencies.
DEPENDENCY_FILES = (
    'requirements.txt',
    'package.json',
    'Gemfile',
    'pom.xml',
    'build.gradle',
    'composer.json',
    'Cargo.toml',
    'go.mod',
    'Pipfile',
)

# Network timeouts so a stalled connection cannot hang the UI forever.
GITHUB_TIMEOUT_SECONDS = 30
GEMINI_TIMEOUT_SECONDS = 120

GEMINI_MODEL_NAME = 'gemini-2.5-pro-preview-03-25'

_INVALID_URL_MESSAGE = (
    "Invalid GitHub URL. Please use the format: "
    "https://github.com/username/repository.git"
)


def _parse_github_url(github_url: str) -> Tuple[str, str]:
    """Return ``(owner, repo)`` extracted from a GitHub repository URL.

    Raises:
        ValueError: if the URL path does not contain both an owner and a
            repository name.
    """
    path = urlparse(github_url.strip()).path
    segments = [segment for segment in path.split('/') if segment]
    if len(segments) < 2:
        raise ValueError(_INVALID_URL_MESSAGE)

    owner, repo = segments[0], segments[1]
    # Strip only a *trailing* ".git"; splitting on ".git" anywhere would
    # truncate names such as "user.github.io".
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        raise ValueError(_INVALID_URL_MESSAGE)
    return owner, repo


def _decode_file_payload(payload) -> Optional[str]:
    """Return the decoded text of a GitHub "contents" file payload.

    Returns ``None`` when the payload is not a file object (for example a
    directory listing, which the API returns as a list).

    Raises:
        ValueError: if the base64 content is malformed.
    """
    if not isinstance(payload, dict):
        return None
    encoded = payload.get('content')
    if not isinstance(encoded, str):
        return None
    raw = base64.b64decode(encoded)
    # Manifests are expected to be UTF-8, but a stray byte must not abort
    # the whole analysis.
    return raw.decode('utf-8', errors='replace')


def fetch_github_files(github_url, personal_access_token, branch=None):
    """Download the known dependency manifests from a GitHub repository.

    Args:
        github_url: Repository URL such as
            ``https://github.com/owner/repo.git``.
        personal_access_token: GitHub personal access token sent in the
            ``Authorization`` header.
        branch: Optional branch, tag or commit to read from. When omitted,
            no ``ref`` is sent and GitHub serves the default branch.

    Returns:
        The concatenated manifest contents, each preceded by a
        ``--- <file> ---`` header, or a message starting with ``"Error"``
        when nothing could be retrieved.
    """
    try:
        owner, repo = _parse_github_url(github_url)
    except ValueError as exc:
        return f"Error: {exc}"

    headers = {
        "Authorization": f"token {personal_access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    params = {"ref": branch} if branch else None
    repo_api_url = (
        "https://api.github.com/repos/"
        f"{quote(owner, safe='')}/{quote(repo, safe='')}/contents"
    )

    sections = []
    try:
        # One session reuses the HTTPS connection across all lookups.
        with requests.Session() as session:
            for file_path in DEPENDENCY_FILES:
                response = session.get(
                    f"{repo_api_url}/{file_path}",
                    headers=headers,
                    params=params,
                    timeout=GITHUB_TIMEOUT_SECONDS,
                )

                if response.status_code == 401:
                    return (
                        "Error: GitHub rejected the Personal Access Token "
                        "(HTTP 401). Please provide a valid token."
                    )
                if response.status_code in (403, 429):
                    return (
                        "Error: GitHub denied the request "
                        f"(HTTP {response.status_code}). The token may lack "
                        "access to this repository or the rate limit was "
                        "exceeded."
                    )
                if response.status_code != 200:
                    # Typically 404: this manifest is not in the repository.
                    continue

                try:
                    file_text = _decode_file_payload(response.json())
                except ValueError:
                    # Covers invalid JSON as well as malformed base64.
                    return (
                        "Error: Unable to parse GitHub API response "
                        f"for {file_path}"
                    )

                if file_text is None:
                    # A directory or something else that is not a file.
                    continue
                sections.append(f"\n\n--- {file_path} ---\n{file_text}")
    except requests.exceptions.RequestException as e:
        return f"Error accessing GitHub: {str(e)}"

    if not sections:
        return "Error: No dependency files found in the repository."
    return "".join(sections)


def process_with_gemini(file_content, gemini_api_key):
    """Ask Gemini to catalog the licenses of the dependencies in *file_content*.

    Args:
        file_content: Concatenated dependency manifests.
        gemini_api_key: API key used to configure the Gemini client.

    Returns:
        The text of the model's response.

    Exceptions raised by the Gemini SDK are propagated to the caller.
    """
    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel(GEMINI_MODEL_NAME)

    prompt = (
        "\n"
        "Analyze the following file content for open-source license "
        "information:\n"
        f"{file_content}\n"
        "Please provide:\n"
        "1. A numbered list with the dependency name and version as the "
        "title\n"
        "2. 1st bullet under title has a brief summary of what the "
        "dependency does\n"
        "3. 2nd bullet under title has the license name "
        "(for example, Apache 2.0)\n"
        "4. 3rd bullet under title has a hyperlink to the license file\n"
        "5. New title called potential license conflicts describes issues "
        "between licenses that may cause legal problems\n"
        "6. Provide no other information such as greeting or summary as the "
        "purpose is to catalog and document all open source licenses used.\n"
    )

    # The SDK takes the timeout through request_options; generate_content()
    # has no "timeout" keyword of its own.
    response = model.generate_content(
        prompt,
        request_options={"timeout": GEMINI_TIMEOUT_SECONDS},
    )
    return response.text


def process_input(github_url, personal_access_token, gemini_api_key):
    """Validate the form inputs, fetch the manifests and analyze them.

    Args:
        github_url: Repository URL entered by the user.
        personal_access_token: GitHub personal access token.
        gemini_api_key: Gemini API key.

    Returns:
        The license analysis, or a message starting with ``"Error"``.
    """
    # Pasted values often carry stray whitespace, which would corrupt the
    # Authorization header or the API key.
    github_url = (github_url or "").strip()
    personal_access_token = (personal_access_token or "").strip()
    gemini_api_key = (gemini_api_key or "").strip()

    if not github_url.startswith("https://github.com/"):
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"
    if not personal_access_token:
        return "Error: Personal Access Token is empty. Please provide a valid token."
    if not gemini_api_key:
        return "Error: Gemini API Key is empty. Please provide a valid key."

    file_content = fetch_github_files(github_url, personal_access_token)
    # Every failure message starts with "Error" (not always "Error:"), while
    # successful content starts with a "--- file ---" header.
    if file_content.startswith("Error"):
        return file_content

    try:
        return process_with_gemini(file_content, gemini_api_key)
    except Exception as e:  # UI boundary: report the failure, don't crash.
        return f"Error processing the files: {str(e)}"


iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="GitHub Repository URL (format: https://github.com/username/repository.git)"),
        gr.Textbox(label="GitHub Personal Access Token", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Provide a GitHub repository URL to analyze open-source licenses from dependency files.",
)

if __name__ == "__main__":
    iface.launch()