"""Gradio app that catalogs open-source licenses used by a GitHub repository.

The app downloads well-known dependency manifests from the root of a
repository through the GitHub contents API and asks a Gemini model to list
every dependency together with its license.
"""

import base64
import json

import google.generativeai as genai
import gradio as gr
import requests

# Manifest files probed at the repository root, in the order they are reported.
DEPENDENCY_FILES = (
    'requirements.txt',
    'package.json',
    'Gemfile',
    'pom.xml',
    'build.gradle',
    'composer.json',
    'Cargo.toml',
    'go.mod',
    'Pipfile',
)

# Without a timeout, requests waits forever on a stalled connection and the
# UI callback never returns.
REQUEST_TIMEOUT_SECONDS = 15

GEMINI_MODEL_NAME = 'gemini-2.5-pro-preview-03-25'

INVALID_URL_MESSAGE = (
    "Error: Invalid GitHub URL. Please use the format: "
    "https://github.com/username/repository.git"
)


def _parse_owner_and_repo(github_url):
    """Return ``(owner, repo)`` taken from a GitHub URL, or ``None``.

    ``None`` is returned when the URL has no owner or repository segment.
    Only a trailing ``.git`` is removed, so repository names that merely
    contain ``.git`` (for example ``user.github.io``) are kept intact.
    """
    parts = github_url.strip().rstrip('/').split('/')
    # "https://github.com/owner/repo" splits into
    # ['https:', '', 'github.com', 'owner', 'repo'].
    if len(parts) < 5:
        return None
    owner, repo = parts[3], parts[4]
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not owner or not repo:
        return None
    return owner, repo


def fetch_github_files(github_url, personal_access_token, branch='main'):
    """Download the dependency manifests found at the root of a repository.

    Args:
        github_url: Repository URL such as
            ``https://github.com/username/repository.git``.
        personal_access_token: GitHub token sent in the ``Authorization``
            header.
        branch: Branch, tag or commit to read the files from.

    Returns:
        The decoded contents of every manifest that exists, each preceded by
        a ``--- <file name> ---`` header, or a message starting with
        ``Error`` when nothing could be retrieved.
    """
    parsed = _parse_owner_and_repo(github_url)
    if parsed is None:
        return INVALID_URL_MESSAGE
    owner, repo = parsed

    headers = {
        "Authorization": f"token {personal_access_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    sections = []
    file_path = None
    try:
        for file_path in DEPENDENCY_FILES:
            api_url = (
                f"https://api.github.com/repos/{owner}/{repo}"
                f"/contents/{file_path}"
            )
            response = requests.get(
                api_url,
                headers=headers,
                params={"ref": branch},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )

            # Report authentication/authorization failures explicitly rather
            # than letting them look like "no dependency files found".
            if response.status_code in (401, 403):
                return (
                    f"Error: GitHub denied the request "
                    f"(HTTP {response.status_code}). Please check the "
                    f"Personal Access Token and its permissions."
                )
            if response.status_code != 200:
                # Typically 404: this manifest is not in the repository.
                continue

            content = response.json()
            if not (isinstance(content, dict) and 'content' in content):
                # A directory listing or another non-file payload.
                continue

            # errors='replace' keeps a manifest with stray non-UTF-8 bytes
            # from aborting the whole run.
            file_content = base64.b64decode(content['content']).decode(
                'utf-8', errors='replace'
            )
            sections.append(f"\n\n--- {file_path} ---\n{file_content}")
    except requests.exceptions.RequestException as e:
        return f"Error accessing GitHub: {str(e)}"
    except json.JSONDecodeError:
        return f"Error: Unable to parse GitHub API response for {file_path}"

    if not sections:
        return "Error: No dependency files found in the repository."
    return "".join(sections)


def process_with_gemini(file_content, gemini_api_key):
    """Ask Gemini to catalog the licenses of the dependencies in *file_content*.

    Args:
        file_content: Concatenated dependency manifests, as produced by
            ``fetch_github_files``.
        gemini_api_key: API key used to configure the Gemini client.

    Returns:
        The text of the model's response.
    """
    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel(GEMINI_MODEL_NAME)

    # NOTE(review): item 3 reads as if "apache 2.0" was meant as an example
    # ("e.g. Apache 2.0"); as written it may bias the model. Confirm intent
    # before rewording the prompt.
    prompt = f"""
    Analyze the following file content for open-source license information:

    {file_content}

    Please provide:
    1. A numbered with the name dependency and version as the title
    2. 1st bullet under title has a brief summary of what the depency does
    3. 2nd bullet under title has the license name apache 2.0
    4. 3rd bullet under title has a hyperlink to the license file
    5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
    """

    response = model.generate_content(prompt)
    return response.text


def process_input(github_url, personal_access_token, gemini_api_key):
    """Validate the form inputs, fetch the manifests and analyze them.

    Args:
        github_url: Repository URL entered by the user.
        personal_access_token: GitHub personal access token.
        gemini_api_key: Gemini API key.

    Returns:
        The license analysis produced by Gemini, or a message starting with
        ``Error`` describing what went wrong.
    """
    github_url = github_url.strip()
    if not github_url.startswith("https://github.com/"):
        return INVALID_URL_MESSAGE

    if not personal_access_token.strip():
        return "Error: Personal Access Token is empty. Please provide a valid token."

    if not gemini_api_key.strip():
        return "Error: Gemini API Key is empty. Please provide a valid key."

    file_content = fetch_github_files(github_url, personal_access_token.strip())

    # Successful results always begin with "\n\n--- ", so a bare "Error"
    # prefix is unambiguous. Matching only "Error:" would miss
    # "Error accessing GitHub: ..." and send that message to Gemini.
    if file_content.startswith("Error"):
        return file_content

    try:
        return process_with_gemini(file_content, gemini_api_key.strip())
    except Exception as e:
        # UI boundary: show any model/client failure to the user instead of
        # crashing the callback.
        return f"Error processing the files: {str(e)}"


iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="GitHub Repository URL (format: https://github.com/username/repository.git)"),
        gr.Textbox(label="GitHub Personal Access Token", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Provide a GitHub repository URL to analyze open-source licenses from dependency files.",
)

if __name__ == "__main__":
    iface.launch()