"""Gradio app that catalogs open-source licenses for a GitHub repository.

The app downloads well-known dependency manifests from a repository through
the GitHub REST API, sends their content to Gemini in chunks, and shows the
model's license summary in a text box.
"""
import base64
import json

import google.generativeai as genai
import gradio as gr
import requests
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

# Dependency manifests looked up at the root of the repository.
DEPENDENCY_FILES = (
    'requirements.txt',
    'package.json',
    'Gemfile',
    'pom.xml',
    'build.gradle',
    'composer.json',
    'Cargo.toml',
    'go.mod',
    'Pipfile',
)

# Upper bound (seconds) for a single GitHub API request so the UI never hangs.
GITHUB_TIMEOUT_SECONDS = 30

# Maximum number of characters sent to Gemini in one prompt.
CHUNK_SIZE = 2000


def _parse_github_repo(github_url):
    """Return ``(owner, repo)`` extracted from a GitHub URL, or ``None`` if absent."""
    parts = github_url.strip().rstrip('/').split('/')
    # 'https://github.com/owner/repo' -> ['https:', '', 'github.com', 'owner', 'repo']
    if len(parts) < 5:
        return None
    owner, repo = parts[3], parts[4]
    # Strip only a trailing '.git'; splitting on '.git' anywhere would
    # truncate names such as 'user.github.io'.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not owner or not repo:
        return None
    return owner, repo


def fetch_github_files(github_url, personal_access_token, branch='main'):
    """Download the known dependency files of a repository.

    Args:
        github_url: Repository URL such as
            ``https://github.com/username/repository.git``.
        personal_access_token: GitHub token sent in the Authorization header.
        branch: Branch, tag or commit to read the files from.

    Returns:
        The concatenated file contents, each preceded by a
        ``--- <file name> ---`` header, or a string starting with ``Error``
        describing why nothing could be fetched.
    """
    parsed = _parse_github_repo(github_url)
    if parsed is None:
        return "Error: Could not determine the repository owner and name from the GitHub URL."
    owner, repo = parsed

    headers = {
        "Authorization": f"token {personal_access_token}",
        "Accept": "application/vnd.github.v3+json"
    }

    sections = []
    for file_path in DEPENDENCY_FILES:
        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
        try:
            response = requests.get(
                api_url,
                headers=headers,
                params={"ref": branch},
                timeout=GITHUB_TIMEOUT_SECONDS,
            )
            if response.status_code in (401, 403):
                # Report auth/permission problems instead of letting them
                # masquerade as "no dependency files found".
                return (
                    f"Error: GitHub rejected the request (HTTP {response.status_code}). "
                    "Check the personal access token, its permissions, and API rate limits."
                )
            if response.status_code != 200:
                # Typically 404: this manifest does not exist in the repository.
                continue
            content = response.json()
        except requests.exceptions.RequestException as e:
            return f"Error accessing GitHub: {str(e)}"
        except json.JSONDecodeError:
            return f"Error: Unable to parse GitHub API response for {file_path}"

        # A directory listing comes back as a list; only file objects carry 'content'.
        if not (isinstance(content, dict) and 'content' in content):
            continue

        # Replace undecodable bytes rather than aborting the whole run.
        file_content = base64.b64decode(content['content']).decode('utf-8', errors='replace')
        sections.append(f"\n\n--- {file_path} ---\n{file_content}")

    if not sections:
        return "Error: No dependency files found in the repository."

    return "".join(sections)


def process_chunk_with_gemini(chunk, gemini_api_key):
    """Ask Gemini for the license information of the dependencies in ``chunk``.

    Args:
        chunk: Piece of dependency-file text to analyze.
        gemini_api_key: API key used to configure the Gemini client.

    Returns:
        The model's text response, or an ``Error processing chunk: ...``
        string if the generation call raised.
    """
    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')

    prompt = f"""
    Analyze the following file content for open-source license information:

    {chunk}

    Please provide:
    1. A numbered list with the name dependency and version as the title
    2. 1st bullet under title has a brief summary of what the dependency does
    3. 2nd bullet under title has the license name
    4. 3rd bullet under title has a hyperlink to the license file
    5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
    """

    try:
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        print(f"Error processing chunk: {str(e)}")
        return f"Error processing chunk: {str(e)}"


def _split_into_chunks(text, chunk_size):
    """Split ``text`` into pieces of at most ``chunk_size`` characters.

    Pieces end on line boundaries so a dependency entry is not cut in half;
    only a single line longer than ``chunk_size`` is split mid-line.
    Concatenating the returned pieces reproduces ``text`` exactly.
    """
    chunks = []
    current = ""
    for line in text.splitlines(keepends=True):
        # A single overlong line cannot fit in any chunk: hard-split it.
        while len(line) > chunk_size:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(line[:chunk_size])
            line = line[chunk_size:]
        if len(current) + len(line) > chunk_size:
            chunks.append(current)
            current = ""
        current += line
    if current:
        chunks.append(current)
    return chunks


# NOTE: failures of individual chunks are caught inside the loop below and
# recorded as text, so this retry policy only fires for exceptions raised
# outside that handling.
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type(Exception))
def process_with_gemini(file_content, gemini_api_key):
    """Analyze ``file_content`` with Gemini chunk by chunk.

    Args:
        file_content: Concatenated dependency-file text.
        gemini_api_key: API key forwarded to ``process_chunk_with_gemini``.

    Returns:
        The per-chunk results (or per-chunk error messages) joined by blank lines.
    """
    results = []
    for chunk in _split_into_chunks(file_content, CHUNK_SIZE):
        try:
            results.append(process_chunk_with_gemini(chunk, gemini_api_key))
        except Exception as e:
            # Covers failures outside the chunk helper's own try block,
            # e.g. client configuration or model construction.
            print(f"Error processing chunk: {str(e)}")
            results.append(f"Error processing chunk: {str(e)}")

    return "\n\n".join(results)


def process_input(github_url, personal_access_token, gemini_api_key):
    """Validate the form inputs, fetch dependency files and analyze them.

    Args:
        github_url: Repository URL entered by the user.
        personal_access_token: GitHub personal access token.
        gemini_api_key: Gemini API key.

    Returns:
        The license analysis text, or an error message beginning with ``Error``.
    """
    # Pasted values often carry stray whitespace or a trailing newline.
    github_url = github_url.strip()
    personal_access_token = personal_access_token.strip()
    gemini_api_key = gemini_api_key.strip()

    if not github_url.startswith("https://github.com/"):
        return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository.git"

    if not personal_access_token:
        return "Error: Personal Access Token is empty. Please provide a valid token."

    if not gemini_api_key:
        return "Error: Gemini API Key is empty. Please provide a valid key."

    file_content = fetch_github_files(github_url, personal_access_token)
    # Successful content always begins with a "--- <file> ---" header, so any
    # "Error" prefix (including "Error accessing GitHub: ...") is a failure
    # message that must not be forwarded to the model.
    if file_content.startswith("Error"):
        return file_content

    try:
        return process_with_gemini(file_content, gemini_api_key)
    except Exception as e:
        return f"Error processing the files: {str(e)}"


iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="GitHub Repository URL (format: https://github.com/username/repository.git)"),
        gr.Textbox(label="GitHub Personal Access Token", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
    title="Open Source License Extractor",
    description="Provide a GitHub repository URL to analyze open-source licenses from dependency files.",
)

if __name__ == "__main__":
    iface.launch()