bluenevus committed on
Commit
1a646d6
·
verified ·
1 Parent(s): 00d9616

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -76
app.py CHANGED
@@ -3,97 +3,45 @@ import google.generativeai as genai
3
  import requests
4
  import base64
5
  import json
6
- from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
7
 
8
  def fetch_github_files(github_url, personal_access_token):
9
- try:
10
- # Parse the GitHub URL
11
- parts = github_url.split('/')
12
- owner = parts[3]
13
- repo = parts[4].split('.git')[0]
14
- branch = 'main' # You might want to make this configurable
15
-
16
- # List of common dependency files to look for
17
- dependency_files = [
18
- 'requirements.txt',
19
- 'package.json',
20
- 'Gemfile',
21
- 'pom.xml',
22
- 'build.gradle',
23
- 'composer.json',
24
- 'Cargo.toml',
25
- 'go.mod',
26
- 'Pipfile'
27
- ]
28
-
29
- all_content = ""
30
-
31
- # Set up headers with the personal access token
32
- headers = {
33
- "Authorization": f"token {personal_access_token}",
34
- "Accept": "application/vnd.github.v3+json"
35
- }
36
-
37
- for file_path in dependency_files:
38
- # Construct the API URL
39
- api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={branch}"
40
-
41
- # Make the API request
42
- response = requests.get(api_url, headers=headers)
43
-
44
- if response.status_code == 200:
45
- content = response.json()
46
- if isinstance(content, dict) and 'content' in content:
47
- # This is a file
48
- file_content = base64.b64decode(content['content']).decode('utf-8')
49
- all_content += f"\n\n--- {file_path} ---\n{file_content}"
50
- else:
51
- # This is a directory or something else, skip it
52
- continue
53
-
54
- if not all_content:
55
- return "Error: No dependency files found in the repository."
56
-
57
- return all_content
58
- except requests.exceptions.RequestException as e:
59
- return f"Error accessing GitHub: {str(e)}"
60
- except json.JSONDecodeError:
61
- return f"Error: Unable to parse GitHub API response for {file_path}"
62
 
63
- @retry(
64
- stop=stop_after_attempt(3),
65
- wait=wait_exponential(multiplier=1, min=4, max=10),
66
- retry=retry_if_exception_type(Exception),
67
- reraise=True
68
- )
69
- def process_with_gemini(file_content, gemini_api_key):
70
  genai.configure(api_key=gemini_api_key)
71
  model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
72
 
73
- # Limit content size (adjust the limit as needed)
74
- max_content_length = 10000 # characters
75
- if len(file_content) > max_content_length:
76
- file_content = file_content[:max_content_length] + "..."
77
-
78
  prompt = f"""
79
  Analyze the following file content for open-source license information:
80
 
81
- {file_content}
82
 
83
  Please provide:
84
- 1. A numbered with the name dependency and version as the title
85
- 2. 1st bullet under title has a brief summary of what the depency does
86
- 3. 2nd bullet under title has the license name apache 2.0
87
  4. 3rd bullet under title has a hyperlink to the license file
88
  5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
89
  """
90
 
91
- try:
92
- response = model.generate_content(prompt, timeout=60) # Set a timeout of 60 seconds
93
- return response.text
94
- except Exception as e:
95
- print(f"Error in Gemini API call: {str(e)}")
96
- raise ValueError(f"Gemini API error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  def process_input(github_url, personal_access_token, gemini_api_key):
99
  if not github_url.startswith("https://github.com/"):
 
3
  import requests
4
  import base64
5
  import json
6
+ from tenacity import retry, stop_after_attempt, wait_fixed
7
 
8
  def fetch_github_files(github_url, personal_access_token):
9
+ # ... (keep this function as is) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ def process_chunk_with_gemini(chunk, gemini_api_key):
 
 
 
 
 
 
12
  genai.configure(api_key=gemini_api_key)
13
  model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
14
 
 
 
 
 
 
15
  prompt = f"""
16
  Analyze the following file content for open-source license information:
17
 
18
+ {chunk}
19
 
20
  Please provide:
21
+ 1. A numbered list with the name dependency and version as the title
22
+ 2. 1st bullet under title has a brief summary of what the dependency does
23
+ 3. 2nd bullet under title has the license name
24
  4. 3rd bullet under title has a hyperlink to the license file
25
  5. Provide no other information such as greeting or summary as the purpose is to catalog and document all open source licenses used.
26
  """
27
 
28
+ response = model.generate_content(prompt)
29
+ return response.text
30
+
31
+ @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
32
+ def process_with_gemini(file_content, gemini_api_key):
33
+ # Split the content into chunks of approximately 4000 characters
34
+ chunk_size = 4000
35
+ chunks = [file_content[i:i+chunk_size] for i in range(0, len(file_content), chunk_size)]
36
+
37
+ results = []
38
+ for chunk in chunks:
39
+ result = process_chunk_with_gemini(chunk, gemini_api_key)
40
+ results.append(result)
41
+
42
+ # Combine the results
43
+ combined_result = "\n\n".join(results)
44
+ return combined_result
45
 
46
  def process_input(github_url, personal_access_token, gemini_api_key):
47
  if not github_url.startswith("https://github.com/"):