Omartificial-Intelligence-Space committed on
Commit
1512726
·
verified ·
1 Parent(s): 9021fab

Create check_tokens.py

Browse files
Files changed (1) hide show
  1. check_tokens.py +177 -0
check_tokens.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Utility script to check if documents meet token requirements for Gemini API caching
4
+ """
5
+
6
+ import os
7
+ import io
8
+ import httpx
9
+ from google import genai
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables
13
+ load_dotenv()
14
+
15
def check_document_tokens(file_path=None, url=None):
    """Check whether a PDF meets the minimum token count for Gemini caching.

    The document is read from ``file_path`` (preferred when both are given)
    or downloaded from ``url``, uploaded through the Gemini File API, and a
    throw-away cache is created from it so the service reports the token
    count. Results are printed; nothing is returned.

    Args:
        file_path: Path of a local PDF file, or None.
        url: HTTP(S) URL of a PDF, or None.

    Returns:
        None. All findings and errors are reported on stdout.

    Raises:
        Whatever ``genai.Client`` raises on construction; every later error
        is caught and printed.
    """
    min_tokens = 4096

    print("πŸ” Document Token Checker")
    print("=" * 50)

    # Validate the arguments before building a client or touching the network.
    if not file_path and not url:
        print("❌ Error: Please provide either file_path or url")
        return

    # Initialize client
    client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))

    document = None
    try:
        if file_path:
            print(f"πŸ“„ Checking local file: {file_path}")
            with open(file_path, 'rb') as f:
                file_io = io.BytesIO(f.read())
        else:
            print(f"πŸ“„ Checking URL: {url}")
            # httpx does not follow redirects by default; many PDF links
            # (short links, CDNs) answer with a 3xx first.
            response = httpx.get(url, follow_redirects=True)
            response.raise_for_status()
            file_io = io.BytesIO(response.content)

        print("πŸ“€ Uploading to Gemini File API...")

        # Upload to Gemini File API
        document = client.files.upload(
            file=file_io,
            config=dict(mime_type='application/pdf')
        )

        print("βœ… File uploaded successfully!")

        # Try to create a cache to check token count
        print("πŸ’Ύ Attempting to create cache to check token count...")

        cache = None
        try:
            cache = client.caches.create(
                model="gemini-2.0-flash-001",
                config=genai.types.CreateCachedContentConfig(
                    system_instruction="Test system instruction for token counting.",
                    contents=[document],
                )
            )

            token_count = _read_cached_token_count(cache)

            print(f"πŸ“Š Token count: {token_count:,}")
            print(f"πŸ“ Minimum required: 4,096")

            if token_count >= min_tokens:
                print("βœ… Document meets caching requirements!")
                print("πŸ’‘ This document is suitable for caching.")
                _print_cost_benefit(token_count)
            else:
                print("❌ Document does not meet caching requirements")
                print(f"πŸ“ Need {min_tokens - token_count:,} more tokens")
                print("πŸ’‘ Consider:")
                print(" β€’ Uploading a longer document")
                print(" β€’ Combining multiple documents")
                print(" β€’ Using regular analysis (without caching)")

        except Exception as e:
            if "Cached content is too small" in str(e):
                print("❌ Document is too small for caching")
                print("πŸ’‘ This document has fewer than 4,096 tokens")
                print("πŸ“ Recommendations:")
                print(" β€’ Upload a longer document")
                print(" β€’ Combine multiple small documents")
                print(" β€’ Use regular analysis without caching")
            else:
                print(f"❌ Error creating cache: {e}")
        finally:
            # Always remove the throw-away cache, even if reporting failed,
            # so it does not linger until its TTL expires.
            if cache is not None:
                _delete_test_cache(client, cache)

    except Exception as e:
        print(f"❌ Error: {e}")
    finally:
        # The uploaded file only existed for this check; remove it as well.
        if document is not None:
            try:
                client.files.delete(name=document.name)
            except Exception as e:
                print(f"❌ Error deleting uploaded file: {e}")


def _read_cached_token_count(cache):
    """Return the token count reported for a created cache, or 0 if absent."""
    usage = getattr(cache, 'usage_metadata', None)
    # NOTE(review): the google-genai cache usage metadata appears to expose
    # ``total_token_count``; ``cached_token_count`` (the name originally read
    # here) is kept as a fallback. Confirm against the installed SDK version.
    for field in ('total_token_count', 'cached_token_count'):
        value = getattr(usage, field, None)
        if value:
            return value
    return 0


def _print_cost_benefit(token_count, question_counts=(5, 10, 20, 50),
                        tokens_per_question=50):
    """Print a table comparing token usage with and without caching."""
    print("\nπŸ’° Cost-Benefit Analysis:")
    print("Questions | Without Cache | With Cache | Savings")
    print("-" * 50)

    for q in question_counts:
        # Without a cache the whole document is resent for every question.
        without_cache = token_count * q
        # With a cache the document is sent once; each question then adds
        # only its own (assumed) prompt tokens.
        with_cache = token_count + (tokens_per_question * q)
        savings = ((without_cache - with_cache) / without_cache) * 100
        print(f"{q:9d} | {without_cache:12,} | {with_cache:10,} | {savings:6.1f}%")


def _delete_test_cache(client, cache):
    """Delete the temporary cache, reporting (not raising) any failure."""
    print(f"\nπŸ—‘οΈ Cleaning up test cache...")
    try:
        # ``name`` must be passed by keyword in the google-genai SDK.
        client.caches.delete(name=cache.name)
    except Exception as e:
        print(f"❌ Error deleting test cache: {e}")
    else:
        print("βœ… Test cache deleted!")
110
+
111
def estimate_tokens_from_file_size(file_path):
    """Print a rough token estimate for a file based only on its size.

    The estimate assumes about four bytes per token, which is a heuristic
    for plain text. For PDFs it is very approximate because the byte count
    also covers formatting, images and other non-text data.

    Args:
        file_path: Path of the file to measure.

    Returns:
        None. The size, the estimate and a verdict are printed; any error
        is caught and printed as well.
    """
    bytes_per_token = 4
    caching_threshold = 4096

    try:
        size_in_bytes = os.stat(file_path).st_size
        approx_tokens = size_in_bytes // bytes_per_token

        print(f"πŸ“ File size: {size_in_bytes:,} bytes")
        print(f"πŸ“Š Estimated tokens: {approx_tokens:,}")

        verdict = (
            "βœ… Likely meets caching requirements"
            if approx_tokens >= caching_threshold
            else "❌ Likely too small for caching"
        )
        print(verdict)

    except Exception as problem:
        print(f"❌ Error estimating tokens: {problem}")
129
+
130
def main():
    """Run the interactive menu of the document token checker.

    Prints an error and returns immediately when the ``GOOGLE_API_KEY``
    environment variable is not set. Otherwise it loops over a four-option
    menu: check a local PDF, check a PDF from a URL, estimate tokens from
    file size, or exit. The loop ends on option 4, or when input ends
    (EOF) or the user interrupts with Ctrl-C.

    Returns:
        None.
    """
    print("🎯 Gemini API Document Token Checker")
    print("=" * 60)

    # Check if API key is set
    if not os.getenv('GOOGLE_API_KEY'):
        print("❌ Error: GOOGLE_API_KEY not found in environment variables")
        print("Please set your API key in the .env file")
        return

    while True:
        print("\nπŸ“‹ Options:")
        print("1. Check local PDF file")
        print("2. Check PDF from URL")
        print("3. Estimate tokens from file size")
        print("4. Exit")

        try:
            choice = input("\nEnter your choice (1-4): ").strip()

            if choice == '1':
                file_path = input("Enter the path to your PDF file: ").strip()
                if os.path.exists(file_path):
                    check_document_tokens(file_path=file_path)
                else:
                    print("❌ File not found!")

            elif choice == '2':
                url = input("Enter the URL to your PDF: ").strip()
                check_document_tokens(url=url)

            elif choice == '3':
                file_path = input("Enter the path to your PDF file: ").strip()
                if os.path.exists(file_path):
                    estimate_tokens_from_file_size(file_path)
                else:
                    print("❌ File not found!")

            elif choice == '4':
                print("πŸ‘‹ Goodbye!")
                break

            else:
                print("❌ Invalid choice. Please enter 1-4.")

        except (EOFError, KeyboardInterrupt):
            # Closed/exhausted stdin or Ctrl-C: leave the menu cleanly
            # instead of crashing with a traceback. The leading newline
            # moves the message off the pending prompt line.
            print("\nπŸ‘‹ Goodbye!")
            break
176
if __name__ == "__main__":
    # Start the interactive menu only when run as a script, not on import.
    main()