File size: 19,687 Bytes
a963d65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
#!/usr/bin/env python3
"""
Standalone Mistral API Test Script
Comprehensive diagnostic tool to identify why Mistral API calls aren't reaching the console
"""

import asyncio
import httpx
import base64
import os
import json
import sys
from datetime import datetime
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
import io

class MistralAPITester:
    """Comprehensive Mistral API testing suite"""
    
    def __init__(self):
        self.api_key = os.getenv("MISTRAL_API_KEY")
        self.base_url = "https://api.mistral.ai/v1"
        self.test_results = {}
        
        # Test configuration
        self.timeout = 30.0
        self.test_model = "pixtral-12b-2409"
        
        print(f"πŸ”§ Mistral API Diagnostic Tool")
        print(f"⏰ Timestamp: {datetime.now().isoformat()}")
        print(f"πŸ”‘ API Key: {'βœ… Present' if self.api_key else '❌ Missing'}")
        if self.api_key:
            print(f"πŸ”‘ Key format: {self.api_key[:8]}...{self.api_key[-4:]}")
        print(f"🌐 Base URL: {self.base_url}")
        print(f"πŸ€– Test Model: {self.test_model}")
        print("=" * 70)

    async def test_1_basic_connectivity(self):
        """Test 1: verify that the Mistral API endpoint answers at all.

        Issues a GET to ``{base_url}/models`` without credentials and records
        the outcome in ``self.test_results["connectivity"]``. Both 200 and 401
        are treated as a pass because either one shows the server responded.
        """
        print("\nπŸ”Œ TEST 1: Basic Connectivity")
        print("-" * 30)

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as http:
                # No Authorization header is sent; only reachability is probed
                reply = await http.get(f"{self.base_url}/models")
                status = reply.status_code

                print(f"πŸ“‘ Network Status: βœ… Connected")
                print(f"🌐 Response Code: {status}")
                print(f"⏱️  Response Time: {reply.elapsed.total_seconds():.3f}s")

                if status == 200:
                    print("πŸ“‹ Models endpoint accessible")
                    verdict = "βœ… PASS - Full access"
                elif status == 401:
                    print("πŸ” Authentication Required (Expected for /models endpoint)")
                    verdict = "βœ… PASS - Network reachable"
                else:
                    print(f"⚠️  Unexpected status: {status}")
                    print(f"πŸ“„ Response: {reply.text[:200]}")
                    verdict = f"⚠️  PARTIAL - Status {status}"

                self.test_results["connectivity"] = verdict

        except httpx.ConnectTimeout:
            print("❌ Connection timeout - Network/firewall issue")
            self.test_results["connectivity"] = "❌ FAIL - Connection timeout"
        except httpx.ConnectError as conn_err:
            print(f"❌ Connection error: {conn_err}")
            self.test_results["connectivity"] = f"❌ FAIL - Connection error: {conn_err}"
        except Exception as exc:
            # Anything else is reported together with its exception type
            print(f"❌ Unexpected error: {exc}")
            self.test_results["connectivity"] = f"❌ FAIL - {type(exc).__name__}: {exc}"

    async def test_2_authentication(self):
        """Test 2: check that the configured API key is accepted.

        Posts a minimal chat completion to ``{base_url}/chat/completions`` and
        records the verdict in ``self.test_results["authentication"]``:
        200 and 429 count as a valid key, 401 as an invalid key, and any other
        status as unknown. Without an API key the test fails immediately.
        """
        print("\nπŸ” TEST 2: Authentication")
        print("-" * 30)
        
        if not self.api_key:
            print("❌ No API key provided")
            self.test_results["authentication"] = "❌ FAIL - No API key"
            return
            
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                # Test authentication with a simple chat completion
                response = await client.post(
                    f"{self.base_url}/chat/completions",
                    headers={
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": "mistral-tiny",  # Use basic model for auth test
                        "messages": [{"role": "user", "content": "Hello"}],
                        "max_tokens": 10
                    }
                )
                
                print(f"πŸ”‘ Auth Status: {response.status_code}")
                print(f"πŸ“Š Response Size: {len(response.content)} bytes")
                
                if response.status_code == 200:
                    result = response.json()
                    # A 200 already shows the key was accepted. Tolerate an
                    # empty "choices" list or null content so the preview
                    # cannot raise and turn this into a reported failure.
                    choices = result.get('choices') or [{}]
                    message = choices[0].get('message') or {}
                    content = message.get('content') or 'N/A'
                    print("βœ… Authentication successful")
                    print(f"πŸ“ Response: {str(content)[:50]}...")
                    self.test_results["authentication"] = "βœ… PASS - Valid API key"
                elif response.status_code == 401:
                    print("❌ Authentication failed - Invalid API key")
                    error_detail = response.text[:200]
                    print(f"πŸ“„ Error: {error_detail}")
                    self.test_results["authentication"] = f"❌ FAIL - Invalid key: {error_detail}"
                elif response.status_code == 429:
                    print("⏸️  Rate limited - API key works but quota exceeded")
                    self.test_results["authentication"] = "βœ… PASS - Valid key (rate limited)"
                else:
                    print(f"⚠️  Unexpected status: {response.status_code}")
                    print(f"πŸ“„ Response: {response.text[:200]}")
                    self.test_results["authentication"] = f"⚠️  UNKNOWN - Status {response.status_code}"
                    
        except Exception as e:
            # Network errors and malformed (non-JSON) bodies end up here
            print(f"❌ Authentication test failed: {e}")
            self.test_results["authentication"] = f"❌ FAIL - {type(e).__name__}: {e}"

    async def test_3_vision_model_availability(self):
        """Test 3: check that the vision model accepts an image request.

        Builds a 100x100 white JPEG in memory, sends it base64-encoded as a
        data URL to ``{base_url}/chat/completions`` using ``self.test_model``,
        and records the verdict in ``self.test_results["vision_model"]``.
        The test is skipped when no API key is configured.
        """
        print("\nπŸ‘οΈ  TEST 3: Vision Model Availability")
        print("-" * 30)
        
        if not self.api_key:
            print("⏭️  Skipping - No API key")
            self.test_results["vision_model"] = "⏭️  SKIP - No API key"
            return
            
        try:
            # Create a simple test image
            test_image = Image.new('RGB', (100, 100), color='white')
            
            # Add some text to the image (ImageDraw comes from the file-level
            # PIL import, as in test_4_exact_app_request)
            draw = ImageDraw.Draw(test_image)
            try:
                # Try to use default font
                draw.text((10, 10), "TEST IMAGE", fill='black')
            except Exception as draw_error:
                # Best effort: a blank image is still a usable request, so
                # report the problem and carry on instead of aborting.
                print(f"Warning: could not draw text on test image: {draw_error}")
            
            # Convert to base64
            img_byte_arr = io.BytesIO()
            test_image.save(img_byte_arr, format='JPEG')
            img_bytes = img_byte_arr.getvalue()
            b64_data = base64.b64encode(img_bytes).decode()
            
            print(f"πŸ–ΌοΈ  Created test image: {len(img_bytes)} bytes")
            print(f"πŸ“Š Base64 length: {len(b64_data)} chars")
            
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    f"{self.base_url}/chat/completions",
                    headers={
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.test_model,
                        "messages": [
                            {
                                "role": "user",
                                "content": [
                                    {
                                        "type": "text",
                                        "text": "Describe this image briefly."
                                    },
                                    {
                                        "type": "image_url",
                                        "image_url": {
                                            "url": f"data:image/jpeg;base64,{b64_data}"
                                        }
                                    }
                                ]
                            }
                        ],
                        "max_tokens": 50
                    }
                )
                
                print(f"πŸ€– Vision API Status: {response.status_code}")
                
                if response.status_code == 200:
                    result = response.json()
                    # Tolerate an empty "choices" list or null content so a
                    # successful call is not reported as a failure.
                    choices = result.get('choices') or [{}]
                    message = choices[0].get('message') or {}
                    content = message.get('content') or 'N/A'
                    print(f"βœ… Vision model works: {str(content)[:100]}...")
                    self.test_results["vision_model"] = "βœ… PASS - Vision API working"
                elif response.status_code == 400:
                    error_detail = response.text[:200]
                    print(f"❌ Bad request - Model or format issue: {error_detail}")
                    self.test_results["vision_model"] = f"❌ FAIL - Bad request: {error_detail}"
                elif response.status_code == 404:
                    print(f"❌ Model not found - {self.test_model} may not exist")
                    self.test_results["vision_model"] = f"❌ FAIL - Model not found: {self.test_model}"
                else:
                    print(f"⚠️  Unexpected status: {response.status_code}")
                    print(f"πŸ“„ Response: {response.text[:200]}")
                    self.test_results["vision_model"] = f"⚠️  UNKNOWN - Status {response.status_code}"
                    
        except Exception as e:
            # Image creation, network and JSON errors are all reported here
            print(f"❌ Vision model test failed: {e}")
            self.test_results["vision_model"] = f"❌ FAIL - {type(e).__name__}: {e}"

    async def test_4_exact_app_request(self):
        """Test 4: Exact request format from main application"""
        print("\n🎯 TEST 4: Exact App Request Format")
        print("-" * 30)
        
        if not self.api_key:
            print("⏭️  Skipping - No API key")
            self.test_results["app_request"] = "⏭️  SKIP - No API key"
            return
            
        try:
            # Create the same test image as the app would process
            test_image = Image.new('RGB', (200, 100), color='white')
            draw = ImageDraw.Draw(test_image)
            draw.text((10, 10), "MEDICAL DOCUMENT TEST", fill='black')
            draw.text((10, 30), "Patient: John Doe", fill='black')
            draw.text((10, 50), "DOB: 01/01/1980", fill='black')
            
            # Convert exactly like the app does
            if test_image.mode != 'RGB':
                test_image = test_image.convert('RGB')
            
            img_byte_arr = io.BytesIO()
            test_image.save(img_byte_arr, format='JPEG', quality=95)
            img_bytes = img_byte_arr.getvalue()
            b64_data = base64.b64encode(img_bytes).decode()
            
            print(f"πŸ“„ Simulated medical document: {len(img_bytes)} bytes")
            
            # Use EXACT request format from the main app
            request_payload = {
                "model": "pixtral-12b-2409",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """You are a strict OCR text extraction tool. Your job is to extract ONLY the actual text that appears in this image - nothing more, nothing less.

CRITICAL RULES:
- Extract ONLY text that is actually visible in the image
- Do NOT generate, invent, or create any content
- Do NOT add examples or sample data
- Do NOT fill in missing information
- If the image contains minimal text, return minimal text
- If the image is blank or contains no medical content, return what you actually see

Extract exactly what text appears in this image:"""
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{b64_data}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": 8000,
                "temperature": 0.0
            }
            
            print(f"πŸ“ Request payload size: {len(json.dumps(request_payload))} chars")
            
            async with httpx.AsyncClient(timeout=180.0) as client:  # Same timeout as app
                print("πŸš€ Sending exact app request...")
                
                response = await client.post(
                    "https://api.mistral.ai/v1/chat/completions",  # Exact URL from app
                    headers={
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json"
                    },
                    json=request_payload
                )
                
                print(f"πŸ“Š App Format Status: {response.status_code}")
                print(f"πŸ“ Response Size: {len(response.content)} bytes")
                print(f"πŸ•’ Response Headers: {dict(response.headers)}")
                
                if response.status_code == 200:
                    result = response.json()
                    content = result.get('choices', [{}])[0].get('message', {}).get('content', 'N/A')
                    print(f"βœ… Exact app request works!")
                    print(f"πŸ“ Extracted text: {content[:200]}...")
                    self.test_results["app_request"] = "βœ… PASS - App format works perfectly"
                    
                    # This is the smoking gun - if this works, the app should work too
                    print("\n🚨 CRITICAL: This exact request format WORKS!")
                    print("🚨 The main app should be using Mistral API successfully!")
                    print("🚨 Check app logs for why it's falling back to multimodal processor!")
                    
                else:
                    error_detail = response.text[:300]
                    print(f"❌ App request format failed: {error_detail}")
                    self.test_results["app_request"] = f"❌ FAIL - {response.status_code}: {error_detail}"
                    
        except Exception as e:
            print(f"❌ App request test failed: {e}")
            self.test_results["app_request"] = f"❌ FAIL - {type(e).__name__}: {e}"

    async def test_5_environment_check(self):
        """Test 5: Environment and configuration check"""
        print("\n🌍 TEST 5: Environment Check")
        print("-" * 30)
        
        # Check environment variables
        env_vars = {
            "MISTRAL_API_KEY": os.getenv("MISTRAL_API_KEY"),
            "USE_MISTRAL_FALLBACK": os.getenv("USE_MISTRAL_FALLBACK"),
            "USE_MULTIMODAL_FALLBACK": os.getenv("USE_MULTIMODAL_FALLBACK"),
            "PYTHONPATH": os.getenv("PYTHONPATH"),
        }
        
        print("πŸ“‹ Environment Variables:")
        for key, value in env_vars.items():
            if key == "MISTRAL_API_KEY" and value:
                print(f"  {key}: {value[:8]}...{value[-4:]}")
            else:
                print(f"  {key}: {value}")
        
        # Check if we're in Docker
        in_docker = os.path.exists('/.dockerenv') or os.path.exists('/proc/1/cgroup')
        print(f"🐳 Docker Environment: {'Yes' if in_docker else 'No'}")
        
        # Check Python environment
        print(f"🐍 Python Version: {sys.version}")
        print(f"πŸ“ Working Directory: {os.getcwd()}")
        
        # Check required libraries
        try:
            import httpx
            print(f"πŸ“¦ httpx version: {httpx.__version__}")
        except ImportError:
            print("❌ httpx not available")
        
        # Check if main app files exist
        app_files = ["src/file_processor.py", "src/workflow_orchestrator.py", ".env"]
        print("\nπŸ“ App Files:")
        for file in app_files:
            exists = Path(file).exists()
            print(f"  {file}: {'βœ… Exists' if exists else '❌ Missing'}")
        
        self.test_results["environment"] = "βœ… Environment checked"

    def generate_report(self):
        """Generate comprehensive diagnostic report"""
        print("\n" + "=" * 70)
        print("πŸ“Š DIAGNOSTIC REPORT")
        print("=" * 70)
        
        print(f"⏰ Test completed: {datetime.now().isoformat()}")
        print(f"πŸ”‘ API Key: {'Present' if self.api_key else 'Missing'}")
        
        print("\nπŸ§ͺ Test Results:")
        for test_name, result in self.test_results.items():
            print(f"  {test_name.replace('_', ' ').title()}: {result}")
        
        # Analysis and recommendations
        print("\nπŸ” ANALYSIS:")
        
        connectivity_ok = "βœ… PASS" in self.test_results.get("connectivity", "")
        auth_ok = "βœ… PASS" in self.test_results.get("authentication", "")
        vision_ok = "βœ… PASS" in self.test_results.get("vision_model", "")
        app_format_ok = "βœ… PASS" in self.test_results.get("app_request", "")
        
        if not connectivity_ok:
            print("❌ NETWORK ISSUE: Cannot reach Mistral API servers")
            print("   β†’ Check firewall, DNS, or network connectivity")
        elif not auth_ok:
            print("❌ AUTHENTICATION ISSUE: API key is invalid")
            print("   β†’ Verify API key in Mistral dashboard")
        elif not vision_ok:
            print("❌ MODEL ISSUE: Vision model unavailable or incorrect")
            print("   β†’ Check if pixtral-12b-2409 model exists")
        elif app_format_ok:
            print("🚨 CRITICAL FINDING: Mistral API works perfectly!")
            print("   β†’ The main app SHOULD be working")
            print("   β†’ Issue is in the app's error handling or fallback logic")
            print("   β†’ Check app logs for silent failures")
        else:
            print("❓ UNKNOWN ISSUE: API reachable but requests failing")
            print("   β†’ Check request format or API changes")
        
        print("\n🎯 NEXT STEPS:")
        if app_format_ok:
            print("1. Check main app logs for 'mistral_fallback_failed' events")
            print("2. Add more detailed error logging in _extract_with_mistral()")
            print("3. Verify environment variables in Docker container")
            print("4. Check if multimodal fallback is masking Mistral errors")
        else:
            print("1. Fix the identified API issues above")
            print("2. Re-run this test script")
            print("3. Test the main application after fixes")

async def main():
    """Create the tester, run the five diagnostics in order, print the report."""
    tester = MistralAPITester()

    # Awaited one after another so the console output stays in test order
    diagnostics = (
        tester.test_1_basic_connectivity,
        tester.test_2_authentication,
        tester.test_3_vision_model_availability,
        tester.test_4_exact_app_request,
        tester.test_5_environment_check,
    )
    for diagnostic in diagnostics:
        await diagnostic()

    # Generate final report
    tester.generate_report()

def load_env_file(env_path):
    """Load ``KEY=VALUE`` pairs from *env_path* into ``os.environ``.

    Blank lines, comment lines (``#``, optionally indented) and lines without
    ``=`` are skipped. A leading ``export `` is dropped from the key and one
    pair of matching surrounding quotes is stripped from the value. Existing
    environment variables with the same name are overwritten.

    Args:
        env_path: path of the file to read (str or ``Path``).

    Returns:
        dict mapping each key that was set to the value it was given.

    Raises:
        OSError: if the file cannot be opened.
    """
    loaded = {}
    with open(env_path, encoding="utf-8") as env_handle:
        for raw_line in env_handle:
            entry = raw_line.strip()
            if not entry or entry.startswith('#'):
                continue
            key, separator, value = entry.partition('=')
            key = key.strip()
            if key.startswith('export '):
                key = key[len('export '):].strip()
            # A line with no "=" or no name is not an assignment
            if not separator or not key:
                continue
            value = value.strip()
            # KEY="value" / KEY='value': the quotes are not part of the value
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            os.environ[key] = value
            loaded[key] = value
    return loaded


if __name__ == "__main__":
    # Load environment variables from .env file if present
    env_file = Path(".env")
    if env_file.exists():
        print(f"πŸ“„ Loading environment from {env_file}")
        load_env_file(env_file)
    
    # Run the diagnostic tests
    asyncio.run(main())