MoraxCheng committed on
Commit
01b4983
·
1 Parent(s): 3c96d15

Enhance URL handling in transformers by patching multiple functions for comprehensive URL fixing; improve requests.get handling for malformed URLs

Browse files
Files changed (1) hide show
  1. app.py +50 -2
app.py CHANGED
@@ -23,15 +23,20 @@ def patch_transformers_url():
23
  """Fix URL scheme issue in transformers 4.17.0 with comprehensive URL handling"""
24
  try:
25
  import transformers.file_utils
 
 
 
26
  original_get_from_cache = transformers.file_utils.get_from_cache
27
 
28
  def patched_get_from_cache(url, *args, **kwargs):
29
  # Comprehensive URL fixing for various formats
30
  if isinstance(url, str):
 
31
  # Handle different types of malformed URLs
32
- if url.startswith('/api/'):
33
  # Fix relative API URLs - ensure proper base URL
34
  url = 'https://huggingface.co' + url
 
35
  elif url.startswith('//'):
36
  # Fix protocol-relative URLs
37
  url = 'https:' + url
@@ -67,8 +72,51 @@ def patch_transformers_url():
67
  print(f"All download attempts failed for {url}: {e}")
68
  raise
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  transformers.file_utils.get_from_cache = patched_get_from_cache
71
- print("Applied enhanced URL patch for transformers")
72
  except Exception as e:
73
  print(f"Warning: Could not patch transformers URL handling: {e}")
74
 
 
23
  """Fix URL scheme issue in transformers 4.17.0 with comprehensive URL handling"""
24
  try:
25
  import transformers.file_utils
26
+ import requests
27
+
28
+ # Patch multiple functions for comprehensive URL fixing
29
  original_get_from_cache = transformers.file_utils.get_from_cache
30
 
31
  def patched_get_from_cache(url, *args, **kwargs):
32
  # Comprehensive URL fixing for various formats
33
  if isinstance(url, str):
34
+ original_url = url
35
  # Handle different types of malformed URLs
36
+ if url.startswith('/api/resolve-cache/') or url.startswith('/api/'):
37
  # Fix relative API URLs - ensure proper base URL
38
  url = 'https://huggingface.co' + url
39
+ print(f"Fixed relative API URL: {original_url} -> {url}")
40
  elif url.startswith('//'):
41
  # Fix protocol-relative URLs
42
  url = 'https:' + url
 
72
  print(f"All download attempts failed for {url}: {e}")
73
  raise
74
 
75
+ # Also patch cached_path function which might be causing the issue
76
+ if hasattr(transformers.file_utils, 'cached_path'):
77
+ original_cached_path = transformers.file_utils.cached_path
78
+
79
+ def patched_cached_path(url_or_filename, *args, **kwargs):
80
+ if isinstance(url_or_filename, str):
81
+ if url_or_filename.startswith('/api/resolve-cache/') or url_or_filename.startswith('/api/'):
82
+ url_or_filename = 'https://huggingface.co' + url_or_filename
83
+ print(f"Fixed cached_path URL: {url_or_filename}")
84
+ return original_cached_path(url_or_filename, *args, **kwargs)
85
+
86
+ transformers.file_utils.cached_path = patched_cached_path
87
+
88
+ # Patch http_get function to handle malformed URLs at the lowest level
89
+ if hasattr(transformers.file_utils, 'http_get'):
90
+ original_http_get = transformers.file_utils.http_get
91
+
92
+ def patched_http_get(url, *args, **kwargs):
93
+ if isinstance(url, str):
94
+ if url.startswith('/api/resolve-cache/') or url.startswith('/api/'):
95
+ url = 'https://huggingface.co' + url
96
+ print(f"Fixed http_get URL: {url}")
97
+ return original_http_get(url, *args, **kwargs)
98
+
99
+ transformers.file_utils.http_get = patched_http_get
100
+
101
+ # Patch requests.get at the lowest level to catch any remaining malformed URLs
102
+ original_requests_get = requests.get
103
+
104
+ def patched_requests_get(url, *args, **kwargs):
105
+ if isinstance(url, str):
106
+ if url.startswith('/api/resolve-cache/') or url.startswith('/api/'):
107
+ original_url = url
108
+ url = 'https://huggingface.co' + url
109
+ print(f"Fixed requests.get URL: {original_url} -> {url}")
110
+ elif not url.startswith(('http://', 'https://', 'ftp://')):
111
+ if url.startswith('/'):
112
+ url = 'https://huggingface.co' + url
113
+ print(f"Fixed relative URL in requests.get: {url}")
114
+ return original_requests_get(url, *args, **kwargs)
115
+
116
+ requests.get = patched_requests_get
117
+
118
  transformers.file_utils.get_from_cache = patched_get_from_cache
119
+ print("Applied comprehensive URL patch for transformers and requests")
120
  except Exception as e:
121
  print(f"Warning: Could not patch transformers URL handling: {e}")
122