mehhl committed · verified
Commit 09826d0
1 Parent(s): 15eb7f7

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. temp_venv/lib/python3.13/site-packages/IPython/core/alias.py +268 -0
  2. temp_venv/lib/python3.13/site-packages/IPython/core/async_helpers.py +155 -0
  3. temp_venv/lib/python3.13/site-packages/IPython/core/completerlib.py +382 -0
  4. temp_venv/lib/python3.13/site-packages/IPython/core/display_trap.py +74 -0
  5. temp_venv/lib/python3.13/site-packages/IPython/core/error.py +60 -0
  6. temp_venv/lib/python3.13/site-packages/IPython/core/historyapp.py +158 -0
  7. temp_venv/lib/python3.13/site-packages/IPython/core/hooks.py +158 -0
  8. temp_venv/lib/python3.13/site-packages/IPython/core/magic.py +760 -0
  9. temp_venv/lib/python3.13/site-packages/IPython/core/prefilter.py +707 -0
  10. temp_venv/lib/python3.13/site-packages/IPython/core/profiledir.py +244 -0
  11. temp_venv/lib/python3.13/site-packages/IPython/core/release.py +45 -0
  12. temp_venv/lib/python3.13/site-packages/charset_normalizer-3.4.2.dist-info/licenses/LICENSE +21 -0
  13. temp_venv/lib/python3.13/site-packages/executing/__pycache__/__init__.cpython-313.pyc +0 -0
  14. temp_venv/lib/python3.13/site-packages/executing/__pycache__/_exceptions.cpython-313.pyc +0 -0
  15. temp_venv/lib/python3.13/site-packages/executing/__pycache__/_position_node_finder.cpython-313.pyc +0 -0
  16. temp_venv/lib/python3.13/site-packages/executing/__pycache__/_pytest_utils.cpython-313.pyc +0 -0
  17. temp_venv/lib/python3.13/site-packages/executing/__pycache__/executing.cpython-313.pyc +0 -0
  18. temp_venv/lib/python3.13/site-packages/executing/__pycache__/version.cpython-313.pyc +0 -0
  19. temp_venv/lib/python3.13/site-packages/fsspec-2025.3.2.dist-info/licenses/LICENSE +29 -0
  20. temp_venv/lib/python3.13/site-packages/fsspec/implementations/__init__.py +0 -0
  21. temp_venv/lib/python3.13/site-packages/fsspec/implementations/arrow.py +304 -0
  22. temp_venv/lib/python3.13/site-packages/fsspec/implementations/asyn_wrapper.py +103 -0
  23. temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_mapper.py +75 -0
  24. temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_metadata.py +232 -0
  25. temp_venv/lib/python3.13/site-packages/fsspec/implementations/cached.py +941 -0
  26. temp_venv/lib/python3.13/site-packages/fsspec/implementations/dask.py +152 -0
  27. temp_venv/lib/python3.13/site-packages/fsspec/implementations/data.py +58 -0
  28. temp_venv/lib/python3.13/site-packages/fsspec/implementations/dbfs.py +467 -0
  29. temp_venv/lib/python3.13/site-packages/fsspec/implementations/dirfs.py +388 -0
  30. temp_venv/lib/python3.13/site-packages/fsspec/implementations/ftp.py +395 -0
  31. temp_venv/lib/python3.13/site-packages/fsspec/implementations/git.py +115 -0
  32. temp_venv/lib/python3.13/site-packages/fsspec/implementations/github.py +267 -0
  33. temp_venv/lib/python3.13/site-packages/fsspec/implementations/http.py +880 -0
  34. temp_venv/lib/python3.13/site-packages/fsspec/implementations/http_sync.py +931 -0
  35. temp_venv/lib/python3.13/site-packages/fsspec/implementations/jupyter.py +124 -0
  36. temp_venv/lib/python3.13/site-packages/fsspec/implementations/libarchive.py +213 -0
  37. temp_venv/lib/python3.13/site-packages/fsspec/implementations/local.py +477 -0
  38. temp_venv/lib/python3.13/site-packages/fsspec/implementations/memory.py +312 -0
  39. temp_venv/lib/python3.13/site-packages/fsspec/implementations/reference.py +1305 -0
  40. temp_venv/lib/python3.13/site-packages/fsspec/implementations/sftp.py +180 -0
  41. temp_venv/lib/python3.13/site-packages/fsspec/implementations/smb.py +416 -0
  42. temp_venv/lib/python3.13/site-packages/fsspec/implementations/tar.py +124 -0
  43. temp_venv/lib/python3.13/site-packages/fsspec/implementations/webhdfs.py +485 -0
  44. temp_venv/lib/python3.13/site-packages/fsspec/implementations/zip.py +177 -0
  45. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/__init__.py +289 -0
  46. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/common.py +175 -0
  47. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/copy.py +557 -0
  48. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/get.py +587 -0
  49. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/mv.py +57 -0
  50. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/open.py +11 -0
temp_venv/lib/python3.13/site-packages/IPython/core/alias.py ADDED
@@ -0,0 +1,268 @@
+# encoding: utf-8
+"""
+System command aliases.
+
+Authors:
+
+* Fernando Perez
+* Brian Granger
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (C) 2008-2011 The IPython Development Team
+#
+# Distributed under the terms of the BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+import os
+import re
+import sys
+
+from traitlets.config.configurable import Configurable
+from .error import UsageError
+
+from traitlets import List, Instance
+from logging import error
+
+import typing as t
+
+
+#-----------------------------------------------------------------------------
+# Utilities
+#-----------------------------------------------------------------------------
+
+# This is used as the pattern for calls to split_user_input.
+shell_line_split = re.compile(r'^(\s*)()(\S+)(.*$)')
+
+def default_aliases() -> t.List[t.Tuple[str, str]]:
+    """Return list of shell aliases to auto-define.
+    """
+    # Note: the aliases defined here should be safe to use on a kernel
+    # regardless of what frontend it is attached to. Frontends that use a
+    # kernel in-process can define additional aliases that will only work in
+    # their case. For example, things like 'less' or 'clear' that manipulate
+    # the terminal should NOT be declared here, as they will only work if the
+    # kernel is running inside a true terminal, and not over the network.
+
+    if os.name == 'posix':
+        default_aliases = [('mkdir', 'mkdir'), ('rmdir', 'rmdir'),
+                           ('mv', 'mv'), ('rm', 'rm'), ('cp', 'cp'),
+                           ('cat', 'cat'),
+                           ]
+        # Useful set of ls aliases. The GNU and BSD options are a little
+        # different, so we make aliases that provide as similar as possible
+        # behavior in ipython, by passing the right flags for each platform
+        if sys.platform.startswith('linux'):
+            ls_aliases = [('ls', 'ls -F --color'),
+                          # long ls
+                          ('ll', 'ls -F -o --color'),
+                          # ls normal files only
+                          ('lf', 'ls -F -o --color %l | grep ^-'),
+                          # ls symbolic links
+                          ('lk', 'ls -F -o --color %l | grep ^l'),
+                          # directories or links to directories,
+                          ('ldir', 'ls -F -o --color %l | grep /$'),
+                          # things which are executable
+                          ('lx', 'ls -F -o --color %l | grep ^-..x'),
+                          ]
+        elif sys.platform.startswith('openbsd') or sys.platform.startswith('netbsd'):
+            # OpenBSD, NetBSD. The ls implementation on these platforms do not support
+            # the -G switch and lack the ability to use colorized output.
+            ls_aliases = [('ls', 'ls -F'),
+                          # long ls
+                          ('ll', 'ls -F -l'),
+                          # ls normal files only
+                          ('lf', 'ls -F -l %l | grep ^-'),
+                          # ls symbolic links
+                          ('lk', 'ls -F -l %l | grep ^l'),
+                          # directories or links to directories,
+                          ('ldir', 'ls -F -l %l | grep /$'),
+                          # things which are executable
+                          ('lx', 'ls -F -l %l | grep ^-..x'),
+                          ]
+        else:
+            # BSD, OSX, etc.
+            ls_aliases = [('ls', 'ls -F -G'),
+                          # long ls
+                          ('ll', 'ls -F -l -G'),
+                          # ls normal files only
+                          ('lf', 'ls -F -l -G %l | grep ^-'),
+                          # ls symbolic links
+                          ('lk', 'ls -F -l -G %l | grep ^l'),
+                          # directories or links to directories,
+                          ('ldir', 'ls -F -G -l %l | grep /$'),
+                          # things which are executable
+                          ('lx', 'ls -F -l -G %l | grep ^-..x'),
+                          ]
+        default_aliases = default_aliases + ls_aliases
+    elif os.name in ['nt', 'dos']:
+        default_aliases = [('ls', 'dir /on'),
+                           ('ddir', 'dir /ad /on'), ('ldir', 'dir /ad /on'),
+                           ('mkdir', 'mkdir'), ('rmdir', 'rmdir'),
+                           ('echo', 'echo'), ('ren', 'ren'), ('copy', 'copy'),
+                           ]
+    else:
+        default_aliases = []
+
+    return default_aliases
+
+
+class AliasError(Exception):
+    pass
+
+
+class InvalidAliasError(AliasError):
+    pass
+
+
+class Alias:
+    """Callable object storing the details of one alias.
+
+    Instances are registered as magic functions to allow use of aliases.
+    """
+
+    # Prepare blacklist
+    blacklist = {'cd','popd','pushd','dhist','alias','unalias'}
+
+    def __init__(self, shell, name, cmd):
+        self.shell = shell
+        self.name = name
+        self.cmd = cmd
+        self.__doc__ = "Alias for `!{}`".format(cmd)
+        self.nargs = self.validate()
+
+    def validate(self):
+        """Validate the alias, and return the number of arguments."""
+        if self.name in self.blacklist:
+            raise InvalidAliasError("The name %s can't be aliased "
+                                    "because it is a keyword or builtin." % self.name)
+        try:
+            caller = self.shell.magics_manager.magics['line'][self.name]
+        except KeyError:
+            pass
+        else:
+            if not isinstance(caller, Alias):
+                raise InvalidAliasError("The name %s can't be aliased "
+                                        "because it is another magic command." % self.name)
+
+        if not (isinstance(self.cmd, str)):
+            raise InvalidAliasError("An alias command must be a string, "
+                                    "got: %r" % self.cmd)
+
+        nargs = self.cmd.count('%s') - self.cmd.count('%%s')
+
+        if (nargs > 0) and (self.cmd.find('%l') >= 0):
+            raise InvalidAliasError('The %s and %l specifiers are mutually '
+                                    'exclusive in alias definitions.')
+
+        return nargs
+
+    def __repr__(self):
+        return "<alias {} for {!r}>".format(self.name, self.cmd)
+
+    def __call__(self, rest=''):
+        cmd = self.cmd
+        nargs = self.nargs
+        # Expand the %l special to be the user's input line
+        if cmd.find('%l') >= 0:
+            cmd = cmd.replace('%l', rest)
+            rest = ''
+
+        if nargs==0:
+            if cmd.find('%%s') >= 1:
+                cmd = cmd.replace('%%s', '%s')
+            # Simple, argument-less aliases
+            cmd = '%s %s' % (cmd, rest)
+        else:
+            # Handle aliases with positional arguments
+            args = rest.split(None, nargs)
+            if len(args) < nargs:
+                raise UsageError('Alias <%s> requires %s arguments, %s given.' %
+                                 (self.name, nargs, len(args)))
+            cmd = '%s %s' % (cmd % tuple(args[:nargs]),' '.join(args[nargs:]))
+
+        self.shell.system(cmd)
+
+#-----------------------------------------------------------------------------
+# Main AliasManager class
+#-----------------------------------------------------------------------------
+
+class AliasManager(Configurable):
+    default_aliases: List = List(default_aliases()).tag(config=True)
+    user_aliases: List = List(default_value=[]).tag(config=True)
+    shell = Instance(
+        "IPython.core.interactiveshell.InteractiveShellABC", allow_none=True
+    )
+
+    def __init__(self, shell=None, **kwargs):
+        super(AliasManager, self).__init__(shell=shell, **kwargs)
+        # For convenient access
+        if self.shell is not None:
+            self.linemagics = self.shell.magics_manager.magics["line"]
+            self.init_aliases()
+
+    def init_aliases(self):
+        # Load default & user aliases
+        for name, cmd in self.default_aliases + self.user_aliases:
+            if (
+                cmd.startswith("ls ")
+                and self.shell is not None
+                and self.shell.colors == "nocolor"
+            ):
+                cmd = cmd.replace(" --color", "")
+            self.soft_define_alias(name, cmd)
+
+    @property
+    def aliases(self):
+        return [(n, func.cmd) for (n, func) in self.linemagics.items()
+                if isinstance(func, Alias)]
+
+    def soft_define_alias(self, name, cmd):
+        """Define an alias, but don't raise on an AliasError."""
+        try:
+            self.define_alias(name, cmd)
+        except AliasError as e:
+            error("Invalid alias: %s" % e)
+
+    def define_alias(self, name, cmd):
+        """Define a new alias after validating it.
+
+        This will raise an :exc:`AliasError` if there are validation
+        problems.
+        """
+        caller = Alias(shell=self.shell, name=name, cmd=cmd)
+        self.shell.magics_manager.register_function(caller, magic_kind='line',
+                                                    magic_name=name)
+
+    def get_alias(self, name):
+        """Return an alias, or None if no alias by that name exists."""
+        aname = self.linemagics.get(name, None)
+        return aname if isinstance(aname, Alias) else None
+
+    def is_alias(self, name):
+        """Return whether or not a given name has been defined as an alias"""
+        return self.get_alias(name) is not None
+
+    def undefine_alias(self, name):
+        if self.is_alias(name):
+            del self.linemagics[name]
+        else:
+            raise ValueError('%s is not an alias' % name)
+
+    def clear_aliases(self):
+        for name, _ in self.aliases:
+            self.undefine_alias(name)
+
+    def retrieve_alias(self, name):
+        """Retrieve the command to which an alias expands."""
+        caller = self.get_alias(name)
+        if caller:
+            return caller.cmd
+        else:
+            raise ValueError('%s is not an alias' % name)
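For orientation, a minimal usage sketch (not part of the diff; it assumes an interactive IPython session where `get_ipython()` returns the running shell, which exposes this manager as `ip.alias_manager`):

    ip = get_ipython()
    am = ip.alias_manager
    am.define_alias('parts', 'echo first %s second %s')   # two %s slots -> nargs == 2
    am.retrieve_alias('parts')                            # -> 'echo first %s second %s'
    # Invoking the alias as a line magic, e.g. `%parts A B`, fills the %s slots
    # and runs "echo first A second B" through shell.system().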
temp_venv/lib/python3.13/site-packages/IPython/core/async_helpers.py ADDED
@@ -0,0 +1,155 @@
+"""
+Async helper function that are invalid syntax on Python 3.5 and below.
+
+This code is best effort, and may have edge cases not behaving as expected. In
+particular it contain a number of heuristics to detect whether code is
+effectively async and need to run in an event loop or not.
+
+Some constructs (like top-level `return`, or `yield`) are taken care of
+explicitly to actually raise a SyntaxError and stay as close as possible to
+Python semantics.
+"""
+
+import ast
+import asyncio
+import inspect
+from functools import wraps
+
+_asyncio_event_loop = None
+
+
+def get_asyncio_loop():
+    """asyncio has deprecated get_event_loop
+
+    Replicate it here, with our desired semantics:
+
+    - always returns a valid, not-closed loop
+    - not thread-local like asyncio's,
+      because we only want one loop for IPython
+    - if called from inside a coroutine (e.g. in ipykernel),
+      return the running loop
+
+    .. versionadded:: 8.0
+    """
+    try:
+        return asyncio.get_running_loop()
+    except RuntimeError:
+        # not inside a coroutine,
+        # track our own global
+        pass
+
+    # not thread-local like asyncio's,
+    # because we only track one event loop to run for IPython itself,
+    # always in the main thread.
+    global _asyncio_event_loop
+    if _asyncio_event_loop is None or _asyncio_event_loop.is_closed():
+        _asyncio_event_loop = asyncio.new_event_loop()
+    return _asyncio_event_loop
+
+
+class _AsyncIORunner:
+    def __call__(self, coro):
+        """
+        Handler for asyncio autoawait
+        """
+        return get_asyncio_loop().run_until_complete(coro)
+
+    def __str__(self):
+        return "asyncio"
+
+
+_asyncio_runner = _AsyncIORunner()
+
+
+class _AsyncIOProxy:
+    """Proxy-object for an asyncio
+
+    Any coroutine methods will be wrapped in event_loop.run_
+    """
+
+    def __init__(self, obj, event_loop):
+        self._obj = obj
+        self._event_loop = event_loop
+
+    def __repr__(self):
+        return f"<_AsyncIOProxy({self._obj!r})>"
+
+    def __getattr__(self, key):
+        attr = getattr(self._obj, key)
+        if inspect.iscoroutinefunction(attr):
+            # if it's a coroutine method,
+            # return a threadsafe wrapper onto the _current_ asyncio loop
+            @wraps(attr)
+            def _wrapped(*args, **kwargs):
+                concurrent_future = asyncio.run_coroutine_threadsafe(
+                    attr(*args, **kwargs), self._event_loop
+                )
+                return asyncio.wrap_future(concurrent_future)
+
+            return _wrapped
+        else:
+            return attr
+
+    def __dir__(self):
+        return dir(self._obj)
+
+
+def _curio_runner(coroutine):
+    """
+    handler for curio autoawait
+    """
+    import curio
+
+    return curio.run(coroutine)
+
+
+def _trio_runner(async_fn):
+    import trio
+
+    async def loc(coro):
+        """
+        We need the dummy no-op async def to protect from
+        trio's internal. See https://github.com/python-trio/trio/issues/89
+        """
+        return await coro
+
+    return trio.run(loc, async_fn)
+
+
+def _pseudo_sync_runner(coro):
+    """
+    A runner that does not really allow async execution, and just advance the coroutine.
+
+    See discussion in https://github.com/python-trio/trio/issues/608,
+
+    Credit to Nathaniel Smith
+    """
+    try:
+        coro.send(None)
+    except StopIteration as exc:
+        return exc.value
+    else:
+        # TODO: do not raise but return an execution result with the right info.
+        raise RuntimeError(
+            "{coro_name!r} needs a real async loop".format(coro_name=coro.__name__)
+        )
+
+
+def _should_be_async(cell: str) -> bool:
+    """Detect if a block of code need to be wrapped in an `async def`
+
+    Attempt to parse the block of code, it it compile we're fine.
+    Otherwise we wrap if and try to compile.
+
+    If it works, assume it should be async. Otherwise Return False.
+
+    Not handled yet: If the block of code has a return statement as the top
+    level, it will be seen as async. This is a know limitation.
+    """
+    try:
+        code = compile(
+            cell, "<>", "exec", flags=getattr(ast, "PyCF_ALLOW_TOP_LEVEL_AWAIT", 0x0)
+        )
+        return inspect.CO_COROUTINE & code.co_flags == inspect.CO_COROUTINE
+    except (SyntaxError, MemoryError):
+        return False
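A quick illustration of how the helpers above behave (a sketch only, evaluated in this module's namespace; not part of the file):

    import asyncio

    _should_be_async("x = 1")                    # False: compiles as plain code
    _should_be_async("await asyncio.sleep(0)")   # True: top-level await needs an event loop

    async def answer():
        return 42

    _asyncio_runner(answer())       # 42, run to completion on the shared IPython loop
    _pseudo_sync_runner(answer())   # 42, the coroutine finishes without ever awaiting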
temp_venv/lib/python3.13/site-packages/IPython/core/completerlib.py ADDED
@@ -0,0 +1,382 @@
+# encoding: utf-8
+"""Implementations for various useful completers.
+
+These are all loaded by default by IPython.
+"""
+#-----------------------------------------------------------------------------
+# Copyright (C) 2010-2011 The IPython Development Team.
+#
+# Distributed under the terms of the BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+# Stdlib imports
+import glob
+import inspect
+import os
+import re
+import sys
+from importlib import import_module
+from importlib.machinery import all_suffixes
+
+
+# Third-party imports
+from time import time
+from zipimport import zipimporter
+
+# Our own imports
+from .completer import expand_user, compress_user
+from .error import TryNext
+from ..utils._process_common import arg_split
+
+# FIXME: this should be pulled in with the right call via the component system
+from IPython import get_ipython
+
+from typing import List
+
+#-----------------------------------------------------------------------------
+# Globals and constants
+#-----------------------------------------------------------------------------
+_suffixes = all_suffixes()
+
+# Time in seconds after which the rootmodules will be stored permanently in the
+# ipython ip.db database (kept in the user's .ipython dir).
+TIMEOUT_STORAGE = 2
+
+# Time in seconds after which we give up
+TIMEOUT_GIVEUP = 20
+
+# Regular expression for the python import statement
+import_re = re.compile(r'(?P<name>[^\W\d]\w*?)'
+                       r'(?P<package>[/\\]__init__)?'
+                       r'(?P<suffix>%s)$' %
+                       r'|'.join(re.escape(s) for s in _suffixes))
+
+# RE for the ipython %run command (python + ipython scripts)
+magic_run_re = re.compile(r'.*(\.ipy|\.ipynb|\.py[w]?)$')
+
+#-----------------------------------------------------------------------------
+# Local utilities
+#-----------------------------------------------------------------------------
+
+
+def module_list(path: str) -> List[str]:
+    """
+    Return the list containing the names of the modules available in the given
+    folder.
+    """
+    # sys.path has the cwd as an empty string, but isdir/listdir need it as '.'
+    if path == '':
+        path = '.'
+
+    # A few local constants to be used in loops below
+    pjoin = os.path.join
+
+    if os.path.isdir(path):
+        # Build a list of all files in the directory and all files
+        # in its subdirectories. For performance reasons, do not
+        # recurse more than one level into subdirectories.
+        files: List[str] = []
+        for root, dirs, nondirs in os.walk(path, followlinks=True):
+            subdir = root[len(path)+1:]
+            if subdir:
+                files.extend(pjoin(subdir, f) for f in nondirs)
+                dirs[:] = [] # Do not recurse into additional subdirectories.
+            else:
+                files.extend(nondirs)
+
+    else:
+        try:
+            files = list(zipimporter(path)._files.keys()) # type: ignore
+        except Exception:
+            files = []
+
+    # Build a list of modules which match the import_re regex.
+    modules = []
+    for f in files:
+        m = import_re.match(f)
+        if m:
+            modules.append(m.group('name'))
+    return list(set(modules))
+
+
+def get_root_modules():
+    """
+    Returns a list containing the names of all the modules available in the
+    folders of the pythonpath.
+
+    ip.db['rootmodules_cache'] maps sys.path entries to list of modules.
+    """
+    ip = get_ipython()
+    if ip is None:
+        # No global shell instance to store cached list of modules.
+        # Don't try to scan for modules every time.
+        return list(sys.builtin_module_names)
+
+    if getattr(ip.db, "_mock", False):
+        rootmodules_cache = {}
+    else:
+        rootmodules_cache = ip.db.get("rootmodules_cache", {})
+    rootmodules = list(sys.builtin_module_names)
+    start_time = time()
+    store = False
+    for path in sys.path:
+        try:
+            modules = rootmodules_cache[path]
+        except KeyError:
+            modules = module_list(path)
+            try:
+                modules.remove('__init__')
+            except ValueError:
+                pass
+            if path not in ('', '.'): # cwd modules should not be cached
+                rootmodules_cache[path] = modules
+            if time() - start_time > TIMEOUT_STORAGE and not store:
+                store = True
+                print("\nCaching the list of root modules, please wait!")
+                print("(This will only be done once - type '%rehashx' to "
+                      "reset cache!)\n")
+                sys.stdout.flush()
+            if time() - start_time > TIMEOUT_GIVEUP:
+                print("This is taking too long, we give up.\n")
+                return []
+        rootmodules.extend(modules)
+    if store:
+        ip.db['rootmodules_cache'] = rootmodules_cache
+    rootmodules = list(set(rootmodules))
+    return rootmodules
+
+
+def is_importable(module, attr: str, only_modules) -> bool:
+    if only_modules:
+        try:
+            mod = getattr(module, attr)
+        except ModuleNotFoundError:
+            # See gh-14434
+            return False
+        return inspect.ismodule(mod)
+    else:
+        return not(attr[:2] == '__' and attr[-2:] == '__')
+
+def is_possible_submodule(module, attr):
+    try:
+        obj = getattr(module, attr)
+    except AttributeError:
+        # Is possibly an unimported submodule
+        return True
+    except TypeError:
+        # https://github.com/ipython/ipython/issues/9678
+        return False
+    return inspect.ismodule(obj)
+
+
+def try_import(mod: str, only_modules=False) -> List[str]:
+    """
+    Try to import given module and return list of potential completions.
+    """
+    mod = mod.rstrip('.')
+    try:
+        m = import_module(mod)
+    except:
+        return []
+
+    m_is_init = '__init__' in (getattr(m, '__file__', '') or '')
+
+    completions = []
+    if (not hasattr(m, '__file__')) or (not only_modules) or m_is_init:
+        completions.extend( [attr for attr in dir(m) if
+                             is_importable(m, attr, only_modules)])
+
+    m_all = getattr(m, "__all__", [])
+    if only_modules:
+        completions.extend(attr for attr in m_all if is_possible_submodule(m, attr))
+    else:
+        completions.extend(m_all)
+
+    if m_is_init:
+        file_ = m.__file__
+        file_path = os.path.dirname(file_)  # type: ignore
+        if file_path is not None:
+            completions.extend(module_list(file_path))
+    completions_set = {c for c in completions if isinstance(c, str)}
+    completions_set.discard('__init__')
+    return list(completions_set)
+
+
+#-----------------------------------------------------------------------------
+# Completion-related functions.
+#-----------------------------------------------------------------------------
+
+def quick_completer(cmd, completions):
+    r""" Easily create a trivial completer for a command.
+
+    Takes either a list of completions, or all completions in string (that will
+    be split on whitespace).
+
+    Example::
+
+        [d:\ipython]|1> import ipy_completers
+        [d:\ipython]|2> ipy_completers.quick_completer('foo', ['bar','baz'])
+        [d:\ipython]|3> foo b<TAB>
+        bar baz
+        [d:\ipython]|3> foo ba
+    """
+
+    if isinstance(completions, str):
+        completions = completions.split()
+
+    def do_complete(self, event):
+        return completions
+
+    get_ipython().set_hook('complete_command',do_complete, str_key = cmd)
+
+def module_completion(line):
+    """
+    Returns a list containing the completion possibilities for an import line.
+
+    The line looks like this :
+    'import xml.d'
+    'from xml.dom import'
+    """
+
+    words = line.split(' ')
+    nwords = len(words)
+
+    # from whatever <tab> -> 'import '
+    if nwords == 3 and words[0] == 'from':
+        return ['import ']
+
+    # 'from xy<tab>' or 'import xy<tab>'
+    if nwords < 3 and (words[0] in {'%aimport', 'import', 'from'}) :
+        if nwords == 1:
+            return get_root_modules()
+        mod = words[1].split('.')
+        if len(mod) < 2:
+            return get_root_modules()
+        completion_list = try_import('.'.join(mod[:-1]), True)
+        return ['.'.join(mod[:-1] + [el]) for el in completion_list]
+
+    # 'from xyz import abc<tab>'
+    if nwords >= 3 and words[0] == 'from':
+        mod = words[1]
+        return try_import(mod)
+
+#-----------------------------------------------------------------------------
+# Completers
+#-----------------------------------------------------------------------------
+# These all have the func(self, event) signature to be used as custom
+# completers
+
+def module_completer(self,event):
+    """Give completions after user has typed 'import ...' or 'from ...'"""
+
+    # This works in all versions of python. While 2.5 has
+    # pkgutil.walk_packages(), that particular routine is fairly dangerous,
+    # since it imports *EVERYTHING* on sys.path. That is: a) very slow b) full
+    # of possibly problematic side effects.
+    # This search the folders in the sys.path for available modules.
+
+    return module_completion(event.line)
+
+# FIXME: there's a lot of logic common to the run, cd and builtin file
+# completers, that is currently reimplemented in each.
+
+def magic_run_completer(self, event):
+    """Complete files that end in .py or .ipy or .ipynb for the %run command.
+    """
+    comps = arg_split(event.line, strict=False)
+    # relpath should be the current token that we need to complete.
+    if (len(comps) > 1) and (not event.line.endswith(' ')):
+        relpath = comps[-1].strip("'\"")
+    else:
+        relpath = ''
+
+    #print("\nev=", event)  # dbg
+    #print("rp=", relpath)  # dbg
+    #print('comps=', comps)  # dbg
+
+    lglob = glob.glob
+    isdir = os.path.isdir
+    relpath, tilde_expand, tilde_val = expand_user(relpath)
+
+    # Find if the user has already typed the first filename, after which we
+    # should complete on all files, since after the first one other files may
+    # be arguments to the input script.
+
+    if any(magic_run_re.match(c) for c in comps):
+        matches = [f.replace('\\','/') + ('/' if isdir(f) else '')
+                   for f in lglob(relpath+'*')]
+    else:
+        dirs = [f.replace('\\','/') + "/" for f in lglob(relpath+'*') if isdir(f)]
+        pys = [f.replace('\\','/')
+               for f in lglob(relpath+'*.py') + lglob(relpath+'*.ipy') +
+               lglob(relpath+'*.ipynb') + lglob(relpath + '*.pyw')]
+
+        matches = dirs + pys
+
+    #print('run comp:', dirs+pys) # dbg
+    return [compress_user(p, tilde_expand, tilde_val) for p in matches]
+
+
+def cd_completer(self, event):
+    """Completer function for cd, which only returns directories."""
+    ip = get_ipython()
+    relpath = event.symbol
+
+    #print(event) # dbg
+    if event.line.endswith('-b') or ' -b ' in event.line:
+        # return only bookmark completions
+        bkms = self.db.get('bookmarks', None)
+        if bkms:
+            return bkms.keys()
+        else:
+            return []
+
+    if event.symbol == '-':
+        width_dh = str(len(str(len(ip.user_ns['_dh']) + 1)))
+        # jump in directory history by number
+        fmt = '-%0' + width_dh +'d [%s]'
+        ents = [ fmt % (i,s) for i,s in enumerate(ip.user_ns['_dh'])]
+        if len(ents) > 1:
+            return ents
+        return []
+
+    if event.symbol.startswith('--'):
+        return ["--" + os.path.basename(d) for d in ip.user_ns['_dh']]
+
+    # Expand ~ in path and normalize directory separators.
+    relpath, tilde_expand, tilde_val = expand_user(relpath)
+    relpath = relpath.replace('\\','/')
+
+    found = []
+    for d in [f.replace('\\','/') + '/' for f in glob.glob(relpath+'*')
+              if os.path.isdir(f)]:
+        if ' ' in d:
+            # we don't want to deal with any of that, complex code
+            # for this is elsewhere
+            raise TryNext
+
+        found.append(d)
+
+    if not found:
+        if os.path.isdir(relpath):
+            return [compress_user(relpath, tilde_expand, tilde_val)]
+
+        # if no completions so far, try bookmarks
+        bks = self.db.get('bookmarks',{})
+        bkmatches = [s for s in bks if s.startswith(event.symbol)]
+        if bkmatches:
+            return bkmatches
+
+        raise TryNext
+
+    return [compress_user(p, tilde_expand, tilde_val) for p in found]
+
+def reset_completer(self, event):
+    "A completer for %reset magic"
+    return '-f -s in out array dhist'.split()
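To make the import-completion flow concrete, a rough sketch of what `module_completion` returns (illustrative only; actual results depend on the interpreter's sys.path, and `quick_completer` needs a live IPython session):

    module_completion('import ')                 # all root modules found on sys.path
    module_completion('import xml.d')            # candidates of the form 'xml.<submodule>' via try_import('xml', True)
    module_completion('from xml.dom import ')    # names importable from xml.dom
    quick_completer('apt', 'install upgrade remove')   # fixed word list for a shell-style command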
temp_venv/lib/python3.13/site-packages/IPython/core/display_trap.py ADDED
@@ -0,0 +1,74 @@
+# encoding: utf-8
+"""
+A context manager for handling sys.displayhook.
+
+Authors:
+
+* Robert Kern
+* Brian Granger
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (C) 2008-2011 The IPython Development Team
+#
+# Distributed under the terms of the BSD License. The full license is in
+# the file COPYING, distributed as part of this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+import sys
+
+from traitlets.config.configurable import Configurable
+from traitlets import Any
+
+#-----------------------------------------------------------------------------
+# Classes and functions
+#-----------------------------------------------------------------------------
+
+
+class DisplayTrap(Configurable):
+    """Object to manage sys.displayhook.
+
+    This came from IPython.core.kernel.display_hook, but is simplified
+    (no callbacks or formatters) until more of the core is refactored.
+    """
+
+    hook = Any()
+
+    def __init__(self, hook=None):
+        super(DisplayTrap, self).__init__(hook=hook, config=None)
+        self.old_hook = None
+        # We define this to track if a single BuiltinTrap is nested.
+        # Only turn off the trap when the outermost call to __exit__ is made.
+        self._nested_level = 0
+
+    def __enter__(self):
+        if self._nested_level == 0:
+            self.set()
+        self._nested_level += 1
+        return self
+
+    def __exit__(self, type, value, traceback):
+        if self._nested_level == 1:
+            self.unset()
+        self._nested_level -= 1
+        # Returning False will cause exceptions to propagate
+        return False
+
+    @property
+    def is_active(self) -> bool:
+        return self._nested_level != 0
+
+    def set(self):
+        """Set the hook."""
+        if sys.displayhook is not self.hook:
+            self.old_hook = sys.displayhook
+            sys.displayhook = self.hook
+
+    def unset(self):
+        """Unset the hook."""
+        sys.displayhook = self.old_hook
+
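The nesting logic above is easiest to see in a small sketch (hypothetical hook, not part of the module):

    import sys

    def my_hook(value):
        print("displayed:", value)

    trap = DisplayTrap(hook=my_hook)
    with trap:
        assert sys.displayhook is my_hook     # installed on the outermost __enter__
        with trap:
            pass                              # nested use: neither enter nor exit changes the hook
        assert sys.displayhook is my_hook     # still active; only the outermost exit unsets
    assert sys.displayhook is not my_hook     # original displayhook restored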
temp_venv/lib/python3.13/site-packages/IPython/core/error.py ADDED
@@ -0,0 +1,60 @@
+# encoding: utf-8
+"""
+Global exception classes for IPython.core.
+
+Authors:
+
+* Brian Granger
+* Fernando Perez
+* Min Ragan-Kelley
+
+Notes
+-----
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (C) 2008 The IPython Development Team
+#
+# Distributed under the terms of the BSD License. The full license is in
+# the file COPYING, distributed as part of this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Exception classes
+#-----------------------------------------------------------------------------
+
+class IPythonCoreError(Exception):
+    pass
+
+
+class TryNext(IPythonCoreError):
+    """Try next hook exception.
+
+    Raise this in your hook function to indicate that the next hook handler
+    should be used to handle the operation.
+    """
+
+class UsageError(IPythonCoreError):
+    """Error in magic function arguments, etc.
+
+    Something that probably won't warrant a full traceback, but should
+    nevertheless interrupt a macro / batch file.
+    """
+
+class StdinNotImplementedError(IPythonCoreError, NotImplementedError):
+    """raw_input was requested in a context where it is not supported
+
+    For use in IPython kernels, where only some frontends may support
+    stdin requests.
+    """
+
+class InputRejected(Exception):
+    """Input rejected by ast transformer.
+
+    Raise this in your NodeTransformer to indicate that InteractiveShell should
+    not execute the supplied input.
+    """
temp_venv/lib/python3.13/site-packages/IPython/core/historyapp.py ADDED
@@ -0,0 +1,158 @@
+# encoding: utf-8
+"""
+An application for managing IPython history.
+
+To be invoked as the `ipython history` subcommand.
+"""
+
+import sqlite3
+from pathlib import Path
+
+from traitlets.config.application import Application
+from .application import BaseIPythonApplication
+from traitlets import Bool, Int, Dict
+from ..utils.io import ask_yes_no
+
+trim_hist_help = """Trim the IPython history database to the last 1000 entries.
+
+This actually copies the last 1000 entries to a new database, and then replaces
+the old file with the new. Use the `--keep=` argument to specify a number
+other than 1000.
+"""
+
+clear_hist_help = """Clear the IPython history database, deleting all entries.
+
+Because this is a destructive operation, IPython will prompt the user if they
+really want to do this. Passing a `-f` flag will force clearing without a
+prompt.
+
+This is an handy alias to `ipython history trim --keep=0`
+"""
+
+
+class HistoryTrim(BaseIPythonApplication):
+    description = trim_hist_help
+
+    backup = Bool(False, help="Keep the old history file as history.sqlite.<N>").tag(
+        config=True
+    )
+
+    keep = Int(1000, help="Number of recent lines to keep in the database.").tag(
+        config=True
+    )
+
+    flags = Dict(  # type: ignore
+        dict(backup=({"HistoryTrim": {"backup": True}}, backup.help))
+    )
+
+    aliases = Dict(dict(keep="HistoryTrim.keep"))  # type: ignore
+
+    def start(self):
+        profile_dir = Path(self.profile_dir.location)
+        hist_file = profile_dir / "history.sqlite"
+        con = sqlite3.connect(hist_file)
+
+        # Grab the recent history from the current database.
+        inputs = list(con.execute('SELECT session, line, source, source_raw FROM '
+                                  'history ORDER BY session DESC, line DESC LIMIT ?', (self.keep+1,)))
+        if len(inputs) <= self.keep:
+            print("There are already at most %d entries in the history database." % self.keep)
+            print("Not doing anything. Use --keep= argument to keep fewer entries")
+            return
+
+        print("Trimming history to the most recent %d entries." % self.keep)
+
+        inputs.pop() # Remove the extra element we got to check the length.
+        inputs.reverse()
+        if inputs:
+            first_session = inputs[0][0]
+            outputs = list(con.execute('SELECT session, line, output FROM '
+                                       'output_history WHERE session >= ?', (first_session,)))
+            sessions = list(con.execute('SELECT session, start, end, num_cmds, remark FROM '
+                                        'sessions WHERE session >= ?', (first_session,)))
+        con.close()
+
+        # Create the new history database.
+        new_hist_file = profile_dir / "history.sqlite.new"
+        i = 0
+        while new_hist_file.exists():
+            # Make sure we don't interfere with an existing file.
+            i += 1
+            new_hist_file = profile_dir / ("history.sqlite.new" + str(i))
+        new_db = sqlite3.connect(new_hist_file)
+        new_db.execute("""CREATE TABLE IF NOT EXISTS sessions (session integer
+                        primary key autoincrement, start timestamp,
+                        end timestamp, num_cmds integer, remark text)""")
+        new_db.execute("""CREATE TABLE IF NOT EXISTS history
+                        (session integer, line integer, source text, source_raw text,
+                        PRIMARY KEY (session, line))""")
+        new_db.execute("""CREATE TABLE IF NOT EXISTS output_history
+                        (session integer, line integer, output text,
+                        PRIMARY KEY (session, line))""")
+        new_db.commit()
+
+
+        if inputs:
+            with new_db:
+                # Add the recent history into the new database.
+                new_db.executemany('insert into sessions values (?,?,?,?,?)', sessions)
+                new_db.executemany('insert into history values (?,?,?,?)', inputs)
+                new_db.executemany('insert into output_history values (?,?,?)', outputs)
+        new_db.close()
+
+        if self.backup:
+            i = 1
+            backup_hist_file = profile_dir / ("history.sqlite.old.%d" % i)
+            while backup_hist_file.exists():
+                i += 1
+                backup_hist_file = profile_dir / ("history.sqlite.old.%d" % i)
+            hist_file.rename(backup_hist_file)
+            print("Backed up longer history file to", backup_hist_file)
+        else:
+            hist_file.unlink()
+
+        new_hist_file.rename(hist_file)
+
+
+class HistoryClear(HistoryTrim):
+    description = clear_hist_help
+    keep = Int(0, help="Number of recent lines to keep in the database.")
+
+    force = Bool(False, help="Don't prompt user for confirmation").tag(config=True)
+
+    flags = Dict(  # type: ignore
+        dict(
+            force=({"HistoryClear": {"force": True}}, force.help),
+            f=({"HistoryTrim": {"force": True}}, force.help),
+        )
+    )
+    aliases = Dict()  # type: ignore
+
+    def start(self):
+        if self.force or ask_yes_no(
+            "Really delete all ipython history? ", default="no", interrupt="no"
+        ):
+            HistoryTrim.start(self)
+
+
+class HistoryApp(Application):
+    name = "ipython-history"
+    description = "Manage the IPython history database."
+
+    subcommands = Dict(dict(
+        trim = (HistoryTrim, HistoryTrim.description.splitlines()[0]),
+        clear = (HistoryClear, HistoryClear.description.splitlines()[0]),
+    ))
+
+    def start(self):
+        if self.subapp is None:
+            print(
+                "No subcommand specified. Must specify one of: "
+                + ", ".join(map(repr, self.subcommands))
+                + ".\n"
+            )
+            self.print_description()
+            self.print_subcommands()
+            self.exit(1)
+        else:
+            return self.subapp.start()
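In command-line terms, the subcommand wiring above corresponds to invocations along these lines (a sketch; option spellings follow the traits and help strings defined in the file):

    ipython history trim --keep=500    # keep only the 500 most recent input lines
    ipython history trim --backup      # keep the old file as history.sqlite.old.N
    ipython history clear -f           # equivalent to trim --keep=0, without prompting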
temp_venv/lib/python3.13/site-packages/IPython/core/hooks.py ADDED
@@ -0,0 +1,158 @@
+"""Hooks for IPython.
+
+In Python, it is possible to overwrite any method of any object if you really
+want to. But IPython exposes a few 'hooks', methods which are *designed* to
+be overwritten by users for customization purposes. This module defines the
+default versions of all such hooks, which get used by IPython if not
+overridden by the user.
+
+Hooks are simple functions, but they should be declared with ``self`` as their
+first argument, because when activated they are registered into IPython as
+instance methods. The self argument will be the IPython running instance
+itself, so hooks have full access to the entire IPython object.
+
+If you wish to define a new hook and activate it, you can make an :doc:`extension
+</config/extensions/index>` or a :ref:`startup script <startup_files>`. For
+example, you could use a startup file like this::
+
+    import os
+
+    def calljed(self,filename, linenum):
+        "My editor hook calls the jed editor directly."
+        print("Calling my own editor, jed ...")
+        if os.system('jed +%d %s' % (linenum,filename)) != 0:
+            raise TryNext()
+
+    def load_ipython_extension(ip):
+        ip.set_hook('editor', calljed)
+
+"""
+
+#*****************************************************************************
+#       Copyright (C) 2005 Fernando Perez. <[email protected]>
+#
+#  Distributed under the terms of the BSD License. The full license is in
+#  the file COPYING, distributed as part of this software.
+#*****************************************************************************
+
+import os
+import subprocess
+import sys
+
+from .error import TryNext
+
+# List here all the default hooks. For now it's just the editor functions
+# but over time we'll move here all the public API for user-accessible things.
+
+__all__ = [
+    "editor",
+    "synchronize_with_editor",
+    "show_in_pager",
+    "clipboard_get",
+]
+
+def editor(self, filename, linenum=None, wait=True):
+    """Open the default editor at the given filename and linenumber.
+
+    This is IPython's default editor hook, you can use it as an example to
+    write your own modified one. To set your own editor function as the
+    new editor hook, call ip.set_hook('editor',yourfunc)."""
+
+    # IPython configures a default editor at startup by reading $EDITOR from
+    # the environment, and falling back on vi (unix) or notepad (win32).
+    editor = self.editor
+
+    # marker for at which line to open the file (for existing objects)
+    if linenum is None or editor=='notepad':
+        linemark = ''
+    else:
+        linemark = '+%d' % int(linenum)
+
+    # Enclose in quotes if necessary and legal
+    if ' ' in editor and os.path.isfile(editor) and editor[0] != '"':
+        editor = '"%s"' % editor
+
+    # Call the actual editor
+    proc = subprocess.Popen('%s %s %s' % (editor, linemark, filename),
+                            shell=True)
+    if wait and proc.wait() != 0:
+        raise TryNext()
+
+
+def synchronize_with_editor(self, filename, linenum, column):
+    pass
+
+
+class CommandChainDispatcher:
+    """ Dispatch calls to a chain of commands until some func can handle it
+
+    Usage: instantiate, execute "add" to add commands (with optional
+    priority), execute normally via f() calling mechanism.
+
+    """
+    def __init__(self,commands=None):
+        if commands is None:
+            self.chain = []
+        else:
+            self.chain = commands
+
+
+    def __call__(self,*args, **kw):
+        """ Command chain is called just like normal func.
+
+        This will call all funcs in chain with the same args as were given to
+        this function, and return the result of first func that didn't raise
+        TryNext"""
+        last_exc = TryNext()
+        for prio,cmd in self.chain:
+            # print("prio",prio,"cmd",cmd) # dbg
+            try:
+                return cmd(*args, **kw)
+            except TryNext as exc:
+                last_exc = exc
+        # if no function will accept it, raise TryNext up to the caller
+        raise last_exc
+
+    def __str__(self):
+        return str(self.chain)
+
+    def add(self, func, priority=0):
+        """ Add a func to the cmd chain with given priority """
+        self.chain.append((priority, func))
+        self.chain.sort(key=lambda x: x[0])
+
+    def __iter__(self):
+        """ Return all objects in chain.
+
+        Handy if the objects are not callable.
+        """
+        return iter(self.chain)
+
+
+def show_in_pager(self, data, start, screen_lines):
+    """ Run a string through pager """
+    # raising TryNext here will use the default paging functionality
+    raise TryNext
+
+
+
+def clipboard_get(self):
+    """ Get text from the clipboard.
+    """
+    from ..lib.clipboard import (
+        osx_clipboard_get,
+        tkinter_clipboard_get,
+        win32_clipboard_get,
+        wayland_clipboard_get,
+    )
+    if sys.platform == 'win32':
+        chain = [win32_clipboard_get, tkinter_clipboard_get]
+    elif sys.platform == 'darwin':
+        chain = [osx_clipboard_get, tkinter_clipboard_get]
+    else:
+        chain = [wayland_clipboard_get, tkinter_clipboard_get]
+    dispatcher = CommandChainDispatcher()
+    for func in chain:
+        dispatcher.add(func)
+    text = dispatcher()
+    return text
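A compact sketch of the dispatcher semantics used by clipboard_get above (hypothetical handlers, not part of the module):

    def primary(text):
        raise TryNext()                   # decline; let the next handler in the chain try

    def fallback(text):
        return text.upper()

    dispatch = CommandChainDispatcher()
    dispatch.add(primary, priority=10)
    dispatch.add(fallback, priority=50)   # larger priority sorts later in the chain
    dispatch("hello")                     # -> 'HELLO'; primary deferred via TryNext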
temp_venv/lib/python3.13/site-packages/IPython/core/magic.py ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ """Magic functions for InteractiveShell.
3
+ """
4
+
5
+ #-----------------------------------------------------------------------------
6
+ # Copyright (C) 2001 Janko Hauser <[email protected]> and
7
+ # Copyright (C) 2001 Fernando Perez <[email protected]>
8
+ # Copyright (C) 2008 The IPython Development Team
9
+
10
+ # Distributed under the terms of the BSD License. The full license is in
11
+ # the file COPYING, distributed as part of this software.
12
+ #-----------------------------------------------------------------------------
13
+
14
+ import os
15
+ import re
16
+ import sys
17
+ from getopt import getopt, GetoptError
18
+
19
+ from traitlets.config.configurable import Configurable
20
+ from . import oinspect
21
+ from .error import UsageError
22
+ from .inputtransformer2 import ESC_MAGIC, ESC_MAGIC2
23
+ from ..utils.ipstruct import Struct
24
+ from ..utils.process import arg_split
25
+ from ..utils.text import dedent
26
+ from traitlets import Bool, Dict, Instance, observe
27
+ from logging import error
28
+
29
+ import typing as t
30
+
31
+ #-----------------------------------------------------------------------------
32
+ # Globals
33
+ #-----------------------------------------------------------------------------
34
+
35
+ # A dict we'll use for each class that has magics, used as temporary storage to
36
+ # pass information between the @line/cell_magic method decorators and the
37
+ # @magics_class class decorator, because the method decorators have no
38
+ # access to the class when they run. See for more details:
39
+ # http://stackoverflow.com/questions/2366713/can-a-python-decorator-of-an-instance-method-access-the-class
40
+
41
+ magics: t.Dict = dict(line={}, cell={})
42
+
43
+ magic_kinds = ('line', 'cell')
44
+ magic_spec = ('line', 'cell', 'line_cell')
45
+ magic_escapes = dict(line=ESC_MAGIC, cell=ESC_MAGIC2)
46
+
47
+ #-----------------------------------------------------------------------------
48
+ # Utility classes and functions
49
+ #-----------------------------------------------------------------------------
50
+
51
+ class Bunch: pass
52
+
53
+
54
+ def on_off(tag):
55
+ """Return an ON/OFF string for a 1/0 input. Simple utility function."""
56
+ return ['OFF','ON'][tag]
57
+
58
+
59
+ def compress_dhist(dh):
60
+ """Compress a directory history into a new one with at most 20 entries.
61
+
62
+ Return a new list made from the first and last 10 elements of dhist after
63
+ removal of duplicates.
64
+ """
65
+ head, tail = dh[:-10], dh[-10:]
66
+
67
+ newhead = []
68
+ done = set()
69
+ for h in head:
70
+ if h in done:
71
+ continue
72
+ newhead.append(h)
73
+ done.add(h)
74
+
75
+ return newhead + tail
76
+
77
+
78
+ def needs_local_scope(func):
79
+ """Decorator to mark magic functions which need to local scope to run."""
80
+ func.needs_local_scope = True
81
+ return func
82
+
83
+ #-----------------------------------------------------------------------------
84
+ # Class and method decorators for registering magics
85
+ #-----------------------------------------------------------------------------
86
+
87
+ def magics_class(cls):
88
+ """Class decorator for all subclasses of the main Magics class.
89
+
90
+ Any class that subclasses Magics *must* also apply this decorator, to
91
+ ensure that all the methods that have been decorated as line/cell magics
92
+ get correctly registered in the class instance. This is necessary because
93
+ when method decorators run, the class does not exist yet, so they
94
+ temporarily store their information into a module global. Application of
95
+ this class decorator copies that global data to the class instance and
96
+ clears the global.
97
+
98
+ Obviously, this mechanism is not thread-safe, which means that the
99
+ *creation* of subclasses of Magic should only be done in a single-thread
100
+ context. Instantiation of the classes has no restrictions. Given that
101
+ these classes are typically created at IPython startup time and before user
102
+ application code becomes active, in practice this should not pose any
103
+ problems.
104
+ """
105
+ cls.registered = True
106
+ cls.magics = dict(line = magics['line'],
107
+ cell = magics['cell'])
108
+ magics['line'] = {}
109
+ magics['cell'] = {}
110
+ return cls
111
+
112
+
113
+ def record_magic(dct, magic_kind, magic_name, func):
114
+ """Utility function to store a function as a magic of a specific kind.
115
+
116
+ Parameters
117
+ ----------
118
+ dct : dict
119
+ A dictionary with 'line' and 'cell' subdicts.
120
+ magic_kind : str
121
+ Kind of magic to be stored.
122
+ magic_name : str
123
+ Key to store the magic as.
124
+ func : function
125
+ Callable object to store.
126
+ """
127
+ if magic_kind == 'line_cell':
128
+ dct['line'][magic_name] = dct['cell'][magic_name] = func
129
+ else:
130
+ dct[magic_kind][magic_name] = func
131
+
132
+
133
+ def validate_type(magic_kind):
134
+ """Ensure that the given magic_kind is valid.
135
+
136
+ Check that the given magic_kind is one of the accepted spec types (stored
137
+ in the global `magic_spec`), raise ValueError otherwise.
138
+ """
139
+ if magic_kind not in magic_spec:
140
+ raise ValueError('magic_kind must be one of %s, %s given' %
141
+ magic_kinds, magic_kind)
142
+
143
+
144
+ # The docstrings for the decorator below will be fairly similar for the two
145
+ # types (method and function), so we generate them here once and reuse the
146
+ # templates below.
147
+ _docstring_template = \
148
+ """Decorate the given {0} as {1} magic.
149
+
150
+ The decorator can be used with or without arguments, as follows.
151
+
152
+ i) without arguments: it will create a {1} magic named as the {0} being
153
+ decorated::
154
+
155
+ @deco
156
+ def foo(...)
157
+
158
+ will create a {1} magic named `foo`.
159
+
160
+ ii) with one string argument: which will be used as the actual name of the
161
+ resulting magic::
162
+
163
+ @deco('bar')
164
+ def foo(...)
165
+
166
+ will create a {1} magic named `bar`.
167
+
168
+ To register a class of magics use ``InteractiveShell.register_magics(class or instance)``.
169
+ """
170
+
171
+ # These two are decorator factories. While they are conceptually very similar,
172
+ # there are enough differences in the details that it's simpler to have them
173
+ # written as completely standalone functions rather than trying to share code
174
+ # and make a single one with convoluted logic.
175
+
176
+ def _method_magic_marker(magic_kind):
177
+ """Decorator factory for methods in Magics subclasses.
178
+ """
179
+
180
+ validate_type(magic_kind)
181
+
182
+ # This is a closure to capture the magic_kind. We could also use a class,
183
+ # but it's overkill for just that one bit of state.
184
+ def magic_deco(arg):
185
+ if callable(arg):
186
+ # "Naked" decorator call (just @foo, no args)
187
+ func = arg
188
+ name = func.__name__
189
+ retval = arg
190
+ record_magic(magics, magic_kind, name, name)
191
+ elif isinstance(arg, str):
192
+ # Decorator called with arguments (@foo('bar'))
193
+ name = arg
194
+ def mark(func, *a, **kw):
195
+ record_magic(magics, magic_kind, name, func.__name__)
196
+ return func
197
+ retval = mark
198
+ else:
199
+ raise TypeError("Decorator can only be called with "
200
+ "string or function")
201
+ return retval
202
+
203
+ # Ensure the resulting decorator has a usable docstring
204
+ magic_deco.__doc__ = _docstring_template.format('method', magic_kind)
205
+ return magic_deco
206
+
207
+
208
+ def _function_magic_marker(magic_kind):
209
+ """Decorator factory for standalone functions.
210
+ """
211
+ validate_type(magic_kind)
212
+
213
+ # This is a closure to capture the magic_kind. We could also use a class,
214
+ # but it's overkill for just that one bit of state.
215
+ def magic_deco(arg):
216
+ # Find get_ipython() in the caller's namespace
217
+ caller = sys._getframe(1)
218
+ for ns in ['f_locals', 'f_globals', 'f_builtins']:
219
+ get_ipython = getattr(caller, ns).get('get_ipython')
220
+ if get_ipython is not None:
221
+ break
222
+ else:
223
+ raise NameError('Decorator can only run in context where '
224
+ '`get_ipython` exists')
225
+
226
+ ip = get_ipython()
227
+
228
+ if callable(arg):
229
+ # "Naked" decorator call (just @foo, no args)
230
+ func = arg
231
+ name = func.__name__
232
+ ip.register_magic_function(func, magic_kind, name)
233
+ retval = arg
234
+ elif isinstance(arg, str):
235
+ # Decorator called with arguments (@foo('bar'))
236
+ name = arg
237
+ def mark(func, *a, **kw):
238
+ ip.register_magic_function(func, magic_kind, name)
239
+ return func
240
+ retval = mark
241
+ else:
242
+ raise TypeError("Decorator can only be called with "
243
+ "string or function")
244
+ return retval
245
+
246
+ # Ensure the resulting decorator has a usable docstring
247
+ ds = _docstring_template.format('function', magic_kind)
248
+
249
+ ds += dedent("""
250
+ Note: this decorator can only be used in a context where IPython is already
251
+ active, so that the `get_ipython()` call succeeds. You can therefore use
252
+ it in your startup files loaded after IPython initializes, but *not* in the
253
+ IPython configuration file itself, which is executed before IPython is
254
+ fully up and running. Any file located in the `startup` subdirectory of
255
+ your configuration profile will be OK in this sense.
256
+ """)
257
+
258
+ magic_deco.__doc__ = ds
259
+ return magic_deco
260
+
261
+
262
+ MAGIC_NO_VAR_EXPAND_ATTR = "_ipython_magic_no_var_expand"
263
+ MAGIC_OUTPUT_CAN_BE_SILENCED = "_ipython_magic_output_can_be_silenced"
264
+
265
+
266
+ def no_var_expand(magic_func):
267
+ """Mark a magic function as not needing variable expansion
268
+
269
+ By default, IPython interprets `{a}` or `$a` in the line passed to magics
270
+ as variables that should be interpolated from the interactive namespace
271
+ before passing the line to the magic function.
272
+ This is not always desirable, e.g. when the magic executes Python code
273
+ (%timeit, %time, etc.).
274
+ Decorate magics with `@no_var_expand` to opt-out of variable expansion.
275
+
276
+ .. versionadded:: 7.3
277
+ """
278
+ setattr(magic_func, MAGIC_NO_VAR_EXPAND_ATTR, True)
279
+ return magic_func
280
+
281
+
282
+ def output_can_be_silenced(magic_func):
283
+ """Mark a magic function so its output may be silenced.
284
+
285
+ The output is silenced if the Python code used as a parameter of
286
+ the magic ends in a semicolon, not counting a Python comment that can
287
+ follow it.
288
+ """
289
+ setattr(magic_func, MAGIC_OUTPUT_CAN_BE_SILENCED, True)
290
+ return magic_func
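A hedged sketch of how these two markers combine on a (hypothetical) line magic defined inside a Magics subclass, so that `$x`/`{x}` in the argument line are passed through literally and a trailing `;` suppresses the displayed result:

    @no_var_expand
    @output_can_be_silenced
    @line_magic
    def echo_raw(self, line):
        """Hypothetical magic: return the argument line without interpolation."""
        return line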
291
+
292
+ # Create the actual decorators for public use
293
+
294
+ # These three are used to decorate methods in class definitions
295
+ line_magic = _method_magic_marker('line')
296
+ cell_magic = _method_magic_marker('cell')
297
+ line_cell_magic = _method_magic_marker('line_cell')
298
+
299
+ # These three decorate standalone functions and perform the decoration
300
+ # immediately. They can only run where get_ipython() works
301
+ register_line_magic = _function_magic_marker('line')
302
+ register_cell_magic = _function_magic_marker('cell')
303
+ register_line_cell_magic = _function_magic_marker('line_cell')
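Putting the class and method decorators together, here is a minimal sketch of a custom magics container (class and magic names are hypothetical); registration assumes an active shell so that get_ipython() is available:

    from IPython.core.magic import Magics, magics_class, line_magic, cell_magic

    @magics_class
    class MyMagics(Magics):
        """Hypothetical container for a couple of custom magics."""

        @line_magic
        def shout(self, line):
            # %shout hello  ->  'HELLO'
            return line.upper()

        @cell_magic
        def countlines(self, line, cell):
            # %%countlines  ->  number of lines in the cell body
            return len(cell.splitlines())

    # In an active IPython session:
    # get_ipython().register_magics(MyMagics)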
304
+
305
+ #-----------------------------------------------------------------------------
306
+ # Core Magic classes
307
+ #-----------------------------------------------------------------------------
308
+
309
+ class MagicsManager(Configurable):
310
+ """Object that handles all magic-related functionality for IPython.
311
+ """
312
+ # Non-configurable class attributes
313
+
314
+ # A two-level dict, first keyed by magic type, then by magic function, and
315
+ # holding the actual callable object as value. This is the dict used for
316
+ # magic function dispatch
317
+ magics = Dict()
318
+ lazy_magics = Dict(
319
+ help="""
320
+ Mapping from magic names to modules to load.
321
+
322
+ This can be used in IPython/IPykernel configuration to declare lazy magics
323
+ that will only be imported/registered on first use.
324
+
325
+ For example::
326
+
327
+ c.MagicsManager.lazy_magics = {
328
+ "my_magic": "slow.to.import",
329
+ "my_other_magic": "also.slow",
330
+ }
331
+
332
+ On first invocation of `%my_magic`, `%%my_magic`, `%my_other_magic` or
333
+ `%%my_other_magic`, the corresponding module will be loaded as an IPython
334
+ extension, as if you had previously done `%load_ext slow.to.import`.
335
+
336
+ Magic names should be given without leading percent sign(s), as magics can be both cell
337
+ and line magics.
338
+
339
+ Lazy loading happens relatively late in the execution process, and
340
+ complex extensions that manipulate Python/IPython internal state or global state
341
+ might not support lazy loading.
342
+ """
343
+ ).tag(
344
+ config=True,
345
+ )
346
+
347
+ # A registry of the original objects that we've been given holding magics.
348
+ registry = Dict()
349
+
350
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
351
+
352
+ auto_magic = Bool(True, help=
353
+ "Automatically call line magics without requiring explicit % prefix"
354
+ ).tag(config=True)
355
+ @observe('auto_magic')
356
+ def _auto_magic_changed(self, change):
357
+ self.shell.automagic = change['new']
358
+
359
+ _auto_status = [
360
+ 'Automagic is OFF, % prefix IS needed for line magics.',
361
+ 'Automagic is ON, % prefix IS NOT needed for line magics.']
362
+
363
+ user_magics = Instance('IPython.core.magics.UserMagics', allow_none=True)
364
+
365
+ def __init__(self, shell=None, config=None, user_magics=None, **traits):
366
+
367
+ super(MagicsManager, self).__init__(shell=shell, config=config,
368
+ user_magics=user_magics, **traits)
369
+ self.magics = dict(line={}, cell={})
370
+ # Let's add the user_magics to the registry for uniformity, so *all*
371
+ # registered magic containers can be found there.
372
+ self.registry[user_magics.__class__.__name__] = user_magics
373
+
374
+ def auto_status(self):
375
+ """Return descriptive string with automagic status."""
376
+ return self._auto_status[self.auto_magic]
377
+
378
+ def lsmagic(self):
379
+ """Return a dict of currently available magic functions.
380
+
381
+ The return dict has the keys 'line' and 'cell', corresponding to the
382
+ two types of magics we support. Each value is a list of names.
383
+ """
384
+ return self.magics
385
+
386
+ def lsmagic_docs(self, brief=False, missing=''):
387
+ """Return dict of documentation of magic functions.
388
+
389
+ The return dict has the keys 'line' and 'cell', corresponding to the
390
+ two types of magics we support. Each value is a dict keyed by magic
391
+ name whose value is the function docstring. If a docstring is
392
+ unavailable, the value of `missing` is used instead.
393
+
394
+ If brief is True, only the first line of each docstring will be returned.
395
+ """
396
+ docs = {}
397
+ for m_type in self.magics:
398
+ m_docs = {}
399
+ for m_name, m_func in self.magics[m_type].items():
400
+ if m_func.__doc__:
401
+ if brief:
402
+ m_docs[m_name] = m_func.__doc__.split('\n', 1)[0]
403
+ else:
404
+ m_docs[m_name] = m_func.__doc__.rstrip()
405
+ else:
406
+ m_docs[m_name] = missing
407
+ docs[m_type] = m_docs
408
+ return docs
409
+
410
+ def register_lazy(self, name: str, fully_qualified_name: str):
411
+ """
412
+ Lazily register a magic via an extension.
413
+
414
+
415
+ Parameters
416
+ ----------
417
+ name : str
418
+ Name of the magic you wish to register.
419
+ fully_qualified_name : str
420
+ Fully qualified name of the module/submodule that should be loaded
421
+ as an extension when the magic is first called.
422
+ It is assumed that loading this extension will register the given
423
+ magic.
424
+ """
425
+
426
+ self.lazy_magics[name] = fully_qualified_name
427
+
428
+ def register(self, *magic_objects):
429
+ """Register one or more instances of Magics.
430
+
431
+ Take one or more classes or instances of classes that subclass the main
432
+ `Magics` class, and register them with IPython to use the magic
433
+ functions they provide. The registration process will then ensure that
434
+ any methods that have been decorated to provide line and/or cell magics will
435
+ be recognized with the `%x`/`%%x` syntax as a line/cell magic
436
+ respectively.
437
+
438
+ If classes are given, they will be instantiated with the default
439
+ constructor. If your classes need a custom constructor, you should
440
+ instantiate them first and pass the instance.
441
+
442
+ The provided arguments can be an arbitrary mix of classes and instances.
443
+
444
+ Parameters
445
+ ----------
446
+ *magic_objects : one or more classes or instances
447
+ """
448
+ # Start by validating them to ensure they have all had their magic
449
+ # methods registered at the instance level
450
+ for m in magic_objects:
451
+ if not m.registered:
452
+ raise ValueError("Class of magics %r was constructed without "
453
+ "the @register_magics class decorator")
454
+ if isinstance(m, type):
455
+ # If we're given an uninstantiated class
456
+ m = m(shell=self.shell)
457
+
458
+ # Now that we have an instance, we can register it and update the
459
+ # table of callables
460
+ self.registry[m.__class__.__name__] = m
461
+ for mtype in magic_kinds:
462
+ self.magics[mtype].update(m.magics[mtype])
463
+
464
+ def register_function(self, func, magic_kind='line', magic_name=None):
465
+ """Expose a standalone function as magic function for IPython.
466
+
467
+ This will create an IPython magic (line, cell or both) from a
468
+ standalone function. The functions should have the following
469
+ signatures:
470
+
471
+ * For line magics: `def f(line)`
472
+ * For cell magics: `def f(line, cell)`
473
+ * For a function that does both: `def f(line, cell=None)`
474
+
475
+ In the latter case, the function will be called with `cell==None` when
476
+ invoked as `%f`, and with cell as a string when invoked as `%%f`.
477
+
478
+ Parameters
479
+ ----------
480
+ func : callable
481
+ Function to be registered as a magic.
482
+ magic_kind : str
483
+ Kind of magic, one of 'line', 'cell' or 'line_cell'
484
+ magic_name : optional str
485
+ If given, the name the magic will have in the IPython namespace. By
486
+ default, the name of the function itself is used.
487
+ """
488
+
489
+ # Create the new method in the user_magics and register it in the
490
+ # global table
491
+ validate_type(magic_kind)
492
+ magic_name = func.__name__ if magic_name is None else magic_name
493
+ setattr(self.user_magics, magic_name, func)
494
+ record_magic(self.magics, magic_kind, magic_name, func)
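For illustration, a standalone function with the dual `def f(line, cell=None)` signature could be exposed under both `%` and `%%` roughly as below (the function name is made up; register_magic_function is the shell-level wrapper used by the decorators above, which delegates here):

    def size(line, cell=None):
        """Hypothetical magic: length of the line (as %size) or of the cell body (as %%size)."""
        return len(line) if cell is None else len(cell)

    # ip = get_ipython()
    # ip.register_magic_function(size, magic_kind='line_cell')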
495
+
496
+ def register_alias(self, alias_name, magic_name, magic_kind='line', magic_params=None):
497
+ """Register an alias to a magic function.
498
+
499
+ The alias is an instance of :class:`MagicAlias`, which holds the
500
+ name and kind of the magic it should call. Binding is done at
501
+ call time, so if the underlying magic function is changed the alias
502
+ will call the new function.
503
+
504
+ Parameters
505
+ ----------
506
+ alias_name : str
507
+ The name of the magic to be registered.
508
+ magic_name : str
509
+ The name of an existing magic.
510
+ magic_kind : str
511
+ Kind of magic, one of 'line' or 'cell'
512
+ """
513
+
514
+ # `validate_type` is too permissive, as it allows 'line_cell'
515
+ # which we do not handle.
516
+ if magic_kind not in magic_kinds:
517
+ raise ValueError('magic_kind must be one of %s, %s given' %
518
+ (magic_kinds, magic_kind))
519
+
520
+ alias = MagicAlias(self.shell, magic_name, magic_kind, magic_params)
521
+ setattr(self.user_magics, alias_name, alias)
522
+ record_magic(self.magics, magic_kind, alias_name, alias)
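A short sketch (assuming a live shell handle `ip`) of registering an alias `%t` for the existing `%timeit` magic; as described above, the target is looked up at call time:

    # ip = get_ipython()
    # ip.magics_manager.register_alias('t', 'timeit', magic_kind='line')
    # Afterwards, `%t sum(range(10))` dispatches to `%timeit` on every call.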
523
+
524
+ # Key base class that provides the central functionality for magics.
525
+
526
+
527
+ class Magics(Configurable):
528
+ """Base class for implementing magic functions.
529
+
530
+ Shell functions which can be reached as %function_name. All magic
531
+ functions should accept a string, which they can parse for their own
532
+ needs. This can make some functions easier to type, eg `%cd ../`
533
+ vs. `%cd("../")`
534
+
535
+ Classes providing magic functions need to subclass this class, and they
536
+ MUST:
537
+
538
+ - Use the method decorators `@line_magic` and `@cell_magic` to decorate
539
+ individual methods as magic functions, AND
540
+
541
+ - Use the class decorator `@magics_class` to ensure that the magic
542
+ methods are properly registered at the instance level upon instance
543
+ initialization.
544
+
545
+ See :mod:`IPython.core.magics` for examples of actual implementation classes.
546
+ """
547
+ # Dict holding all command-line options for each magic.
548
+ options_table = None
549
+ # Dict for the mapping of magic names to methods, set by class decorator
550
+ magics = None
551
+ # Flag to check that the class decorator was properly applied
552
+ registered = False
553
+ # Instance of IPython shell
554
+ shell = None
555
+
556
+ def __init__(self, shell=None, **kwargs):
557
+ if not(self.__class__.registered):
558
+ raise ValueError('Magics subclass without registration - '
559
+ 'did you forget to apply @magics_class?')
560
+ if shell is not None:
561
+ if hasattr(shell, 'configurables'):
562
+ shell.configurables.append(self)
563
+ if hasattr(shell, 'config'):
564
+ kwargs.setdefault('parent', shell)
565
+
566
+ self.shell = shell
567
+ self.options_table = {}
568
+ # The method decorators are run when the instance doesn't exist yet, so
569
+ # they can only record the names of the methods they are supposed to
570
+ # grab. Only now, that the instance exists, can we create the proper
571
+ # mapping to bound methods. So we read the info off the original names
572
+ # table and replace each method name by the actual bound method.
573
+ # But we mustn't clobber the *class* mapping, in case of multiple instances.
574
+ class_magics = self.magics
575
+ self.magics = {}
576
+ for mtype in magic_kinds:
577
+ tab = self.magics[mtype] = {}
578
+ cls_tab = class_magics[mtype]
579
+ for magic_name, meth_name in cls_tab.items():
580
+ if isinstance(meth_name, str):
581
+ # it's a method name, grab it
582
+ tab[magic_name] = getattr(self, meth_name)
583
+ else:
584
+ # it's the real thing
585
+ tab[magic_name] = meth_name
586
+ # Configurable **needs** to be initiated at the end or the config
587
+ # magics get screwed up.
588
+ super(Magics, self).__init__(**kwargs)
589
+
590
+ def arg_err(self,func):
591
+ """Print docstring if incorrect arguments were passed"""
592
+ print('Error in arguments:')
593
+ print(oinspect.getdoc(func))
594
+
595
+ def format_latex(self, strng):
596
+ """Format a string for latex inclusion."""
597
+
598
+ # Characters that need to be escaped for latex:
599
+ escape_re = re.compile(r'(%|_|\$|#|&)',re.MULTILINE)
600
+ # Magic command names as headers:
601
+ cmd_name_re = re.compile(r'^(%s.*?):' % ESC_MAGIC,
602
+ re.MULTILINE)
603
+ # Magic commands
604
+ cmd_re = re.compile(r'(?P<cmd>%s.+?\b)(?!\}\}:)' % ESC_MAGIC,
605
+ re.MULTILINE)
606
+ # Paragraph continue
607
+ par_re = re.compile(r'\\$',re.MULTILINE)
608
+
609
+ # The "\n" symbol
610
+ newline_re = re.compile(r'\\n')
611
+
612
+ # Now build the string for output:
613
+ #strng = cmd_name_re.sub(r'\n\\texttt{\\textsl{\\large \1}}:',strng)
614
+ strng = cmd_name_re.sub(r'\n\\bigskip\n\\texttt{\\textbf{ \1}}:',
615
+ strng)
616
+ strng = cmd_re.sub(r'\\texttt{\g<cmd>}',strng)
617
+ strng = par_re.sub(r'\\\\',strng)
618
+ strng = escape_re.sub(r'\\\1',strng)
619
+ strng = newline_re.sub(r'\\textbackslash{}n',strng)
620
+ return strng
621
+
622
+ def parse_options(self, arg_str, opt_str, *long_opts, **kw):
623
+ """Parse options passed to an argument string.
624
+
625
+ The interface is similar to that of :func:`getopt.getopt`, but it
626
+ returns a :class:`~IPython.utils.struct.Struct` with the options as keys
627
+ and the stripped argument string still as a string.
628
+
629
+ arg_str is split into a true sys.argv-style vector by using shlex.split.
630
+ This allows us to easily expand variables, glob files, quote
631
+ arguments, etc.
632
+
633
+ Parameters
634
+ ----------
635
+ arg_str : str
636
+ The arguments to parse.
637
+ opt_str : str
638
+ The options specification.
639
+ mode : str, default 'string'
640
+ If given as 'list', the argument string is returned as a list (split
641
+ on whitespace) instead of a string.
642
+ list_all : bool, default False
643
+ Put all option values in lists. Normally only options
644
+ appearing more than once are put in a list.
645
+ posix : bool, default True
646
+ Whether to split the input line in POSIX mode or not, as per the
647
+ conventions outlined in the :mod:`shlex` module from the standard
648
+ library.
649
+ """
650
+
651
+ # inject default options at the beginning of the input line
652
+ caller = sys._getframe(1).f_code.co_name
653
+ arg_str = '%s %s' % (self.options_table.get(caller,''),arg_str)
654
+
655
+ mode = kw.get('mode','string')
656
+ if mode not in ['string','list']:
657
+ raise ValueError('incorrect mode given: %s' % mode)
658
+ # Get options
659
+ list_all = kw.get('list_all',0)
660
+ posix = kw.get('posix', os.name == 'posix')
661
+ strict = kw.get('strict', True)
662
+
663
+ preserve_non_opts = kw.get("preserve_non_opts", False)
664
+ remainder_arg_str = arg_str
665
+
666
+ # Check if we have more than one argument to warrant extra processing:
667
+ odict = {} # Dictionary with options
668
+ args = arg_str.split()
669
+ if len(args) >= 1:
670
+ # Only look for options when there is at least one argument;
671
+ # an empty input has nothing to parse.
672
+ argv = arg_split(arg_str, posix, strict)
673
+ # Do regular option processing
674
+ try:
675
+ opts, args = getopt(argv, opt_str, long_opts)
676
+ except GetoptError as e:
677
+ raise UsageError(
678
+ '%s (allowed: "%s"%s)'
679
+ % (e.msg, opt_str, " ".join(("",) + long_opts) if long_opts else "")
680
+ ) from e
681
+ for o, a in opts:
682
+ if mode == "string" and preserve_non_opts:
683
+ # remove option-parts from the original args-string and preserve remaining-part.
684
+ # This relies on the arg_split(...) and getopt(...)'s impl spec, that the parsed options are
685
+ # returned in the original order.
686
+ remainder_arg_str = remainder_arg_str.replace(o, "", 1).replace(
687
+ a, "", 1
688
+ )
689
+ if o.startswith("--"):
690
+ o = o[2:]
691
+ else:
692
+ o = o[1:]
693
+ try:
694
+ odict[o].append(a)
695
+ except AttributeError:
696
+ odict[o] = [odict[o],a]
697
+ except KeyError:
698
+ if list_all:
699
+ odict[o] = [a]
700
+ else:
701
+ odict[o] = a
702
+
703
+ # Prepare opts,args for return
704
+ opts = Struct(odict)
705
+ if mode == 'string':
706
+ if preserve_non_opts:
707
+ args = remainder_arg_str.lstrip()
708
+ else:
709
+ args = " ".join(args)
710
+
711
+ return opts,args
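A hedged sketch of how a magic method (inside a Magics subclass) typically consumes parse_options; the option letters and magic name are invented for illustration:

    @line_magic
    def grep(self, parameter_s=''):
        """Hypothetical magic: -i is a flag, -n takes a value, --count is a long option."""
        opts, args = self.parse_options(parameter_s, 'in:', 'count')
        ignore_case = 'i' in opts
        max_hits = int(opts.get('n', 10))
        return ignore_case, max_hits, args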
712
+
713
+ def default_option(self, fn, optstr):
714
+ """Make an entry in the options_table for fn, with value optstr"""
715
+
716
+ if fn not in self.lsmagic():
717
+ error("%s is not a magic function" % fn)
718
+ self.options_table[fn] = optstr
719
+
720
+
721
+ class MagicAlias:
722
+ """An alias to another magic function.
723
+
724
+ An alias is determined by its magic name and magic kind. Lookup
725
+ is done at call time, so if the underlying magic changes the alias
726
+ will call the new function.
727
+
728
+ Use the :meth:`MagicsManager.register_alias` method or the
729
+ `%alias_magic` magic function to create and register a new alias.
730
+ """
731
+ def __init__(self, shell, magic_name, magic_kind, magic_params=None):
732
+ self.shell = shell
733
+ self.magic_name = magic_name
734
+ self.magic_params = magic_params
735
+ self.magic_kind = magic_kind
736
+
737
+ self.pretty_target = '%s%s' % (magic_escapes[self.magic_kind], self.magic_name)
738
+ self.__doc__ = "Alias for `%s`." % self.pretty_target
739
+
740
+ self._in_call = False
741
+
742
+ def __call__(self, *args, **kwargs):
743
+ """Call the magic alias."""
744
+ fn = self.shell.find_magic(self.magic_name, self.magic_kind)
745
+ if fn is None:
746
+ raise UsageError("Magic `%s` not found." % self.pretty_target)
747
+
748
+ # Protect against infinite recursion.
749
+ if self._in_call:
750
+ raise UsageError("Infinite recursion detected; "
751
+ "magic aliases cannot call themselves.")
752
+ self._in_call = True
753
+ try:
754
+ if self.magic_params:
755
+ args_list = list(args)
756
+ args_list[0] = self.magic_params + " " + args[0]
757
+ args = tuple(args_list)
758
+ return fn(*args, **kwargs)
759
+ finally:
760
+ self._in_call = False
temp_venv/lib/python3.13/site-packages/IPython/core/prefilter.py ADDED
@@ -0,0 +1,707 @@
1
+ # encoding: utf-8
2
+ """
3
+ Prefiltering components.
4
+
5
+ Prefilters transform user input before it is exec'd by Python. These
6
+ transforms are used to implement additional syntax such as !ls and %magic.
7
+ """
8
+
9
+ # Copyright (c) IPython Development Team.
10
+ # Distributed under the terms of the Modified BSD License.
11
+
12
+ from keyword import iskeyword
13
+ import re
14
+
15
+ from .autocall import IPyAutocall
16
+ from traitlets.config.configurable import Configurable
17
+ from .inputtransformer2 import (
18
+ ESC_MAGIC,
19
+ ESC_QUOTE,
20
+ ESC_QUOTE2,
21
+ ESC_PAREN,
22
+ )
23
+ from .macro import Macro
24
+ from .splitinput import LineInfo
25
+
26
+ from traitlets import (
27
+ List, Integer, Unicode, Bool, Instance, CRegExp
28
+ )
29
+
30
+ #-----------------------------------------------------------------------------
31
+ # Global utilities, errors and constants
32
+ #-----------------------------------------------------------------------------
33
+
34
+
35
+ class PrefilterError(Exception):
36
+ pass
37
+
38
+
39
+ # RegExp to identify potential function names
40
+ re_fun_name = re.compile(r'[^\W\d]([\w.]*) *$')
41
+
42
+ # RegExp to exclude strings with this start from autocalling. In
43
+ # particular, all binary operators should be excluded, so that if foo is
44
+ # callable, foo OP bar doesn't become foo(OP bar), which is invalid. The
45
+ # characters '!=()' don't need to be checked for, as the PythonOpsChecker
46
+ # routine explicitly does so, to catch direct calls and rebindings of
47
+ # existing names.
48
+
49
+ # Warning: the '-' HAS TO BE AT THE END of the first group, otherwise
50
+ # it affects the rest of the group in square brackets.
51
+ re_exclude_auto = re.compile(r'^[,&^\|\*/\+-]'
52
+ r'|^is |^not |^in |^and |^or ')
53
+
54
+ # try to catch also methods for stuff in lists/tuples/dicts: off
55
+ # (experimental). For this to work, the line_split regexp would need
56
+ # to be modified so it wouldn't break things at '['. That line is
57
+ # nasty enough that I shouldn't change it until I can test it _well_.
58
+ #self.re_fun_name = re.compile (r'[a-zA-Z_]([a-zA-Z0-9_.\[\]]*) ?$')
59
+
60
+
61
+ # Handler Check Utilities
62
+ def is_shadowed(identifier, ip):
63
+ """Is the given identifier defined in one of the namespaces which shadow
64
+ the alias and magic namespaces? Note that an identifier is different
65
+ from ifun, because it cannot contain a '.' character."""
66
+ # This is much safer than calling ofind, which can change state
67
+ return (identifier in ip.user_ns \
68
+ or identifier in ip.user_global_ns \
69
+ or identifier in ip.ns_table['builtin']\
70
+ or iskeyword(identifier))
71
+
72
+
73
+ #-----------------------------------------------------------------------------
74
+ # Main Prefilter manager
75
+ #-----------------------------------------------------------------------------
76
+
77
+
78
+ class PrefilterManager(Configurable):
79
+ """Main prefilter component.
80
+
81
+ The IPython prefilter is run on all user input before it is executed. The
82
+ prefilter consumes lines of input and produces transformed lines of
83
+ input.
84
+
85
+ The implementation consists of two phases:
86
+
87
+ 1. Transformers
88
+ 2. Checkers and handlers
89
+
90
+ Over time, we plan on deprecating the checkers and handlers and doing
91
+ everything in the transformers.
92
+
93
+ The transformers are instances of :class:`PrefilterTransformer` and have
94
+ a single method :meth:`transform` that takes a line and returns a
95
+ transformed line. The transformation can be accomplished using any
96
+ tool, but our current ones use regular expressions for speed.
97
+
98
+ After all the transformers have been run, the line is fed to the checkers,
99
+ which are instances of :class:`PrefilterChecker`. The line is passed to
100
+ the :meth:`check` method, which either returns `None` or a
101
+ :class:`PrefilterHandler` instance. If `None` is returned, the other
102
+ checkers are tried. If an :class:`PrefilterHandler` instance is returned,
103
+ the line is passed to the :meth:`handle` method of the returned
104
+ handler and no further checkers are tried.
105
+
106
+ Both transformers and checkers have a `priority` attribute that determines
107
+ the order in which they are called. Smaller priorities are tried first.
108
+
109
+ Both transformers and checkers also have an `enabled` attribute, which is
110
+ a boolean that determines if the instance is used.
111
+
112
+ Users or developers can change the priority or enabled attribute of
113
+ transformers or checkers, but they must call the :meth:`sort_checkers`
114
+ or :meth:`sort_transformers` method after changing the priority.
115
+ """
116
+
117
+ multi_line_specials = Bool(True).tag(config=True)
118
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
119
+
120
+ def __init__(self, shell=None, **kwargs):
121
+ super(PrefilterManager, self).__init__(shell=shell, **kwargs)
122
+ self.shell = shell
123
+ self._transformers = []
124
+ self.init_handlers()
125
+ self.init_checkers()
126
+
127
+ #-------------------------------------------------------------------------
128
+ # API for managing transformers
129
+ #-------------------------------------------------------------------------
130
+
131
+ def sort_transformers(self):
132
+ """Sort the transformers by priority.
133
+
134
+ This must be called after the priority of a transformer is changed.
135
+ The :meth:`register_transformer` method calls this automatically.
136
+ """
137
+ self._transformers.sort(key=lambda x: x.priority)
138
+
139
+ @property
140
+ def transformers(self):
141
+ """Return a list of checkers, sorted by priority."""
142
+ return self._transformers
143
+
144
+ def register_transformer(self, transformer):
145
+ """Register a transformer instance."""
146
+ if transformer not in self._transformers:
147
+ self._transformers.append(transformer)
148
+ self.sort_transformers()
149
+
150
+ def unregister_transformer(self, transformer):
151
+ """Unregister a transformer instance."""
152
+ if transformer in self._transformers:
153
+ self._transformers.remove(transformer)
154
+
155
+ #-------------------------------------------------------------------------
156
+ # API for managing checkers
157
+ #-------------------------------------------------------------------------
158
+
159
+ def init_checkers(self):
160
+ """Create the default checkers."""
161
+ self._checkers = []
162
+ for checker in _default_checkers:
163
+ checker(
164
+ shell=self.shell, prefilter_manager=self, parent=self
165
+ )
166
+
167
+ def sort_checkers(self):
168
+ """Sort the checkers by priority.
169
+
170
+ This must be called after the priority of a checker is changed.
171
+ The :meth:`register_checker` method calls this automatically.
172
+ """
173
+ self._checkers.sort(key=lambda x: x.priority)
174
+
175
+ @property
176
+ def checkers(self):
177
+ """Return a list of checkers, sorted by priority."""
178
+ return self._checkers
179
+
180
+ def register_checker(self, checker):
181
+ """Register a checker instance."""
182
+ if checker not in self._checkers:
183
+ self._checkers.append(checker)
184
+ self.sort_checkers()
185
+
186
+ def unregister_checker(self, checker):
187
+ """Unregister a checker instance."""
188
+ if checker in self._checkers:
189
+ self._checkers.remove(checker)
190
+
191
+ #-------------------------------------------------------------------------
192
+ # API for managing handlers
193
+ #-------------------------------------------------------------------------
194
+
195
+ def init_handlers(self):
196
+ """Create the default handlers."""
197
+ self._handlers = {}
198
+ self._esc_handlers = {}
199
+ for handler in _default_handlers:
200
+ handler(
201
+ shell=self.shell, prefilter_manager=self, parent=self
202
+ )
203
+
204
+ @property
205
+ def handlers(self):
206
+ """Return a dict of all the handlers."""
207
+ return self._handlers
208
+
209
+ def register_handler(self, name, handler, esc_strings):
210
+ """Register a handler instance by name with esc_strings."""
211
+ self._handlers[name] = handler
212
+ for esc_str in esc_strings:
213
+ self._esc_handlers[esc_str] = handler
214
+
215
+ def unregister_handler(self, name, handler, esc_strings):
216
+ """Unregister a handler instance by name with esc_strings."""
217
+ try:
218
+ del self._handlers[name]
219
+ except KeyError:
220
+ pass
221
+ for esc_str in esc_strings:
222
+ h = self._esc_handlers.get(esc_str)
223
+ if h is handler:
224
+ del self._esc_handlers[esc_str]
225
+
226
+ def get_handler_by_name(self, name):
227
+ """Get a handler by its name."""
228
+ return self._handlers.get(name)
229
+
230
+ def get_handler_by_esc(self, esc_str):
231
+ """Get a handler by its escape string."""
232
+ return self._esc_handlers.get(esc_str)
233
+
234
+ #-------------------------------------------------------------------------
235
+ # Main prefiltering API
236
+ #-------------------------------------------------------------------------
237
+
238
+ def prefilter_line_info(self, line_info):
239
+ """Prefilter a line that has been converted to a LineInfo object.
240
+
241
+ This implements the checker/handler part of the prefilter pipe.
242
+ """
243
+ # print("prefilter_line_info: ", line_info)
244
+ handler = self.find_handler(line_info)
245
+ return handler.handle(line_info)
246
+
247
+ def find_handler(self, line_info):
248
+ """Find a handler for the line_info by trying checkers."""
249
+ for checker in self.checkers:
250
+ if checker.enabled:
251
+ handler = checker.check(line_info)
252
+ if handler:
253
+ return handler
254
+ return self.get_handler_by_name('normal')
255
+
256
+ def transform_line(self, line, continue_prompt):
257
+ """Calls the enabled transformers in order of increasing priority."""
258
+ for transformer in self.transformers:
259
+ if transformer.enabled:
260
+ line = transformer.transform(line, continue_prompt)
261
+ return line
262
+
263
+ def prefilter_line(self, line, continue_prompt=False):
264
+ """Prefilter a single input line as text.
265
+
266
+ This method prefilters a single line of text by calling the
267
+ transformers and then the checkers/handlers.
268
+ """
269
+
270
+ # print("prefilter_line: ", line, continue_prompt)
271
+ # All handlers *must* return a value, even if it's blank ('').
272
+
273
+ # save the line away in case we crash, so the post-mortem handler can
274
+ # record it
275
+ self.shell._last_input_line = line
276
+
277
+ if not line:
278
+ # Return immediately on purely empty lines, so that if the user
279
+ # previously typed some whitespace that started a continuation
280
+ # prompt, he can break out of that loop with just an empty line.
281
+ # This is how the default python prompt works.
282
+ return ''
283
+
284
+ # At this point, we invoke our transformers.
285
+ if not continue_prompt or (continue_prompt and self.multi_line_specials):
286
+ line = self.transform_line(line, continue_prompt)
287
+
288
+ # Now we compute line_info for the checkers and handlers
289
+ line_info = LineInfo(line, continue_prompt)
290
+
291
+ # the input history needs to track even empty lines
292
+ stripped = line.strip()
293
+
294
+ normal_handler = self.get_handler_by_name('normal')
295
+ if not stripped:
296
+ return normal_handler.handle(line_info)
297
+
298
+ # special handlers are only allowed for single line statements
299
+ if continue_prompt and not self.multi_line_specials:
300
+ return normal_handler.handle(line_info)
301
+
302
+ prefiltered = self.prefilter_line_info(line_info)
303
+ # print("prefiltered line: %r" % prefiltered)
304
+ return prefiltered
305
+
306
+ def prefilter_lines(self, lines, continue_prompt=False):
307
+ """Prefilter multiple input lines of text.
308
+
309
+ This is the main entry point for prefiltering multiple lines of
310
+ input. This simply calls :meth:`prefilter_line` for each line of
311
+ input.
312
+
313
+ This covers cases where there are multiple lines in the user entry,
314
+ which is the case when the user goes back to a multiline history
315
+ entry and presses enter.
316
+ """
317
+ llines = lines.rstrip('\n').split('\n')
318
+ # We can get multiple lines in one shot, where multiline input 'blends'
319
+ # into one line, in cases like recalling from the readline history
320
+ # buffer. We need to make sure that in such cases, we correctly
321
+ # communicate downstream which line is first and which are continuation
322
+ # ones.
323
+ if len(llines) > 1:
324
+ out = '\n'.join([self.prefilter_line(line, lnum>0)
325
+ for lnum, line in enumerate(llines) ])
326
+ else:
327
+ out = self.prefilter_line(llines[0], continue_prompt)
328
+
329
+ return out
330
+
331
+ #-----------------------------------------------------------------------------
332
+ # Prefilter transformers
333
+ #-----------------------------------------------------------------------------
334
+
335
+
336
+ class PrefilterTransformer(Configurable):
337
+ """Transform a line of user input."""
338
+
339
+ priority = Integer(100).tag(config=True)
340
+ # Transformers don't currently use shell or prefilter_manager, but as we
341
+ # move away from checkers and handlers, they will need them.
342
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
343
+ prefilter_manager = Instance('IPython.core.prefilter.PrefilterManager', allow_none=True)
344
+ enabled = Bool(True).tag(config=True)
345
+
346
+ def __init__(self, shell=None, prefilter_manager=None, **kwargs):
347
+ super(PrefilterTransformer, self).__init__(
348
+ shell=shell, prefilter_manager=prefilter_manager, **kwargs
349
+ )
350
+ self.prefilter_manager.register_transformer(self)
351
+
352
+ def transform(self, line, continue_prompt):
353
+ """Transform a line, returning the new one."""
354
+ return None
355
+
356
+ def __repr__(self):
357
+ return "<%s(priority=%r, enabled=%r)>" % (
358
+ self.__class__.__name__, self.priority, self.enabled)
359
+
360
+
361
+ #-----------------------------------------------------------------------------
362
+ # Prefilter checkers
363
+ #-----------------------------------------------------------------------------
364
+
365
+
366
+ class PrefilterChecker(Configurable):
367
+ """Inspect an input line and return a handler for that line."""
368
+
369
+ priority = Integer(100).tag(config=True)
370
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
371
+ prefilter_manager = Instance('IPython.core.prefilter.PrefilterManager', allow_none=True)
372
+ enabled = Bool(True).tag(config=True)
373
+
374
+ def __init__(self, shell=None, prefilter_manager=None, **kwargs):
375
+ super(PrefilterChecker, self).__init__(
376
+ shell=shell, prefilter_manager=prefilter_manager, **kwargs
377
+ )
378
+ self.prefilter_manager.register_checker(self)
379
+
380
+ def check(self, line_info):
381
+ """Inspect line_info and return a handler instance or None."""
382
+ return None
383
+
384
+ def __repr__(self):
385
+ return "<%s(priority=%r, enabled=%r)>" % (
386
+ self.__class__.__name__, self.priority, self.enabled)
387
+
388
+
389
+ class EmacsChecker(PrefilterChecker):
390
+
391
+ priority = Integer(100).tag(config=True)
392
+ enabled = Bool(False).tag(config=True)
393
+
394
+ def check(self, line_info):
395
+ "Emacs ipython-mode tags certain input lines."
396
+ if line_info.line.endswith('# PYTHON-MODE'):
397
+ return self.prefilter_manager.get_handler_by_name('emacs')
398
+ else:
399
+ return None
400
+
401
+
402
+ class MacroChecker(PrefilterChecker):
403
+
404
+ priority = Integer(250).tag(config=True)
405
+
406
+ def check(self, line_info):
407
+ obj = self.shell.user_ns.get(line_info.ifun)
408
+ if isinstance(obj, Macro):
409
+ return self.prefilter_manager.get_handler_by_name('macro')
410
+ else:
411
+ return None
412
+
413
+
414
+ class IPyAutocallChecker(PrefilterChecker):
415
+
416
+ priority = Integer(300).tag(config=True)
417
+
418
+ def check(self, line_info):
419
+ "Instances of IPyAutocall in user_ns get autocalled immediately"
420
+ obj = self.shell.user_ns.get(line_info.ifun, None)
421
+ if isinstance(obj, IPyAutocall):
422
+ obj.set_ip(self.shell)
423
+ return self.prefilter_manager.get_handler_by_name('auto')
424
+ else:
425
+ return None
426
+
427
+
428
+ class AssignmentChecker(PrefilterChecker):
429
+
430
+ priority = Integer(600).tag(config=True)
431
+
432
+ def check(self, line_info):
433
+ """Check to see if user is assigning to a var for the first time, in
434
+ which case we want to avoid any sort of automagic / autocall games.
435
+
436
+ This allows users to assign to either alias or magic names true python
437
+ variables (the magic/alias systems always take second seat to true
438
+ python code). E.g. ls='hi', or ls,that=1,2"""
439
+ if line_info.the_rest:
440
+ if line_info.the_rest[0] in '=,':
441
+ return self.prefilter_manager.get_handler_by_name('normal')
442
+ else:
443
+ return None
444
+
445
+
446
+ class AutoMagicChecker(PrefilterChecker):
447
+
448
+ priority = Integer(700).tag(config=True)
449
+
450
+ def check(self, line_info):
451
+ """If the ifun is magic, and automagic is on, run it. Note: normal,
452
+ non-auto magic would already have been triggered via '%' in
453
+ check_esc_chars. This just checks for automagic. Also, before
454
+ triggering the magic handler, make sure that there is nothing in the
455
+ user namespace which could shadow it."""
456
+ if not self.shell.automagic or not self.shell.find_magic(line_info.ifun):
457
+ return None
458
+
459
+ # We have a likely magic method. Make sure we should actually call it.
460
+ if line_info.continue_prompt and not self.prefilter_manager.multi_line_specials:
461
+ return None
462
+
463
+ head = line_info.ifun.split('.',1)[0]
464
+ if is_shadowed(head, self.shell):
465
+ return None
466
+
467
+ return self.prefilter_manager.get_handler_by_name('magic')
468
+
469
+
470
+ class PythonOpsChecker(PrefilterChecker):
471
+
472
+ priority = Integer(900).tag(config=True)
473
+
474
+ def check(self, line_info):
475
+ """If the 'rest' of the line begins with a function call or pretty much
476
+ any python operator, we should simply execute the line (regardless of
477
+ whether or not there's a possible autocall expansion). This avoids
478
+ spurious (and very confusing) getattr() accesses."""
479
+ if line_info.the_rest and line_info.the_rest[0] in "!=()<>,+*/%^&|":
480
+ return self.prefilter_manager.get_handler_by_name("normal")
481
+ else:
482
+ return None
483
+
484
+
485
+ class AutocallChecker(PrefilterChecker):
486
+
487
+ priority = Integer(1000).tag(config=True)
488
+
489
+ function_name_regexp = CRegExp(re_fun_name,
490
+ help="RegExp to identify potential function names."
491
+ ).tag(config=True)
492
+ exclude_regexp = CRegExp(re_exclude_auto,
493
+ help="RegExp to exclude strings with this start from autocalling."
494
+ ).tag(config=True)
495
+
496
+ def check(self, line_info):
497
+ "Check if the initial word/function is callable and autocall is on."
498
+ if not self.shell.autocall:
499
+ return None
500
+
501
+ oinfo = line_info.ofind(self.shell) # This can mutate state via getattr
502
+ if not oinfo.found:
503
+ return None
504
+
505
+ ignored_funs = ['b', 'f', 'r', 'u', 'br', 'rb', 'fr', 'rf']
506
+ ifun = line_info.ifun
507
+ line = line_info.line
508
+ if ifun.lower() in ignored_funs and (line.startswith(ifun + "'") or line.startswith(ifun + '"')):
509
+ return None
510
+
511
+ if (
512
+ callable(oinfo.obj)
513
+ and (not self.exclude_regexp.match(line_info.the_rest))
514
+ and self.function_name_regexp.match(line_info.ifun)
515
+ and (
516
+ line_info.raw_the_rest.startswith(" ")
517
+ or not line_info.raw_the_rest.strip()
518
+ )
519
+ ):
520
+ return self.prefilter_manager.get_handler_by_name("auto")
521
+ else:
522
+ return None
523
+
524
+
525
+ #-----------------------------------------------------------------------------
526
+ # Prefilter handlers
527
+ #-----------------------------------------------------------------------------
528
+
529
+
530
+ class PrefilterHandler(Configurable):
531
+ handler_name = Unicode("normal")
532
+ esc_strings: List = List([])
533
+ shell = Instance(
534
+ "IPython.core.interactiveshell.InteractiveShellABC", allow_none=True
535
+ )
536
+ prefilter_manager = Instance(
537
+ "IPython.core.prefilter.PrefilterManager", allow_none=True
538
+ )
539
+
540
+ def __init__(self, shell=None, prefilter_manager=None, **kwargs):
541
+ super(PrefilterHandler, self).__init__(
542
+ shell=shell, prefilter_manager=prefilter_manager, **kwargs
543
+ )
544
+ self.prefilter_manager.register_handler(
545
+ self.handler_name,
546
+ self,
547
+ self.esc_strings
548
+ )
549
+
550
+ def handle(self, line_info):
551
+ # print("normal: ", line_info)
552
+ """Handle normal input lines. Use as a template for handlers."""
553
+
554
+ # With autoindent on, we need some way to exit the input loop, and I
555
+ # don't want to force the user to have to backspace all the way to
556
+ # clear the line. The rule will be in this case, that either two
557
+ # lines of pure whitespace in a row, or a line of pure whitespace but
558
+ # of a size different to the indent level, will exit the input loop.
559
+ line = line_info.line
560
+ continue_prompt = line_info.continue_prompt
561
+
562
+ if (continue_prompt and
563
+ self.shell.autoindent and
564
+ line.isspace() and
565
+ 0 < abs(len(line) - self.shell.indent_current_nsp) <= 2):
566
+ line = ''
567
+
568
+ return line
569
+
570
+ def __str__(self):
571
+ return "<%s(name=%s)>" % (self.__class__.__name__, self.handler_name)
572
+
573
+
574
+ class MacroHandler(PrefilterHandler):
575
+ handler_name = Unicode("macro")
576
+
577
+ def handle(self, line_info):
578
+ obj = self.shell.user_ns.get(line_info.ifun)
579
+ pre_space = line_info.pre_whitespace
580
+ line_sep = "\n" + pre_space
581
+ return pre_space + line_sep.join(obj.value.splitlines())
582
+
583
+
584
+ class MagicHandler(PrefilterHandler):
585
+
586
+ handler_name = Unicode('magic')
587
+ esc_strings = List([ESC_MAGIC])
588
+
589
+ def handle(self, line_info):
590
+ """Execute magic functions."""
591
+ ifun = line_info.ifun
592
+ the_rest = line_info.the_rest
593
+ #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
594
+ t_arg_s = ifun + " " + the_rest
595
+ t_magic_name, _, t_magic_arg_s = t_arg_s.partition(' ')
596
+ t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
597
+ cmd = '%sget_ipython().run_line_magic(%r, %r)' % (line_info.pre_whitespace, t_magic_name, t_magic_arg_s)
598
+ return cmd
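For illustration, a line such as `%time sleep(1)` reaches this handler with ifun 'time' and the_rest 'sleep(1)', and is rewritten to roughly:

    # get_ipython().run_line_magic('time', 'sleep(1)')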
599
+
600
+
601
+ class AutoHandler(PrefilterHandler):
602
+
603
+ handler_name = Unicode('auto')
604
+ esc_strings = List([ESC_PAREN, ESC_QUOTE, ESC_QUOTE2])
605
+
606
+ def handle(self, line_info):
607
+ """Handle lines which can be auto-executed, quoting if requested."""
608
+ line = line_info.line
609
+ ifun = line_info.ifun
610
+ the_rest = line_info.the_rest
611
+ esc = line_info.esc
612
+ continue_prompt = line_info.continue_prompt
613
+ obj = line_info.ofind(self.shell).obj
614
+
615
+ # This should only be active for single-line input!
616
+ if continue_prompt:
617
+ return line
618
+
619
+ force_auto = isinstance(obj, IPyAutocall)
620
+
621
+ # User objects sometimes raise exceptions on attribute access other
622
+ # than AttributeError (we've seen it in the past), so it's safest to be
623
+ # ultra-conservative here and catch all.
624
+ try:
625
+ auto_rewrite = obj.rewrite
626
+ except Exception:
627
+ auto_rewrite = True
628
+
629
+ if esc == ESC_QUOTE:
630
+ # Auto-quote splitting on whitespace
631
+ newcmd = '%s("%s")' % (ifun,'", "'.join(the_rest.split()) )
632
+ elif esc == ESC_QUOTE2:
633
+ # Auto-quote whole string
634
+ newcmd = '%s("%s")' % (ifun,the_rest)
635
+ elif esc == ESC_PAREN:
636
+ newcmd = '%s(%s)' % (ifun,",".join(the_rest.split()))
637
+ else:
638
+ # Auto-paren.
639
+ if force_auto:
640
+ # Don't rewrite if it is already a call.
641
+ do_rewrite = not the_rest.startswith('(')
642
+ else:
643
+ if not the_rest:
644
+ # We only apply it to argument-less calls if the autocall
645
+ # parameter is set to 2.
646
+ do_rewrite = (self.shell.autocall >= 2)
647
+ elif the_rest.startswith('[') and hasattr(obj, '__getitem__'):
648
+ # Don't autocall in this case: item access for an object
649
+ # which is BOTH callable and implements __getitem__.
650
+ do_rewrite = False
651
+ else:
652
+ do_rewrite = True
653
+
654
+ # Figure out the rewritten command
655
+ if do_rewrite:
656
+ if the_rest.endswith(';'):
657
+ newcmd = '%s(%s);' % (ifun.rstrip(),the_rest[:-1])
658
+ else:
659
+ newcmd = '%s(%s)' % (ifun.rstrip(), the_rest)
660
+ else:
661
+ normal_handler = self.prefilter_manager.get_handler_by_name('normal')
662
+ return normal_handler.handle(line_info)
663
+
664
+ # Display the rewritten call
665
+ if auto_rewrite:
666
+ self.shell.auto_rewrite_input(newcmd)
667
+
668
+ return newcmd
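As a quick illustration of the escape branches above (the function and arguments are hypothetical):

    # ,fun a b   ->  fun("a", "b")   # ESC_QUOTE: auto-quote, split on whitespace
    # ;fun a b   ->  fun("a b")      # ESC_QUOTE2: auto-quote the whole remainder
    # /fun a b   ->  fun(a,b)        # ESC_PAREN: auto-parenthesize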
669
+
670
+
671
+ class EmacsHandler(PrefilterHandler):
672
+
673
+ handler_name = Unicode('emacs')
674
+ esc_strings = List([])
675
+
676
+ def handle(self, line_info):
677
+ """Handle input lines marked by python-mode."""
678
+
679
+ # Currently, nothing is done. Later more functionality can be added
680
+ # here if needed.
681
+
682
+ # The input cache shouldn't be updated
683
+ return line_info.line
684
+
685
+
686
+ #-----------------------------------------------------------------------------
687
+ # Defaults
688
+ #-----------------------------------------------------------------------------
689
+
690
+
691
+ _default_checkers = [
692
+ EmacsChecker,
693
+ MacroChecker,
694
+ IPyAutocallChecker,
695
+ AssignmentChecker,
696
+ AutoMagicChecker,
697
+ PythonOpsChecker,
698
+ AutocallChecker
699
+ ]
700
+
701
+ _default_handlers = [
702
+ PrefilterHandler,
703
+ MacroHandler,
704
+ MagicHandler,
705
+ AutoHandler,
706
+ EmacsHandler
707
+ ]
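As a hedged sketch of extending the pipeline (the class name and trigger are hypothetical), a custom checker only needs to be instantiated against the shell's prefilter manager and to return an already-registered handler from check():

    class CommentChecker(PrefilterChecker):
        """Hypothetical checker: route lines starting with '##' to the normal handler."""
        priority = Integer(50).tag(config=True)

        def check(self, line_info):
            if line_info.line.startswith('##'):
                return self.prefilter_manager.get_handler_by_name('normal')
            return None

    # Instantiation registers the checker, mirroring init_checkers() above:
    # ip = get_ipython()
    # CommentChecker(shell=ip, prefilter_manager=ip.prefilter_manager,
    #                parent=ip.prefilter_manager)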
temp_venv/lib/python3.13/site-packages/IPython/core/profiledir.py ADDED
@@ -0,0 +1,244 @@
1
+ # encoding: utf-8
2
+ """An object for managing IPython profile directories."""
3
+
4
+ # Copyright (c) IPython Development Team.
5
+ # Distributed under the terms of the Modified BSD License.
6
+
7
+ import os
8
+ import shutil
9
+ import errno
10
+ from pathlib import Path
11
+
12
+ from traitlets.config.configurable import LoggingConfigurable
13
+ from ..paths import get_ipython_package_dir
14
+ from ..utils.path import expand_path, ensure_dir_exists
15
+ from traitlets import Unicode, Bool, observe
16
+
17
+ from typing import Optional
18
+
19
+ #-----------------------------------------------------------------------------
20
+ # Module errors
21
+ #-----------------------------------------------------------------------------
22
+
23
+ class ProfileDirError(Exception):
24
+ pass
25
+
26
+
27
+ #-----------------------------------------------------------------------------
28
+ # Class for managing profile directories
29
+ #-----------------------------------------------------------------------------
30
+
31
+ class ProfileDir(LoggingConfigurable):
32
+ """An object to manage the profile directory and its resources.
33
+
34
+ The profile directory is used by all IPython applications, to manage
35
+ configuration, logging and security.
36
+
37
+ This object knows how to find, create and manage these directories. This
38
+ should be used by any code that wants to handle profiles.
39
+ """
40
+
41
+ security_dir_name = Unicode('security')
42
+ log_dir_name = Unicode('log')
43
+ startup_dir_name = Unicode('startup')
44
+ pid_dir_name = Unicode('pid')
45
+ static_dir_name = Unicode('static')
46
+ security_dir = Unicode(u'')
47
+ log_dir = Unicode(u'')
48
+ startup_dir = Unicode(u'')
49
+ pid_dir = Unicode(u'')
50
+ static_dir = Unicode(u'')
51
+
52
+ location = Unicode(u'',
53
+ help="""Set the profile location directly. This overrides the logic used by the
54
+ `profile` option.""",
55
+ ).tag(config=True)
56
+
57
+ _location_isset = Bool(False) # flag for detecting multiply set location
58
+ @observe('location')
59
+ def _location_changed(self, change):
60
+ if self._location_isset:
61
+ raise RuntimeError("Cannot set profile location more than once.")
62
+ self._location_isset = True
63
+ new = change['new']
64
+ ensure_dir_exists(new)
65
+
66
+ # ensure config files exist:
67
+ self.security_dir = os.path.join(new, self.security_dir_name)
68
+ self.log_dir = os.path.join(new, self.log_dir_name)
69
+ self.startup_dir = os.path.join(new, self.startup_dir_name)
70
+ self.pid_dir = os.path.join(new, self.pid_dir_name)
71
+ self.static_dir = os.path.join(new, self.static_dir_name)
72
+ self.check_dirs()
73
+
74
+ def _mkdir(self, path: str, mode: Optional[int] = None) -> bool:
75
+ """ensure a directory exists at a given path
76
+
77
+ This is a version of os.mkdir, with the following differences:
78
+
79
+ - returns whether the directory has been created or not.
80
+ - ignores EEXIST, protecting against race conditions where
81
+ the dir may have been created in between the check and
82
+ the creation
83
+ - sets permissions if requested and the dir already exists
84
+
85
+ Parameters
86
+ ----------
87
+ path: str
88
+ path of the dir to create
89
+ mode: int
90
+ see `mode` of `os.mkdir`
91
+
92
+ Returns
93
+ -------
94
+ bool:
95
+ returns True if it created the directory, False otherwise
96
+ """
97
+
98
+ if os.path.exists(path):
99
+ if mode and os.stat(path).st_mode != mode:
100
+ try:
101
+ os.chmod(path, mode)
102
+ except OSError:
103
+ self.log.warning(
104
+ "Could not set permissions on %s",
105
+ path
106
+ )
107
+ return False
108
+ try:
109
+ if mode:
110
+ os.mkdir(path, mode)
111
+ else:
112
+ os.mkdir(path)
113
+ except OSError as e:
114
+ if e.errno == errno.EEXIST:
115
+ return False
116
+ else:
117
+ raise
118
+
119
+ return True
120
+
121
+ @observe('log_dir')
122
+ def check_log_dir(self, change=None):
123
+ self._mkdir(self.log_dir)
124
+
125
+ @observe('startup_dir')
126
+ def check_startup_dir(self, change=None):
127
+ if self._mkdir(self.startup_dir):
128
+ readme = os.path.join(self.startup_dir, "README")
129
+ src = os.path.join(
130
+ get_ipython_package_dir(), "core", "profile", "README_STARTUP"
131
+ )
132
+
133
+ if os.path.exists(src):
134
+ if not os.path.exists(readme):
135
+ shutil.copy(src, readme)
136
+ else:
137
+ self.log.warning(
138
+ "Could not copy README_STARTUP to startup dir. Source file %s does not exist.",
139
+ src,
140
+ )
141
+
142
+ @observe('security_dir')
143
+ def check_security_dir(self, change=None):
144
+ self._mkdir(self.security_dir, 0o40700)
145
+
146
+ @observe('pid_dir')
147
+ def check_pid_dir(self, change=None):
148
+ self._mkdir(self.pid_dir, 0o40700)
149
+
150
+ def check_dirs(self):
151
+ self.check_security_dir()
152
+ self.check_log_dir()
153
+ self.check_pid_dir()
154
+ self.check_startup_dir()
155
+
156
+ def copy_config_file(self, config_file: str, path: Path, overwrite=False) -> bool:
157
+ """Copy a default config file into the active profile directory.
158
+
159
+ Default configuration files are kept in :mod:`IPython.core.profile`.
160
+ This function moves these from that location to the working profile
161
+ directory.
162
+ """
163
+ dst = Path(os.path.join(self.location, config_file))
164
+ if dst.exists() and not overwrite:
165
+ return False
166
+ if path is None:
167
+ path = os.path.join(get_ipython_package_dir(), u'core', u'profile', u'default')
168
+ assert isinstance(path, Path)
169
+ src = path / config_file
170
+ shutil.copy(src, dst)
171
+ return True
172
+
173
+ @classmethod
174
+ def create_profile_dir(cls, profile_dir, config=None):
175
+ """Create a new profile directory given a full path.
176
+
177
+ Parameters
178
+ ----------
179
+ profile_dir : str
180
+ The full path to the profile directory. If it does exist, it will
181
+ be used. If not, it will be created.
182
+ """
183
+ return cls(location=profile_dir, config=config)
184
+
185
+ @classmethod
186
+ def create_profile_dir_by_name(cls, path, name=u'default', config=None):
187
+ """Create a profile dir by profile name and path.
188
+
189
+ Parameters
190
+ ----------
191
+ path : unicode
192
+ The path (directory) to put the profile directory in.
193
+ name : unicode
194
+ The name of the profile. The name of the profile directory will
195
+ be "profile_<profile>".
196
+ """
197
+ if not os.path.isdir(path):
198
+ raise ProfileDirError('Directory not found: %s' % path)
199
+ profile_dir = os.path.join(path, u'profile_' + name)
200
+ return cls(location=profile_dir, config=config)
201
+
202
+ @classmethod
203
+ def find_profile_dir_by_name(cls, ipython_dir, name=u'default', config=None):
204
+ """Find an existing profile dir by profile name, return its ProfileDir.
205
+
206
+ This searches through a sequence of paths for a profile dir. If it
207
+ is not found, a :class:`ProfileDirError` exception will be raised.
208
+
209
+ The search path algorithm is:
210
+ 1. ``os.getcwd()`` # removed for security reasons.
211
+ 2. ``ipython_dir``
212
+
213
+ Parameters
214
+ ----------
215
+ ipython_dir : unicode or str
216
+ The IPython directory to use.
217
+ name : unicode or str
218
+ The name of the profile. The name of the profile directory
219
+ will be "profile_<profile>".
220
+ """
221
+ dirname = u'profile_' + name
222
+ paths = [ipython_dir]
223
+ for p in paths:
224
+ profile_dir = os.path.join(p, dirname)
225
+ if os.path.isdir(profile_dir):
226
+ return cls(location=profile_dir, config=config)
227
+ else:
228
+ raise ProfileDirError('Profile directory not found in paths: %s' % dirname)
229
+
230
+ @classmethod
231
+ def find_profile_dir(cls, profile_dir, config=None):
232
+ """Find/create a profile dir and return its ProfileDir.
233
+
234
+ This will create the profile directory if it doesn't exist.
235
+
236
+ Parameters
237
+ ----------
238
+ profile_dir : unicode or str
239
+ The path of the profile directory.
240
+ """
241
+ profile_dir = expand_path(profile_dir)
242
+ if not os.path.isdir(profile_dir):
243
+ raise ProfileDirError('Profile directory not found: %s' % profile_dir)
244
+ return cls(location=profile_dir, config=config)
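For illustration, here is a minimal standalone sketch of the race-tolerant create-or-reuse pattern that `_mkdir` above implements; the `ensure_dir` name and the `/tmp/profile_demo` path are hypothetical, not part of IPython.

import errno
import os
from typing import Optional

def ensure_dir(path: str, mode: Optional[int] = None) -> bool:
    """Create `path` if needed; return True only if this call created it."""
    if os.path.exists(path):
        if mode and os.stat(path).st_mode != mode:
            os.chmod(path, mode)  # best-effort permission fix on an existing dir
        return False
    try:
        if mode:
            os.mkdir(path, mode)
        else:
            os.mkdir(path)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # created by another process between the exists() check and mkdir()
            return False
        raise
    return True

print(ensure_dir("/tmp/profile_demo"))  # True on first call, False afterwards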
temp_venv/lib/python3.13/site-packages/IPython/core/release.py ADDED
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Release data for the IPython project."""
3
+
4
+ #-----------------------------------------------------------------------------
5
+ # Copyright (c) 2008, IPython Development Team.
6
+ # Copyright (c) 2001, Fernando Perez <[email protected]>
7
+ # Copyright (c) 2001, Janko Hauser <[email protected]>
8
+ # Copyright (c) 2001, Nathaniel Gray <[email protected]>
9
+ #
10
+ # Distributed under the terms of the Modified BSD License.
11
+ #
12
+ # The full license is in the file COPYING.txt, distributed with this software.
13
+ #-----------------------------------------------------------------------------
14
+
15
+ # IPython version information. An empty _version_extra corresponds to a full
16
+ # release. 'dev' as a _version_extra string means this is a development
17
+ # version
18
+ _version_major = 9
19
+ _version_minor = 2
20
+ _version_patch = 0
21
+ _version_extra = ".dev"
22
+ # _version_extra = "b2"
23
+ _version_extra = "" # Uncomment this for full releases
24
+
25
+ # Construct full version string from these.
26
+ _ver = [_version_major, _version_minor, _version_patch]
27
+
28
+ __version__ = '.'.join(map(str, _ver))
29
+ if _version_extra:
30
+ __version__ = __version__ + _version_extra
31
+
32
+ version = __version__ # backwards compatibility name
33
+ version_info = (_version_major, _version_minor, _version_patch, _version_extra)
34
+
35
+
36
+ license = "BSD-3-Clause"
37
+
38
+ authors = {
39
+ "Fernando": ("Fernando Perez", "[email protected]"),
40
+ "M": ("M Bussonnier", "[email protected]"),
41
+ }
42
+
43
+ author = 'The IPython Development Team'
44
+
45
+ author_email = '[email protected]'
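A quick sketch of how the version string above is assembled; the numbers are illustrative and not a claim about any particular IPython release.

_version_major, _version_minor, _version_patch = 9, 2, 0
_version_extra = ".dev"  # empty string for a full release

_ver = [_version_major, _version_minor, _version_patch]
__version__ = ".".join(map(str, _ver))
if _version_extra:
    __version__ = __version__ + _version_extra

print(__version__)  # -> 9.2.0.dev
version_info = (_version_major, _version_minor, _version_patch, _version_extra)
print(version_info)  # -> (9, 2, 0, '.dev')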
temp_venv/lib/python3.13/site-packages/charset_normalizer-3.4.2.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 TAHRI Ahmed R.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
temp_venv/lib/python3.13/site-packages/executing/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.13 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/_exceptions.cpython-313.pyc ADDED
Binary file (1.21 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/_position_node_finder.cpython-313.pyc ADDED
Binary file (37 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/_pytest_utils.cpython-313.pyc ADDED
Binary file (756 Bytes).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/executing.cpython-313.pyc ADDED
Binary file (49.6 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/version.cpython-313.pyc ADDED
Binary file (222 Bytes).
 
temp_venv/lib/python3.13/site-packages/fsspec-2025.3.2.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2018, Martin Durant
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ * Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temp_venv/lib/python3.13/site-packages/fsspec/implementations/__init__.py ADDED
File without changes
temp_venv/lib/python3.13/site-packages/fsspec/implementations/arrow.py ADDED
@@ -0,0 +1,304 @@
1
+ import errno
2
+ import io
3
+ import os
4
+ import secrets
5
+ import shutil
6
+ from contextlib import suppress
7
+ from functools import cached_property, wraps
8
+ from urllib.parse import parse_qs
9
+
10
+ from fsspec.spec import AbstractFileSystem
11
+ from fsspec.utils import (
12
+ get_package_version_without_import,
13
+ infer_storage_options,
14
+ mirror_from,
15
+ tokenize,
16
+ )
17
+
18
+
19
+ def wrap_exceptions(func):
20
+ @wraps(func)
21
+ def wrapper(*args, **kwargs):
22
+ try:
23
+ return func(*args, **kwargs)
24
+ except OSError as exception:
25
+ if not exception.args:
26
+ raise
27
+
28
+ message, *args = exception.args
29
+ if isinstance(message, str) and "does not exist" in message:
30
+ raise FileNotFoundError(errno.ENOENT, message) from exception
31
+ else:
32
+ raise
33
+
34
+ return wrapper
35
+
36
+
37
+ PYARROW_VERSION = None
38
+
39
+
40
+ class ArrowFSWrapper(AbstractFileSystem):
41
+ """FSSpec-compatible wrapper of pyarrow.fs.FileSystem.
42
+
43
+ Parameters
44
+ ----------
45
+ fs : pyarrow.fs.FileSystem
46
+
47
+ """
48
+
49
+ root_marker = "/"
50
+
51
+ def __init__(self, fs, **kwargs):
52
+ global PYARROW_VERSION
53
+ PYARROW_VERSION = get_package_version_without_import("pyarrow")
54
+ self.fs = fs
55
+ super().__init__(**kwargs)
56
+
57
+ @property
58
+ def protocol(self):
59
+ return self.fs.type_name
60
+
61
+ @cached_property
62
+ def fsid(self):
63
+ return "hdfs_" + tokenize(self.fs.host, self.fs.port)
64
+
65
+ @classmethod
66
+ def _strip_protocol(cls, path):
67
+ ops = infer_storage_options(path)
68
+ path = ops["path"]
69
+ if path.startswith("//"):
70
+ # special case for "hdfs://path" (without the triple slash)
71
+ path = path[1:]
72
+ return path
73
+
74
+ def ls(self, path, detail=False, **kwargs):
75
+ path = self._strip_protocol(path)
76
+ from pyarrow.fs import FileSelector
77
+
78
+ entries = [
79
+ self._make_entry(entry)
80
+ for entry in self.fs.get_file_info(FileSelector(path))
81
+ ]
82
+ if detail:
83
+ return entries
84
+ else:
85
+ return [entry["name"] for entry in entries]
86
+
87
+ def info(self, path, **kwargs):
88
+ path = self._strip_protocol(path)
89
+ [info] = self.fs.get_file_info([path])
90
+ return self._make_entry(info)
91
+
92
+ def exists(self, path):
93
+ path = self._strip_protocol(path)
94
+ try:
95
+ self.info(path)
96
+ except FileNotFoundError:
97
+ return False
98
+ else:
99
+ return True
100
+
101
+ def _make_entry(self, info):
102
+ from pyarrow.fs import FileType
103
+
104
+ if info.type is FileType.Directory:
105
+ kind = "directory"
106
+ elif info.type is FileType.File:
107
+ kind = "file"
108
+ elif info.type is FileType.NotFound:
109
+ raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), info.path)
110
+ else:
111
+ kind = "other"
112
+
113
+ return {
114
+ "name": info.path,
115
+ "size": info.size,
116
+ "type": kind,
117
+ "mtime": info.mtime,
118
+ }
119
+
120
+ @wrap_exceptions
121
+ def cp_file(self, path1, path2, **kwargs):
122
+ path1 = self._strip_protocol(path1).rstrip("/")
123
+ path2 = self._strip_protocol(path2).rstrip("/")
124
+
125
+ with self._open(path1, "rb") as lstream:
126
+ tmp_fname = f"{path2}.tmp.{secrets.token_hex(6)}"
127
+ try:
128
+ with self.open(tmp_fname, "wb") as rstream:
129
+ shutil.copyfileobj(lstream, rstream)
130
+ self.fs.move(tmp_fname, path2)
131
+ except BaseException:
132
+ with suppress(FileNotFoundError):
133
+ self.fs.delete_file(tmp_fname)
134
+ raise
135
+
136
+ @wrap_exceptions
137
+ def mv(self, path1, path2, **kwargs):
138
+ path1 = self._strip_protocol(path1).rstrip("/")
139
+ path2 = self._strip_protocol(path2).rstrip("/")
140
+ self.fs.move(path1, path2)
141
+
142
+ @wrap_exceptions
143
+ def rm_file(self, path):
144
+ path = self._strip_protocol(path)
145
+ self.fs.delete_file(path)
146
+
147
+ @wrap_exceptions
148
+ def rm(self, path, recursive=False, maxdepth=None):
149
+ path = self._strip_protocol(path).rstrip("/")
150
+ if self.isdir(path):
151
+ if recursive:
152
+ self.fs.delete_dir(path)
153
+ else:
154
+ raise ValueError("Can't delete directories without recursive=False")
155
+ else:
156
+ self.fs.delete_file(path)
157
+
158
+ @wrap_exceptions
159
+ def _open(self, path, mode="rb", block_size=None, seekable=True, **kwargs):
160
+ if mode == "rb":
161
+ if seekable:
162
+ method = self.fs.open_input_file
163
+ else:
164
+ method = self.fs.open_input_stream
165
+ elif mode == "wb":
166
+ method = self.fs.open_output_stream
167
+ elif mode == "ab":
168
+ method = self.fs.open_append_stream
169
+ else:
170
+ raise ValueError(f"unsupported mode for Arrow filesystem: {mode!r}")
171
+
172
+ _kwargs = {}
173
+ if mode != "rb" or not seekable:
174
+ if int(PYARROW_VERSION.split(".")[0]) >= 4:
175
+ # disable compression auto-detection
176
+ _kwargs["compression"] = None
177
+ stream = method(path, **_kwargs)
178
+
179
+ return ArrowFile(self, stream, path, mode, block_size, **kwargs)
180
+
181
+ @wrap_exceptions
182
+ def mkdir(self, path, create_parents=True, **kwargs):
183
+ path = self._strip_protocol(path)
184
+ if create_parents:
185
+ self.makedirs(path, exist_ok=True)
186
+ else:
187
+ self.fs.create_dir(path, recursive=False)
188
+
189
+ @wrap_exceptions
190
+ def makedirs(self, path, exist_ok=False):
191
+ path = self._strip_protocol(path)
192
+ self.fs.create_dir(path, recursive=True)
193
+
194
+ @wrap_exceptions
195
+ def rmdir(self, path):
196
+ path = self._strip_protocol(path)
197
+ self.fs.delete_dir(path)
198
+
199
+ @wrap_exceptions
200
+ def modified(self, path):
201
+ path = self._strip_protocol(path)
202
+ return self.fs.get_file_info(path).mtime
203
+
204
+ def cat_file(self, path, start=None, end=None, **kwargs):
205
+ kwargs["seekable"] = start not in [None, 0]
206
+ return super().cat_file(path, start=None, end=None, **kwargs)
207
+
208
+ def get_file(self, rpath, lpath, **kwargs):
209
+ kwargs["seekable"] = False
210
+ super().get_file(rpath, lpath, **kwargs)
211
+
212
+
213
+ @mirror_from(
214
+ "stream",
215
+ [
216
+ "read",
217
+ "seek",
218
+ "tell",
219
+ "write",
220
+ "readable",
221
+ "writable",
222
+ "close",
223
+ "size",
224
+ "seekable",
225
+ ],
226
+ )
227
+ class ArrowFile(io.IOBase):
228
+ def __init__(self, fs, stream, path, mode, block_size=None, **kwargs):
229
+ self.path = path
230
+ self.mode = mode
231
+
232
+ self.fs = fs
233
+ self.stream = stream
234
+
235
+ self.blocksize = self.block_size = block_size
236
+ self.kwargs = kwargs
237
+
238
+ def __enter__(self):
239
+ return self
240
+
241
+ def __exit__(self, *args):
242
+ return self.close()
243
+
244
+
245
+ class HadoopFileSystem(ArrowFSWrapper):
246
+ """A wrapper on top of the pyarrow.fs.HadoopFileSystem
247
+ to connect its interface with fsspec"""
248
+
249
+ protocol = "hdfs"
250
+
251
+ def __init__(
252
+ self,
253
+ host="default",
254
+ port=0,
255
+ user=None,
256
+ kerb_ticket=None,
257
+ replication=3,
258
+ extra_conf=None,
259
+ **kwargs,
260
+ ):
261
+ """
262
+
263
+ Parameters
264
+ ----------
265
+ host: str
266
+ Hostname, IP or "default" to try to read from Hadoop config
267
+ port: int
268
+ Port to connect on, or default from Hadoop config if 0
269
+ user: str or None
270
+ If given, connect as this username
271
+ kerb_ticket: str or None
272
+ If given, use this ticket for authentication
273
+ replication: int
274
+ set replication factor of file for write operations. default value is 3.
275
+ extra_conf: None or dict
276
+ Passed on to HadoopFileSystem
277
+ """
278
+ from pyarrow.fs import HadoopFileSystem
279
+
280
+ fs = HadoopFileSystem(
281
+ host=host,
282
+ port=port,
283
+ user=user,
284
+ kerb_ticket=kerb_ticket,
285
+ replication=replication,
286
+ extra_conf=extra_conf,
287
+ )
288
+ super().__init__(fs=fs, **kwargs)
289
+
290
+ @staticmethod
291
+ def _get_kwargs_from_urls(path):
292
+ ops = infer_storage_options(path)
293
+ out = {}
294
+ if ops.get("host", None):
295
+ out["host"] = ops["host"]
296
+ if ops.get("username", None):
297
+ out["user"] = ops["username"]
298
+ if ops.get("port", None):
299
+ out["port"] = ops["port"]
300
+ if ops.get("url_query", None):
301
+ queries = parse_qs(ops["url_query"])
302
+ if queries.get("replication", None):
303
+ out["replication"] = int(queries["replication"][0])
304
+ return out
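A minimal usage sketch of the wrapper above, assuming pyarrow is installed; the /tmp paths are illustrative.

from pyarrow.fs import LocalFileSystem
from fsspec.implementations.arrow import ArrowFSWrapper

# Wrap a pyarrow filesystem so it exposes the usual fsspec methods.
fs = ArrowFSWrapper(LocalFileSystem())

fs.makedirs("/tmp/arrow_demo", exist_ok=True)
with fs.open("/tmp/arrow_demo/hello.txt", "wb") as f:
    f.write(b"hello from pyarrow via fsspec")

print(fs.ls("/tmp/arrow_demo"))                      # names under the directory
print(fs.info("/tmp/arrow_demo/hello.txt")["size"])  # 29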
temp_venv/lib/python3.13/site-packages/fsspec/implementations/asyn_wrapper.py ADDED
@@ -0,0 +1,103 @@
1
+ import asyncio
2
+ import functools
3
+ import inspect
4
+
5
+ from fsspec.asyn import AsyncFileSystem, running_async
6
+
7
+
8
+ def async_wrapper(func, obj=None):
9
+ """
10
+ Wraps a synchronous function to make it awaitable.
11
+
12
+ Parameters
13
+ ----------
14
+ func : callable
15
+ The synchronous function to wrap.
16
+ obj : object, optional
17
+ The instance to bind the function to, if applicable.
18
+
19
+ Returns
20
+ -------
21
+ coroutine
22
+ An awaitable version of the function.
23
+ """
24
+
25
+ @functools.wraps(func)
26
+ async def wrapper(*args, **kwargs):
27
+ return await asyncio.to_thread(func, *args, **kwargs)
28
+
29
+ return wrapper
30
+
31
+
32
+ class AsyncFileSystemWrapper(AsyncFileSystem):
33
+ """
34
+ A wrapper class to convert a synchronous filesystem into an asynchronous one.
35
+
36
+ This class takes an existing synchronous filesystem implementation and wraps all
37
+ its methods to provide an asynchronous interface.
38
+
39
+ Parameters
40
+ ----------
41
+ sync_fs : AbstractFileSystem
42
+ The synchronous filesystem instance to wrap.
43
+ """
44
+
45
+ protocol = "async_wrapper"
46
+ cachable = False
47
+
48
+ def __init__(self, fs, *args, asynchronous=None, **kwargs):
49
+ if asynchronous is None:
50
+ asynchronous = running_async()
51
+ super().__init__(*args, asynchronous=asynchronous, **kwargs)
52
+ self.sync_fs = fs
53
+ self.protocol = self.sync_fs.protocol
54
+ self._wrap_all_sync_methods()
55
+
56
+ @property
57
+ def fsid(self):
58
+ return f"async_{self.sync_fs.fsid}"
59
+
60
+ def _wrap_all_sync_methods(self):
61
+ """
62
+ Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
63
+ """
64
+ excluded_methods = {"open"}
65
+ for method_name in dir(self.sync_fs):
66
+ if method_name.startswith("_") or method_name in excluded_methods:
67
+ continue
68
+
69
+ attr = inspect.getattr_static(self.sync_fs, method_name)
70
+ if isinstance(attr, property):
71
+ continue
72
+
73
+ method = getattr(self.sync_fs, method_name)
74
+ if callable(method) and not asyncio.iscoroutinefunction(method):
75
+ async_method = async_wrapper(method, obj=self)
76
+ setattr(self, f"_{method_name}", async_method)
77
+
78
+ @classmethod
79
+ def wrap_class(cls, sync_fs_class):
80
+ """
81
+ Create a new class that can be used to instantiate an AsyncFileSystemWrapper
82
+ with lazy instantiation of the underlying synchronous filesystem.
83
+
84
+ Parameters
85
+ ----------
86
+ sync_fs_class : type
87
+ The class of the synchronous filesystem to wrap.
88
+
89
+ Returns
90
+ -------
91
+ type
92
+ A new class that wraps the provided synchronous filesystem class.
93
+ """
94
+
95
+ class GeneratedAsyncFileSystemWrapper(cls):
96
+ def __init__(self, *args, **kwargs):
97
+ sync_fs = sync_fs_class(*args, **kwargs)
98
+ super().__init__(sync_fs)
99
+
100
+ GeneratedAsyncFileSystemWrapper.__name__ = (
101
+ f"Async{sync_fs_class.__name__}Wrapper"
102
+ )
103
+ return GeneratedAsyncFileSystemWrapper
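A minimal usage sketch of the wrapper above, using the in-memory fsspec filesystem; the path is illustrative.

import asyncio

import fsspec
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

async def main():
    sync_fs = fsspec.filesystem("memory")
    afs = AsyncFileSystemWrapper(sync_fs, asynchronous=True)
    # Each public sync method is re-exposed as an async `_<name>` coroutine
    # that runs the original call in a worker thread via asyncio.to_thread.
    await afs._pipe_file("/demo.bin", b"hello")
    print(await afs._cat_file("/demo.bin"))  # b'hello'

asyncio.run(main())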
temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_mapper.py ADDED
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import hashlib
5
+
6
+ from fsspec.implementations.local import make_path_posix
7
+
8
+
9
+ class AbstractCacheMapper(abc.ABC):
10
+ """Abstract super-class for mappers from remote URLs to local cached
11
+ basenames.
12
+ """
13
+
14
+ @abc.abstractmethod
15
+ def __call__(self, path: str) -> str: ...
16
+
17
+ def __eq__(self, other: object) -> bool:
18
+ # Identity only depends on class. When derived classes have attributes
19
+ # they will need to be included.
20
+ return isinstance(other, type(self))
21
+
22
+ def __hash__(self) -> int:
23
+ # Identity only depends on class. When derived classes have attributes
24
+ # they will need to be included.
25
+ return hash(type(self))
26
+
27
+
28
+ class BasenameCacheMapper(AbstractCacheMapper):
29
+ """Cache mapper that uses the basename of the remote URL and a fixed number
30
+ of directory levels above this.
31
+
32
+ The default is zero directory levels, meaning different paths with the same
33
+ basename will have the same cached basename.
34
+ """
35
+
36
+ def __init__(self, directory_levels: int = 0):
37
+ if directory_levels < 0:
38
+ raise ValueError(
39
+ "BasenameCacheMapper requires zero or positive directory_levels"
40
+ )
41
+ self.directory_levels = directory_levels
42
+
43
+ # Separator for directories when encoded as strings.
44
+ self._separator = "_@_"
45
+
46
+ def __call__(self, path: str) -> str:
47
+ path = make_path_posix(path)
48
+ prefix, *bits = path.rsplit("/", self.directory_levels + 1)
49
+ if bits:
50
+ return self._separator.join(bits)
51
+ else:
52
+ return prefix # No separator found, simple filename
53
+
54
+ def __eq__(self, other: object) -> bool:
55
+ return super().__eq__(other) and self.directory_levels == other.directory_levels
56
+
57
+ def __hash__(self) -> int:
58
+ return super().__hash__() ^ hash(self.directory_levels)
59
+
60
+
61
+ class HashCacheMapper(AbstractCacheMapper):
62
+ """Cache mapper that uses a hash of the remote URL."""
63
+
64
+ def __call__(self, path: str) -> str:
65
+ return hashlib.sha256(path.encode()).hexdigest()
66
+
67
+
68
+ def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
69
+ """Factory method to create cache mapper for backward compatibility with
70
+ ``CachingFileSystem`` constructor using ``same_names`` kwarg.
71
+ """
72
+ if same_names:
73
+ return BasenameCacheMapper()
74
+ else:
75
+ return HashCacheMapper()
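A quick illustration of the mappers defined above; the path is illustrative and the outputs shown assume a POSIX system.

from fsspec.implementations.cache_mapper import (
    BasenameCacheMapper,
    HashCacheMapper,
    create_cache_mapper,
)

path = "/data/2024/file.csv"

print(BasenameCacheMapper()(path))                    # file.csv
print(BasenameCacheMapper(directory_levels=1)(path))  # 2024_@_file.csv
print(len(HashCacheMapper()(path)))                   # 64 (sha256 hex digest)
print(type(create_cache_mapper(same_names=False)))    # HashCacheMapper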
temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_metadata.py ADDED
@@ -0,0 +1,232 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import pickle
5
+ import time
6
+ from typing import TYPE_CHECKING
7
+
8
+ from fsspec.utils import atomic_write
9
+
10
+ try:
11
+ import ujson as json
12
+ except ImportError:
13
+ if not TYPE_CHECKING:
14
+ import json
15
+
16
+ if TYPE_CHECKING:
17
+ from typing import Any, Dict, Iterator, Literal
18
+
19
+ from typing_extensions import TypeAlias
20
+
21
+ from .cached import CachingFileSystem
22
+
23
+ Detail: TypeAlias = Dict[str, Any]
24
+
25
+
26
+ class CacheMetadata:
27
+ """Cache metadata.
28
+
29
+ All reading and writing of cache metadata is performed by this class;
30
+ accessing the cached files and blocks is not.
31
+
32
+ Metadata is stored in a single file per storage directory in JSON format.
33
+ For backward compatibility, also reads metadata stored in pickle format
34
+ which is converted to JSON when next saved.
35
+ """
36
+
37
+ def __init__(self, storage: list[str]):
38
+ """
39
+
40
+ Parameters
41
+ ----------
42
+ storage: list[str]
43
+ Directories containing cached files, must be at least one. Metadata
44
+ is stored in the last of these directories by convention.
45
+ """
46
+ if not storage:
47
+ raise ValueError("CacheMetadata expects at least one storage location")
48
+
49
+ self._storage = storage
50
+ self.cached_files: list[Detail] = [{}]
51
+
52
+ # Private attribute to force saving of metadata in pickle format rather than
53
+ # JSON for use in tests to confirm can read both pickle and JSON formats.
54
+ self._force_save_pickle = False
55
+
56
+ def _load(self, fn: str) -> Detail:
57
+ """Low-level function to load metadata from specific file"""
58
+ try:
59
+ with open(fn, "r") as f:
60
+ loaded = json.load(f)
61
+ except ValueError:
62
+ with open(fn, "rb") as f:
63
+ loaded = pickle.load(f)
64
+ for c in loaded.values():
65
+ if isinstance(c.get("blocks"), list):
66
+ c["blocks"] = set(c["blocks"])
67
+ return loaded
68
+
69
+ def _save(self, metadata_to_save: Detail, fn: str) -> None:
70
+ """Low-level function to save metadata to specific file"""
71
+ if self._force_save_pickle:
72
+ with atomic_write(fn) as f:
73
+ pickle.dump(metadata_to_save, f)
74
+ else:
75
+ with atomic_write(fn, mode="w") as f:
76
+ json.dump(metadata_to_save, f)
77
+
78
+ def _scan_locations(
79
+ self, writable_only: bool = False
80
+ ) -> Iterator[tuple[str, str, bool]]:
81
+ """Yield locations (filenames) where metadata is stored, and whether
82
+ writable or not.
83
+
84
+ Parameters
85
+ ----------
86
+ writable_only: bool
87
+ Set to True to only yield writable locations.
88
+
89
+ Returns
90
+ -------
91
+ Yields (str, str, bool)
92
+ """
93
+ n = len(self._storage)
94
+ for i, storage in enumerate(self._storage):
95
+ writable = i == n - 1
96
+ if writable_only and not writable:
97
+ continue
98
+ yield os.path.join(storage, "cache"), storage, writable
99
+
100
+ def check_file(
101
+ self, path: str, cfs: CachingFileSystem | None
102
+ ) -> Literal[False] | tuple[Detail, str]:
103
+ """If path is in cache return its details, otherwise return ``False``.
104
+
105
+ If the optional CachingFileSystem is specified then it is used to
106
+ perform extra checks to reject possible matches, such as if they are
107
+ too old.
108
+ """
109
+ for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files):
110
+ if path not in cache:
111
+ continue
112
+ detail = cache[path].copy()
113
+
114
+ if cfs is not None:
115
+ if cfs.check_files and detail["uid"] != cfs.fs.ukey(path):
116
+ # Wrong file as determined by hash of file properties
117
+ continue
118
+ if cfs.expiry and time.time() - detail["time"] > cfs.expiry:
119
+ # Cached file has expired
120
+ continue
121
+
122
+ fn = os.path.join(base, detail["fn"])
123
+ if os.path.exists(fn):
124
+ return detail, fn
125
+ return False
126
+
127
+ def clear_expired(self, expiry_time: int) -> tuple[list[str], bool]:
128
+ """Remove expired metadata from the cache.
129
+
130
+ Returns names of files corresponding to expired metadata and a boolean
131
+ flag indicating whether the writable cache is empty. Caller is
132
+ responsible for deleting the expired files.
133
+ """
134
+ expired_files = []
135
+ for path, detail in self.cached_files[-1].copy().items():
136
+ if time.time() - detail["time"] > expiry_time:
137
+ fn = detail.get("fn", "")
138
+ if not fn:
139
+ raise RuntimeError(
140
+ f"Cache metadata does not contain 'fn' for {path}"
141
+ )
142
+ fn = os.path.join(self._storage[-1], fn)
143
+ expired_files.append(fn)
144
+ self.cached_files[-1].pop(path)
145
+
146
+ if self.cached_files[-1]:
147
+ cache_path = os.path.join(self._storage[-1], "cache")
148
+ self._save(self.cached_files[-1], cache_path)
149
+
150
+ writable_cache_empty = not self.cached_files[-1]
151
+ return expired_files, writable_cache_empty
152
+
153
+ def load(self) -> None:
154
+ """Load all metadata from disk and store in ``self.cached_files``"""
155
+ cached_files = []
156
+ for fn, _, _ in self._scan_locations():
157
+ if os.path.exists(fn):
158
+ # TODO: consolidate blocks here
159
+ cached_files.append(self._load(fn))
160
+ else:
161
+ cached_files.append({})
162
+ self.cached_files = cached_files or [{}]
163
+
164
+ def on_close_cached_file(self, f: Any, path: str) -> None:
165
+ """Perform side-effect actions on closing a cached file.
166
+
167
+ The actual closing of the file is the responsibility of the caller.
168
+ """
169
+ # File must be writable, so in self.cached_files[-1]
170
+ c = self.cached_files[-1][path]
171
+ if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
172
+ c["blocks"] = True
173
+
174
+ def pop_file(self, path: str) -> str | None:
175
+ """Remove metadata of cached file.
176
+
177
+ If path is in the cache, return the filename of the cached file,
178
+ otherwise return ``None``. Caller is responsible for deleting the
179
+ cached file.
180
+ """
181
+ details = self.check_file(path, None)
182
+ if not details:
183
+ return None
184
+ _, fn = details
185
+ if fn.startswith(self._storage[-1]):
186
+ self.cached_files[-1].pop(path)
187
+ self.save()
188
+ else:
189
+ raise PermissionError(
190
+ "Can only delete cached file in last, writable cache location"
191
+ )
192
+ return fn
193
+
194
+ def save(self) -> None:
195
+ """Save metadata to disk"""
196
+ for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files):
197
+ if not writable:
198
+ continue
199
+
200
+ if os.path.exists(fn):
201
+ cached_files = self._load(fn)
202
+ for k, c in cached_files.items():
203
+ if k in cache:
204
+ if c["blocks"] is True or cache[k]["blocks"] is True:
205
+ c["blocks"] = True
206
+ else:
207
+ # self.cached_files[*][*]["blocks"] must continue to
208
+ # point to the same set object so that updates
209
+ # performed by MMapCache are propagated back to
210
+ # self.cached_files.
211
+ blocks = cache[k]["blocks"]
212
+ blocks.update(c["blocks"])
213
+ c["blocks"] = blocks
214
+ c["time"] = max(c["time"], cache[k]["time"])
215
+ c["uid"] = cache[k]["uid"]
216
+
217
+ # Files can be added to cache after it was written once
218
+ for k, c in cache.items():
219
+ if k not in cached_files:
220
+ cached_files[k] = c
221
+ else:
222
+ cached_files = cache
223
+ cache = {k: v.copy() for k, v in cached_files.items()}
224
+ for c in cache.values():
225
+ if isinstance(c["blocks"], set):
226
+ c["blocks"] = list(c["blocks"])
227
+ self._save(cache, fn)
228
+ self.cached_files[-1] = cached_files
229
+
230
+ def update_file(self, path: str, detail: Detail) -> None:
231
+ """Update metadata for specific file in memory, do not save"""
232
+ self.cached_files[-1][path] = detail
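A minimal sketch of the metadata lifecycle implemented above; the entry values are illustrative, and the cached data file is created by hand only so that ``check_file`` can find it.

import os
import tempfile
import time

from fsspec.implementations.cache_metadata import CacheMetadata

storage = [tempfile.mkdtemp()]
md = CacheMetadata(storage)
md.load()  # no "cache" file on disk yet -> starts empty

detail = {
    "original": "memory://demo.bin",
    "fn": "deadbeef",        # cached basename, normally produced by a cache mapper
    "blocks": True,          # True means the whole file is cached
    "time": time.time(),
    "uid": "example-ukey",   # illustrative; normally fs.ukey(path)
}
md.update_file("memory://demo.bin", detail)
md.save()  # writes JSON metadata to <storage[-1]>/cache

# Create the (empty) cached data file so check_file can report a hit.
open(os.path.join(storage[-1], "deadbeef"), "wb").close()
print(md.check_file("memory://demo.bin", None))  # (detail dict, path to "deadbeef")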
temp_venv/lib/python3.13/site-packages/fsspec/implementations/cached.py ADDED
@@ -0,0 +1,941 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import tempfile
7
+ import time
8
+ import weakref
9
+ from shutil import rmtree
10
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar
11
+
12
+ from fsspec import AbstractFileSystem, filesystem
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.compression import compr
15
+ from fsspec.core import BaseCache, MMapCache
16
+ from fsspec.exceptions import BlocksizeMismatchError
17
+ from fsspec.implementations.cache_mapper import create_cache_mapper
18
+ from fsspec.implementations.cache_metadata import CacheMetadata
19
+ from fsspec.spec import AbstractBufferedFile
20
+ from fsspec.transaction import Transaction
21
+ from fsspec.utils import infer_compression
22
+
23
+ if TYPE_CHECKING:
24
+ from fsspec.implementations.cache_mapper import AbstractCacheMapper
25
+
26
+ logger = logging.getLogger("fsspec.cached")
27
+
28
+
29
+ class WriteCachedTransaction(Transaction):
30
+ def complete(self, commit=True):
31
+ rpaths = [f.path for f in self.files]
32
+ lpaths = [f.fn for f in self.files]
33
+ if commit:
34
+ self.fs.put(lpaths, rpaths)
35
+ self.files.clear()
36
+ self.fs._intrans = False
37
+ self.fs._transaction = None
38
+ self.fs = None # break cycle
39
+
40
+
41
+ class CachingFileSystem(AbstractFileSystem):
42
+ """Locally caching filesystem, layer over any other FS
43
+
44
+ This class implements chunk-wise local storage of remote files, for quick
45
+ access after the initial download. The files are stored in a given
46
+ directory with hashes of URLs for the filenames. If no directory is given,
47
+ a temporary one is used, which should be cleaned up by the OS after the
48
+ process ends. The files themselves are sparse (as implemented in
49
+ :class:`~fsspec.caching.MMapCache`), so only the data which is accessed
50
+ takes up space.
51
+
52
+ Restrictions:
53
+
54
+ - the block-size must be the same for each access of a given file, unless
55
+ all blocks of the file have already been read
56
+ - caching can only be applied to file-systems which produce files
57
+ derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
58
+ allowed, for testing
59
+ """
60
+
61
+ protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")
62
+
63
+ def __init__(
64
+ self,
65
+ target_protocol=None,
66
+ cache_storage="TMP",
67
+ cache_check=10,
68
+ check_files=False,
69
+ expiry_time=604800,
70
+ target_options=None,
71
+ fs=None,
72
+ same_names: bool | None = None,
73
+ compression=None,
74
+ cache_mapper: AbstractCacheMapper | None = None,
75
+ **kwargs,
76
+ ):
77
+ """
78
+
79
+ Parameters
80
+ ----------
81
+ target_protocol: str (optional)
82
+ Target filesystem protocol. Provide either this or ``fs``.
83
+ cache_storage: str or list(str)
84
+ Location to store files. If "TMP", this is a temporary directory,
85
+ and will be cleaned up by the OS when this process ends (or later).
86
+ If a list, each location will be tried in the order given, but
87
+ only the last will be considered writable.
88
+ cache_check: int
89
+ Number of seconds between reload of cache metadata
90
+ check_files: bool
91
+ Whether to explicitly see if the UID of the remote file matches
92
+ the stored one before using. Warning: some file systems such as
93
+ HTTP cannot reliably give a unique hash of the contents of some
94
+ path, so be sure to set this option to False.
95
+ expiry_time: int
96
+ The time in seconds after which a local copy is considered useless.
97
+ Set to falsy to prevent expiry. The default is equivalent to one
98
+ week.
99
+ target_options: dict or None
100
+ Passed to the instantiation of the FS, if fs is None.
101
+ fs: filesystem instance
102
+ The target filesystem to run against. Provide this or ``protocol``.
103
+ same_names: bool (optional)
104
+ By default, target URLs are hashed using a ``HashCacheMapper`` so
105
+ that files from different backends with the same basename do not
106
+ conflict. If this argument is ``true``, a ``BasenameCacheMapper``
107
+ is used instead. Other cache mapper options are available by using
108
+ the ``cache_mapper`` keyword argument. Only one of this and
109
+ ``cache_mapper`` should be specified.
110
+ compression: str (optional)
111
+ To decompress on download. Can be 'infer' (guess from the URL name),
112
+ one of the entries in ``fsspec.compression.compr``, or None for no
113
+ decompression.
114
+ cache_mapper: AbstractCacheMapper (optional)
115
+ The object used to map from original filenames to cached filenames.
116
+ Only one of this and ``same_names`` should be specified.
117
+ """
118
+ super().__init__(**kwargs)
119
+ if fs is None and target_protocol is None:
120
+ raise ValueError(
121
+ "Please provide filesystem instance(fs) or target_protocol"
122
+ )
123
+ if not (fs is None) ^ (target_protocol is None):
124
+ raise ValueError(
125
+ "Both filesystems (fs) and target_protocol may not be both given."
126
+ )
127
+ if cache_storage == "TMP":
128
+ tempdir = tempfile.mkdtemp()
129
+ storage = [tempdir]
130
+ weakref.finalize(self, self._remove_tempdir, tempdir)
131
+ else:
132
+ if isinstance(cache_storage, str):
133
+ storage = [cache_storage]
134
+ else:
135
+ storage = cache_storage
136
+ os.makedirs(storage[-1], exist_ok=True)
137
+ self.storage = storage
138
+ self.kwargs = target_options or {}
139
+ self.cache_check = cache_check
140
+ self.check_files = check_files
141
+ self.expiry = expiry_time
142
+ self.compression = compression
143
+
144
+ # Size of cache in bytes. If None then the size is unknown and will be
145
+ # recalculated the next time cache_size() is called. On writes to the
146
+ # cache this is reset to None.
147
+ self._cache_size = None
148
+
149
+ if same_names is not None and cache_mapper is not None:
150
+ raise ValueError(
151
+ "Cannot specify both same_names and cache_mapper in "
152
+ "CachingFileSystem.__init__"
153
+ )
154
+ if cache_mapper is not None:
155
+ self._mapper = cache_mapper
156
+ else:
157
+ self._mapper = create_cache_mapper(
158
+ same_names if same_names is not None else False
159
+ )
160
+
161
+ self.target_protocol = (
162
+ target_protocol
163
+ if isinstance(target_protocol, str)
164
+ else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0])
165
+ )
166
+ self._metadata = CacheMetadata(self.storage)
167
+ self.load_cache()
168
+ self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs)
169
+
170
+ def _strip_protocol(path):
171
+ # acts as a method, since each instance has a different target
172
+ return self.fs._strip_protocol(type(self)._strip_protocol(path))
173
+
174
+ self._strip_protocol: Callable = _strip_protocol
175
+
176
+ @staticmethod
177
+ def _remove_tempdir(tempdir):
178
+ try:
179
+ rmtree(tempdir)
180
+ except Exception:
181
+ pass
182
+
183
+ def _mkcache(self):
184
+ os.makedirs(self.storage[-1], exist_ok=True)
185
+
186
+ def cache_size(self):
187
+ """Return size of cache in bytes.
188
+
189
+ If more than one cache directory is in use, only the size of the last
190
+ one (the writable cache directory) is returned.
191
+ """
192
+ if self._cache_size is None:
193
+ cache_dir = self.storage[-1]
194
+ self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
195
+ return self._cache_size
196
+
197
+ def load_cache(self):
198
+ """Read set of stored blocks from file"""
199
+ self._metadata.load()
200
+ self._mkcache()
201
+ self.last_cache = time.time()
202
+
203
+ def save_cache(self):
204
+ """Save set of stored blocks from file"""
205
+ self._mkcache()
206
+ self._metadata.save()
207
+ self.last_cache = time.time()
208
+ self._cache_size = None
209
+
210
+ def _check_cache(self):
211
+ """Reload caches if time elapsed or any disappeared"""
212
+ self._mkcache()
213
+ if not self.cache_check:
214
+ # explicitly told not to bother checking
215
+ return
216
+ timecond = time.time() - self.last_cache > self.cache_check
217
+ existcond = all(os.path.exists(storage) for storage in self.storage)
218
+ if timecond or not existcond:
219
+ self.load_cache()
220
+
221
+ def _check_file(self, path):
222
+ """Is path in cache and still valid"""
223
+ path = self._strip_protocol(path)
224
+ self._check_cache()
225
+ return self._metadata.check_file(path, self)
226
+
227
+ def clear_cache(self):
228
+ """Remove all files and metadata from the cache
229
+
230
+ In the case of multiple cache locations, this clears only the last one,
231
+ which is assumed to be the read/write one.
232
+ """
233
+ rmtree(self.storage[-1])
234
+ self.load_cache()
235
+ self._cache_size = None
236
+
237
+ def clear_expired_cache(self, expiry_time=None):
238
+ """Remove all expired files and metadata from the cache
239
+
240
+ In the case of multiple cache locations, this clears only the last one,
241
+ which is assumed to be the read/write one.
242
+
243
+ Parameters
244
+ ----------
245
+ expiry_time: int
246
+ The time in seconds after which a local copy is considered useless.
247
+ If not defined the default is equivalent to the attribute from the
248
+ file caching instantiation.
249
+ """
250
+
251
+ if not expiry_time:
252
+ expiry_time = self.expiry
253
+
254
+ self._check_cache()
255
+
256
+ expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time)
257
+ for fn in expired_files:
258
+ if os.path.exists(fn):
259
+ os.remove(fn)
260
+
261
+ if writable_cache_empty:
262
+ rmtree(self.storage[-1])
263
+ self.load_cache()
264
+
265
+ self._cache_size = None
266
+
267
+ def pop_from_cache(self, path):
268
+ """Remove cached version of given file
269
+
270
+ Deletes local copy of the given (remote) path. If it is found in a cache
271
+ location which is not the last, it is assumed to be read-only, and
272
+ raises PermissionError
273
+ """
274
+ path = self._strip_protocol(path)
275
+ fn = self._metadata.pop_file(path)
276
+ if fn is not None:
277
+ os.remove(fn)
278
+ self._cache_size = None
279
+
280
+ def _open(
281
+ self,
282
+ path,
283
+ mode="rb",
284
+ block_size=None,
285
+ autocommit=True,
286
+ cache_options=None,
287
+ **kwargs,
288
+ ):
289
+ """Wrap the target _open
290
+
291
+ If the whole file exists in the cache, just open it locally and
292
+ return that.
293
+
294
+ Otherwise, open the file on the target FS, and make it have a mmap
295
+ cache pointing to the location which we determine, in our cache.
296
+ The ``blocks`` instance is shared, so as the mmap cache instance
297
+ updates, so does the entry in our ``cached_files`` attribute.
298
+ We monkey-patch this file, so that when it closes, we call
299
+ ``close_and_update`` to save the state of the blocks.
300
+ """
301
+ path = self._strip_protocol(path)
302
+
303
+ path = self.fs._strip_protocol(path)
304
+ if "r" not in mode:
305
+ return self.fs._open(
306
+ path,
307
+ mode=mode,
308
+ block_size=block_size,
309
+ autocommit=autocommit,
310
+ cache_options=cache_options,
311
+ **kwargs,
312
+ )
313
+ detail = self._check_file(path)
314
+ if detail:
315
+ # file is in cache
316
+ detail, fn = detail
317
+ hash, blocks = detail["fn"], detail["blocks"]
318
+ if blocks is True:
319
+ # stored file is complete
320
+ logger.debug("Opening local copy of %s", path)
321
+ return open(fn, mode)
322
+ # TODO: action where partial file exists in read-only cache
323
+ logger.debug("Opening partially cached copy of %s", path)
324
+ else:
325
+ hash = self._mapper(path)
326
+ fn = os.path.join(self.storage[-1], hash)
327
+ blocks = set()
328
+ detail = {
329
+ "original": path,
330
+ "fn": hash,
331
+ "blocks": blocks,
332
+ "time": time.time(),
333
+ "uid": self.fs.ukey(path),
334
+ }
335
+ self._metadata.update_file(path, detail)
336
+ logger.debug("Creating local sparse file for %s", path)
337
+
338
+ # call target filesystems open
339
+ self._mkcache()
340
+ f = self.fs._open(
341
+ path,
342
+ mode=mode,
343
+ block_size=block_size,
344
+ autocommit=autocommit,
345
+ cache_options=cache_options,
346
+ cache_type="none",
347
+ **kwargs,
348
+ )
349
+ if self.compression:
350
+ comp = (
351
+ infer_compression(path)
352
+ if self.compression == "infer"
353
+ else self.compression
354
+ )
355
+ f = compr[comp](f, mode="rb")
356
+ if "blocksize" in detail:
357
+ if detail["blocksize"] != f.blocksize:
358
+ raise BlocksizeMismatchError(
359
+ f"Cached file must be reopened with same block"
360
+ f" size as original (old: {detail['blocksize']},"
361
+ f" new {f.blocksize})"
362
+ )
363
+ else:
364
+ detail["blocksize"] = f.blocksize
365
+
366
+ def _fetch_ranges(ranges):
367
+ return self.fs.cat_ranges(
368
+ [path] * len(ranges),
369
+ [r[0] for r in ranges],
370
+ [r[1] for r in ranges],
371
+ **kwargs,
372
+ )
373
+
374
+ multi_fetcher = None if self.compression else _fetch_ranges
375
+ f.cache = MMapCache(
376
+ f.blocksize, f._fetch_range, f.size, fn, blocks, multi_fetcher=multi_fetcher
377
+ )
378
+ close = f.close
379
+ f.close = lambda: self.close_and_update(f, close)
380
+ self.save_cache()
381
+ return f
382
+
383
+ def _parent(self, path):
384
+ return self.fs._parent(path)
385
+
386
+ def hash_name(self, path: str, *args: Any) -> str:
387
+ # Kept for backward compatibility with downstream libraries.
388
+ # Ignores extra arguments, previously same_name boolean.
389
+ return self._mapper(path)
390
+
391
+ def close_and_update(self, f, close):
392
+ """Called when a file is closing, so store the set of blocks"""
393
+ if f.closed:
394
+ return
395
+ path = self._strip_protocol(f.path)
396
+ self._metadata.on_close_cached_file(f, path)
397
+ try:
398
+ logger.debug("going to save")
399
+ self.save_cache()
400
+ logger.debug("saved")
401
+ except OSError:
402
+ logger.debug("Cache saving failed while closing file")
403
+ except NameError:
404
+ logger.debug("Cache save failed due to interpreter shutdown")
405
+ close()
406
+ f.closed = True
407
+
408
+ def ls(self, path, detail=True):
409
+ return self.fs.ls(path, detail)
410
+
411
+ def __getattribute__(self, item):
412
+ if item in {
413
+ "load_cache",
414
+ "_open",
415
+ "save_cache",
416
+ "close_and_update",
417
+ "__init__",
418
+ "__getattribute__",
419
+ "__reduce__",
420
+ "_make_local_details",
421
+ "open",
422
+ "cat",
423
+ "cat_file",
424
+ "cat_ranges",
425
+ "get",
426
+ "read_block",
427
+ "tail",
428
+ "head",
429
+ "info",
430
+ "ls",
431
+ "exists",
432
+ "isfile",
433
+ "isdir",
434
+ "_check_file",
435
+ "_check_cache",
436
+ "_mkcache",
437
+ "clear_cache",
438
+ "clear_expired_cache",
439
+ "pop_from_cache",
440
+ "local_file",
441
+ "_paths_from_path",
442
+ "get_mapper",
443
+ "open_many",
444
+ "commit_many",
445
+ "hash_name",
446
+ "__hash__",
447
+ "__eq__",
448
+ "to_json",
449
+ "to_dict",
450
+ "cache_size",
451
+ "pipe_file",
452
+ "pipe",
453
+ "start_transaction",
454
+ "end_transaction",
455
+ }:
456
+ # all the methods defined in this class. Note `open` here, since
457
+ # it calls `_open`, but is actually in superclass
458
+ return lambda *args, **kw: getattr(type(self), item).__get__(self)(
459
+ *args, **kw
460
+ )
461
+ if item in ["__reduce_ex__"]:
462
+ raise AttributeError
463
+ if item in ["transaction"]:
464
+ # property
465
+ return type(self).transaction.__get__(self)
466
+ if item in ["_cache", "transaction_type"]:
467
+ # class attributes
468
+ return getattr(type(self), item)
469
+ if item == "__class__":
470
+ return type(self)
471
+ d = object.__getattribute__(self, "__dict__")
472
+ fs = d.get("fs", None) # fs is not immediately defined
473
+ if item in d:
474
+ return d[item]
475
+ elif fs is not None:
476
+ if item in fs.__dict__:
477
+ # attribute of instance
478
+ return fs.__dict__[item]
479
+ # attributes belonging to the target filesystem
480
+ cls = type(fs)
481
+ m = getattr(cls, item)
482
+ if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
483
+ not hasattr(m, "__self__") or m.__self__ is None
484
+ ):
485
+ # instance method
486
+ return m.__get__(fs, cls)
487
+ return m # class method or attribute
488
+ else:
489
+ # attributes of the superclass, while target is being set up
490
+ return super().__getattribute__(item)
491
+
492
+ def __eq__(self, other):
493
+ """Test for equality."""
494
+ if self is other:
495
+ return True
496
+ if not isinstance(other, type(self)):
497
+ return False
498
+ return (
499
+ self.storage == other.storage
500
+ and self.kwargs == other.kwargs
501
+ and self.cache_check == other.cache_check
502
+ and self.check_files == other.check_files
503
+ and self.expiry == other.expiry
504
+ and self.compression == other.compression
505
+ and self._mapper == other._mapper
506
+ and self.target_protocol == other.target_protocol
507
+ )
508
+
509
+ def __hash__(self):
510
+ """Calculate hash."""
511
+ return (
512
+ hash(tuple(self.storage))
513
+ ^ hash(str(self.kwargs))
514
+ ^ hash(self.cache_check)
515
+ ^ hash(self.check_files)
516
+ ^ hash(self.expiry)
517
+ ^ hash(self.compression)
518
+ ^ hash(self._mapper)
519
+ ^ hash(self.target_protocol)
520
+ )
521
+
522
+
523
+ class WholeFileCacheFileSystem(CachingFileSystem):
524
+ """Caches whole remote files on first access
525
+
526
+ This class is intended as a layer over any other file system, and
527
+ will make a local copy of each file accessed, so that all subsequent
528
+ reads are local. This is similar to ``CachingFileSystem``, but without
529
+ the block-wise functionality and so can work even when sparse files
530
+ are not allowed. See its docstring for definition of the init
531
+ arguments.
532
+
533
+ The class still needs access to the remote store for listing files,
534
+ and may refresh cached files.
535
+ """
536
+
537
+ protocol = "filecache"
538
+ local_file = True
539
+
540
+ def open_many(self, open_files, **kwargs):
541
+ paths = [of.path for of in open_files]
542
+ if "r" in open_files.mode:
543
+ self._mkcache()
544
+ else:
545
+ return [
546
+ LocalTempFile(
547
+ self.fs,
548
+ path,
549
+ mode=open_files.mode,
550
+ fn=os.path.join(self.storage[-1], self._mapper(path)),
551
+ **kwargs,
552
+ )
553
+ for path in paths
554
+ ]
555
+
556
+ if self.compression:
557
+ raise NotImplementedError
558
+ details = [self._check_file(sp) for sp in paths]
559
+ downpath = [p for p, d in zip(paths, details) if not d]
560
+ downfn0 = [
561
+ os.path.join(self.storage[-1], self._mapper(p))
562
+ for p, d in zip(paths, details)
563
+ ] # keep these path names for opening later
564
+ downfn = [fn for fn, d in zip(downfn0, details) if not d]
565
+ if downpath:
566
+ # skip if all files are already cached and up to date
567
+ self.fs.get(downpath, downfn)
568
+
569
+ # update metadata - only happens when downloads are successful
570
+ newdetail = [
571
+ {
572
+ "original": path,
573
+ "fn": self._mapper(path),
574
+ "blocks": True,
575
+ "time": time.time(),
576
+ "uid": self.fs.ukey(path),
577
+ }
578
+ for path in downpath
579
+ ]
580
+ for path, detail in zip(downpath, newdetail):
581
+ self._metadata.update_file(path, detail)
582
+ self.save_cache()
583
+
584
+ def firstpart(fn):
585
+ # helper to adapt both whole-file and simple-cache
586
+ return fn[1] if isinstance(fn, tuple) else fn
587
+
588
+ return [
589
+ open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode)
590
+ for fn0, fn1 in zip(details, downfn0)
591
+ ]
592
+
593
+ def commit_many(self, open_files):
594
+ self.fs.put([f.fn for f in open_files], [f.path for f in open_files])
595
+ [f.close() for f in open_files]
596
+ for f in open_files:
597
+ # in case autocommit is off, and so close did not already delete
598
+ try:
599
+ os.remove(f.name)
600
+ except FileNotFoundError:
601
+ pass
602
+ self._cache_size = None
603
+
604
+ def _make_local_details(self, path):
605
+ hash = self._mapper(path)
606
+ fn = os.path.join(self.storage[-1], hash)
607
+ detail = {
608
+ "original": path,
609
+ "fn": hash,
610
+ "blocks": True,
611
+ "time": time.time(),
612
+ "uid": self.fs.ukey(path),
613
+ }
614
+ self._metadata.update_file(path, detail)
615
+ logger.debug("Copying %s to local cache", path)
616
+ return fn
617
+
618
+ def cat(
619
+ self,
620
+ path,
621
+ recursive=False,
622
+ on_error="raise",
623
+ callback=DEFAULT_CALLBACK,
624
+ **kwargs,
625
+ ):
626
+ paths = self.expand_path(
627
+ path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
628
+ )
629
+ getpaths = []
630
+ storepaths = []
631
+ fns = []
632
+ out = {}
633
+ for p in paths.copy():
634
+ try:
635
+ detail = self._check_file(p)
636
+ if not detail:
637
+ fn = self._make_local_details(p)
638
+ getpaths.append(p)
639
+ storepaths.append(fn)
640
+ else:
641
+ detail, fn = detail if isinstance(detail, tuple) else (None, detail)
642
+ fns.append(fn)
643
+ except Exception as e:
644
+ if on_error == "raise":
645
+ raise
646
+ if on_error == "return":
647
+ out[p] = e
648
+ paths.remove(p)
649
+
650
+ if getpaths:
651
+ self.fs.get(getpaths, storepaths)
652
+ self.save_cache()
653
+
654
+ callback.set_size(len(paths))
655
+ for p, fn in zip(paths, fns):
656
+ with open(fn, "rb") as f:
657
+ out[p] = f.read()
658
+ callback.relative_update(1)
659
+ if isinstance(path, str) and len(paths) == 1 and recursive is False:
660
+ out = out[paths[0]]
661
+ return out
662
+
663
+ def _open(self, path, mode="rb", **kwargs):
664
+ path = self._strip_protocol(path)
665
+ if "r" not in mode:
666
+ hash = self._mapper(path)
667
+ fn = os.path.join(self.storage[-1], hash)
668
+ user_specified_kwargs = {
669
+ k: v
670
+ for k, v in kwargs.items()
671
+ # those kwargs were added by open(), we don't want them
672
+ if k not in ["autocommit", "block_size", "cache_options"]
673
+ }
674
+ return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
675
+ detail = self._check_file(path)
676
+ if detail:
677
+ detail, fn = detail
678
+ _, blocks = detail["fn"], detail["blocks"]
679
+ if blocks is True:
680
+ logger.debug("Opening local copy of %s", path)
681
+
682
+ # In order to support downstream filesystems to be able to
683
+ # infer the compression from the original filename, like
684
+ # the `TarFileSystem`, let's extend the `io.BufferedReader`
685
+ # fileobject protocol by adding a dedicated attribute
686
+ # `original`.
687
+ f = open(fn, mode)
688
+ f.original = detail.get("original")
689
+ return f
690
+ else:
691
+ raise ValueError(
692
+ f"Attempt to open partially cached file {path}"
693
+ f" as a wholly cached file"
694
+ )
695
+ else:
696
+ fn = self._make_local_details(path)
697
+ kwargs["mode"] = mode
698
+
699
+ # call target filesystems open
700
+ self._mkcache()
701
+ if self.compression:
702
+ with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
703
+ if isinstance(f, AbstractBufferedFile):
704
+ # want no type of caching if just downloading whole thing
705
+ f.cache = BaseCache(0, f.cache.fetcher, f.size)
706
+ comp = (
707
+ infer_compression(path)
708
+ if self.compression == "infer"
709
+ else self.compression
710
+ )
711
+ f = compr[comp](f, mode="rb")
712
+ data = True
713
+ while data:
714
+ block = getattr(f, "blocksize", 5 * 2**20)
715
+ data = f.read(block)
716
+ f2.write(data)
717
+ else:
718
+ self.fs.get_file(path, fn)
719
+ self.save_cache()
720
+ return self._open(path, mode)
721
+
722
+
723
+ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
724
+ """Caches whole remote files on first access
725
+
726
+ This class is intended as a layer over any other file system, and
727
+ will make a local copy of each file accessed, so that all subsequent
728
+ reads are local. This implementation only copies whole files, and
729
+ does not keep any metadata about the download time or file details.
730
+ It is therefore safer to use in multi-threaded/concurrent situations.
731
+
732
+ This is the only of the caching filesystems that supports write: you will
733
+ be given a real local open file, and upon close and commit, it will be
734
+ uploaded to the target filesystem; the writability of the target URL is
735
+ not checked until that time.
736
+
737
+ """
738
+
739
+ protocol = "simplecache"
740
+ local_file = True
741
+ transaction_type = WriteCachedTransaction
742
+
743
+ def __init__(self, **kwargs):
744
+ kw = kwargs.copy()
745
+ for key in ["cache_check", "expiry_time", "check_files"]:
746
+ kw[key] = False
747
+ super().__init__(**kw)
748
+ for storage in self.storage:
749
+ if not os.path.exists(storage):
750
+ os.makedirs(storage, exist_ok=True)
751
+
752
+ def _check_file(self, path):
753
+ self._check_cache()
754
+ sha = self._mapper(path)
755
+ for storage in self.storage:
756
+ fn = os.path.join(storage, sha)
757
+ if os.path.exists(fn):
758
+ return fn
759
+
760
+ def save_cache(self):
761
+ pass
762
+
763
+ def load_cache(self):
764
+ pass
765
+
766
+ def pipe_file(self, path, value=None, **kwargs):
767
+ if self._intrans:
768
+ with self.open(path, "wb") as f:
769
+ f.write(value)
770
+ else:
771
+ super().pipe_file(path, value)
772
+
773
+ def ls(self, path, detail=True, **kwargs):
774
+ path = self._strip_protocol(path)
775
+ details = []
776
+ try:
777
+ details = self.fs.ls(
778
+ path, detail=True, **kwargs
779
+ ).copy() # don't edit original!
780
+ except FileNotFoundError as e:
781
+ ex = e
782
+ else:
783
+ ex = None
784
+ if self._intrans:
785
+ path1 = path.rstrip("/") + "/"
786
+ for f in self.transaction.files:
787
+ if f.path == path:
788
+ details.append(
789
+ {"name": path, "size": f.size or f.tell(), "type": "file"}
790
+ )
791
+ elif f.path.startswith(path1):
792
+ if f.path.count("/") == path1.count("/"):
793
+ details.append(
794
+ {"name": f.path, "size": f.size or f.tell(), "type": "file"}
795
+ )
796
+ else:
797
+ dname = "/".join(f.path.split("/")[: path1.count("/") + 1])
798
+ details.append({"name": dname, "size": 0, "type": "directory"})
799
+ if ex is not None and not details:
800
+ raise ex
801
+ if detail:
802
+ return details
803
+ return sorted(_["name"] for _ in details)
804
+
805
+ def info(self, path, **kwargs):
806
+ path = self._strip_protocol(path)
807
+ if self._intrans:
808
+ f = [_ for _ in self.transaction.files if _.path == path]
809
+ if f:
810
+ size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
811
+ return {"name": path, "size": size, "type": "file"}
812
+ f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
813
+ if f:
814
+ return {"name": path, "size": 0, "type": "directory"}
815
+ return self.fs.info(path, **kwargs)
816
+
817
+ def pipe(self, path, value=None, **kwargs):
818
+ if isinstance(path, str):
819
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
820
+ elif isinstance(path, dict):
821
+ for k, v in path.items():
822
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
823
+ else:
824
+ raise ValueError("path must be str or dict")
825
+
826
+ def cat_ranges(
827
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
828
+ ):
829
+ lpaths = [self._check_file(p) for p in paths]
830
+ rpaths = [p for l, p in zip(lpaths, paths) if l is False]
831
+ lpaths = [l for l, p in zip(lpaths, paths) if l is False]
832
+ self.fs.get(rpaths, lpaths)
833
+ return super().cat_ranges(
834
+ paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
835
+ )
836
+
837
+ def _open(self, path, mode="rb", **kwargs):
838
+ path = self._strip_protocol(path)
839
+ sha = self._mapper(path)
840
+
841
+ if "r" not in mode:
842
+ fn = os.path.join(self.storage[-1], sha)
843
+ user_specified_kwargs = {
844
+ k: v
845
+ for k, v in kwargs.items()
846
+ if k not in ["autocommit", "block_size", "cache_options"]
847
+ } # those were added by open()
848
+ return LocalTempFile(
849
+ self,
850
+ path,
851
+ mode=mode,
852
+ autocommit=not self._intrans,
853
+ fn=fn,
854
+ **user_specified_kwargs,
855
+ )
856
+ fn = self._check_file(path)
857
+ if fn:
858
+ return open(fn, mode)
859
+
860
+ fn = os.path.join(self.storage[-1], sha)
861
+ logger.debug("Copying %s to local cache", path)
862
+ kwargs["mode"] = mode
863
+
864
+ self._mkcache()
865
+ self._cache_size = None
866
+ if self.compression:
867
+ with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
868
+ if isinstance(f, AbstractBufferedFile):
869
+ # want no type of caching if just downloading whole thing
870
+ f.cache = BaseCache(0, f.cache.fetcher, f.size)
871
+ comp = (
872
+ infer_compression(path)
873
+ if self.compression == "infer"
874
+ else self.compression
875
+ )
876
+ f = compr[comp](f, mode="rb")
877
+ data = True
878
+ while data:
879
+ block = getattr(f, "blocksize", 5 * 2**20)
880
+ data = f.read(block)
881
+ f2.write(data)
882
+ else:
883
+ self.fs.get_file(path, fn)
884
+ return self._open(path, mode)
885
+
886
+
887
+ class LocalTempFile:
888
+ """A temporary local file, which will be uploaded on commit"""
889
+
890
+ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
891
+ self.fn = fn
892
+ self.fh = open(fn, mode)
893
+ self.mode = mode
894
+ if seek:
895
+ self.fh.seek(seek)
896
+ self.path = path
897
+ self.size = None
898
+ self.fs = fs
899
+ self.closed = False
900
+ self.autocommit = autocommit
901
+ self.kwargs = kwargs
902
+
903
+ def __reduce__(self):
904
+ # always open in r+b to allow continuing writing at a location
905
+ return (
906
+ LocalTempFile,
907
+ (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
908
+ )
909
+
910
+ def __enter__(self):
911
+ return self.fh
912
+
913
+ def __exit__(self, exc_type, exc_val, exc_tb):
914
+ self.close()
915
+
916
+ def close(self):
917
+ # self.size = self.fh.tell()
918
+ if self.closed:
919
+ return
920
+ self.fh.close()
921
+ self.closed = True
922
+ if self.autocommit:
923
+ self.commit()
924
+
925
+ def discard(self):
926
+ self.fh.close()
927
+ os.remove(self.fn)
928
+
929
+ def commit(self):
930
+ self.fs.put(self.fn, self.path, **self.kwargs)
931
+ # we do not delete local copy - it's still in the cache
932
+
933
+ @property
934
+ def name(self):
935
+ return self.fn
936
+
937
+ def __repr__(self) -> str:
938
+ return f"LocalTempFile: {self.path}"
939
+
940
+ def __getattr__(self, item):
941
+ return getattr(self.fh, item)
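A minimal usage sketch of the whole-file caching added above, using URL chaining so that the remote file is downloaded once into a local cache directory and re-read locally afterwards. The URL and cache path are placeholders, not part of this diff.

    import fsspec

    # First access copies the whole file into /tmp/fsspec-cache;
    # subsequent opens of the same URL read the local copy.
    of = fsspec.open(
        "simplecache::https://example.com/data.csv",
        simplecache={"cache_storage": "/tmp/fsspec-cache"},
    )
    with of as f:
        header = f.read(1024)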
temp_venv/lib/python3.13/site-packages/fsspec/implementations/dask.py ADDED
@@ -0,0 +1,152 @@
1
+ import dask
2
+ from distributed.client import Client, _get_global_client
3
+ from distributed.worker import Worker
4
+
5
+ from fsspec import filesystem
6
+ from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
7
+ from fsspec.utils import infer_storage_options
8
+
9
+
10
+ def _get_client(client):
11
+ if client is None:
12
+ return _get_global_client()
13
+ elif isinstance(client, Client):
14
+ return client
15
+ else:
16
+ # e.g., connection string
17
+ return Client(client)
18
+
19
+
20
+ def _in_worker():
21
+ return bool(Worker._instances)
22
+
23
+
24
+ class DaskWorkerFileSystem(AbstractFileSystem):
25
+ """View files accessible to a worker as any other remote file-system
26
+
27
+ When instances run on a worker, they use the real filesystem. When
28
+ run on the client, they call the worker to provide information or data.
29
+
30
+ **Warning**: this implementation is experimental and read-only for now.
31
+ """
32
+
33
+ def __init__(
34
+ self, target_protocol=None, target_options=None, fs=None, client=None, **kwargs
35
+ ):
36
+ super().__init__(**kwargs)
37
+ if not (fs is None) ^ (target_protocol is None):
38
+ raise ValueError(
39
+ "Please provide one of filesystem instance (fs) or"
40
+ " target_protocol, not both"
41
+ )
42
+ self.target_protocol = target_protocol
43
+ self.target_options = target_options
44
+ self.worker = None
45
+ self.client = client
46
+ self.fs = fs
47
+ self._determine_worker()
48
+
49
+ @staticmethod
50
+ def _get_kwargs_from_urls(path):
51
+ so = infer_storage_options(path)
52
+ if "host" in so and "port" in so:
53
+ return {"client": f"{so['host']}:{so['port']}"}
54
+ else:
55
+ return {}
56
+
57
+ def _determine_worker(self):
58
+ if _in_worker():
59
+ self.worker = True
60
+ if self.fs is None:
61
+ self.fs = filesystem(
62
+ self.target_protocol, **(self.target_options or {})
63
+ )
64
+ else:
65
+ self.worker = False
66
+ self.client = _get_client(self.client)
67
+ self.rfs = dask.delayed(self)
68
+
69
+ def mkdir(self, *args, **kwargs):
70
+ if self.worker:
71
+ self.fs.mkdir(*args, **kwargs)
72
+ else:
73
+ self.rfs.mkdir(*args, **kwargs).compute()
74
+
75
+ def rm(self, *args, **kwargs):
76
+ if self.worker:
77
+ self.fs.rm(*args, **kwargs)
78
+ else:
79
+ self.rfs.rm(*args, **kwargs).compute()
80
+
81
+ def copy(self, *args, **kwargs):
82
+ if self.worker:
83
+ self.fs.copy(*args, **kwargs)
84
+ else:
85
+ self.rfs.copy(*args, **kwargs).compute()
86
+
87
+ def mv(self, *args, **kwargs):
88
+ if self.worker:
89
+ self.fs.mv(*args, **kwargs)
90
+ else:
91
+ self.rfs.mv(*args, **kwargs).compute()
92
+
93
+ def ls(self, *args, **kwargs):
94
+ if self.worker:
95
+ return self.fs.ls(*args, **kwargs)
96
+ else:
97
+ return self.rfs.ls(*args, **kwargs).compute()
98
+
99
+ def _open(
100
+ self,
101
+ path,
102
+ mode="rb",
103
+ block_size=None,
104
+ autocommit=True,
105
+ cache_options=None,
106
+ **kwargs,
107
+ ):
108
+ if self.worker:
109
+ return self.fs._open(
110
+ path,
111
+ mode=mode,
112
+ block_size=block_size,
113
+ autocommit=autocommit,
114
+ cache_options=cache_options,
115
+ **kwargs,
116
+ )
117
+ else:
118
+ return DaskFile(
119
+ fs=self,
120
+ path=path,
121
+ mode=mode,
122
+ block_size=block_size,
123
+ autocommit=autocommit,
124
+ cache_options=cache_options,
125
+ **kwargs,
126
+ )
127
+
128
+ def fetch_range(self, path, mode, start, end):
129
+ if self.worker:
130
+ with self._open(path, mode) as f:
131
+ f.seek(start)
132
+ return f.read(end - start)
133
+ else:
134
+ return self.rfs.fetch_range(path, mode, start, end).compute()
135
+
136
+
137
+ class DaskFile(AbstractBufferedFile):
138
+ def __init__(self, mode="rb", **kwargs):
139
+ if mode != "rb":
140
+ raise ValueError('Remote dask files can only be opened in "rb" mode')
141
+ super().__init__(**kwargs)
142
+
143
+ def _upload_chunk(self, final=False):
144
+ pass
145
+
146
+ def _initiate_upload(self):
147
+ """Create remote file/upload"""
148
+ pass
149
+
150
+ def _fetch_range(self, start, end):
151
+ """Get the specified set of bytes from remote"""
152
+ return self.fs.fetch_range(self.path, self.mode, start, end)
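As a rough, hedged sketch of how the worker-proxy filesystem above can be driven from the client: it assumes a running dask.distributed cluster whose workers can reach the target storage, and the bucket and paths are placeholders.

    import fsspec
    from distributed import Client

    client = Client()                                  # connect to (or start) a scheduler
    fs = fsspec.filesystem("dask", target_protocol="s3")
    print(fs.ls("my-bucket/data/"))                    # listing is computed on a worker
    with fs.open("my-bucket/data/part-0.bin", "rb") as f:
        chunk = f.read(2**20)                          # bytes arrive via fetch_range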
temp_venv/lib/python3.13/site-packages/fsspec/implementations/data.py ADDED
@@ -0,0 +1,58 @@
1
+ import base64
2
+ import io
3
+ from typing import Optional
4
+ from urllib.parse import unquote
5
+
6
+ from fsspec import AbstractFileSystem
7
+
8
+
9
+ class DataFileSystem(AbstractFileSystem):
10
+ """A handy decoder for data-URLs
11
+
12
+ Example
13
+ -------
14
+ >>> with fsspec.open("data:,Hello%2C%20World%21") as f:
15
+ ... print(f.read())
16
+ b"Hello, World!"
17
+
18
+ See https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs
19
+ """
20
+
21
+ protocol = "data"
22
+
23
+ def __init__(self, **kwargs):
24
+ """No parameters for this filesystem"""
25
+ super().__init__(**kwargs)
26
+
27
+ def cat_file(self, path, start=None, end=None, **kwargs):
28
+ pref, data = path.split(",", 1)
29
+ if pref.endswith("base64"):
30
+ return base64.b64decode(data)[start:end]
31
+ return unquote(data).encode()[start:end]
32
+
33
+ def info(self, path, **kwargs):
34
+ pref, name = path.split(",", 1)
35
+ data = self.cat_file(path)
36
+ mime = pref.split(":", 1)[1].split(";", 1)[0]
37
+ return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
38
+
39
+ def _open(
40
+ self,
41
+ path,
42
+ mode="rb",
43
+ block_size=None,
44
+ autocommit=True,
45
+ cache_options=None,
46
+ **kwargs,
47
+ ):
48
+ if "r" not in mode:
49
+ raise ValueError("Read only filesystem")
50
+ return io.BytesIO(self.cat_file(path))
51
+
52
+ @staticmethod
53
+ def encode(data: bytes, mime: Optional[str] = None):
54
+ """Format the given data into data-URL syntax
55
+
56
+ This version always base64 encodes, even when the data is ascii/url-safe.
57
+ """
58
+ return f"data:{mime or ''};base64,{base64.b64encode(data).decode()}"
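A quick round-trip sketch of the data-URL decoder above, mirroring the class docstring: encode bytes into a data URL, then read them back through fsspec.

    import fsspec
    from fsspec.implementations.data import DataFileSystem

    url = DataFileSystem.encode(b"Hello, World!", mime="text/plain")
    with fsspec.open(url) as f:
        assert f.read() == b"Hello, World!"   # decoded straight from the URL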
temp_venv/lib/python3.13/site-packages/fsspec/implementations/dbfs.py ADDED
@@ -0,0 +1,467 @@
1
+ import base64
2
+ import urllib
3
+
4
+ import requests
5
+ import requests.exceptions
6
+ from requests.adapters import HTTPAdapter, Retry
7
+
8
+ from fsspec import AbstractFileSystem
9
+ from fsspec.spec import AbstractBufferedFile
10
+
11
+
12
+ class DatabricksException(Exception):
13
+ """
14
+ Helper class for exceptions raised in this module.
15
+ """
16
+
17
+ def __init__(self, error_code, message):
18
+ """Create a new DatabricksException"""
19
+ super().__init__(message)
20
+
21
+ self.error_code = error_code
22
+ self.message = message
23
+
24
+
25
+ class DatabricksFileSystem(AbstractFileSystem):
26
+ """
27
+ Get access to the Databricks filesystem implementation over HTTP.
28
+ Can be used inside and outside of a databricks cluster.
29
+ """
30
+
31
+ def __init__(self, instance, token, **kwargs):
32
+ """
33
+ Create a new DatabricksFileSystem.
34
+
35
+ Parameters
36
+ ----------
37
+ instance: str
38
+ The instance URL of the databricks cluster.
39
+ For example for an Azure databricks cluster, this
40
+ has the form adb-<some-number>.<two digits>.azuredatabricks.net.
41
+ token: str
42
+ Your personal token. Find out more
43
+ here: https://docs.databricks.com/dev-tools/api/latest/authentication.html
44
+ """
45
+ self.instance = instance
46
+ self.token = token
47
+ self.session = requests.Session()
48
+ self.retries = Retry(
49
+ total=10,
50
+ backoff_factor=0.05,
51
+ status_forcelist=[408, 429, 500, 502, 503, 504],
52
+ )
53
+
54
+ self.session.mount("https://", HTTPAdapter(max_retries=self.retries))
55
+ self.session.headers.update({"Authorization": f"Bearer {self.token}"})
56
+
57
+ super().__init__(**kwargs)
58
+
59
+ def ls(self, path, detail=True, **kwargs):
60
+ """
61
+ List the contents of the given path.
62
+
63
+ Parameters
64
+ ----------
65
+ path: str
66
+ Absolute path
67
+ detail: bool
68
+ Return not only the list of filenames,
69
+ but also additional information on file sizes
70
+ and types.
71
+ """
72
+ out = self._ls_from_cache(path)
73
+ if not out:
74
+ try:
75
+ r = self._send_to_api(
76
+ method="get", endpoint="list", json={"path": path}
77
+ )
78
+ except DatabricksException as e:
79
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
80
+ raise FileNotFoundError(e.message) from e
81
+
82
+ raise
83
+ files = r["files"]
84
+ out = [
85
+ {
86
+ "name": o["path"],
87
+ "type": "directory" if o["is_dir"] else "file",
88
+ "size": o["file_size"],
89
+ }
90
+ for o in files
91
+ ]
92
+ self.dircache[path] = out
93
+
94
+ if detail:
95
+ return out
96
+ return [o["name"] for o in out]
97
+
98
+ def makedirs(self, path, exist_ok=True):
99
+ """
100
+ Create a given absolute path and all of its parents.
101
+
102
+ Parameters
103
+ ----------
104
+ path: str
105
+ Absolute path to create
106
+ exist_ok: bool
107
+ If false, check whether the folder
108
+ already exists before creating it (and raise a
109
+ FileExistsError if it does)
110
+ """
111
+ if not exist_ok:
112
+ try:
113
+ # If the following succeeds, the path is already present
114
+ self._send_to_api(
115
+ method="get", endpoint="get-status", json={"path": path}
116
+ )
117
+ raise FileExistsError(f"Path {path} already exists")
118
+ except DatabricksException as e:
119
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
120
+ pass
121
+
122
+ try:
123
+ self._send_to_api(method="post", endpoint="mkdirs", json={"path": path})
124
+ except DatabricksException as e:
125
+ if e.error_code == "RESOURCE_ALREADY_EXISTS":
126
+ raise FileExistsError(e.message) from e
127
+
128
+ raise
129
+ self.invalidate_cache(self._parent(path))
130
+
131
+ def mkdir(self, path, create_parents=True, **kwargs):
132
+ """
133
+ Create a given absolute path and all of its parents.
134
+
135
+ Parameters
136
+ ----------
137
+ path: str
138
+ Absolute path to create
139
+ create_parents: bool
140
+ Whether to create all parents or not.
141
+ "False" is not implemented so far.
142
+ """
143
+ if not create_parents:
144
+ raise NotImplementedError
145
+
146
+ self.mkdirs(path, **kwargs)
147
+
148
+ def rm(self, path, recursive=False, **kwargs):
149
+ """
150
+ Remove the file or folder at the given absolute path.
151
+
152
+ Parameters
153
+ ----------
154
+ path: str
155
+ Absolute path what to remove
156
+ recursive: bool
157
+ Recursively delete all files in a folder.
158
+ """
159
+ try:
160
+ self._send_to_api(
161
+ method="post",
162
+ endpoint="delete",
163
+ json={"path": path, "recursive": recursive},
164
+ )
165
+ except DatabricksException as e:
166
+ # This is not really an exception, it just means
167
+ # not everything was deleted so far
168
+ if e.error_code == "PARTIAL_DELETE":
169
+ self.rm(path=path, recursive=recursive)
170
+ elif e.error_code == "IO_ERROR":
171
+ # Using the same exception as the os module would use here
172
+ raise OSError(e.message) from e
173
+
174
+ raise
175
+ self.invalidate_cache(self._parent(path))
176
+
177
+ def mv(
178
+ self, source_path, destination_path, recursive=False, maxdepth=None, **kwargs
179
+ ):
180
+ """
181
+ Move a source to a destination path.
182
+
183
+ A note from the original [databricks API manual]
184
+ (https://docs.databricks.com/dev-tools/api/latest/dbfs.html#move).
185
+
186
+ When moving a large number of files the API call will time out after
187
+ approximately 60s, potentially resulting in partially moved data.
188
+ Therefore, for operations that move more than 10k files, we strongly
189
+ discourage using the DBFS REST API.
190
+
191
+ Parameters
192
+ ----------
193
+ source_path: str
194
+ From where to move (absolute path)
195
+ destination_path: str
196
+ To where to move (absolute path)
197
+ recursive: bool
198
+ Not implemented so far.
199
+ maxdepth:
200
+ Not implemented so far.
201
+ """
202
+ if recursive:
203
+ raise NotImplementedError
204
+ if maxdepth:
205
+ raise NotImplementedError
206
+
207
+ try:
208
+ self._send_to_api(
209
+ method="post",
210
+ endpoint="move",
211
+ json={"source_path": source_path, "destination_path": destination_path},
212
+ )
213
+ except DatabricksException as e:
214
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
215
+ raise FileNotFoundError(e.message) from e
216
+ elif e.error_code == "RESOURCE_ALREADY_EXISTS":
217
+ raise FileExistsError(e.message) from e
218
+
219
+ raise
220
+ self.invalidate_cache(self._parent(source_path))
221
+ self.invalidate_cache(self._parent(destination_path))
222
+
223
+ def _open(self, path, mode="rb", block_size="default", **kwargs):
224
+ """
225
+ Overwrite the base class method to make sure to create a DBFile.
226
+ All arguments are copied from the base method.
227
+
228
+ Only the default blocksize is allowed.
229
+ """
230
+ return DatabricksFile(self, path, mode=mode, block_size=block_size, **kwargs)
231
+
232
+ def _send_to_api(self, method, endpoint, json):
233
+ """
234
+ Send the given json to the DBFS API
235
+ using a get or post request (specified by the argument `method`).
236
+
237
+ Parameters
238
+ ----------
239
+ method: str
240
+ Which http method to use for communication; "get" or "post".
241
+ endpoint: str
242
+ Where to send the request to (last part of the API URL)
243
+ json: dict
244
+ Dictionary of information to send
245
+ """
246
+ if method == "post":
247
+ session_call = self.session.post
248
+ elif method == "get":
249
+ session_call = self.session.get
250
+ else:
251
+ raise ValueError(f"Do not understand method {method}")
252
+
253
+ url = urllib.parse.urljoin(f"https://{self.instance}/api/2.0/dbfs/", endpoint)
254
+
255
+ r = session_call(url, json=json)
256
+
257
+ # The DBFS API will return a json, also in case of an exception.
258
+ # We want to preserve this information as good as possible.
259
+ try:
260
+ r.raise_for_status()
261
+ except requests.HTTPError as e:
262
+ # try to extract json error message
263
+ # if that fails, fall back to the original exception
264
+ try:
265
+ exception_json = e.response.json()
266
+ except Exception:
267
+ raise e from None
268
+
269
+ raise DatabricksException(**exception_json) from e
270
+
271
+ return r.json()
272
+
273
+ def _create_handle(self, path, overwrite=True):
274
+ """
275
+ Internal function to create a handle, which can be used to
276
+ write blocks of a file to DBFS.
277
+ A handle has a unique identifier which needs to be passed
278
+ whenever written during this transaction.
279
+ The handle is active for 10 minutes - after that a new
280
+ write transaction needs to be created.
281
+ Make sure to close the handle after you are finished.
282
+
283
+ Parameters
284
+ ----------
285
+ path: str
286
+ Absolute path for this file.
287
+ overwrite: bool
288
+ If a file already exist at this location, either overwrite
289
+ it or raise an exception.
290
+ """
291
+ try:
292
+ r = self._send_to_api(
293
+ method="post",
294
+ endpoint="create",
295
+ json={"path": path, "overwrite": overwrite},
296
+ )
297
+ return r["handle"]
298
+ except DatabricksException as e:
299
+ if e.error_code == "RESOURCE_ALREADY_EXISTS":
300
+ raise FileExistsError(e.message) from e
301
+
302
+ raise
303
+
304
+ def _close_handle(self, handle):
305
+ """
306
+ Close a handle, which was opened by :func:`_create_handle`.
307
+
308
+ Parameters
309
+ ----------
310
+ handle: str
311
+ Which handle to close.
312
+ """
313
+ try:
314
+ self._send_to_api(method="post", endpoint="close", json={"handle": handle})
315
+ except DatabricksException as e:
316
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
317
+ raise FileNotFoundError(e.message) from e
318
+
319
+ raise
320
+
321
+ def _add_data(self, handle, data):
322
+ """
323
+ Upload data to an already opened file handle
324
+ (opened by :func:`_create_handle`).
325
+ The maximal allowed data size is 1MB after
326
+ conversion to base64.
327
+ Remember to close the handle when you are finished.
328
+
329
+ Parameters
330
+ ----------
331
+ handle: str
332
+ Which handle to upload data to.
333
+ data: bytes
334
+ Block of data to add to the handle.
335
+ """
336
+ data = base64.b64encode(data).decode()
337
+ try:
338
+ self._send_to_api(
339
+ method="post",
340
+ endpoint="add-block",
341
+ json={"handle": handle, "data": data},
342
+ )
343
+ except DatabricksException as e:
344
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
345
+ raise FileNotFoundError(e.message) from e
346
+ elif e.error_code == "MAX_BLOCK_SIZE_EXCEEDED":
347
+ raise ValueError(e.message) from e
348
+
349
+ raise
350
+
351
+ def _get_data(self, path, start, end):
352
+ """
353
+ Download data in bytes from a given absolute path in a block
354
+ from [start, start+length].
355
+ The maximum number of allowed bytes to read is 1MB.
356
+
357
+ Parameters
358
+ ----------
359
+ path: str
360
+ Absolute path to download data from
361
+ start: int
362
+ Start position of the block
363
+ end: int
364
+ End position of the block
365
+ """
366
+ try:
367
+ r = self._send_to_api(
368
+ method="get",
369
+ endpoint="read",
370
+ json={"path": path, "offset": start, "length": end - start},
371
+ )
372
+ return base64.b64decode(r["data"])
373
+ except DatabricksException as e:
374
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
375
+ raise FileNotFoundError(e.message) from e
376
+ elif e.error_code in ["INVALID_PARAMETER_VALUE", "MAX_READ_SIZE_EXCEEDED"]:
377
+ raise ValueError(e.message) from e
378
+
379
+ raise
380
+
381
+ def invalidate_cache(self, path=None):
382
+ if path is None:
383
+ self.dircache.clear()
384
+ else:
385
+ self.dircache.pop(path, None)
386
+ super().invalidate_cache(path)
387
+
388
+
389
+ class DatabricksFile(AbstractBufferedFile):
390
+ """
391
+ Helper class for files referenced in the DatabricksFileSystem.
392
+ """
393
+
394
+ DEFAULT_BLOCK_SIZE = 1 * 2**20 # only allowed block size
395
+
396
+ def __init__(
397
+ self,
398
+ fs,
399
+ path,
400
+ mode="rb",
401
+ block_size="default",
402
+ autocommit=True,
403
+ cache_type="readahead",
404
+ cache_options=None,
405
+ **kwargs,
406
+ ):
407
+ """
408
+ Create a new instance of the DatabricksFile.
409
+
410
+ The blocksize needs to be the default one.
411
+ """
412
+ if block_size is None or block_size == "default":
413
+ block_size = self.DEFAULT_BLOCK_SIZE
414
+
415
+ assert block_size == self.DEFAULT_BLOCK_SIZE, (
416
+ f"Only the default block size is allowed, not {block_size}"
417
+ )
418
+
419
+ super().__init__(
420
+ fs,
421
+ path,
422
+ mode=mode,
423
+ block_size=block_size,
424
+ autocommit=autocommit,
425
+ cache_type=cache_type,
426
+ cache_options=cache_options or {},
427
+ **kwargs,
428
+ )
429
+
430
+ def _initiate_upload(self):
431
+ """Internal function to start a file upload"""
432
+ self.handle = self.fs._create_handle(self.path)
433
+
434
+ def _upload_chunk(self, final=False):
435
+ """Internal function to add a chunk of data to a started upload"""
436
+ self.buffer.seek(0)
437
+ data = self.buffer.getvalue()
438
+
439
+ data_chunks = [
440
+ data[start:end] for start, end in self._to_sized_blocks(len(data))
441
+ ]
442
+
443
+ for data_chunk in data_chunks:
444
+ self.fs._add_data(handle=self.handle, data=data_chunk)
445
+
446
+ if final:
447
+ self.fs._close_handle(handle=self.handle)
448
+ return True
449
+
450
+ def _fetch_range(self, start, end):
451
+ """Internal function to download a block of data"""
452
+ return_buffer = b""
453
+ length = end - start
454
+ for chunk_start, chunk_end in self._to_sized_blocks(length, start):
455
+ return_buffer += self.fs._get_data(
456
+ path=self.path, start=chunk_start, end=chunk_end
457
+ )
458
+
459
+ return return_buffer
460
+
461
+ def _to_sized_blocks(self, length, start=0):
462
+ """Helper function to split the range [start, start + length) into block-sized chunks"""
463
+ end = start + length
464
+ for data_chunk in range(start, end, self.blocksize):
465
+ data_start = data_chunk
466
+ data_end = min(end, data_chunk + self.blocksize)
467
+ yield data_start, data_end
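A hedged usage sketch for the DBFS backend above; the instance URL, token and paths are placeholders that must come from your own Databricks workspace.

    import fsspec

    fs = fsspec.filesystem(
        "dbfs",
        instance="adb-1234567890123456.7.azuredatabricks.net",  # placeholder
        token="<personal-access-token>",                        # placeholder
    )
    print(fs.ls("/FileStore"))
    with fs.open("/FileStore/example.txt", "wb") as f:
        f.write(b"uploaded in 1 MB base64-encoded blocks")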
temp_venv/lib/python3.13/site-packages/fsspec/implementations/dirfs.py ADDED
@@ -0,0 +1,388 @@
1
+ from .. import filesystem
2
+ from ..asyn import AsyncFileSystem
3
+
4
+
5
+ class DirFileSystem(AsyncFileSystem):
6
+ """Directory prefix filesystem
7
+
8
+ The DirFileSystem is a filesystem-wrapper. It assumes every path it is dealing with
9
+ is relative to the `path`. After performing the necessary paths operation it
10
+ delegates everything to the wrapped filesystem.
11
+ """
12
+
13
+ protocol = "dir"
14
+
15
+ def __init__(
16
+ self,
17
+ path=None,
18
+ fs=None,
19
+ fo=None,
20
+ target_protocol=None,
21
+ target_options=None,
22
+ **storage_options,
23
+ ):
24
+ """
25
+ Parameters
26
+ ----------
27
+ path: str
28
+ Path to the directory.
29
+ fs: AbstractFileSystem
30
+ An instantiated filesystem to wrap.
31
+ target_protocol, target_options:
32
+ if fs is none, construct it from these
33
+ fo: str
34
+ Alternate for path; do not provide both
35
+ """
36
+ super().__init__(**storage_options)
37
+ if fs is None:
38
+ fs = filesystem(protocol=target_protocol, **(target_options or {}))
39
+ path = path or fo
40
+
41
+ if self.asynchronous and not fs.async_impl:
42
+ raise ValueError("can't use asynchronous with non-async fs")
43
+
44
+ if fs.async_impl and self.asynchronous != fs.asynchronous:
45
+ raise ValueError("both dirfs and fs should be in the same sync/async mode")
46
+
47
+ self.path = fs._strip_protocol(path)
48
+ self.fs = fs
49
+
50
+ def _join(self, path):
51
+ if isinstance(path, str):
52
+ if not self.path:
53
+ return path
54
+ if not path:
55
+ return self.path
56
+ return self.fs.sep.join((self.path, self._strip_protocol(path)))
57
+ if isinstance(path, dict):
58
+ return {self._join(_path): value for _path, value in path.items()}
59
+ return [self._join(_path) for _path in path]
60
+
61
+ def _relpath(self, path):
62
+ if isinstance(path, str):
63
+ if not self.path:
64
+ return path
65
+ # We need to account for S3FileSystem returning paths that do not
66
+ # start with a '/'
67
+ if path == self.path or (
68
+ self.path.startswith(self.fs.sep) and path == self.path[1:]
69
+ ):
70
+ return ""
71
+ prefix = self.path + self.fs.sep
72
+ if self.path.startswith(self.fs.sep) and not path.startswith(self.fs.sep):
73
+ prefix = prefix[1:]
74
+ assert path.startswith(prefix)
75
+ return path[len(prefix) :]
76
+ return [self._relpath(_path) for _path in path]
77
+
78
+ # Wrappers below
79
+
80
+ @property
81
+ def sep(self):
82
+ return self.fs.sep
83
+
84
+ async def set_session(self, *args, **kwargs):
85
+ return await self.fs.set_session(*args, **kwargs)
86
+
87
+ async def _rm_file(self, path, **kwargs):
88
+ return await self.fs._rm_file(self._join(path), **kwargs)
89
+
90
+ def rm_file(self, path, **kwargs):
91
+ return self.fs.rm_file(self._join(path), **kwargs)
92
+
93
+ async def _rm(self, path, *args, **kwargs):
94
+ return await self.fs._rm(self._join(path), *args, **kwargs)
95
+
96
+ def rm(self, path, *args, **kwargs):
97
+ return self.fs.rm(self._join(path), *args, **kwargs)
98
+
99
+ async def _cp_file(self, path1, path2, **kwargs):
100
+ return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs)
101
+
102
+ def cp_file(self, path1, path2, **kwargs):
103
+ return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs)
104
+
105
+ async def _copy(
106
+ self,
107
+ path1,
108
+ path2,
109
+ *args,
110
+ **kwargs,
111
+ ):
112
+ return await self.fs._copy(
113
+ self._join(path1),
114
+ self._join(path2),
115
+ *args,
116
+ **kwargs,
117
+ )
118
+
119
+ def copy(self, path1, path2, *args, **kwargs):
120
+ return self.fs.copy(
121
+ self._join(path1),
122
+ self._join(path2),
123
+ *args,
124
+ **kwargs,
125
+ )
126
+
127
+ async def _pipe(self, path, *args, **kwargs):
128
+ return await self.fs._pipe(self._join(path), *args, **kwargs)
129
+
130
+ def pipe(self, path, *args, **kwargs):
131
+ return self.fs.pipe(self._join(path), *args, **kwargs)
132
+
133
+ async def _pipe_file(self, path, *args, **kwargs):
134
+ return await self.fs._pipe_file(self._join(path), *args, **kwargs)
135
+
136
+ def pipe_file(self, path, *args, **kwargs):
137
+ return self.fs.pipe_file(self._join(path), *args, **kwargs)
138
+
139
+ async def _cat_file(self, path, *args, **kwargs):
140
+ return await self.fs._cat_file(self._join(path), *args, **kwargs)
141
+
142
+ def cat_file(self, path, *args, **kwargs):
143
+ return self.fs.cat_file(self._join(path), *args, **kwargs)
144
+
145
+ async def _cat(self, path, *args, **kwargs):
146
+ ret = await self.fs._cat(
147
+ self._join(path),
148
+ *args,
149
+ **kwargs,
150
+ )
151
+
152
+ if isinstance(ret, dict):
153
+ return {self._relpath(key): value for key, value in ret.items()}
154
+
155
+ return ret
156
+
157
+ def cat(self, path, *args, **kwargs):
158
+ ret = self.fs.cat(
159
+ self._join(path),
160
+ *args,
161
+ **kwargs,
162
+ )
163
+
164
+ if isinstance(ret, dict):
165
+ return {self._relpath(key): value for key, value in ret.items()}
166
+
167
+ return ret
168
+
169
+ async def _put_file(self, lpath, rpath, **kwargs):
170
+ return await self.fs._put_file(lpath, self._join(rpath), **kwargs)
171
+
172
+ def put_file(self, lpath, rpath, **kwargs):
173
+ return self.fs.put_file(lpath, self._join(rpath), **kwargs)
174
+
175
+ async def _put(
176
+ self,
177
+ lpath,
178
+ rpath,
179
+ *args,
180
+ **kwargs,
181
+ ):
182
+ return await self.fs._put(
183
+ lpath,
184
+ self._join(rpath),
185
+ *args,
186
+ **kwargs,
187
+ )
188
+
189
+ def put(self, lpath, rpath, *args, **kwargs):
190
+ return self.fs.put(
191
+ lpath,
192
+ self._join(rpath),
193
+ *args,
194
+ **kwargs,
195
+ )
196
+
197
+ async def _get_file(self, rpath, lpath, **kwargs):
198
+ return await self.fs._get_file(self._join(rpath), lpath, **kwargs)
199
+
200
+ def get_file(self, rpath, lpath, **kwargs):
201
+ return self.fs.get_file(self._join(rpath), lpath, **kwargs)
202
+
203
+ async def _get(self, rpath, *args, **kwargs):
204
+ return await self.fs._get(self._join(rpath), *args, **kwargs)
205
+
206
+ def get(self, rpath, *args, **kwargs):
207
+ return self.fs.get(self._join(rpath), *args, **kwargs)
208
+
209
+ async def _isfile(self, path):
210
+ return await self.fs._isfile(self._join(path))
211
+
212
+ def isfile(self, path):
213
+ return self.fs.isfile(self._join(path))
214
+
215
+ async def _isdir(self, path):
216
+ return await self.fs._isdir(self._join(path))
217
+
218
+ def isdir(self, path):
219
+ return self.fs.isdir(self._join(path))
220
+
221
+ async def _size(self, path):
222
+ return await self.fs._size(self._join(path))
223
+
224
+ def size(self, path):
225
+ return self.fs.size(self._join(path))
226
+
227
+ async def _exists(self, path):
228
+ return await self.fs._exists(self._join(path))
229
+
230
+ def exists(self, path):
231
+ return self.fs.exists(self._join(path))
232
+
233
+ async def _info(self, path, **kwargs):
234
+ info = await self.fs._info(self._join(path), **kwargs)
235
+ info = info.copy()
236
+ info["name"] = self._relpath(info["name"])
237
+ return info
238
+
239
+ def info(self, path, **kwargs):
240
+ info = self.fs.info(self._join(path), **kwargs)
241
+ info = info.copy()
242
+ info["name"] = self._relpath(info["name"])
243
+ return info
244
+
245
+ async def _ls(self, path, detail=True, **kwargs):
246
+ ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy()
247
+ if detail:
248
+ out = []
249
+ for entry in ret:
250
+ entry = entry.copy()
251
+ entry["name"] = self._relpath(entry["name"])
252
+ out.append(entry)
253
+ return out
254
+
255
+ return self._relpath(ret)
256
+
257
+ def ls(self, path, detail=True, **kwargs):
258
+ ret = self.fs.ls(self._join(path), detail=detail, **kwargs).copy()
259
+ if detail:
260
+ out = []
261
+ for entry in ret:
262
+ entry = entry.copy()
263
+ entry["name"] = self._relpath(entry["name"])
264
+ out.append(entry)
265
+ return out
266
+
267
+ return self._relpath(ret)
268
+
269
+ async def _walk(self, path, *args, **kwargs):
270
+ async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs):
271
+ yield self._relpath(root), dirs, files
272
+
273
+ def walk(self, path, *args, **kwargs):
274
+ for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs):
275
+ yield self._relpath(root), dirs, files
276
+
277
+ async def _glob(self, path, **kwargs):
278
+ detail = kwargs.get("detail", False)
279
+ ret = await self.fs._glob(self._join(path), **kwargs)
280
+ if detail:
281
+ return {self._relpath(path): info for path, info in ret.items()}
282
+ return self._relpath(ret)
283
+
284
+ def glob(self, path, **kwargs):
285
+ detail = kwargs.get("detail", False)
286
+ ret = self.fs.glob(self._join(path), **kwargs)
287
+ if detail:
288
+ return {self._relpath(path): info for path, info in ret.items()}
289
+ return self._relpath(ret)
290
+
291
+ async def _du(self, path, *args, **kwargs):
292
+ total = kwargs.get("total", True)
293
+ ret = await self.fs._du(self._join(path), *args, **kwargs)
294
+ if total:
295
+ return ret
296
+
297
+ return {self._relpath(path): size for path, size in ret.items()}
298
+
299
+ def du(self, path, *args, **kwargs):
300
+ total = kwargs.get("total", True)
301
+ ret = self.fs.du(self._join(path), *args, **kwargs)
302
+ if total:
303
+ return ret
304
+
305
+ return {self._relpath(path): size for path, size in ret.items()}
306
+
307
+ async def _find(self, path, *args, **kwargs):
308
+ detail = kwargs.get("detail", False)
309
+ ret = await self.fs._find(self._join(path), *args, **kwargs)
310
+ if detail:
311
+ return {self._relpath(path): info for path, info in ret.items()}
312
+ return self._relpath(ret)
313
+
314
+ def find(self, path, *args, **kwargs):
315
+ detail = kwargs.get("detail", False)
316
+ ret = self.fs.find(self._join(path), *args, **kwargs)
317
+ if detail:
318
+ return {self._relpath(path): info for path, info in ret.items()}
319
+ return self._relpath(ret)
320
+
321
+ async def _expand_path(self, path, *args, **kwargs):
322
+ return self._relpath(
323
+ await self.fs._expand_path(self._join(path), *args, **kwargs)
324
+ )
325
+
326
+ def expand_path(self, path, *args, **kwargs):
327
+ return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs))
328
+
329
+ async def _mkdir(self, path, *args, **kwargs):
330
+ return await self.fs._mkdir(self._join(path), *args, **kwargs)
331
+
332
+ def mkdir(self, path, *args, **kwargs):
333
+ return self.fs.mkdir(self._join(path), *args, **kwargs)
334
+
335
+ async def _makedirs(self, path, *args, **kwargs):
336
+ return await self.fs._makedirs(self._join(path), *args, **kwargs)
337
+
338
+ def makedirs(self, path, *args, **kwargs):
339
+ return self.fs.makedirs(self._join(path), *args, **kwargs)
340
+
341
+ def rmdir(self, path):
342
+ return self.fs.rmdir(self._join(path))
343
+
344
+ def mv(self, path1, path2, **kwargs):
345
+ return self.fs.mv(
346
+ self._join(path1),
347
+ self._join(path2),
348
+ **kwargs,
349
+ )
350
+
351
+ def touch(self, path, **kwargs):
352
+ return self.fs.touch(self._join(path), **kwargs)
353
+
354
+ def created(self, path):
355
+ return self.fs.created(self._join(path))
356
+
357
+ def modified(self, path):
358
+ return self.fs.modified(self._join(path))
359
+
360
+ def sign(self, path, *args, **kwargs):
361
+ return self.fs.sign(self._join(path), *args, **kwargs)
362
+
363
+ def __repr__(self):
364
+ return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})"
365
+
366
+ def open(
367
+ self,
368
+ path,
369
+ *args,
370
+ **kwargs,
371
+ ):
372
+ return self.fs.open(
373
+ self._join(path),
374
+ *args,
375
+ **kwargs,
376
+ )
377
+
378
+ async def open_async(
379
+ self,
380
+ path,
381
+ *args,
382
+ **kwargs,
383
+ ):
384
+ return await self.fs.open_async(
385
+ self._join(path),
386
+ *args,
387
+ **kwargs,
388
+ )
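A small, self-contained sketch of the prefix wrapper above, backed by an in-memory filesystem; the "/project" prefix is an arbitrary example.

    from fsspec.implementations.dirfs import DirFileSystem
    from fsspec.implementations.memory import MemoryFileSystem

    mem = MemoryFileSystem()
    mem.makedirs("/project/data", exist_ok=True)
    with mem.open("/project/data/a.txt", "wb") as f:
        f.write(b"hello")

    dirfs = DirFileSystem(path="/project", fs=mem)
    print(dirfs.ls("data"))              # entries reported relative to /project
    print(dirfs.cat_file("data/a.txt"))  # b"hello"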
temp_venv/lib/python3.13/site-packages/fsspec/implementations/ftp.py ADDED
@@ -0,0 +1,395 @@
1
+ import os
2
+ import sys
3
+ import uuid
4
+ import warnings
5
+ from ftplib import FTP, FTP_TLS, Error, error_perm
6
+ from typing import Any
7
+
8
+ from ..spec import AbstractBufferedFile, AbstractFileSystem
9
+ from ..utils import infer_storage_options, isfilelike
10
+
11
+
12
+ class FTPFileSystem(AbstractFileSystem):
13
+ """A filesystem over classic FTP"""
14
+
15
+ root_marker = "/"
16
+ cachable = False
17
+ protocol = "ftp"
18
+
19
+ def __init__(
20
+ self,
21
+ host,
22
+ port=21,
23
+ username=None,
24
+ password=None,
25
+ acct=None,
26
+ block_size=None,
27
+ tempdir=None,
28
+ timeout=30,
29
+ encoding="utf-8",
30
+ tls=False,
31
+ **kwargs,
32
+ ):
33
+ """
34
+ You can use _get_kwargs_from_urls to get some kwargs from
35
+ a reasonable FTP url.
36
+
37
+ Authentication will be anonymous if username/password are not
38
+ given.
39
+
40
+ Parameters
41
+ ----------
42
+ host: str
43
+ The remote server name/ip to connect to
44
+ port: int
45
+ Port to connect with
46
+ username: str or None
47
+ If authenticating, the user's identifier
48
+ password: str of None
49
+ User's password on the server, if using
50
+ acct: str or None
51
+ Some servers also need an "account" string for auth
52
+ block_size: int or None
53
+ If given, the read-ahead or write buffer size.
54
+ tempdir: str
55
+ Directory on remote to put temporary files when in a transaction
56
+ timeout: int
57
+ Timeout of the ftp connection in seconds
58
+ encoding: str
59
+ Encoding to use for directories and filenames in FTP connection
60
+ tls: bool
61
+ Use FTP-TLS, by default False
62
+ """
63
+ super().__init__(**kwargs)
64
+ self.host = host
65
+ self.port = port
66
+ self.tempdir = tempdir or "/tmp"
67
+ self.cred = username or "", password or "", acct or ""
68
+ self.timeout = timeout
69
+ self.encoding = encoding
70
+ if block_size is not None:
71
+ self.blocksize = block_size
72
+ else:
73
+ self.blocksize = 2**16
74
+ self.tls = tls
75
+ self._connect()
76
+ if self.tls:
77
+ self.ftp.prot_p()
78
+
79
+ def _connect(self):
80
+ if self.tls:
81
+ ftp_cls = FTP_TLS
82
+ else:
83
+ ftp_cls = FTP
84
+ if sys.version_info >= (3, 9):
85
+ self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
86
+ elif self.encoding:
87
+ warnings.warn("`encoding` not supported for python<3.9, ignoring")
88
+ self.ftp = ftp_cls(timeout=self.timeout)
89
+ else:
90
+ self.ftp = ftp_cls(timeout=self.timeout)
91
+ self.ftp.connect(self.host, self.port)
92
+ self.ftp.login(*self.cred)
93
+
94
+ @classmethod
95
+ def _strip_protocol(cls, path):
96
+ return "/" + infer_storage_options(path)["path"].lstrip("/").rstrip("/")
97
+
98
+ @staticmethod
99
+ def _get_kwargs_from_urls(urlpath):
100
+ out = infer_storage_options(urlpath)
101
+ out.pop("path", None)
102
+ out.pop("protocol", None)
103
+ return out
104
+
105
+ def ls(self, path, detail=True, **kwargs):
106
+ path = self._strip_protocol(path)
107
+ out = []
108
+ if path not in self.dircache:
109
+ try:
110
+ try:
111
+ out = [
112
+ (fn, details)
113
+ for (fn, details) in self.ftp.mlsd(path)
114
+ if fn not in [".", ".."]
115
+ and details["type"] not in ["pdir", "cdir"]
116
+ ]
117
+ except error_perm:
118
+ out = _mlsd2(self.ftp, path) # Not platform independent
119
+ for fn, details in out:
120
+ details["name"] = "/".join(
121
+ ["" if path == "/" else path, fn.lstrip("/")]
122
+ )
123
+ if details["type"] == "file":
124
+ details["size"] = int(details["size"])
125
+ else:
126
+ details["size"] = 0
127
+ if details["type"] == "dir":
128
+ details["type"] = "directory"
129
+ self.dircache[path] = out
130
+ except Error:
131
+ try:
132
+ info = self.info(path)
133
+ if info["type"] == "file":
134
+ out = [(path, info)]
135
+ except (Error, IndexError) as exc:
136
+ raise FileNotFoundError(path) from exc
137
+ files = self.dircache.get(path, out)
138
+ if not detail:
139
+ return sorted([fn for fn, details in files])
140
+ return [details for fn, details in files]
141
+
142
+ def info(self, path, **kwargs):
143
+ # implement with direct method
144
+ path = self._strip_protocol(path)
145
+ if path == "/":
146
+ # special case, since this dir has no real entry
147
+ return {"name": "/", "size": 0, "type": "directory"}
148
+ files = self.ls(self._parent(path).lstrip("/"), True)
149
+ try:
150
+ out = next(f for f in files if f["name"] == path)
151
+ except StopIteration as exc:
152
+ raise FileNotFoundError(path) from exc
153
+ return out
154
+
155
+ def get_file(self, rpath, lpath, **kwargs):
156
+ if self.isdir(rpath):
157
+ if not os.path.exists(lpath):
158
+ os.mkdir(lpath)
159
+ return
160
+ if isfilelike(lpath):
161
+ outfile = lpath
162
+ else:
163
+ outfile = open(lpath, "wb")
164
+
165
+ def cb(x):
166
+ outfile.write(x)
167
+
168
+ self.ftp.retrbinary(
169
+ f"RETR {rpath}",
170
+ blocksize=self.blocksize,
171
+ callback=cb,
172
+ )
173
+ if not isfilelike(lpath):
174
+ outfile.close()
175
+
176
+ def cat_file(self, path, start=None, end=None, **kwargs):
177
+ if end is not None:
178
+ return super().cat_file(path, start, end, **kwargs)
179
+ out = []
180
+
181
+ def cb(x):
182
+ out.append(x)
183
+
184
+ try:
185
+ self.ftp.retrbinary(
186
+ f"RETR {path}",
187
+ blocksize=self.blocksize,
188
+ rest=start,
189
+ callback=cb,
190
+ )
191
+ except (Error, error_perm) as orig_exc:
192
+ raise FileNotFoundError(path) from orig_exc
193
+ return b"".join(out)
194
+
195
+ def _open(
196
+ self,
197
+ path,
198
+ mode="rb",
199
+ block_size=None,
200
+ cache_options=None,
201
+ autocommit=True,
202
+ **kwargs,
203
+ ):
204
+ path = self._strip_protocol(path)
205
+ block_size = block_size or self.blocksize
206
+ return FTPFile(
207
+ self,
208
+ path,
209
+ mode=mode,
210
+ block_size=block_size,
211
+ tempdir=self.tempdir,
212
+ autocommit=autocommit,
213
+ cache_options=cache_options,
214
+ )
215
+
216
+ def _rm(self, path):
217
+ path = self._strip_protocol(path)
218
+ self.ftp.delete(path)
219
+ self.invalidate_cache(self._parent(path))
220
+
221
+ def rm(self, path, recursive=False, maxdepth=None):
222
+ paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
223
+ for p in reversed(paths):
224
+ if self.isfile(p):
225
+ self.rm_file(p)
226
+ else:
227
+ self.rmdir(p)
228
+
229
+ def mkdir(self, path: str, create_parents: bool = True, **kwargs: Any) -> None:
230
+ path = self._strip_protocol(path)
231
+ parent = self._parent(path)
232
+ if parent != self.root_marker and not self.exists(parent) and create_parents:
233
+ self.mkdir(parent, create_parents=create_parents)
234
+
235
+ self.ftp.mkd(path)
236
+ self.invalidate_cache(self._parent(path))
237
+
238
+ def makedirs(self, path: str, exist_ok: bool = False) -> None:
239
+ path = self._strip_protocol(path)
240
+ if self.exists(path):
241
+ # NB: "/" does not "exist" as it has no directory entry
242
+ if not exist_ok:
243
+ raise FileExistsError(f"{path} exists without `exist_ok`")
244
+ # exists_ok=True -> no-op
245
+ else:
246
+ self.mkdir(path, create_parents=True)
247
+
248
+ def rmdir(self, path):
249
+ path = self._strip_protocol(path)
250
+ self.ftp.rmd(path)
251
+ self.invalidate_cache(self._parent(path))
252
+
253
+ def mv(self, path1, path2, **kwargs):
254
+ path1 = self._strip_protocol(path1)
255
+ path2 = self._strip_protocol(path2)
256
+ self.ftp.rename(path1, path2)
257
+ self.invalidate_cache(self._parent(path1))
258
+ self.invalidate_cache(self._parent(path2))
259
+
260
+ def __del__(self):
261
+ self.ftp.close()
262
+
263
+ def invalidate_cache(self, path=None):
264
+ if path is None:
265
+ self.dircache.clear()
266
+ else:
267
+ self.dircache.pop(path, None)
268
+ super().invalidate_cache(path)
269
+
270
+
271
+ class TransferDone(Exception):
272
+ """Internal exception to break out of transfer"""
273
+
274
+ pass
275
+
276
+
277
+ class FTPFile(AbstractBufferedFile):
278
+ """Interact with a remote FTP file with read/write buffering"""
279
+
280
+ def __init__(
281
+ self,
282
+ fs,
283
+ path,
284
+ mode="rb",
285
+ block_size="default",
286
+ autocommit=True,
287
+ cache_type="readahead",
288
+ cache_options=None,
289
+ **kwargs,
290
+ ):
291
+ super().__init__(
292
+ fs,
293
+ path,
294
+ mode=mode,
295
+ block_size=block_size,
296
+ autocommit=autocommit,
297
+ cache_type=cache_type,
298
+ cache_options=cache_options,
299
+ **kwargs,
300
+ )
301
+ if not autocommit:
302
+ self.target = self.path
303
+ self.path = "/".join([kwargs["tempdir"], str(uuid.uuid4())])
304
+
305
+ def commit(self):
306
+ self.fs.mv(self.path, self.target)
307
+
308
+ def discard(self):
309
+ self.fs.rm(self.path)
310
+
311
+ def _fetch_range(self, start, end):
312
+ """Get bytes between given byte limits
313
+
314
+ Implemented by raising an exception in the fetch callback when the
315
+ number of bytes received reaches the requested amount.
316
+
317
+ Will fail if the server does not respect the REST command on
318
+ retrieve requests.
319
+ """
320
+ out = []
321
+ total = [0]
322
+
323
+ def callback(x):
324
+ total[0] += len(x)
325
+ if total[0] > end - start:
326
+ out.append(x[: (end - start) - total[0]])
327
+ if end < self.size:
328
+ raise TransferDone
329
+ else:
330
+ out.append(x)
331
+
332
+ if total[0] == end - start and end < self.size:
333
+ raise TransferDone
334
+
335
+ try:
336
+ self.fs.ftp.retrbinary(
337
+ f"RETR {self.path}",
338
+ blocksize=self.blocksize,
339
+ rest=start,
340
+ callback=callback,
341
+ )
342
+ except TransferDone:
343
+ try:
344
+ # stop transfer, we got enough bytes for this block
345
+ self.fs.ftp.abort()
346
+ self.fs.ftp.getmultiline()
347
+ except Error:
348
+ self.fs._connect()
349
+
350
+ return b"".join(out)
351
+
352
+ def _upload_chunk(self, final=False):
353
+ self.buffer.seek(0)
354
+ self.fs.ftp.storbinary(
355
+ f"STOR {self.path}", self.buffer, blocksize=self.blocksize, rest=self.offset
356
+ )
357
+ return True
358
+
359
+
360
+ def _mlsd2(ftp, path="."):
361
+ """
362
+ Fall back to using `dir` instead of `mlsd` if not supported.
363
+
364
+ This parses a Linux style `ls -l` response to `dir`, but the response may
365
+ be platform dependent.
366
+
367
+ Parameters
368
+ ----------
369
+ ftp: ftplib.FTP
370
+ path: str
371
+ Expects to be given path, but defaults to ".".
372
+ """
373
+ lines = []
374
+ minfo = []
375
+ ftp.dir(path, lines.append)
376
+ for line in lines:
377
+ split_line = line.split()
378
+ if len(split_line) < 9:
379
+ continue
380
+ this = (
381
+ split_line[-1],
382
+ {
383
+ "modify": " ".join(split_line[5:8]),
384
+ "unix.owner": split_line[2],
385
+ "unix.group": split_line[3],
386
+ "unix.mode": split_line[0],
387
+ "size": split_line[4],
388
+ },
389
+ )
390
+ if this[1]["unix.mode"][0] == "d":
391
+ this[1]["type"] = "dir"
392
+ else:
393
+ this[1]["type"] = "file"
394
+ minfo.append(this)
395
+ return minfo
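For context, a minimal sketch of the FTP backend above; host and credentials are placeholders (anonymous login is used when none are given).

    import fsspec

    fs = fsspec.filesystem(
        "ftp", host="ftp.example.com", port=21,
        username="user", password="secret",
    )
    print(fs.ls("/pub"))                          # MLSD listing, or the `dir` fallback
    fs.get_file("/pub/readme.txt", "readme.txt")  # streamed locally via RETR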
temp_venv/lib/python3.13/site-packages/fsspec/implementations/git.py ADDED
@@ -0,0 +1,115 @@
1
+ import os
2
+
3
+ import pygit2
4
+
5
+ from fsspec.spec import AbstractFileSystem
6
+
7
+ from .memory import MemoryFile
8
+
9
+
10
+ class GitFileSystem(AbstractFileSystem):
11
+ """Browse the files of a local git repo at any hash/tag/branch
12
+
13
+ (experimental backend)
14
+ """
15
+
16
+ root_marker = ""
17
+ cachable = True
18
+
19
+ def __init__(self, path=None, fo=None, ref=None, **kwargs):
20
+ """
21
+
22
+ Parameters
23
+ ----------
24
+ path: str (optional)
25
+ Local location of the repo (uses current directory if not given).
26
+ May be deprecated in favour of ``fo``. When used with a higher
27
+ level function such as fsspec.open(), may be of the form
28
+ "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
29
+ file path should not contain "@" or ":").
30
+ fo: str (optional)
31
+ Same as ``path``, but passed as part of a chained URL. This one
32
+ takes precedence if both are given.
33
+ ref: str (optional)
34
+ Reference to work with, could be a hash, tag or branch name. Defaults
35
+ to current working tree. Note that ``ls`` and ``open`` also take hash,
36
+ so this becomes the default for those operations
37
+ kwargs
38
+ """
39
+ super().__init__(**kwargs)
40
+ self.repo = pygit2.Repository(fo or path or os.getcwd())
41
+ self.ref = ref or "master"
42
+
43
+ @classmethod
44
+ def _strip_protocol(cls, path):
45
+ path = super()._strip_protocol(path).lstrip("/")
46
+ if ":" in path:
47
+ path = path.split(":", 1)[1]
48
+ if "@" in path:
49
+ path = path.split("@", 1)[1]
50
+ return path.lstrip("/")
51
+
52
+ def _path_to_object(self, path, ref):
53
+ comm, ref = self.repo.resolve_refish(ref or self.ref)
54
+ parts = path.split("/")
55
+ tree = comm.tree
56
+ for part in parts:
57
+ if part and isinstance(tree, pygit2.Tree):
58
+ if part not in tree:
59
+ raise FileNotFoundError(path)
60
+ tree = tree[part]
61
+ return tree
62
+
63
+ @staticmethod
64
+ def _get_kwargs_from_urls(path):
65
+ if path.startswith("git://"):
66
+ path = path[6:]
67
+ out = {}
68
+ if ":" in path:
69
+ out["path"], path = path.split(":", 1)
70
+ if "@" in path:
71
+ out["ref"], path = path.split("@", 1)
72
+ return out
73
+
74
+ @staticmethod
75
+ def _object_to_info(obj, path=None):
76
+ # obj.name and obj.filemode are None for the root tree!
77
+ is_dir = isinstance(obj, pygit2.Tree)
78
+ return {
79
+ "type": "directory" if is_dir else "file",
80
+ "name": (
81
+ "/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
82
+ ),
83
+ "hex": str(obj.id),
84
+ "mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
85
+ "size": 0 if is_dir else obj.size,
86
+ }
87
+
88
+ def ls(self, path, detail=True, ref=None, **kwargs):
89
+ tree = self._path_to_object(self._strip_protocol(path), ref)
90
+ return [
91
+ GitFileSystem._object_to_info(obj, path)
92
+ if detail
93
+ else GitFileSystem._object_to_info(obj, path)["name"]
94
+ for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
95
+ ]
96
+
97
+ def info(self, path, ref=None, **kwargs):
98
+ tree = self._path_to_object(self._strip_protocol(path), ref)
99
+ return GitFileSystem._object_to_info(tree, path)
100
+
101
+ def ukey(self, path, ref=None):
102
+ return self.info(path, ref=ref)["hex"]
103
+
104
+ def _open(
105
+ self,
106
+ path,
107
+ mode="rb",
108
+ block_size=None,
109
+ autocommit=True,
110
+ cache_options=None,
111
+ ref=None,
112
+ **kwargs,
113
+ ):
114
+ obj = self._path_to_object(path, ref or self.ref)
115
+ return MemoryFile(data=obj.data)
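A brief sketch of the git backend above; it assumes pygit2 is installed, a local clone exists at the placeholder path, and "main" is used as an example ref.

    import fsspec

    fs = fsspec.filesystem("git", path="/path/to/repo", ref="main")
    print(fs.ls(""))                  # tree listing at the chosen ref
    with fs.open("README.md", "rb") as f:
        print(f.read(80))             # content is served from a MemoryFile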
temp_venv/lib/python3.13/site-packages/fsspec/implementations/github.py ADDED
@@ -0,0 +1,267 @@
1
+ import base64
2
+
3
+ import requests
4
+
5
+ from ..spec import AbstractFileSystem
6
+ from ..utils import infer_storage_options
7
+ from .memory import MemoryFile
8
+
9
+ # TODO: add GIST backend, would be very similar
10
+
11
+
12
+ class GithubFileSystem(AbstractFileSystem):
13
+ """Interface to files in github
14
+
15
+ An instance of this class provides the files residing within a remote github
16
+ repository. You may specify a point in the repos history, by SHA, branch
17
+ or tag (default is current master).
18
+
19
+ For files less than 1 MB in size, file content is returned directly in a
20
+ MemoryFile. For larger files, or for files tracked by git-lfs, file content
21
+ is returned as an HTTPFile wrapping the ``download_url`` provided by the
22
+ GitHub API.
23
+
24
+ When using fsspec.open, allows URIs of the form:
25
+
26
+ - "github://path/file", in which case you must specify org, repo and
27
+ may specify sha in the extra args
28
+ - 'github://org:repo@/precip/catalog.yml', where the org and repo are
29
+ part of the URI
30
+ - 'github://org:repo@sha/precip/catalog.yml', where the sha is also included
31
+
32
+ ``sha`` can be the full or abbreviated hex of the commit you want to fetch
33
+ from, or a branch or tag name (so long as it doesn't contain special characters
34
+ like "/", "?", which would have to be HTTP-encoded).
35
+
36
+ For authorised access, you must provide username and token, which can be made
37
+ at https://github.com/settings/tokens
38
+ """
39
+
40
+ url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
41
+ content_url = "https://api.github.com/repos/{org}/{repo}/contents/{path}?ref={sha}"
42
+ protocol = "github"
43
+ timeout = (60, 60) # connect, read timeouts
44
+
45
+ def __init__(
46
+ self, org, repo, sha=None, username=None, token=None, timeout=None, **kwargs
47
+ ):
48
+ super().__init__(**kwargs)
49
+ self.org = org
50
+ self.repo = repo
51
+ if (username is None) ^ (token is None):
52
+ raise ValueError("Auth requires both username and token")
53
+ self.username = username
54
+ self.token = token
55
+ if timeout is not None:
56
+ self.timeout = timeout
57
+ if sha is None:
58
+ # look up default branch (not necessarily "master")
59
+ u = "https://api.github.com/repos/{org}/{repo}"
60
+ r = requests.get(
61
+ u.format(org=org, repo=repo), timeout=self.timeout, **self.kw
62
+ )
63
+ r.raise_for_status()
64
+ sha = r.json()["default_branch"]
65
+
66
+ self.root = sha
67
+ self.ls("")
68
+ try:
69
+ from .http import HTTPFileSystem
70
+
71
+ self.http_fs = HTTPFileSystem(**kwargs)
72
+ except ImportError:
73
+ self.http_fs = None
74
+
75
+ @property
76
+ def kw(self):
77
+ if self.username:
78
+ return {"auth": (self.username, self.token)}
79
+ return {}
80
+
81
+ @classmethod
82
+ def repos(cls, org_or_user, is_org=True):
83
+ """List repo names for given org or user
84
+
85
+ This may become the top level of the FS
86
+
87
+ Parameters
88
+ ----------
89
+ org_or_user: str
90
+ Name of the github org or user to query
91
+ is_org: bool (default True)
92
+ Whether the name is an organisation (True) or user (False)
93
+
94
+ Returns
95
+ -------
96
+ List of string
97
+ """
98
+ r = requests.get(
99
+ f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos",
100
+ timeout=cls.timeout,
101
+ )
102
+ r.raise_for_status()
103
+ return [repo["name"] for repo in r.json()]
104
+
105
+ @property
106
+ def tags(self):
107
+ """Names of tags in the repo"""
108
+ r = requests.get(
109
+ f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
110
+ timeout=self.timeout,
111
+ **self.kw,
112
+ )
113
+ r.raise_for_status()
114
+ return [t["name"] for t in r.json()]
115
+
116
+ @property
117
+ def branches(self):
118
+ """Names of branches in the repo"""
119
+ r = requests.get(
120
+ f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
121
+ timeout=self.timeout,
122
+ **self.kw,
123
+ )
124
+ r.raise_for_status()
125
+ return [t["name"] for t in r.json()]
126
+
127
+ @property
128
+ def refs(self):
129
+ """Named references, tags and branches"""
130
+ return {"tags": self.tags, "branches": self.branches}
131
+
132
+ def ls(self, path, detail=False, sha=None, _sha=None, **kwargs):
133
+ """List files at given path
134
+
135
+ Parameters
136
+ ----------
137
+ path: str
138
+ Location to list, relative to repo root
139
+ detail: bool
140
+ If True, returns list of dicts, one per file; if False, returns
141
+ list of full filenames only
142
+ sha: str (optional)
143
+ List at the given point in the repo history, branch or tag name or commit
144
+ SHA
145
+ _sha: str (optional)
146
+ List this specific tree object (used internally to descend into trees)
147
+ """
148
+ path = self._strip_protocol(path)
149
+ if path == "":
150
+ _sha = sha or self.root
151
+ if _sha is None:
152
+ parts = path.rstrip("/").split("/")
153
+ so_far = ""
154
+ _sha = sha or self.root
155
+ for part in parts:
156
+ out = self.ls(so_far, True, sha=sha, _sha=_sha)
157
+ so_far += "/" + part if so_far else part
158
+ out = [o for o in out if o["name"] == so_far]
159
+ if not out:
160
+ raise FileNotFoundError(path)
161
+ out = out[0]
162
+ if out["type"] == "file":
163
+ if detail:
164
+ return [out]
165
+ else:
166
+ return path
167
+ _sha = out["sha"]
168
+ if path not in self.dircache or sha not in [self.root, None]:
169
+ r = requests.get(
170
+ self.url.format(org=self.org, repo=self.repo, sha=_sha),
171
+ timeout=self.timeout,
172
+ **self.kw,
173
+ )
174
+ if r.status_code == 404:
175
+ raise FileNotFoundError(path)
176
+ r.raise_for_status()
177
+ types = {"blob": "file", "tree": "directory"}
178
+ out = [
179
+ {
180
+ "name": path + "/" + f["path"] if path else f["path"],
181
+ "mode": f["mode"],
182
+ "type": types[f["type"]],
183
+ "size": f.get("size", 0),
184
+ "sha": f["sha"],
185
+ }
186
+ for f in r.json()["tree"]
187
+ if f["type"] in types
188
+ ]
189
+ if sha in [self.root, None]:
190
+ self.dircache[path] = out
191
+ else:
192
+ out = self.dircache[path]
193
+ if detail:
194
+ return out
195
+ else:
196
+ return sorted([f["name"] for f in out])
197
+
198
+ def invalidate_cache(self, path=None):
199
+ self.dircache.clear()
200
+
201
+ @classmethod
202
+ def _strip_protocol(cls, path):
203
+ opts = infer_storage_options(path)
204
+ if "username" not in opts:
205
+ return super()._strip_protocol(path)
206
+ return opts["path"].lstrip("/")
207
+
208
+ @staticmethod
209
+ def _get_kwargs_from_urls(path):
210
+ opts = infer_storage_options(path)
211
+ if "username" not in opts:
212
+ return {}
213
+ out = {"org": opts["username"], "repo": opts["password"]}
214
+ if opts["host"]:
215
+ out["sha"] = opts["host"]
216
+ return out
217
+
218
+ def _open(
219
+ self,
220
+ path,
221
+ mode="rb",
222
+ block_size=None,
223
+ cache_options=None,
224
+ sha=None,
225
+ **kwargs,
226
+ ):
227
+ if mode != "rb":
228
+ raise NotImplementedError
229
+
230
+ # construct a url to hit the GitHub API's repo contents API
231
+ url = self.content_url.format(
232
+ org=self.org, repo=self.repo, path=path, sha=sha or self.root
233
+ )
234
+
235
+ # make a request to this API, and parse the response as JSON
236
+ r = requests.get(url, timeout=self.timeout, **self.kw)
237
+ if r.status_code == 404:
238
+ raise FileNotFoundError(path)
239
+ r.raise_for_status()
240
+ content_json = r.json()
241
+
242
+ # if the response's content key is not empty, try to parse it as base64
243
+ if content_json["content"]:
244
+ content = base64.b64decode(content_json["content"])
245
+
246
+ # as long as the content does not start with the string
247
+ # "version https://git-lfs.github.com/"
248
+ # then it is probably not a git-lfs pointer and we can just return
249
+ # the content directly
250
+ if not content.startswith(b"version https://git-lfs.github.com/"):
251
+ return MemoryFile(None, None, content)
252
+
253
+ # we land here if the content was not present in the first response
254
+ # (regular file over 1MB or git-lfs tracked file)
255
+ # in this case, we let the HTTPFileSystem handle the download
256
+ if self.http_fs is None:
257
+ raise ImportError(
258
+ "Please install fsspec[http] to access github files >1 MB "
259
+ "or git-lfs tracked files."
260
+ )
261
+ return self.http_fs.open(
262
+ content_json["download_url"],
263
+ mode=mode,
264
+ block_size=block_size,
265
+ cache_options=cache_options,
266
+ **kwargs,
267
+ )
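
Before moving on, a small, hedged usage sketch of the GithubFileSystem above, following the URI forms described in its docstring. It assumes network access to the GitHub API; the org/repo/branch used here (fsspec/filesystem_spec on master) are purely illustrative and any public repository would do.

    import fsspec

    # URL form "github://org:repo@sha/path": org, repo and ref travel in the URL.
    with fsspec.open("github://fsspec:filesystem_spec@master/README.md", "rb") as f:
        head = f.read(60)

    # Equivalent explicit construction; sha may be a branch, tag or commit hash,
    # and defaults to the repository's default branch when omitted.
    fs = fsspec.filesystem("github", org="fsspec", repo="filesystem_spec")
    print(fs.ls(""))            # top-level listing
    print(fs.info("README.md"))
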
temp_venv/lib/python3.13/site-packages/fsspec/implementations/http.py ADDED
@@ -0,0 +1,880 @@
1
+ import asyncio
2
+ import io
3
+ import logging
4
+ import re
5
+ import weakref
6
+ from copy import copy
7
+ from urllib.parse import urlparse
8
+
9
+ import aiohttp
10
+ import yarl
11
+
12
+ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_wrapper
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.exceptions import FSTimeoutError
15
+ from fsspec.spec import AbstractBufferedFile
16
+ from fsspec.utils import (
17
+ DEFAULT_BLOCK_SIZE,
18
+ glob_translate,
19
+ isfilelike,
20
+ nullcontext,
21
+ tokenize,
22
+ )
23
+
24
+ from ..caching import AllBytes
25
+
26
+ # https://stackoverflow.com/a/15926317/3821154
27
+ ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)""")
28
+ ex2 = re.compile(r"""(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
29
+ logger = logging.getLogger("fsspec.http")
30
+
31
+
32
+ async def get_client(**kwargs):
33
+ return aiohttp.ClientSession(**kwargs)
34
+
35
+
36
+ class HTTPFileSystem(AsyncFileSystem):
37
+ """
38
+ Simple File-System for fetching data via HTTP(S)
39
+
40
+ ``ls()`` is implemented by loading the parent page and doing a regex
41
+ match on the result. If simple_links=True, anything of the form
42
+ "http(s)://server.com/stuff?thing=other" is treated as a link;
43
+ otherwise only links within HTML href tags will be used.
44
+ """
45
+
46
+ sep = "/"
47
+
48
+ def __init__(
49
+ self,
50
+ simple_links=True,
51
+ block_size=None,
52
+ same_scheme=True,
53
+ size_policy=None,
54
+ cache_type="bytes",
55
+ cache_options=None,
56
+ asynchronous=False,
57
+ loop=None,
58
+ client_kwargs=None,
59
+ get_client=get_client,
60
+ encoded=False,
61
+ **storage_options,
62
+ ):
63
+ """
64
+ NB: if this is called async, you must await set_session
65
+
66
+ Parameters
67
+ ----------
68
+ block_size: int
69
+ Blocks to read bytes; if 0, will default to raw requests file-like
70
+ objects instead of HTTPFile instances
71
+ simple_links: bool
72
+ If True, will consider both HTML <a> tags and anything that looks
73
+ like a URL; if False, will consider only the former.
74
+ same_scheme: True
75
+ When doing ls/glob, if this is True, only consider paths that have
76
+ http/https matching the input URLs.
77
+ size_policy: this argument is deprecated
78
+ client_kwargs: dict
79
+ Passed to aiohttp.ClientSession, see
80
+ https://docs.aiohttp.org/en/stable/client_reference.html
81
+ For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
82
+ get_client: Callable[..., aiohttp.ClientSession]
83
+ A callable which takes keyword arguments and constructs
84
+ an aiohttp.ClientSession. Its state will be managed by
85
+ the HTTPFileSystem class.
86
+ storage_options: key-value
87
+ Any other parameters passed on to requests
88
+ cache_type, cache_options: defaults used in open
89
+ """
90
+ super().__init__(self, asynchronous=asynchronous, loop=loop, **storage_options)
91
+ self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
92
+ self.simple_links = simple_links
93
+ self.same_schema = same_scheme
94
+ self.cache_type = cache_type
95
+ self.cache_options = cache_options
96
+ self.client_kwargs = client_kwargs or {}
97
+ self.get_client = get_client
98
+ self.encoded = encoded
99
+ self.kwargs = storage_options
100
+ self._session = None
101
+
102
+ # Clean caching-related parameters from `storage_options`
103
+ # before propagating them as `request_options` through `self.kwargs`.
104
+ # TODO: Maybe rename `self.kwargs` to `self.request_options` to make
105
+ # it clearer.
106
+ request_options = copy(storage_options)
107
+ self.use_listings_cache = request_options.pop("use_listings_cache", False)
108
+ request_options.pop("listings_expiry_time", None)
109
+ request_options.pop("max_paths", None)
110
+ request_options.pop("skip_instance_cache", None)
111
+ self.kwargs = request_options
112
+
113
+ @property
114
+ def fsid(self):
115
+ return "http"
116
+
117
+ def encode_url(self, url):
118
+ return yarl.URL(url, encoded=self.encoded)
119
+
120
+ @staticmethod
121
+ def close_session(loop, session):
122
+ if loop is not None and loop.is_running():
123
+ try:
124
+ sync(loop, session.close, timeout=0.1)
125
+ return
126
+ except (TimeoutError, FSTimeoutError, NotImplementedError):
127
+ pass
128
+ connector = getattr(session, "_connector", None)
129
+ if connector is not None:
130
+ # close after loop is dead
131
+ connector._close()
132
+
133
+ async def set_session(self):
134
+ if self._session is None:
135
+ self._session = await self.get_client(loop=self.loop, **self.client_kwargs)
136
+ if not self.asynchronous:
137
+ weakref.finalize(self, self.close_session, self.loop, self._session)
138
+ return self._session
139
+
140
+ @classmethod
141
+ def _strip_protocol(cls, path):
142
+ """For HTTP, we always want to keep the full URL"""
143
+ return path
144
+
145
+ @classmethod
146
+ def _parent(cls, path):
147
+ # override, since _strip_protocol is different for URLs
148
+ par = super()._parent(path)
149
+ if len(par) > 7: # "http://..."
150
+ return par
151
+ return ""
152
+
153
+ async def _ls_real(self, url, detail=True, **kwargs):
154
+ # ignoring URL-encoded arguments
155
+ kw = self.kwargs.copy()
156
+ kw.update(kwargs)
157
+ logger.debug(url)
158
+ session = await self.set_session()
159
+ async with session.get(self.encode_url(url), **self.kwargs) as r:
160
+ self._raise_not_found_for_status(r, url)
161
+ try:
162
+ text = await r.text()
163
+ if self.simple_links:
164
+ links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
165
+ else:
166
+ links = [u[2] for u in ex.findall(text)]
167
+ except UnicodeDecodeError:
168
+ links = [] # binary, not HTML
169
+ out = set()
170
+ parts = urlparse(url)
171
+ for l in links:
172
+ if isinstance(l, tuple):
173
+ l = l[1]
174
+ if l.startswith("/") and len(l) > 1:
175
+ # absolute URL on this server
176
+ l = f"{parts.scheme}://{parts.netloc}{l}"
177
+ if l.startswith("http"):
178
+ if self.same_schema and l.startswith(url.rstrip("/") + "/"):
179
+ out.add(l)
180
+ elif l.replace("https", "http").startswith(
181
+ url.replace("https", "http").rstrip("/") + "/"
182
+ ):
183
+ # allowed to cross http <-> https
184
+ out.add(l)
185
+ else:
186
+ if l not in ["..", "../"]:
187
+ # Ignore FTP-like "parent"
188
+ out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
189
+ if not out and url.endswith("/"):
190
+ out = await self._ls_real(url.rstrip("/"), detail=False)
191
+ if detail:
192
+ return [
193
+ {
194
+ "name": u,
195
+ "size": None,
196
+ "type": "directory" if u.endswith("/") else "file",
197
+ }
198
+ for u in out
199
+ ]
200
+ else:
201
+ return sorted(out)
202
+
203
+ async def _ls(self, url, detail=True, **kwargs):
204
+ if self.use_listings_cache and url in self.dircache:
205
+ out = self.dircache[url]
206
+ else:
207
+ out = await self._ls_real(url, detail=detail, **kwargs)
208
+ self.dircache[url] = out
209
+ return out
210
+
211
+ ls = sync_wrapper(_ls)
212
+
213
+ def _raise_not_found_for_status(self, response, url):
214
+ """
215
+ Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
216
+ """
217
+ if response.status == 404:
218
+ raise FileNotFoundError(url)
219
+ response.raise_for_status()
220
+
221
+ async def _cat_file(self, url, start=None, end=None, **kwargs):
222
+ kw = self.kwargs.copy()
223
+ kw.update(kwargs)
224
+ logger.debug(url)
225
+
226
+ if start is not None or end is not None:
227
+ if start == end:
228
+ return b""
229
+ headers = kw.pop("headers", {}).copy()
230
+
231
+ headers["Range"] = await self._process_limits(url, start, end)
232
+ kw["headers"] = headers
233
+ session = await self.set_session()
234
+ async with session.get(self.encode_url(url), **kw) as r:
235
+ out = await r.read()
236
+ self._raise_not_found_for_status(r, url)
237
+ return out
238
+
239
+ async def _get_file(
240
+ self, rpath, lpath, chunk_size=5 * 2**20, callback=DEFAULT_CALLBACK, **kwargs
241
+ ):
242
+ kw = self.kwargs.copy()
243
+ kw.update(kwargs)
244
+ logger.debug(rpath)
245
+ session = await self.set_session()
246
+ async with session.get(self.encode_url(rpath), **kw) as r:
247
+ try:
248
+ size = int(r.headers["content-length"])
249
+ except (ValueError, KeyError):
250
+ size = None
251
+
252
+ callback.set_size(size)
253
+ self._raise_not_found_for_status(r, rpath)
254
+ if isfilelike(lpath):
255
+ outfile = lpath
256
+ else:
257
+ outfile = open(lpath, "wb") # noqa: ASYNC101, ASYNC230
258
+
259
+ try:
260
+ chunk = True
261
+ while chunk:
262
+ chunk = await r.content.read(chunk_size)
263
+ outfile.write(chunk)
264
+ callback.relative_update(len(chunk))
265
+ finally:
266
+ if not isfilelike(lpath):
267
+ outfile.close()
268
+
269
+ async def _put_file(
270
+ self,
271
+ lpath,
272
+ rpath,
273
+ chunk_size=5 * 2**20,
274
+ callback=DEFAULT_CALLBACK,
275
+ method="post",
276
+ mode="overwrite",
277
+ **kwargs,
278
+ ):
279
+ if mode != "overwrite":
280
+ raise NotImplementedError("Exclusive write")
281
+
282
+ async def gen_chunks():
283
+ # Support passing arbitrary file-like objects
284
+ # and use them instead of streams.
285
+ if isinstance(lpath, io.IOBase):
286
+ context = nullcontext(lpath)
287
+ use_seek = False # might not support seeking
288
+ else:
289
+ context = open(lpath, "rb") # noqa: ASYNC101, ASYNC230
290
+ use_seek = True
291
+
292
+ with context as f:
293
+ if use_seek:
294
+ callback.set_size(f.seek(0, 2))
295
+ f.seek(0)
296
+ else:
297
+ callback.set_size(getattr(f, "size", None))
298
+
299
+ chunk = f.read(chunk_size)
300
+ while chunk:
301
+ yield chunk
302
+ callback.relative_update(len(chunk))
303
+ chunk = f.read(chunk_size)
304
+
305
+ kw = self.kwargs.copy()
306
+ kw.update(kwargs)
307
+ session = await self.set_session()
308
+
309
+ method = method.lower()
310
+ if method not in ("post", "put"):
311
+ raise ValueError(
312
+ f"method has to be either 'post' or 'put', not: {method!r}"
313
+ )
314
+
315
+ meth = getattr(session, method)
316
+ async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
317
+ self._raise_not_found_for_status(resp, rpath)
318
+
319
+ async def _exists(self, path, **kwargs):
320
+ kw = self.kwargs.copy()
321
+ kw.update(kwargs)
322
+ try:
323
+ logger.debug(path)
324
+ session = await self.set_session()
325
+ r = await session.get(self.encode_url(path), **kw)
326
+ async with r:
327
+ return r.status < 400
328
+ except aiohttp.ClientError:
329
+ return False
330
+
331
+ async def _isfile(self, path, **kwargs):
332
+ return await self._exists(path, **kwargs)
333
+
334
+ def _open(
335
+ self,
336
+ path,
337
+ mode="rb",
338
+ block_size=None,
339
+ autocommit=None, # XXX: This differs from the base class.
340
+ cache_type=None,
341
+ cache_options=None,
342
+ size=None,
343
+ **kwargs,
344
+ ):
345
+ """Make a file-like object
346
+
347
+ Parameters
348
+ ----------
349
+ path: str
350
+ Full URL with protocol
351
+ mode: string
352
+ must be "rb"
353
+ block_size: int or None
354
+ Bytes to download in one request; use instance value if None. If
355
+ zero, will return a streaming Requests file-like instance.
356
+ kwargs: key-value
357
+ Any other parameters, passed to requests calls
358
+ """
359
+ if mode != "rb":
360
+ raise NotImplementedError
361
+ block_size = block_size if block_size is not None else self.block_size
362
+ kw = self.kwargs.copy()
363
+ kw["asynchronous"] = self.asynchronous
364
+ kw.update(kwargs)
365
+ info = {}
366
+ size = size or info.update(self.info(path, **kwargs)) or info["size"]
367
+ session = sync(self.loop, self.set_session)
368
+ if block_size and size and info.get("partial", True):
369
+ return HTTPFile(
370
+ self,
371
+ path,
372
+ session=session,
373
+ block_size=block_size,
374
+ mode=mode,
375
+ size=size,
376
+ cache_type=cache_type or self.cache_type,
377
+ cache_options=cache_options or self.cache_options,
378
+ loop=self.loop,
379
+ **kw,
380
+ )
381
+ else:
382
+ return HTTPStreamFile(
383
+ self,
384
+ path,
385
+ mode=mode,
386
+ loop=self.loop,
387
+ session=session,
388
+ **kw,
389
+ )
390
+
391
+ async def open_async(self, path, mode="rb", size=None, **kwargs):
392
+ session = await self.set_session()
393
+ if size is None:
394
+ try:
395
+ size = (await self._info(path, **kwargs))["size"]
396
+ except FileNotFoundError:
397
+ pass
398
+ return AsyncStreamFile(
399
+ self,
400
+ path,
401
+ loop=self.loop,
402
+ session=session,
403
+ size=size,
404
+ **kwargs,
405
+ )
406
+
407
+ def ukey(self, url):
408
+ """Unique identifier; assume HTTP files are static, unchanging"""
409
+ return tokenize(url, self.kwargs, self.protocol)
410
+
411
+ async def _info(self, url, **kwargs):
412
+ """Get info of URL
413
+
414
+ Tries to access location via HEAD, and then GET methods, but does
415
+ not fetch the data.
416
+
417
+ It is possible that the server does not supply any size information, in
418
+ which case size will be given as None (and certain operations on the
419
+ corresponding file will not work).
420
+ """
421
+ info = {}
422
+ session = await self.set_session()
423
+
424
+ for policy in ["head", "get"]:
425
+ try:
426
+ info.update(
427
+ await _file_info(
428
+ self.encode_url(url),
429
+ size_policy=policy,
430
+ session=session,
431
+ **self.kwargs,
432
+ **kwargs,
433
+ )
434
+ )
435
+ if info.get("size") is not None:
436
+ break
437
+ except Exception as exc:
438
+ if policy == "get":
439
+ # If get failed, then raise a FileNotFoundError
440
+ raise FileNotFoundError(url) from exc
441
+ logger.debug("", exc_info=exc)
442
+
443
+ return {"name": url, "size": None, **info, "type": "file"}
444
+
445
+ async def _glob(self, path, maxdepth=None, **kwargs):
446
+ """
447
+ Find files by glob-matching.
448
+
449
+ This implementation is identical to the one in AbstractFileSystem,
450
+ but "?" is not considered as a character for globbing, because it is
451
+ so common in URLs, often identifying the "query" part.
452
+ """
453
+ if maxdepth is not None and maxdepth < 1:
454
+ raise ValueError("maxdepth must be at least 1")
455
+ import re
456
+
457
+ ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash
458
+ path = self._strip_protocol(path)
459
+ append_slash_to_dirname = ends_with_slash or path.endswith(("/**", "/*"))
460
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
461
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
462
+
463
+ min_idx = min(idx_star, idx_brace)
464
+
465
+ detail = kwargs.pop("detail", False)
466
+
467
+ if not has_magic(path):
468
+ if await self._exists(path, **kwargs):
469
+ if not detail:
470
+ return [path]
471
+ else:
472
+ return {path: await self._info(path, **kwargs)}
473
+ else:
474
+ if not detail:
475
+ return [] # glob of non-existent returns empty
476
+ else:
477
+ return {}
478
+ elif "/" in path[:min_idx]:
479
+ min_idx = path[:min_idx].rindex("/")
480
+ root = path[: min_idx + 1]
481
+ depth = path[min_idx + 1 :].count("/") + 1
482
+ else:
483
+ root = ""
484
+ depth = path[min_idx + 1 :].count("/") + 1
485
+
486
+ if "**" in path:
487
+ if maxdepth is not None:
488
+ idx_double_stars = path.find("**")
489
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
490
+ depth = depth - depth_double_stars + maxdepth
491
+ else:
492
+ depth = None
493
+
494
+ allpaths = await self._find(
495
+ root, maxdepth=depth, withdirs=True, detail=True, **kwargs
496
+ )
497
+
498
+ pattern = glob_translate(path + ("/" if ends_with_slash else ""))
499
+ pattern = re.compile(pattern)
500
+
501
+ out = {
502
+ (
503
+ p.rstrip("/")
504
+ if not append_slash_to_dirname
505
+ and info["type"] == "directory"
506
+ and p.endswith("/")
507
+ else p
508
+ ): info
509
+ for p, info in sorted(allpaths.items())
510
+ if pattern.match(p.rstrip("/"))
511
+ }
512
+
513
+ if detail:
514
+ return out
515
+ else:
516
+ return list(out)
517
+
518
+ async def _isdir(self, path):
519
+ # override, since all URLs are (also) files
520
+ try:
521
+ return bool(await self._ls(path))
522
+ except (FileNotFoundError, ValueError):
523
+ return False
524
+
525
+ async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
526
+ """
527
+ Write bytes to a remote file over HTTP.
528
+
529
+ Parameters
530
+ ----------
531
+ path : str
532
+ Target URL where the data should be written
533
+ value : bytes
534
+ Data to be written
535
+ mode : str
536
+ How to write to the file - 'overwrite' or 'append'
537
+ **kwargs : dict
538
+ Additional parameters to pass to the HTTP request
539
+ """
540
+ url = self._strip_protocol(path)
541
+ headers = kwargs.pop("headers", {})
542
+ headers["Content-Length"] = str(len(value))
543
+
544
+ session = await self.set_session()
545
+
546
+ async with session.put(url, data=value, headers=headers, **kwargs) as r:
547
+ r.raise_for_status()
548
+
549
+
550
+ class HTTPFile(AbstractBufferedFile):
551
+ """
552
+ A file-like object pointing to a remote HTTP(S) resource
553
+
554
+ Supports only reading, with read-ahead of a predetermined block-size.
555
+
556
+ In the case that the server does not supply the filesize, only reading of
557
+ the complete file in one go is supported.
558
+
559
+ Parameters
560
+ ----------
561
+ url: str
562
+ Full URL of the remote resource, including the protocol
563
+ session: aiohttp.ClientSession or None
564
+ All calls will be made within this session, to avoid restarting
565
+ connections where the server allows this
566
+ block_size: int or None
567
+ The amount of read-ahead to do, in bytes. Default is 5MB, or the value
568
+ configured for the FileSystem creating this file
569
+ size: None or int
570
+ If given, this is the size of the file in bytes, and we don't attempt
571
+ to call the server to find the value.
572
+ kwargs: all other key-values are passed to requests calls.
573
+ """
574
+
575
+ def __init__(
576
+ self,
577
+ fs,
578
+ url,
579
+ session=None,
580
+ block_size=None,
581
+ mode="rb",
582
+ cache_type="bytes",
583
+ cache_options=None,
584
+ size=None,
585
+ loop=None,
586
+ asynchronous=False,
587
+ **kwargs,
588
+ ):
589
+ if mode != "rb":
590
+ raise NotImplementedError("File mode not supported")
591
+ self.asynchronous = asynchronous
592
+ self.loop = loop
593
+ self.url = url
594
+ self.session = session
595
+ self.details = {"name": url, "size": size, "type": "file"}
596
+ super().__init__(
597
+ fs=fs,
598
+ path=url,
599
+ mode=mode,
600
+ block_size=block_size,
601
+ cache_type=cache_type,
602
+ cache_options=cache_options,
603
+ **kwargs,
604
+ )
605
+
606
+ def read(self, length=-1):
607
+ """Read bytes from file
608
+
609
+ Parameters
610
+ ----------
611
+ length: int
612
+ Read up to this many bytes. If negative, read all content to end of
613
+ file. If the server has not supplied the filesize, attempting to
614
+ read only part of the data will raise a ValueError.
615
+ """
616
+ if (
617
+ (length < 0 and self.loc == 0) # explicit read all
618
+ # but not when the size is known and fits into a block anyways
619
+ and not (self.size is not None and self.size <= self.blocksize)
620
+ ):
621
+ self._fetch_all()
622
+ if self.size is None:
623
+ if length < 0:
624
+ self._fetch_all()
625
+ else:
626
+ length = min(self.size - self.loc, length)
627
+ return super().read(length)
628
+
629
+ async def async_fetch_all(self):
630
+ """Read whole file in one shot, without caching
631
+
632
+ This is only called when position is still at zero,
633
+ and read() is called without a byte-count.
634
+ """
635
+ logger.debug(f"Fetch all for {self}")
636
+ if not isinstance(self.cache, AllBytes):
637
+ r = await self.session.get(self.fs.encode_url(self.url), **self.kwargs)
638
+ async with r:
639
+ r.raise_for_status()
640
+ out = await r.read()
641
+ self.cache = AllBytes(
642
+ size=len(out), fetcher=None, blocksize=None, data=out
643
+ )
644
+ self.size = len(out)
645
+
646
+ _fetch_all = sync_wrapper(async_fetch_all)
647
+
648
+ def _parse_content_range(self, headers):
649
+ """Parse the Content-Range header"""
650
+ s = headers.get("Content-Range", "")
651
+ m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s)
652
+ if not m:
653
+ return None, None, None
654
+
655
+ if m[1] == "*":
656
+ start = end = None
657
+ else:
658
+ start, end = [int(x) for x in m[1].split("-")]
659
+ total = None if m[2] == "*" else int(m[2])
660
+ return start, end, total
661
+
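
(Illustration, not part of the file: the regex above turns a typical header into a (start, end, total) tuple; since self is unused in the method body, it can be called unbound for a quick check.)

    from fsspec.implementations.http import HTTPFile

    assert HTTPFile._parse_content_range(
        None, {"Content-Range": "bytes 0-99/1234"}
    ) == (0, 99, 1234)
    # A missing or wildcard header yields (None, None, None):
    assert HTTPFile._parse_content_range(None, {}) == (None, None, None)
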
662
+ async def async_fetch_range(self, start, end):
663
+ """Download a block of data
664
+
665
+ The expectation is that the server returns only the requested bytes,
666
+ with HTTP code 206. If this is not the case, we first check the headers,
667
+ and then stream the output - if the data size is bigger than we
668
+ requested, an exception is raised.
669
+ """
670
+ logger.debug(f"Fetch range for {self}: {start}-{end}")
671
+ kwargs = self.kwargs.copy()
672
+ headers = kwargs.pop("headers", {}).copy()
673
+ headers["Range"] = f"bytes={start}-{end - 1}"
674
+ logger.debug(f"{self.url} : {headers['Range']}")
675
+ r = await self.session.get(
676
+ self.fs.encode_url(self.url), headers=headers, **kwargs
677
+ )
678
+ async with r:
679
+ if r.status == 416:
680
+ # range request outside file
681
+ return b""
682
+ r.raise_for_status()
683
+
684
+ # If the server has handled the range request, it should reply
685
+ # with status 206 (partial content). But we'll guess that a suitable
686
+ # Content-Range header or a Content-Length no more than the
687
+ # requested range also mean we have got the desired range.
688
+ response_is_range = (
689
+ r.status == 206
690
+ or self._parse_content_range(r.headers)[0] == start
691
+ or int(r.headers.get("Content-Length", end + 1)) <= end - start
692
+ )
693
+
694
+ if response_is_range:
695
+ # partial content, as expected
696
+ out = await r.read()
697
+ elif start > 0:
698
+ raise ValueError(
699
+ "The HTTP server doesn't appear to support range requests. "
700
+ "Only reading this file from the beginning is supported. "
701
+ "Open with block_size=0 for a streaming file interface."
702
+ )
703
+ else:
704
+ # Response is not a range, but we want the start of the file,
705
+ # so we can read the required amount anyway.
706
+ cl = 0
707
+ out = []
708
+ while True:
709
+ chunk = await r.content.read(2**20)
710
+ # data size unknown, let's read until we have enough
711
+ if chunk:
712
+ out.append(chunk)
713
+ cl += len(chunk)
714
+ if cl > end - start:
715
+ break
716
+ else:
717
+ break
718
+ out = b"".join(out)[: end - start]
719
+ return out
720
+
721
+ _fetch_range = sync_wrapper(async_fetch_range)
722
+
723
+
724
+ magic_check = re.compile("([*[])")
725
+
726
+
727
+ def has_magic(s):
728
+ match = magic_check.search(s)
729
+ return match is not None
730
+
731
+
732
+ class HTTPStreamFile(AbstractBufferedFile):
733
+ def __init__(self, fs, url, mode="rb", loop=None, session=None, **kwargs):
734
+ self.asynchronous = kwargs.pop("asynchronous", False)
735
+ self.url = url
736
+ self.loop = loop
737
+ self.session = session
738
+ if mode != "rb":
739
+ raise ValueError
740
+ self.details = {"name": url, "size": None}
741
+ super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs)
742
+
743
+ async def cor():
744
+ r = await self.session.get(self.fs.encode_url(url), **kwargs).__aenter__()
745
+ self.fs._raise_not_found_for_status(r, url)
746
+ return r
747
+
748
+ self.r = sync(self.loop, cor)
749
+ self.loop = fs.loop
750
+
751
+ def seek(self, loc, whence=0):
752
+ if loc == 0 and whence == 1:
753
+ return
754
+ if loc == self.loc and whence == 0:
755
+ return
756
+ raise ValueError("Cannot seek streaming HTTP file")
757
+
758
+ async def _read(self, num=-1):
759
+ out = await self.r.content.read(num)
760
+ self.loc += len(out)
761
+ return out
762
+
763
+ read = sync_wrapper(_read)
764
+
765
+ async def _close(self):
766
+ self.r.close()
767
+
768
+ def close(self):
769
+ asyncio.run_coroutine_threadsafe(self._close(), self.loop)
770
+ super().close()
771
+
772
+
773
+ class AsyncStreamFile(AbstractAsyncStreamedFile):
774
+ def __init__(
775
+ self, fs, url, mode="rb", loop=None, session=None, size=None, **kwargs
776
+ ):
777
+ self.url = url
778
+ self.session = session
779
+ self.r = None
780
+ if mode != "rb":
781
+ raise ValueError
782
+ self.details = {"name": url, "size": None}
783
+ self.kwargs = kwargs
784
+ super().__init__(fs=fs, path=url, mode=mode, cache_type="none")
785
+ self.size = size
786
+
787
+ async def read(self, num=-1):
788
+ if self.r is None:
789
+ r = await self.session.get(
790
+ self.fs.encode_url(self.url), **self.kwargs
791
+ ).__aenter__()
792
+ self.fs._raise_not_found_for_status(r, self.url)
793
+ self.r = r
794
+ out = await self.r.content.read(num)
795
+ self.loc += len(out)
796
+ return out
797
+
798
+ async def close(self):
799
+ if self.r is not None:
800
+ self.r.close()
801
+ self.r = None
802
+ await super().close()
803
+
804
+
805
+ async def get_range(session, url, start, end, file=None, **kwargs):
806
+ # explicitly get a range when we know it must be safe
807
+ kwargs = kwargs.copy()
808
+ headers = kwargs.pop("headers", {}).copy()
809
+ headers["Range"] = f"bytes={start}-{end - 1}"
810
+ r = await session.get(url, headers=headers, **kwargs)
811
+ r.raise_for_status()
812
+ async with r:
813
+ out = await r.read()
814
+ if file:
815
+ with open(file, "r+b") as f: # noqa: ASYNC101, ASYNC230
816
+ f.seek(start)
817
+ f.write(out)
818
+ else:
819
+ return out
820
+
821
+
822
+ async def _file_info(url, session, size_policy="head", **kwargs):
823
+ """Call HEAD on the server to get details about the file (size/checksum etc.)
824
+
825
+ Default operation is to explicitly allow redirects and use encoding
826
+ 'identity' (no compression) to get the true size of the target.
827
+ """
828
+ logger.debug("Retrieve file size for %s", url)
829
+ kwargs = kwargs.copy()
830
+ ar = kwargs.pop("allow_redirects", True)
831
+ head = kwargs.get("headers", {}).copy()
832
+ head["Accept-Encoding"] = "identity"
833
+ kwargs["headers"] = head
834
+
835
+ info = {}
836
+ if size_policy == "head":
837
+ r = await session.head(url, allow_redirects=ar, **kwargs)
838
+ elif size_policy == "get":
839
+ r = await session.get(url, allow_redirects=ar, **kwargs)
840
+ else:
841
+ raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
842
+ async with r:
843
+ r.raise_for_status()
844
+
845
+ if "Content-Length" in r.headers:
846
+ # Some servers may choose to ignore Accept-Encoding and return
847
+ # compressed content, in which case the returned size is unreliable.
848
+ if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
849
+ "identity",
850
+ "",
851
+ ]:
852
+ info["size"] = int(r.headers["Content-Length"])
853
+ elif "Content-Range" in r.headers:
854
+ info["size"] = int(r.headers["Content-Range"].split("/")[1])
855
+
856
+ if "Content-Type" in r.headers:
857
+ info["mimetype"] = r.headers["Content-Type"].partition(";")[0]
858
+
859
+ if r.headers.get("Accept-Ranges") == "none":
860
+ # Some servers may explicitly discourage partial content requests, but
861
+ # the lack of "Accept-Ranges" does not always indicate they would fail
862
+ info["partial"] = False
863
+
864
+ info["url"] = str(r.url)
865
+
866
+ for checksum_field in ["ETag", "Content-MD5", "Digest"]:
867
+ if r.headers.get(checksum_field):
868
+ info[checksum_field] = r.headers[checksum_field]
869
+
870
+ return info
871
+
872
+
873
+ async def _file_size(url, session=None, *args, **kwargs):
874
+ if session is None:
875
+ session = await get_client()
876
+ info = await _file_info(url, session=session, *args, **kwargs)
877
+ return info.get("size")
878
+
879
+
880
+ file_size = sync_wrapper(_file_size)
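
To close out this module, a short, hedged usage sketch based on the _open docstring above. The URL is a placeholder and network access plus aiohttp are assumed; with a non-zero block_size and a server that reports a size, open() yields a random-access HTTPFile, while block_size=0 forces the streaming HTTPStreamFile path.

    import fsspec

    url = "https://example.com/data.bin"  # placeholder URL

    # Random-access reads via HTTP Range requests (HTTPFile):
    with fsspec.open(url, "rb") as f:
        first_kb = f.read(1024)

    # Pure streaming, no range requests (HTTPStreamFile):
    fs = fsspec.filesystem("http", block_size=0)
    with fs.open(url, "rb") as f:
        chunk = f.read(2**20)
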
temp_venv/lib/python3.13/site-packages/fsspec/implementations/http_sync.py ADDED
@@ -0,0 +1,931 @@
1
+ """This file is largely copied from http.py"""
2
+
3
+ import io
4
+ import logging
5
+ import re
6
+ import urllib.error
7
+ import urllib.parse
8
+ from copy import copy
9
+ from json import dumps, loads
10
+ from urllib.parse import urlparse
11
+
12
+ try:
13
+ import yarl
14
+ except (ImportError, ModuleNotFoundError, OSError):
15
+ yarl = False
16
+
17
+ from fsspec.callbacks import _DEFAULT_CALLBACK
18
+ from fsspec.registry import register_implementation
19
+ from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
20
+ from fsspec.utils import DEFAULT_BLOCK_SIZE, isfilelike, nullcontext, tokenize
21
+
22
+ from ..caching import AllBytes
23
+
24
+ # https://stackoverflow.com/a/15926317/3821154
25
+ ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)""")
26
+ ex2 = re.compile(r"""(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
27
+ logger = logging.getLogger("fsspec.http")
28
+
29
+
30
+ class JsHttpException(urllib.error.HTTPError): ...
31
+
32
+
33
+ class StreamIO(io.BytesIO):
34
+ # fake class, so you can set attributes on it
35
+ # will eventually actually stream
36
+ ...
37
+
38
+
39
+ class ResponseProxy:
40
+ """Looks like a requests response"""
41
+
42
+ def __init__(self, req, stream=False):
43
+ self.request = req
44
+ self.stream = stream
45
+ self._data = None
46
+ self._headers = None
47
+
48
+ @property
49
+ def raw(self):
50
+ if self._data is None:
51
+ b = self.request.response.to_bytes()
52
+ if self.stream:
53
+ self._data = StreamIO(b)
54
+ else:
55
+ self._data = b
56
+ return self._data
57
+
58
+ def close(self):
59
+ if hasattr(self, "_data"):
60
+ del self._data
61
+
62
+ @property
63
+ def headers(self):
64
+ if self._headers is None:
65
+ self._headers = dict(
66
+ [
67
+ _.split(": ")
68
+ for _ in self.request.getAllResponseHeaders().strip().split("\r\n")
69
+ ]
70
+ )
71
+ return self._headers
72
+
73
+ @property
74
+ def status_code(self):
75
+ return int(self.request.status)
76
+
77
+ def raise_for_status(self):
78
+ if not self.ok:
79
+ raise JsHttpException(
80
+ self.url, self.status_code, self.reason, self.headers, None
81
+ )
82
+
83
+ def iter_content(self, chunksize, *_, **__):
84
+ while True:
85
+ out = self.raw.read(chunksize)
86
+ if out:
87
+ yield out
88
+ else:
89
+ break
90
+
91
+ @property
92
+ def reason(self):
93
+ return self.request.statusText
94
+
95
+ @property
96
+ def ok(self):
97
+ return self.status_code < 400
98
+
99
+ @property
100
+ def url(self):
101
+ return self.request.response.responseURL
102
+
103
+ @property
104
+ def text(self):
105
+ # TODO: encoding from headers
106
+ return self.content.decode()
107
+
108
+ @property
109
+ def content(self):
110
+ self.stream = False
111
+ return self.raw
112
+
113
+ def json(self):
114
+ return loads(self.text)
115
+
116
+
117
+ class RequestsSessionShim:
118
+ def __init__(self):
119
+ self.headers = {}
120
+
121
+ def request(
122
+ self,
123
+ method,
124
+ url,
125
+ params=None,
126
+ data=None,
127
+ headers=None,
128
+ cookies=None,
129
+ files=None,
130
+ auth=None,
131
+ timeout=None,
132
+ allow_redirects=None,
133
+ proxies=None,
134
+ hooks=None,
135
+ stream=None,
136
+ verify=None,
137
+ cert=None,
138
+ json=None,
139
+ ):
140
+ from js import Blob, XMLHttpRequest
141
+
142
+ logger.debug("JS request: %s %s", method, url)
143
+
144
+ if cert or verify or proxies or files or cookies or hooks:
145
+ raise NotImplementedError
146
+ if data and json:
147
+ raise ValueError("Use json= or data=, not both")
148
+ req = XMLHttpRequest.new()
149
+ extra = auth if auth else ()
150
+ if params:
151
+ url = f"{url}?{urllib.parse.urlencode(params)}"
152
+ req.open(method, url, False, *extra)
153
+ if timeout:
154
+ req.timeout = timeout
155
+ if headers:
156
+ for k, v in headers.items():
157
+ req.setRequestHeader(k, v)
158
+
159
+ req.setRequestHeader("Accept", "application/octet-stream")
160
+ req.responseType = "arraybuffer"
161
+ if json:
162
+ blob = Blob.new([dumps(json)], {type: "application/json"})
163
+ req.send(blob)
164
+ elif data:
165
+ if isinstance(data, io.IOBase):
166
+ data = data.read()
167
+ blob = Blob.new([data], {type: "application/octet-stream"})
168
+ req.send(blob)
169
+ else:
170
+ req.send(None)
171
+ return ResponseProxy(req, stream=stream)
172
+
173
+ def get(self, url, **kwargs):
174
+ return self.request("GET", url, **kwargs)
175
+
176
+ def head(self, url, **kwargs):
177
+ return self.request("HEAD", url, **kwargs)
178
+
179
+ def post(self, url, **kwargs):
180
+ return self.request("POST", url, **kwargs)
181
+
182
+ def put(self, url, **kwargs):
183
+ return self.request("PUT", url, **kwargs)
184
+
185
+ def patch(self, url, **kwargs):
186
+ return self.request("PATCH", url, **kwargs)
187
+
188
+ def delete(self, url, **kwargs):
189
+ return self.request("DELETE", url, **kwargs)
190
+
191
+
192
+ class HTTPFileSystem(AbstractFileSystem):
193
+ """
194
+ Simple File-System for fetching data via HTTP(S)
195
+
196
+ This is the BLOCKING version of the normal HTTPFileSystem. It uses
197
+ requests in normal python and the JS runtime in pyodide.
198
+
199
+ ***This implementation is extremely experimental, do not use unless
200
+ you are testing pyodide/pyscript integration***
201
+ """
202
+
203
+ protocol = ("http", "https", "sync-http", "sync-https")
204
+ sep = "/"
205
+
206
+ def __init__(
207
+ self,
208
+ simple_links=True,
209
+ block_size=None,
210
+ same_scheme=True,
211
+ cache_type="readahead",
212
+ cache_options=None,
213
+ client_kwargs=None,
214
+ encoded=False,
215
+ **storage_options,
216
+ ):
217
+ """
218
+
219
+ Parameters
220
+ ----------
221
+ block_size: int
222
+ Blocks to read bytes; if 0, will default to raw requests file-like
223
+ objects instead of HTTPFile instances
224
+ simple_links: bool
225
+ If True, will consider both HTML <a> tags and anything that looks
226
+ like a URL; if False, will consider only the former.
227
+ same_scheme: True
228
+ When doing ls/glob, if this is True, only consider paths that have
229
+ http/https matching the input URLs.
230
+ size_policy: this argument is deprecated
231
+ client_kwargs: dict
232
+ Passed to aiohttp.ClientSession, see
233
+ https://docs.aiohttp.org/en/stable/client_reference.html
234
+ For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
235
+ storage_options: key-value
236
+ Any other parameters passed on to requests
237
+ cache_type, cache_options: defaults used in open
238
+ """
239
+ super().__init__(self, **storage_options)
240
+ self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
241
+ self.simple_links = simple_links
242
+ self.same_schema = same_scheme
243
+ self.cache_type = cache_type
244
+ self.cache_options = cache_options
245
+ self.client_kwargs = client_kwargs or {}
246
+ self.encoded = encoded
247
+ self.kwargs = storage_options
248
+
249
+ try:
250
+ import js # noqa: F401
251
+
252
+ logger.debug("Starting JS session")
253
+ self.session = RequestsSessionShim()
254
+ self.js = True
255
+ except Exception as e:
256
+ import requests
257
+
258
+ logger.debug("Starting cpython session because of: %s", e)
259
+ self.session = requests.Session(**(client_kwargs or {}))
260
+ self.js = False
261
+
262
+ request_options = copy(storage_options)
263
+ self.use_listings_cache = request_options.pop("use_listings_cache", False)
264
+ request_options.pop("listings_expiry_time", None)
265
+ request_options.pop("max_paths", None)
266
+ request_options.pop("skip_instance_cache", None)
267
+ self.kwargs = request_options
268
+
269
+ @property
270
+ def fsid(self):
271
+ return "sync-http"
272
+
273
+ def encode_url(self, url):
274
+ if yarl:
275
+ return yarl.URL(url, encoded=self.encoded)
276
+ return url
277
+
278
+ @classmethod
279
+ def _strip_protocol(cls, path: str) -> str:
280
+ """For HTTP, we always want to keep the full URL"""
281
+ path = path.replace("sync-http://", "http://").replace(
282
+ "sync-https://", "https://"
283
+ )
284
+ return path
285
+
286
+ @classmethod
287
+ def _parent(cls, path):
288
+ # override, since _strip_protocol is different for URLs
289
+ par = super()._parent(path)
290
+ if len(par) > 7: # "http://..."
291
+ return par
292
+ return ""
293
+
294
+ def _ls_real(self, url, detail=True, **kwargs):
295
+ # ignoring URL-encoded arguments
296
+ kw = self.kwargs.copy()
297
+ kw.update(kwargs)
298
+ logger.debug(url)
299
+ r = self.session.get(self.encode_url(url), **self.kwargs)
300
+ self._raise_not_found_for_status(r, url)
301
+ text = r.text
302
+ if self.simple_links:
303
+ links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
304
+ else:
305
+ links = [u[2] for u in ex.findall(text)]
306
+ out = set()
307
+ parts = urlparse(url)
308
+ for l in links:
309
+ if isinstance(l, tuple):
310
+ l = l[1]
311
+ if l.startswith("/") and len(l) > 1:
312
+ # absolute URL on this server
313
+ l = parts.scheme + "://" + parts.netloc + l
314
+ if l.startswith("http"):
315
+ if self.same_schema and l.startswith(url.rstrip("/") + "/"):
316
+ out.add(l)
317
+ elif l.replace("https", "http").startswith(
318
+ url.replace("https", "http").rstrip("/") + "/"
319
+ ):
320
+ # allowed to cross http <-> https
321
+ out.add(l)
322
+ else:
323
+ if l not in ["..", "../"]:
324
+ # Ignore FTP-like "parent"
325
+ out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
326
+ if not out and url.endswith("/"):
327
+ out = self._ls_real(url.rstrip("/"), detail=False)
328
+ if detail:
329
+ return [
330
+ {
331
+ "name": u,
332
+ "size": None,
333
+ "type": "directory" if u.endswith("/") else "file",
334
+ }
335
+ for u in out
336
+ ]
337
+ else:
338
+ return sorted(out)
339
+
340
+ def ls(self, url, detail=True, **kwargs):
341
+ if self.use_listings_cache and url in self.dircache:
342
+ out = self.dircache[url]
343
+ else:
344
+ out = self._ls_real(url, detail=detail, **kwargs)
345
+ self.dircache[url] = out
346
+ return out
347
+
348
+ def _raise_not_found_for_status(self, response, url):
349
+ """
350
+ Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
351
+ """
352
+ if response.status_code == 404:
353
+ raise FileNotFoundError(url)
354
+ response.raise_for_status()
355
+
356
+ def cat_file(self, url, start=None, end=None, **kwargs):
357
+ kw = self.kwargs.copy()
358
+ kw.update(kwargs)
359
+ logger.debug(url)
360
+
361
+ if start is not None or end is not None:
362
+ if start == end:
363
+ return b""
364
+ headers = kw.pop("headers", {}).copy()
365
+
366
+ headers["Range"] = self._process_limits(url, start, end)
367
+ kw["headers"] = headers
368
+ r = self.session.get(self.encode_url(url), **kw)
369
+ self._raise_not_found_for_status(r, url)
370
+ return r.content
371
+
372
+ def get_file(
373
+ self, rpath, lpath, chunk_size=5 * 2**20, callback=_DEFAULT_CALLBACK, **kwargs
374
+ ):
375
+ kw = self.kwargs.copy()
376
+ kw.update(kwargs)
377
+ logger.debug(rpath)
378
+ r = self.session.get(self.encode_url(rpath), **kw)
379
+ try:
380
+ size = int(
381
+ r.headers.get("content-length", None)
382
+ or r.headers.get("Content-Length", None)
383
+ )
384
+ except (ValueError, KeyError, TypeError):
385
+ size = None
386
+
387
+ callback.set_size(size)
388
+ self._raise_not_found_for_status(r, rpath)
389
+ if not isfilelike(lpath):
390
+ lpath = open(lpath, "wb")
391
+ for chunk in r.iter_content(chunk_size, decode_unicode=False):
392
+ lpath.write(chunk)
393
+ callback.relative_update(len(chunk))
394
+
395
+ def put_file(
396
+ self,
397
+ lpath,
398
+ rpath,
399
+ chunk_size=5 * 2**20,
400
+ callback=_DEFAULT_CALLBACK,
401
+ method="post",
402
+ **kwargs,
403
+ ):
404
+ def gen_chunks():
405
+ # Support passing arbitrary file-like objects
406
+ # and use them instead of streams.
407
+ if isinstance(lpath, io.IOBase):
408
+ context = nullcontext(lpath)
409
+ use_seek = False # might not support seeking
410
+ else:
411
+ context = open(lpath, "rb")
412
+ use_seek = True
413
+
414
+ with context as f:
415
+ if use_seek:
416
+ callback.set_size(f.seek(0, 2))
417
+ f.seek(0)
418
+ else:
419
+ callback.set_size(getattr(f, "size", None))
420
+
421
+ chunk = f.read(chunk_size)
422
+ while chunk:
423
+ yield chunk
424
+ callback.relative_update(len(chunk))
425
+ chunk = f.read(chunk_size)
426
+
427
+ kw = self.kwargs.copy()
428
+ kw.update(kwargs)
429
+
430
+ method = method.lower()
431
+ if method not in ("post", "put"):
432
+ raise ValueError(
433
+ f"method has to be either 'post' or 'put', not: {method!r}"
434
+ )
435
+
436
+ meth = getattr(self.session, method)
437
+ resp = meth(rpath, data=gen_chunks(), **kw)
438
+ self._raise_not_found_for_status(resp, rpath)
439
+
440
+ def _process_limits(self, url, start, end):
441
+ """Helper for "Range"-based _cat_file"""
442
+ size = None
443
+ suff = False
444
+ if start is not None and start < 0:
445
+ # if start is negative and end None, end is the "suffix length"
446
+ if end is None:
447
+ end = -start
448
+ start = ""
449
+ suff = True
450
+ else:
451
+ size = size or self.info(url)["size"]
452
+ start = size + start
453
+ elif start is None:
454
+ start = 0
455
+ if not suff:
456
+ if end is not None and end < 0:
457
+ if start is not None:
458
+ size = size or self.info(url)["size"]
459
+ end = size + end
460
+ elif end is None:
461
+ end = ""
462
+ if isinstance(end, int):
463
+ end -= 1 # bytes range is inclusive
464
+ return f"bytes={start}-{end}"
465
+
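
(Illustration, not part of the file: the Range strings produced by _process_limits for a few simple cases. These particular calls never hit the network because no file size is needed; the URL is a placeholder and a plain CPython environment with requests installed is assumed.)

    from fsspec.implementations.http_sync import HTTPFileSystem

    fs = HTTPFileSystem()  # the blocking variant defined in this module
    assert fs._process_limits("http://example.com/x", 0, 100) == "bytes=0-99"
    assert fs._process_limits("http://example.com/x", None, 100) == "bytes=0-99"
    assert fs._process_limits("http://example.com/x", 5, None) == "bytes=5-"
    # Negative start with no end means "the last N bytes" (HTTP suffix range):
    assert fs._process_limits("http://example.com/x", -100, None) == "bytes=-100"
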
466
+ def exists(self, path, **kwargs):
467
+ kw = self.kwargs.copy()
468
+ kw.update(kwargs)
469
+ try:
470
+ logger.debug(path)
471
+ r = self.session.get(self.encode_url(path), **kw)
472
+ return r.status_code < 400
473
+ except Exception:
474
+ return False
475
+
476
+ def isfile(self, path, **kwargs):
477
+ return self.exists(path, **kwargs)
478
+
479
+ def _open(
480
+ self,
481
+ path,
482
+ mode="rb",
483
+ block_size=None,
484
+ autocommit=None, # XXX: This differs from the base class.
485
+ cache_type=None,
486
+ cache_options=None,
487
+ size=None,
488
+ **kwargs,
489
+ ):
490
+ """Make a file-like object
491
+
492
+ Parameters
493
+ ----------
494
+ path: str
495
+ Full URL with protocol
496
+ mode: string
497
+ must be "rb"
498
+ block_size: int or None
499
+ Bytes to download in one request; use instance value if None. If
500
+ zero, will return a streaming Requests file-like instance.
501
+ kwargs: key-value
502
+ Any other parameters, passed to requests calls
503
+ """
504
+ if mode != "rb":
505
+ raise NotImplementedError
506
+ block_size = block_size if block_size is not None else self.block_size
507
+ kw = self.kwargs.copy()
508
+ kw.update(kwargs)
509
+ size = size or self.info(path, **kwargs)["size"]
510
+ if block_size and size:
511
+ return HTTPFile(
512
+ self,
513
+ path,
514
+ session=self.session,
515
+ block_size=block_size,
516
+ mode=mode,
517
+ size=size,
518
+ cache_type=cache_type or self.cache_type,
519
+ cache_options=cache_options or self.cache_options,
520
+ **kw,
521
+ )
522
+ else:
523
+ return HTTPStreamFile(
524
+ self,
525
+ path,
526
+ mode=mode,
527
+ session=self.session,
528
+ **kw,
529
+ )
530
+
531
+ def ukey(self, url):
532
+ """Unique identifier; assume HTTP files are static, unchanging"""
533
+ return tokenize(url, self.kwargs, self.protocol)
534
+
535
+ def info(self, url, **kwargs):
536
+ """Get info of URL
537
+
538
+ Tries to access location via HEAD, and then GET methods, but does
539
+ not fetch the data.
540
+
541
+ It is possible that the server does not supply any size information, in
542
+ which case size will be given as None (and certain operations on the
543
+ corresponding file will not work).
544
+ """
545
+ info = {}
546
+ for policy in ["head", "get"]:
547
+ try:
548
+ info.update(
549
+ _file_info(
550
+ self.encode_url(url),
551
+ size_policy=policy,
552
+ session=self.session,
553
+ **self.kwargs,
554
+ **kwargs,
555
+ )
556
+ )
557
+ if info.get("size") is not None:
558
+ break
559
+ except Exception as exc:
560
+ if policy == "get":
561
+ # If get failed, then raise a FileNotFoundError
562
+ raise FileNotFoundError(url) from exc
563
+ logger.debug(str(exc))
564
+
565
+ return {"name": url, "size": None, **info, "type": "file"}
566
+
567
+ def glob(self, path, maxdepth=None, **kwargs):
568
+ """
569
+ Find files by glob-matching.
570
+
571
+ This implementation is identical to the one in AbstractFileSystem,
572
+ but "?" is not considered as a character for globbing, because it is
573
+ so common in URLs, often identifying the "query" part.
574
+ """
575
+ import re
576
+
577
+ ends = path.endswith("/")
578
+ path = self._strip_protocol(path)
579
+ indstar = path.find("*") if path.find("*") >= 0 else len(path)
580
+ indbrace = path.find("[") if path.find("[") >= 0 else len(path)
581
+
582
+ ind = min(indstar, indbrace)
583
+
584
+ detail = kwargs.pop("detail", False)
585
+
586
+ if not has_magic(path):
587
+ root = path
588
+ depth = 1
589
+ if ends:
590
+ path += "/*"
591
+ elif self.exists(path):
592
+ if not detail:
593
+ return [path]
594
+ else:
595
+ return {path: self.info(path)}
596
+ else:
597
+ if not detail:
598
+ return [] # glob of non-existent returns empty
599
+ else:
600
+ return {}
601
+ elif "/" in path[:ind]:
602
+ ind2 = path[:ind].rindex("/")
603
+ root = path[: ind2 + 1]
604
+ depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1
605
+ else:
606
+ root = ""
607
+ depth = None if "**" in path else path[ind + 1 :].count("/") + 1
608
+
609
+ allpaths = self.find(
610
+ root, maxdepth=maxdepth or depth, withdirs=True, detail=True, **kwargs
611
+ )
612
+ # Escape characters special to python regex, leaving our supported
613
+ # special characters in place.
614
+ # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
615
+ # for shell globbing details.
616
+ pattern = (
617
+ "^"
618
+ + (
619
+ path.replace("\\", r"\\")
620
+ .replace(".", r"\.")
621
+ .replace("+", r"\+")
622
+ .replace("//", "/")
623
+ .replace("(", r"\(")
624
+ .replace(")", r"\)")
625
+ .replace("|", r"\|")
626
+ .replace("^", r"\^")
627
+ .replace("$", r"\$")
628
+ .replace("{", r"\{")
629
+ .replace("}", r"\}")
630
+ .rstrip("/")
631
+ )
632
+ + "$"
633
+ )
634
+ pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern)
635
+ pattern = re.sub("[*]", "[^/]*", pattern)
636
+ pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*"))
637
+ out = {
638
+ p: allpaths[p]
639
+ for p in sorted(allpaths)
640
+ if pattern.match(p.replace("//", "/").rstrip("/"))
641
+ }
642
+ if detail:
643
+ return out
644
+ else:
645
+ return list(out)
646
+
647
+ def isdir(self, path):
648
+ # override, since all URLs are (also) files
649
+ try:
650
+ return bool(self.ls(path))
651
+ except (FileNotFoundError, ValueError):
652
+ return False
653
+
654
+
655
+ class HTTPFile(AbstractBufferedFile):
656
+ """
657
+ A file-like object pointing to a remote HTTP(S) resource
658
+
659
+ Supports only reading, with read-ahead of a predetermined block-size.
660
+
661
+ In the case that the server does not supply the filesize, only reading of
662
+ the complete file in one go is supported.
663
+
664
+ Parameters
665
+ ----------
666
+ url: str
667
+ Full URL of the remote resource, including the protocol
668
+ session: requests.Session or None
669
+ All calls will be made within this session, to avoid restarting
670
+ connections where the server allows this
671
+ block_size: int or None
672
+ The amount of read-ahead to do, in bytes. Default is 5MB, or the value
673
+ configured for the FileSystem creating this file
674
+ size: None or int
675
+ If given, this is the size of the file in bytes, and we don't attempt
676
+ to call the server to find the value.
677
+ kwargs: all other key-values are passed to requests calls.
678
+ """
679
+
680
+ def __init__(
681
+ self,
682
+ fs,
683
+ url,
684
+ session=None,
685
+ block_size=None,
686
+ mode="rb",
687
+ cache_type="bytes",
688
+ cache_options=None,
689
+ size=None,
690
+ **kwargs,
691
+ ):
692
+ if mode != "rb":
693
+ raise NotImplementedError("File mode not supported")
694
+ self.url = url
695
+ self.session = session
696
+ self.details = {"name": url, "size": size, "type": "file"}
697
+ super().__init__(
698
+ fs=fs,
699
+ path=url,
700
+ mode=mode,
701
+ block_size=block_size,
702
+ cache_type=cache_type,
703
+ cache_options=cache_options,
704
+ **kwargs,
705
+ )
706
+
707
+ def read(self, length=-1):
708
+ """Read bytes from file
709
+
710
+ Parameters
711
+ ----------
712
+ length: int
713
+ Read up to this many bytes. If negative, read all content to end of
714
+ file. If the server has not supplied the filesize, attempting to
715
+ read only part of the data will raise a ValueError.
716
+ """
717
+ if (
718
+ (length < 0 and self.loc == 0) # explicit read all
719
+ # but not when the size is known and fits into a block anyways
720
+ and not (self.size is not None and self.size <= self.blocksize)
721
+ ):
722
+ self._fetch_all()
723
+ if self.size is None:
724
+ if length < 0:
725
+ self._fetch_all()
726
+ else:
727
+ length = min(self.size - self.loc, length)
728
+ return super().read(length)
729
+
730
+ def _fetch_all(self):
731
+ """Read whole file in one shot, without caching
732
+
733
+ This is only called when position is still at zero,
734
+ and read() is called without a byte-count.
735
+ """
736
+ logger.debug(f"Fetch all for {self}")
737
+ if not isinstance(self.cache, AllBytes):
738
+ r = self.session.get(self.fs.encode_url(self.url), **self.kwargs)
739
+ r.raise_for_status()
740
+ out = r.content
741
+ self.cache = AllBytes(size=len(out), fetcher=None, blocksize=None, data=out)
742
+ self.size = len(out)
743
+
744
+ def _parse_content_range(self, headers):
745
+ """Parse the Content-Range header"""
746
+ s = headers.get("Content-Range", "")
747
+ m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s)
748
+ if not m:
749
+ return None, None, None
750
+
751
+ if m[1] == "*":
752
+ start = end = None
753
+ else:
754
+ start, end = [int(x) for x in m[1].split("-")]
755
+ total = None if m[2] == "*" else int(m[2])
756
+ return start, end, total
757
+
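# Illustrative sketch (not part of the upstream file): how the parsing above
# treats typical Content-Range values. The header strings below are made up.
import re

def parse_content_range(value):
    # mirrors HTTPFile._parse_content_range for a raw header value
    m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", value)
    if not m:
        return None, None, None
    start, end = (None, None) if m[1] == "*" else [int(x) for x in m[1].split("-")]
    total = None if m[2] == "*" else int(m[2])
    return start, end, total

assert parse_content_range("bytes 0-499/1234") == (0, 499, 1234)   # ranged reply
assert parse_content_range("bytes */1234") == (None, None, 1234)   # size only
assert parse_content_range("") == (None, None, None)               # header absent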
758
+ def _fetch_range(self, start, end):
759
+ """Download a block of data
760
+
761
+ The expectation is that the server returns only the requested bytes,
762
+ with HTTP code 206. If this is not the case, we first check the headers,
763
+ and then stream the output - if the data size is bigger than we
764
+ requested, an exception is raised.
765
+ """
766
+ logger.debug(f"Fetch range for {self}: {start}-{end}")
767
+ kwargs = self.kwargs.copy()
768
+ headers = kwargs.pop("headers", {}).copy()
769
+ headers["Range"] = f"bytes={start}-{end - 1}"
770
+ logger.debug("%s : %s", self.url, headers["Range"])
771
+ r = self.session.get(self.fs.encode_url(self.url), headers=headers, **kwargs)
772
+ if r.status_code == 416:
773
+ # range request outside file
774
+ return b""
775
+ r.raise_for_status()
776
+
777
+ # If the server has handled the range request, it should reply
778
+ # with status 206 (partial content). But we'll guess that a suitable
779
+ # Content-Range header or a Content-Length no more than the
780
+ # requested range also mean we have got the desired range.
781
+ cl = r.headers.get("Content-Length", r.headers.get("content-length", end + 1))
782
+ response_is_range = (
783
+ r.status_code == 206
784
+ or self._parse_content_range(r.headers)[0] == start
785
+ or int(cl) <= end - start
786
+ )
787
+
788
+ if response_is_range:
789
+ # partial content, as expected
790
+ out = r.content
791
+ elif start > 0:
792
+ raise ValueError(
793
+ "The HTTP server doesn't appear to support range requests. "
794
+ "Only reading this file from the beginning is supported. "
795
+ "Open with block_size=0 for a streaming file interface."
796
+ )
797
+ else:
798
+ # Response is not a range, but we want the start of the file,
799
+ # so we can read the required amount anyway.
800
+ cl = 0
801
+ out = []
802
+ for chunk in r.iter_content(2**20, False):
803
+ out.append(chunk)
804
+ cl += len(chunk)
805
+ out = b"".join(out)[: end - start]
806
+ return out
807
+
808
+
809
+ magic_check = re.compile("([*[])")
810
+
811
+
812
+ def has_magic(s):
813
+ match = magic_check.search(s)
814
+ return match is not None
815
+
816
+
817
+ class HTTPStreamFile(AbstractBufferedFile):
818
+ def __init__(self, fs, url, mode="rb", session=None, **kwargs):
819
+ self.url = url
820
+ self.session = session
821
+ if mode != "rb":
822
+ raise ValueError
823
+ self.details = {"name": url, "size": None}
824
+ super().__init__(fs=fs, path=url, mode=mode, cache_type="readahead", **kwargs)
825
+
826
+ r = self.session.get(self.fs.encode_url(url), stream=True, **kwargs)
827
+ self.fs._raise_not_found_for_status(r, url)
828
+ self.it = r.iter_content(1024, False)
829
+ self.leftover = b""
830
+
831
+ self.r = r
832
+
833
+ def seek(self, *args, **kwargs):
834
+ raise ValueError("Cannot seek streaming HTTP file")
835
+
836
+ def read(self, num=-1):
837
+ bufs = [self.leftover]
838
+ leng = len(self.leftover)
839
+ while leng < num or num < 0:
840
+ try:
841
+ out = self.it.__next__()
842
+ except StopIteration:
843
+ break
844
+ if out:
845
+ bufs.append(out)
846
+ else:
847
+ break
848
+ leng += len(out)
849
+ out = b"".join(bufs)
850
+ if num >= 0:
851
+ self.leftover = out[num:]
852
+ out = out[:num]
853
+ else:
854
+ self.leftover = b""
855
+ self.loc += len(out)
856
+ return out
857
+
858
+ def close(self):
859
+ self.r.close()
860
+ self.closed = True
861
+
862
+
863
+ def get_range(session, url, start, end, **kwargs):
864
+ # explicitly get a range when we know it must be safe
865
+ kwargs = kwargs.copy()
866
+ headers = kwargs.pop("headers", {}).copy()
867
+ headers["Range"] = f"bytes={start}-{end - 1}"
868
+ r = session.get(url, headers=headers, **kwargs)
869
+ r.raise_for_status()
870
+ return r.content
871
+
872
+
873
+ def _file_info(url, session, size_policy="head", **kwargs):
874
+ """Call HEAD on the server to get details about the file (size/checksum etc.)
875
+
876
+ Default operation is to explicitly allow redirects and use encoding
877
+ 'identity' (no compression) to get the true size of the target.
878
+ """
879
+ logger.debug("Retrieve file size for %s", url)
880
+ kwargs = kwargs.copy()
881
+ ar = kwargs.pop("allow_redirects", True)
882
+ head = kwargs.get("headers", {}).copy()
883
+ # TODO: not allowed in JS
884
+ # head["Accept-Encoding"] = "identity"
885
+ kwargs["headers"] = head
886
+
887
+ info = {}
888
+ if size_policy == "head":
889
+ r = session.head(url, allow_redirects=ar, **kwargs)
890
+ elif size_policy == "get":
891
+ r = session.get(url, allow_redirects=ar, **kwargs)
892
+ else:
893
+ raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
894
+ r.raise_for_status()
895
+
896
+ # TODO:
897
+ # recognise lack of 'Accept-Ranges',
898
+ # or 'Accept-Ranges': 'none' (not 'bytes')
899
+ # to mean streaming only, no random access => return None
900
+ if "Content-Length" in r.headers:
901
+ info["size"] = int(r.headers["Content-Length"])
902
+ elif "Content-Range" in r.headers:
903
+ info["size"] = int(r.headers["Content-Range"].split("/")[1])
904
+ elif "content-length" in r.headers:
905
+ info["size"] = int(r.headers["content-length"])
906
+ elif "content-range" in r.headers:
907
+ info["size"] = int(r.headers["content-range"].split("/")[1])
908
+
909
+ for checksum_field in ["ETag", "Content-MD5", "Digest"]:
910
+ if r.headers.get(checksum_field):
911
+ info[checksum_field] = r.headers[checksum_field]
912
+
913
+ return info
914
+
915
+
916
+ # importing this is enough to register it
917
+ def register():
918
+ register_implementation("http", HTTPFileSystem, clobber=True)
919
+ register_implementation("https", HTTPFileSystem, clobber=True)
920
+ register_implementation("sync-http", HTTPFileSystem, clobber=True)
921
+ register_implementation("sync-https", HTTPFileSystem, clobber=True)
922
+
923
+
924
+ register()
925
+
926
+
927
+ def unregister():
928
+ from fsspec.implementations.http import HTTPFileSystem
929
+
930
+ register_implementation("http", HTTPFileSystem, clobber=True)
931
+ register_implementation("https", HTTPFileSystem, clobber=True)
temp_venv/lib/python3.13/site-packages/fsspec/implementations/jupyter.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import re
4
+
5
+ import requests
6
+
7
+ import fsspec
8
+
9
+
10
+ class JupyterFileSystem(fsspec.AbstractFileSystem):
11
+ """View of the files as seen by a Jupyter server (notebook or lab)"""
12
+
13
+ protocol = ("jupyter", "jlab")
14
+
15
+ def __init__(self, url, tok=None, **kwargs):
16
+ """
17
+
18
+ Parameters
19
+ ----------
20
+ url : str
21
+ Base URL of the server, like "http://127.0.0.1:8888". May include
22
+ token in the string, which is given by the process when starting up
23
+ tok : str
24
+ If the token is obtained separately, can be given here
25
+ kwargs
26
+ """
27
+ if "?" in url:
28
+ if tok is None:
29
+ try:
30
+ tok = re.findall("token=([a-z0-9]+)", url)[0]
31
+ except IndexError as e:
32
+ raise ValueError("Could not determine token") from e
33
+ url = url.split("?", 1)[0]
34
+ self.url = url.rstrip("/") + "/api/contents"
35
+ self.session = requests.Session()
36
+ if tok:
37
+ self.session.headers["Authorization"] = f"token {tok}"
38
+
39
+ super().__init__(**kwargs)
40
+
41
+ def ls(self, path, detail=True, **kwargs):
42
+ path = self._strip_protocol(path)
43
+ r = self.session.get(f"{self.url}/{path}")
44
+ if r.status_code == 404:
45
+ return FileNotFoundError(path)
46
+ r.raise_for_status()
47
+ out = r.json()
48
+
49
+ if out["type"] == "directory":
50
+ out = out["content"]
51
+ else:
52
+ out = [out]
53
+ for o in out:
54
+ o["name"] = o.pop("path")
55
+ o.pop("content")
56
+ if o["type"] == "notebook":
57
+ o["type"] = "file"
58
+ if detail:
59
+ return out
60
+ return [o["name"] for o in out]
61
+
62
+ def cat_file(self, path, start=None, end=None, **kwargs):
63
+ path = self._strip_protocol(path)
64
+ r = self.session.get(f"{self.url}/{path}")
65
+ if r.status_code == 404:
66
+ return FileNotFoundError(path)
67
+ r.raise_for_status()
68
+ out = r.json()
69
+ if out["format"] == "text":
70
+ # data should be binary
71
+ b = out["content"].encode()
72
+ else:
73
+ b = base64.b64decode(out["content"])
74
+ return b[start:end]
75
+
76
+ def pipe_file(self, path, value, **_):
77
+ path = self._strip_protocol(path)
78
+ json = {
79
+ "name": path.rsplit("/", 1)[-1],
80
+ "path": path,
81
+ "size": len(value),
82
+ "content": base64.b64encode(value).decode(),
83
+ "format": "base64",
84
+ "type": "file",
85
+ }
86
+ self.session.put(f"{self.url}/{path}", json=json)
87
+
88
+ def mkdir(self, path, create_parents=True, **kwargs):
89
+ path = self._strip_protocol(path)
90
+ if create_parents and "/" in path:
91
+ self.mkdir(path.rsplit("/", 1)[0], True)
92
+ json = {
93
+ "name": path.rsplit("/", 1)[-1],
94
+ "path": path,
95
+ "size": None,
96
+ "content": None,
97
+ "type": "directory",
98
+ }
99
+ self.session.put(f"{self.url}/{path}", json=json)
100
+
101
+ def _rm(self, path):
102
+ path = self._strip_protocol(path)
103
+ self.session.delete(f"{self.url}/{path}")
104
+
105
+ def _open(self, path, mode="rb", **kwargs):
106
+ path = self._strip_protocol(path)
107
+ if mode == "rb":
108
+ data = self.cat_file(path)
109
+ return io.BytesIO(data)
110
+ else:
111
+ return SimpleFileWriter(self, path, mode="wb")
112
+
113
+
114
+ class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
115
+ def _upload_chunk(self, final=False):
116
+ """Never uploads a chunk until file is done
117
+
118
+ Not suitable for large files
119
+ """
120
+ if final is False:
121
+ return False
122
+ self.buffer.seek(0)
123
+ data = self.buffer.read()
124
+ self.fs.pipe_file(self.path, data)
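A minimal usage sketch for JupyterFileSystem (the server URL and token below are
placeholders for a locally running Jupyter server):

    fs = JupyterFileSystem("http://127.0.0.1:8888", tok="abc123")
    fs.mkdir("scratch")
    fs.pipe_file("scratch/hello.txt", b"hello")     # PUT with base64-encoded content
    print(fs.ls("scratch", detail=False))
    print(fs.cat_file("scratch/hello.txt"))         # b"hello"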
temp_venv/lib/python3.13/site-packages/fsspec/implementations/libarchive.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import contextmanager
2
+ from ctypes import (
3
+ CFUNCTYPE,
4
+ POINTER,
5
+ c_int,
6
+ c_longlong,
7
+ c_void_p,
8
+ cast,
9
+ create_string_buffer,
10
+ )
11
+
12
+ import libarchive
13
+ import libarchive.ffi as ffi
14
+
15
+ from fsspec import open_files
16
+ from fsspec.archive import AbstractArchiveFileSystem
17
+ from fsspec.implementations.memory import MemoryFile
18
+ from fsspec.utils import DEFAULT_BLOCK_SIZE
19
+
20
+ # Libarchive requires seekable files or memory only for certain archive
21
+ # types. However, since we read the directory first to cache the contents
22
+ # and also allow random access to any file, the file-like object needs
23
+ # to be seekable no matter what.
24
+
25
+ # Seek call-backs (not provided in the libarchive python wrapper)
26
+ SEEK_CALLBACK = CFUNCTYPE(c_longlong, c_int, c_void_p, c_longlong, c_int)
27
+ read_set_seek_callback = ffi.ffi(
28
+ "read_set_seek_callback", [ffi.c_archive_p, SEEK_CALLBACK], c_int, ffi.check_int
29
+ )
30
+ new_api = hasattr(ffi, "NO_OPEN_CB")
31
+
32
+
33
+ @contextmanager
34
+ def custom_reader(file, format_name="all", filter_name="all", block_size=ffi.page_size):
35
+ """Read an archive from a seekable file-like object.
36
+
37
+ The `file` object must support the standard `readinto` and `seek` methods.
38
+ """
39
+ buf = create_string_buffer(block_size)
40
+ buf_p = cast(buf, c_void_p)
41
+
42
+ def read_func(archive_p, context, ptrptr):
43
+ # readinto the buffer, returns number of bytes read
44
+ length = file.readinto(buf)
45
+ # write the address of the buffer into the pointer
46
+ ptrptr = cast(ptrptr, POINTER(c_void_p))
47
+ ptrptr[0] = buf_p
48
+ # tell libarchive how much data was written into the buffer
49
+ return length
50
+
51
+ def seek_func(archive_p, context, offset, whence):
52
+ file.seek(offset, whence)
53
+ # tell libarchvie the current position
54
+ return file.tell()
55
+
56
+ read_cb = ffi.READ_CALLBACK(read_func)
57
+ seek_cb = SEEK_CALLBACK(seek_func)
58
+
59
+ if new_api:
60
+ open_cb = ffi.NO_OPEN_CB
61
+ close_cb = ffi.NO_CLOSE_CB
62
+ else:
63
+ open_cb = libarchive.read.OPEN_CALLBACK(ffi.VOID_CB)
64
+ close_cb = libarchive.read.CLOSE_CALLBACK(ffi.VOID_CB)
65
+
66
+ with libarchive.read.new_archive_read(format_name, filter_name) as archive_p:
67
+ read_set_seek_callback(archive_p, seek_cb)
68
+ ffi.read_open(archive_p, None, open_cb, read_cb, close_cb)
69
+ yield libarchive.read.ArchiveRead(archive_p)
70
+
71
+
72
+ class LibArchiveFileSystem(AbstractArchiveFileSystem):
73
+ """Compressed archives as a file-system (read-only)
74
+
75
+ Supports the following formats:
76
+ tar, pax , cpio, ISO9660, zip, mtree, shar, ar, raw, xar, lha/lzh, rar
77
+ Microsoft CAB, 7-Zip, WARC
78
+
79
+ See the libarchive documentation for further restrictions.
80
+ https://www.libarchive.org/
81
+
82
+ Keeps file object open while instance lives. It only works in seekable
83
+ file-like objects. In case the filesystem does not support this kind of
84
+ file object, it is recommended to cache locally.
85
+
86
+ This class is pickleable, but not necessarily thread-safe (depends on the
87
+ platform). See libarchive documentation for details.
88
+ """
89
+
90
+ root_marker = ""
91
+ protocol = "libarchive"
92
+ cachable = False
93
+
94
+ def __init__(
95
+ self,
96
+ fo="",
97
+ mode="r",
98
+ target_protocol=None,
99
+ target_options=None,
100
+ block_size=DEFAULT_BLOCK_SIZE,
101
+ **kwargs,
102
+ ):
103
+ """
104
+ Parameters
105
+ ----------
106
+ fo: str or file-like
107
+ Contains ZIP, and must exist. If a str, will fetch file using
108
+ :meth:`~fsspec.open_files`, which must return one file exactly.
109
+ mode: str
110
+ Currently, only 'r' accepted
111
+ target_protocol: str (optional)
112
+ If ``fo`` is a string, this value can be used to override the
113
+ FS protocol inferred from a URL
114
+ target_options: dict (optional)
115
+ Kwargs passed when instantiating the target FS, if ``fo`` is
116
+ a string.
117
+ """
118
+ super().__init__(self, **kwargs)
119
+ if mode != "r":
120
+ raise ValueError("Only read from archive files accepted")
121
+ if isinstance(fo, str):
122
+ files = open_files(fo, protocol=target_protocol, **(target_options or {}))
123
+ if len(files) != 1:
124
+ raise ValueError(
125
+ f'Path "{fo}" did not resolve to exactly one file: "{files}"'
126
+ )
127
+ fo = files[0]
128
+ self.of = fo
129
+ self.fo = fo.__enter__() # the whole instance is a context
130
+ self.block_size = block_size
131
+ self.dir_cache = None
132
+
133
+ @contextmanager
134
+ def _open_archive(self):
135
+ self.fo.seek(0)
136
+ with custom_reader(self.fo, block_size=self.block_size) as arc:
137
+ yield arc
138
+
139
+ @classmethod
140
+ def _strip_protocol(cls, path):
141
+ # file paths are always relative to the archive root
142
+ return super()._strip_protocol(path).lstrip("/")
143
+
144
+ def _get_dirs(self):
145
+ fields = {
146
+ "name": "pathname",
147
+ "size": "size",
148
+ "created": "ctime",
149
+ "mode": "mode",
150
+ "uid": "uid",
151
+ "gid": "gid",
152
+ "mtime": "mtime",
153
+ }
154
+
155
+ if self.dir_cache is not None:
156
+ return
157
+
158
+ self.dir_cache = {}
159
+ list_names = []
160
+ with self._open_archive() as arc:
161
+ for entry in arc:
162
+ if not entry.isdir and not entry.isfile:
163
+ # Skip symbolic links, fifo entries, etc.
164
+ continue
165
+ self.dir_cache.update(
166
+ {
167
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
168
+ for dirname in self._all_dirnames(set(entry.name))
169
+ }
170
+ )
171
+ f = {key: getattr(entry, fields[key]) for key in fields}
172
+ f["type"] = "directory" if entry.isdir else "file"
173
+ list_names.append(entry.name)
174
+
175
+ self.dir_cache[f["name"]] = f
176
+ # libarchive does not seem to return an entry for the directories (at least
177
+ # not in all formats), so get the directories names from the files names
178
+ self.dir_cache.update(
179
+ {
180
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
181
+ for dirname in self._all_dirnames(list_names)
182
+ }
183
+ )
184
+
185
+ def _open(
186
+ self,
187
+ path,
188
+ mode="rb",
189
+ block_size=None,
190
+ autocommit=True,
191
+ cache_options=None,
192
+ **kwargs,
193
+ ):
194
+ path = self._strip_protocol(path)
195
+ if mode != "rb":
196
+ raise NotImplementedError
197
+
198
+ data = bytes()
199
+ with self._open_archive() as arc:
200
+ for entry in arc:
201
+ if entry.pathname != path:
202
+ continue
203
+
204
+ if entry.size == 0:
205
+ # empty file, so there are no blocks
206
+ break
207
+
208
+ for block in entry.get_blocks(entry.size):
209
+ data = block
210
+ break
211
+ else:
212
+ raise ValueError
213
+ return MemoryFile(fs=self, path=path, data=data)
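A minimal usage sketch for LibArchiveFileSystem ("archive.tar" and the member name
are placeholders; any seekable, libarchive-supported archive would work):

    fs = LibArchiveFileSystem("archive.tar")
    print(fs.ls(""))                          # listing comes from the cached directory scan
    with fs.open("data/member.txt") as f:     # member is copied into a MemoryFile
        content = f.read()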
temp_venv/lib/python3.13/site-packages/fsspec/implementations/local.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import io
3
+ import logging
4
+ import os
5
+ import os.path as osp
6
+ import shutil
7
+ import stat
8
+ import tempfile
9
+
10
+ from fsspec import AbstractFileSystem
11
+ from fsspec.compression import compr
12
+ from fsspec.core import get_compression
13
+ from fsspec.utils import isfilelike, stringify_path
14
+
15
+ logger = logging.getLogger("fsspec.local")
16
+
17
+
18
+ class LocalFileSystem(AbstractFileSystem):
19
+ """Interface to files on local storage
20
+
21
+ Parameters
22
+ ----------
23
+ auto_mkdir: bool
24
+ Whether, when opening a file, the directory containing it should
25
+ be created (if it doesn't already exist). This is assumed by pyarrow
26
+ code.
27
+ """
28
+
29
+ root_marker = "/"
30
+ protocol = "file", "local"
31
+ local_file = True
32
+
33
+ def __init__(self, auto_mkdir=False, **kwargs):
34
+ super().__init__(**kwargs)
35
+ self.auto_mkdir = auto_mkdir
36
+
37
+ @property
38
+ def fsid(self):
39
+ return "local"
40
+
41
+ def mkdir(self, path, create_parents=True, **kwargs):
42
+ path = self._strip_protocol(path)
43
+ if self.exists(path):
44
+ raise FileExistsError(path)
45
+ if create_parents:
46
+ self.makedirs(path, exist_ok=True)
47
+ else:
48
+ os.mkdir(path, **kwargs)
49
+
50
+ def makedirs(self, path, exist_ok=False):
51
+ path = self._strip_protocol(path)
52
+ os.makedirs(path, exist_ok=exist_ok)
53
+
54
+ def rmdir(self, path):
55
+ path = self._strip_protocol(path)
56
+ os.rmdir(path)
57
+
58
+ def ls(self, path, detail=False, **kwargs):
59
+ path = self._strip_protocol(path)
60
+ path_info = self.info(path)
61
+ infos = []
62
+ if path_info["type"] == "directory":
63
+ with os.scandir(path) as it:
64
+ for f in it:
65
+ try:
66
+ # Only get the info if requested since it is a bit expensive (the stat call inside)
67
+ # The strip_protocol is also used in info() and calls make_path_posix to always return posix paths
68
+ info = self.info(f) if detail else self._strip_protocol(f.path)
69
+ infos.append(info)
70
+ except FileNotFoundError:
71
+ pass
72
+ else:
73
+ infos = [path_info] if detail else [path_info["name"]]
74
+
75
+ return infos
76
+
77
+ def info(self, path, **kwargs):
78
+ if isinstance(path, os.DirEntry):
79
+ # scandir DirEntry
80
+ out = path.stat(follow_symlinks=False)
81
+ link = path.is_symlink()
82
+ if path.is_dir(follow_symlinks=False):
83
+ t = "directory"
84
+ elif path.is_file(follow_symlinks=False):
85
+ t = "file"
86
+ else:
87
+ t = "other"
88
+
89
+ size = out.st_size
90
+ if link:
91
+ try:
92
+ out2 = path.stat(follow_symlinks=True)
93
+ size = out2.st_size
94
+ except OSError:
95
+ size = 0
96
+ path = self._strip_protocol(path.path)
97
+ else:
98
+ # str or path-like
99
+ path = self._strip_protocol(path)
100
+ out = os.stat(path, follow_symlinks=False)
101
+ link = stat.S_ISLNK(out.st_mode)
102
+ if link:
103
+ out = os.stat(path, follow_symlinks=True)
104
+ size = out.st_size
105
+ if stat.S_ISDIR(out.st_mode):
106
+ t = "directory"
107
+ elif stat.S_ISREG(out.st_mode):
108
+ t = "file"
109
+ else:
110
+ t = "other"
111
+ result = {
112
+ "name": path,
113
+ "size": size,
114
+ "type": t,
115
+ "created": out.st_ctime,
116
+ "islink": link,
117
+ }
118
+ for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
119
+ result[field] = getattr(out, f"st_{field}")
120
+ if link:
121
+ result["destination"] = os.readlink(path)
122
+ return result
123
+
124
+ def lexists(self, path, **kwargs):
125
+ return osp.lexists(path)
126
+
127
+ def cp_file(self, path1, path2, **kwargs):
128
+ path1 = self._strip_protocol(path1)
129
+ path2 = self._strip_protocol(path2)
130
+ if self.auto_mkdir:
131
+ self.makedirs(self._parent(path2), exist_ok=True)
132
+ if self.isfile(path1):
133
+ shutil.copyfile(path1, path2)
134
+ elif self.isdir(path1):
135
+ self.mkdirs(path2, exist_ok=True)
136
+ else:
137
+ raise FileNotFoundError(path1)
138
+
139
+ def isfile(self, path):
140
+ path = self._strip_protocol(path)
141
+ return os.path.isfile(path)
142
+
143
+ def isdir(self, path):
144
+ path = self._strip_protocol(path)
145
+ return os.path.isdir(path)
146
+
147
+ def get_file(self, path1, path2, callback=None, **kwargs):
148
+ if isfilelike(path2):
149
+ with open(path1, "rb") as f:
150
+ shutil.copyfileobj(f, path2)
151
+ else:
152
+ return self.cp_file(path1, path2, **kwargs)
153
+
154
+ def put_file(self, path1, path2, callback=None, **kwargs):
155
+ return self.cp_file(path1, path2, **kwargs)
156
+
157
+ def mv(self, path1, path2, **kwargs):
158
+ path1 = self._strip_protocol(path1)
159
+ path2 = self._strip_protocol(path2)
160
+ shutil.move(path1, path2)
161
+
162
+ def link(self, src, dst, **kwargs):
163
+ src = self._strip_protocol(src)
164
+ dst = self._strip_protocol(dst)
165
+ os.link(src, dst, **kwargs)
166
+
167
+ def symlink(self, src, dst, **kwargs):
168
+ src = self._strip_protocol(src)
169
+ dst = self._strip_protocol(dst)
170
+ os.symlink(src, dst, **kwargs)
171
+
172
+ def islink(self, path) -> bool:
173
+ return os.path.islink(self._strip_protocol(path))
174
+
175
+ def rm_file(self, path):
176
+ os.remove(self._strip_protocol(path))
177
+
178
+ def rm(self, path, recursive=False, maxdepth=None):
179
+ if not isinstance(path, list):
180
+ path = [path]
181
+
182
+ for p in path:
183
+ p = self._strip_protocol(p)
184
+ if self.isdir(p):
185
+ if not recursive:
186
+ raise ValueError("Cannot delete directory, set recursive=True")
187
+ if osp.abspath(p) == os.getcwd():
188
+ raise ValueError("Cannot delete current working directory")
189
+ shutil.rmtree(p)
190
+ else:
191
+ os.remove(p)
192
+
193
+ def unstrip_protocol(self, name):
194
+ name = self._strip_protocol(name) # normalise for local/win/...
195
+ return f"file://{name}"
196
+
197
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
198
+ path = self._strip_protocol(path)
199
+ if self.auto_mkdir and "w" in mode:
200
+ self.makedirs(self._parent(path), exist_ok=True)
201
+ return LocalFileOpener(path, mode, fs=self, **kwargs)
202
+
203
+ def touch(self, path, truncate=True, **kwargs):
204
+ path = self._strip_protocol(path)
205
+ if self.auto_mkdir:
206
+ self.makedirs(self._parent(path), exist_ok=True)
207
+ if self.exists(path):
208
+ os.utime(path, None)
209
+ else:
210
+ open(path, "a").close()
211
+ if truncate:
212
+ os.truncate(path, 0)
213
+
214
+ def created(self, path):
215
+ info = self.info(path=path)
216
+ return datetime.datetime.fromtimestamp(
217
+ info["created"], tz=datetime.timezone.utc
218
+ )
219
+
220
+ def modified(self, path):
221
+ info = self.info(path=path)
222
+ return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
223
+
224
+ @classmethod
225
+ def _parent(cls, path):
226
+ path = cls._strip_protocol(path)
227
+ if os.sep == "/":
228
+ # posix native
229
+ return path.rsplit("/", 1)[0] or "/"
230
+ else:
231
+ # NT
232
+ path_ = path.rsplit("/", 1)[0]
233
+ if len(path_) <= 3:
234
+ if path_[1:2] == ":":
235
+ # nt root (something like c:/)
236
+ return path_[0] + ":/"
237
+ # More cases may be required here
238
+ return path_
239
+
240
+ @classmethod
241
+ def _strip_protocol(cls, path):
242
+ path = stringify_path(path)
243
+ if path.startswith("file://"):
244
+ path = path[7:]
245
+ elif path.startswith("file:"):
246
+ path = path[5:]
247
+ elif path.startswith("local://"):
248
+ path = path[8:]
249
+ elif path.startswith("local:"):
250
+ path = path[6:]
251
+
252
+ path = make_path_posix(path)
253
+ if os.sep != "/":
254
+ # This code-path is a stripped down version of
255
+ # > drive, path = ntpath.splitdrive(path)
256
+ if path[1:2] == ":":
257
+ # Absolute drive-letter path, e.g. X:\Windows
258
+ # Relative path with drive, e.g. X:Windows
259
+ drive, path = path[:2], path[2:]
260
+ elif path[:2] == "//":
261
+ # UNC drives, e.g. \\server\share or \\?\UNC\server\share
262
+ # Device drives, e.g. \\.\device or \\?\device
263
+ if (index1 := path.find("/", 2)) == -1 or (
264
+ index2 := path.find("/", index1 + 1)
265
+ ) == -1:
266
+ drive, path = path, ""
267
+ else:
268
+ drive, path = path[:index2], path[index2:]
269
+ else:
270
+ # Relative path, e.g. Windows
271
+ drive = ""
272
+
273
+ path = path.rstrip("/") or cls.root_marker
274
+ return drive + path
275
+
276
+ else:
277
+ return path.rstrip("/") or cls.root_marker
278
+
279
+ def _isfilestore(self):
280
+ # Inheriting from DaskFileSystem makes this False (S3, etc. were
281
+ # the original motivation). But we are a posix-like file system.
282
+ # See https://github.com/dask/dask/issues/5526
283
+ return True
284
+
285
+ def chmod(self, path, mode):
286
+ path = stringify_path(path)
287
+ return os.chmod(path, mode)
288
+
289
+
290
+ def make_path_posix(path):
291
+ """Make path generic and absolute for current OS"""
292
+ if not isinstance(path, str):
293
+ if isinstance(path, (list, set, tuple)):
294
+ return type(path)(make_path_posix(p) for p in path)
295
+ else:
296
+ path = stringify_path(path)
297
+ if not isinstance(path, str):
298
+ raise TypeError(f"could not convert {path!r} to string")
299
+ if os.sep == "/":
300
+ # Native posix
301
+ if path.startswith("/"):
302
+ # most common fast case for posix
303
+ return path
304
+ elif path.startswith("~"):
305
+ return osp.expanduser(path)
306
+ elif path.startswith("./"):
307
+ path = path[2:]
308
+ elif path == ".":
309
+ path = ""
310
+ return f"{os.getcwd()}/{path}"
311
+ else:
312
+ # NT handling
313
+ if path[0:1] == "/" and path[2:3] == ":":
314
+ # path is like "/c:/local/path"
315
+ path = path[1:]
316
+ if path[1:2] == ":":
317
+ # windows full path like "C:\\local\\path"
318
+ if len(path) <= 3:
319
+ # nt root (something like c:/)
320
+ return path[0] + ":/"
321
+ path = path.replace("\\", "/")
322
+ return path
323
+ elif path[0:1] == "~":
324
+ return make_path_posix(osp.expanduser(path))
325
+ elif path.startswith(("\\\\", "//")):
326
+ # windows UNC/DFS-style paths
327
+ return "//" + path[2:].replace("\\", "/")
328
+ elif path.startswith(("\\", "/")):
329
+ # windows relative path with root
330
+ path = path.replace("\\", "/")
331
+ return f"{osp.splitdrive(os.getcwd())[0]}{path}"
332
+ else:
333
+ path = path.replace("\\", "/")
334
+ if path.startswith("./"):
335
+ path = path[2:]
336
+ elif path == ".":
337
+ path = ""
338
+ return f"{make_path_posix(os.getcwd())}/{path}"
339
+
340
+
341
+ def trailing_sep(path):
342
+ """Return True if the path ends with a path separator.
343
+
344
+ A forward slash is always considered a path separator, even on Operating
345
+ Systems that normally use a backslash.
346
+ """
347
+ # TODO: if all incoming paths were posix-compliant then separator would
348
+ # always be a forward slash, simplifying this function.
349
+ # See https://github.com/fsspec/filesystem_spec/pull/1250
350
+ return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
351
+
352
+
353
+ class LocalFileOpener(io.IOBase):
354
+ def __init__(
355
+ self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
356
+ ):
357
+ logger.debug("open file: %s", path)
358
+ self.path = path
359
+ self.mode = mode
360
+ self.fs = fs
361
+ self.f = None
362
+ self.autocommit = autocommit
363
+ self.compression = get_compression(path, compression)
364
+ self.blocksize = io.DEFAULT_BUFFER_SIZE
365
+ self._open()
366
+
367
+ def _open(self):
368
+ if self.f is None or self.f.closed:
369
+ if self.autocommit or "w" not in self.mode:
370
+ self.f = open(self.path, mode=self.mode)
371
+ if self.compression:
372
+ compress = compr[self.compression]
373
+ self.f = compress(self.f, mode=self.mode)
374
+ else:
375
+ # TODO: check if path is writable?
376
+ i, name = tempfile.mkstemp()
377
+ os.close(i) # we want normal open and normal buffered file
378
+ self.temp = name
379
+ self.f = open(name, mode=self.mode)
380
+ if "w" not in self.mode:
381
+ self.size = self.f.seek(0, 2)
382
+ self.f.seek(0)
383
+ self.f.size = self.size
384
+
385
+ def _fetch_range(self, start, end):
386
+ # probably only used by cached FS
387
+ if "r" not in self.mode:
388
+ raise ValueError
389
+ self._open()
390
+ self.f.seek(start)
391
+ return self.f.read(end - start)
392
+
393
+ def __setstate__(self, state):
394
+ self.f = None
395
+ loc = state.pop("loc", None)
396
+ self.__dict__.update(state)
397
+ if "r" in state["mode"]:
398
+ self.f = None
399
+ self._open()
400
+ self.f.seek(loc)
401
+
402
+ def __getstate__(self):
403
+ d = self.__dict__.copy()
404
+ d.pop("f")
405
+ if "r" in self.mode:
406
+ d["loc"] = self.f.tell()
407
+ else:
408
+ if not self.f.closed:
409
+ raise ValueError("Cannot serialise open write-mode local file")
410
+ return d
411
+
412
+ def commit(self):
413
+ if self.autocommit:
414
+ raise RuntimeError("Can only commit if not already set to autocommit")
415
+ shutil.move(self.temp, self.path)
416
+
417
+ def discard(self):
418
+ if self.autocommit:
419
+ raise RuntimeError("Cannot discard if set to autocommit")
420
+ os.remove(self.temp)
421
+
422
+ def readable(self) -> bool:
423
+ return True
424
+
425
+ def writable(self) -> bool:
426
+ return "r" not in self.mode
427
+
428
+ def read(self, *args, **kwargs):
429
+ return self.f.read(*args, **kwargs)
430
+
431
+ def write(self, *args, **kwargs):
432
+ return self.f.write(*args, **kwargs)
433
+
434
+ def tell(self, *args, **kwargs):
435
+ return self.f.tell(*args, **kwargs)
436
+
437
+ def seek(self, *args, **kwargs):
438
+ return self.f.seek(*args, **kwargs)
439
+
440
+ def seekable(self, *args, **kwargs):
441
+ return self.f.seekable(*args, **kwargs)
442
+
443
+ def readline(self, *args, **kwargs):
444
+ return self.f.readline(*args, **kwargs)
445
+
446
+ def readlines(self, *args, **kwargs):
447
+ return self.f.readlines(*args, **kwargs)
448
+
449
+ def close(self):
450
+ return self.f.close()
451
+
452
+ def truncate(self, size=None) -> int:
453
+ return self.f.truncate(size)
454
+
455
+ @property
456
+ def closed(self):
457
+ return self.f.closed
458
+
459
+ def fileno(self):
460
+ return self.raw.fileno()
461
+
462
+ def flush(self) -> None:
463
+ self.f.flush()
464
+
465
+ def __iter__(self):
466
+ return self.f.__iter__()
467
+
468
+ def __getattr__(self, item):
469
+ return getattr(self.f, item)
470
+
471
+ def __enter__(self):
472
+ self._incontext = True
473
+ return self
474
+
475
+ def __exit__(self, exc_type, exc_value, traceback):
476
+ self._incontext = False
477
+ self.f.__exit__(exc_type, exc_value, traceback)
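A minimal usage sketch for LocalFileSystem, writing into a throwaway temporary
directory so nothing outside it is touched:

    import os
    import tempfile

    fs = LocalFileSystem(auto_mkdir=True)
    root = tempfile.mkdtemp()
    target = os.path.join(root, "sub", "file.txt")
    with fs.open(target, "wb") as f:          # auto_mkdir creates "sub/" on demand
        f.write(b"data")
    print(fs.info(target)["size"])            # 4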
temp_venv/lib/python3.13/site-packages/fsspec/implementations/memory.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from errno import ENOTEMPTY
6
+ from io import BytesIO
7
+ from pathlib import PurePath, PureWindowsPath
8
+ from typing import Any, ClassVar
9
+
10
+ from fsspec import AbstractFileSystem
11
+ from fsspec.implementations.local import LocalFileSystem
12
+ from fsspec.utils import stringify_path
13
+
14
+ logger = logging.getLogger("fsspec.memoryfs")
15
+
16
+
17
+ class MemoryFileSystem(AbstractFileSystem):
18
+ """A filesystem based on a dict of BytesIO objects
19
+
20
+ This is a global filesystem so instances of this class all point to the same
21
+ in-memory filesystem.
22
+ """
23
+
24
+ store: ClassVar[dict[str, Any]] = {} # global, do not overwrite!
25
+ pseudo_dirs = [""] # global, do not overwrite!
26
+ protocol = "memory"
27
+ root_marker = "/"
28
+
29
+ @classmethod
30
+ def _strip_protocol(cls, path):
31
+ if isinstance(path, PurePath):
32
+ if isinstance(path, PureWindowsPath):
33
+ return LocalFileSystem._strip_protocol(path)
34
+ else:
35
+ path = stringify_path(path)
36
+
37
+ if path.startswith("memory://"):
38
+ path = path[len("memory://") :]
39
+ if "::" in path or "://" in path:
40
+ return path.rstrip("/")
41
+ path = path.lstrip("/").rstrip("/")
42
+ return "/" + path if path else ""
43
+
44
+ def ls(self, path, detail=True, **kwargs):
45
+ path = self._strip_protocol(path)
46
+ if path in self.store:
47
+ # there is a key with this exact name
48
+ if not detail:
49
+ return [path]
50
+ return [
51
+ {
52
+ "name": path,
53
+ "size": self.store[path].size,
54
+ "type": "file",
55
+ "created": self.store[path].created.timestamp(),
56
+ }
57
+ ]
58
+ paths = set()
59
+ starter = path + "/"
60
+ out = []
61
+ for p2 in tuple(self.store):
62
+ if p2.startswith(starter):
63
+ if "/" not in p2[len(starter) :]:
64
+ # exact child
65
+ out.append(
66
+ {
67
+ "name": p2,
68
+ "size": self.store[p2].size,
69
+ "type": "file",
70
+ "created": self.store[p2].created.timestamp(),
71
+ }
72
+ )
73
+ elif len(p2) > len(starter):
74
+ # implied child directory
75
+ ppath = starter + p2[len(starter) :].split("/", 1)[0]
76
+ if ppath not in paths:
77
+ out = out or []
78
+ out.append(
79
+ {
80
+ "name": ppath,
81
+ "size": 0,
82
+ "type": "directory",
83
+ }
84
+ )
85
+ paths.add(ppath)
86
+ for p2 in self.pseudo_dirs:
87
+ if p2.startswith(starter):
88
+ if "/" not in p2[len(starter) :]:
89
+ # exact child pdir
90
+ if p2 not in paths:
91
+ out.append({"name": p2, "size": 0, "type": "directory"})
92
+ paths.add(p2)
93
+ else:
94
+ # directory implied by deeper pdir
95
+ ppath = starter + p2[len(starter) :].split("/", 1)[0]
96
+ if ppath not in paths:
97
+ out.append({"name": ppath, "size": 0, "type": "directory"})
98
+ paths.add(ppath)
99
+ if not out:
100
+ if path in self.pseudo_dirs:
101
+ # empty dir
102
+ return []
103
+ raise FileNotFoundError(path)
104
+ if detail:
105
+ return out
106
+ return sorted([f["name"] for f in out])
107
+
108
+ def mkdir(self, path, create_parents=True, **kwargs):
109
+ path = self._strip_protocol(path)
110
+ if path in self.store or path in self.pseudo_dirs:
111
+ raise FileExistsError(path)
112
+ if self._parent(path).strip("/") and self.isfile(self._parent(path)):
113
+ raise NotADirectoryError(self._parent(path))
114
+ if create_parents and self._parent(path).strip("/"):
115
+ try:
116
+ self.mkdir(self._parent(path), create_parents, **kwargs)
117
+ except FileExistsError:
118
+ pass
119
+ if path and path not in self.pseudo_dirs:
120
+ self.pseudo_dirs.append(path)
121
+
122
+ def makedirs(self, path, exist_ok=False):
123
+ try:
124
+ self.mkdir(path, create_parents=True)
125
+ except FileExistsError:
126
+ if not exist_ok:
127
+ raise
128
+
129
+ def pipe_file(self, path, value, mode="overwrite", **kwargs):
130
+ """Set the bytes of given file
131
+
132
+ Avoids copies of the data if possible
133
+ """
134
+ mode = "xb" if mode == "create" else "wb"
135
+ self.open(path, mode=mode, data=value)
136
+
137
+ def rmdir(self, path):
138
+ path = self._strip_protocol(path)
139
+ if path == "":
140
+ # silently avoid deleting FS root
141
+ return
142
+ if path in self.pseudo_dirs:
143
+ if not self.ls(path):
144
+ self.pseudo_dirs.remove(path)
145
+ else:
146
+ raise OSError(ENOTEMPTY, "Directory not empty", path)
147
+ else:
148
+ raise FileNotFoundError(path)
149
+
150
+ def info(self, path, **kwargs):
151
+ logger.debug("info: %s", path)
152
+ path = self._strip_protocol(path)
153
+ if path in self.pseudo_dirs or any(
154
+ p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
155
+ ):
156
+ return {
157
+ "name": path,
158
+ "size": 0,
159
+ "type": "directory",
160
+ }
161
+ elif path in self.store:
162
+ filelike = self.store[path]
163
+ return {
164
+ "name": path,
165
+ "size": filelike.size,
166
+ "type": "file",
167
+ "created": getattr(filelike, "created", None),
168
+ }
169
+ else:
170
+ raise FileNotFoundError(path)
171
+
172
+ def _open(
173
+ self,
174
+ path,
175
+ mode="rb",
176
+ block_size=None,
177
+ autocommit=True,
178
+ cache_options=None,
179
+ **kwargs,
180
+ ):
181
+ path = self._strip_protocol(path)
182
+ if "x" in mode and self.exists(path):
183
+ raise FileExistsError
184
+ if path in self.pseudo_dirs:
185
+ raise IsADirectoryError(path)
186
+ parent = path
187
+ while len(parent) > 1:
188
+ parent = self._parent(parent)
189
+ if self.isfile(parent):
190
+ raise FileExistsError(parent)
191
+ if mode in ["rb", "ab", "r+b"]:
192
+ if path in self.store:
193
+ f = self.store[path]
194
+ if mode == "ab":
195
+ # position at the end of file
196
+ f.seek(0, 2)
197
+ else:
198
+ # position at the beginning of file
199
+ f.seek(0)
200
+ return f
201
+ else:
202
+ raise FileNotFoundError(path)
203
+ elif mode in {"wb", "xb"}:
204
+ if mode == "xb" and self.exists(path):
205
+ raise FileExistsError
206
+ m = MemoryFile(self, path, kwargs.get("data"))
207
+ if not self._intrans:
208
+ m.commit()
209
+ return m
210
+ else:
211
+ name = self.__class__.__name__
212
+ raise ValueError(f"unsupported file mode for {name}: {mode!r}")
213
+
214
+ def cp_file(self, path1, path2, **kwargs):
215
+ path1 = self._strip_protocol(path1)
216
+ path2 = self._strip_protocol(path2)
217
+ if self.isfile(path1):
218
+ self.store[path2] = MemoryFile(
219
+ self, path2, self.store[path1].getvalue()
220
+ ) # implicit copy
221
+ elif self.isdir(path1):
222
+ if path2 not in self.pseudo_dirs:
223
+ self.pseudo_dirs.append(path2)
224
+ else:
225
+ raise FileNotFoundError(path1)
226
+
227
+ def cat_file(self, path, start=None, end=None, **kwargs):
228
+ logger.debug("cat: %s", path)
229
+ path = self._strip_protocol(path)
230
+ try:
231
+ return bytes(self.store[path].getbuffer()[start:end])
232
+ except KeyError as e:
233
+ raise FileNotFoundError(path) from e
234
+
235
+ def _rm(self, path):
236
+ path = self._strip_protocol(path)
237
+ try:
238
+ del self.store[path]
239
+ except KeyError as e:
240
+ raise FileNotFoundError(path) from e
241
+
242
+ def modified(self, path):
243
+ path = self._strip_protocol(path)
244
+ try:
245
+ return self.store[path].modified
246
+ except KeyError as e:
247
+ raise FileNotFoundError(path) from e
248
+
249
+ def created(self, path):
250
+ path = self._strip_protocol(path)
251
+ try:
252
+ return self.store[path].created
253
+ except KeyError as e:
254
+ raise FileNotFoundError(path) from e
255
+
256
+ def isfile(self, path):
257
+ path = self._strip_protocol(path)
258
+ return path in self.store
259
+
260
+ def rm(self, path, recursive=False, maxdepth=None):
261
+ if isinstance(path, str):
262
+ path = self._strip_protocol(path)
263
+ else:
264
+ path = [self._strip_protocol(p) for p in path]
265
+ paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
266
+ for p in reversed(paths):
267
+ if self.isfile(p):
268
+ self.rm_file(p)
269
+ # If the expanded path doesn't exist, it is only because the expanded
270
+ # path was a directory that does not exist in self.pseudo_dirs. This
271
+ # is possible if you directly create files without making the
272
+ # directories first.
273
+ elif not self.exists(p):
274
+ continue
275
+ else:
276
+ self.rmdir(p)
277
+
278
+
279
+ class MemoryFile(BytesIO):
280
+ """A BytesIO which can't close and works as a context manager
281
+
282
+ Can initialise with data. Each path should only be active once at any moment.
283
+
284
+ No need to provide fs, path if auto-committing (default)
285
+ """
286
+
287
+ def __init__(self, fs=None, path=None, data=None):
288
+ logger.debug("open file %s", path)
289
+ self.fs = fs
290
+ self.path = path
291
+ self.created = datetime.now(tz=timezone.utc)
292
+ self.modified = datetime.now(tz=timezone.utc)
293
+ if data:
294
+ super().__init__(data)
295
+ self.seek(0)
296
+
297
+ @property
298
+ def size(self):
299
+ return self.getbuffer().nbytes
300
+
301
+ def __enter__(self):
302
+ return self
303
+
304
+ def close(self):
305
+ pass
306
+
307
+ def discard(self):
308
+ pass
309
+
310
+ def commit(self):
311
+ self.fs.store[self.path] = self
312
+ self.modified = datetime.now(tz=timezone.utc)
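A minimal usage sketch for MemoryFileSystem; because the store is a class-level
dict, every instance sees the same in-memory contents:

    fs = MemoryFileSystem()
    fs.pipe_file("/dir/a.bin", b"abc")
    print(fs.ls("/dir", detail=False))        # ["/dir/a.bin"]
    print(fs.cat_file("/dir/a.bin"))          # b"abc"
    fs.rm("/dir", recursive=True)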
temp_venv/lib/python3.13/site-packages/fsspec/implementations/reference.py ADDED
@@ -0,0 +1,1305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import collections
3
+ import io
4
+ import itertools
5
+ import logging
6
+ import math
7
+ import os
8
+ from functools import lru_cache
9
+ from itertools import chain
10
+ from typing import TYPE_CHECKING, Literal
11
+
12
+ import fsspec.core
13
+ from fsspec.spec import AbstractBufferedFile
14
+
15
+ try:
16
+ import ujson as json
17
+ except ImportError:
18
+ if not TYPE_CHECKING:
19
+ import json
20
+
21
+ from fsspec.asyn import AsyncFileSystem
22
+ from fsspec.callbacks import DEFAULT_CALLBACK
23
+ from fsspec.core import filesystem, open, split_protocol
24
+ from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
25
+ from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
26
+
27
+ logger = logging.getLogger("fsspec.reference")
28
+
29
+
30
+ class ReferenceNotReachable(RuntimeError):
31
+ def __init__(self, reference, target, *args):
32
+ super().__init__(*args)
33
+ self.reference = reference
34
+ self.target = target
35
+
36
+ def __str__(self):
37
+ return f'Reference "{self.reference}" failed to fetch target {self.target}'
38
+
39
+
40
+ def _first(d):
41
+ return next(iter(d.values()))
42
+
43
+
44
+ def _prot_in_references(path, references):
45
+ ref = references.get(path)
46
+ if isinstance(ref, (list, tuple)) and isinstance(ref[0], str):
47
+ return split_protocol(ref[0])[0] if ref[0] else ref[0]
48
+
49
+
50
+ def _protocol_groups(paths, references):
51
+ if isinstance(paths, str):
52
+ return {_prot_in_references(paths, references): [paths]}
53
+ out = {}
54
+ for path in paths:
55
+ protocol = _prot_in_references(path, references)
56
+ out.setdefault(protocol, []).append(path)
57
+ return out
58
+
59
+
60
+ class RefsValuesView(collections.abc.ValuesView):
61
+ def __iter__(self):
62
+ for val in self._mapping.zmetadata.values():
63
+ yield json.dumps(val).encode()
64
+ yield from self._mapping._items.values()
65
+ for field in self._mapping.listdir():
66
+ chunk_sizes = self._mapping._get_chunk_sizes(field)
67
+ if len(chunk_sizes) == 0:
68
+ yield self._mapping[field + "/0"]
69
+ continue
70
+ yield from self._mapping._generate_all_records(field)
71
+
72
+
73
+ class RefsItemsView(collections.abc.ItemsView):
74
+ def __iter__(self):
75
+ return zip(self._mapping.keys(), self._mapping.values())
76
+
77
+
78
+ def ravel_multi_index(idx, sizes):
79
+ val = 0
80
+ mult = 1
81
+ for i, s in zip(idx[::-1], sizes[::-1]):
82
+ val += i * mult
83
+ mult *= s
84
+ return val
85
+
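# Illustrative check (not part of the upstream file): ravel_multi_index flattens
# an N-dimensional chunk index into a flat chunk number, row-major, so for a
# 3 x 4 chunk grid the index [1, 2] maps to 1 * 4 + 2 == 6.
assert ravel_multi_index([1, 2], [3, 4]) == 6
assert ravel_multi_index([0, 0], [3, 4]) == 0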
86
+
87
+ class LazyReferenceMapper(collections.abc.MutableMapping):
88
+ """This interface can be used to read/write references from Parquet stores.
89
+ It is not intended for other types of references.
90
+ It can be used with Kerchunk's MultiZarrToZarr method to combine
91
+ references into a parquet store.
92
+ Examples of this use-case can be found here:
93
+ https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
94
+
95
+ # import is class level to prevent numpy dep requirement for fsspec
96
+ @property
97
+ def np(self):
98
+ import numpy as np
99
+
100
+ return np
101
+
102
+ @property
103
+ def pd(self):
104
+ import pandas as pd
105
+
106
+ return pd
107
+
108
+ def __init__(
109
+ self,
110
+ root,
111
+ fs=None,
112
+ out_root=None,
113
+ cache_size=128,
114
+ categorical_threshold=10,
115
+ engine: Literal["fastparquet", "pyarrow"] = "fastparquet",
116
+ ):
117
+ """
118
+
119
+ This instance will be writable, storing changes in memory until full partitions
120
+ are accumulated or .flush() is called.
121
+
122
+ To create an empty lazy store, use .create()
123
+
124
+ Parameters
125
+ ----------
126
+ root : str
127
+ Root of parquet store
128
+ fs : fsspec.AbstractFileSystem
129
+ fsspec filesystem object, default is local filesystem.
130
+ cache_size : int, default=128
131
+ Maximum size of LRU cache, where cache_size*record_size denotes
132
+ the total number of references that can be loaded in memory at once.
133
+ categorical_threshold : int
134
+ Encode urls as pandas.Categorical to reduce memory footprint if the ratio
135
+ of the number of unique urls to total number of refs for each variable
136
+ is greater than or equal to this number. (default 10)
137
+ engine: Literal["fastparquet","pyarrow"]
138
+ Engine choice for reading parquet files. (default is "fastparquet")
139
+ """
140
+
141
+ self.root = root
142
+ self.chunk_sizes = {}
143
+ self.cat_thresh = categorical_threshold
144
+ self.engine = engine
145
+ self.cache_size = cache_size
146
+ self.url = self.root + "/{field}/refs.{record}.parq"
147
+ # TODO: derive fs from `root`
148
+ self.fs = fsspec.filesystem("file") if fs is None else fs
149
+ self.out_root = self.fs.unstrip_protocol(out_root or self.root)
150
+
151
+ from importlib.util import find_spec
152
+
153
+ if self.engine == "pyarrow" and find_spec("pyarrow") is None:
154
+ raise ImportError("engine choice `pyarrow` is not installed.")
155
+
156
+ def __getattr__(self, item):
157
+ if item in ("_items", "record_size", "zmetadata"):
158
+ self.setup()
159
+ # avoid possible recursion if setup fails somehow
160
+ return self.__dict__[item]
161
+ raise AttributeError(item)
162
+
163
+ def setup(self):
164
+ self._items = {}
165
+ self._items[".zmetadata"] = self.fs.cat_file(
166
+ "/".join([self.root, ".zmetadata"])
167
+ )
168
+ met = json.loads(self._items[".zmetadata"])
169
+ self.record_size = met["record_size"]
170
+ self.zmetadata = met["metadata"]
171
+
172
+ # Define function to open and decompress refs
173
+ @lru_cache(maxsize=self.cache_size)
174
+ def open_refs(field, record):
175
+ """cached parquet file loader"""
176
+ path = self.url.format(field=field, record=record)
177
+ data = io.BytesIO(self.fs.cat_file(path))
178
+ try:
179
+ df = self.pd.read_parquet(data, engine=self.engine)
180
+ refs = {c: df[c].to_numpy() for c in df.columns}
181
+ except OSError:
182
+ refs = None
183
+ return refs
184
+
185
+ self.open_refs = open_refs
186
+
187
+ @staticmethod
188
+ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
189
+ """Make empty parquet reference set
190
+
191
+ First deletes the contents of the given directory, if it exists.
192
+
193
+ Parameters
194
+ ----------
195
+ root: str
196
+ Directory to contain the output; will be created
197
+ storage_options: dict | None
198
+ For making the filesystem to use for writing is fs is None
199
+ fs: FileSystem | None
200
+ Filesystem for writing
201
+ record_size: int
202
+ Number of references per parquet file
203
+ kwargs: passed to __init__
204
+
205
+ Returns
206
+ -------
207
+ LazyReferenceMapper instance
208
+ """
209
+ met = {"metadata": {}, "record_size": record_size}
210
+ if fs is None:
211
+ fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
212
+ if fs.exists(root):
213
+ fs.rm(root, recursive=True)
214
+ fs.makedirs(root, exist_ok=True)
215
+ fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
216
+ return LazyReferenceMapper(root, fs, **kwargs)
217
+
218
+ @lru_cache()
219
+ def listdir(self):
220
+ """List top-level directories"""
221
+ dirs = (p.rsplit("/", 1)[0] for p in self.zmetadata if not p.startswith(".z"))
222
+ return set(dirs)
223
+
224
+ def ls(self, path="", detail=True):
225
+ """Shortcut file listings"""
226
+ path = path.rstrip("/")
227
+ pathdash = path + "/" if path else ""
228
+ dirnames = self.listdir()
229
+ dirs = [
230
+ d
231
+ for d in dirnames
232
+ if d.startswith(pathdash) and "/" not in d.lstrip(pathdash)
233
+ ]
234
+ if dirs:
235
+ others = {
236
+ f
237
+ for f in chain(
238
+ [".zmetadata"],
239
+ (name for name in self.zmetadata),
240
+ (name for name in self._items),
241
+ )
242
+ if f.startswith(pathdash) and "/" not in f.lstrip(pathdash)
243
+ }
244
+ if detail is False:
245
+ others.update(dirs)
246
+ return sorted(others)
247
+ dirinfo = [{"name": name, "type": "directory", "size": 0} for name in dirs]
248
+ fileinfo = [
249
+ {
250
+ "name": name,
251
+ "type": "file",
252
+ "size": len(
253
+ json.dumps(self.zmetadata[name])
254
+ if name in self.zmetadata
255
+ else self._items[name]
256
+ ),
257
+ }
258
+ for name in others
259
+ ]
260
+ return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
261
+ field = path
262
+ others = set(
263
+ [name for name in self.zmetadata if name.startswith(f"{path}/")]
264
+ + [name for name in self._items if name.startswith(f"{path}/")]
265
+ )
266
+ fileinfo = [
267
+ {
268
+ "name": name,
269
+ "type": "file",
270
+ "size": len(
271
+ json.dumps(self.zmetadata[name])
272
+ if name in self.zmetadata
273
+ else self._items[name]
274
+ ),
275
+ }
276
+ for name in others
277
+ ]
278
+ keys = self._keys_in_field(field)
279
+
280
+ if detail is False:
281
+ return list(others) + list(keys)
282
+ recs = self._generate_all_records(field)
283
+ recinfo = [
284
+ {"name": name, "type": "file", "size": rec[-1]}
285
+ for name, rec in zip(keys, recs)
286
+ if rec[0] # filters out path==None, deleted/missing
287
+ ]
288
+ return fileinfo + recinfo
289
+
290
+ def _load_one_key(self, key):
291
+ """Get the reference for one key
292
+
293
+ Returns bytes, one-element list or three-element list.
294
+ """
295
+ if key in self._items:
296
+ return self._items[key]
297
+ elif key in self.zmetadata:
298
+ return json.dumps(self.zmetadata[key]).encode()
299
+ elif "/" not in key or self._is_meta(key):
300
+ raise KeyError(key)
301
+ field, _ = key.rsplit("/", 1)
302
+ record, ri, chunk_size = self._key_to_record(key)
303
+ maybe = self._items.get((field, record), {}).get(ri, False)
304
+ if maybe is None:
305
+ # explicitly deleted
306
+ raise KeyError
307
+ elif maybe:
308
+ return maybe
309
+ elif chunk_size == 0:
310
+ return b""
311
+
312
+ # Chunk keys can be loaded from row group and cached in LRU cache
313
+ try:
314
+ refs = self.open_refs(field, record)
315
+ except (ValueError, TypeError, FileNotFoundError) as exc:
316
+ raise KeyError(key) from exc
317
+ columns = ["path", "offset", "size", "raw"]
318
+ selection = [refs[c][ri] if c in refs else None for c in columns]
319
+ raw = selection[-1]
320
+ if raw is not None:
321
+ return raw
322
+ if selection[0] is None:
323
+ raise KeyError("This reference does not exist or has been deleted")
324
+ if selection[1:3] == [0, 0]:
325
+ # URL only
326
+ return selection[:1]
327
+ # URL, offset, size
328
+ return selection[:3]
329
+
330
+ @lru_cache(4096)
331
+ def _key_to_record(self, key):
332
+ """Details needed to construct a reference for one key"""
333
+ field, chunk = key.rsplit("/", 1)
334
+ chunk_sizes = self._get_chunk_sizes(field)
335
+ if len(chunk_sizes) == 0:
336
+ return 0, 0, 0
337
+ chunk_idx = [int(c) for c in chunk.split(".")]
338
+ chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
339
+ record = chunk_number // self.record_size
340
+ ri = chunk_number % self.record_size
341
+ return record, ri, len(chunk_sizes)
342
+
343
+ def _get_chunk_sizes(self, field):
344
+ """The number of chunks along each axis for a given field"""
345
+ if field not in self.chunk_sizes:
346
+ zarray = self.zmetadata[f"{field}/.zarray"]
347
+ size_ratio = [
348
+ math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
349
+ ]
350
+ self.chunk_sizes[field] = size_ratio or [1]
351
+ return self.chunk_sizes[field]
352
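To make the addressing above concrete: a key like ``field/i.j`` is flattened into a single chunk number, which is then split into a parquet record index and a row within that record. A small worked sketch (numpy's ravel stands in for the module-level helper; the numbers are illustrative):

import numpy as np

record_size = 10000        # references per parquet file
chunk_sizes = [100, 50]    # chunks per axis, i.e. ceil(shape / chunks)

key = "field/72.31"
chunk_idx = [int(c) for c in key.split("/")[1].split(".")]
chunk_number = int(np.ravel_multi_index(chunk_idx, chunk_sizes))  # 72*50 + 31 = 3631

record = chunk_number // record_size   # 0    -> stored in refs.0.parq
row = chunk_number % record_size       # 3631 -> row within that file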
+
353
+ def _generate_record(self, field, record):
354
+ """The references for a given parquet file of a given field"""
355
+ refs = self.open_refs(field, record)
356
+ it = iter(zip(*refs.values()))
357
+ if len(refs) == 3:
358
+ # All urls
359
+ return (list(t) for t in it)
360
+ elif len(refs) == 1:
361
+ # All raws
362
+ return refs["raw"]
363
+ else:
364
+ # Mix of urls and raws
365
+ return (list(t[:3]) if not t[3] else t[3] for t in it)
366
+
367
+ def _generate_all_records(self, field):
368
+ """Load all the references within a field by iterating over the parquet files"""
369
+ nrec = 1
370
+ for ch in self._get_chunk_sizes(field):
371
+ nrec *= ch
372
+ nrec = math.ceil(nrec / self.record_size)
373
+ for record in range(nrec):
374
+ yield from self._generate_record(field, record)
375
+
376
+ def values(self):
377
+ return RefsValuesView(self)
378
+
379
+ def items(self):
380
+ return RefsItemsView(self)
381
+
382
+ def __hash__(self):
383
+ return id(self)
384
+
385
+ def __getitem__(self, key):
386
+ return self._load_one_key(key)
387
+
388
+ def __setitem__(self, key, value):
389
+ if "/" in key and not self._is_meta(key):
390
+ field, chunk = key.rsplit("/", 1)
391
+ record, i, _ = self._key_to_record(key)
392
+ subdict = self._items.setdefault((field, record), {})
393
+ subdict[i] = value
394
+ if len(subdict) == self.record_size:
395
+ self.write(field, record)
396
+ else:
397
+ # metadata or top-level
398
+ if hasattr(value, "to_bytes"):
399
+ val = value.to_bytes().decode()
400
+ elif isinstance(value, bytes):
401
+ val = value.decode()
402
+ else:
403
+ val = value
404
+ self._items[key] = val
405
+ new_value = json.loads(val)
406
+ self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
407
+
408
+ @staticmethod
409
+ def _is_meta(key):
410
+ return key.startswith(".z") or "/.z" in key
411
+
412
+ def __delitem__(self, key):
413
+ if key in self._items:
414
+ del self._items[key]
415
+ elif key in self.zmetadata:
416
+ del self.zmetadata[key]
417
+ else:
418
+ if "/" in key and not self._is_meta(key):
419
+ field, _ = key.rsplit("/", 1)
420
+ record, i, _ = self._key_to_record(key)
421
+ subdict = self._items.setdefault((field, record), {})
422
+ subdict[i] = None
423
+ if len(subdict) == self.record_size:
424
+ self.write(field, record)
425
+ else:
426
+ # metadata or top-level
427
+ self._items[key] = None
428
+
429
+ def write(self, field, record, base_url=None, storage_options=None):
430
+ # extra requirements if writing
431
+ import kerchunk.df
432
+ import numpy as np
433
+ import pandas as pd
434
+
435
+ partition = self._items[(field, record)]
436
+ original = False
437
+ if len(partition) < self.record_size:
438
+ try:
439
+ original = self.open_refs(field, record)
440
+ except OSError:
441
+ pass
442
+
443
+ if original:
444
+ paths = original["path"]
445
+ offsets = original["offset"]
446
+ sizes = original["size"]
447
+ raws = original["raw"]
448
+ else:
449
+ paths = np.full(self.record_size, np.nan, dtype="O")
450
+ offsets = np.zeros(self.record_size, dtype="int64")
451
+ sizes = np.zeros(self.record_size, dtype="int64")
452
+ raws = np.full(self.record_size, np.nan, dtype="O")
453
+ for j, data in partition.items():
454
+ if isinstance(data, list):
455
+ if (
456
+ str(paths.dtype) == "category"
457
+ and data[0] not in paths.dtype.categories
458
+ ):
459
+ paths = paths.add_categories(data[0])
460
+ paths[j] = data[0]
461
+ if len(data) > 1:
462
+ offsets[j] = data[1]
463
+ sizes[j] = data[2]
464
+ elif data is None:
465
+ # delete
466
+ paths[j] = None
467
+ offsets[j] = 0
468
+ sizes[j] = 0
469
+ raws[j] = None
470
+ else:
471
+ # this is the only call into kerchunk, could remove
472
+ raws[j] = kerchunk.df._proc_raw(data)
473
+ # TODO: only save needed columns
474
+ df = pd.DataFrame(
475
+ {
476
+ "path": paths,
477
+ "offset": offsets,
478
+ "size": sizes,
479
+ "raw": raws,
480
+ },
481
+ copy=False,
482
+ )
483
+ if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
484
+ df["path"] = df["path"].astype("category")
485
+ object_encoding = {"raw": "bytes", "path": "utf8"}
486
+ has_nulls = ["path", "raw"]
487
+
488
+ fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
489
+ self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
490
+
491
+ if self.engine == "pyarrow":
492
+ df_backend_kwargs = {"write_statistics": False}
493
+ elif self.engine == "fastparquet":
494
+ df_backend_kwargs = {
495
+ "stats": False,
496
+ "object_encoding": object_encoding,
497
+ "has_nulls": has_nulls,
498
+ }
499
+ else:
500
+ raise NotImplementedError(f"{self.engine} not supported")
501
+ df.to_parquet(
502
+ fn,
503
+ engine=self.engine,
504
+ storage_options=storage_options
505
+ or getattr(self.fs, "storage_options", None),
506
+ compression="zstd",
507
+ index=False,
508
+ **df_backend_kwargs,
509
+ )
510
+
511
+ partition.clear()
512
+ self._items.pop((field, record))
513
+
514
+ def flush(self, base_url=None, storage_options=None):
515
+ """Output any modified or deleted keys
516
+
517
+ Parameters
518
+ ----------
519
+ base_url: str
520
+ Location of the output
521
+ """
522
+
523
+ # write what we have so far and clear sub chunks
524
+ for thing in list(self._items):
525
+ if isinstance(thing, tuple):
526
+ field, record = thing
527
+ self.write(
528
+ field,
529
+ record,
530
+ base_url=base_url,
531
+ storage_options=storage_options,
532
+ )
533
+
534
+ # gather .zmetadata from self._items and write that too
535
+ for k in list(self._items):
536
+ if k != ".zmetadata" and ".z" in k:
537
+ self.zmetadata[k] = json.loads(self._items.pop(k))
538
+ met = {"metadata": self.zmetadata, "record_size": self.record_size}
539
+ self._items.clear()
540
+ self._items[".zmetadata"] = json.dumps(met).encode()
541
+ self.fs.pipe(
542
+ "/".join([base_url or self.out_root, ".zmetadata"]),
543
+ self._items[".zmetadata"],
544
+ )
545
+
546
+ # TODO: only clear those that we wrote to?
547
+ self.open_refs.cache_clear()
548
+
549
+ def __len__(self):
550
+ # Caveat: This counts expected references, not actual - but is fast
551
+ count = 0
552
+ for field in self.listdir():
553
+ if field.startswith("."):
554
+ count += 1
555
+ else:
556
+ count += math.prod(self._get_chunk_sizes(field))
557
+ count += len(self.zmetadata) # all metadata keys
558
+ # any other files not in reference partitions
559
+ count += sum(1 for _ in self._items if not isinstance(_, tuple))
560
+ return count
561
+
562
+ def __iter__(self):
563
+ # Caveat: returns only existing keys, so the number of these does not
564
+ # match len(self)
565
+ metas = set(self.zmetadata)
566
+ metas.update(self._items)
567
+ for bit in metas:
568
+ if isinstance(bit, str):
569
+ yield bit
570
+ for field in self.listdir():
571
+ for k in self._keys_in_field(field):
572
+ if k in self:
573
+ yield k
574
+
575
+ def __contains__(self, item):
576
+ try:
577
+ self._load_one_key(item)
578
+ return True
579
+ except KeyError:
580
+ return False
581
+
582
+ def _keys_in_field(self, field):
583
+ """List key names in given field
584
+
585
+ Produces strings like "field/x.y" appropriate from the chunking of the array
586
+ """
587
+ chunk_sizes = self._get_chunk_sizes(field)
588
+ if len(chunk_sizes) == 0:
589
+ yield field + "/0"
590
+ return
591
+ inds = itertools.product(*(range(i) for i in chunk_sizes))
592
+ for ind in inds:
593
+ yield field + "/" + ".".join([str(c) for c in ind])
594
+
595
+
596
+ class ReferenceFileSystem(AsyncFileSystem):
597
+ """View byte ranges of some other file as a file system
598
+ Initial version: single file system target, which must support
599
+ async, and must allow start and end args in _cat_file. Later versions
600
+ may allow multiple arbitrary URLs for the targets.
601
+ This FileSystem is read-only. It is designed to be used with async
602
+ targets (for now). We do not get original file details from the target FS.
603
+ Configuration is by passing a dict of references at init, or a URL to
604
+ a JSON file containing the same; this dict
605
+ can also contain concrete data for some set of paths.
606
+ Reference dict format:
607
+ {path0: bytes_data, path1: (target_url, offset, size)}
608
+ https://github.com/fsspec/kerchunk/blob/main/README.md
609
+ """
610
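An illustrative use of the dict form described above, against the in-memory filesystem (all names are made up):

import fsspec

fsspec.filesystem("memory").pipe_file("/target.bin", b"0123456789")

refs = {
    "a": b"inline data",                 # concrete bytes for path "a"
    "b": ["memory://target.bin", 0, 5],  # first 5 bytes of the target
    "c": ["memory://target.bin"],        # the whole target file
}
fs = fsspec.filesystem("reference", fo=refs, remote_protocol="memory")
print(fs.cat("a"))  # b'inline data'
print(fs.cat("b"))  # b'01234'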
+
611
+ protocol = "reference"
612
+ cachable = False
613
+
614
+ def __init__(
615
+ self,
616
+ fo,
617
+ target=None,
618
+ ref_storage_args=None,
619
+ target_protocol=None,
620
+ target_options=None,
621
+ remote_protocol=None,
622
+ remote_options=None,
623
+ fs=None,
624
+ template_overrides=None,
625
+ simple_templates=True,
626
+ max_gap=64_000,
627
+ max_block=256_000_000,
628
+ cache_size=128,
629
+ **kwargs,
630
+ ):
631
+ """
632
+ Parameters
633
+ ----------
634
+ fo : dict or str
635
+ The set of references to use for this instance, with a structure as above.
636
+ If str referencing a JSON file, will use fsspec.open, in conjunction
637
+ with target_options and target_protocol to open and parse JSON at this
638
+ location. If a directory, then assume references are a set of parquet
639
+ files to be loaded lazily.
640
+ target : str
641
+ For any references having target_url as None, this is the default file
642
+ target to use
643
+ ref_storage_args : dict
644
+ If references is a str, use these kwargs for loading the JSON file.
645
+ Deprecated: use target_options instead.
646
+ target_protocol : str
647
+ Used for loading the reference file, if it is a path. If None, protocol
648
+ will be derived from the given path
649
+ target_options : dict
650
+ Extra FS options for loading the reference file ``fo``, if given as a path
651
+ remote_protocol : str
652
+ The protocol of the filesystem on which the references will be evaluated
653
+ (unless fs is provided). If not given, will be derived from the first
654
+ URL that has a protocol in the templates or in the references, in that
655
+ order.
656
+ remote_options : dict
657
+ kwargs to go with remote_protocol
658
+ fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
659
+ Directly provide a file system(s):
660
+ - a single filesystem instance
661
+ - a dict of protocol:filesystem, where each value is either a filesystem
662
+ instance, or a dict of kwargs that can be used to create an
663
+ instance for the given protocol
664
+
665
+ If this is given, remote_options and remote_protocol are ignored.
666
+ template_overrides : dict
667
+ Swap out any templates in the references file with these - useful for
668
+ testing.
669
+ simple_templates: bool
670
+ Whether templates can be processed with simple replace (True) or if
671
+ jinja is needed (False, much slower). All reference sets produced by
672
+ ``kerchunk`` are simple in this sense, but the spec allows for complex.
673
+ max_gap, max_block: int
674
+ For merging multiple concurrent requests to the same remote file.
675
+ Neighboring byte ranges will only be merged when their
676
+ inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0
677
+ to only merge when it requires no extra bytes. Pass a negative
678
+ number to disable merging, appropriate for local target files.
679
+ Neighboring byte ranges will only be merged when the size of
680
+ the aggregated range is <= ``max_block``. Default is 256MB.
681
+ cache_size : int
682
+ Maximum size of LRU cache, where cache_size*record_size denotes
683
+ the total number of references that can be loaded in memory at once.
684
+ Only used for lazily loaded references.
685
+ kwargs : passed to parent class
686
+ """
687
+ super().__init__(**kwargs)
688
+ self.target = target
689
+ self.template_overrides = template_overrides
690
+ self.simple_templates = simple_templates
691
+ self.templates = {}
692
+ self.fss = {}
693
+ self._dircache = {}
694
+ self.max_gap = max_gap
695
+ self.max_block = max_block
696
+ if isinstance(fo, str):
697
+ dic = dict(
698
+ **(ref_storage_args or target_options or {}), protocol=target_protocol
699
+ )
700
+ ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
701
+ if ref_fs.isfile(fo2):
702
+ # text JSON
703
+ with fsspec.open(fo, "rb", **dic) as f:
704
+ logger.info("Read reference from URL %s", fo)
705
+ text = json.load(f)
706
+ self._process_references(text, template_overrides)
707
+ else:
708
+ # Lazy parquet refs
709
+ logger.info("Open lazy reference dict from URL %s", fo)
710
+ self.references = LazyReferenceMapper(
711
+ fo2,
712
+ fs=ref_fs,
713
+ cache_size=cache_size,
714
+ )
715
+ else:
716
+ # dictionaries
717
+ self._process_references(fo, template_overrides)
718
+ if isinstance(fs, dict):
719
+ self.fss = {
720
+ k: (
721
+ fsspec.filesystem(k.split(":", 1)[0], **opts)
722
+ if isinstance(opts, dict)
723
+ else opts
724
+ )
725
+ for k, opts in fs.items()
726
+ }
727
+ if None not in self.fss:
728
+ self.fss[None] = filesystem("file")
729
+ return
730
+ if fs is not None:
731
+ # single remote FS
732
+ remote_protocol = (
733
+ fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
734
+ )
735
+ self.fss[remote_protocol] = fs
736
+
737
+ if remote_protocol is None:
738
+ # get single protocol from any templates
739
+ for ref in self.templates.values():
740
+ if callable(ref):
741
+ ref = ref()
742
+ protocol, _ = fsspec.core.split_protocol(ref)
743
+ if protocol and protocol not in self.fss:
744
+ fs = filesystem(protocol, **(remote_options or {}))
745
+ self.fss[protocol] = fs
746
+ if remote_protocol is None:
747
+ # get single protocol from references
748
+ # TODO: warning here, since this can be very expensive?
749
+ for ref in self.references.values():
750
+ if callable(ref):
751
+ ref = ref()
752
+ if isinstance(ref, list) and ref[0]:
753
+ protocol, _ = fsspec.core.split_protocol(ref[0])
754
+ if protocol not in self.fss:
755
+ fs = filesystem(protocol, **(remote_options or {}))
756
+ self.fss[protocol] = fs
757
+ # only use first remote URL
758
+ break
759
+
760
+ if remote_protocol and remote_protocol not in self.fss:
761
+ fs = filesystem(remote_protocol, **(remote_options or {}))
762
+ self.fss[remote_protocol] = fs
763
+
764
+ self.fss[None] = fs or filesystem("file") # default one
765
+ # Wrap any non-async filesystems to ensure async methods are available below
766
+ for k, f in self.fss.items():
767
+ if not f.async_impl:
768
+ self.fss[k] = AsyncFileSystemWrapper(f, asynchronous=self.asynchronous)
769
+ elif self.asynchronous ^ f.asynchronous:
770
+ raise ValueError(
771
+ "Reference-FS's target filesystem must have same value"
772
+ "of asynchronous"
773
+ )
774
+
775
+ def _cat_common(self, path, start=None, end=None):
776
+ path = self._strip_protocol(path)
777
+ logger.debug(f"cat: {path}")
778
+ try:
779
+ part = self.references[path]
780
+ except KeyError as exc:
781
+ raise FileNotFoundError(path) from exc
782
+ if isinstance(part, str):
783
+ part = part.encode()
784
+ if hasattr(part, "to_bytes"):
785
+ part = part.to_bytes()
786
+ if isinstance(part, bytes):
787
+ logger.debug(f"Reference: {path}, type bytes")
788
+ if part.startswith(b"base64:"):
789
+ part = base64.b64decode(part[7:])
790
+ return part, None, None
791
+
792
+ if len(part) == 1:
793
+ logger.debug(f"Reference: {path}, whole file => {part}")
794
+ url = part[0]
795
+ start1, end1 = start, end
796
+ else:
797
+ url, start0, size = part
798
+ logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
799
+ end0 = start0 + size
800
+
801
+ if start is not None:
802
+ if start >= 0:
803
+ start1 = start0 + start
804
+ else:
805
+ start1 = end0 + start
806
+ else:
807
+ start1 = start0
808
+ if end is not None:
809
+ if end >= 0:
810
+ end1 = start0 + end
811
+ else:
812
+ end1 = end0 + end
813
+ else:
814
+ end1 = end0
815
+ if url is None:
816
+ url = self.target
817
+ return url, start1, end1
818
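The offset arithmetic above composes a caller's ``start``/``end`` window with the reference's own (offset, size). For example, a reference of (url, 1000, 200) read with ``start=50, end=100`` resolves to bytes 1050:1100 of the target; a sketch of the same calculation:

start0, size = 1000, 200     # reference offset and length in the target file
end0 = start0 + size         # 1200
start, end = 50, 100         # window requested within the reference
start1 = start0 + start if start >= 0 else end0 + start   # 1050
end1 = start0 + end if end >= 0 else end0 + end           # 1100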
+
819
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
820
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
821
+ if isinstance(part_or_url, bytes):
822
+ return part_or_url[start:end]
823
+ protocol, _ = split_protocol(part_or_url)
824
+ try:
825
+ return await self.fss[protocol]._cat_file(
826
+ part_or_url, start=start0, end=end0
827
+ )
828
+ except Exception as e:
829
+ raise ReferenceNotReachable(path, part_or_url) from e
830
+
831
+ def cat_file(self, path, start=None, end=None, **kwargs):
832
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
833
+ if isinstance(part_or_url, bytes):
834
+ return part_or_url[start:end]
835
+ protocol, _ = split_protocol(part_or_url)
836
+ try:
837
+ return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
838
+ except Exception as e:
839
+ raise ReferenceNotReachable(path, part_or_url) from e
840
+
841
+ def pipe_file(self, path, value, **_):
842
+ """Temporarily add binary data or reference as a file"""
843
+ self.references[path] = value
844
+
845
+ async def _get_file(self, rpath, lpath, **kwargs):
846
+ if self.isdir(rpath):
847
+ return os.makedirs(lpath, exist_ok=True)
848
+ data = await self._cat_file(rpath)
849
+ with open(lpath, "wb") as f:
850
+ f.write(data)
851
+
852
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
853
+ if self.isdir(rpath):
854
+ return os.makedirs(lpath, exist_ok=True)
855
+ data = self.cat_file(rpath, **kwargs)
856
+ callback.set_size(len(data))
857
+ if isfilelike(lpath):
858
+ lpath.write(data)
859
+ else:
860
+ with open(lpath, "wb") as f:
861
+ f.write(data)
862
+ callback.absolute_update(len(data))
863
+
864
+ def get(self, rpath, lpath, recursive=False, **kwargs):
865
+ if recursive:
866
+ # trigger directory build
867
+ self.ls("")
868
+ rpath = self.expand_path(rpath, recursive=recursive)
869
+ fs = fsspec.filesystem("file", auto_mkdir=True)
870
+ targets = other_paths(rpath, lpath)
871
+ if recursive:
872
+ data = self.cat([r for r in rpath if not self.isdir(r)])
873
+ else:
874
+ data = self.cat(rpath)
875
+ for remote, local in zip(rpath, targets):
876
+ if remote in data:
877
+ fs.pipe_file(local, data[remote])
878
+
879
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
880
+ if isinstance(path, str) and recursive:
881
+ raise NotImplementedError
882
+ if isinstance(path, list) and (recursive or any("*" in p for p in path)):
883
+ raise NotImplementedError
884
+ # TODO: if references is lazy, pre-fetch all paths in batch before access
885
+ proto_dict = _protocol_groups(path, self.references)
886
+ out = {}
887
+ for proto, paths in proto_dict.items():
888
+ fs = self.fss[proto]
889
+ urls, starts, ends, valid_paths = [], [], [], []
890
+ for p in paths:
891
+ # find references or label not-found. Early exit if any not
892
+ # found and on_error is "raise"
893
+ try:
894
+ u, s, e = self._cat_common(p)
895
+ if not isinstance(u, (bytes, str)):
896
+ # nan/None from parquet
897
+ continue
898
+ except FileNotFoundError as err:
899
+ if on_error == "raise":
900
+ raise
901
+ if on_error != "omit":
902
+ out[p] = err
903
+ else:
904
+ urls.append(u)
905
+ starts.append(s)
906
+ ends.append(e)
907
+ valid_paths.append(p)
908
+
909
+ # process references into form for merging
910
+ urls2 = []
911
+ starts2 = []
912
+ ends2 = []
913
+ paths2 = []
914
+ whole_files = set()
915
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
916
+ if isinstance(u, bytes):
917
+ # data
918
+ out[p] = u
919
+ elif s is None:
920
+ # whole file - limits are None, None, but no further
921
+ # entries take for this file
922
+ whole_files.add(u)
923
+ urls2.append(u)
924
+ starts2.append(s)
925
+ ends2.append(e)
926
+ paths2.append(p)
927
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
928
+ # second run to account for files that are to be loaded whole
929
+ if s is not None and u not in whole_files:
930
+ urls2.append(u)
931
+ starts2.append(s)
932
+ ends2.append(e)
933
+ paths2.append(p)
934
+
935
+ # merge and fetch consolidated ranges
936
+ new_paths, new_starts, new_ends = merge_offset_ranges(
937
+ list(urls2),
938
+ list(starts2),
939
+ list(ends2),
940
+ sort=True,
941
+ max_gap=self.max_gap,
942
+ max_block=self.max_block,
943
+ )
944
+ bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
945
+
946
+ # unbundle from merged bytes - simple approach
947
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
948
+ if p in out:
949
+ continue # was bytes, already handled
950
+ for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
951
+ if np == u and (ns is None or ne is None):
952
+ if isinstance(b, Exception):
953
+ out[p] = b
954
+ else:
955
+ out[p] = b[s:e]
956
+ elif np == u and s >= ns and e <= ne:
957
+ if isinstance(b, Exception):
958
+ out[p] = b
959
+ else:
960
+ out[p] = b[s - ns : (e - ne) or None]
961
+
962
+ for k, v in out.copy().items():
963
+ # these were valid references, but fetch failed, so transform exc
964
+ if isinstance(v, Exception) and k in self.references:
965
+ ex = out[k]
966
+ new_ex = ReferenceNotReachable(k, self.references[k])
967
+ new_ex.__cause__ = ex
968
+ if on_error == "raise":
969
+ raise new_ex
970
+ elif on_error != "omit":
971
+ out[k] = new_ex
972
+
973
+ if len(out) == 1 and isinstance(path, str) and "*" not in path:
974
+ return _first(out)
975
+ return out
976
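The request merging controlled by ``max_gap``/``max_block`` can be observed in isolation with ``fsspec.utils.merge_offset_ranges`` (paths are placeholders):

from fsspec.utils import merge_offset_ranges

paths = ["s3://bucket/a", "s3://bucket/a", "s3://bucket/b"]
starts = [0, 150, 0]
ends = [100, 250, 50]

# The two ranges in file "a" (gap of 50 bytes <= max_gap) merge into one
# request for bytes 0-250; file "b" stays a separate request.
print(merge_offset_ranges(paths, starts, ends, max_gap=64_000, max_block=256_000_000))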
+
977
+ def _process_references(self, references, template_overrides=None):
978
+ vers = references.get("version", None)
979
+ if vers is None:
980
+ self._process_references0(references)
981
+ elif vers == 1:
982
+ self._process_references1(references, template_overrides=template_overrides)
983
+ else:
984
+ raise ValueError(f"Unknown reference spec version: {vers}")
985
+ # TODO: we make dircache by iterating over all entries, but for Spec >= 1,
986
+ # can replace with programmatic. Is it even needed for mapper interface?
987
+
988
+ def _process_references0(self, references):
989
+ """Make reference dict for Spec Version 0"""
990
+ if isinstance(references, dict):
991
+ # do not do this for lazy/parquet backend, which will not make dicts,
992
+ # but must remain writable in the original object
993
+ references = {
994
+ key: json.dumps(val) if isinstance(val, dict) else val
995
+ for key, val in references.items()
996
+ }
997
+ self.references = references
998
+
999
+ def _process_references1(self, references, template_overrides=None):
1000
+ if not self.simple_templates or self.templates:
1001
+ import jinja2
1002
+ self.references = {}
1003
+ self._process_templates(references.get("templates", {}))
1004
+
1005
+ @lru_cache(1000)
1006
+ def _render_jinja(u):
1007
+ return jinja2.Template(u).render(**self.templates)
1008
+
1009
+ for k, v in references.get("refs", {}).items():
1010
+ if isinstance(v, str):
1011
+ if v.startswith("base64:"):
1012
+ self.references[k] = base64.b64decode(v[7:])
1013
+ self.references[k] = v
1014
+ elif isinstance(v, dict):
1015
+ self.references[k] = json.dumps(v)
1016
+ elif self.templates:
1017
+ u = v[0]
1018
+ if "{{" in u:
1019
+ if self.simple_templates:
1020
+ u = (
1021
+ u.replace("{{", "{")
1022
+ .replace("}}", "}")
1023
+ .format(**self.templates)
1024
+ )
1025
+ else:
1026
+ u = _render_jinja(u)
1027
+ self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
1028
+ else:
1029
+ self.references[k] = v
1030
+ self.references.update(self._process_gen(references.get("gen", [])))
1031
+
1032
+ def _process_templates(self, tmp):
1033
+ self.templates = {}
1034
+ if self.template_overrides is not None:
1035
+ tmp.update(self.template_overrides)
1036
+ for k, v in tmp.items():
1037
+ if "{{" in v:
1038
+ import jinja2
1039
+
1040
+ self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
1041
+ temp
1042
+ ).render(**kwargs)
1043
+ else:
1044
+ self.templates[k] = v
1045
+
1046
+ def _process_gen(self, gens):
1047
+ out = {}
1048
+ for gen in gens:
1049
+ dimension = {
1050
+ k: (
1051
+ v
1052
+ if isinstance(v, list)
1053
+ else range(v.get("start", 0), v["stop"], v.get("step", 1))
1054
+ )
1055
+ for k, v in gen["dimensions"].items()
1056
+ }
1057
+ products = (
1058
+ dict(zip(dimension.keys(), values))
1059
+ for values in itertools.product(*dimension.values())
1060
+ )
1061
+ for pr in products:
1062
+ import jinja2
1063
+
1064
+ key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
1065
+ url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
1066
+ if ("offset" in gen) and ("length" in gen):
1067
+ offset = int(
1068
+ jinja2.Template(gen["offset"]).render(**pr, **self.templates)
1069
+ )
1070
+ length = int(
1071
+ jinja2.Template(gen["length"]).render(**pr, **self.templates)
1072
+ )
1073
+ out[key] = [url, offset, length]
1074
+ elif ("offset" in gen) ^ ("length" in gen):
1075
+ raise ValueError(
1076
+ "Both 'offset' and 'length' are required for a "
1077
+ "reference generator entry if either is provided."
1078
+ )
1079
+ else:
1080
+ out[key] = [url]
1081
+ return out
1082
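For orientation, a small version-1 spec exercising ``templates``, ``gen`` and ``refs`` as consumed by the two methods above (URLs, offsets and lengths are invented, following the kerchunk spec examples):

import fsspec

spec = {
    "version": 1,
    "templates": {"u": "server.domain/path"},
    "gen": [
        {
            "key": "gen_key{{i}}",
            "url": "http://{{u}}_{{i}}",
            "offset": "{{(i + 1) * 1000}}",
            "length": "1000",
            "dimensions": {"i": {"stop": 5}},   # yields gen_key0 .. gen_key4
        }
    ],
    "refs": {
        "key0": "data",                           # inline data
        "key1": ["http://target_url", 10000, 100],
        "key2": ["http://{{u}}", 10000, 100],     # simple template substitution
    },
}

# Consuming it needs jinja2 (for "gen") and an HTTP backend such as aiohttp:
fs = fsspec.filesystem("reference", fo=spec, remote_protocol="http")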
+
1083
+ def _dircache_from_items(self):
1084
+ self.dircache = {"": []}
1085
+ it = self.references.items()
1086
+ for path, part in it:
1087
+ if isinstance(part, (bytes, str)) or hasattr(part, "to_bytes"):
1088
+ size = len(part)
1089
+ elif len(part) == 1:
1090
+ size = None
1091
+ else:
1092
+ _, _, size = part
1093
+ par = path.rsplit("/", 1)[0] if "/" in path else ""
1094
+ par0 = par
1095
+ subdirs = [par0]
1096
+ while par0 and par0 not in self.dircache:
1097
+ # collect parent directories
1098
+ par0 = self._parent(par0)
1099
+ subdirs.append(par0)
1100
+
1101
+ subdirs.reverse()
1102
+ for parent, child in zip(subdirs, subdirs[1:]):
1103
+ # register newly discovered directories
1104
+ assert child not in self.dircache
1105
+ assert parent in self.dircache
1106
+ self.dircache[parent].append(
1107
+ {"name": child, "type": "directory", "size": 0}
1108
+ )
1109
+ self.dircache[child] = []
1110
+
1111
+ self.dircache[par].append({"name": path, "type": "file", "size": size})
1112
+
1113
+ def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
1114
+ part_or_url, start0, end0 = self._cat_common(path)
1115
+ # This logic is kept outside `ReferenceFile` to avoid unnecessary redirection.
1116
+ # That does mean `_cat_common` gets called twice if it eventually reaches `ReferenceFile`.
1117
+ if isinstance(part_or_url, bytes):
1118
+ return io.BytesIO(part_or_url[start0:end0])
1119
+
1120
+ protocol, _ = split_protocol(part_or_url)
1121
+ if start0 is None and end0 is None:
1122
+ return self.fss[protocol]._open(
1123
+ part_or_url,
1124
+ mode,
1125
+ block_size=block_size,
1126
+ cache_options=cache_options,
1127
+ **kwargs,
1128
+ )
1129
+
1130
+ return ReferenceFile(
1131
+ self,
1132
+ path,
1133
+ mode,
1134
+ block_size=block_size,
1135
+ cache_options=cache_options,
1136
+ **kwargs,
1137
+ )
1138
+
1139
+ def ls(self, path, detail=True, **kwargs):
1140
+ logger.debug("list %s", path)
1141
+ path = self._strip_protocol(path)
1142
+ if isinstance(self.references, LazyReferenceMapper):
1143
+ try:
1144
+ return self.references.ls(path, detail)
1145
+ except KeyError:
1146
+ pass
1147
+ raise FileNotFoundError(f"'{path}' is not a known key")
1148
+ if not self.dircache:
1149
+ self._dircache_from_items()
1150
+ out = self._ls_from_cache(path)
1151
+ if out is None:
1152
+ raise FileNotFoundError(path)
1153
+ if detail:
1154
+ return out
1155
+ return [o["name"] for o in out]
1156
+
1157
+ def exists(self, path, **kwargs): # overwrite auto-sync version
1158
+ return self.isdir(path) or self.isfile(path)
1159
+
1160
+ def isdir(self, path): # overwrite auto-sync version
1161
+ if self.dircache:
1162
+ return path in self.dircache
1163
+ elif isinstance(self.references, LazyReferenceMapper):
1164
+ return path in self.references.listdir()
1165
+ else:
1166
+ # this may be faster than building dircache for single calls, but
1167
+ # by looping will be slow for many calls; could cache it?
1168
+ return any(_.startswith(f"{path}/") for _ in self.references)
1169
+
1170
+ def isfile(self, path): # overwrite auto-sync version
1171
+ return path in self.references
1172
+
1173
+ async def _ls(self, path, detail=True, **kwargs): # calls fast sync code
1174
+ return self.ls(path, detail, **kwargs)
1175
+
1176
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
1177
+ if withdirs:
1178
+ return super().find(
1179
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
1180
+ )
1181
+ if path:
1182
+ path = self._strip_protocol(path)
1183
+ r = sorted(k for k in self.references if k.startswith(path))
1184
+ else:
1185
+ r = sorted(self.references)
1186
+ if detail:
1187
+ if not self.dircache:
1188
+ self._dircache_from_items()
1189
+ return {k: self._ls_from_cache(k)[0] for k in r}
1190
+ else:
1191
+ return r
1192
+
1193
+ def info(self, path, **kwargs):
1194
+ out = self.references.get(path)
1195
+ if out is not None:
1196
+ if isinstance(out, (str, bytes)):
1197
+ # decode base64 here
1198
+ return {"name": path, "type": "file", "size": len(out)}
1199
+ elif len(out) > 1:
1200
+ return {"name": path, "type": "file", "size": out[2]}
1201
+ else:
1202
+ out0 = [{"name": path, "type": "file", "size": None}]
1203
+ else:
1204
+ out = self.ls(path, True)
1205
+ out0 = [o for o in out if o["name"] == path]
1206
+ if not out0:
1207
+ return {"name": path, "type": "directory", "size": 0}
1208
+ if out0[0]["size"] is None:
1209
+ # if this is a whole remote file, update size using remote FS
1210
+ prot, _ = split_protocol(self.references[path][0])
1211
+ out0[0]["size"] = self.fss[prot].size(self.references[path][0])
1212
+ return out0[0]
1213
+
1214
+ async def _info(self, path, **kwargs): # calls fast sync code
1215
+ return self.info(path)
1216
+
1217
+ async def _rm_file(self, path, **kwargs):
1218
+ self.references.pop(
1219
+ path, None
1220
+ ) # ignores FileNotFound, just as well for directories
1221
+ self.dircache.clear() # this is a bit heavy handed
1222
+
1223
+ async def _pipe_file(self, path, data, mode="overwrite", **kwargs):
1224
+ if mode == "create" and self.exists(path):
1225
+ raise FileExistsError
1226
+ # can be str or bytes
1227
+ self.references[path] = data
1228
+ self.dircache.clear() # this is a bit heavy handed
1229
+
1230
+ async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
1231
+ # puts binary
1232
+ if mode == "create" and self.exists(rpath):
1233
+ raise FileExistsError
1234
+ with open(lpath, "rb") as f:
1235
+ self.references[rpath] = f.read()
1236
+ self.dircache.clear() # this is a bit heavy handed
1237
+
1238
+ def save_json(self, url, **storage_options):
1239
+ """Write modified references into new location"""
1240
+ out = {}
1241
+ for k, v in self.references.items():
1242
+ if isinstance(v, bytes):
1243
+ try:
1244
+ out[k] = v.decode("ascii")
1245
+ except UnicodeDecodeError:
1246
+ out[k] = (b"base64:" + base64.b64encode(v)).decode()
1247
+ else:
1248
+ out[k] = v
1249
+ with fsspec.open(url, "wb", **storage_options) as f:
1250
+ f.write(json.dumps({"version": 1, "refs": out}).encode())
1251
+
1252
+
1253
+ class ReferenceFile(AbstractBufferedFile):
1254
+ def __init__(
1255
+ self,
1256
+ fs,
1257
+ path,
1258
+ mode="rb",
1259
+ block_size="default",
1260
+ autocommit=True,
1261
+ cache_type="readahead",
1262
+ cache_options=None,
1263
+ size=None,
1264
+ **kwargs,
1265
+ ):
1266
+ super().__init__(
1267
+ fs,
1268
+ path,
1269
+ mode=mode,
1270
+ block_size=block_size,
1271
+ autocommit=autocommit,
1272
+ size=size,
1273
+ cache_type=cache_type,
1274
+ cache_options=cache_options,
1275
+ **kwargs,
1276
+ )
1277
+ part_or_url, self.start, self.end = self.fs._cat_common(self.path)
1278
+ protocol, _ = split_protocol(part_or_url)
1279
+ self.src_fs = self.fs.fss[protocol]
1280
+ self.src_path = part_or_url
1281
+ self._f = None
1282
+
1283
+ @property
1284
+ def f(self):
1285
+ if self._f is None or self._f.closed:
1286
+ self._f = self.src_fs._open(
1287
+ self.src_path,
1288
+ mode=self.mode,
1289
+ block_size=self.blocksize,
1290
+ autocommit=self.autocommit,
1291
+ cache_type="none",
1292
+ **self.kwargs,
1293
+ )
1294
+ return self._f
1295
+
1296
+ def close(self):
1297
+ if self._f is not None:
1298
+ self._f.close()
1299
+ return super().close()
1300
+
1301
+ def _fetch_range(self, start, end):
1302
+ start = start + self.start
1303
+ end = min(end + self.start, self.end)
1304
+ self.f.seek(start)
1305
+ return self.f.read(end - start)
temp_venv/lib/python3.13/site-packages/fsspec/implementations/sftp.py ADDED
@@ -0,0 +1,180 @@
1
+ import datetime
2
+ import logging
3
+ import os
4
+ import types
5
+ import uuid
6
+ from stat import S_ISDIR, S_ISLNK
7
+
8
+ import paramiko
9
+
10
+ from .. import AbstractFileSystem
11
+ from ..utils import infer_storage_options
12
+
13
+ logger = logging.getLogger("fsspec.sftp")
14
+
15
+
16
+ class SFTPFileSystem(AbstractFileSystem):
17
+ """Files over SFTP/SSH
18
+
19
+ Peer-to-peer filesystem over SSH using paramiko.
20
+
21
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
22
+ there is no way to tell if a path is relative, so all paths are assumed
23
+ to be absolute.
24
+ """
25
+
26
+ protocol = "sftp", "ssh"
27
+
28
+ def __init__(self, host, **ssh_kwargs):
29
+ """
30
+
31
+ Parameters
32
+ ----------
33
+ host: str
34
+ Hostname or IP as a string
35
+ temppath: str
36
+ Location on the server to put files, when within a transaction
37
+ ssh_kwargs: dict
38
+ Parameters passed on to connection. See details in
39
+ https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
40
+ May include port, username, password...
41
+ """
42
+ if self._cached:
43
+ return
44
+ super().__init__(**ssh_kwargs)
45
+ self.temppath = ssh_kwargs.pop("temppath", "/tmp") # remote temp directory
46
+ self.host = host
47
+ self.ssh_kwargs = ssh_kwargs
48
+ self._connect()
49
+
50
+ def _connect(self):
51
+ logger.debug("Connecting to SFTP server %s", self.host)
52
+ self.client = paramiko.SSHClient()
53
+ self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
54
+ self.client.connect(self.host, **self.ssh_kwargs)
55
+ self.ftp = self.client.open_sftp()
56
+
57
+ @classmethod
58
+ def _strip_protocol(cls, path):
59
+ return infer_storage_options(path)["path"]
60
+
61
+ @staticmethod
62
+ def _get_kwargs_from_urls(urlpath):
63
+ out = infer_storage_options(urlpath)
64
+ out.pop("path", None)
65
+ out.pop("protocol", None)
66
+ return out
67
+
68
+ def mkdir(self, path, create_parents=True, mode=511):
69
+ logger.debug("Creating folder %s", path)
70
+ if self.exists(path):
71
+ raise FileExistsError(f"File exists: {path}")
72
+
73
+ if create_parents:
74
+ self.makedirs(path)
75
+ else:
76
+ self.ftp.mkdir(path, mode)
77
+
78
+ def makedirs(self, path, exist_ok=False, mode=511):
79
+ if self.exists(path) and not exist_ok:
80
+ raise FileExistsError(f"File exists: {path}")
81
+
82
+ parts = path.split("/")
83
+ new_path = "/" if path[:1] == "/" else ""
84
+
85
+ for part in parts:
86
+ if part:
87
+ new_path = f"{new_path}/{part}" if new_path else part
88
+ if not self.exists(new_path):
89
+ self.ftp.mkdir(new_path, mode)
90
+
91
+ def rmdir(self, path):
92
+ logger.debug("Removing folder %s", path)
93
+ self.ftp.rmdir(path)
94
+
95
+ def info(self, path):
96
+ stat = self._decode_stat(self.ftp.stat(path))
97
+ stat["name"] = path
98
+ return stat
99
+
100
+ @staticmethod
101
+ def _decode_stat(stat, parent_path=None):
102
+ if S_ISDIR(stat.st_mode):
103
+ t = "directory"
104
+ elif S_ISLNK(stat.st_mode):
105
+ t = "link"
106
+ else:
107
+ t = "file"
108
+ out = {
109
+ "name": "",
110
+ "size": stat.st_size,
111
+ "type": t,
112
+ "uid": stat.st_uid,
113
+ "gid": stat.st_gid,
114
+ "time": datetime.datetime.fromtimestamp(
115
+ stat.st_atime, tz=datetime.timezone.utc
116
+ ),
117
+ "mtime": datetime.datetime.fromtimestamp(
118
+ stat.st_mtime, tz=datetime.timezone.utc
119
+ ),
120
+ }
121
+ if parent_path:
122
+ out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
123
+ return out
124
+
125
+ def ls(self, path, detail=False):
126
+ logger.debug("Listing folder %s", path)
127
+ stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
128
+ if detail:
129
+ return stats
130
+ else:
131
+ paths = [stat["name"] for stat in stats]
132
+ return sorted(paths)
133
+
134
+ def put(self, lpath, rpath, callback=None, **kwargs):
135
+ logger.debug("Put file %s into %s", lpath, rpath)
136
+ self.ftp.put(lpath, rpath)
137
+
138
+ def get_file(self, rpath, lpath, **kwargs):
139
+ if self.isdir(rpath):
140
+ os.makedirs(lpath, exist_ok=True)
141
+ else:
142
+ self.ftp.get(self._strip_protocol(rpath), lpath)
143
+
144
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
145
+ """
146
+ block_size: int or None
147
+ If 0, no buffering, if 1, line buffering, if >1, buffer that many
148
+ bytes, if None use default from paramiko.
149
+ """
150
+ logger.debug("Opening file %s", path)
151
+ if kwargs.get("autocommit", True) is False:
152
+ # writes to temporary file, move on commit
153
+ path2 = "/".join([self.temppath, str(uuid.uuid4())])
154
+ f = self.ftp.open(path2, mode, bufsize=block_size if block_size else -1)
155
+ f.temppath = path2
156
+ f.targetpath = path
157
+ f.fs = self
158
+ f.commit = types.MethodType(commit_a_file, f)
159
+ f.discard = types.MethodType(discard_a_file, f)
160
+ else:
161
+ f = self.ftp.open(path, mode, bufsize=block_size if block_size else -1)
162
+ return f
163
+
164
+ def _rm(self, path):
165
+ if self.isdir(path):
166
+ self.ftp.rmdir(path)
167
+ else:
168
+ self.ftp.remove(path)
169
+
170
+ def mv(self, old, new):
171
+ logger.debug("Renaming %s into %s", old, new)
172
+ self.ftp.posix_rename(old, new)
173
+
174
+
175
+ def commit_a_file(self):
176
+ self.fs.mv(self.temppath, self.targetpath)
177
+
178
+
179
+ def discard_a_file(self):
180
+ self.fs._rm(self.temppath)
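A brief usage sketch for the filesystem above (host and credentials are placeholders; paramiko must be installed, and paths are treated as absolute):

import fsspec

fs = fsspec.filesystem(
    "sftp",
    host="example.com",   # placeholder
    username="user",
    password="secret",
)

print(fs.ls("/home/user"))                        # sorted names
with fs.open("/home/user/notes.txt", "rb") as f:
    data = f.read()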
temp_venv/lib/python3.13/site-packages/fsspec/implementations/smb.py ADDED
@@ -0,0 +1,416 @@
1
+ """
2
+ This module contains SMBFileSystem class responsible for handling access to
3
+ Windows Samba network shares by using package smbprotocol
4
+ """
5
+
6
+ import datetime
7
+ import re
8
+ import uuid
9
+ from stat import S_ISDIR, S_ISLNK
10
+
11
+ import smbclient
12
+ import smbprotocol.exceptions
13
+
14
+ from .. import AbstractFileSystem
15
+ from ..utils import infer_storage_options
16
+
17
+ # ! pylint: disable=bad-continuation
18
+
19
+
20
+ class SMBFileSystem(AbstractFileSystem):
21
+ """Allow reading and writing to Windows and Samba network shares.
22
+
23
+ When using `fsspec.open()` for getting a file-like object the URI
24
+ should be specified as this format:
25
+ ``smb://workgroup;user:password@server:port/share/folder/file.csv``.
26
+
27
+ Example::
28
+
29
+ >>> import fsspec
30
+ >>> with fsspec.open(
31
+ ... 'smb://myuser:mypassword@myhost.com/' 'share/folder/file.csv'
32
+ ... ) as smbfile:
33
+ ... df = pd.read_csv(smbfile, sep='|', header=None)
34
+
35
+ Note that you need to pass in a valid hostname or IP address for the host
36
+ component of the URL. Do not use the Windows/NetBIOS machine name for the
37
+ host component.
38
+
39
+ The first component of the path in the URL points to the name of the shared
40
+ folder. Subsequent path components will point to the directory/folder/file.
41
+
42
+ The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
43
+ optional.
44
+
45
+ .. note::
46
+
47
+ For this backend to work, `smbprotocol`_ must be installed, e.g.::
48
+
49
+ $ pip install smbprotocol
50
+ # or
51
+ # pip install smbprotocol[kerberos]
52
+
53
+ .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements
54
+
55
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
56
+ there is no way to tell if a path is relative, so all paths are assumed
57
+ to be absolute.
58
+ """
59
+
60
+ protocol = "smb"
61
+
62
+ # pylint: disable=too-many-arguments
63
+ def __init__(
64
+ self,
65
+ host,
66
+ port=None,
67
+ username=None,
68
+ password=None,
69
+ timeout=60,
70
+ encrypt=None,
71
+ share_access=None,
72
+ register_session_retries=4,
73
+ register_session_retry_wait=1,
74
+ register_session_retry_factor=10,
75
+ auto_mkdir=False,
76
+ **kwargs,
77
+ ):
78
+ """
79
+ You can use _get_kwargs_from_urls to get some kwargs from
80
+ a reasonable SMB url.
81
+
82
+ Authentication will be anonymous or integrated if username/password are not
83
+ given.
84
+
85
+ Parameters
86
+ ----------
87
+ host: str
88
+ The remote server name/ip to connect to
89
+ port: int or None
90
+ Port to connect with. Usually 445, sometimes 139.
91
+ username: str or None
92
+ Username to connect with. Required if Kerberos auth is not being used.
93
+ password: str or None
94
+ User's password on the server, if using username
95
+ timeout: int
96
+ Connection timeout in seconds
97
+ encrypt: bool
98
+ Whether to force encryption or not, once this has been set to True
99
+ the session cannot be changed back to False.
100
+ share_access: str or None
101
+ Specifies the default access applied to file open operations
102
+ performed with this file system object.
103
+ This affects whether other processes can concurrently open a handle
104
+ to the same file.
105
+
106
+ - None (the default): exclusively locks the file until closed.
107
+ - 'r': Allow other handles to be opened with read access.
108
+ - 'w': Allow other handles to be opened with write access.
109
+ - 'd': Allow other handles to be opened with delete access.
110
+ register_session_retries: int
111
+ Number of retries to register a session with the server. Retries are not performed
112
+ for authentication errors, as these indicate invalid credentials rather than network
113
+ issues. If set to a negative value, no registration attempts will be performed.
114
+ register_session_retry_wait: int
115
+ Time in seconds to wait between each retry. Number must be non-negative.
116
+ register_session_retry_factor: int
117
+ Base factor for the wait time between each retry. The wait time
118
+ is calculated using exponential function. For factor=1 all wait times
119
+ will be equal to `register_session_retry_wait`. For any number of retries,
120
+ the last wait time will be equal to `register_session_retry_wait` and for retries>1
121
+ the first wait time will be equal to `register_session_retry_wait / factor`.
122
+ Number must be equal to or greater than 1. Optimal factor is 10.
123
+ auto_mkdir: bool
124
+ Whether, when opening a file, the directory containing it should
125
+ be created (if it doesn't already exist). This is assumed by pyarrow
126
+ and zarr-python code.
127
+ """
128
+ super().__init__(**kwargs)
129
+ self.host = host
130
+ self.port = port
131
+ self.username = username
132
+ self.password = password
133
+ self.timeout = timeout
134
+ self.encrypt = encrypt
135
+ self.temppath = kwargs.pop("temppath", "")
136
+ self.share_access = share_access
137
+ self.register_session_retries = register_session_retries
138
+ if register_session_retry_wait < 0:
139
+ raise ValueError(
140
+ "register_session_retry_wait must be a non-negative integer"
141
+ )
142
+ self.register_session_retry_wait = register_session_retry_wait
143
+ if register_session_retry_factor < 1:
144
+ raise ValueError(
145
+ "register_session_retry_factor must be a positive "
146
+ "integer equal to or greater than 1"
147
+ )
148
+ self.register_session_retry_factor = register_session_retry_factor
149
+ self.auto_mkdir = auto_mkdir
150
+ self._connect()
151
+
152
+ @property
153
+ def _port(self):
154
+ return 445 if self.port is None else self.port
155
+
156
+ def _connect(self):
157
+ import time
158
+
159
+ if self.register_session_retries <= -1:
160
+ return
161
+
162
+ retried_errors = []
163
+
164
+ wait_time = self.register_session_retry_wait
165
+ n_waits = (
166
+ self.register_session_retries - 1
167
+ ) # -1 = No wait time after the last retry
168
+ factor = self.register_session_retry_factor
169
+
170
+ # Generate wait times for each retry attempt.
171
+ # Wait times are calculated using exponential function. For factor=1 all wait times
172
+ # will be equal to `wait`. For any number of retries the last wait time will be
173
+ # equal to `wait` and for retries>2 the first wait time will be equal to `wait / factor`.
174
+ wait_times = iter(
175
+ factor ** (n / n_waits - 1) * wait_time for n in range(0, n_waits + 1)
176
+ )
177
+
178
+ for attempt in range(self.register_session_retries + 1):
179
+ try:
180
+ smbclient.register_session(
181
+ self.host,
182
+ username=self.username,
183
+ password=self.password,
184
+ port=self._port,
185
+ encrypt=self.encrypt,
186
+ connection_timeout=self.timeout,
187
+ )
188
+ return
189
+ except (
190
+ smbprotocol.exceptions.SMBAuthenticationError,
191
+ smbprotocol.exceptions.LogonFailure,
192
+ ):
193
+ # These exceptions should not be repeated, as they clearly indicate
194
+ # that the credentials are invalid and not a network issue.
195
+ raise
196
+ except ValueError as exc:
197
+ if re.findall(r"\[Errno -\d+]", str(exc)):
198
+ # This exception is raised by the smbprotocol.transport:Tcp.connect
199
+ # and originates from socket.gaierror (OSError). These exceptions might
200
+ # be raised due to network instability. We will retry to connect.
201
+ retried_errors.append(exc)
202
+ else:
203
+ # All another ValueError exceptions should be raised, as they are not
204
+ # related to network issues.
205
+ raise
206
+ except Exception as exc:
207
+ # Save the exception and retry to connect. This except might be dropped
208
+ # in the future, once all exceptions suited for retry are identified.
209
+ retried_errors.append(exc)
210
+
211
+ if attempt < self.register_session_retries:
212
+ time.sleep(next(wait_times))
213
+
214
+ # Raise last exception to inform user about the connection issues.
215
+ # Note: Should we use ExceptionGroup to raise all exceptions?
216
+ raise retried_errors[-1]
217
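The wait schedule produced by the expression above can be reproduced on its own; with the constructor defaults (4 retries, 1s wait, factor 10) the delays ramp up towards the configured wait:

retries, wait, factor = 4, 1, 10   # register_session_* defaults
n_waits = retries - 1              # no wait after the final attempt

waits = [factor ** (n / n_waits - 1) * wait for n in range(n_waits + 1)]
print([round(w, 3) for w in waits])  # [0.1, 0.215, 0.464, 1.0]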
+
218
+ @classmethod
219
+ def _strip_protocol(cls, path):
220
+ return infer_storage_options(path)["path"]
221
+
222
+ @staticmethod
223
+ def _get_kwargs_from_urls(path):
224
+ # smb://workgroup;user:password@host:port/share/folder/file.csv
225
+ out = infer_storage_options(path)
226
+ out.pop("path", None)
227
+ out.pop("protocol", None)
228
+ return out
229
+
230
+ def mkdir(self, path, create_parents=True, **kwargs):
231
+ wpath = _as_unc_path(self.host, path)
232
+ if create_parents:
233
+ smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
234
+ else:
235
+ smbclient.mkdir(wpath, port=self._port, **kwargs)
236
+
237
+ def makedirs(self, path, exist_ok=False):
238
+ if _share_has_path(path):
239
+ wpath = _as_unc_path(self.host, path)
240
+ smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)
241
+
242
+ def rmdir(self, path):
243
+ if _share_has_path(path):
244
+ wpath = _as_unc_path(self.host, path)
245
+ smbclient.rmdir(wpath, port=self._port)
246
+
247
+ def info(self, path, **kwargs):
248
+ wpath = _as_unc_path(self.host, path)
249
+ stats = smbclient.stat(wpath, port=self._port, **kwargs)
250
+ if S_ISDIR(stats.st_mode):
251
+ stype = "directory"
252
+ elif S_ISLNK(stats.st_mode):
253
+ stype = "link"
254
+ else:
255
+ stype = "file"
256
+ res = {
257
+ "name": path + "/" if stype == "directory" else path,
258
+ "size": stats.st_size,
259
+ "type": stype,
260
+ "uid": stats.st_uid,
261
+ "gid": stats.st_gid,
262
+ "time": stats.st_atime,
263
+ "mtime": stats.st_mtime,
264
+ }
265
+ return res
266
+
267
+ def created(self, path):
268
+ """Return the created timestamp of a file as a datetime.datetime"""
269
+ wpath = _as_unc_path(self.host, path)
270
+ stats = smbclient.stat(wpath, port=self._port)
271
+ return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)
272
+
273
+ def modified(self, path):
274
+ """Return the modified timestamp of a file as a datetime.datetime"""
275
+ wpath = _as_unc_path(self.host, path)
276
+ stats = smbclient.stat(wpath, port=self._port)
277
+ return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)
278
+
279
+ def ls(self, path, detail=True, **kwargs):
280
+ unc = _as_unc_path(self.host, path)
281
+ listed = smbclient.listdir(unc, port=self._port, **kwargs)
282
+ dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
283
+ if detail:
284
+ dirs = [self.info(d) for d in dirs]
285
+ return dirs
286
+
287
+ # pylint: disable=too-many-arguments
288
+ def _open(
289
+ self,
290
+ path,
291
+ mode="rb",
292
+ block_size=-1,
293
+ autocommit=True,
294
+ cache_options=None,
295
+ **kwargs,
296
+ ):
297
+ """
298
+ block_size: int or None
299
+ If 0, no buffering, 1, line buffering, >1, buffer that many bytes
300
+
301
+ Notes
302
+ -----
303
+ By specifying 'share_access' in 'kwargs' it is possible to override the
304
+ default shared access setting applied in the constructor of this object.
305
+ """
306
+ if self.auto_mkdir and "w" in mode:
307
+ self.makedirs(self._parent(path), exist_ok=True)
308
+ bls = block_size if block_size is not None and block_size >= 0 else -1
309
+ wpath = _as_unc_path(self.host, path)
310
+ share_access = kwargs.pop("share_access", self.share_access)
311
+ if "w" in mode and autocommit is False:
312
+ temp = _as_temp_path(self.host, path, self.temppath)
313
+ return SMBFileOpener(
314
+ wpath, temp, mode, port=self._port, block_size=bls, **kwargs
315
+ )
316
+ return smbclient.open_file(
317
+ wpath,
318
+ mode,
319
+ buffering=bls,
320
+ share_access=share_access,
321
+ port=self._port,
322
+ **kwargs,
323
+ )
324
+
325
+ def copy(self, path1, path2, **kwargs):
326
+ """Copy within two locations in the same filesystem"""
327
+ wpath1 = _as_unc_path(self.host, path1)
328
+ wpath2 = _as_unc_path(self.host, path2)
329
+ if self.auto_mkdir:
330
+ self.makedirs(self._parent(path2), exist_ok=True)
331
+ smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
332
+
333
+ def _rm(self, path):
334
+ if _share_has_path(path):
335
+ wpath = _as_unc_path(self.host, path)
336
+ stats = smbclient.stat(wpath, port=self._port)
337
+ if S_ISDIR(stats.st_mode):
338
+ smbclient.rmdir(wpath, port=self._port)
339
+ else:
340
+ smbclient.remove(wpath, port=self._port)
341
+
342
+ def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
343
+ wpath1 = _as_unc_path(self.host, path1)
344
+ wpath2 = _as_unc_path(self.host, path2)
345
+ smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
346
+
347
+
348
+ def _as_unc_path(host, path):
349
+ rpath = path.replace("/", "\\")
350
+ unc = f"\\\\{host}{rpath}"
351
+ return unc
352
+
353
+
354
+ def _as_temp_path(host, path, temppath):
355
+ share = path.split("/")[1]
356
+ temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
357
+ unc = _as_unc_path(host, temp_file)
358
+ return unc
359
+
360
+
361
+ def _share_has_path(path):
362
+ parts = path.count("/")
363
+ if path.endswith("/"):
364
+ return parts > 2
365
+ return parts > 1
366
+
367
+
368
+ class SMBFileOpener:
369
+ """writes to remote temporary file, move on commit"""
370
+
371
+ def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
372
+ self.path = path
373
+ self.temp = temp
374
+ self.mode = mode
375
+ self.block_size = block_size
376
+ self.kwargs = kwargs
377
+ self.smbfile = None
378
+ self._incontext = False
379
+ self.port = port
380
+ self._open()
381
+
382
+ def _open(self):
383
+ if self.smbfile is None or self.smbfile.closed:
384
+ self.smbfile = smbclient.open_file(
385
+ self.temp,
386
+ self.mode,
387
+ port=self.port,
388
+ buffering=self.block_size,
389
+ **self.kwargs,
390
+ )
391
+
392
+ def commit(self):
393
+ """Move temp file to definitive on success."""
394
+ # TODO: use transaction support in SMB protocol
395
+ smbclient.replace(self.temp, self.path, port=self.port)
396
+
397
+ def discard(self):
398
+ """Remove the temp file on failure."""
399
+ smbclient.remove(self.temp, port=self.port)
400
+
401
+ def __fspath__(self):
402
+ return self.path
403
+
404
+ def __iter__(self):
405
+ return self.smbfile.__iter__()
406
+
407
+ def __getattr__(self, item):
408
+ return getattr(self.smbfile, item)
409
+
410
+ def __enter__(self):
411
+ self._incontext = True
412
+ return self.smbfile.__enter__()
413
+
414
+ def __exit__(self, exc_type, exc_value, traceback):
415
+ self._incontext = False
416
+ self.smbfile.__exit__(exc_type, exc_value, traceback)
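Usage note (not part of the diff): a minimal sketch of reading from an SMB share through this filesystem. The host, share name and credentials below are placeholders, and the snippet assumes the smbprotocol dependency is installed and the share is reachable.

import fsspec

# instantiate via the "smb" protocol; paths start with the share name,
# mirroring the UNC layout built by _as_unc_path above
fs = fsspec.filesystem(
    "smb",
    host="fileserver.example.com",  # hypothetical server
    username="alice",               # hypothetical credentials
    password="secret",
)
print(fs.ls("/myshare/data"))
with fs.open("/myshare/data/report.csv", "rb") as f:
    header = f.read(1024)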
temp_venv/lib/python3.13/site-packages/fsspec/implementations/tar.py ADDED
@@ -0,0 +1,124 @@
1
+ import logging
2
+ import tarfile
3
+
4
+ import fsspec
5
+ from fsspec.archive import AbstractArchiveFileSystem
6
+ from fsspec.compression import compr
7
+ from fsspec.utils import infer_compression
8
+
9
+ typemap = {b"0": "file", b"5": "directory"}
10
+
11
+ logger = logging.getLogger("tar")
12
+
13
+
14
+ class TarFileSystem(AbstractArchiveFileSystem):
15
+ """Compressed Tar archives as a file-system (read-only)
16
+
17
+ Supports the following formats:
18
+ tar.gz, tar.bz2, tar.xz
19
+ """
20
+
21
+ root_marker = ""
22
+ protocol = "tar"
23
+ cachable = False
24
+
25
+ def __init__(
26
+ self,
27
+ fo="",
28
+ index_store=None,
29
+ target_options=None,
30
+ target_protocol=None,
31
+ compression=None,
32
+ **kwargs,
33
+ ):
34
+ super().__init__(**kwargs)
35
+ target_options = target_options or {}
36
+
37
+ if isinstance(fo, str):
38
+ self.of = fsspec.open(fo, protocol=target_protocol, **target_options)
39
+ fo = self.of.open() # keep the reference
40
+
41
+ # Try to infer compression.
42
+ if compression is None:
43
+ name = None
44
+
45
+ # Try different ways to get hold of the filename. `fo` might either
46
+ # be a `fsspec.LocalFileOpener`, an `io.BufferedReader` or an
47
+ # `fsspec.AbstractFileSystem` instance.
48
+ try:
49
+ # Amended io.BufferedReader or similar.
50
+ # This uses a "protocol extension" where original filenames are
51
+ # propagated to archive-like filesystems in order to let them
52
+ # infer the right compression appropriately.
53
+ if hasattr(fo, "original"):
54
+ name = fo.original
55
+
56
+ # fsspec.LocalFileOpener
57
+ elif hasattr(fo, "path"):
58
+ name = fo.path
59
+
60
+ # io.BufferedReader
61
+ elif hasattr(fo, "name"):
62
+ name = fo.name
63
+
64
+ # fsspec.AbstractFileSystem
65
+ elif hasattr(fo, "info"):
66
+ name = fo.info()["name"]
67
+
68
+ except Exception as ex:
69
+ logger.warning(
70
+ f"Unable to determine file name, not inferring compression: {ex}"
71
+ )
72
+
73
+ if name is not None:
74
+ compression = infer_compression(name)
75
+ logger.info(f"Inferred compression {compression} from file name {name}")
76
+
77
+ if compression is not None:
78
+ # TODO: tarfile already implements compression with modes like "'r:gz'",
79
+ # but would seeking to an offset in the file still work then?
80
+ fo = compr[compression](fo)
81
+
82
+ self._fo_ref = fo
83
+ self.fo = fo # the whole instance is a context
84
+ self.tar = tarfile.TarFile(fileobj=self.fo)
85
+ self.dir_cache = None
86
+
87
+ self.index_store = index_store
88
+ self.index = None
89
+ self._index()
90
+
91
+ def _index(self):
92
+ # TODO: load and set saved index, if exists
93
+ out = {}
94
+ for ti in self.tar:
95
+ info = ti.get_info()
96
+ info["type"] = typemap.get(info["type"], "file")
97
+ name = ti.get_info()["name"].rstrip("/")
98
+ out[name] = (info, ti.offset_data)
99
+
100
+ self.index = out
101
+ # TODO: save index to self.index_store here, if set
102
+
103
+ def _get_dirs(self):
104
+ if self.dir_cache is not None:
105
+ return
106
+
107
+ # This enables ls to get directories as children as well as files
108
+ self.dir_cache = {
109
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
110
+ for dirname in self._all_dirnames(self.tar.getnames())
111
+ }
112
+ for member in self.tar.getmembers():
113
+ info = member.get_info()
114
+ info["name"] = info["name"].rstrip("/")
115
+ info["type"] = typemap.get(info["type"], "file")
116
+ self.dir_cache[info["name"]] = info
117
+
118
+ def _open(self, path, mode="rb", **kwargs):
119
+ if mode != "rb":
120
+ raise ValueError("Read-only filesystem implementation")
121
+ details, offset = self.index[path]
122
+ if details["type"] != "file":
123
+ raise ValueError("Can only handle regular files")
124
+ return self.tar.extractfile(path)
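Usage note (not part of the diff): a short, hedged sketch of listing and reading members of a gzipped tarball with this read-only filesystem; the archive path and member name are placeholders, with compression inferred from the file name as described in __init__ above.

import fsspec

fs = fsspec.filesystem("tar", fo="data/archive.tar.gz")  # hypothetical archive
print(fs.ls("/"))                           # listing comes from _index/_get_dirs
with fs.open("inner/file.txt", "rb") as f:  # only "rb" is supported
    print(f.read())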
temp_venv/lib/python3.13/site-packages/fsspec/implementations/webhdfs.py ADDED
@@ -0,0 +1,485 @@
1
+ # https://hadoop.apache.org/docs/r1.0.4/webhdfs.html
2
+
3
+ import logging
4
+ import os
5
+ import secrets
6
+ import shutil
7
+ import tempfile
8
+ import uuid
9
+ from contextlib import suppress
10
+ from urllib.parse import quote
11
+
12
+ import requests
13
+
14
+ from ..spec import AbstractBufferedFile, AbstractFileSystem
15
+ from ..utils import infer_storage_options, tokenize
16
+
17
+ logger = logging.getLogger("webhdfs")
18
+
19
+
20
+ class WebHDFS(AbstractFileSystem):
21
+ """
22
+ Interface to HDFS over HTTP using the WebHDFS API. Also supports HttpFS gateways.
23
+
24
+ Four auth mechanisms are supported:
25
+
26
+ insecure: no auth is done, and the user is assumed to be whoever they
27
+ say they are (parameter ``user``), or a predefined value such as
28
+ "dr.who" if not given
29
+ spnego: when kerberos authentication is enabled, auth is negotiated by
30
+ requests_kerberos https://github.com/requests/requests-kerberos .
31
+ This establishes a session based on existing kinit login and/or
32
+ specified principal/password; parameters are passed with ``kerb_kwargs``
33
+ token: uses an existing Hadoop delegation token from another secured
34
+ service. Indeed, this client can also generate such tokens when
35
+ not insecure. Note that tokens expire, but can be renewed (by a
36
+ previously specified user) and may allow for proxying.
37
+ basic-auth: used when both parameter ``user`` and parameter ``password``
38
+ are provided.
39
+
40
+ """
41
+
42
+ tempdir = str(tempfile.gettempdir())
43
+ protocol = "webhdfs", "webHDFS"
44
+
45
+ def __init__(
46
+ self,
47
+ host,
48
+ port=50070,
49
+ kerberos=False,
50
+ token=None,
51
+ user=None,
52
+ password=None,
53
+ proxy_to=None,
54
+ kerb_kwargs=None,
55
+ data_proxy=None,
56
+ use_https=False,
57
+ session_cert=None,
58
+ session_verify=True,
59
+ **kwargs,
60
+ ):
61
+ """
62
+ Parameters
63
+ ----------
64
+ host: str
65
+ Name-node address
66
+ port: int
67
+ Port for webHDFS
68
+ kerberos: bool
69
+ Whether to authenticate with kerberos for this connection
70
+ token: str or None
71
+ If given, use this token on every call to authenticate. A user
72
+ and user-proxy may be encoded in the token and should not also be
73
+ given
74
+ user: str or None
75
+ If given, assert the user name to connect with
76
+ password: str or None
77
+ If given, assert the password to use for basic auth. If password
78
+ is provided, user must be provided also
79
+ proxy_to: str or None
80
+ If given, the user has the authority to proxy, and this value is
81
+ the user in whose name actions are taken
82
+ kerb_kwargs: dict
83
+ Any extra arguments for HTTPKerberosAuth, see
84
+ `<https://github.com/requests/requests-kerberos/blob/master/requests_kerberos/kerberos_.py>`_
85
+ data_proxy: dict, callable or None
86
+ If given, map data-node addresses. This can be necessary if the
87
+ HDFS cluster is behind a proxy, running on Docker or otherwise has
88
+ a mismatch between the host-names given by the name-node and the
89
+ address by which to refer to them from the client. If a dict,
90
+ maps host names ``host->data_proxy[host]``; if a callable, full
91
+ URLs are passed, and function must conform to
92
+ ``url->data_proxy(url)``.
93
+ use_https: bool
94
+ Whether to connect to the Name-node using HTTPS instead of HTTP
95
+ session_cert: str or Tuple[str, str] or None
96
+ Path to a certificate file, or tuple of (cert, key) files to use
97
+ for the requests.Session
98
+ session_verify: str, bool or None
99
+ Path to a certificate file to use for verifying the requests.Session.
100
+ kwargs
101
+ """
102
+ if self._cached:
103
+ return
104
+ super().__init__(**kwargs)
105
+ self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"
106
+ self.kerb = kerberos
107
+ self.kerb_kwargs = kerb_kwargs or {}
108
+ self.pars = {}
109
+ self.proxy = data_proxy or {}
110
+ if token is not None:
111
+ if user is not None or proxy_to is not None:
112
+ raise ValueError(
113
+ "If passing a delegation token, must not set "
114
+ "user or proxy_to, as these are encoded in the"
115
+ " token"
116
+ )
117
+ self.pars["delegation"] = token
118
+ self.user = user
119
+ self.password = password
120
+
121
+ if password is not None:
122
+ if user is None:
123
+ raise ValueError(
124
+ "If passing a password, the user must also be "
125
+ "set in order to set up the basic-auth"
126
+ )
127
+ else:
128
+ if user is not None:
129
+ self.pars["user.name"] = user
130
+
131
+ if proxy_to is not None:
132
+ self.pars["doas"] = proxy_to
133
+ if kerberos and user is not None:
134
+ raise ValueError(
135
+ "If using Kerberos auth, do not specify the "
136
+ "user, this is handled by kinit."
137
+ )
138
+
139
+ self.session_cert = session_cert
140
+ self.session_verify = session_verify
141
+
142
+ self._connect()
143
+
144
+ self._fsid = f"webhdfs_{tokenize(host, port)}"
145
+
146
+ @property
147
+ def fsid(self):
148
+ return self._fsid
149
+
150
+ def _connect(self):
151
+ self.session = requests.Session()
152
+
153
+ if self.session_cert:
154
+ self.session.cert = self.session_cert
155
+
156
+ self.session.verify = self.session_verify
157
+
158
+ if self.kerb:
159
+ from requests_kerberos import HTTPKerberosAuth
160
+
161
+ self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)
162
+
163
+ if self.user is not None and self.password is not None:
164
+ from requests.auth import HTTPBasicAuth
165
+
166
+ self.session.auth = HTTPBasicAuth(self.user, self.password)
167
+
168
+ def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
169
+ path = self._strip_protocol(path) if path is not None else ""
170
+ url = self._apply_proxy(self.url + quote(path, safe="/="))
171
+ args = kwargs.copy()
172
+ args.update(self.pars)
173
+ args["op"] = op.upper()
174
+ logger.debug("sending %s with %s", url, method)
175
+ out = self.session.request(
176
+ method=method.upper(),
177
+ url=url,
178
+ params=args,
179
+ data=data,
180
+ allow_redirects=redirect,
181
+ )
182
+ if out.status_code in [400, 401, 403, 404, 500]:
183
+ try:
184
+ err = out.json()
185
+ msg = err["RemoteException"]["message"]
186
+ exp = err["RemoteException"]["exception"]
187
+ except (ValueError, KeyError):
188
+ pass
189
+ else:
190
+ if exp in ["IllegalArgumentException", "UnsupportedOperationException"]:
191
+ raise ValueError(msg)
192
+ elif exp in ["SecurityException", "AccessControlException"]:
193
+ raise PermissionError(msg)
194
+ elif exp in ["FileNotFoundException"]:
195
+ raise FileNotFoundError(msg)
196
+ else:
197
+ raise RuntimeError(msg)
198
+ out.raise_for_status()
199
+ return out
200
+
201
+ def _open(
202
+ self,
203
+ path,
204
+ mode="rb",
205
+ block_size=None,
206
+ autocommit=True,
207
+ replication=None,
208
+ permissions=None,
209
+ **kwargs,
210
+ ):
211
+ """
212
+
213
+ Parameters
214
+ ----------
215
+ path: str
216
+ File location
217
+ mode: str
218
+ 'rb', 'wb', etc.
219
+ block_size: int
220
+ Client buffer size for read-ahead or write buffer
221
+ autocommit: bool
222
+ If False, writes to temporary file that only gets put in final
223
+ location upon commit
224
+ replication: int
225
+ Number of copies of file on the cluster, write mode only
226
+ permissions: str or int
227
+ posix permissions, write mode only
228
+ kwargs
229
+
230
+ Returns
231
+ -------
232
+ WebHDFile instance
233
+ """
234
+ block_size = block_size or self.blocksize
235
+ return WebHDFile(
236
+ self,
237
+ path,
238
+ mode=mode,
239
+ block_size=block_size,
240
+ tempdir=self.tempdir,
241
+ autocommit=autocommit,
242
+ replication=replication,
243
+ permissions=permissions,
244
+ )
245
+
246
+ @staticmethod
247
+ def _process_info(info):
248
+ info["type"] = info["type"].lower()
249
+ info["size"] = info["length"]
250
+ return info
251
+
252
+ @classmethod
253
+ def _strip_protocol(cls, path):
254
+ return infer_storage_options(path)["path"]
255
+
256
+ @staticmethod
257
+ def _get_kwargs_from_urls(urlpath):
258
+ out = infer_storage_options(urlpath)
259
+ out.pop("path", None)
260
+ out.pop("protocol", None)
261
+ if "username" in out:
262
+ out["user"] = out.pop("username")
263
+ return out
264
+
265
+ def info(self, path):
266
+ out = self._call("GETFILESTATUS", path=path)
267
+ info = out.json()["FileStatus"]
268
+ info["name"] = path
269
+ return self._process_info(info)
270
+
271
+ def ls(self, path, detail=False):
272
+ out = self._call("LISTSTATUS", path=path)
273
+ infos = out.json()["FileStatuses"]["FileStatus"]
274
+ for info in infos:
275
+ self._process_info(info)
276
+ info["name"] = path.rstrip("/") + "/" + info["pathSuffix"]
277
+ if detail:
278
+ return sorted(infos, key=lambda i: i["name"])
279
+ else:
280
+ return sorted(info["name"] for info in infos)
281
+
282
+ def content_summary(self, path):
283
+ """Total numbers of files, directories and bytes under path"""
284
+ out = self._call("GETCONTENTSUMMARY", path=path)
285
+ return out.json()["ContentSummary"]
286
+
287
+ def ukey(self, path):
288
+ """Checksum info of file, giving method and result"""
289
+ out = self._call("GETFILECHECKSUM", path=path, redirect=False)
290
+ if "Location" in out.headers:
291
+ location = self._apply_proxy(out.headers["Location"])
292
+ out2 = self.session.get(location)
293
+ out2.raise_for_status()
294
+ return out2.json()["FileChecksum"]
295
+ else:
296
+ out.raise_for_status()
297
+ return out.json()["FileChecksum"]
298
+
299
+ def home_directory(self):
300
+ """Get user's home directory"""
301
+ out = self._call("GETHOMEDIRECTORY")
302
+ return out.json()["Path"]
303
+
304
+ def get_delegation_token(self, renewer=None):
305
+ """Retrieve token which can give the same authority to other users
306
+
307
+ Parameters
308
+ ----------
309
+ renewer: str or None
310
+ User who may use this token; if None, will be current user
311
+ """
312
+ if renewer:
313
+ out = self._call("GETDELEGATIONTOKEN", renewer=renewer)
314
+ else:
315
+ out = self._call("GETDELEGATIONTOKEN")
316
+ t = out.json()["Token"]
317
+ if t is None:
318
+ raise ValueError("No token available for this user/security context")
319
+ return t["urlString"]
320
+
321
+ def renew_delegation_token(self, token):
322
+ """Make token live longer. Returns new expiry time"""
323
+ out = self._call("RENEWDELEGATIONTOKEN", method="put", token=token)
324
+ return out.json()["long"]
325
+
326
+ def cancel_delegation_token(self, token):
327
+ """Stop the token from being useful"""
328
+ self._call("CANCELDELEGATIONTOKEN", method="put", token=token)
329
+
330
+ def chmod(self, path, mod):
331
+ """Set the permission at path
332
+
333
+ Parameters
334
+ ----------
335
+ path: str
336
+ location to set (file or directory)
337
+ mod: str or int
338
+ posix representation of permission, given as an octal string, e.g. '777'
339
+ or 0o777
340
+ """
341
+ self._call("SETPERMISSION", method="put", path=path, permission=mod)
342
+
343
+ def chown(self, path, owner=None, group=None):
344
+ """Change owning user and/or group"""
345
+ kwargs = {}
346
+ if owner is not None:
347
+ kwargs["owner"] = owner
348
+ if group is not None:
349
+ kwargs["group"] = group
350
+ self._call("SETOWNER", method="put", path=path, **kwargs)
351
+
352
+ def set_replication(self, path, replication):
353
+ """
354
+ Set file replication factor
355
+
356
+ Parameters
357
+ ----------
358
+ path: str
359
+ File location (not for directories)
360
+ replication: int
361
+ Number of copies of file on the cluster. Should be smaller than
362
+ number of data nodes; normally 3 on most systems.
363
+ """
364
+ self._call("SETREPLICATION", path=path, method="put", replication=replication)
365
+
366
+ def mkdir(self, path, **kwargs):
367
+ self._call("MKDIRS", method="put", path=path)
368
+
369
+ def makedirs(self, path, exist_ok=False):
370
+ if exist_ok is False and self.exists(path):
371
+ raise FileExistsError(path)
372
+ self.mkdir(path)
373
+
374
+ def mv(self, path1, path2, **kwargs):
375
+ self._call("RENAME", method="put", path=path1, destination=path2)
376
+
377
+ def rm(self, path, recursive=False, **kwargs):
378
+ self._call(
379
+ "DELETE",
380
+ method="delete",
381
+ path=path,
382
+ recursive="true" if recursive else "false",
383
+ )
384
+
385
+ def rm_file(self, path, **kwargs):
386
+ self.rm(path)
387
+
388
+ def cp_file(self, lpath, rpath, **kwargs):
389
+ with self.open(lpath) as lstream:
390
+ tmp_fname = "/".join([self._parent(rpath), f".tmp.{secrets.token_hex(16)}"])
391
+ # Perform an atomic copy (stream to a temporary file and
392
+ # move it to the actual destination).
393
+ try:
394
+ with self.open(tmp_fname, "wb") as rstream:
395
+ shutil.copyfileobj(lstream, rstream)
396
+ self.mv(tmp_fname, rpath)
397
+ except BaseException:
398
+ with suppress(FileNotFoundError):
399
+ self.rm(tmp_fname)
400
+ raise
401
+
402
+ def _apply_proxy(self, location):
403
+ if self.proxy and callable(self.proxy):
404
+ location = self.proxy(location)
405
+ elif self.proxy:
406
+ # as a dict
407
+ for k, v in self.proxy.items():
408
+ location = location.replace(k, v, 1)
409
+ return location
410
+
411
+
412
+ class WebHDFile(AbstractBufferedFile):
413
+ """A file living in HDFS over webHDFS"""
414
+
415
+ def __init__(self, fs, path, **kwargs):
416
+ super().__init__(fs, path, **kwargs)
417
+ kwargs = kwargs.copy()
418
+ if kwargs.get("permissions", None) is None:
419
+ kwargs.pop("permissions", None)
420
+ if kwargs.get("replication", None) is None:
421
+ kwargs.pop("replication", None)
422
+ self.permissions = kwargs.pop("permissions", 511)
423
+ tempdir = kwargs.pop("tempdir")
424
+ if kwargs.pop("autocommit", False) is False:
425
+ self.target = self.path
426
+ self.path = os.path.join(tempdir, str(uuid.uuid4()))
427
+
428
+ def _upload_chunk(self, final=False):
429
+ """Write one part of a multi-block file upload
430
+
431
+ Parameters
432
+ ==========
433
+ final: bool
434
+ This is the last block, so should complete file, if
435
+ self.autocommit is True.
436
+ """
437
+ out = self.fs.session.post(
438
+ self.location,
439
+ data=self.buffer.getvalue(),
440
+ headers={"content-type": "application/octet-stream"},
441
+ )
442
+ out.raise_for_status()
443
+ return True
444
+
445
+ def _initiate_upload(self):
446
+ """Create remote file/upload"""
447
+ kwargs = self.kwargs.copy()
448
+ if "a" in self.mode:
449
+ op, method = "APPEND", "POST"
450
+ else:
451
+ op, method = "CREATE", "PUT"
452
+ kwargs["overwrite"] = "true"
453
+ out = self.fs._call(op, method, self.path, redirect=False, **kwargs)
454
+ location = self.fs._apply_proxy(out.headers["Location"])
455
+ if "w" in self.mode:
456
+ # create empty file to append to
457
+ out2 = self.fs.session.put(
458
+ location, headers={"content-type": "application/octet-stream"}
459
+ )
460
+ out2.raise_for_status()
461
+ # after creating empty file, change location to append to
462
+ out2 = self.fs._call("APPEND", "POST", self.path, redirect=False, **kwargs)
463
+ self.location = self.fs._apply_proxy(out2.headers["Location"])
464
+
465
+ def _fetch_range(self, start, end):
466
+ start = max(start, 0)
467
+ end = min(self.size, end)
468
+ if start >= end or start >= self.size:
469
+ return b""
470
+ out = self.fs._call(
471
+ "OPEN", path=self.path, offset=start, length=end - start, redirect=False
472
+ )
473
+ out.raise_for_status()
474
+ if "Location" in out.headers:
475
+ location = out.headers["Location"]
476
+ out2 = self.fs.session.get(self.fs._apply_proxy(location))
477
+ return out2.content
478
+ else:
479
+ return out.content
480
+
481
+ def commit(self):
482
+ self.fs.mv(self.path, self.target)
483
+
484
+ def discard(self):
485
+ self.fs.rm(self.path)
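Usage note (not part of the diff): a hedged sketch of talking to a name-node with the insecure (user-name) auth mechanism described in the class docstring; host, port and user are placeholders for a reachable cluster.

import fsspec

fs = fsspec.filesystem(
    "webhdfs",
    host="namenode.example.com",  # hypothetical name-node
    port=9870,                    # __init__ defaults to 50070
    user="hdfs",
)
fs.makedirs("/tmp/demo", exist_ok=True)
with fs.open("/tmp/demo/hello.txt", "wb") as f:
    f.write(b"hello webhdfs")
print(fs.ls("/tmp/demo", detail=True))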
temp_venv/lib/python3.13/site-packages/fsspec/implementations/zip.py ADDED
@@ -0,0 +1,177 @@
1
+ import os
2
+ import zipfile
3
+
4
+ import fsspec
5
+ from fsspec.archive import AbstractArchiveFileSystem
6
+
7
+
8
+ class ZipFileSystem(AbstractArchiveFileSystem):
9
+ """Read/Write contents of ZIP archive as a file-system
10
+
11
+ Keeps file object open while instance lives.
12
+
13
+ This class is pickleable, but not necessarily thread-safe
14
+ """
15
+
16
+ root_marker = ""
17
+ protocol = "zip"
18
+ cachable = False
19
+
20
+ def __init__(
21
+ self,
22
+ fo="",
23
+ mode="r",
24
+ target_protocol=None,
25
+ target_options=None,
26
+ compression=zipfile.ZIP_STORED,
27
+ allowZip64=True,
28
+ compresslevel=None,
29
+ **kwargs,
30
+ ):
31
+ """
32
+ Parameters
33
+ ----------
34
+ fo: str or file-like
35
+ Contains ZIP, and must exist. If a str, will fetch file using
36
+ :meth:`~fsspec.open_files`, which must return one file exactly.
37
+ mode: str
38
+ Accept: "r", "w", "a"
39
+ target_protocol: str (optional)
40
+ If ``fo`` is a string, this value can be used to override the
41
+ FS protocol inferred from a URL
42
+ target_options: dict (optional)
43
+ Kwargs passed when instantiating the target FS, if ``fo`` is
44
+ a string.
45
+ compression, allowZip64, compresslevel: passed to ZipFile
46
+ Only relevant when creating a ZIP
47
+ """
48
+ super().__init__(self, **kwargs)
49
+ if mode not in set("rwa"):
50
+ raise ValueError(f"mode '{mode}' not understood")
51
+ self.mode = mode
52
+ if isinstance(fo, (str, os.PathLike)):
53
+ if mode == "a":
54
+ m = "r+b"
55
+ else:
56
+ m = mode + "b"
57
+ fo = fsspec.open(
58
+ fo, mode=m, protocol=target_protocol, **(target_options or {})
59
+ )
60
+ self.force_zip_64 = allowZip64
61
+ self.of = fo
62
+ self.fo = fo.__enter__() # the whole instance is a context
63
+ self.zip = zipfile.ZipFile(
64
+ self.fo,
65
+ mode=mode,
66
+ compression=compression,
67
+ allowZip64=allowZip64,
68
+ compresslevel=compresslevel,
69
+ )
70
+ self.dir_cache = None
71
+
72
+ @classmethod
73
+ def _strip_protocol(cls, path):
74
+ # zip file paths are always relative to the archive root
75
+ return super()._strip_protocol(path).lstrip("/")
76
+
77
+ def __del__(self):
78
+ if hasattr(self, "zip"):
79
+ self.close()
80
+ del self.zip
81
+
82
+ def close(self):
83
+ """Commits any write changes to the file. Done on ``del`` too."""
84
+ self.zip.close()
85
+
86
+ def _get_dirs(self):
87
+ if self.dir_cache is None or self.mode in set("wa"):
88
+ # when writing, dir_cache is always in the ZipFile's attributes,
89
+ # not read from the file.
90
+ files = self.zip.infolist()
91
+ self.dir_cache = {
92
+ dirname.rstrip("/"): {
93
+ "name": dirname.rstrip("/"),
94
+ "size": 0,
95
+ "type": "directory",
96
+ }
97
+ for dirname in self._all_dirnames(self.zip.namelist())
98
+ }
99
+ for z in files:
100
+ f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
101
+ f.update(
102
+ {
103
+ "name": z.filename.rstrip("/"),
104
+ "size": z.file_size,
105
+ "type": ("directory" if z.is_dir() else "file"),
106
+ }
107
+ )
108
+ self.dir_cache[f["name"]] = f
109
+
110
+ def pipe_file(self, path, value, **kwargs):
111
+ # override upstream, because we know the exact file size in this case
112
+ self.zip.writestr(path, value, **kwargs)
113
+
114
+ def _open(
115
+ self,
116
+ path,
117
+ mode="rb",
118
+ block_size=None,
119
+ autocommit=True,
120
+ cache_options=None,
121
+ **kwargs,
122
+ ):
123
+ path = self._strip_protocol(path)
124
+ if "r" in mode and self.mode in set("wa"):
125
+ if self.exists(path):
126
+ raise OSError("ZipFS can only be open for reading or writing, not both")
127
+ raise FileNotFoundError(path)
128
+ if "r" in self.mode and "w" in mode:
129
+ raise OSError("ZipFS can only be open for reading or writing, not both")
130
+ out = self.zip.open(path, mode.strip("b"), force_zip64=self.force_zip_64)
131
+ if "r" in mode:
132
+ info = self.info(path)
133
+ out.size = info["size"]
134
+ out.name = info["name"]
135
+ return out
136
+
137
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
138
+ if maxdepth is not None and maxdepth < 1:
139
+ raise ValueError("maxdepth must be at least 1")
140
+
141
+ # Remove the leading slash, as the zip file paths are always
142
+ # given without a leading slash
143
+ path = path.lstrip("/")
144
+ path_parts = list(filter(lambda s: bool(s), path.split("/")))
145
+
146
+ def _matching_starts(file_path):
147
+ file_parts = filter(lambda s: bool(s), file_path.split("/"))
148
+ return all(a == b for a, b in zip(path_parts, file_parts))
149
+
150
+ self._get_dirs()
151
+
152
+ result = {}
153
+ # To match posix find, if an exact file name is given, we should
154
+ # return only that file
155
+ if path in self.dir_cache and self.dir_cache[path]["type"] == "file":
156
+ result[path] = self.dir_cache[path]
157
+ return result if detail else [path]
158
+
159
+ for file_path, file_info in self.dir_cache.items():
160
+ if not (path == "" or _matching_starts(file_path)):
161
+ continue
162
+
163
+ if file_info["type"] == "directory":
164
+ if withdirs:
165
+ if file_path not in result:
166
+ result[file_path.strip("/")] = file_info
167
+ continue
168
+
169
+ if file_path not in result:
170
+ result[file_path] = file_info if detail else None
171
+
172
+ if maxdepth:
173
+ path_depth = path.count("/")
174
+ result = {
175
+ k: v for k, v in result.items() if k.count("/") - path_depth < maxdepth
176
+ }
177
+ return result if detail else sorted(result)
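Usage note (not part of the diff): a minimal sketch of writing a ZIP and reading it back through this filesystem; the archive and member names are placeholders.

import fsspec

fs = fsspec.filesystem("zip", fo="bundle.zip", mode="w")  # hypothetical archive
fs.pipe_file("notes/readme.txt", b"hello zip")
fs.close()  # flushes the archive; also happens on __del__

fs = fsspec.filesystem("zip", fo="bundle.zip")  # default mode "r"
print(fs.find("", withdirs=True))
with fs.open("notes/readme.txt") as f:
    print(f.read())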
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/__init__.py ADDED
@@ -0,0 +1,289 @@
1
+ import os
2
+ from hashlib import md5
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import LocalFileSystem
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa: F401
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa: F401
9
+ from fsspec.tests.abstract.open import AbstractOpenTests # noqa: F401
10
+ from fsspec.tests.abstract.pipe import AbstractPipeTests # noqa: F401
11
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa: F401
12
+
13
+
14
+ class BaseAbstractFixtures:
15
+ """
16
+ Abstract base class containing fixtures that are used by but never need to
17
+ be overridden in derived filesystem-specific classes to run the abstract
18
+ tests on such filesystems.
19
+ """
20
+
21
+ @pytest.fixture
22
+ def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
23
+ """
24
+ Scenario on remote filesystem that is used for many cp/get/put tests.
25
+
26
+ Cleans up at the end of each test in which it is used.
27
+ """
28
+ source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
29
+ yield source
30
+ fs.rm(source, recursive=True)
31
+
32
+ @pytest.fixture
33
+ def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
34
+ """
35
+ Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.
36
+
37
+ Cleans up at the end of each test in which it is used.
38
+ """
39
+ source = self._glob_edge_cases_files(fs, fs_join, fs_path)
40
+ yield source
41
+ fs.rm(source, recursive=True)
42
+
43
+ @pytest.fixture
44
+ def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
45
+ """
46
+ Scenario on remote filesystem that is used to check cp/get/put on directory
47
+ and file with the same name prefixes.
48
+
49
+ Cleans up at the end of each test in which it is used.
50
+ """
51
+ source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
52
+ yield source
53
+ fs.rm(source, recursive=True)
54
+
55
+ @pytest.fixture
56
+ def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
57
+ """
58
+ Scenario on remote filesystem that is used to check cp/get/put files order
59
+ when source and destination are lists.
60
+
61
+ Cleans up at the end of each test in which it is used.
62
+ """
63
+ source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
64
+ yield source
65
+ fs.rm(source, recursive=True)
66
+
67
+ @pytest.fixture
68
+ def fs_target(self, fs, fs_join, fs_path):
69
+ """
70
+ Return name of remote directory that does not yet exist to copy into.
71
+
72
+ Cleans up at the end of each test in which it is used.
73
+ """
74
+ target = fs_join(fs_path, "target")
75
+ yield target
76
+ if fs.exists(target):
77
+ fs.rm(target, recursive=True)
78
+
79
+ @pytest.fixture
80
+ def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
81
+ """
82
+ Scenario on local filesystem that is used for many cp/get/put tests.
83
+
84
+ Cleans up at the end of each test in which it is used.
85
+ """
86
+ source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
87
+ yield source
88
+ local_fs.rm(source, recursive=True)
89
+
90
+ @pytest.fixture
91
+ def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
92
+ """
93
+ Scenario on local filesystem that is used for glob edge cases cp/get/put tests.
94
+
95
+ Cleans up at the end of each test in which it is used.
96
+ """
97
+ source = self._glob_edge_cases_files(local_fs, local_join, local_path)
98
+ yield source
99
+ local_fs.rm(source, recursive=True)
100
+
101
+ @pytest.fixture
102
+ def local_dir_and_file_with_same_name_prefix(
103
+ self, local_fs, local_join, local_path
104
+ ):
105
+ """
106
+ Scenario on local filesystem that is used to check cp/get/put on directory
107
+ and file with the same name prefixes.
108
+
109
+ Cleans up at the end of each test in which it is used.
110
+ """
111
+ source = self._dir_and_file_with_same_name_prefix(
112
+ local_fs, local_join, local_path
113
+ )
114
+ yield source
115
+ local_fs.rm(source, recursive=True)
116
+
117
+ @pytest.fixture
118
+ def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
119
+ """
120
+ Scenario on local filesystem that is used to check cp/get/put files order
121
+ when source and destination are lists.
122
+
123
+ Cleans up at the end of each test in which it is used.
124
+ """
125
+ source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
126
+ yield source
127
+ local_fs.rm(source, recursive=True)
128
+
129
+ @pytest.fixture
130
+ def local_target(self, local_fs, local_join, local_path):
131
+ """
132
+ Return name of local directory that does not yet exist to copy into.
133
+
134
+ Cleans up at the end of each test in which it is used.
135
+ """
136
+ target = local_join(local_path, "target")
137
+ yield target
138
+ if local_fs.exists(target):
139
+ local_fs.rm(target, recursive=True)
140
+
141
+ def _glob_edge_cases_files(self, some_fs, some_join, some_path):
142
+ """
143
+ Scenario that is used for glob edge cases cp/get/put tests.
144
+ Creates the following directory and file structure:
145
+
146
+ 📁 source
147
+ ├── 📄 file1
148
+ ├── 📄 file2
149
+ ├── 📁 subdir0
150
+ │ ├── 📄 subfile1
151
+ │ ├── 📄 subfile2
152
+ │ └── 📁 nesteddir
153
+ │ └── 📄 nestedfile
154
+ └── 📁 subdir1
155
+ ├── 📄 subfile1
156
+ ├── 📄 subfile2
157
+ └── 📁 nesteddir
158
+ └── 📄 nestedfile
159
+ """
160
+ source = some_join(some_path, "source")
161
+ some_fs.touch(some_join(source, "file1"))
162
+ some_fs.touch(some_join(source, "file2"))
163
+
164
+ for subdir_idx in range(2):
165
+ subdir = some_join(source, f"subdir{subdir_idx}")
166
+ nesteddir = some_join(subdir, "nesteddir")
167
+ some_fs.makedirs(nesteddir)
168
+ some_fs.touch(some_join(subdir, "subfile1"))
169
+ some_fs.touch(some_join(subdir, "subfile2"))
170
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
171
+
172
+ return source
173
+
174
+ def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
175
+ """
176
+ Scenario that is used for many cp/get/put tests. Creates the following
177
+ directory and file structure:
178
+
179
+ 📁 source
180
+ ├── 📄 file1
181
+ ├── 📄 file2
182
+ └── 📁 subdir
183
+ ├── 📄 subfile1
184
+ ├── 📄 subfile2
185
+ └── 📁 nesteddir
186
+ └── 📄 nestedfile
187
+ """
188
+ source = some_join(some_path, "source")
189
+ subdir = some_join(source, "subdir")
190
+ nesteddir = some_join(subdir, "nesteddir")
191
+ some_fs.makedirs(nesteddir)
192
+ some_fs.touch(some_join(source, "file1"))
193
+ some_fs.touch(some_join(source, "file2"))
194
+ some_fs.touch(some_join(subdir, "subfile1"))
195
+ some_fs.touch(some_join(subdir, "subfile2"))
196
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
197
+ return source
198
+
199
+ def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
200
+ """
201
+ Scenario that is used to check cp/get/put on directory and file with
202
+ the same name prefixes. Creates the following directory and file structure:
203
+
204
+ 📁 source
205
+ ├── 📄 subdir.txt
206
+ └── 📁 subdir
207
+ └── 📄 subfile.txt
208
+ """
209
+ source = some_join(some_path, "source")
210
+ subdir = some_join(source, "subdir")
211
+ file = some_join(source, "subdir.txt")
212
+ subfile = some_join(subdir, "subfile.txt")
213
+ some_fs.makedirs(subdir)
214
+ some_fs.touch(file)
215
+ some_fs.touch(subfile)
216
+ return source
217
+
218
+ def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
219
+ """
220
+ Scenario that is used to check cp/get/put files order when source and
221
+ destination are lists. Creates the following directory and file structure:
222
+
223
+ 📁 source
224
+ └── 📄 {hashed([0-9])}.txt
225
+ """
226
+ source = some_join(some_path, "source")
227
+ for i in range(10):
228
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
229
+ path = some_join(source, f"{hashed_i}.txt")
230
+ some_fs.pipe(path=path, value=f"{i}".encode())
231
+ return source
232
+
233
+
234
+ class AbstractFixtures(BaseAbstractFixtures):
235
+ """
236
+ Abstract base class containing fixtures that may be overridden in derived
237
+ filesystem-specific classes to run the abstract tests on such filesystems.
238
+
239
+ For any particular filesystem some of these fixtures must be overridden,
240
+ such as ``fs`` and ``fs_path``, and others may be overridden if the
241
+ default functions here are not appropriate, such as ``fs_join``.
242
+ """
243
+
244
+ @pytest.fixture
245
+ def fs(self):
246
+ raise NotImplementedError("This function must be overridden in derived classes")
247
+
248
+ @pytest.fixture
249
+ def fs_join(self):
250
+ """
251
+ Return a function that joins its arguments together into a path.
252
+
253
+ Most fsspec implementations join paths in a platform-dependent way,
254
+ but some will override this to always use a forward slash.
255
+ """
256
+ return os.path.join
257
+
258
+ @pytest.fixture
259
+ def fs_path(self):
260
+ raise NotImplementedError("This function must be overridden in derived classes")
261
+
262
+ @pytest.fixture(scope="class")
263
+ def local_fs(self):
264
+ # Maybe need an option for auto_mkdir=False? This is only relevant
265
+ # for certain implementations.
266
+ return LocalFileSystem(auto_mkdir=True)
267
+
268
+ @pytest.fixture
269
+ def local_join(self):
270
+ """
271
+ Return a function that joins its arguments together into a path, on
272
+ the local filesystem.
273
+ """
274
+ return os.path.join
275
+
276
+ @pytest.fixture
277
+ def local_path(self, tmpdir):
278
+ return tmpdir
279
+
280
+ @pytest.fixture
281
+ def supports_empty_directories(self):
282
+ """
283
+ Return whether this implementation supports empty directories.
284
+ """
285
+ return True
286
+
287
+ @pytest.fixture
288
+ def fs_sanitize_path(self):
289
+ return lambda x: x
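Usage note (not part of the diff): a hedged sketch of how a concrete filesystem plugs into these fixtures and the abstract test suites; the memory-backed example below is illustrative of the pattern, not a definitive recipe.

import pytest

import fsspec.tests.abstract as abstract
from fsspec.implementations.memory import MemoryFileSystem


class MemoryFixtures(abstract.AbstractFixtures):
    @pytest.fixture
    def fs(self):
        return MemoryFileSystem()

    @pytest.fixture
    def fs_join(self):
        # memory paths always use forward slashes
        return lambda *args: "/".join(args)

    @pytest.fixture
    def fs_path(self):
        return ""


class TestMemoryCopy(abstract.AbstractCopyTests, MemoryFixtures):
    pass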
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/common.py ADDED
@@ -0,0 +1,175 @@
1
+ GLOB_EDGE_CASES_TESTS = {
2
+ "argnames": ("path", "recursive", "maxdepth", "expected"),
3
+ "argvalues": [
4
+ ("fil?1", False, None, ["file1"]),
5
+ ("fil?1", True, None, ["file1"]),
6
+ ("file[1-2]", False, None, ["file1", "file2"]),
7
+ ("file[1-2]", True, None, ["file1", "file2"]),
8
+ ("*", False, None, ["file1", "file2"]),
9
+ (
10
+ "*",
11
+ True,
12
+ None,
13
+ [
14
+ "file1",
15
+ "file2",
16
+ "subdir0/subfile1",
17
+ "subdir0/subfile2",
18
+ "subdir0/nesteddir/nestedfile",
19
+ "subdir1/subfile1",
20
+ "subdir1/subfile2",
21
+ "subdir1/nesteddir/nestedfile",
22
+ ],
23
+ ),
24
+ ("*", True, 1, ["file1", "file2"]),
25
+ (
26
+ "*",
27
+ True,
28
+ 2,
29
+ [
30
+ "file1",
31
+ "file2",
32
+ "subdir0/subfile1",
33
+ "subdir0/subfile2",
34
+ "subdir1/subfile1",
35
+ "subdir1/subfile2",
36
+ ],
37
+ ),
38
+ ("*1", False, None, ["file1"]),
39
+ (
40
+ "*1",
41
+ True,
42
+ None,
43
+ [
44
+ "file1",
45
+ "subdir1/subfile1",
46
+ "subdir1/subfile2",
47
+ "subdir1/nesteddir/nestedfile",
48
+ ],
49
+ ),
50
+ ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
51
+ (
52
+ "**",
53
+ False,
54
+ None,
55
+ [
56
+ "file1",
57
+ "file2",
58
+ "subdir0/subfile1",
59
+ "subdir0/subfile2",
60
+ "subdir0/nesteddir/nestedfile",
61
+ "subdir1/subfile1",
62
+ "subdir1/subfile2",
63
+ "subdir1/nesteddir/nestedfile",
64
+ ],
65
+ ),
66
+ (
67
+ "**",
68
+ True,
69
+ None,
70
+ [
71
+ "file1",
72
+ "file2",
73
+ "subdir0/subfile1",
74
+ "subdir0/subfile2",
75
+ "subdir0/nesteddir/nestedfile",
76
+ "subdir1/subfile1",
77
+ "subdir1/subfile2",
78
+ "subdir1/nesteddir/nestedfile",
79
+ ],
80
+ ),
81
+ ("**", True, 1, ["file1", "file2"]),
82
+ (
83
+ "**",
84
+ True,
85
+ 2,
86
+ [
87
+ "file1",
88
+ "file2",
89
+ "subdir0/subfile1",
90
+ "subdir0/subfile2",
91
+ "subdir0/nesteddir/nestedfile",
92
+ "subdir1/subfile1",
93
+ "subdir1/subfile2",
94
+ "subdir1/nesteddir/nestedfile",
95
+ ],
96
+ ),
97
+ (
98
+ "**",
99
+ False,
100
+ 2,
101
+ [
102
+ "file1",
103
+ "file2",
104
+ "subdir0/subfile1",
105
+ "subdir0/subfile2",
106
+ "subdir1/subfile1",
107
+ "subdir1/subfile2",
108
+ ],
109
+ ),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
+ (
112
+ "**/*1",
113
+ True,
114
+ None,
115
+ [
116
+ "file1",
117
+ "subdir0/subfile1",
118
+ "subdir1/subfile1",
119
+ "subdir1/subfile2",
120
+ "subdir1/nesteddir/nestedfile",
121
+ ],
122
+ ),
123
+ ("**/*1", True, 1, ["file1"]),
124
+ (
125
+ "**/*1",
126
+ True,
127
+ 2,
128
+ ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
+ ),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
+ ("**/subdir0", False, None, []),
132
+ ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
+ ("**/subdir0/nested*", False, 2, []),
134
+ ("**/subdir0/nested*", True, 2, ["nestedfile"]),
135
+ ("subdir[1-2]", False, None, []),
136
+ ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
137
+ ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
138
+ ("subdir[0-1]", False, None, []),
139
+ (
140
+ "subdir[0-1]",
141
+ True,
142
+ None,
143
+ [
144
+ "subdir0/subfile1",
145
+ "subdir0/subfile2",
146
+ "subdir0/nesteddir/nestedfile",
147
+ "subdir1/subfile1",
148
+ "subdir1/subfile2",
149
+ "subdir1/nesteddir/nestedfile",
150
+ ],
151
+ ),
152
+ (
153
+ "subdir[0-1]/*fil[e]*",
154
+ False,
155
+ None,
156
+ [
157
+ "subdir0/subfile1",
158
+ "subdir0/subfile2",
159
+ "subdir1/subfile1",
160
+ "subdir1/subfile2",
161
+ ],
162
+ ),
163
+ (
164
+ "subdir[0-1]/*fil[e]*",
165
+ True,
166
+ None,
167
+ [
168
+ "subdir0/subfile1",
169
+ "subdir0/subfile2",
170
+ "subdir1/subfile1",
171
+ "subdir1/subfile2",
172
+ ],
173
+ ),
174
+ ],
175
+ }
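Usage note (not part of the diff): the copy/get/put suites below unpack this table with pytest.mark.parametrize; a minimal standalone sketch of the same pattern follows.

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


@pytest.mark.parametrize(
    GLOB_EDGE_CASES_TESTS["argnames"],
    GLOB_EDGE_CASES_TESTS["argvalues"],
)
def test_glob_case(path, recursive, maxdepth, expected):
    # each case is a glob pattern, recursion settings and the relative
    # paths it should match inside the "source" tree built by the fixtures
    assert isinstance(expected, list)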
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/copy.py ADDED
@@ -0,0 +1,557 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractCopyTests:
10
+ def test_copy_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_bulk_operations_scenario_0,
15
+ fs_target,
16
+ supports_empty_directories,
17
+ ):
18
+ # Copy scenario 1a
19
+ source = fs_bulk_operations_scenario_0
20
+
21
+ target = fs_target
22
+ fs.mkdir(target)
23
+ if not supports_empty_directories:
24
+ # Force target directory to exist by adding a dummy file
25
+ fs.touch(fs_join(target, "dummy"))
26
+ assert fs.isdir(target)
27
+
28
+ target_file2 = fs_join(target, "file2")
29
+ target_subfile1 = fs_join(target, "subfile1")
30
+
31
+ # Copy from source directory
32
+ fs.cp(fs_join(source, "file2"), target)
33
+ assert fs.isfile(target_file2)
34
+
35
+ # Copy from sub directory
36
+ fs.cp(fs_join(source, "subdir", "subfile1"), target)
37
+ assert fs.isfile(target_subfile1)
38
+
39
+ # Remove copied files
40
+ fs.rm([target_file2, target_subfile1])
41
+ assert not fs.exists(target_file2)
42
+ assert not fs.exists(target_subfile1)
43
+
44
+ # Repeat with trailing slash on target
45
+ fs.cp(fs_join(source, "file2"), target + "/")
46
+ assert fs.isdir(target)
47
+ assert fs.isfile(target_file2)
48
+
49
+ fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
50
+ assert fs.isfile(target_subfile1)
51
+
52
+ def test_copy_file_to_new_directory(
53
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
54
+ ):
55
+ # Copy scenario 1b
56
+ source = fs_bulk_operations_scenario_0
57
+
58
+ target = fs_target
59
+ fs.mkdir(target)
60
+
61
+ fs.cp(
62
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
63
+ ) # Note trailing slash
64
+ assert fs.isdir(target)
65
+ assert fs.isdir(fs_join(target, "newdir"))
66
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
67
+
68
+ def test_copy_file_to_file_in_existing_directory(
69
+ self,
70
+ fs,
71
+ fs_join,
72
+ fs_bulk_operations_scenario_0,
73
+ fs_target,
74
+ supports_empty_directories,
75
+ ):
76
+ # Copy scenario 1c
77
+ source = fs_bulk_operations_scenario_0
78
+
79
+ target = fs_target
80
+ fs.mkdir(target)
81
+ if not supports_empty_directories:
82
+ # Force target directory to exist by adding a dummy file
83
+ fs.touch(fs_join(target, "dummy"))
84
+ assert fs.isdir(target)
85
+
86
+ fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
87
+ assert fs.isfile(fs_join(target, "newfile"))
88
+
89
+ def test_copy_file_to_file_in_new_directory(
90
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
91
+ ):
92
+ # Copy scenario 1d
93
+ source = fs_bulk_operations_scenario_0
94
+
95
+ target = fs_target
96
+ fs.mkdir(target)
97
+
98
+ fs.cp(
99
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
100
+ )
101
+ assert fs.isdir(fs_join(target, "newdir"))
102
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
103
+
104
+ def test_copy_directory_to_existing_directory(
105
+ self,
106
+ fs,
107
+ fs_join,
108
+ fs_bulk_operations_scenario_0,
109
+ fs_target,
110
+ supports_empty_directories,
111
+ ):
112
+ # Copy scenario 1e
113
+ source = fs_bulk_operations_scenario_0
114
+
115
+ target = fs_target
116
+ fs.mkdir(target)
117
+ if not supports_empty_directories:
118
+ # Force target directory to exist by adding a dummy file
119
+ dummy = fs_join(target, "dummy")
120
+ fs.touch(dummy)
121
+ assert fs.isdir(target)
122
+
123
+ for source_slash, target_slash in zip([False, True], [False, True]):
124
+ s = fs_join(source, "subdir")
125
+ if source_slash:
126
+ s += "/"
127
+ t = target + "/" if target_slash else target
128
+
129
+ # Without recursive does nothing
130
+ fs.cp(s, t)
131
+ assert fs.ls(target, detail=False) == (
132
+ [] if supports_empty_directories else [dummy]
133
+ )
134
+
135
+ # With recursive
136
+ fs.cp(s, t, recursive=True)
137
+ if source_slash:
138
+ assert fs.isfile(fs_join(target, "subfile1"))
139
+ assert fs.isfile(fs_join(target, "subfile2"))
140
+ assert fs.isdir(fs_join(target, "nesteddir"))
141
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
142
+ assert not fs.exists(fs_join(target, "subdir"))
143
+
144
+ fs.rm(
145
+ [
146
+ fs_join(target, "subfile1"),
147
+ fs_join(target, "subfile2"),
148
+ fs_join(target, "nesteddir"),
149
+ ],
150
+ recursive=True,
151
+ )
152
+ else:
153
+ assert fs.isdir(fs_join(target, "subdir"))
154
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
155
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
156
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
157
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
158
+
159
+ fs.rm(fs_join(target, "subdir"), recursive=True)
160
+ assert fs.ls(target, detail=False) == (
161
+ [] if supports_empty_directories else [dummy]
162
+ )
163
+
164
+ # Limit recursive by maxdepth
165
+ fs.cp(s, t, recursive=True, maxdepth=1)
166
+ if source_slash:
167
+ assert fs.isfile(fs_join(target, "subfile1"))
168
+ assert fs.isfile(fs_join(target, "subfile2"))
169
+ assert not fs.exists(fs_join(target, "nesteddir"))
170
+ assert not fs.exists(fs_join(target, "subdir"))
171
+
172
+ fs.rm(
173
+ [
174
+ fs_join(target, "subfile1"),
175
+ fs_join(target, "subfile2"),
176
+ ],
177
+ recursive=True,
178
+ )
179
+ else:
180
+ assert fs.isdir(fs_join(target, "subdir"))
181
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
182
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
183
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
184
+
185
+ fs.rm(fs_join(target, "subdir"), recursive=True)
186
+ assert fs.ls(target, detail=False) == (
187
+ [] if supports_empty_directories else [dummy]
188
+ )
189
+
190
+ def test_copy_directory_to_new_directory(
191
+ self,
192
+ fs,
193
+ fs_join,
194
+ fs_bulk_operations_scenario_0,
195
+ fs_target,
196
+ supports_empty_directories,
197
+ ):
198
+ # Copy scenario 1f
199
+ source = fs_bulk_operations_scenario_0
200
+
201
+ target = fs_target
202
+ fs.mkdir(target)
203
+
204
+ for source_slash, target_slash in zip([False, True], [False, True]):
205
+ s = fs_join(source, "subdir")
206
+ if source_slash:
207
+ s += "/"
208
+ t = fs_join(target, "newdir")
209
+ if target_slash:
210
+ t += "/"
211
+
212
+ # Without recursive does nothing
213
+ fs.cp(s, t)
214
+ if supports_empty_directories:
215
+ assert fs.ls(target) == []
216
+ else:
217
+ with pytest.raises(FileNotFoundError):
218
+ fs.ls(target)
219
+
220
+ # With recursive
221
+ fs.cp(s, t, recursive=True)
222
+ assert fs.isdir(fs_join(target, "newdir"))
223
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
224
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
225
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
226
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
227
+ assert not fs.exists(fs_join(target, "subdir"))
228
+
229
+ fs.rm(fs_join(target, "newdir"), recursive=True)
230
+ assert not fs.exists(fs_join(target, "newdir"))
231
+
232
+ # Limit recursive by maxdepth
233
+ fs.cp(s, t, recursive=True, maxdepth=1)
234
+ assert fs.isdir(fs_join(target, "newdir"))
235
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
236
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
237
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
238
+ assert not fs.exists(fs_join(target, "subdir"))
239
+
240
+ fs.rm(fs_join(target, "newdir"), recursive=True)
241
+ assert not fs.exists(fs_join(target, "newdir"))
242
+
243
+ def test_copy_glob_to_existing_directory(
244
+ self,
245
+ fs,
246
+ fs_join,
247
+ fs_bulk_operations_scenario_0,
248
+ fs_target,
249
+ supports_empty_directories,
250
+ ):
251
+ # Copy scenario 1g
252
+ source = fs_bulk_operations_scenario_0
253
+
254
+ target = fs_target
255
+ fs.mkdir(target)
256
+ if not supports_empty_directories:
257
+ # Force target directory to exist by adding a dummy file
258
+ dummy = fs_join(target, "dummy")
259
+ fs.touch(dummy)
260
+ assert fs.isdir(target)
261
+
262
+ for target_slash in [False, True]:
263
+ t = target + "/" if target_slash else target
264
+
265
+ # Without recursive
266
+ fs.cp(fs_join(source, "subdir", "*"), t)
267
+ assert fs.isfile(fs_join(target, "subfile1"))
268
+ assert fs.isfile(fs_join(target, "subfile2"))
269
+ assert not fs.isdir(fs_join(target, "nesteddir"))
270
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
271
+ assert not fs.exists(fs_join(target, "subdir"))
272
+
273
+ fs.rm(
274
+ [
275
+ fs_join(target, "subfile1"),
276
+ fs_join(target, "subfile2"),
277
+ ],
278
+ recursive=True,
279
+ )
280
+ assert fs.ls(target, detail=False) == (
281
+ [] if supports_empty_directories else [dummy]
282
+ )
283
+
284
+ # With recursive
285
+ for glob, recursive in zip(["*", "**"], [True, False]):
286
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
287
+ assert fs.isfile(fs_join(target, "subfile1"))
288
+ assert fs.isfile(fs_join(target, "subfile2"))
289
+ assert fs.isdir(fs_join(target, "nesteddir"))
290
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
291
+ assert not fs.exists(fs_join(target, "subdir"))
292
+
293
+ fs.rm(
294
+ [
295
+ fs_join(target, "subfile1"),
296
+ fs_join(target, "subfile2"),
297
+ fs_join(target, "nesteddir"),
298
+ ],
299
+ recursive=True,
300
+ )
301
+ assert fs.ls(target, detail=False) == (
302
+ [] if supports_empty_directories else [dummy]
303
+ )
304
+
305
+ # Limit recursive by maxdepth
306
+ fs.cp(
307
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
308
+ )
309
+ assert fs.isfile(fs_join(target, "subfile1"))
310
+ assert fs.isfile(fs_join(target, "subfile2"))
311
+ assert not fs.exists(fs_join(target, "nesteddir"))
312
+ assert not fs.exists(fs_join(target, "subdir"))
313
+
314
+ fs.rm(
315
+ [
316
+ fs_join(target, "subfile1"),
317
+ fs_join(target, "subfile2"),
318
+ ],
319
+ recursive=True,
320
+ )
321
+ assert fs.ls(target, detail=False) == (
322
+ [] if supports_empty_directories else [dummy]
323
+ )
324
+
325
+ def test_copy_glob_to_new_directory(
326
+         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
+     ):
+         # Copy scenario 1h
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         fs.mkdir(target)
+
+         for target_slash in [False, True]:
+             t = fs_join(target, "newdir")
+             if target_slash:
+                 t += "/"
+
+             # Without recursive
+             fs.cp(fs_join(source, "subdir", "*"), t)
+             assert fs.isdir(fs_join(target, "newdir"))
+             assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+             assert fs.isfile(fs_join(target, "newdir", "subfile2"))
+             assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
+             assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
+             assert not fs.exists(fs_join(target, "subdir"))
+             assert not fs.exists(fs_join(target, "newdir", "subdir"))
+
+             fs.rm(fs_join(target, "newdir"), recursive=True)
+             assert not fs.exists(fs_join(target, "newdir"))
+
+             # With recursive
+             for glob, recursive in zip(["*", "**"], [True, False]):
+                 fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
+                 assert fs.isdir(fs_join(target, "newdir"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile2"))
+                 assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
+                 assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
+                 assert not fs.exists(fs_join(target, "subdir"))
+                 assert not fs.exists(fs_join(target, "newdir", "subdir"))
+
+                 fs.rm(fs_join(target, "newdir"), recursive=True)
+                 assert not fs.exists(fs_join(target, "newdir"))
+
+                 # Limit recursive by maxdepth
+                 fs.cp(
+                     fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
+                 )
+                 assert fs.isdir(fs_join(target, "newdir"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile2"))
+                 assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
+                 assert not fs.exists(fs_join(target, "subdir"))
+                 assert not fs.exists(fs_join(target, "newdir", "subdir"))
+
+                 fs.rm(fs_join(target, "newdir"), recursive=True)
+                 assert not fs.exists(fs_join(target, "newdir"))
+
+     @pytest.mark.parametrize(
+         GLOB_EDGE_CASES_TESTS["argnames"],
+         GLOB_EDGE_CASES_TESTS["argvalues"],
+     )
+     def test_copy_glob_edge_cases(
+         self,
+         path,
+         recursive,
+         maxdepth,
+         expected,
+         fs,
+         fs_join,
+         fs_glob_edge_cases_files,
+         fs_target,
+         fs_sanitize_path,
+     ):
+         # Copy scenario 1g
+         source = fs_glob_edge_cases_files
+
+         target = fs_target
+
+         for new_dir, target_slash in product([True, False], [True, False]):
+             fs.mkdir(target)
+
+             t = fs_join(target, "newdir") if new_dir else target
+             t = t + "/" if target_slash else t
+
+             fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
+
+             output = fs.find(target)
+             if new_dir:
+                 prefixed_expected = [
+                     fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
+                 ]
+             else:
+                 prefixed_expected = [
+                     fs_sanitize_path(fs_join(target, p)) for p in expected
+                 ]
+             assert sorted(output) == sorted(prefixed_expected)
+
+             try:
+                 fs.rm(target, recursive=True)
+             except FileNotFoundError:
+                 pass
+
+     def test_copy_list_of_files_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         fs_target,
+         supports_empty_directories,
+     ):
+         # Copy scenario 2a
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         fs.mkdir(target)
+         if not supports_empty_directories:
+             # Force target directory to exist by adding a dummy file
+             dummy = fs_join(target, "dummy")
+             fs.touch(dummy)
+         assert fs.isdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         for target_slash in [False, True]:
+             t = target + "/" if target_slash else target
+
+             fs.cp(source_files, t)
+             assert fs.isfile(fs_join(target, "file1"))
+             assert fs.isfile(fs_join(target, "file2"))
+             assert fs.isfile(fs_join(target, "subfile1"))
+
+             fs.rm(
+                 [
+                     fs_join(target, "file1"),
+                     fs_join(target, "file2"),
+                     fs_join(target, "subfile1"),
+                 ],
+                 recursive=True,
+             )
+             assert fs.ls(target, detail=False) == (
+                 [] if supports_empty_directories else [dummy]
+             )
+
+     def test_copy_list_of_files_to_new_directory(
+         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
+     ):
+         # Copy scenario 2b
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         fs.mkdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         fs.cp(source_files, fs_join(target, "newdir") + "/")  # Note trailing slash
+         assert fs.isdir(fs_join(target, "newdir"))
+         assert fs.isfile(fs_join(target, "newdir", "file1"))
+         assert fs.isfile(fs_join(target, "newdir", "file2"))
+         assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+
+     def test_copy_two_files_new_directory(
+         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
+     ):
+         # This is a duplicate of test_copy_list_of_files_to_new_directory and
+         # can eventually be removed.
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         assert not fs.exists(target)
+         fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
+
+         assert fs.isdir(target)
+         assert fs.isfile(fs_join(target, "file1"))
+         assert fs.isfile(fs_join(target, "file2"))
+
+     def test_copy_directory_without_files_with_same_name_prefix(
+         self,
+         fs,
+         fs_join,
+         fs_target,
+         fs_dir_and_file_with_same_name_prefix,
+         supports_empty_directories,
+     ):
+         # Create the test dirs
+         source = fs_dir_and_file_with_same_name_prefix
+         target = fs_target
+
+         # Test without glob
+         fs.cp(fs_join(source, "subdir"), target, recursive=True)
+
+         assert fs.isfile(fs_join(target, "subfile.txt"))
+         assert not fs.isfile(fs_join(target, "subdir.txt"))
+
+         fs.rm([fs_join(target, "subfile.txt")])
+         if supports_empty_directories:
+             assert fs.ls(target) == []
+         else:
+             assert not fs.exists(target)
+
+         # Test with glob
+         fs.cp(fs_join(source, "subdir*"), target, recursive=True)
+
+         assert fs.isdir(fs_join(target, "subdir"))
+         assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
+         assert fs.isfile(fs_join(target, "subdir.txt"))
+
+     def test_copy_with_source_and_destination_as_list(
+         self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
+     ):
+         # Create the test dir
+         source = fs_10_files_with_hashed_names
+         target = fs_target
+
+         # Create list of files for source and destination
+         source_files = []
+         destination_files = []
+         for i in range(10):
+             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
+             source_files.append(fs_join(source, f"{hashed_i}.txt"))
+             destination_files.append(fs_join(target, f"{hashed_i}.txt"))
+
+         # Copy and assert order was kept
+         fs.copy(path1=source_files, path2=destination_files)
+
+         for i in range(10):
+             file_content = fs.cat(destination_files[i]).decode("utf-8")
+             assert file_content == str(i)
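Editorial note (not part of the added file): the copy tests above run `fs.cp` within a single filesystem and use project fixtures. A minimal, self-contained sketch of the glob semantics they exercise, assuming only fsspec's in-memory filesystem rather than any fixture, might look like this:

    # Sketch only: glob copy with and without recursion on the in-memory filesystem.
    import fsspec

    fs = fsspec.filesystem("memory")
    fs.mkdir("/src/subdir/nesteddir")
    for p in ["/src/subdir/subfile1", "/src/subdir/subfile2", "/src/subdir/nesteddir/nestedfile"]:
        fs.touch(p)

    fs.cp("/src/subdir/*", "/dst1/")                              # files only, no recursion
    fs.cp("/src/subdir/*", "/dst2/", recursive=True)              # also copies nesteddir/
    fs.cp("/src/subdir/*", "/dst3/", recursive=True, maxdepth=1)  # recursion capped at depth 1
    print(fs.find("/dst1"), fs.find("/dst2"), fs.find("/dst3"))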
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/get.py ADDED
@@ -0,0 +1,587 @@
+ from hashlib import md5
+ from itertools import product
+
+ import pytest
+
+ from fsspec.implementations.local import make_path_posix
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
+
+
+ class AbstractGetTests:
+     def test_get_file_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1a
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+         assert local_fs.isdir(target)
+
+         target_file2 = local_join(target, "file2")
+         target_subfile1 = local_join(target, "subfile1")
+
+         # Copy from source directory
+         fs.get(fs_join(source, "file2"), target)
+         assert local_fs.isfile(target_file2)
+
+         # Copy from sub directory
+         fs.get(fs_join(source, "subdir", "subfile1"), target)
+         assert local_fs.isfile(target_subfile1)
+
+         # Remove copied files
+         local_fs.rm([target_file2, target_subfile1])
+         assert not local_fs.exists(target_file2)
+         assert not local_fs.exists(target_subfile1)
+
+         # Repeat with trailing slash on target
+         fs.get(fs_join(source, "file2"), target + "/")
+         assert local_fs.isdir(target)
+         assert local_fs.isfile(target_file2)
+
+         fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
+         assert local_fs.isfile(target_subfile1)
+
+     def test_get_file_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1b
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         fs.get(
+             fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
+         )  # Note trailing slash
+
+         assert local_fs.isdir(target)
+         assert local_fs.isdir(local_join(target, "newdir"))
+         assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+
+     def test_get_file_to_file_in_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1c
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
+         assert local_fs.isfile(local_join(target, "newfile"))
+
+     def test_get_file_to_file_in_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1d
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         fs.get(
+             fs_join(source, "subdir", "subfile1"),
+             local_join(target, "newdir", "newfile"),
+         )
+         assert local_fs.isdir(local_join(target, "newdir"))
+         assert local_fs.isfile(local_join(target, "newdir", "newfile"))
+
+     def test_get_directory_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1e
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+         assert local_fs.isdir(target)
+
+         for source_slash, target_slash in zip([False, True], [False, True]):
+             s = fs_join(source, "subdir")
+             if source_slash:
+                 s += "/"
+             t = target + "/" if target_slash else target
+
+             # Without recursive does nothing
+             fs.get(s, t)
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             fs.get(s, t, recursive=True)
+             if source_slash:
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert local_fs.isdir(local_join(target, "nesteddir"))
+                 assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                         local_join(target, "nesteddir"),
+                     ],
+                     recursive=True,
+                 )
+             else:
+                 assert local_fs.isdir(local_join(target, "subdir"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
+                 assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
+                 assert local_fs.isfile(
+                     local_join(target, "subdir", "nesteddir", "nestedfile")
+                 )
+
+                 local_fs.rm(local_join(target, "subdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+             # Limit recursive by maxdepth
+             fs.get(s, t, recursive=True, maxdepth=1)
+             if source_slash:
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert not local_fs.exists(local_join(target, "nesteddir"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                     ],
+                     recursive=True,
+                 )
+             else:
+                 assert local_fs.isdir(local_join(target, "subdir"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
+                 assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
+
+                 local_fs.rm(local_join(target, "subdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+     def test_get_directory_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1f
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         for source_slash, target_slash in zip([False, True], [False, True]):
+             s = fs_join(source, "subdir")
+             if source_slash:
+                 s += "/"
+             t = local_join(target, "newdir")
+             if target_slash:
+                 t += "/"
+
+             # Without recursive does nothing
+             fs.get(s, t)
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             fs.get(s, t, recursive=True)
+             assert local_fs.isdir(local_join(target, "newdir"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+             assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
+             assert local_fs.isfile(
+                 local_join(target, "newdir", "nesteddir", "nestedfile")
+             )
+             assert not local_fs.exists(local_join(target, "subdir"))
+
+             local_fs.rm(local_join(target, "newdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+             # Limit recursive by maxdepth
+             fs.get(s, t, recursive=True, maxdepth=1)
+             assert local_fs.isdir(local_join(target, "newdir"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+             assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
+             assert not local_fs.exists(local_join(target, "subdir"))
+
+             local_fs.rm(local_join(target, "newdir"), recursive=True)
+             assert not local_fs.exists(local_join(target, "newdir"))
+
+     def test_get_glob_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1g
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         for target_slash in [False, True]:
+             t = target + "/" if target_slash else target
+
+             # Without recursive
+             fs.get(fs_join(source, "subdir", "*"), t)
+             assert local_fs.isfile(local_join(target, "subfile1"))
+             assert local_fs.isfile(local_join(target, "subfile2"))
+             assert not local_fs.isdir(local_join(target, "nesteddir"))
+             assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
+             assert not local_fs.exists(local_join(target, "subdir"))
+
+             local_fs.rm(
+                 [
+                     local_join(target, "subfile1"),
+                     local_join(target, "subfile2"),
+                 ],
+                 recursive=True,
+             )
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             for glob, recursive in zip(["*", "**"], [True, False]):
+                 fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert local_fs.isdir(local_join(target, "nesteddir"))
+                 assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                         local_join(target, "nesteddir"),
+                     ],
+                     recursive=True,
+                 )
+                 assert local_fs.ls(target) == []
+
+                 # Limit recursive by maxdepth
+                 fs.get(
+                     fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
+                 )
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert not local_fs.exists(local_join(target, "nesteddir"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                     ],
+                     recursive=True,
+                 )
+                 assert local_fs.ls(target) == []
+
+     def test_get_glob_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1h
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         for target_slash in [False, True]:
+             t = fs_join(target, "newdir")
+             if target_slash:
+                 t += "/"
+
+             # Without recursive
+             fs.get(fs_join(source, "subdir", "*"), t)
+             assert local_fs.isdir(local_join(target, "newdir"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+             assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
+             assert not local_fs.exists(
+                 local_join(target, "newdir", "nesteddir", "nestedfile")
+             )
+             assert not local_fs.exists(local_join(target, "subdir"))
+             assert not local_fs.exists(local_join(target, "newdir", "subdir"))
+
+             local_fs.rm(local_join(target, "newdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             for glob, recursive in zip(["*", "**"], [True, False]):
+                 fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
+                 assert local_fs.isdir(local_join(target, "newdir"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+                 assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
+                 assert local_fs.isfile(
+                     local_join(target, "newdir", "nesteddir", "nestedfile")
+                 )
+                 assert not local_fs.exists(local_join(target, "subdir"))
+                 assert not local_fs.exists(local_join(target, "newdir", "subdir"))
+
+                 local_fs.rm(local_join(target, "newdir"), recursive=True)
+                 assert not local_fs.exists(local_join(target, "newdir"))
+
+                 # Limit recursive by maxdepth
+                 fs.get(
+                     fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
+                 )
+                 assert local_fs.isdir(local_join(target, "newdir"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+                 assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+                 assert not local_fs.exists(local_join(target, "newdir", "subdir"))
+
+                 local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
+                 assert not local_fs.exists(local_join(target, "newdir"))
+
+     @pytest.mark.parametrize(
+         GLOB_EDGE_CASES_TESTS["argnames"],
+         GLOB_EDGE_CASES_TESTS["argvalues"],
+     )
+     def test_get_glob_edge_cases(
+         self,
+         path,
+         recursive,
+         maxdepth,
+         expected,
+         fs,
+         fs_join,
+         fs_glob_edge_cases_files,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1g
+         source = fs_glob_edge_cases_files
+
+         target = local_target
+
+         for new_dir, target_slash in product([True, False], [True, False]):
+             local_fs.mkdir(target)
+
+             t = local_join(target, "newdir") if new_dir else target
+             t = t + "/" if target_slash else t
+
+             fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
+
+             output = local_fs.find(target)
+             if new_dir:
+                 prefixed_expected = [
+                     make_path_posix(local_join(target, "newdir", p)) for p in expected
+                 ]
+             else:
+                 prefixed_expected = [
+                     make_path_posix(local_join(target, p)) for p in expected
+                 ]
+             assert sorted(output) == sorted(prefixed_expected)
+
+             try:
+                 local_fs.rm(target, recursive=True)
+             except FileNotFoundError:
+                 pass
+
+     def test_get_list_of_files_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 2a
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         for target_slash in [False, True]:
+             t = target + "/" if target_slash else target
+
+             fs.get(source_files, t)
+             assert local_fs.isfile(local_join(target, "file1"))
+             assert local_fs.isfile(local_join(target, "file2"))
+             assert local_fs.isfile(local_join(target, "subfile1"))
+
+             local_fs.rm(
+                 [
+                     local_join(target, "file1"),
+                     local_join(target, "file2"),
+                     local_join(target, "subfile1"),
+                 ],
+                 recursive=True,
+             )
+             assert local_fs.ls(target) == []
+
+     def test_get_list_of_files_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 2b
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         fs.get(source_files, local_join(target, "newdir") + "/")  # Note trailing slash
+         assert local_fs.isdir(local_join(target, "newdir"))
+         assert local_fs.isfile(local_join(target, "newdir", "file1"))
+         assert local_fs.isfile(local_join(target, "newdir", "file2"))
+         assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+
+     def test_get_directory_recursive(
+         self, fs, fs_join, fs_path, local_fs, local_join, local_target
+     ):
+         # https://github.com/fsspec/filesystem_spec/issues/1062
+         # Recursive cp/get/put of source directory into non-existent target directory.
+         src = fs_join(fs_path, "src")
+         src_file = fs_join(src, "file")
+         fs.mkdir(src)
+         fs.touch(src_file)
+
+         target = local_target
+
+         # get without slash
+         assert not local_fs.exists(target)
+         for loop in range(2):
+             fs.get(src, target, recursive=True)
+             assert local_fs.isdir(target)
+
+             if loop == 0:
+                 assert local_fs.isfile(local_join(target, "file"))
+                 assert not local_fs.exists(local_join(target, "src"))
+             else:
+                 assert local_fs.isfile(local_join(target, "file"))
+                 assert local_fs.isdir(local_join(target, "src"))
+                 assert local_fs.isfile(local_join(target, "src", "file"))
+
+         local_fs.rm(target, recursive=True)
+
+         # get with slash
+         assert not local_fs.exists(target)
+         for loop in range(2):
+             fs.get(src + "/", target, recursive=True)
+             assert local_fs.isdir(target)
+             assert local_fs.isfile(local_join(target, "file"))
+             assert not local_fs.exists(local_join(target, "src"))
+
+     def test_get_directory_without_files_with_same_name_prefix(
+         self,
+         fs,
+         fs_join,
+         local_fs,
+         local_join,
+         local_target,
+         fs_dir_and_file_with_same_name_prefix,
+     ):
+         # Create the test dirs
+         source = fs_dir_and_file_with_same_name_prefix
+         target = local_target
+
+         # Test without glob
+         fs.get(fs_join(source, "subdir"), target, recursive=True)
+
+         assert local_fs.isfile(local_join(target, "subfile.txt"))
+         assert not local_fs.isfile(local_join(target, "subdir.txt"))
+
+         local_fs.rm([local_join(target, "subfile.txt")])
+         assert local_fs.ls(target) == []
+
+         # Test with glob
+         fs.get(fs_join(source, "subdir*"), target, recursive=True)
+
+         assert local_fs.isdir(local_join(target, "subdir"))
+         assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
+         assert local_fs.isfile(local_join(target, "subdir.txt"))
+
+     def test_get_with_source_and_destination_as_list(
+         self,
+         fs,
+         fs_join,
+         local_fs,
+         local_join,
+         local_target,
+         fs_10_files_with_hashed_names,
+     ):
+         # Create the test dir
+         source = fs_10_files_with_hashed_names
+         target = local_target
+
+         # Create list of files for source and destination
+         source_files = []
+         destination_files = []
+         for i in range(10):
+             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
+             source_files.append(fs_join(source, f"{hashed_i}.txt"))
+             destination_files.append(
+                 make_path_posix(local_join(target, f"{hashed_i}.txt"))
+             )
+
+         # Copy and assert order was kept
+         fs.get(rpath=source_files, lpath=destination_files)
+
+         for i in range(10):
+             file_content = local_fs.cat(destination_files[i]).decode("utf-8")
+             assert file_content == str(i)
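Editorial note (not part of the added file): the get tests mirror the copy tests, except the destination is always the local filesystem, so every assertion runs against `local_fs`/`local_join`. A minimal sketch of the round trip they rely on, assuming only the in-memory filesystem and a temporary local directory, might be:

    # Sketch only: fs.get downloads from any filesystem to a local path;
    # a trailing slash or recursive=True controls directory handling.
    import os
    import tempfile
    import fsspec

    fs = fsspec.filesystem("memory")
    fs.makedirs("/data/subdir", exist_ok=True)
    fs.pipe("/data/subdir/subfile1", b"hello")  # create a small source-side file

    target = tempfile.mkdtemp()
    fs.get("/data/subdir/subfile1", target + "/")  # single file into existing dir
    fs.get("/data/subdir", os.path.join(target, "newdir"), recursive=True)  # whole directory

    assert os.path.isfile(os.path.join(target, "subfile1"))
    assert os.path.isfile(os.path.join(target, "newdir", "subfile1"))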
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/mv.py ADDED
@@ -0,0 +1,57 @@
+ import os
+
+ import pytest
+
+ import fsspec
+
+
+ def test_move_raises_error_with_tmpdir(tmpdir):
+     # Create a file in the temporary directory
+     source = tmpdir.join("source_file.txt")
+     source.write("content")
+
+     # Define a destination that simulates a protected or invalid path
+     destination = tmpdir.join("non_existent_directory/destination_file.txt")
+
+     # Instantiate the filesystem (assuming the local file system interface)
+     fs = fsspec.filesystem("file")
+
+     # Use the actual file paths as string
+     with pytest.raises(FileNotFoundError):
+         fs.mv(str(source), str(destination))
+
+
+ @pytest.mark.parametrize("recursive", (True, False))
+ def test_move_raises_error_with_tmpdir_permission(recursive, tmpdir):
+     # Create a file in the temporary directory
+     source = tmpdir.join("source_file.txt")
+     source.write("content")
+
+     # Create a protected directory (non-writable)
+     protected_dir = tmpdir.mkdir("protected_directory")
+     protected_path = str(protected_dir)
+
+     # Set the directory to read-only
+     if os.name == "nt":
+         os.system(f'icacls "{protected_path}" /deny Everyone:(W)')
+     else:
+         os.chmod(protected_path, 0o555)  # Sets the directory to read-only
+
+     # Define a destination inside the protected directory
+     destination = protected_dir.join("destination_file.txt")
+
+     # Instantiate the filesystem (assuming the local file system interface)
+     fs = fsspec.filesystem("file")
+
+     # Try to move the file to the read-only directory, expecting a permission error
+     with pytest.raises(PermissionError):
+         fs.mv(str(source), str(destination), recursive=recursive)
+
+     # Assert the file was not created in the destination
+     assert not os.path.exists(destination)
+
+     # Cleanup: Restore permissions so the directory can be cleaned up
+     if os.name == "nt":
+         os.system(f'icacls "{protected_path}" /remove:d Everyone')
+     else:
+         os.chmod(protected_path, 0o755)  # Restore write permission for cleanup
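Editorial note (not part of the added file): both tests above exercise failure paths only. For completeness, a successful local move is simply (a sketch under the same local-filesystem assumption):

    # Sketch only: a plain successful fs.mv on the local filesystem.
    import os
    import tempfile
    import fsspec

    fs = fsspec.filesystem("file")
    d = tempfile.mkdtemp()
    src = os.path.join(d, "a.txt")
    dst = os.path.join(d, "b.txt")
    with open(src, "w") as f:
        f.write("content")

    fs.mv(src, dst)
    assert not os.path.exists(src) and os.path.exists(dst)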
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/open.py ADDED
@@ -0,0 +1,11 @@
+ import pytest
+
+
+ class AbstractOpenTests:
+     def test_open_exclusive(self, fs, fs_target):
+         with fs.open(fs_target, "wb") as f:
+             f.write(b"data")
+         with fs.open(fs_target, "rb") as f:
+             assert f.read() == b"data"
+         with pytest.raises(FileExistsError):
+             fs.open(fs_target, "xb")
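Editorial note (not part of the added file): mode "xb" requests exclusive creation, mirroring Python's built-in open(), so the second open of an existing path is expected to raise, which is what the test asserts. A standalone sketch against the local filesystem:

    # Sketch only: "xb" creates a file only if it does not exist yet.
    import os
    import tempfile
    import fsspec
    import pytest

    fs = fsspec.filesystem("file")
    path = os.path.join(tempfile.mkdtemp(), "exclusive.bin")
    with fs.open(path, "xb") as f:  # first exclusive create succeeds
        f.write(b"data")
    with pytest.raises(FileExistsError):
        fs.open(path, "xb")  # second attempt on an existing path must raise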