mehhl committed · verified
Commit 09826d0
1 Parent(s): 15eb7f7

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. temp_venv/lib/python3.13/site-packages/IPython/core/alias.py +268 -0
  2. temp_venv/lib/python3.13/site-packages/IPython/core/async_helpers.py +155 -0
  3. temp_venv/lib/python3.13/site-packages/IPython/core/completerlib.py +382 -0
  4. temp_venv/lib/python3.13/site-packages/IPython/core/display_trap.py +74 -0
  5. temp_venv/lib/python3.13/site-packages/IPython/core/error.py +60 -0
  6. temp_venv/lib/python3.13/site-packages/IPython/core/historyapp.py +158 -0
  7. temp_venv/lib/python3.13/site-packages/IPython/core/hooks.py +158 -0
  8. temp_venv/lib/python3.13/site-packages/IPython/core/magic.py +760 -0
  9. temp_venv/lib/python3.13/site-packages/IPython/core/prefilter.py +707 -0
  10. temp_venv/lib/python3.13/site-packages/IPython/core/profiledir.py +244 -0
  11. temp_venv/lib/python3.13/site-packages/IPython/core/release.py +45 -0
  12. temp_venv/lib/python3.13/site-packages/charset_normalizer-3.4.2.dist-info/licenses/LICENSE +21 -0
  13. temp_venv/lib/python3.13/site-packages/executing/__pycache__/__init__.cpython-313.pyc +0 -0
  14. temp_venv/lib/python3.13/site-packages/executing/__pycache__/_exceptions.cpython-313.pyc +0 -0
  15. temp_venv/lib/python3.13/site-packages/executing/__pycache__/_position_node_finder.cpython-313.pyc +0 -0
  16. temp_venv/lib/python3.13/site-packages/executing/__pycache__/_pytest_utils.cpython-313.pyc +0 -0
  17. temp_venv/lib/python3.13/site-packages/executing/__pycache__/executing.cpython-313.pyc +0 -0
  18. temp_venv/lib/python3.13/site-packages/executing/__pycache__/version.cpython-313.pyc +0 -0
  19. temp_venv/lib/python3.13/site-packages/fsspec-2025.3.2.dist-info/licenses/LICENSE +29 -0
  20. temp_venv/lib/python3.13/site-packages/fsspec/implementations/__init__.py +0 -0
  21. temp_venv/lib/python3.13/site-packages/fsspec/implementations/arrow.py +304 -0
  22. temp_venv/lib/python3.13/site-packages/fsspec/implementations/asyn_wrapper.py +103 -0
  23. temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_mapper.py +75 -0
  24. temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_metadata.py +232 -0
  25. temp_venv/lib/python3.13/site-packages/fsspec/implementations/cached.py +941 -0
  26. temp_venv/lib/python3.13/site-packages/fsspec/implementations/dask.py +152 -0
  27. temp_venv/lib/python3.13/site-packages/fsspec/implementations/data.py +58 -0
  28. temp_venv/lib/python3.13/site-packages/fsspec/implementations/dbfs.py +467 -0
  29. temp_venv/lib/python3.13/site-packages/fsspec/implementations/dirfs.py +388 -0
  30. temp_venv/lib/python3.13/site-packages/fsspec/implementations/ftp.py +395 -0
  31. temp_venv/lib/python3.13/site-packages/fsspec/implementations/git.py +115 -0
  32. temp_venv/lib/python3.13/site-packages/fsspec/implementations/github.py +267 -0
  33. temp_venv/lib/python3.13/site-packages/fsspec/implementations/http.py +880 -0
  34. temp_venv/lib/python3.13/site-packages/fsspec/implementations/http_sync.py +931 -0
  35. temp_venv/lib/python3.13/site-packages/fsspec/implementations/jupyter.py +124 -0
  36. temp_venv/lib/python3.13/site-packages/fsspec/implementations/libarchive.py +213 -0
  37. temp_venv/lib/python3.13/site-packages/fsspec/implementations/local.py +477 -0
  38. temp_venv/lib/python3.13/site-packages/fsspec/implementations/memory.py +312 -0
  39. temp_venv/lib/python3.13/site-packages/fsspec/implementations/reference.py +1305 -0
  40. temp_venv/lib/python3.13/site-packages/fsspec/implementations/sftp.py +180 -0
  41. temp_venv/lib/python3.13/site-packages/fsspec/implementations/smb.py +416 -0
  42. temp_venv/lib/python3.13/site-packages/fsspec/implementations/tar.py +124 -0
  43. temp_venv/lib/python3.13/site-packages/fsspec/implementations/webhdfs.py +485 -0
  44. temp_venv/lib/python3.13/site-packages/fsspec/implementations/zip.py +177 -0
  45. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/__init__.py +289 -0
  46. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/common.py +175 -0
  47. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/copy.py +557 -0
  48. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/get.py +587 -0
  49. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/mv.py +57 -0
  50. temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/open.py +11 -0
temp_venv/lib/python3.13/site-packages/IPython/core/alias.py ADDED
@@ -0,0 +1,268 @@
+# encoding: utf-8
+"""
+System command aliases.
+
+Authors:
+
+* Fernando Perez
+* Brian Granger
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (C) 2008-2011 The IPython Development Team
+#
+# Distributed under the terms of the BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+import os
+import re
+import sys
+
+from traitlets.config.configurable import Configurable
+from .error import UsageError
+
+from traitlets import List, Instance
+from logging import error
+
+import typing as t
+
+
+#-----------------------------------------------------------------------------
+# Utilities
+#-----------------------------------------------------------------------------
+
+# This is used as the pattern for calls to split_user_input.
+shell_line_split = re.compile(r'^(\s*)()(\S+)(.*$)')
+
+def default_aliases() -> t.List[t.Tuple[str, str]]:
+    """Return list of shell aliases to auto-define.
+    """
+    # Note: the aliases defined here should be safe to use on a kernel
+    # regardless of what frontend it is attached to. Frontends that use a
+    # kernel in-process can define additional aliases that will only work in
+    # their case. For example, things like 'less' or 'clear' that manipulate
+    # the terminal should NOT be declared here, as they will only work if the
+    # kernel is running inside a true terminal, and not over the network.
+
+    if os.name == 'posix':
+        default_aliases = [('mkdir', 'mkdir'), ('rmdir', 'rmdir'),
+                           ('mv', 'mv'), ('rm', 'rm'), ('cp', 'cp'),
+                           ('cat', 'cat'),
+                           ]
+        # Useful set of ls aliases. The GNU and BSD options are a little
+        # different, so we make aliases that provide as similar as possible
+        # behavior in ipython, by passing the right flags for each platform
+        if sys.platform.startswith('linux'):
+            ls_aliases = [('ls', 'ls -F --color'),
+                          # long ls
+                          ('ll', 'ls -F -o --color'),
+                          # ls normal files only
+                          ('lf', 'ls -F -o --color %l | grep ^-'),
+                          # ls symbolic links
+                          ('lk', 'ls -F -o --color %l | grep ^l'),
+                          # directories or links to directories,
+                          ('ldir', 'ls -F -o --color %l | grep /$'),
+                          # things which are executable
+                          ('lx', 'ls -F -o --color %l | grep ^-..x'),
+                          ]
+        elif sys.platform.startswith('openbsd') or sys.platform.startswith('netbsd'):
+            # OpenBSD, NetBSD. The ls implementation on these platforms do not support
+            # the -G switch and lack the ability to use colorized output.
+            ls_aliases = [('ls', 'ls -F'),
+                          # long ls
+                          ('ll', 'ls -F -l'),
+                          # ls normal files only
+                          ('lf', 'ls -F -l %l | grep ^-'),
+                          # ls symbolic links
+                          ('lk', 'ls -F -l %l | grep ^l'),
+                          # directories or links to directories,
+                          ('ldir', 'ls -F -l %l | grep /$'),
+                          # things which are executable
+                          ('lx', 'ls -F -l %l | grep ^-..x'),
+                          ]
+        else:
+            # BSD, OSX, etc.
+            ls_aliases = [('ls', 'ls -F -G'),
+                          # long ls
+                          ('ll', 'ls -F -l -G'),
+                          # ls normal files only
+                          ('lf', 'ls -F -l -G %l | grep ^-'),
+                          # ls symbolic links
+                          ('lk', 'ls -F -l -G %l | grep ^l'),
+                          # directories or links to directories,
+                          ('ldir', 'ls -F -G -l %l | grep /$'),
+                          # things which are executable
+                          ('lx', 'ls -F -l -G %l | grep ^-..x'),
+                          ]
+        default_aliases = default_aliases + ls_aliases
+    elif os.name in ['nt', 'dos']:
+        default_aliases = [('ls', 'dir /on'),
+                           ('ddir', 'dir /ad /on'), ('ldir', 'dir /ad /on'),
+                           ('mkdir', 'mkdir'), ('rmdir', 'rmdir'),
+                           ('echo', 'echo'), ('ren', 'ren'), ('copy', 'copy'),
+                           ]
+    else:
+        default_aliases = []
+
+    return default_aliases
+
+
+class AliasError(Exception):
+    pass
+
+
+class InvalidAliasError(AliasError):
+    pass
+
+
+class Alias:
+    """Callable object storing the details of one alias.
+
+    Instances are registered as magic functions to allow use of aliases.
+    """
+
+    # Prepare blacklist
+    blacklist = {'cd','popd','pushd','dhist','alias','unalias'}
+
+    def __init__(self, shell, name, cmd):
+        self.shell = shell
+        self.name = name
+        self.cmd = cmd
+        self.__doc__ = "Alias for `!{}`".format(cmd)
+        self.nargs = self.validate()
+
+    def validate(self):
+        """Validate the alias, and return the number of arguments."""
+        if self.name in self.blacklist:
+            raise InvalidAliasError("The name %s can't be aliased "
+                                    "because it is a keyword or builtin." % self.name)
+        try:
+            caller = self.shell.magics_manager.magics['line'][self.name]
+        except KeyError:
+            pass
+        else:
+            if not isinstance(caller, Alias):
+                raise InvalidAliasError("The name %s can't be aliased "
+                                        "because it is another magic command." % self.name)
+
+        if not (isinstance(self.cmd, str)):
+            raise InvalidAliasError("An alias command must be a string, "
+                                    "got: %r" % self.cmd)
+
+        nargs = self.cmd.count('%s') - self.cmd.count('%%s')
+
+        if (nargs > 0) and (self.cmd.find('%l') >= 0):
+            raise InvalidAliasError('The %s and %l specifiers are mutually '
+                                    'exclusive in alias definitions.')
+
+        return nargs
+
+    def __repr__(self):
+        return "<alias {} for {!r}>".format(self.name, self.cmd)
+
+    def __call__(self, rest=''):
+        cmd = self.cmd
+        nargs = self.nargs
+        # Expand the %l special to be the user's input line
+        if cmd.find('%l') >= 0:
+            cmd = cmd.replace('%l', rest)
+            rest = ''
+
+        if nargs==0:
+            if cmd.find('%%s') >= 1:
+                cmd = cmd.replace('%%s', '%s')
+            # Simple, argument-less aliases
+            cmd = '%s %s' % (cmd, rest)
+        else:
+            # Handle aliases with positional arguments
+            args = rest.split(None, nargs)
+            if len(args) < nargs:
+                raise UsageError('Alias <%s> requires %s arguments, %s given.' %
+                                 (self.name, nargs, len(args)))
+            cmd = '%s %s' % (cmd % tuple(args[:nargs]),' '.join(args[nargs:]))
+
+        self.shell.system(cmd)
+
+#-----------------------------------------------------------------------------
+# Main AliasManager class
+#-----------------------------------------------------------------------------
+
+class AliasManager(Configurable):
+    default_aliases: List = List(default_aliases()).tag(config=True)
+    user_aliases: List = List(default_value=[]).tag(config=True)
+    shell = Instance(
+        "IPython.core.interactiveshell.InteractiveShellABC", allow_none=True
+    )
+
+    def __init__(self, shell=None, **kwargs):
+        super(AliasManager, self).__init__(shell=shell, **kwargs)
+        # For convenient access
+        if self.shell is not None:
+            self.linemagics = self.shell.magics_manager.magics["line"]
+            self.init_aliases()
+
+    def init_aliases(self):
+        # Load default & user aliases
+        for name, cmd in self.default_aliases + self.user_aliases:
+            if (
+                cmd.startswith("ls ")
+                and self.shell is not None
+                and self.shell.colors == "nocolor"
+            ):
+                cmd = cmd.replace(" --color", "")
+            self.soft_define_alias(name, cmd)
+
+    @property
+    def aliases(self):
+        return [(n, func.cmd) for (n, func) in self.linemagics.items()
+                if isinstance(func, Alias)]
+
+    def soft_define_alias(self, name, cmd):
+        """Define an alias, but don't raise on an AliasError."""
+        try:
+            self.define_alias(name, cmd)
+        except AliasError as e:
+            error("Invalid alias: %s" % e)
+
+    def define_alias(self, name, cmd):
+        """Define a new alias after validating it.
+
+        This will raise an :exc:`AliasError` if there are validation
+        problems.
+        """
+        caller = Alias(shell=self.shell, name=name, cmd=cmd)
+        self.shell.magics_manager.register_function(caller, magic_kind='line',
+                                                    magic_name=name)
+
+    def get_alias(self, name):
+        """Return an alias, or None if no alias by that name exists."""
+        aname = self.linemagics.get(name, None)
+        return aname if isinstance(aname, Alias) else None
+
+    def is_alias(self, name):
+        """Return whether or not a given name has been defined as an alias"""
+        return self.get_alias(name) is not None
+
+    def undefine_alias(self, name):
+        if self.is_alias(name):
+            del self.linemagics[name]
+        else:
+            raise ValueError('%s is not an alias' % name)
+
+    def clear_aliases(self):
+        for name, _ in self.aliases:
+            self.undefine_alias(name)
+
+    def retrieve_alias(self, name):
+        """Retrieve the command to which an alias expands."""
+        caller = self.get_alias(name)
+        if caller:
+            return caller.cmd
+        else:
+            raise ValueError('%s is not an alias' % name)
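For orientation, a minimal usage sketch (not part of the diff; it assumes an interactive IPython session where `get_ipython()` returns the running shell, which exposes this manager as `ip.alias_manager`):

    ip = get_ipython()
    am = ip.alias_manager
    am.define_alias('parts', 'echo first %s second %s')   # two %s slots -> nargs == 2
    am.retrieve_alias('parts')                            # -> 'echo first %s second %s'
    # Invoking the alias as a line magic, e.g. `%parts A B`, fills the %s slots
    # and runs "echo first A second B" through shell.system().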
temp_venv/lib/python3.13/site-packages/IPython/core/async_helpers.py ADDED
@@ -0,0 +1,155 @@
+"""
+Async helper function that are invalid syntax on Python 3.5 and below.
+
+This code is best effort, and may have edge cases not behaving as expected. In
+particular it contain a number of heuristics to detect whether code is
+effectively async and need to run in an event loop or not.
+
+Some constructs (like top-level `return`, or `yield`) are taken care of
+explicitly to actually raise a SyntaxError and stay as close as possible to
+Python semantics.
+"""
+
+import ast
+import asyncio
+import inspect
+from functools import wraps
+
+_asyncio_event_loop = None
+
+
+def get_asyncio_loop():
+    """asyncio has deprecated get_event_loop
+
+    Replicate it here, with our desired semantics:
+
+    - always returns a valid, not-closed loop
+    - not thread-local like asyncio's,
+      because we only want one loop for IPython
+    - if called from inside a coroutine (e.g. in ipykernel),
+      return the running loop
+
+    .. versionadded:: 8.0
+    """
+    try:
+        return asyncio.get_running_loop()
+    except RuntimeError:
+        # not inside a coroutine,
+        # track our own global
+        pass
+
+    # not thread-local like asyncio's,
+    # because we only track one event loop to run for IPython itself,
+    # always in the main thread.
+    global _asyncio_event_loop
+    if _asyncio_event_loop is None or _asyncio_event_loop.is_closed():
+        _asyncio_event_loop = asyncio.new_event_loop()
+    return _asyncio_event_loop
+
+
+class _AsyncIORunner:
+    def __call__(self, coro):
+        """
+        Handler for asyncio autoawait
+        """
+        return get_asyncio_loop().run_until_complete(coro)
+
+    def __str__(self):
+        return "asyncio"
+
+
+_asyncio_runner = _AsyncIORunner()
+
+
+class _AsyncIOProxy:
+    """Proxy-object for an asyncio
+
+    Any coroutine methods will be wrapped in event_loop.run_
+    """
+
+    def __init__(self, obj, event_loop):
+        self._obj = obj
+        self._event_loop = event_loop
+
+    def __repr__(self):
+        return f"<_AsyncIOProxy({self._obj!r})>"
+
+    def __getattr__(self, key):
+        attr = getattr(self._obj, key)
+        if inspect.iscoroutinefunction(attr):
+            # if it's a coroutine method,
+            # return a threadsafe wrapper onto the _current_ asyncio loop
+            @wraps(attr)
+            def _wrapped(*args, **kwargs):
+                concurrent_future = asyncio.run_coroutine_threadsafe(
+                    attr(*args, **kwargs), self._event_loop
+                )
+                return asyncio.wrap_future(concurrent_future)
+
+            return _wrapped
+        else:
+            return attr
+
+    def __dir__(self):
+        return dir(self._obj)
+
+
+def _curio_runner(coroutine):
+    """
+    handler for curio autoawait
+    """
+    import curio
+
+    return curio.run(coroutine)
+
+
+def _trio_runner(async_fn):
+    import trio
+
+    async def loc(coro):
+        """
+        We need the dummy no-op async def to protect from
+        trio's internal. See https://github.com/python-trio/trio/issues/89
+        """
+        return await coro
+
+    return trio.run(loc, async_fn)
+
+
+def _pseudo_sync_runner(coro):
+    """
+    A runner that does not really allow async execution, and just advance the coroutine.
+
+    See discussion in https://github.com/python-trio/trio/issues/608,
+
+    Credit to Nathaniel Smith
+    """
+    try:
+        coro.send(None)
+    except StopIteration as exc:
+        return exc.value
+    else:
+        # TODO: do not raise but return an execution result with the right info.
+        raise RuntimeError(
+            "{coro_name!r} needs a real async loop".format(coro_name=coro.__name__)
+        )
+
+
+def _should_be_async(cell: str) -> bool:
+    """Detect if a block of code need to be wrapped in an `async def`
+
+    Attempt to parse the block of code, it it compile we're fine.
+    Otherwise we wrap if and try to compile.
+
+    If it works, assume it should be async. Otherwise Return False.
+
+    Not handled yet: If the block of code has a return statement as the top
+    level, it will be seen as async. This is a know limitation.
+    """
+    try:
+        code = compile(
+            cell, "<>", "exec", flags=getattr(ast, "PyCF_ALLOW_TOP_LEVEL_AWAIT", 0x0)
+        )
+        return inspect.CO_COROUTINE & code.co_flags == inspect.CO_COROUTINE
+    except (SyntaxError, MemoryError):
+        return False
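A quick illustration of how the helpers above behave (a sketch only, evaluated in this module's namespace; not part of the file):

    import asyncio

    _should_be_async("x = 1")                    # False: compiles as plain code
    _should_be_async("await asyncio.sleep(0)")   # True: top-level await needs an event loop

    async def answer():
        return 42

    _asyncio_runner(answer())       # 42, run to completion on the shared IPython loop
    _pseudo_sync_runner(answer())   # 42, the coroutine finishes without ever awaiting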
temp_venv/lib/python3.13/site-packages/IPython/core/completerlib.py ADDED
@@ -0,0 +1,382 @@
+# encoding: utf-8
+"""Implementations for various useful completers.
+
+These are all loaded by default by IPython.
+"""
+#-----------------------------------------------------------------------------
+# Copyright (C) 2010-2011 The IPython Development Team.
+#
+# Distributed under the terms of the BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+# Stdlib imports
+import glob
+import inspect
+import os
+import re
+import sys
+from importlib import import_module
+from importlib.machinery import all_suffixes
+
+
+# Third-party imports
+from time import time
+from zipimport import zipimporter
+
+# Our own imports
+from .completer import expand_user, compress_user
+from .error import TryNext
+from ..utils._process_common import arg_split
+
+# FIXME: this should be pulled in with the right call via the component system
+from IPython import get_ipython
+
+from typing import List
+
+#-----------------------------------------------------------------------------
+# Globals and constants
+#-----------------------------------------------------------------------------
+_suffixes = all_suffixes()
+
+# Time in seconds after which the rootmodules will be stored permanently in the
+# ipython ip.db database (kept in the user's .ipython dir).
+TIMEOUT_STORAGE = 2
+
+# Time in seconds after which we give up
+TIMEOUT_GIVEUP = 20
+
+# Regular expression for the python import statement
+import_re = re.compile(r'(?P<name>[^\W\d]\w*?)'
+                       r'(?P<package>[/\\]__init__)?'
+                       r'(?P<suffix>%s)$' %
+                       r'|'.join(re.escape(s) for s in _suffixes))
+
+# RE for the ipython %run command (python + ipython scripts)
+magic_run_re = re.compile(r'.*(\.ipy|\.ipynb|\.py[w]?)$')
+
+#-----------------------------------------------------------------------------
+# Local utilities
+#-----------------------------------------------------------------------------
+
+
+def module_list(path: str) -> List[str]:
+    """
+    Return the list containing the names of the modules available in the given
+    folder.
+    """
+    # sys.path has the cwd as an empty string, but isdir/listdir need it as '.'
+    if path == '':
+        path = '.'
+
+    # A few local constants to be used in loops below
+    pjoin = os.path.join
+
+    if os.path.isdir(path):
+        # Build a list of all files in the directory and all files
+        # in its subdirectories. For performance reasons, do not
+        # recurse more than one level into subdirectories.
+        files: List[str] = []
+        for root, dirs, nondirs in os.walk(path, followlinks=True):
+            subdir = root[len(path)+1:]
+            if subdir:
+                files.extend(pjoin(subdir, f) for f in nondirs)
+                dirs[:] = [] # Do not recurse into additional subdirectories.
+            else:
+                files.extend(nondirs)
+
+    else:
+        try:
+            files = list(zipimporter(path)._files.keys()) # type: ignore
+        except Exception:
+            files = []
+
+    # Build a list of modules which match the import_re regex.
+    modules = []
+    for f in files:
+        m = import_re.match(f)
+        if m:
+            modules.append(m.group('name'))
+    return list(set(modules))
+
+
+def get_root_modules():
+    """
+    Returns a list containing the names of all the modules available in the
+    folders of the pythonpath.
+
+    ip.db['rootmodules_cache'] maps sys.path entries to list of modules.
+    """
+    ip = get_ipython()
+    if ip is None:
+        # No global shell instance to store cached list of modules.
+        # Don't try to scan for modules every time.
+        return list(sys.builtin_module_names)
+
+    if getattr(ip.db, "_mock", False):
+        rootmodules_cache = {}
+    else:
+        rootmodules_cache = ip.db.get("rootmodules_cache", {})
+    rootmodules = list(sys.builtin_module_names)
+    start_time = time()
+    store = False
+    for path in sys.path:
+        try:
+            modules = rootmodules_cache[path]
+        except KeyError:
+            modules = module_list(path)
+            try:
+                modules.remove('__init__')
+            except ValueError:
+                pass
+            if path not in ('', '.'): # cwd modules should not be cached
+                rootmodules_cache[path] = modules
+            if time() - start_time > TIMEOUT_STORAGE and not store:
+                store = True
+                print("\nCaching the list of root modules, please wait!")
+                print("(This will only be done once - type '%rehashx' to "
+                      "reset cache!)\n")
+                sys.stdout.flush()
+            if time() - start_time > TIMEOUT_GIVEUP:
+                print("This is taking too long, we give up.\n")
+                return []
+        rootmodules.extend(modules)
+    if store:
+        ip.db['rootmodules_cache'] = rootmodules_cache
+    rootmodules = list(set(rootmodules))
+    return rootmodules
+
+
+def is_importable(module, attr: str, only_modules) -> bool:
+    if only_modules:
+        try:
+            mod = getattr(module, attr)
+        except ModuleNotFoundError:
+            # See gh-14434
+            return False
+        return inspect.ismodule(mod)
+    else:
+        return not(attr[:2] == '__' and attr[-2:] == '__')
+
+def is_possible_submodule(module, attr):
+    try:
+        obj = getattr(module, attr)
+    except AttributeError:
+        # Is possibly an unimported submodule
+        return True
+    except TypeError:
+        # https://github.com/ipython/ipython/issues/9678
+        return False
+    return inspect.ismodule(obj)
+
+
+def try_import(mod: str, only_modules=False) -> List[str]:
+    """
+    Try to import given module and return list of potential completions.
+    """
+    mod = mod.rstrip('.')
+    try:
+        m = import_module(mod)
+    except:
+        return []
+
+    m_is_init = '__init__' in (getattr(m, '__file__', '') or '')
+
+    completions = []
+    if (not hasattr(m, '__file__')) or (not only_modules) or m_is_init:
+        completions.extend( [attr for attr in dir(m) if
+                             is_importable(m, attr, only_modules)])
+
+    m_all = getattr(m, "__all__", [])
+    if only_modules:
+        completions.extend(attr for attr in m_all if is_possible_submodule(m, attr))
+    else:
+        completions.extend(m_all)
+
+    if m_is_init:
+        file_ = m.__file__
+        file_path = os.path.dirname(file_)  # type: ignore
+        if file_path is not None:
+            completions.extend(module_list(file_path))
+    completions_set = {c for c in completions if isinstance(c, str)}
+    completions_set.discard('__init__')
+    return list(completions_set)
+
+
+#-----------------------------------------------------------------------------
+# Completion-related functions.
+#-----------------------------------------------------------------------------
+
+def quick_completer(cmd, completions):
+    r""" Easily create a trivial completer for a command.
+
+    Takes either a list of completions, or all completions in string (that will
+    be split on whitespace).
+
+    Example::
+
+        [d:\ipython]|1> import ipy_completers
+        [d:\ipython]|2> ipy_completers.quick_completer('foo', ['bar','baz'])
+        [d:\ipython]|3> foo b<TAB>
+        bar baz
+        [d:\ipython]|3> foo ba
+    """
+
+    if isinstance(completions, str):
+        completions = completions.split()
+
+    def do_complete(self, event):
+        return completions
+
+    get_ipython().set_hook('complete_command',do_complete, str_key = cmd)
+
+def module_completion(line):
+    """
+    Returns a list containing the completion possibilities for an import line.
+
+    The line looks like this :
+    'import xml.d'
+    'from xml.dom import'
+    """
+
+    words = line.split(' ')
+    nwords = len(words)
+
+    # from whatever <tab> -> 'import '
+    if nwords == 3 and words[0] == 'from':
+        return ['import ']
+
+    # 'from xy<tab>' or 'import xy<tab>'
+    if nwords < 3 and (words[0] in {'%aimport', 'import', 'from'}) :
+        if nwords == 1:
+            return get_root_modules()
+        mod = words[1].split('.')
+        if len(mod) < 2:
+            return get_root_modules()
+        completion_list = try_import('.'.join(mod[:-1]), True)
+        return ['.'.join(mod[:-1] + [el]) for el in completion_list]
+
+    # 'from xyz import abc<tab>'
+    if nwords >= 3 and words[0] == 'from':
+        mod = words[1]
+        return try_import(mod)
+
+#-----------------------------------------------------------------------------
+# Completers
+#-----------------------------------------------------------------------------
+# These all have the func(self, event) signature to be used as custom
+# completers
+
+def module_completer(self,event):
+    """Give completions after user has typed 'import ...' or 'from ...'"""
+
+    # This works in all versions of python. While 2.5 has
+    # pkgutil.walk_packages(), that particular routine is fairly dangerous,
+    # since it imports *EVERYTHING* on sys.path. That is: a) very slow b) full
+    # of possibly problematic side effects.
+    # This search the folders in the sys.path for available modules.
+
+    return module_completion(event.line)
+
+# FIXME: there's a lot of logic common to the run, cd and builtin file
+# completers, that is currently reimplemented in each.
+
+def magic_run_completer(self, event):
+    """Complete files that end in .py or .ipy or .ipynb for the %run command.
+    """
+    comps = arg_split(event.line, strict=False)
+    # relpath should be the current token that we need to complete.
+    if (len(comps) > 1) and (not event.line.endswith(' ')):
+        relpath = comps[-1].strip("'\"")
+    else:
+        relpath = ''
+
+    #print("\nev=", event)  # dbg
+    #print("rp=", relpath)  # dbg
+    #print('comps=', comps)  # dbg
+
+    lglob = glob.glob
+    isdir = os.path.isdir
+    relpath, tilde_expand, tilde_val = expand_user(relpath)
+
+    # Find if the user has already typed the first filename, after which we
+    # should complete on all files, since after the first one other files may
+    # be arguments to the input script.
+
+    if any(magic_run_re.match(c) for c in comps):
+        matches = [f.replace('\\','/') + ('/' if isdir(f) else '')
+                   for f in lglob(relpath+'*')]
+    else:
+        dirs = [f.replace('\\','/') + "/" for f in lglob(relpath+'*') if isdir(f)]
+        pys = [f.replace('\\','/')
+               for f in lglob(relpath+'*.py') + lglob(relpath+'*.ipy') +
+               lglob(relpath+'*.ipynb') + lglob(relpath + '*.pyw')]
+
+        matches = dirs + pys
+
+    #print('run comp:', dirs+pys) # dbg
+    return [compress_user(p, tilde_expand, tilde_val) for p in matches]
+
+
+def cd_completer(self, event):
+    """Completer function for cd, which only returns directories."""
+    ip = get_ipython()
+    relpath = event.symbol
+
+    #print(event) # dbg
+    if event.line.endswith('-b') or ' -b ' in event.line:
+        # return only bookmark completions
+        bkms = self.db.get('bookmarks', None)
+        if bkms:
+            return bkms.keys()
+        else:
+            return []
+
+    if event.symbol == '-':
+        width_dh = str(len(str(len(ip.user_ns['_dh']) + 1)))
+        # jump in directory history by number
+        fmt = '-%0' + width_dh +'d [%s]'
+        ents = [ fmt % (i,s) for i,s in enumerate(ip.user_ns['_dh'])]
+        if len(ents) > 1:
+            return ents
+        return []
+
+    if event.symbol.startswith('--'):
+        return ["--" + os.path.basename(d) for d in ip.user_ns['_dh']]
+
+    # Expand ~ in path and normalize directory separators.
+    relpath, tilde_expand, tilde_val = expand_user(relpath)
+    relpath = relpath.replace('\\','/')
+
+    found = []
+    for d in [f.replace('\\','/') + '/' for f in glob.glob(relpath+'*')
+              if os.path.isdir(f)]:
+        if ' ' in d:
+            # we don't want to deal with any of that, complex code
+            # for this is elsewhere
+            raise TryNext
+
+        found.append(d)
+
+    if not found:
+        if os.path.isdir(relpath):
+            return [compress_user(relpath, tilde_expand, tilde_val)]
+
+        # if no completions so far, try bookmarks
+        bks = self.db.get('bookmarks',{})
+        bkmatches = [s for s in bks if s.startswith(event.symbol)]
+        if bkmatches:
+            return bkmatches
+
+        raise TryNext
+
+    return [compress_user(p, tilde_expand, tilde_val) for p in found]
+
+def reset_completer(self, event):
+    "A completer for %reset magic"
+    return '-f -s in out array dhist'.split()
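To make the import-completion flow concrete, a rough sketch of what `module_completion` returns (illustrative only; actual results depend on the interpreter's sys.path, and `quick_completer` needs a live IPython session):

    module_completion('import ')                 # all root modules found on sys.path
    module_completion('import xml.d')            # candidates of the form 'xml.<submodule>' via try_import('xml', True)
    module_completion('from xml.dom import ')    # names importable from xml.dom
    quick_completer('apt', 'install upgrade remove')   # fixed word list for a shell-style command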
temp_venv/lib/python3.13/site-packages/IPython/core/display_trap.py ADDED
@@ -0,0 +1,74 @@
+# encoding: utf-8
+"""
+A context manager for handling sys.displayhook.
+
+Authors:
+
+* Robert Kern
+* Brian Granger
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (C) 2008-2011 The IPython Development Team
+#
+# Distributed under the terms of the BSD License. The full license is in
+# the file COPYING, distributed as part of this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+import sys
+
+from traitlets.config.configurable import Configurable
+from traitlets import Any
+
+#-----------------------------------------------------------------------------
+# Classes and functions
+#-----------------------------------------------------------------------------
+
+
+class DisplayTrap(Configurable):
+    """Object to manage sys.displayhook.
+
+    This came from IPython.core.kernel.display_hook, but is simplified
+    (no callbacks or formatters) until more of the core is refactored.
+    """
+
+    hook = Any()
+
+    def __init__(self, hook=None):
+        super(DisplayTrap, self).__init__(hook=hook, config=None)
+        self.old_hook = None
+        # We define this to track if a single BuiltinTrap is nested.
+        # Only turn off the trap when the outermost call to __exit__ is made.
+        self._nested_level = 0
+
+    def __enter__(self):
+        if self._nested_level == 0:
+            self.set()
+        self._nested_level += 1
+        return self
+
+    def __exit__(self, type, value, traceback):
+        if self._nested_level == 1:
+            self.unset()
+        self._nested_level -= 1
+        # Returning False will cause exceptions to propagate
+        return False
+
+    @property
+    def is_active(self) -> bool:
+        return self._nested_level != 0
+
+    def set(self):
+        """Set the hook."""
+        if sys.displayhook is not self.hook:
+            self.old_hook = sys.displayhook
+            sys.displayhook = self.hook
+
+    def unset(self):
+        """Unset the hook."""
+        sys.displayhook = self.old_hook
+
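The nesting logic above is easiest to see in a small sketch (hypothetical hook, not part of the module):

    import sys

    def my_hook(value):
        print("displayed:", value)

    trap = DisplayTrap(hook=my_hook)
    with trap:
        assert sys.displayhook is my_hook     # installed on the outermost __enter__
        with trap:
            pass                              # nested use: neither enter nor exit changes the hook
        assert sys.displayhook is my_hook     # still active; only the outermost exit unsets
    assert sys.displayhook is not my_hook     # original displayhook restored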
temp_venv/lib/python3.13/site-packages/IPython/core/error.py ADDED
@@ -0,0 +1,60 @@
+# encoding: utf-8
+"""
+Global exception classes for IPython.core.
+
+Authors:
+
+* Brian Granger
+* Fernando Perez
+* Min Ragan-Kelley
+
+Notes
+-----
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (C) 2008 The IPython Development Team
+#
+# Distributed under the terms of the BSD License. The full license is in
+# the file COPYING, distributed as part of this software.
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Exception classes
+#-----------------------------------------------------------------------------
+
+class IPythonCoreError(Exception):
+    pass
+
+
+class TryNext(IPythonCoreError):
+    """Try next hook exception.
+
+    Raise this in your hook function to indicate that the next hook handler
+    should be used to handle the operation.
+    """
+
+class UsageError(IPythonCoreError):
+    """Error in magic function arguments, etc.
+
+    Something that probably won't warrant a full traceback, but should
+    nevertheless interrupt a macro / batch file.
+    """
+
+class StdinNotImplementedError(IPythonCoreError, NotImplementedError):
+    """raw_input was requested in a context where it is not supported
+
+    For use in IPython kernels, where only some frontends may support
+    stdin requests.
+    """
+
+class InputRejected(Exception):
+    """Input rejected by ast transformer.
+
+    Raise this in your NodeTransformer to indicate that InteractiveShell should
+    not execute the supplied input.
+    """
temp_venv/lib/python3.13/site-packages/IPython/core/historyapp.py ADDED
@@ -0,0 +1,158 @@
+# encoding: utf-8
+"""
+An application for managing IPython history.
+
+To be invoked as the `ipython history` subcommand.
+"""
+
+import sqlite3
+from pathlib import Path
+
+from traitlets.config.application import Application
+from .application import BaseIPythonApplication
+from traitlets import Bool, Int, Dict
+from ..utils.io import ask_yes_no
+
+trim_hist_help = """Trim the IPython history database to the last 1000 entries.
+
+This actually copies the last 1000 entries to a new database, and then replaces
+the old file with the new. Use the `--keep=` argument to specify a number
+other than 1000.
+"""
+
+clear_hist_help = """Clear the IPython history database, deleting all entries.
+
+Because this is a destructive operation, IPython will prompt the user if they
+really want to do this. Passing a `-f` flag will force clearing without a
+prompt.
+
+This is an handy alias to `ipython history trim --keep=0`
+"""
+
+
+class HistoryTrim(BaseIPythonApplication):
+    description = trim_hist_help
+
+    backup = Bool(False, help="Keep the old history file as history.sqlite.<N>").tag(
+        config=True
+    )
+
+    keep = Int(1000, help="Number of recent lines to keep in the database.").tag(
+        config=True
+    )
+
+    flags = Dict(  # type: ignore
+        dict(backup=({"HistoryTrim": {"backup": True}}, backup.help))
+    )
+
+    aliases = Dict(dict(keep="HistoryTrim.keep"))  # type: ignore
+
+    def start(self):
+        profile_dir = Path(self.profile_dir.location)
+        hist_file = profile_dir / "history.sqlite"
+        con = sqlite3.connect(hist_file)
+
+        # Grab the recent history from the current database.
+        inputs = list(con.execute('SELECT session, line, source, source_raw FROM '
+                                  'history ORDER BY session DESC, line DESC LIMIT ?', (self.keep+1,)))
+        if len(inputs) <= self.keep:
+            print("There are already at most %d entries in the history database." % self.keep)
+            print("Not doing anything. Use --keep= argument to keep fewer entries")
+            return
+
+        print("Trimming history to the most recent %d entries." % self.keep)
+
+        inputs.pop() # Remove the extra element we got to check the length.
+        inputs.reverse()
+        if inputs:
+            first_session = inputs[0][0]
+            outputs = list(con.execute('SELECT session, line, output FROM '
+                                       'output_history WHERE session >= ?', (first_session,)))
+            sessions = list(con.execute('SELECT session, start, end, num_cmds, remark FROM '
+                                        'sessions WHERE session >= ?', (first_session,)))
+        con.close()
+
+        # Create the new history database.
+        new_hist_file = profile_dir / "history.sqlite.new"
+        i = 0
+        while new_hist_file.exists():
+            # Make sure we don't interfere with an existing file.
+            i += 1
+            new_hist_file = profile_dir / ("history.sqlite.new" + str(i))
+        new_db = sqlite3.connect(new_hist_file)
+        new_db.execute("""CREATE TABLE IF NOT EXISTS sessions (session integer
+                        primary key autoincrement, start timestamp,
+                        end timestamp, num_cmds integer, remark text)""")
+        new_db.execute("""CREATE TABLE IF NOT EXISTS history
+                        (session integer, line integer, source text, source_raw text,
+                        PRIMARY KEY (session, line))""")
+        new_db.execute("""CREATE TABLE IF NOT EXISTS output_history
+                        (session integer, line integer, output text,
+                        PRIMARY KEY (session, line))""")
+        new_db.commit()
+
+
+        if inputs:
+            with new_db:
+                # Add the recent history into the new database.
+                new_db.executemany('insert into sessions values (?,?,?,?,?)', sessions)
+                new_db.executemany('insert into history values (?,?,?,?)', inputs)
+                new_db.executemany('insert into output_history values (?,?,?)', outputs)
+        new_db.close()
+
+        if self.backup:
+            i = 1
+            backup_hist_file = profile_dir / ("history.sqlite.old.%d" % i)
+            while backup_hist_file.exists():
+                i += 1
+                backup_hist_file = profile_dir / ("history.sqlite.old.%d" % i)
+            hist_file.rename(backup_hist_file)
+            print("Backed up longer history file to", backup_hist_file)
+        else:
+            hist_file.unlink()
+
+        new_hist_file.rename(hist_file)
+
+
+class HistoryClear(HistoryTrim):
+    description = clear_hist_help
+    keep = Int(0, help="Number of recent lines to keep in the database.")
+
+    force = Bool(False, help="Don't prompt user for confirmation").tag(config=True)
+
+    flags = Dict(  # type: ignore
+        dict(
+            force=({"HistoryClear": {"force": True}}, force.help),
+            f=({"HistoryTrim": {"force": True}}, force.help),
+        )
+    )
+    aliases = Dict()  # type: ignore
+
+    def start(self):
+        if self.force or ask_yes_no(
+            "Really delete all ipython history? ", default="no", interrupt="no"
+        ):
+            HistoryTrim.start(self)
+
+
+class HistoryApp(Application):
+    name = "ipython-history"
+    description = "Manage the IPython history database."
+
+    subcommands = Dict(dict(
+        trim = (HistoryTrim, HistoryTrim.description.splitlines()[0]),
+        clear = (HistoryClear, HistoryClear.description.splitlines()[0]),
+    ))
+
+    def start(self):
+        if self.subapp is None:
+            print(
+                "No subcommand specified. Must specify one of: "
+                + ", ".join(map(repr, self.subcommands))
+                + ".\n"
+            )
+            self.print_description()
+            self.print_subcommands()
+            self.exit(1)
+        else:
+            return self.subapp.start()
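In command-line terms, the subcommand wiring above corresponds to invocations along these lines (a sketch; option spellings follow the traits and help strings defined in the file):

    ipython history trim --keep=500    # keep only the 500 most recent input lines
    ipython history trim --backup      # keep the old file as history.sqlite.old.N
    ipython history clear -f           # equivalent to trim --keep=0, without prompting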
temp_venv/lib/python3.13/site-packages/IPython/core/hooks.py ADDED
@@ -0,0 +1,158 @@
+"""Hooks for IPython.
+
+In Python, it is possible to overwrite any method of any object if you really
+want to. But IPython exposes a few 'hooks', methods which are *designed* to
+be overwritten by users for customization purposes. This module defines the
+default versions of all such hooks, which get used by IPython if not
+overridden by the user.
+
+Hooks are simple functions, but they should be declared with ``self`` as their
+first argument, because when activated they are registered into IPython as
+instance methods. The self argument will be the IPython running instance
+itself, so hooks have full access to the entire IPython object.
+
+If you wish to define a new hook and activate it, you can make an :doc:`extension
+</config/extensions/index>` or a :ref:`startup script <startup_files>`. For
+example, you could use a startup file like this::
+
+    import os
+
+    def calljed(self,filename, linenum):
+        "My editor hook calls the jed editor directly."
+        print("Calling my own editor, jed ...")
+        if os.system('jed +%d %s' % (linenum,filename)) != 0:
+            raise TryNext()
+
+    def load_ipython_extension(ip):
+        ip.set_hook('editor', calljed)
+
+"""
+
+#*****************************************************************************
+#       Copyright (C) 2005 Fernando Perez. <[email protected]>
+#
+#  Distributed under the terms of the BSD License. The full license is in
+#  the file COPYING, distributed as part of this software.
+#*****************************************************************************
+
+import os
+import subprocess
+import sys
+
+from .error import TryNext
+
+# List here all the default hooks. For now it's just the editor functions
+# but over time we'll move here all the public API for user-accessible things.
+
+__all__ = [
+    "editor",
+    "synchronize_with_editor",
+    "show_in_pager",
+    "clipboard_get",
+]
+
+def editor(self, filename, linenum=None, wait=True):
+    """Open the default editor at the given filename and linenumber.
+
+    This is IPython's default editor hook, you can use it as an example to
+    write your own modified one. To set your own editor function as the
+    new editor hook, call ip.set_hook('editor',yourfunc)."""
+
+    # IPython configures a default editor at startup by reading $EDITOR from
+    # the environment, and falling back on vi (unix) or notepad (win32).
+    editor = self.editor
+
+    # marker for at which line to open the file (for existing objects)
+    if linenum is None or editor=='notepad':
+        linemark = ''
+    else:
+        linemark = '+%d' % int(linenum)
+
+    # Enclose in quotes if necessary and legal
+    if ' ' in editor and os.path.isfile(editor) and editor[0] != '"':
+        editor = '"%s"' % editor
+
+    # Call the actual editor
+    proc = subprocess.Popen('%s %s %s' % (editor, linemark, filename),
+                            shell=True)
+    if wait and proc.wait() != 0:
+        raise TryNext()
+
+
+def synchronize_with_editor(self, filename, linenum, column):
+    pass
+
+
+class CommandChainDispatcher:
+    """ Dispatch calls to a chain of commands until some func can handle it
+
+    Usage: instantiate, execute "add" to add commands (with optional
+    priority), execute normally via f() calling mechanism.
+
+    """
+    def __init__(self,commands=None):
+        if commands is None:
+            self.chain = []
+        else:
+            self.chain = commands
+
+
+    def __call__(self,*args, **kw):
+        """ Command chain is called just like normal func.
+
+        This will call all funcs in chain with the same args as were given to
+        this function, and return the result of first func that didn't raise
+        TryNext"""
+        last_exc = TryNext()
+        for prio,cmd in self.chain:
+            # print("prio",prio,"cmd",cmd) # dbg
+            try:
+                return cmd(*args, **kw)
+            except TryNext as exc:
+                last_exc = exc
+        # if no function will accept it, raise TryNext up to the caller
+        raise last_exc
+
+    def __str__(self):
+        return str(self.chain)
+
+    def add(self, func, priority=0):
+        """ Add a func to the cmd chain with given priority """
+        self.chain.append((priority, func))
+        self.chain.sort(key=lambda x: x[0])
+
+    def __iter__(self):
+        """ Return all objects in chain.
+
+        Handy if the objects are not callable.
+        """
+        return iter(self.chain)
+
+
+def show_in_pager(self, data, start, screen_lines):
+    """ Run a string through pager """
+    # raising TryNext here will use the default paging functionality
+    raise TryNext
+
+
+
+def clipboard_get(self):
+    """ Get text from the clipboard.
+    """
+    from ..lib.clipboard import (
+        osx_clipboard_get,
+        tkinter_clipboard_get,
+        win32_clipboard_get,
+        wayland_clipboard_get,
+    )
+    if sys.platform == 'win32':
+        chain = [win32_clipboard_get, tkinter_clipboard_get]
+    elif sys.platform == 'darwin':
+        chain = [osx_clipboard_get, tkinter_clipboard_get]
+    else:
+        chain = [wayland_clipboard_get, tkinter_clipboard_get]
+    dispatcher = CommandChainDispatcher()
+    for func in chain:
+        dispatcher.add(func)
+    text = dispatcher()
+    return text
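A compact sketch of the dispatcher semantics used by clipboard_get above (hypothetical handlers, not part of the module):

    def primary(text):
        raise TryNext()                   # decline; let the next handler in the chain try

    def fallback(text):
        return text.upper()

    dispatch = CommandChainDispatcher()
    dispatch.add(primary, priority=10)
    dispatch.add(fallback, priority=50)   # larger priority sorts later in the chain
    dispatch("hello")                     # -> 'HELLO'; primary deferred via TryNext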
temp_venv/lib/python3.13/site-packages/IPython/core/magic.py ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ """Magic functions for InteractiveShell.
3
+ """
4
+
5
+ #-----------------------------------------------------------------------------
6
+ # Copyright (C) 2001 Janko Hauser <[email protected]> and
7
+ # Copyright (C) 2001 Fernando Perez <[email protected]>
8
+ # Copyright (C) 2008 The IPython Development Team
9
+
10
+ # Distributed under the terms of the BSD License. The full license is in
11
+ # the file COPYING, distributed as part of this software.
12
+ #-----------------------------------------------------------------------------
13
+
14
+ import os
15
+ import re
16
+ import sys
17
+ from getopt import getopt, GetoptError
18
+
19
+ from traitlets.config.configurable import Configurable
20
+ from . import oinspect
21
+ from .error import UsageError
22
+ from .inputtransformer2 import ESC_MAGIC, ESC_MAGIC2
23
+ from ..utils.ipstruct import Struct
24
+ from ..utils.process import arg_split
25
+ from ..utils.text import dedent
26
+ from traitlets import Bool, Dict, Instance, observe
27
+ from logging import error
28
+
29
+ import typing as t
30
+
31
+ #-----------------------------------------------------------------------------
32
+ # Globals
33
+ #-----------------------------------------------------------------------------
34
+
35
+ # A dict we'll use for each class that has magics, used as temporary storage to
36
+ # pass information between the @line/cell_magic method decorators and the
37
+ # @magics_class class decorator, because the method decorators have no
38
+ # access to the class when they run. See for more details:
39
+ # http://stackoverflow.com/questions/2366713/can-a-python-decorator-of-an-instance-method-access-the-class
40
+
41
+ magics: t.Dict = dict(line={}, cell={})
42
+
43
+ magic_kinds = ('line', 'cell')
44
+ magic_spec = ('line', 'cell', 'line_cell')
45
+ magic_escapes = dict(line=ESC_MAGIC, cell=ESC_MAGIC2)
46
+
47
+ #-----------------------------------------------------------------------------
48
+ # Utility classes and functions
49
+ #-----------------------------------------------------------------------------
50
+
51
+ class Bunch: pass
52
+
53
+
54
+ def on_off(tag):
55
+ """Return an ON/OFF string for a 1/0 input. Simple utility function."""
56
+ return ['OFF','ON'][tag]
57
+
58
+
59
+ def compress_dhist(dh):
60
+ """Compress a directory history into a new one with at most 20 entries.
61
+
62
+ Return a new list made from the first and last 10 elements of dhist after
63
+ removal of duplicates.
64
+ """
65
+ head, tail = dh[:-10], dh[-10:]
66
+
67
+ newhead = []
68
+ done = set()
69
+ for h in head:
70
+ if h in done:
71
+ continue
72
+ newhead.append(h)
73
+ done.add(h)
74
+
75
+ return newhead + tail
76
+
77
+
78
+ def needs_local_scope(func):
79
+ """Decorator to mark magic functions which need to local scope to run."""
80
+ func.needs_local_scope = True
81
+ return func
82
+
83
+ #-----------------------------------------------------------------------------
84
+ # Class and method decorators for registering magics
85
+ #-----------------------------------------------------------------------------
86
+
87
+ def magics_class(cls):
88
+ """Class decorator for all subclasses of the main Magics class.
89
+
90
+ Any class that subclasses Magics *must* also apply this decorator, to
91
+ ensure that all the methods that have been decorated as line/cell magics
92
+ get correctly registered in the class instance. This is necessary because
93
+ when method decorators run, the class does not exist yet, so they
94
+ temporarily store their information into a module global. Application of
95
+ this class decorator copies that global data to the class instance and
96
+ clears the global.
97
+
98
+ Obviously, this mechanism is not thread-safe, which means that the
99
+ *creation* of subclasses of Magic should only be done in a single-thread
100
+ context. Instantiation of the classes has no restrictions. Given that
101
+ these classes are typically created at IPython startup time and before user
102
+ application code becomes active, in practice this should not pose any
103
+ problems.
104
+ """
105
+ cls.registered = True
106
+ cls.magics = dict(line = magics['line'],
107
+ cell = magics['cell'])
108
+ magics['line'] = {}
109
+ magics['cell'] = {}
110
+ return cls
111
+
112
+
113
+ def record_magic(dct, magic_kind, magic_name, func):
114
+ """Utility function to store a function as a magic of a specific kind.
115
+
116
+ Parameters
117
+ ----------
118
+ dct : dict
119
+ A dictionary with 'line' and 'cell' subdicts.
120
+ magic_kind : str
121
+ Kind of magic to be stored.
122
+ magic_name : str
123
+ Key to store the magic as.
124
+ func : function
125
+ Callable object to store.
126
+ """
127
+ if magic_kind == 'line_cell':
128
+ dct['line'][magic_name] = dct['cell'][magic_name] = func
129
+ else:
130
+ dct[magic_kind][magic_name] = func
131
+
132
+
133
+ def validate_type(magic_kind):
134
+ """Ensure that the given magic_kind is valid.
135
+
136
+ Check that the given magic_kind is one of the accepted spec types (stored
137
+ in the global `magic_spec`), raise ValueError otherwise.
138
+ """
139
+ if magic_kind not in magic_spec:
140
+ raise ValueError('magic_kind must be one of %s, %s given' %
141
+ magic_kinds, magic_kind)
142
+
143
+
144
+ # The docstrings for the decorator below will be fairly similar for the two
145
+ # types (method and function), so we generate them here once and reuse the
146
+ # templates below.
147
+ _docstring_template = \
148
+ """Decorate the given {0} as {1} magic.
149
+
150
+ The decorator can be used with or without arguments, as follows.
151
+
152
+ i) without arguments: it will create a {1} magic named as the {0} being
153
+ decorated::
154
+
155
+ @deco
156
+ def foo(...)
157
+
158
+ will create a {1} magic named `foo`.
159
+
160
+ ii) with one string argument: which will be used as the actual name of the
161
+ resulting magic::
162
+
163
+ @deco('bar')
164
+ def foo(...)
165
+
166
+ will create a {1} magic named `bar`.
167
+
168
+ To register a class of magics use ``InteractiveShell.register_magics(class or instance)``.
169
+ """
170
+
171
+ # These two are decorator factories. While they are conceptually very similar,
172
+ # there are enough differences in the details that it's simpler to have them
173
+ # written as completely standalone functions rather than trying to share code
174
+ # and make a single one with convoluted logic.
175
+
176
+ def _method_magic_marker(magic_kind):
177
+ """Decorator factory for methods in Magics subclasses.
178
+ """
179
+
180
+ validate_type(magic_kind)
181
+
182
+ # This is a closure to capture the magic_kind. We could also use a class,
183
+ # but it's overkill for just that one bit of state.
184
+ def magic_deco(arg):
185
+ if callable(arg):
186
+ # "Naked" decorator call (just @foo, no args)
187
+ func = arg
188
+ name = func.__name__
189
+ retval = arg
190
+ record_magic(magics, magic_kind, name, name)
191
+ elif isinstance(arg, str):
192
+ # Decorator called with arguments (@foo('bar'))
193
+ name = arg
194
+ def mark(func, *a, **kw):
195
+ record_magic(magics, magic_kind, name, func.__name__)
196
+ return func
197
+ retval = mark
198
+ else:
199
+ raise TypeError("Decorator can only be called with "
200
+ "string or function")
201
+ return retval
202
+
203
+ # Ensure the resulting decorator has a usable docstring
204
+ magic_deco.__doc__ = _docstring_template.format('method', magic_kind)
205
+ return magic_deco
206
+
207
+
208
+ def _function_magic_marker(magic_kind):
209
+ """Decorator factory for standalone functions.
210
+ """
211
+ validate_type(magic_kind)
212
+
213
+ # This is a closure to capture the magic_kind. We could also use a class,
214
+ # but it's overkill for just that one bit of state.
215
+ def magic_deco(arg):
216
+ # Find get_ipython() in the caller's namespace
217
+ caller = sys._getframe(1)
218
+ for ns in ['f_locals', 'f_globals', 'f_builtins']:
219
+ get_ipython = getattr(caller, ns).get('get_ipython')
220
+ if get_ipython is not None:
221
+ break
222
+ else:
223
+ raise NameError('Decorator can only run in context where '
224
+ '`get_ipython` exists')
225
+
226
+ ip = get_ipython()
227
+
228
+ if callable(arg):
229
+ # "Naked" decorator call (just @foo, no args)
230
+ func = arg
231
+ name = func.__name__
232
+ ip.register_magic_function(func, magic_kind, name)
233
+ retval = arg
234
+ elif isinstance(arg, str):
235
+ # Decorator called with arguments (@foo('bar'))
236
+ name = arg
237
+ def mark(func, *a, **kw):
238
+ ip.register_magic_function(func, magic_kind, name)
239
+ return func
240
+ retval = mark
241
+ else:
242
+ raise TypeError("Decorator can only be called with "
243
+ "string or function")
244
+ return retval
245
+
246
+ # Ensure the resulting decorator has a usable docstring
247
+ ds = _docstring_template.format('function', magic_kind)
248
+
249
+ ds += dedent("""
250
+ Note: this decorator can only be used in a context where IPython is already
251
+ active, so that the `get_ipython()` call succeeds. You can therefore use
252
+ it in your startup files loaded after IPython initializes, but *not* in the
253
+ IPython configuration file itself, which is executed before IPython is
254
+ fully up and running. Any file located in the `startup` subdirectory of
255
+ your configuration profile will be OK in this sense.
256
+ """)
257
+
258
+ magic_deco.__doc__ = ds
259
+ return magic_deco
260
+
261
+
262
+ MAGIC_NO_VAR_EXPAND_ATTR = "_ipython_magic_no_var_expand"
263
+ MAGIC_OUTPUT_CAN_BE_SILENCED = "_ipython_magic_output_can_be_silenced"
264
+
265
+
266
+ def no_var_expand(magic_func):
267
+ """Mark a magic function as not needing variable expansion
268
+
269
+ By default, IPython interprets `{a}` or `$a` in the line passed to magics
270
+ as variables that should be interpolated from the interactive namespace
271
+ before passing the line to the magic function.
272
+ This is not always desirable, e.g. when the magic executes Python code
273
+ (%timeit, %time, etc.).
274
+ Decorate magics with `@no_var_expand` to opt-out of variable expansion.
275
+
276
+ .. versionadded:: 7.3
277
+ """
278
+ setattr(magic_func, MAGIC_NO_VAR_EXPAND_ATTR, True)
279
+ return magic_func
280
+
281
+
282
+ def output_can_be_silenced(magic_func):
283
+ """Mark a magic function so its output may be silenced.
284
+
285
+ The output is silenced if the Python code used as a parameter of
286
+ the magic ends in a semicolon, not counting a Python comment that can
287
+ follow it.
288
+ """
289
+ setattr(magic_func, MAGIC_OUTPUT_CAN_BE_SILENCED, True)
290
+ return magic_func
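A hedged sketch of how these two markers combine on a (hypothetical) line magic defined inside a Magics subclass, so that `$x`/`{x}` in the argument line are passed through literally and a trailing `;` suppresses the displayed result:

    @no_var_expand
    @output_can_be_silenced
    @line_magic
    def echo_raw(self, line):
        """Hypothetical magic: return the argument line without interpolation."""
        return line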
291
+
292
+ # Create the actual decorators for public use
293
+
294
+ # These three are used to decorate methods in class definitions
295
+ line_magic = _method_magic_marker('line')
296
+ cell_magic = _method_magic_marker('cell')
297
+ line_cell_magic = _method_magic_marker('line_cell')
298
+
299
+ # These three decorate standalone functions and perform the decoration
300
+ # immediately. They can only run where get_ipython() works
301
+ register_line_magic = _function_magic_marker('line')
302
+ register_cell_magic = _function_magic_marker('cell')
303
+ register_line_cell_magic = _function_magic_marker('line_cell')
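Putting the class and method decorators together, here is a minimal sketch of a custom magics container (class and magic names are hypothetical); registration assumes an active shell so that get_ipython() is available:

    from IPython.core.magic import Magics, magics_class, line_magic, cell_magic

    @magics_class
    class MyMagics(Magics):
        """Hypothetical container for a couple of custom magics."""

        @line_magic
        def shout(self, line):
            # %shout hello  ->  'HELLO'
            return line.upper()

        @cell_magic
        def countlines(self, line, cell):
            # %%countlines  ->  number of lines in the cell body
            return len(cell.splitlines())

    # In an active IPython session:
    # get_ipython().register_magics(MyMagics)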
304
+
305
+ #-----------------------------------------------------------------------------
306
+ # Core Magic classes
307
+ #-----------------------------------------------------------------------------
308
+
309
+ class MagicsManager(Configurable):
310
+ """Object that handles all magic-related functionality for IPython.
311
+ """
312
+ # Non-configurable class attributes
313
+
314
+ # A two-level dict, first keyed by magic type, then by magic function, and
315
+ # holding the actual callable object as value. This is the dict used for
316
+ # magic function dispatch
317
+ magics = Dict()
318
+ lazy_magics = Dict(
319
+ help="""
320
+ Mapping from magic names to modules to load.
321
+
322
+ This can be used in IPython/IPykernel configuration to declare lazy magics
323
+ that will only be imported/registered on first use.
324
+
325
+ For example::
326
+
327
+ c.MagicsManager.lazy_magics = {
328
+ "my_magic": "slow.to.import",
329
+ "my_other_magic": "also.slow",
330
+ }
331
+
332
+ On first invocation of `%my_magic`, `%%my_magic`, `%my_other_magic` or
333
+ `%%my_other_magic`, the corresponding module will be loaded as an IPython
334
+ extension, as if you had previously done `%load_ext slow.to.import`.
335
+
336
+ Magic names should be given without leading percent sign(s), as magics can be both cell
337
+ and line magics.
338
+
339
+ Lazy loading happens relatively late in the execution process, and
340
+ complex extensions that manipulate Python/IPython internal state or global state
341
+ might not support lazy loading.
342
+ """
343
+ ).tag(
344
+ config=True,
345
+ )
346
+
347
+ # A registry of the original objects that we've been given holding magics.
348
+ registry = Dict()
349
+
350
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
351
+
352
+ auto_magic = Bool(True, help=
353
+ "Automatically call line magics without requiring explicit % prefix"
354
+ ).tag(config=True)
355
+ @observe('auto_magic')
356
+ def _auto_magic_changed(self, change):
357
+ self.shell.automagic = change['new']
358
+
359
+ _auto_status = [
360
+ 'Automagic is OFF, % prefix IS needed for line magics.',
361
+ 'Automagic is ON, % prefix IS NOT needed for line magics.']
362
+
363
+ user_magics = Instance('IPython.core.magics.UserMagics', allow_none=True)
364
+
365
+ def __init__(self, shell=None, config=None, user_magics=None, **traits):
366
+
367
+ super(MagicsManager, self).__init__(shell=shell, config=config,
368
+ user_magics=user_magics, **traits)
369
+ self.magics = dict(line={}, cell={})
370
+ # Let's add the user_magics to the registry for uniformity, so *all*
371
+ # registered magic containers can be found there.
372
+ self.registry[user_magics.__class__.__name__] = user_magics
373
+
374
+ def auto_status(self):
375
+ """Return descriptive string with automagic status."""
376
+ return self._auto_status[self.auto_magic]
377
+
378
+ def lsmagic(self):
379
+ """Return a dict of currently available magic functions.
380
+
381
+ The return dict has the keys 'line' and 'cell', corresponding to the
382
+ two types of magics we support. Each value is a list of names.
383
+ """
384
+ return self.magics
385
+
386
+ def lsmagic_docs(self, brief=False, missing=''):
387
+ """Return dict of documentation of magic functions.
388
+
389
+ The return dict has the keys 'line' and 'cell', corresponding to the
390
+ two types of magics we support. Each value is a dict keyed by magic
391
+ name whose value is the function docstring. If a docstring is
392
+ unavailable, the value of `missing` is used instead.
393
+
394
+ If brief is True, only the first line of each docstring will be returned.
395
+ """
396
+ docs = {}
397
+ for m_type in self.magics:
398
+ m_docs = {}
399
+ for m_name, m_func in self.magics[m_type].items():
400
+ if m_func.__doc__:
401
+ if brief:
402
+ m_docs[m_name] = m_func.__doc__.split('\n', 1)[0]
403
+ else:
404
+ m_docs[m_name] = m_func.__doc__.rstrip()
405
+ else:
406
+ m_docs[m_name] = missing
407
+ docs[m_type] = m_docs
408
+ return docs
409
+
410
+ def register_lazy(self, name: str, fully_qualified_name: str):
411
+ """
412
+ Lazily register a magic via an extension.
413
+
414
+
415
+ Parameters
416
+ ----------
417
+ name : str
418
+ Name of the magic you wish to register.
419
+ fully_qualified_name : str
420
+ Fully qualified name of the module/submodule that should be loaded
421
+ as an extension when the magic is first called.
422
+ It is assumed that loading this extension will register the given
423
+ magic.
424
+ """
425
+
426
+ self.lazy_magics[name] = fully_qualified_name
427
+
428
+ def register(self, *magic_objects):
429
+ """Register one or more instances of Magics.
430
+
431
+ Take one or more classes or instances of classes that subclass the main
432
+ `Magics` class, and register them with IPython to use the magic
433
+ functions they provide. The registration process will then ensure that
434
+ any methods that have been decorated to provide line and/or cell magics will
435
+ be recognized with the `%x`/`%%x` syntax as a line/cell magic
436
+ respectively.
437
+
438
+ If classes are given, they will be instantiated with the default
439
+ constructor. If your classes need a custom constructor, you should
440
+ instantiate them first and pass the instance.
441
+
442
+ The provided arguments can be an arbitrary mix of classes and instances.
443
+
444
+ Parameters
445
+ ----------
446
+ *magic_objects : one or more classes or instances
447
+ """
448
+ # Start by validating them to ensure they have all had their magic
449
+ # methods registered at the instance level
450
+ for m in magic_objects:
451
+ if not m.registered:
452
+ raise ValueError("Class of magics %r was constructed without "
453
+ "the @register_magics class decorator")
454
+ if isinstance(m, type):
455
+ # If we're given an uninstantiated class
456
+ m = m(shell=self.shell)
457
+
458
+ # Now that we have an instance, we can register it and update the
459
+ # table of callables
460
+ self.registry[m.__class__.__name__] = m
461
+ for mtype in magic_kinds:
462
+ self.magics[mtype].update(m.magics[mtype])
463
+
464
+ def register_function(self, func, magic_kind='line', magic_name=None):
465
+ """Expose a standalone function as magic function for IPython.
466
+
467
+ This will create an IPython magic (line, cell or both) from a
468
+ standalone function. The functions should have the following
469
+ signatures:
470
+
471
+ * For line magics: `def f(line)`
472
+ * For cell magics: `def f(line, cell)`
473
+ * For a function that does both: `def f(line, cell=None)`
474
+
475
+ In the latter case, the function will be called with `cell==None` when
476
+ invoked as `%f`, and with cell as a string when invoked as `%%f`.
477
+
478
+ Parameters
479
+ ----------
480
+ func : callable
481
+ Function to be registered as a magic.
482
+ magic_kind : str
483
+ Kind of magic, one of 'line', 'cell' or 'line_cell'
484
+ magic_name : optional str
485
+ If given, the name the magic will have in the IPython namespace. By
486
+ default, the name of the function itself is used.
487
+ """
488
+
489
+ # Create the new method in the user_magics and register it in the
490
+ # global table
491
+ validate_type(magic_kind)
492
+ magic_name = func.__name__ if magic_name is None else magic_name
493
+ setattr(self.user_magics, magic_name, func)
494
+ record_magic(self.magics, magic_kind, magic_name, func)
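For illustration, a standalone function with the dual `def f(line, cell=None)` signature could be exposed under both `%` and `%%` roughly as below (the function name is made up; register_magic_function is the shell-level wrapper used by the decorators above, which delegates here):

    def size(line, cell=None):
        """Hypothetical magic: length of the line (as %size) or of the cell body (as %%size)."""
        return len(line) if cell is None else len(cell)

    # ip = get_ipython()
    # ip.register_magic_function(size, magic_kind='line_cell')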
495
+
496
+ def register_alias(self, alias_name, magic_name, magic_kind='line', magic_params=None):
497
+ """Register an alias to a magic function.
498
+
499
+ The alias is an instance of :class:`MagicAlias`, which holds the
500
+ name and kind of the magic it should call. Binding is done at
501
+ call time, so if the underlying magic function is changed the alias
502
+ will call the new function.
503
+
504
+ Parameters
505
+ ----------
506
+ alias_name : str
507
+ The name of the magic to be registered.
508
+ magic_name : str
509
+ The name of an existing magic.
510
+ magic_kind : str
511
+ Kind of magic, one of 'line' or 'cell'
512
+ """
513
+
514
+ # `validate_type` is too permissive, as it allows 'line_cell'
515
+ # which we do not handle.
516
+ if magic_kind not in magic_kinds:
517
+ raise ValueError('magic_kind must be one of %s, %s given' %
518
+ (magic_kinds, magic_kind))
519
+
520
+ alias = MagicAlias(self.shell, magic_name, magic_kind, magic_params)
521
+ setattr(self.user_magics, alias_name, alias)
522
+ record_magic(self.magics, magic_kind, alias_name, alias)
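A short sketch (assuming a live shell handle `ip`) of registering an alias `%t` for the existing `%timeit` magic; as described above, the target is looked up at call time:

    # ip = get_ipython()
    # ip.magics_manager.register_alias('t', 'timeit', magic_kind='line')
    # Afterwards, `%t sum(range(10))` dispatches to `%timeit` on every call.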
523
+
524
+ # Key base class that provides the central functionality for magics.
525
+
526
+
527
+ class Magics(Configurable):
528
+ """Base class for implementing magic functions.
529
+
530
+ Shell functions which can be reached as %function_name. All magic
531
+ functions should accept a string, which they can parse for their own
532
+ needs. This can make some functions easier to type, eg `%cd ../`
533
+ vs. `%cd("../")`
534
+
535
+ Classes providing magic functions need to subclass this class, and they
536
+ MUST:
537
+
538
+ - Use the method decorators `@line_magic` and `@cell_magic` to decorate
539
+ individual methods as magic functions, AND
540
+
541
+ - Use the class decorator `@magics_class` to ensure that the magic
542
+ methods are properly registered at the instance level upon instance
543
+ initialization.
544
+
545
+ See :mod:`IPython.core.magics` for examples of actual implementation classes.
546
+ """
547
+ # Dict holding all command-line options for each magic.
548
+ options_table = None
549
+ # Dict for the mapping of magic names to methods, set by class decorator
550
+ magics = None
551
+ # Flag to check that the class decorator was properly applied
552
+ registered = False
553
+ # Instance of IPython shell
554
+ shell = None
555
+
556
+ def __init__(self, shell=None, **kwargs):
557
+ if not(self.__class__.registered):
558
+ raise ValueError('Magics subclass without registration - '
559
+ 'did you forget to apply @magics_class?')
560
+ if shell is not None:
561
+ if hasattr(shell, 'configurables'):
562
+ shell.configurables.append(self)
563
+ if hasattr(shell, 'config'):
564
+ kwargs.setdefault('parent', shell)
565
+
566
+ self.shell = shell
567
+ self.options_table = {}
568
+ # The method decorators are run when the instance doesn't exist yet, so
569
+ # they can only record the names of the methods they are supposed to
570
+ # grab. Only now, that the instance exists, can we create the proper
571
+ # mapping to bound methods. So we read the info off the original names
572
+ # table and replace each method name by the actual bound method.
573
+ # But we mustn't clobber the *class* mapping, in case of multiple instances.
574
+ class_magics = self.magics
575
+ self.magics = {}
576
+ for mtype in magic_kinds:
577
+ tab = self.magics[mtype] = {}
578
+ cls_tab = class_magics[mtype]
579
+ for magic_name, meth_name in cls_tab.items():
580
+ if isinstance(meth_name, str):
581
+ # it's a method name, grab it
582
+ tab[magic_name] = getattr(self, meth_name)
583
+ else:
584
+ # it's the real thing
585
+ tab[magic_name] = meth_name
586
+ # Configurable **needs** to be initiated at the end or the config
587
+ # magics get screwed up.
588
+ super(Magics, self).__init__(**kwargs)
589
+
590
+ def arg_err(self,func):
591
+ """Print docstring if incorrect arguments were passed"""
592
+ print('Error in arguments:')
593
+ print(oinspect.getdoc(func))
594
+
595
+ def format_latex(self, strng):
596
+ """Format a string for latex inclusion."""
597
+
598
+ # Characters that need to be escaped for latex:
599
+ escape_re = re.compile(r'(%|_|\$|#|&)',re.MULTILINE)
600
+ # Magic command names as headers:
601
+ cmd_name_re = re.compile(r'^(%s.*?):' % ESC_MAGIC,
602
+ re.MULTILINE)
603
+ # Magic commands
604
+ cmd_re = re.compile(r'(?P<cmd>%s.+?\b)(?!\}\}:)' % ESC_MAGIC,
605
+ re.MULTILINE)
606
+ # Paragraph continue
607
+ par_re = re.compile(r'\\$',re.MULTILINE)
608
+
609
+ # The "\n" symbol
610
+ newline_re = re.compile(r'\\n')
611
+
612
+ # Now build the string for output:
613
+ #strng = cmd_name_re.sub(r'\n\\texttt{\\textsl{\\large \1}}:',strng)
614
+ strng = cmd_name_re.sub(r'\n\\bigskip\n\\texttt{\\textbf{ \1}}:',
615
+ strng)
616
+ strng = cmd_re.sub(r'\\texttt{\g<cmd>}',strng)
617
+ strng = par_re.sub(r'\\\\',strng)
618
+ strng = escape_re.sub(r'\\\1',strng)
619
+ strng = newline_re.sub(r'\\textbackslash{}n',strng)
620
+ return strng
621
+
622
+ def parse_options(self, arg_str, opt_str, *long_opts, **kw):
623
+ """Parse options passed to an argument string.
624
+
625
+ The interface is similar to that of :func:`getopt.getopt`, but it
626
+ returns a :class:`~IPython.utils.struct.Struct` with the options as keys
627
+ and the stripped argument string still as a string.
628
+
629
+ arg_str is split into a true sys.argv-style vector by using shlex.split.
630
+ This allows us to easily expand variables, glob files, quote
631
+ arguments, etc.
632
+
633
+ Parameters
634
+ ----------
635
+ arg_str : str
636
+ The arguments to parse.
637
+ opt_str : str
638
+ The options specification.
639
+ mode : str, default 'string'
640
+ If given as 'list', the argument string is returned as a list (split
641
+ on whitespace) instead of a string.
642
+ list_all : bool, default False
643
+ Put all option values in lists. Normally only options
644
+ appearing more than once are put in a list.
645
+ posix : bool, default True
646
+ Whether to split the input line in POSIX mode or not, as per the
647
+ conventions outlined in the :mod:`shlex` module from the standard
648
+ library.
649
+ """
650
+
651
+ # inject default options at the beginning of the input line
652
+ caller = sys._getframe(1).f_code.co_name
653
+ arg_str = '%s %s' % (self.options_table.get(caller,''),arg_str)
654
+
655
+ mode = kw.get('mode','string')
656
+ if mode not in ['string','list']:
657
+ raise ValueError('incorrect mode given: %s' % mode)
658
+ # Get options
659
+ list_all = kw.get('list_all',0)
660
+ posix = kw.get('posix', os.name == 'posix')
661
+ strict = kw.get('strict', True)
662
+
663
+ preserve_non_opts = kw.get("preserve_non_opts", False)
664
+ remainder_arg_str = arg_str
665
+
666
+ # Check if we have more than one argument to warrant extra processing:
667
+ odict = {} # Dictionary with options
668
+ args = arg_str.split()
669
+ if len(args) >= 1:
670
+ # Only look for options when there is at least one argument;
671
+ # an empty input has nothing to parse.
672
+ argv = arg_split(arg_str, posix, strict)
673
+ # Do regular option processing
674
+ try:
675
+ opts, args = getopt(argv, opt_str, long_opts)
676
+ except GetoptError as e:
677
+ raise UsageError(
678
+ '%s (allowed: "%s"%s)'
679
+ % (e.msg, opt_str, " ".join(("",) + long_opts) if long_opts else "")
680
+ ) from e
681
+ for o, a in opts:
682
+ if mode == "string" and preserve_non_opts:
683
+ # remove option-parts from the original args-string and preserve remaining-part.
684
+ # This relies on the arg_split(...) and getopt(...)'s impl spec, that the parsed options are
685
+ # returned in the original order.
686
+ remainder_arg_str = remainder_arg_str.replace(o, "", 1).replace(
687
+ a, "", 1
688
+ )
689
+ if o.startswith("--"):
690
+ o = o[2:]
691
+ else:
692
+ o = o[1:]
693
+ try:
694
+ odict[o].append(a)
695
+ except AttributeError:
696
+ odict[o] = [odict[o],a]
697
+ except KeyError:
698
+ if list_all:
699
+ odict[o] = [a]
700
+ else:
701
+ odict[o] = a
702
+
703
+ # Prepare opts,args for return
704
+ opts = Struct(odict)
705
+ if mode == 'string':
706
+ if preserve_non_opts:
707
+ args = remainder_arg_str.lstrip()
708
+ else:
709
+ args = " ".join(args)
710
+
711
+ return opts,args
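A hedged sketch of how a magic method (inside a Magics subclass) typically consumes parse_options; the option letters and magic name are invented for illustration:

    @line_magic
    def grep(self, parameter_s=''):
        """Hypothetical magic: -i is a flag, -n takes a value, --count is a long option."""
        opts, args = self.parse_options(parameter_s, 'in:', 'count')
        ignore_case = 'i' in opts
        max_hits = int(opts.get('n', 10))
        return ignore_case, max_hits, args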
712
+
713
+ def default_option(self, fn, optstr):
714
+ """Make an entry in the options_table for fn, with value optstr"""
715
+
716
+ if fn not in self.lsmagic():
717
+ error("%s is not a magic function" % fn)
718
+ self.options_table[fn] = optstr
719
+
720
+
721
+ class MagicAlias:
722
+ """An alias to another magic function.
723
+
724
+ An alias is determined by its magic name and magic kind. Lookup
725
+ is done at call time, so if the underlying magic changes the alias
726
+ will call the new function.
727
+
728
+ Use the :meth:`MagicsManager.register_alias` method or the
729
+ `%alias_magic` magic function to create and register a new alias.
730
+ """
731
+ def __init__(self, shell, magic_name, magic_kind, magic_params=None):
732
+ self.shell = shell
733
+ self.magic_name = magic_name
734
+ self.magic_params = magic_params
735
+ self.magic_kind = magic_kind
736
+
737
+ self.pretty_target = '%s%s' % (magic_escapes[self.magic_kind], self.magic_name)
738
+ self.__doc__ = "Alias for `%s`." % self.pretty_target
739
+
740
+ self._in_call = False
741
+
742
+ def __call__(self, *args, **kwargs):
743
+ """Call the magic alias."""
744
+ fn = self.shell.find_magic(self.magic_name, self.magic_kind)
745
+ if fn is None:
746
+ raise UsageError("Magic `%s` not found." % self.pretty_target)
747
+
748
+ # Protect against infinite recursion.
749
+ if self._in_call:
750
+ raise UsageError("Infinite recursion detected; "
751
+ "magic aliases cannot call themselves.")
752
+ self._in_call = True
753
+ try:
754
+ if self.magic_params:
755
+ args_list = list(args)
756
+ args_list[0] = self.magic_params + " " + args[0]
757
+ args = tuple(args_list)
758
+ return fn(*args, **kwargs)
759
+ finally:
760
+ self._in_call = False
temp_venv/lib/python3.13/site-packages/IPython/core/prefilter.py ADDED
@@ -0,0 +1,707 @@
1
+ # encoding: utf-8
2
+ """
3
+ Prefiltering components.
4
+
5
+ Prefilters transform user input before it is exec'd by Python. These
6
+ transforms are used to implement additional syntax such as !ls and %magic.
7
+ """
8
+
9
+ # Copyright (c) IPython Development Team.
10
+ # Distributed under the terms of the Modified BSD License.
11
+
12
+ from keyword import iskeyword
13
+ import re
14
+
15
+ from .autocall import IPyAutocall
16
+ from traitlets.config.configurable import Configurable
17
+ from .inputtransformer2 import (
18
+ ESC_MAGIC,
19
+ ESC_QUOTE,
20
+ ESC_QUOTE2,
21
+ ESC_PAREN,
22
+ )
23
+ from .macro import Macro
24
+ from .splitinput import LineInfo
25
+
26
+ from traitlets import (
27
+ List, Integer, Unicode, Bool, Instance, CRegExp
28
+ )
29
+
30
+ #-----------------------------------------------------------------------------
31
+ # Global utilities, errors and constants
32
+ #-----------------------------------------------------------------------------
33
+
34
+
35
+ class PrefilterError(Exception):
36
+ pass
37
+
38
+
39
+ # RegExp to identify potential function names
40
+ re_fun_name = re.compile(r'[^\W\d]([\w.]*) *$')
41
+
42
+ # RegExp to exclude strings with this start from autocalling. In
43
+ # particular, all binary operators should be excluded, so that if foo is
44
+ # callable, foo OP bar doesn't become foo(OP bar), which is invalid. The
45
+ # characters '!=()' don't need to be checked for, as the PythonOpsChecker
46
+ # routine explicitly does so, to catch direct calls and rebindings of
47
+ # existing names.
48
+
49
+ # Warning: the '-' HAS TO BE AT THE END of the first group, otherwise
50
+ # it affects the rest of the group in square brackets.
51
+ re_exclude_auto = re.compile(r'^[,&^\|\*/\+-]'
52
+ r'|^is |^not |^in |^and |^or ')
53
+
54
+ # try to catch also methods for stuff in lists/tuples/dicts: off
55
+ # (experimental). For this to work, the line_split regexp would need
56
+ # to be modified so it wouldn't break things at '['. That line is
57
+ # nasty enough that I shouldn't change it until I can test it _well_.
58
+ #self.re_fun_name = re.compile (r'[a-zA-Z_]([a-zA-Z0-9_.\[\]]*) ?$')
59
+
60
+
61
+ # Handler Check Utilities
62
+ def is_shadowed(identifier, ip):
63
+ """Is the given identifier defined in one of the namespaces which shadow
64
+ the alias and magic namespaces? Note that an identifier is different
65
+ from ifun, because it cannot contain a '.' character."""
66
+ # This is much safer than calling ofind, which can change state
67
+ return (identifier in ip.user_ns \
68
+ or identifier in ip.user_global_ns \
69
+ or identifier in ip.ns_table['builtin']\
70
+ or iskeyword(identifier))
71
+
72
+
73
+ #-----------------------------------------------------------------------------
74
+ # Main Prefilter manager
75
+ #-----------------------------------------------------------------------------
76
+
77
+
78
+ class PrefilterManager(Configurable):
79
+ """Main prefilter component.
80
+
81
+ The IPython prefilter is run on all user input before it is executed. The
82
+ prefilter consumes lines of input and produces transformed lines of
83
+ input.
84
+
85
+ The implementation consists of two phases:
86
+
87
+ 1. Transformers
88
+ 2. Checkers and handlers
89
+
90
+ Over time, we plan on deprecating the checkers and handlers and doing
91
+ everything in the transformers.
92
+
93
+ The transformers are instances of :class:`PrefilterTransformer` and have
94
+ a single method :meth:`transform` that takes a line and returns a
95
+ transformed line. The transformation can be accomplished using any
96
+ tool, but our current ones use regular expressions for speed.
97
+
98
+ After all the transformers have been run, the line is fed to the checkers,
99
+ which are instances of :class:`PrefilterChecker`. The line is passed to
100
+ the :meth:`check` method, which either returns `None` or a
101
+ :class:`PrefilterHandler` instance. If `None` is returned, the other
102
+ checkers are tried. If an :class:`PrefilterHandler` instance is returned,
103
+ the line is passed to the :meth:`handle` method of the returned
104
+ handler and no further checkers are tried.
105
+
106
+ Both transformers and checkers have a `priority` attribute that determines
107
+ the order in which they are called. Smaller priorities are tried first.
108
+
109
+ Both transformers and checkers also have an `enabled` attribute, which is
110
+ a boolean that determines if the instance is used.
111
+
112
+ Users or developers can change the priority or enabled attribute of
113
+ transformers or checkers, but they must call the :meth:`sort_checkers`
114
+ or :meth:`sort_transformers` method after changing the priority.
115
+ """
116
+
117
+ multi_line_specials = Bool(True).tag(config=True)
118
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
119
+
120
+ def __init__(self, shell=None, **kwargs):
121
+ super(PrefilterManager, self).__init__(shell=shell, **kwargs)
122
+ self.shell = shell
123
+ self._transformers = []
124
+ self.init_handlers()
125
+ self.init_checkers()
126
+
127
+ #-------------------------------------------------------------------------
128
+ # API for managing transformers
129
+ #-------------------------------------------------------------------------
130
+
131
+ def sort_transformers(self):
132
+ """Sort the transformers by priority.
133
+
134
+ This must be called after the priority of a transformer is changed.
135
+ The :meth:`register_transformer` method calls this automatically.
136
+ """
137
+ self._transformers.sort(key=lambda x: x.priority)
138
+
139
+ @property
140
+ def transformers(self):
141
+ """Return a list of checkers, sorted by priority."""
142
+ return self._transformers
143
+
144
+ def register_transformer(self, transformer):
145
+ """Register a transformer instance."""
146
+ if transformer not in self._transformers:
147
+ self._transformers.append(transformer)
148
+ self.sort_transformers()
149
+
150
+ def unregister_transformer(self, transformer):
151
+ """Unregister a transformer instance."""
152
+ if transformer in self._transformers:
153
+ self._transformers.remove(transformer)
154
+
155
+ #-------------------------------------------------------------------------
156
+ # API for managing checkers
157
+ #-------------------------------------------------------------------------
158
+
159
+ def init_checkers(self):
160
+ """Create the default checkers."""
161
+ self._checkers = []
162
+ for checker in _default_checkers:
163
+ checker(
164
+ shell=self.shell, prefilter_manager=self, parent=self
165
+ )
166
+
167
+ def sort_checkers(self):
168
+ """Sort the checkers by priority.
169
+
170
+ This must be called after the priority of a checker is changed.
171
+ The :meth:`register_checker` method calls this automatically.
172
+ """
173
+ self._checkers.sort(key=lambda x: x.priority)
174
+
175
+ @property
176
+ def checkers(self):
177
+ """Return a list of checkers, sorted by priority."""
178
+ return self._checkers
179
+
180
+ def register_checker(self, checker):
181
+ """Register a checker instance."""
182
+ if checker not in self._checkers:
183
+ self._checkers.append(checker)
184
+ self.sort_checkers()
185
+
186
+ def unregister_checker(self, checker):
187
+ """Unregister a checker instance."""
188
+ if checker in self._checkers:
189
+ self._checkers.remove(checker)
190
+
191
+ #-------------------------------------------------------------------------
192
+ # API for managing handlers
193
+ #-------------------------------------------------------------------------
194
+
195
+ def init_handlers(self):
196
+ """Create the default handlers."""
197
+ self._handlers = {}
198
+ self._esc_handlers = {}
199
+ for handler in _default_handlers:
200
+ handler(
201
+ shell=self.shell, prefilter_manager=self, parent=self
202
+ )
203
+
204
+ @property
205
+ def handlers(self):
206
+ """Return a dict of all the handlers."""
207
+ return self._handlers
208
+
209
+ def register_handler(self, name, handler, esc_strings):
210
+ """Register a handler instance by name with esc_strings."""
211
+ self._handlers[name] = handler
212
+ for esc_str in esc_strings:
213
+ self._esc_handlers[esc_str] = handler
214
+
215
+ def unregister_handler(self, name, handler, esc_strings):
216
+ """Unregister a handler instance by name with esc_strings."""
217
+ try:
218
+ del self._handlers[name]
219
+ except KeyError:
220
+ pass
221
+ for esc_str in esc_strings:
222
+ h = self._esc_handlers.get(esc_str)
223
+ if h is handler:
224
+ del self._esc_handlers[esc_str]
225
+
226
+ def get_handler_by_name(self, name):
227
+ """Get a handler by its name."""
228
+ return self._handlers.get(name)
229
+
230
+ def get_handler_by_esc(self, esc_str):
231
+ """Get a handler by its escape string."""
232
+ return self._esc_handlers.get(esc_str)
233
+
234
+ #-------------------------------------------------------------------------
235
+ # Main prefiltering API
236
+ #-------------------------------------------------------------------------
237
+
238
+ def prefilter_line_info(self, line_info):
239
+ """Prefilter a line that has been converted to a LineInfo object.
240
+
241
+ This implements the checker/handler part of the prefilter pipe.
242
+ """
243
+ # print("prefilter_line_info: ", line_info)
244
+ handler = self.find_handler(line_info)
245
+ return handler.handle(line_info)
246
+
247
+ def find_handler(self, line_info):
248
+ """Find a handler for the line_info by trying checkers."""
249
+ for checker in self.checkers:
250
+ if checker.enabled:
251
+ handler = checker.check(line_info)
252
+ if handler:
253
+ return handler
254
+ return self.get_handler_by_name('normal')
255
+
256
+ def transform_line(self, line, continue_prompt):
257
+ """Calls the enabled transformers in order of increasing priority."""
258
+ for transformer in self.transformers:
259
+ if transformer.enabled:
260
+ line = transformer.transform(line, continue_prompt)
261
+ return line
262
+
263
+ def prefilter_line(self, line, continue_prompt=False):
264
+ """Prefilter a single input line as text.
265
+
266
+ This method prefilters a single line of text by calling the
267
+ transformers and then the checkers/handlers.
268
+ """
269
+
270
+ # print("prefilter_line: ", line, continue_prompt)
271
+ # All handlers *must* return a value, even if it's blank ('').
272
+
273
+ # save the line away in case we crash, so the post-mortem handler can
274
+ # record it
275
+ self.shell._last_input_line = line
276
+
277
+ if not line:
278
+ # Return immediately on purely empty lines, so that if the user
279
+ # previously typed some whitespace that started a continuation
280
+ # prompt, he can break out of that loop with just an empty line.
281
+ # This is how the default python prompt works.
282
+ return ''
283
+
284
+ # At this point, we invoke our transformers.
285
+ if not continue_prompt or (continue_prompt and self.multi_line_specials):
286
+ line = self.transform_line(line, continue_prompt)
287
+
288
+ # Now we compute line_info for the checkers and handlers
289
+ line_info = LineInfo(line, continue_prompt)
290
+
291
+ # the input history needs to track even empty lines
292
+ stripped = line.strip()
293
+
294
+ normal_handler = self.get_handler_by_name('normal')
295
+ if not stripped:
296
+ return normal_handler.handle(line_info)
297
+
298
+ # special handlers are only allowed for single line statements
299
+ if continue_prompt and not self.multi_line_specials:
300
+ return normal_handler.handle(line_info)
301
+
302
+ prefiltered = self.prefilter_line_info(line_info)
303
+ # print("prefiltered line: %r" % prefiltered)
304
+ return prefiltered
305
+
306
+ def prefilter_lines(self, lines, continue_prompt=False):
307
+ """Prefilter multiple input lines of text.
308
+
309
+ This is the main entry point for prefiltering multiple lines of
310
+ input. This simply calls :meth:`prefilter_line` for each line of
311
+ input.
312
+
313
+ This covers cases where there are multiple lines in the user entry,
314
+ which is the case when the user goes back to a multiline history
315
+ entry and presses enter.
316
+ """
317
+ llines = lines.rstrip('\n').split('\n')
318
+ # We can get multiple lines in one shot, where multiline input 'blends'
319
+ # into one line, in cases like recalling from the readline history
320
+ # buffer. We need to make sure that in such cases, we correctly
321
+ # communicate downstream which line is first and which are continuation
322
+ # ones.
323
+ if len(llines) > 1:
324
+ out = '\n'.join([self.prefilter_line(line, lnum>0)
325
+ for lnum, line in enumerate(llines) ])
326
+ else:
327
+ out = self.prefilter_line(llines[0], continue_prompt)
328
+
329
+ return out
330
+
331
+ #-----------------------------------------------------------------------------
332
+ # Prefilter transformers
333
+ #-----------------------------------------------------------------------------
334
+
335
+
336
+ class PrefilterTransformer(Configurable):
337
+ """Transform a line of user input."""
338
+
339
+ priority = Integer(100).tag(config=True)
340
+ # Transformers don't currently use shell or prefilter_manager, but as we
341
+ # move away from checkers and handlers, they will need them.
342
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
343
+ prefilter_manager = Instance('IPython.core.prefilter.PrefilterManager', allow_none=True)
344
+ enabled = Bool(True).tag(config=True)
345
+
346
+ def __init__(self, shell=None, prefilter_manager=None, **kwargs):
347
+ super(PrefilterTransformer, self).__init__(
348
+ shell=shell, prefilter_manager=prefilter_manager, **kwargs
349
+ )
350
+ self.prefilter_manager.register_transformer(self)
351
+
352
+ def transform(self, line, continue_prompt):
353
+ """Transform a line, returning the new one."""
354
+ return None
355
+
356
+ def __repr__(self):
357
+ return "<%s(priority=%r, enabled=%r)>" % (
358
+ self.__class__.__name__, self.priority, self.enabled)
359
+
360
+
361
+ #-----------------------------------------------------------------------------
362
+ # Prefilter checkers
363
+ #-----------------------------------------------------------------------------
364
+
365
+
366
+ class PrefilterChecker(Configurable):
367
+ """Inspect an input line and return a handler for that line."""
368
+
369
+ priority = Integer(100).tag(config=True)
370
+ shell = Instance('IPython.core.interactiveshell.InteractiveShellABC', allow_none=True)
371
+ prefilter_manager = Instance('IPython.core.prefilter.PrefilterManager', allow_none=True)
372
+ enabled = Bool(True).tag(config=True)
373
+
374
+ def __init__(self, shell=None, prefilter_manager=None, **kwargs):
375
+ super(PrefilterChecker, self).__init__(
376
+ shell=shell, prefilter_manager=prefilter_manager, **kwargs
377
+ )
378
+ self.prefilter_manager.register_checker(self)
379
+
380
+ def check(self, line_info):
381
+ """Inspect line_info and return a handler instance or None."""
382
+ return None
383
+
384
+ def __repr__(self):
385
+ return "<%s(priority=%r, enabled=%r)>" % (
386
+ self.__class__.__name__, self.priority, self.enabled)
387
+
388
+
389
+ class EmacsChecker(PrefilterChecker):
390
+
391
+ priority = Integer(100).tag(config=True)
392
+ enabled = Bool(False).tag(config=True)
393
+
394
+ def check(self, line_info):
395
+ "Emacs ipython-mode tags certain input lines."
396
+ if line_info.line.endswith('# PYTHON-MODE'):
397
+ return self.prefilter_manager.get_handler_by_name('emacs')
398
+ else:
399
+ return None
400
+
401
+
402
+ class MacroChecker(PrefilterChecker):
403
+
404
+ priority = Integer(250).tag(config=True)
405
+
406
+ def check(self, line_info):
407
+ obj = self.shell.user_ns.get(line_info.ifun)
408
+ if isinstance(obj, Macro):
409
+ return self.prefilter_manager.get_handler_by_name('macro')
410
+ else:
411
+ return None
412
+
413
+
414
+ class IPyAutocallChecker(PrefilterChecker):
415
+
416
+ priority = Integer(300).tag(config=True)
417
+
418
+ def check(self, line_info):
419
+ "Instances of IPyAutocall in user_ns get autocalled immediately"
420
+ obj = self.shell.user_ns.get(line_info.ifun, None)
421
+ if isinstance(obj, IPyAutocall):
422
+ obj.set_ip(self.shell)
423
+ return self.prefilter_manager.get_handler_by_name('auto')
424
+ else:
425
+ return None
426
+
427
+
428
+ class AssignmentChecker(PrefilterChecker):
429
+
430
+ priority = Integer(600).tag(config=True)
431
+
432
+ def check(self, line_info):
433
+ """Check to see if user is assigning to a var for the first time, in
434
+ which case we want to avoid any sort of automagic / autocall games.
435
+
436
+ This allows users to assign to either alias or magic names true python
437
+ variables (the magic/alias systems always take second seat to true
438
+ python code). E.g. ls='hi', or ls,that=1,2"""
439
+ if line_info.the_rest:
440
+ if line_info.the_rest[0] in '=,':
441
+ return self.prefilter_manager.get_handler_by_name('normal')
442
+ else:
443
+ return None
444
+
445
+
446
+ class AutoMagicChecker(PrefilterChecker):
447
+
448
+ priority = Integer(700).tag(config=True)
449
+
450
+ def check(self, line_info):
451
+ """If the ifun is magic, and automagic is on, run it. Note: normal,
452
+ non-auto magic would already have been triggered via '%' in
453
+ check_esc_chars. This just checks for automagic. Also, before
454
+ triggering the magic handler, make sure that there is nothing in the
455
+ user namespace which could shadow it."""
456
+ if not self.shell.automagic or not self.shell.find_magic(line_info.ifun):
457
+ return None
458
+
459
+ # We have a likely magic method. Make sure we should actually call it.
460
+ if line_info.continue_prompt and not self.prefilter_manager.multi_line_specials:
461
+ return None
462
+
463
+ head = line_info.ifun.split('.',1)[0]
464
+ if is_shadowed(head, self.shell):
465
+ return None
466
+
467
+ return self.prefilter_manager.get_handler_by_name('magic')
468
+
469
+
470
+ class PythonOpsChecker(PrefilterChecker):
471
+
472
+ priority = Integer(900).tag(config=True)
473
+
474
+ def check(self, line_info):
475
+ """If the 'rest' of the line begins with a function call or pretty much
476
+ any python operator, we should simply execute the line (regardless of
477
+ whether or not there's a possible autocall expansion). This avoids
478
+ spurious (and very confusing) getattr() accesses."""
479
+ if line_info.the_rest and line_info.the_rest[0] in "!=()<>,+*/%^&|":
480
+ return self.prefilter_manager.get_handler_by_name("normal")
481
+ else:
482
+ return None
483
+
484
+
485
+ class AutocallChecker(PrefilterChecker):
486
+
487
+ priority = Integer(1000).tag(config=True)
488
+
489
+ function_name_regexp = CRegExp(re_fun_name,
490
+ help="RegExp to identify potential function names."
491
+ ).tag(config=True)
492
+ exclude_regexp = CRegExp(re_exclude_auto,
493
+ help="RegExp to exclude strings with this start from autocalling."
494
+ ).tag(config=True)
495
+
496
+ def check(self, line_info):
497
+ "Check if the initial word/function is callable and autocall is on."
498
+ if not self.shell.autocall:
499
+ return None
500
+
501
+ oinfo = line_info.ofind(self.shell) # This can mutate state via getattr
502
+ if not oinfo.found:
503
+ return None
504
+
505
+ ignored_funs = ['b', 'f', 'r', 'u', 'br', 'rb', 'fr', 'rf']
506
+ ifun = line_info.ifun
507
+ line = line_info.line
508
+ if ifun.lower() in ignored_funs and (line.startswith(ifun + "'") or line.startswith(ifun + '"')):
509
+ return None
510
+
511
+ if (
512
+ callable(oinfo.obj)
513
+ and (not self.exclude_regexp.match(line_info.the_rest))
514
+ and self.function_name_regexp.match(line_info.ifun)
515
+ and (
516
+ line_info.raw_the_rest.startswith(" ")
517
+ or not line_info.raw_the_rest.strip()
518
+ )
519
+ ):
520
+ return self.prefilter_manager.get_handler_by_name("auto")
521
+ else:
522
+ return None
523
+
524
+
525
+ #-----------------------------------------------------------------------------
526
+ # Prefilter handlers
527
+ #-----------------------------------------------------------------------------
528
+
529
+
530
+ class PrefilterHandler(Configurable):
531
+ handler_name = Unicode("normal")
532
+ esc_strings: List = List([])
533
+ shell = Instance(
534
+ "IPython.core.interactiveshell.InteractiveShellABC", allow_none=True
535
+ )
536
+ prefilter_manager = Instance(
537
+ "IPython.core.prefilter.PrefilterManager", allow_none=True
538
+ )
539
+
540
+ def __init__(self, shell=None, prefilter_manager=None, **kwargs):
541
+ super(PrefilterHandler, self).__init__(
542
+ shell=shell, prefilter_manager=prefilter_manager, **kwargs
543
+ )
544
+ self.prefilter_manager.register_handler(
545
+ self.handler_name,
546
+ self,
547
+ self.esc_strings
548
+ )
549
+
550
+ def handle(self, line_info):
551
+ # print("normal: ", line_info)
552
+ """Handle normal input lines. Use as a template for handlers."""
553
+
554
+ # With autoindent on, we need some way to exit the input loop, and I
555
+ # don't want to force the user to have to backspace all the way to
556
+ # clear the line. The rule will be in this case, that either two
557
+ # lines of pure whitespace in a row, or a line of pure whitespace but
558
+ # of a size different to the indent level, will exit the input loop.
559
+ line = line_info.line
560
+ continue_prompt = line_info.continue_prompt
561
+
562
+ if (continue_prompt and
563
+ self.shell.autoindent and
564
+ line.isspace() and
565
+ 0 < abs(len(line) - self.shell.indent_current_nsp) <= 2):
566
+ line = ''
567
+
568
+ return line
569
+
570
+ def __str__(self):
571
+ return "<%s(name=%s)>" % (self.__class__.__name__, self.handler_name)
572
+
573
+
574
+ class MacroHandler(PrefilterHandler):
575
+ handler_name = Unicode("macro")
576
+
577
+ def handle(self, line_info):
578
+ obj = self.shell.user_ns.get(line_info.ifun)
579
+ pre_space = line_info.pre_whitespace
580
+ line_sep = "\n" + pre_space
581
+ return pre_space + line_sep.join(obj.value.splitlines())
582
+
583
+
584
+ class MagicHandler(PrefilterHandler):
585
+
586
+ handler_name = Unicode('magic')
587
+ esc_strings = List([ESC_MAGIC])
588
+
589
+ def handle(self, line_info):
590
+ """Execute magic functions."""
591
+ ifun = line_info.ifun
592
+ the_rest = line_info.the_rest
593
+ #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
594
+ t_arg_s = ifun + " " + the_rest
595
+ t_magic_name, _, t_magic_arg_s = t_arg_s.partition(' ')
596
+ t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
597
+ cmd = '%sget_ipython().run_line_magic(%r, %r)' % (line_info.pre_whitespace, t_magic_name, t_magic_arg_s)
598
+ return cmd
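For illustration, a line such as `%time sleep(1)` reaches this handler with ifun 'time' and the_rest 'sleep(1)', and is rewritten to roughly:

    # get_ipython().run_line_magic('time', 'sleep(1)')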
599
+
600
+
601
+ class AutoHandler(PrefilterHandler):
602
+
603
+ handler_name = Unicode('auto')
604
+ esc_strings = List([ESC_PAREN, ESC_QUOTE, ESC_QUOTE2])
605
+
606
+ def handle(self, line_info):
607
+ """Handle lines which can be auto-executed, quoting if requested."""
608
+ line = line_info.line
609
+ ifun = line_info.ifun
610
+ the_rest = line_info.the_rest
611
+ esc = line_info.esc
612
+ continue_prompt = line_info.continue_prompt
613
+ obj = line_info.ofind(self.shell).obj
614
+
615
+ # This should only be active for single-line input!
616
+ if continue_prompt:
617
+ return line
618
+
619
+ force_auto = isinstance(obj, IPyAutocall)
620
+
621
+ # User objects sometimes raise exceptions on attribute access other
622
+ # than AttributeError (we've seen it in the past), so it's safest to be
623
+ # ultra-conservative here and catch all.
624
+ try:
625
+ auto_rewrite = obj.rewrite
626
+ except Exception:
627
+ auto_rewrite = True
628
+
629
+ if esc == ESC_QUOTE:
630
+ # Auto-quote splitting on whitespace
631
+ newcmd = '%s("%s")' % (ifun,'", "'.join(the_rest.split()) )
632
+ elif esc == ESC_QUOTE2:
633
+ # Auto-quote whole string
634
+ newcmd = '%s("%s")' % (ifun,the_rest)
635
+ elif esc == ESC_PAREN:
636
+ newcmd = '%s(%s)' % (ifun,",".join(the_rest.split()))
637
+ else:
638
+ # Auto-paren.
639
+ if force_auto:
640
+ # Don't rewrite if it is already a call.
641
+ do_rewrite = not the_rest.startswith('(')
642
+ else:
643
+ if not the_rest:
644
+ # We only apply it to argument-less calls if the autocall
645
+ # parameter is set to 2.
646
+ do_rewrite = (self.shell.autocall >= 2)
647
+ elif the_rest.startswith('[') and hasattr(obj, '__getitem__'):
648
+ # Don't autocall in this case: item access for an object
649
+ # which is BOTH callable and implements __getitem__.
650
+ do_rewrite = False
651
+ else:
652
+ do_rewrite = True
653
+
654
+ # Figure out the rewritten command
655
+ if do_rewrite:
656
+ if the_rest.endswith(';'):
657
+ newcmd = '%s(%s);' % (ifun.rstrip(),the_rest[:-1])
658
+ else:
659
+ newcmd = '%s(%s)' % (ifun.rstrip(), the_rest)
660
+ else:
661
+ normal_handler = self.prefilter_manager.get_handler_by_name('normal')
662
+ return normal_handler.handle(line_info)
663
+
664
+ # Display the rewritten call
665
+ if auto_rewrite:
666
+ self.shell.auto_rewrite_input(newcmd)
667
+
668
+ return newcmd
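As a quick illustration of the escape branches above (the function and arguments are hypothetical):

    # ,fun a b   ->  fun("a", "b")   # ESC_QUOTE: auto-quote, split on whitespace
    # ;fun a b   ->  fun("a b")      # ESC_QUOTE2: auto-quote the whole remainder
    # /fun a b   ->  fun(a,b)        # ESC_PAREN: auto-parenthesize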
669
+
670
+
671
+ class EmacsHandler(PrefilterHandler):
672
+
673
+ handler_name = Unicode('emacs')
674
+ esc_strings = List([])
675
+
676
+ def handle(self, line_info):
677
+ """Handle input lines marked by python-mode."""
678
+
679
+ # Currently, nothing is done. Later more functionality can be added
680
+ # here if needed.
681
+
682
+ # The input cache shouldn't be updated
683
+ return line_info.line
684
+
685
+
686
+ #-----------------------------------------------------------------------------
687
+ # Defaults
688
+ #-----------------------------------------------------------------------------
689
+
690
+
691
+ _default_checkers = [
692
+ EmacsChecker,
693
+ MacroChecker,
694
+ IPyAutocallChecker,
695
+ AssignmentChecker,
696
+ AutoMagicChecker,
697
+ PythonOpsChecker,
698
+ AutocallChecker
699
+ ]
700
+
701
+ _default_handlers = [
702
+ PrefilterHandler,
703
+ MacroHandler,
704
+ MagicHandler,
705
+ AutoHandler,
706
+ EmacsHandler
707
+ ]
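As a hedged sketch of extending the pipeline (the class name and trigger are hypothetical), a custom checker only needs to be instantiated against the shell's prefilter manager and to return an already-registered handler from check():

    class CommentChecker(PrefilterChecker):
        """Hypothetical checker: route lines starting with '##' to the normal handler."""
        priority = Integer(50).tag(config=True)

        def check(self, line_info):
            if line_info.line.startswith('##'):
                return self.prefilter_manager.get_handler_by_name('normal')
            return None

    # Instantiation registers the checker, mirroring init_checkers() above:
    # ip = get_ipython()
    # CommentChecker(shell=ip, prefilter_manager=ip.prefilter_manager,
    #                parent=ip.prefilter_manager)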
temp_venv/lib/python3.13/site-packages/IPython/core/profiledir.py ADDED
@@ -0,0 +1,244 @@
1
+ # encoding: utf-8
2
+ """An object for managing IPython profile directories."""
3
+
4
+ # Copyright (c) IPython Development Team.
5
+ # Distributed under the terms of the Modified BSD License.
6
+
7
+ import os
8
+ import shutil
9
+ import errno
10
+ from pathlib import Path
11
+
12
+ from traitlets.config.configurable import LoggingConfigurable
13
+ from ..paths import get_ipython_package_dir
14
+ from ..utils.path import expand_path, ensure_dir_exists
15
+ from traitlets import Unicode, Bool, observe
16
+
17
+ from typing import Optional
18
+
19
+ #-----------------------------------------------------------------------------
20
+ # Module errors
21
+ #-----------------------------------------------------------------------------
22
+
23
+ class ProfileDirError(Exception):
24
+ pass
25
+
26
+
27
+ #-----------------------------------------------------------------------------
28
+ # Class for managing profile directories
29
+ #-----------------------------------------------------------------------------
30
+
31
+ class ProfileDir(LoggingConfigurable):
32
+ """An object to manage the profile directory and its resources.
33
+
34
+ The profile directory is used by all IPython applications, to manage
35
+ configuration, logging and security.
36
+
37
+ This object knows how to find, create and manage these directories. This
38
+ should be used by any code that wants to handle profiles.
39
+ """
40
+
41
+ security_dir_name = Unicode('security')
42
+ log_dir_name = Unicode('log')
43
+ startup_dir_name = Unicode('startup')
44
+ pid_dir_name = Unicode('pid')
45
+ static_dir_name = Unicode('static')
46
+ security_dir = Unicode(u'')
47
+ log_dir = Unicode(u'')
48
+ startup_dir = Unicode(u'')
49
+ pid_dir = Unicode(u'')
50
+ static_dir = Unicode(u'')
51
+
52
+ location = Unicode(u'',
53
+ help="""Set the profile location directly. This overrides the logic used by the
54
+ `profile` option.""",
55
+ ).tag(config=True)
56
+
57
+ _location_isset = Bool(False) # flag for detecting multiply set location
58
+ @observe('location')
59
+ def _location_changed(self, change):
60
+ if self._location_isset:
61
+ raise RuntimeError("Cannot set profile location more than once.")
62
+ self._location_isset = True
63
+ new = change['new']
64
+ ensure_dir_exists(new)
65
+
66
+ # ensure config files exist:
67
+ self.security_dir = os.path.join(new, self.security_dir_name)
68
+ self.log_dir = os.path.join(new, self.log_dir_name)
69
+ self.startup_dir = os.path.join(new, self.startup_dir_name)
70
+ self.pid_dir = os.path.join(new, self.pid_dir_name)
71
+ self.static_dir = os.path.join(new, self.static_dir_name)
72
+ self.check_dirs()
73
+
74
+ def _mkdir(self, path: str, mode: Optional[int] = None) -> bool:
75
+ """ensure a directory exists at a given path
76
+
77
+ This is a version of os.mkdir, with the following differences:
78
+
79
+ - returns whether the directory has been created or not.
80
+ - ignores EEXIST, protecting against race conditions where
81
+ the dir may have been created in between the check and
82
+ the creation
83
+ - sets permissions if requested and the dir already exists
84
+
85
+ Parameters
86
+ ----------
87
+ path: str
88
+ path of the dir to create
89
+ mode: int
90
+ see `mode` of `os.mkdir`
91
+
92
+ Returns
93
+ -------
94
+ bool:
95
+ returns True if it created the directory, False otherwise
96
+ """
97
+
98
+ if os.path.exists(path):
99
+ if mode and os.stat(path).st_mode != mode:
100
+ try:
101
+ os.chmod(path, mode)
102
+ except OSError:
103
+ self.log.warning(
104
+ "Could not set permissions on %s",
105
+ path
106
+ )
107
+ return False
108
+ try:
109
+ if mode:
110
+ os.mkdir(path, mode)
111
+ else:
112
+ os.mkdir(path)
113
+ except OSError as e:
114
+ if e.errno == errno.EEXIST:
115
+ return False
116
+ else:
117
+ raise
118
+
119
+ return True
120
+
121
+ @observe('log_dir')
122
+ def check_log_dir(self, change=None):
123
+ self._mkdir(self.log_dir)
124
+
125
+ @observe('startup_dir')
126
+ def check_startup_dir(self, change=None):
127
+ if self._mkdir(self.startup_dir):
128
+ readme = os.path.join(self.startup_dir, "README")
129
+ src = os.path.join(
130
+ get_ipython_package_dir(), "core", "profile", "README_STARTUP"
131
+ )
132
+
133
+ if os.path.exists(src):
134
+ if not os.path.exists(readme):
135
+ shutil.copy(src, readme)
136
+ else:
137
+ self.log.warning(
138
+ "Could not copy README_STARTUP to startup dir. Source file %s does not exist.",
139
+ src,
140
+ )
141
+
142
+ @observe('security_dir')
143
+ def check_security_dir(self, change=None):
144
+ self._mkdir(self.security_dir, 0o40700)
145
+
146
+ @observe('pid_dir')
147
+ def check_pid_dir(self, change=None):
148
+ self._mkdir(self.pid_dir, 0o40700)
149
+
150
+ def check_dirs(self):
151
+ self.check_security_dir()
152
+ self.check_log_dir()
153
+ self.check_pid_dir()
154
+ self.check_startup_dir()
155
+
156
+ def copy_config_file(self, config_file: str, path: Path, overwrite=False) -> bool:
157
+ """Copy a default config file into the active profile directory.
158
+
159
+ Default configuration files are kept in :mod:`IPython.core.profile`.
160
+ This function moves these from that location to the working profile
161
+ directory.
162
+ """
163
+ dst = Path(os.path.join(self.location, config_file))
164
+ if dst.exists() and not overwrite:
165
+ return False
166
+ if path is None:
167
+ path = os.path.join(get_ipython_package_dir(), u'core', u'profile', u'default')
168
+ assert isinstance(path, Path)
169
+ src = path / config_file
170
+ shutil.copy(src, dst)
171
+ return True
172
+
173
+ @classmethod
174
+ def create_profile_dir(cls, profile_dir, config=None):
175
+ """Create a new profile directory given a full path.
176
+
177
+ Parameters
178
+ ----------
179
+ profile_dir : str
180
+ The full path to the profile directory. If it does exist, it will
181
+ be used. If not, it will be created.
182
+ """
183
+ return cls(location=profile_dir, config=config)
184
+
185
+ @classmethod
186
+ def create_profile_dir_by_name(cls, path, name=u'default', config=None):
187
+ """Create a profile dir by profile name and path.
188
+
189
+ Parameters
190
+ ----------
191
+ path : unicode
192
+ The path (directory) to put the profile directory in.
193
+ name : unicode
194
+ The name of the profile. The name of the profile directory will
195
+ be "profile_<profile>".
196
+ """
197
+ if not os.path.isdir(path):
198
+ raise ProfileDirError('Directory not found: %s' % path)
199
+ profile_dir = os.path.join(path, u'profile_' + name)
200
+ return cls(location=profile_dir, config=config)
201
+
202
+ @classmethod
203
+ def find_profile_dir_by_name(cls, ipython_dir, name=u'default', config=None):
204
+ """Find an existing profile dir by profile name, return its ProfileDir.
205
+
206
+ This searches through a sequence of paths for a profile dir. If it
207
+ is not found, a :class:`ProfileDirError` exception will be raised.
208
+
209
+ The search path algorithm is:
210
+ 1. ``os.getcwd()`` # removed for security reasons.
211
+ 2. ``ipython_dir``
212
+
213
+ Parameters
214
+ ----------
215
+ ipython_dir : unicode or str
216
+ The IPython directory to use.
217
+ name : unicode or str
218
+ The name of the profile. The name of the profile directory
219
+ will be "profile_<profile>".
220
+ """
221
+ dirname = u'profile_' + name
222
+ paths = [ipython_dir]
223
+ for p in paths:
224
+ profile_dir = os.path.join(p, dirname)
225
+ if os.path.isdir(profile_dir):
226
+ return cls(location=profile_dir, config=config)
227
+ else:
228
+ raise ProfileDirError('Profile directory not found in paths: %s' % dirname)
229
+
230
+ @classmethod
231
+ def find_profile_dir(cls, profile_dir, config=None):
232
+ """Find/create a profile dir and return its ProfileDir.
233
+
234
+ This will create the profile directory if it doesn't exist.
235
+
236
+ Parameters
237
+ ----------
238
+ profile_dir : unicode or str
239
+ The path of the profile directory.
240
+ """
241
+ profile_dir = expand_path(profile_dir)
242
+ if not os.path.isdir(profile_dir):
243
+ raise ProfileDirError('Profile directory not found: %s' % profile_dir)
244
+ return cls(location=profile_dir, config=config)
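For illustration, here is a minimal standalone sketch of the race-tolerant create-or-reuse pattern that `_mkdir` above implements; the `ensure_dir` name and the `/tmp/profile_demo` path are hypothetical, not part of IPython.

import errno
import os
from typing import Optional

def ensure_dir(path: str, mode: Optional[int] = None) -> bool:
    """Create `path` if needed; return True only if this call created it."""
    if os.path.exists(path):
        if mode and os.stat(path).st_mode != mode:
            os.chmod(path, mode)  # best-effort permission fix on an existing dir
        return False
    try:
        if mode:
            os.mkdir(path, mode)
        else:
            os.mkdir(path)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # created by another process between the exists() check and mkdir()
            return False
        raise
    return True

print(ensure_dir("/tmp/profile_demo"))  # True on first call, False afterwards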
temp_venv/lib/python3.13/site-packages/IPython/core/release.py ADDED
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Release data for the IPython project."""
3
+
4
+ #-----------------------------------------------------------------------------
5
+ # Copyright (c) 2008, IPython Development Team.
6
+ # Copyright (c) 2001, Fernando Perez <[email protected]>
7
+ # Copyright (c) 2001, Janko Hauser <[email protected]>
8
+ # Copyright (c) 2001, Nathaniel Gray <[email protected]>
9
+ #
10
+ # Distributed under the terms of the Modified BSD License.
11
+ #
12
+ # The full license is in the file COPYING.txt, distributed with this software.
13
+ #-----------------------------------------------------------------------------
14
+
15
+ # IPython version information. An empty _version_extra corresponds to a full
16
+ # release. 'dev' as a _version_extra string means this is a development
17
+ # version
18
+ _version_major = 9
19
+ _version_minor = 2
20
+ _version_patch = 0
21
+ _version_extra = ".dev"
22
+ # _version_extra = "b2"
23
+ _version_extra = "" # Uncomment this for full releases
24
+
25
+ # Construct full version string from these.
26
+ _ver = [_version_major, _version_minor, _version_patch]
27
+
28
+ __version__ = '.'.join(map(str, _ver))
29
+ if _version_extra:
30
+ __version__ = __version__ + _version_extra
31
+
32
+ version = __version__ # backwards compatibility name
33
+ version_info = (_version_major, _version_minor, _version_patch, _version_extra)
34
+
35
+
36
+ license = "BSD-3-Clause"
37
+
38
+ authors = {
39
+ "Fernando": ("Fernando Perez", "[email protected]"),
40
+ "M": ("M Bussonnier", "[email protected]"),
41
+ }
42
+
43
+ author = 'The IPython Development Team'
44
+
45
+ author_email = '[email protected]'
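A quick sketch of how the version string above is assembled; the numbers are illustrative and not a claim about any particular IPython release.

_version_major, _version_minor, _version_patch = 9, 2, 0
_version_extra = ".dev"  # empty string for a full release

_ver = [_version_major, _version_minor, _version_patch]
__version__ = ".".join(map(str, _ver))
if _version_extra:
    __version__ = __version__ + _version_extra

print(__version__)  # -> 9.2.0.dev
version_info = (_version_major, _version_minor, _version_patch, _version_extra)
print(version_info)  # -> (9, 2, 0, '.dev')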
temp_venv/lib/python3.13/site-packages/charset_normalizer-3.4.2.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 TAHRI Ahmed R.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
temp_venv/lib/python3.13/site-packages/executing/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.13 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/_exceptions.cpython-313.pyc ADDED
Binary file (1.21 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/_position_node_finder.cpython-313.pyc ADDED
Binary file (37 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/_pytest_utils.cpython-313.pyc ADDED
Binary file (756 Bytes).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/executing.cpython-313.pyc ADDED
Binary file (49.6 kB).
 
temp_venv/lib/python3.13/site-packages/executing/__pycache__/version.cpython-313.pyc ADDED
Binary file (222 Bytes).
 
temp_venv/lib/python3.13/site-packages/fsspec-2025.3.2.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2018, Martin Durant
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ * Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temp_venv/lib/python3.13/site-packages/fsspec/implementations/__init__.py ADDED
File without changes
temp_venv/lib/python3.13/site-packages/fsspec/implementations/arrow.py ADDED
@@ -0,0 +1,304 @@
1
+ import errno
2
+ import io
3
+ import os
4
+ import secrets
5
+ import shutil
6
+ from contextlib import suppress
7
+ from functools import cached_property, wraps
8
+ from urllib.parse import parse_qs
9
+
10
+ from fsspec.spec import AbstractFileSystem
11
+ from fsspec.utils import (
12
+ get_package_version_without_import,
13
+ infer_storage_options,
14
+ mirror_from,
15
+ tokenize,
16
+ )
17
+
18
+
19
+ def wrap_exceptions(func):
20
+ @wraps(func)
21
+ def wrapper(*args, **kwargs):
22
+ try:
23
+ return func(*args, **kwargs)
24
+ except OSError as exception:
25
+ if not exception.args:
26
+ raise
27
+
28
+ message, *args = exception.args
29
+ if isinstance(message, str) and "does not exist" in message:
30
+ raise FileNotFoundError(errno.ENOENT, message) from exception
31
+ else:
32
+ raise
33
+
34
+ return wrapper
35
+
36
+
37
+ PYARROW_VERSION = None
38
+
39
+
40
+ class ArrowFSWrapper(AbstractFileSystem):
41
+ """FSSpec-compatible wrapper of pyarrow.fs.FileSystem.
42
+
43
+ Parameters
44
+ ----------
45
+ fs : pyarrow.fs.FileSystem
46
+
47
+ """
48
+
49
+ root_marker = "/"
50
+
51
+ def __init__(self, fs, **kwargs):
52
+ global PYARROW_VERSION
53
+ PYARROW_VERSION = get_package_version_without_import("pyarrow")
54
+ self.fs = fs
55
+ super().__init__(**kwargs)
56
+
57
+ @property
58
+ def protocol(self):
59
+ return self.fs.type_name
60
+
61
+ @cached_property
62
+ def fsid(self):
63
+ return "hdfs_" + tokenize(self.fs.host, self.fs.port)
64
+
65
+ @classmethod
66
+ def _strip_protocol(cls, path):
67
+ ops = infer_storage_options(path)
68
+ path = ops["path"]
69
+ if path.startswith("//"):
70
+ # special case for "hdfs://path" (without the triple slash)
71
+ path = path[1:]
72
+ return path
73
+
74
+ def ls(self, path, detail=False, **kwargs):
75
+ path = self._strip_protocol(path)
76
+ from pyarrow.fs import FileSelector
77
+
78
+ entries = [
79
+ self._make_entry(entry)
80
+ for entry in self.fs.get_file_info(FileSelector(path))
81
+ ]
82
+ if detail:
83
+ return entries
84
+ else:
85
+ return [entry["name"] for entry in entries]
86
+
87
+ def info(self, path, **kwargs):
88
+ path = self._strip_protocol(path)
89
+ [info] = self.fs.get_file_info([path])
90
+ return self._make_entry(info)
91
+
92
+ def exists(self, path):
93
+ path = self._strip_protocol(path)
94
+ try:
95
+ self.info(path)
96
+ except FileNotFoundError:
97
+ return False
98
+ else:
99
+ return True
100
+
101
+ def _make_entry(self, info):
102
+ from pyarrow.fs import FileType
103
+
104
+ if info.type is FileType.Directory:
105
+ kind = "directory"
106
+ elif info.type is FileType.File:
107
+ kind = "file"
108
+ elif info.type is FileType.NotFound:
109
+ raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), info.path)
110
+ else:
111
+ kind = "other"
112
+
113
+ return {
114
+ "name": info.path,
115
+ "size": info.size,
116
+ "type": kind,
117
+ "mtime": info.mtime,
118
+ }
119
+
120
+ @wrap_exceptions
121
+ def cp_file(self, path1, path2, **kwargs):
122
+ path1 = self._strip_protocol(path1).rstrip("/")
123
+ path2 = self._strip_protocol(path2).rstrip("/")
124
+
125
+ with self._open(path1, "rb") as lstream:
126
+ tmp_fname = f"{path2}.tmp.{secrets.token_hex(6)}"
127
+ try:
128
+ with self.open(tmp_fname, "wb") as rstream:
129
+ shutil.copyfileobj(lstream, rstream)
130
+ self.fs.move(tmp_fname, path2)
131
+ except BaseException:
132
+ with suppress(FileNotFoundError):
133
+ self.fs.delete_file(tmp_fname)
134
+ raise
135
+
136
+ @wrap_exceptions
137
+ def mv(self, path1, path2, **kwargs):
138
+ path1 = self._strip_protocol(path1).rstrip("/")
139
+ path2 = self._strip_protocol(path2).rstrip("/")
140
+ self.fs.move(path1, path2)
141
+
142
+ @wrap_exceptions
143
+ def rm_file(self, path):
144
+ path = self._strip_protocol(path)
145
+ self.fs.delete_file(path)
146
+
147
+ @wrap_exceptions
148
+ def rm(self, path, recursive=False, maxdepth=None):
149
+ path = self._strip_protocol(path).rstrip("/")
150
+ if self.isdir(path):
151
+ if recursive:
152
+ self.fs.delete_dir(path)
153
+ else:
154
+ raise ValueError("Can't delete directories without recursive=False")
155
+ else:
156
+ self.fs.delete_file(path)
157
+
158
+ @wrap_exceptions
159
+ def _open(self, path, mode="rb", block_size=None, seekable=True, **kwargs):
160
+ if mode == "rb":
161
+ if seekable:
162
+ method = self.fs.open_input_file
163
+ else:
164
+ method = self.fs.open_input_stream
165
+ elif mode == "wb":
166
+ method = self.fs.open_output_stream
167
+ elif mode == "ab":
168
+ method = self.fs.open_append_stream
169
+ else:
170
+ raise ValueError(f"unsupported mode for Arrow filesystem: {mode!r}")
171
+
172
+ _kwargs = {}
173
+ if mode != "rb" or not seekable:
174
+ if int(PYARROW_VERSION.split(".")[0]) >= 4:
175
+ # disable compression auto-detection
176
+ _kwargs["compression"] = None
177
+ stream = method(path, **_kwargs)
178
+
179
+ return ArrowFile(self, stream, path, mode, block_size, **kwargs)
180
+
181
+ @wrap_exceptions
182
+ def mkdir(self, path, create_parents=True, **kwargs):
183
+ path = self._strip_protocol(path)
184
+ if create_parents:
185
+ self.makedirs(path, exist_ok=True)
186
+ else:
187
+ self.fs.create_dir(path, recursive=False)
188
+
189
+ @wrap_exceptions
190
+ def makedirs(self, path, exist_ok=False):
191
+ path = self._strip_protocol(path)
192
+ self.fs.create_dir(path, recursive=True)
193
+
194
+ @wrap_exceptions
195
+ def rmdir(self, path):
196
+ path = self._strip_protocol(path)
197
+ self.fs.delete_dir(path)
198
+
199
+ @wrap_exceptions
200
+ def modified(self, path):
201
+ path = self._strip_protocol(path)
202
+ return self.fs.get_file_info(path).mtime
203
+
204
+ def cat_file(self, path, start=None, end=None, **kwargs):
205
+ kwargs["seekable"] = start not in [None, 0]
206
+ return super().cat_file(path, start=None, end=None, **kwargs)
207
+
208
+ def get_file(self, rpath, lpath, **kwargs):
209
+ kwargs["seekable"] = False
210
+ super().get_file(rpath, lpath, **kwargs)
211
+
212
+
213
+ @mirror_from(
214
+ "stream",
215
+ [
216
+ "read",
217
+ "seek",
218
+ "tell",
219
+ "write",
220
+ "readable",
221
+ "writable",
222
+ "close",
223
+ "size",
224
+ "seekable",
225
+ ],
226
+ )
227
+ class ArrowFile(io.IOBase):
228
+ def __init__(self, fs, stream, path, mode, block_size=None, **kwargs):
229
+ self.path = path
230
+ self.mode = mode
231
+
232
+ self.fs = fs
233
+ self.stream = stream
234
+
235
+ self.blocksize = self.block_size = block_size
236
+ self.kwargs = kwargs
237
+
238
+ def __enter__(self):
239
+ return self
240
+
241
+ def __exit__(self, *args):
242
+ return self.close()
243
+
244
+
245
+ class HadoopFileSystem(ArrowFSWrapper):
246
+ """A wrapper on top of the pyarrow.fs.HadoopFileSystem
247
+ to connect its interface with fsspec"""
248
+
249
+ protocol = "hdfs"
250
+
251
+ def __init__(
252
+ self,
253
+ host="default",
254
+ port=0,
255
+ user=None,
256
+ kerb_ticket=None,
257
+ replication=3,
258
+ extra_conf=None,
259
+ **kwargs,
260
+ ):
261
+ """
262
+
263
+ Parameters
264
+ ----------
265
+ host: str
266
+ Hostname, IP or "default" to try to read from Hadoop config
267
+ port: int
268
+ Port to connect on, or default from Hadoop config if 0
269
+ user: str or None
270
+ If given, connect as this username
271
+ kerb_ticket: str or None
272
+ If given, use this ticket for authentication
273
+ replication: int
274
+ set replication factor of file for write operations. default value is 3.
275
+ extra_conf: None or dict
276
+ Passed on to HadoopFileSystem
277
+ """
278
+ from pyarrow.fs import HadoopFileSystem
279
+
280
+ fs = HadoopFileSystem(
281
+ host=host,
282
+ port=port,
283
+ user=user,
284
+ kerb_ticket=kerb_ticket,
285
+ replication=replication,
286
+ extra_conf=extra_conf,
287
+ )
288
+ super().__init__(fs=fs, **kwargs)
289
+
290
+ @staticmethod
291
+ def _get_kwargs_from_urls(path):
292
+ ops = infer_storage_options(path)
293
+ out = {}
294
+ if ops.get("host", None):
295
+ out["host"] = ops["host"]
296
+ if ops.get("username", None):
297
+ out["user"] = ops["username"]
298
+ if ops.get("port", None):
299
+ out["port"] = ops["port"]
300
+ if ops.get("url_query", None):
301
+ queries = parse_qs(ops["url_query"])
302
+ if queries.get("replication", None):
303
+ out["replication"] = int(queries["replication"][0])
304
+ return out
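A minimal usage sketch of the wrapper above, assuming pyarrow is installed; the /tmp paths are illustrative.

from pyarrow.fs import LocalFileSystem
from fsspec.implementations.arrow import ArrowFSWrapper

# Wrap a pyarrow filesystem so it exposes the usual fsspec methods.
fs = ArrowFSWrapper(LocalFileSystem())

fs.makedirs("/tmp/arrow_demo", exist_ok=True)
with fs.open("/tmp/arrow_demo/hello.txt", "wb") as f:
    f.write(b"hello from pyarrow via fsspec")

print(fs.ls("/tmp/arrow_demo"))                      # names under the directory
print(fs.info("/tmp/arrow_demo/hello.txt")["size"])  # 29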
temp_venv/lib/python3.13/site-packages/fsspec/implementations/asyn_wrapper.py ADDED
@@ -0,0 +1,103 @@
1
+ import asyncio
2
+ import functools
3
+ import inspect
4
+
5
+ from fsspec.asyn import AsyncFileSystem, running_async
6
+
7
+
8
+ def async_wrapper(func, obj=None):
9
+ """
10
+ Wraps a synchronous function to make it awaitable.
11
+
12
+ Parameters
13
+ ----------
14
+ func : callable
15
+ The synchronous function to wrap.
16
+ obj : object, optional
17
+ The instance to bind the function to, if applicable.
18
+
19
+ Returns
20
+ -------
21
+ coroutine
22
+ An awaitable version of the function.
23
+ """
24
+
25
+ @functools.wraps(func)
26
+ async def wrapper(*args, **kwargs):
27
+ return await asyncio.to_thread(func, *args, **kwargs)
28
+
29
+ return wrapper
30
+
31
+
32
+ class AsyncFileSystemWrapper(AsyncFileSystem):
33
+ """
34
+ A wrapper class to convert a synchronous filesystem into an asynchronous one.
35
+
36
+ This class takes an existing synchronous filesystem implementation and wraps all
37
+ its methods to provide an asynchronous interface.
38
+
39
+ Parameters
40
+ ----------
41
+ sync_fs : AbstractFileSystem
42
+ The synchronous filesystem instance to wrap.
43
+ """
44
+
45
+ protocol = "async_wrapper"
46
+ cachable = False
47
+
48
+ def __init__(self, fs, *args, asynchronous=None, **kwargs):
49
+ if asynchronous is None:
50
+ asynchronous = running_async()
51
+ super().__init__(*args, asynchronous=asynchronous, **kwargs)
52
+ self.sync_fs = fs
53
+ self.protocol = self.sync_fs.protocol
54
+ self._wrap_all_sync_methods()
55
+
56
+ @property
57
+ def fsid(self):
58
+ return f"async_{self.sync_fs.fsid}"
59
+
60
+ def _wrap_all_sync_methods(self):
61
+ """
62
+ Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
63
+ """
64
+ excluded_methods = {"open"}
65
+ for method_name in dir(self.sync_fs):
66
+ if method_name.startswith("_") or method_name in excluded_methods:
67
+ continue
68
+
69
+ attr = inspect.getattr_static(self.sync_fs, method_name)
70
+ if isinstance(attr, property):
71
+ continue
72
+
73
+ method = getattr(self.sync_fs, method_name)
74
+ if callable(method) and not asyncio.iscoroutinefunction(method):
75
+ async_method = async_wrapper(method, obj=self)
76
+ setattr(self, f"_{method_name}", async_method)
77
+
78
+ @classmethod
79
+ def wrap_class(cls, sync_fs_class):
80
+ """
81
+ Create a new class that can be used to instantiate an AsyncFileSystemWrapper
82
+ with lazy instantiation of the underlying synchronous filesystem.
83
+
84
+ Parameters
85
+ ----------
86
+ sync_fs_class : type
87
+ The class of the synchronous filesystem to wrap.
88
+
89
+ Returns
90
+ -------
91
+ type
92
+ A new class that wraps the provided synchronous filesystem class.
93
+ """
94
+
95
+ class GeneratedAsyncFileSystemWrapper(cls):
96
+ def __init__(self, *args, **kwargs):
97
+ sync_fs = sync_fs_class(*args, **kwargs)
98
+ super().__init__(sync_fs)
99
+
100
+ GeneratedAsyncFileSystemWrapper.__name__ = (
101
+ f"Async{sync_fs_class.__name__}Wrapper"
102
+ )
103
+ return GeneratedAsyncFileSystemWrapper
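A minimal usage sketch of the wrapper above, using the in-memory fsspec filesystem; the path is illustrative.

import asyncio

import fsspec
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

async def main():
    sync_fs = fsspec.filesystem("memory")
    afs = AsyncFileSystemWrapper(sync_fs, asynchronous=True)
    # Each public sync method is re-exposed as an async `_<name>` coroutine
    # that runs the original call in a worker thread via asyncio.to_thread.
    await afs._pipe_file("/demo.bin", b"hello")
    print(await afs._cat_file("/demo.bin"))  # b'hello'

asyncio.run(main())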
temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_mapper.py ADDED
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import hashlib
5
+
6
+ from fsspec.implementations.local import make_path_posix
7
+
8
+
9
+ class AbstractCacheMapper(abc.ABC):
10
+ """Abstract super-class for mappers from remote URLs to local cached
11
+ basenames.
12
+ """
13
+
14
+ @abc.abstractmethod
15
+ def __call__(self, path: str) -> str: ...
16
+
17
+ def __eq__(self, other: object) -> bool:
18
+ # Identity only depends on class. When derived classes have attributes
19
+ # they will need to be included.
20
+ return isinstance(other, type(self))
21
+
22
+ def __hash__(self) -> int:
23
+ # Identity only depends on class. When derived classes have attributes
24
+ # they will need to be included.
25
+ return hash(type(self))
26
+
27
+
28
+ class BasenameCacheMapper(AbstractCacheMapper):
29
+ """Cache mapper that uses the basename of the remote URL and a fixed number
30
+ of directory levels above this.
31
+
32
+ The default is zero directory levels, meaning different paths with the same
33
+ basename will have the same cached basename.
34
+ """
35
+
36
+ def __init__(self, directory_levels: int = 0):
37
+ if directory_levels < 0:
38
+ raise ValueError(
39
+ "BasenameCacheMapper requires zero or positive directory_levels"
40
+ )
41
+ self.directory_levels = directory_levels
42
+
43
+ # Separator for directories when encoded as strings.
44
+ self._separator = "_@_"
45
+
46
+ def __call__(self, path: str) -> str:
47
+ path = make_path_posix(path)
48
+ prefix, *bits = path.rsplit("/", self.directory_levels + 1)
49
+ if bits:
50
+ return self._separator.join(bits)
51
+ else:
52
+ return prefix # No separator found, simple filename
53
+
54
+ def __eq__(self, other: object) -> bool:
55
+ return super().__eq__(other) and self.directory_levels == other.directory_levels
56
+
57
+ def __hash__(self) -> int:
58
+ return super().__hash__() ^ hash(self.directory_levels)
59
+
60
+
61
+ class HashCacheMapper(AbstractCacheMapper):
62
+ """Cache mapper that uses a hash of the remote URL."""
63
+
64
+ def __call__(self, path: str) -> str:
65
+ return hashlib.sha256(path.encode()).hexdigest()
66
+
67
+
68
+ def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
69
+ """Factory method to create cache mapper for backward compatibility with
70
+ ``CachingFileSystem`` constructor using ``same_names`` kwarg.
71
+ """
72
+ if same_names:
73
+ return BasenameCacheMapper()
74
+ else:
75
+ return HashCacheMapper()
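A quick illustration of the mappers defined above; the path is illustrative and the outputs shown assume a POSIX system.

from fsspec.implementations.cache_mapper import (
    BasenameCacheMapper,
    HashCacheMapper,
    create_cache_mapper,
)

path = "/data/2024/file.csv"

print(BasenameCacheMapper()(path))                    # file.csv
print(BasenameCacheMapper(directory_levels=1)(path))  # 2024_@_file.csv
print(len(HashCacheMapper()(path)))                   # 64 (sha256 hex digest)
print(type(create_cache_mapper(same_names=False)))    # HashCacheMapper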
temp_venv/lib/python3.13/site-packages/fsspec/implementations/cache_metadata.py ADDED
@@ -0,0 +1,232 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import pickle
5
+ import time
6
+ from typing import TYPE_CHECKING
7
+
8
+ from fsspec.utils import atomic_write
9
+
10
+ try:
11
+ import ujson as json
12
+ except ImportError:
13
+ if not TYPE_CHECKING:
14
+ import json
15
+
16
+ if TYPE_CHECKING:
17
+ from typing import Any, Dict, Iterator, Literal
18
+
19
+ from typing_extensions import TypeAlias
20
+
21
+ from .cached import CachingFileSystem
22
+
23
+ Detail: TypeAlias = Dict[str, Any]
24
+
25
+
26
+ class CacheMetadata:
27
+ """Cache metadata.
28
+
29
+ All reading and writing of cache metadata is performed by this class;
30
+ accessing the cached files and blocks is not.
31
+
32
+ Metadata is stored in a single file per storage directory in JSON format.
33
+ For backward compatibility, also reads metadata stored in pickle format
34
+ which is converted to JSON when next saved.
35
+ """
36
+
37
+ def __init__(self, storage: list[str]):
38
+ """
39
+
40
+ Parameters
41
+ ----------
42
+ storage: list[str]
43
+ Directories containing cached files, must be at least one. Metadata
44
+ is stored in the last of these directories by convention.
45
+ """
46
+ if not storage:
47
+ raise ValueError("CacheMetadata expects at least one storage location")
48
+
49
+ self._storage = storage
50
+ self.cached_files: list[Detail] = [{}]
51
+
52
+ # Private attribute to force saving of metadata in pickle format rather than
53
+ # JSON for use in tests to confirm can read both pickle and JSON formats.
54
+ self._force_save_pickle = False
55
+
56
+ def _load(self, fn: str) -> Detail:
57
+ """Low-level function to load metadata from specific file"""
58
+ try:
59
+ with open(fn, "r") as f:
60
+ loaded = json.load(f)
61
+ except ValueError:
62
+ with open(fn, "rb") as f:
63
+ loaded = pickle.load(f)
64
+ for c in loaded.values():
65
+ if isinstance(c.get("blocks"), list):
66
+ c["blocks"] = set(c["blocks"])
67
+ return loaded
68
+
69
+ def _save(self, metadata_to_save: Detail, fn: str) -> None:
70
+ """Low-level function to save metadata to specific file"""
71
+ if self._force_save_pickle:
72
+ with atomic_write(fn) as f:
73
+ pickle.dump(metadata_to_save, f)
74
+ else:
75
+ with atomic_write(fn, mode="w") as f:
76
+ json.dump(metadata_to_save, f)
77
+
78
+ def _scan_locations(
79
+ self, writable_only: bool = False
80
+ ) -> Iterator[tuple[str, str, bool]]:
81
+ """Yield locations (filenames) where metadata is stored, and whether
82
+ writable or not.
83
+
84
+ Parameters
85
+ ----------
86
+ writable_only: bool
87
+ Set to True to only yield writable locations.
88
+
89
+ Returns
90
+ -------
91
+ Yields (str, str, bool)
92
+ """
93
+ n = len(self._storage)
94
+ for i, storage in enumerate(self._storage):
95
+ writable = i == n - 1
96
+ if writable_only and not writable:
97
+ continue
98
+ yield os.path.join(storage, "cache"), storage, writable
99
+
100
+ def check_file(
101
+ self, path: str, cfs: CachingFileSystem | None
102
+ ) -> Literal[False] | tuple[Detail, str]:
103
+ """If path is in cache return its details, otherwise return ``False``.
104
+
105
+ If the optional CachingFileSystem is specified then it is used to
106
+ perform extra checks to reject possible matches, such as if they are
107
+ too old.
108
+ """
109
+ for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files):
110
+ if path not in cache:
111
+ continue
112
+ detail = cache[path].copy()
113
+
114
+ if cfs is not None:
115
+ if cfs.check_files and detail["uid"] != cfs.fs.ukey(path):
116
+ # Wrong file as determined by hash of file properties
117
+ continue
118
+ if cfs.expiry and time.time() - detail["time"] > cfs.expiry:
119
+ # Cached file has expired
120
+ continue
121
+
122
+ fn = os.path.join(base, detail["fn"])
123
+ if os.path.exists(fn):
124
+ return detail, fn
125
+ return False
126
+
127
+ def clear_expired(self, expiry_time: int) -> tuple[list[str], bool]:
128
+ """Remove expired metadata from the cache.
129
+
130
+ Returns names of files corresponding to expired metadata and a boolean
131
+ flag indicating whether the writable cache is empty. Caller is
132
+ responsible for deleting the expired files.
133
+ """
134
+ expired_files = []
135
+ for path, detail in self.cached_files[-1].copy().items():
136
+ if time.time() - detail["time"] > expiry_time:
137
+ fn = detail.get("fn", "")
138
+ if not fn:
139
+ raise RuntimeError(
140
+ f"Cache metadata does not contain 'fn' for {path}"
141
+ )
142
+ fn = os.path.join(self._storage[-1], fn)
143
+ expired_files.append(fn)
144
+ self.cached_files[-1].pop(path)
145
+
146
+ if self.cached_files[-1]:
147
+ cache_path = os.path.join(self._storage[-1], "cache")
148
+ self._save(self.cached_files[-1], cache_path)
149
+
150
+ writable_cache_empty = not self.cached_files[-1]
151
+ return expired_files, writable_cache_empty
152
+
153
+ def load(self) -> None:
154
+ """Load all metadata from disk and store in ``self.cached_files``"""
155
+ cached_files = []
156
+ for fn, _, _ in self._scan_locations():
157
+ if os.path.exists(fn):
158
+ # TODO: consolidate blocks here
159
+ cached_files.append(self._load(fn))
160
+ else:
161
+ cached_files.append({})
162
+ self.cached_files = cached_files or [{}]
163
+
164
+ def on_close_cached_file(self, f: Any, path: str) -> None:
165
+ """Perform side-effect actions on closing a cached file.
166
+
167
+ The actual closing of the file is the responsibility of the caller.
168
+ """
169
+ # File must be writable, so in self.cached_files[-1]
170
+ c = self.cached_files[-1][path]
171
+ if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
172
+ c["blocks"] = True
173
+
174
+ def pop_file(self, path: str) -> str | None:
175
+ """Remove metadata of cached file.
176
+
177
+ If path is in the cache, return the filename of the cached file,
178
+ otherwise return ``None``. Caller is responsible for deleting the
179
+ cached file.
180
+ """
181
+ details = self.check_file(path, None)
182
+ if not details:
183
+ return None
184
+ _, fn = details
185
+ if fn.startswith(self._storage[-1]):
186
+ self.cached_files[-1].pop(path)
187
+ self.save()
188
+ else:
189
+ raise PermissionError(
190
+ "Can only delete cached file in last, writable cache location"
191
+ )
192
+ return fn
193
+
194
+ def save(self) -> None:
195
+ """Save metadata to disk"""
196
+ for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files):
197
+ if not writable:
198
+ continue
199
+
200
+ if os.path.exists(fn):
201
+ cached_files = self._load(fn)
202
+ for k, c in cached_files.items():
203
+ if k in cache:
204
+ if c["blocks"] is True or cache[k]["blocks"] is True:
205
+ c["blocks"] = True
206
+ else:
207
+ # self.cached_files[*][*]["blocks"] must continue to
208
+ # point to the same set object so that updates
209
+ # performed by MMapCache are propagated back to
210
+ # self.cached_files.
211
+ blocks = cache[k]["blocks"]
212
+ blocks.update(c["blocks"])
213
+ c["blocks"] = blocks
214
+ c["time"] = max(c["time"], cache[k]["time"])
215
+ c["uid"] = cache[k]["uid"]
216
+
217
+ # Files can be added to cache after it was written once
218
+ for k, c in cache.items():
219
+ if k not in cached_files:
220
+ cached_files[k] = c
221
+ else:
222
+ cached_files = cache
223
+ cache = {k: v.copy() for k, v in cached_files.items()}
224
+ for c in cache.values():
225
+ if isinstance(c["blocks"], set):
226
+ c["blocks"] = list(c["blocks"])
227
+ self._save(cache, fn)
228
+ self.cached_files[-1] = cached_files
229
+
230
+ def update_file(self, path: str, detail: Detail) -> None:
231
+ """Update metadata for specific file in memory, do not save"""
232
+ self.cached_files[-1][path] = detail
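A minimal sketch of the metadata lifecycle implemented above; the entry values are illustrative, and the cached data file is created by hand only so that ``check_file`` can find it.

import os
import tempfile
import time

from fsspec.implementations.cache_metadata import CacheMetadata

storage = [tempfile.mkdtemp()]
md = CacheMetadata(storage)
md.load()  # no "cache" file on disk yet -> starts empty

detail = {
    "original": "memory://demo.bin",
    "fn": "deadbeef",        # cached basename, normally produced by a cache mapper
    "blocks": True,          # True means the whole file is cached
    "time": time.time(),
    "uid": "example-ukey",   # illustrative; normally fs.ukey(path)
}
md.update_file("memory://demo.bin", detail)
md.save()  # writes JSON metadata to <storage[-1]>/cache

# Create the (empty) cached data file so check_file can report a hit.
open(os.path.join(storage[-1], "deadbeef"), "wb").close()
print(md.check_file("memory://demo.bin", None))  # (detail dict, path to "deadbeef")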
temp_venv/lib/python3.13/site-packages/fsspec/implementations/cached.py ADDED
@@ -0,0 +1,941 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import tempfile
7
+ import time
8
+ import weakref
9
+ from shutil import rmtree
10
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar
11
+
12
+ from fsspec import AbstractFileSystem, filesystem
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.compression import compr
15
+ from fsspec.core import BaseCache, MMapCache
16
+ from fsspec.exceptions import BlocksizeMismatchError
17
+ from fsspec.implementations.cache_mapper import create_cache_mapper
18
+ from fsspec.implementations.cache_metadata import CacheMetadata
19
+ from fsspec.spec import AbstractBufferedFile
20
+ from fsspec.transaction import Transaction
21
+ from fsspec.utils import infer_compression
22
+
23
+ if TYPE_CHECKING:
24
+ from fsspec.implementations.cache_mapper import AbstractCacheMapper
25
+
26
+ logger = logging.getLogger("fsspec.cached")
27
+
28
+
29
+ class WriteCachedTransaction(Transaction):
30
+ def complete(self, commit=True):
31
+ rpaths = [f.path for f in self.files]
32
+ lpaths = [f.fn for f in self.files]
33
+ if commit:
34
+ self.fs.put(lpaths, rpaths)
35
+ self.files.clear()
36
+ self.fs._intrans = False
37
+ self.fs._transaction = None
38
+ self.fs = None # break cycle
39
+
40
+
41
+ class CachingFileSystem(AbstractFileSystem):
42
+ """Locally caching filesystem, layer over any other FS
43
+
44
+ This class implements chunk-wise local storage of remote files, for quick
45
+ access after the initial download. The files are stored in a given
46
+ directory with hashes of URLs for the filenames. If no directory is given,
47
+ a temporary one is used, which should be cleaned up by the OS after the
48
+ process ends. The files themselves are sparse (as implemented in
49
+ :class:`~fsspec.caching.MMapCache`), so only the data which is accessed
50
+ takes up space.
51
+
52
+ Restrictions:
53
+
54
+ - the block-size must be the same for each access of a given file, unless
55
+ all blocks of the file have already been read
56
+ - caching can only be applied to file-systems which produce files
57
+ derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
58
+ allowed, for testing
59
+ """
60
+
61
+ protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")
62
+
63
+ def __init__(
64
+ self,
65
+ target_protocol=None,
66
+ cache_storage="TMP",
67
+ cache_check=10,
68
+ check_files=False,
69
+ expiry_time=604800,
70
+ target_options=None,
71
+ fs=None,
72
+ same_names: bool | None = None,
73
+ compression=None,
74
+ cache_mapper: AbstractCacheMapper | None = None,
75
+ **kwargs,
76
+ ):
77
+ """
78
+
79
+ Parameters
80
+ ----------
81
+ target_protocol: str (optional)
82
+ Target filesystem protocol. Provide either this or ``fs``.
83
+ cache_storage: str or list(str)
84
+ Location to store files. If "TMP", this is a temporary directory,
85
+ and will be cleaned up by the OS when this process ends (or later).
86
+ If a list, each location will be tried in the order given, but
87
+ only the last will be considered writable.
88
+ cache_check: int
89
+ Number of seconds between reload of cache metadata
90
+ check_files: bool
91
+ Whether to explicitly see if the UID of the remote file matches
92
+ the stored one before using. Warning: some file systems such as
93
+ HTTP cannot reliably give a unique hash of the contents of some
94
+ path, so be sure to set this option to False.
95
+ expiry_time: int
96
+ The time in seconds after which a local copy is considered useless.
97
+ Set to falsy to prevent expiry. The default is equivalent to one
98
+ week.
99
+ target_options: dict or None
100
+ Passed to the instantiation of the FS, if fs is None.
101
+ fs: filesystem instance
102
+ The target filesystem to run against. Provide this or ``protocol``.
103
+ same_names: bool (optional)
104
+ By default, target URLs are hashed using a ``HashCacheMapper`` so
105
+ that files from different backends with the same basename do not
106
+ conflict. If this argument is ``true``, a ``BasenameCacheMapper``
107
+ is used instead. Other cache mapper options are available by using
108
+ the ``cache_mapper`` keyword argument. Only one of this and
109
+ ``cache_mapper`` should be specified.
110
+ compression: str (optional)
111
+ To decompress on download. Can be 'infer' (guess from the URL name),
112
+ one of the entries in ``fsspec.compression.compr``, or None for no
113
+ decompression.
114
+ cache_mapper: AbstractCacheMapper (optional)
115
+ The object used to map from original filenames to cached filenames.
116
+ Only one of this and ``same_names`` should be specified.
117
+ """
118
+ super().__init__(**kwargs)
119
+ if fs is None and target_protocol is None:
120
+ raise ValueError(
121
+ "Please provide filesystem instance(fs) or target_protocol"
122
+ )
123
+ if not (fs is None) ^ (target_protocol is None):
124
+ raise ValueError(
125
+ "Both filesystems (fs) and target_protocol may not be both given."
126
+ )
127
+ if cache_storage == "TMP":
128
+ tempdir = tempfile.mkdtemp()
129
+ storage = [tempdir]
130
+ weakref.finalize(self, self._remove_tempdir, tempdir)
131
+ else:
132
+ if isinstance(cache_storage, str):
133
+ storage = [cache_storage]
134
+ else:
135
+ storage = cache_storage
136
+ os.makedirs(storage[-1], exist_ok=True)
137
+ self.storage = storage
138
+ self.kwargs = target_options or {}
139
+ self.cache_check = cache_check
140
+ self.check_files = check_files
141
+ self.expiry = expiry_time
142
+ self.compression = compression
143
+
144
+ # Size of cache in bytes. If None then the size is unknown and will be
145
+ # recalculated the next time cache_size() is called. On writes to the
146
+ # cache this is reset to None.
147
+ self._cache_size = None
148
+
149
+ if same_names is not None and cache_mapper is not None:
150
+ raise ValueError(
151
+ "Cannot specify both same_names and cache_mapper in "
152
+ "CachingFileSystem.__init__"
153
+ )
154
+ if cache_mapper is not None:
155
+ self._mapper = cache_mapper
156
+ else:
157
+ self._mapper = create_cache_mapper(
158
+ same_names if same_names is not None else False
159
+ )
160
+
161
+ self.target_protocol = (
162
+ target_protocol
163
+ if isinstance(target_protocol, str)
164
+ else (fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0])
165
+ )
166
+ self._metadata = CacheMetadata(self.storage)
167
+ self.load_cache()
168
+ self.fs = fs if fs is not None else filesystem(target_protocol, **self.kwargs)
169
+
170
+ def _strip_protocol(path):
171
+ # acts as a method, since each instance has a different target
172
+ return self.fs._strip_protocol(type(self)._strip_protocol(path))
173
+
174
+ self._strip_protocol: Callable = _strip_protocol
175
+
176
+ @staticmethod
177
+ def _remove_tempdir(tempdir):
178
+ try:
179
+ rmtree(tempdir)
180
+ except Exception:
181
+ pass
182
+
183
+ def _mkcache(self):
184
+ os.makedirs(self.storage[-1], exist_ok=True)
185
+
186
+ def cache_size(self):
187
+ """Return size of cache in bytes.
188
+
189
+ If more than one cache directory is in use, only the size of the last
190
+ one (the writable cache directory) is returned.
191
+ """
192
+ if self._cache_size is None:
193
+ cache_dir = self.storage[-1]
194
+ self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
195
+ return self._cache_size
196
+
197
+ def load_cache(self):
198
+ """Read set of stored blocks from file"""
199
+ self._metadata.load()
200
+ self._mkcache()
201
+ self.last_cache = time.time()
202
+
203
+ def save_cache(self):
204
+ """Save set of stored blocks from file"""
205
+ self._mkcache()
206
+ self._metadata.save()
207
+ self.last_cache = time.time()
208
+ self._cache_size = None
209
+
210
+ def _check_cache(self):
211
+ """Reload caches if time elapsed or any disappeared"""
212
+ self._mkcache()
213
+ if not self.cache_check:
214
+ # explicitly told not to bother checking
215
+ return
216
+ timecond = time.time() - self.last_cache > self.cache_check
217
+ existcond = all(os.path.exists(storage) for storage in self.storage)
218
+ if timecond or not existcond:
219
+ self.load_cache()
220
+
221
+ def _check_file(self, path):
222
+ """Is path in cache and still valid"""
223
+ path = self._strip_protocol(path)
224
+ self._check_cache()
225
+ return self._metadata.check_file(path, self)
226
+
227
+ def clear_cache(self):
228
+ """Remove all files and metadata from the cache
229
+
230
+ In the case of multiple cache locations, this clears only the last one,
231
+ which is assumed to be the read/write one.
232
+ """
233
+ rmtree(self.storage[-1])
234
+ self.load_cache()
235
+ self._cache_size = None
236
+
237
+ def clear_expired_cache(self, expiry_time=None):
238
+ """Remove all expired files and metadata from the cache
239
+
240
+ In the case of multiple cache locations, this clears only the last one,
241
+ which is assumed to be the read/write one.
242
+
243
+ Parameters
244
+ ----------
245
+ expiry_time: int
246
+ The time in seconds after which a local copy is considered useless.
247
+ If not defined the default is equivalent to the attribute from the
248
+ file caching instantiation.
249
+ """
250
+
251
+ if not expiry_time:
252
+ expiry_time = self.expiry
253
+
254
+ self._check_cache()
255
+
256
+ expired_files, writable_cache_empty = self._metadata.clear_expired(expiry_time)
257
+ for fn in expired_files:
258
+ if os.path.exists(fn):
259
+ os.remove(fn)
260
+
261
+ if writable_cache_empty:
262
+ rmtree(self.storage[-1])
263
+ self.load_cache()
264
+
265
+ self._cache_size = None
266
+
267
+ def pop_from_cache(self, path):
268
+ """Remove cached version of given file
269
+
270
+ Deletes local copy of the given (remote) path. If it is found in a cache
271
+ location which is not the last, it is assumed to be read-only, and
272
+ raises PermissionError
273
+ """
274
+ path = self._strip_protocol(path)
275
+ fn = self._metadata.pop_file(path)
276
+ if fn is not None:
277
+ os.remove(fn)
278
+ self._cache_size = None
279
+
280
+ def _open(
281
+ self,
282
+ path,
283
+ mode="rb",
284
+ block_size=None,
285
+ autocommit=True,
286
+ cache_options=None,
287
+ **kwargs,
288
+ ):
289
+ """Wrap the target _open
290
+
291
+ If the whole file exists in the cache, just open it locally and
292
+ return that.
293
+
294
+ Otherwise, open the file on the target FS, and make it have a mmap
295
+ cache pointing to the location which we determine, in our cache.
296
+ The ``blocks`` instance is shared, so as the mmap cache instance
297
+ updates, so does the entry in our ``cached_files`` attribute.
298
+ We monkey-patch this file, so that when it closes, we call
299
+ ``close_and_update`` to save the state of the blocks.
300
+ """
301
+ path = self._strip_protocol(path)
302
+
303
+ path = self.fs._strip_protocol(path)
304
+ if "r" not in mode:
305
+ return self.fs._open(
306
+ path,
307
+ mode=mode,
308
+ block_size=block_size,
309
+ autocommit=autocommit,
310
+ cache_options=cache_options,
311
+ **kwargs,
312
+ )
313
+ detail = self._check_file(path)
314
+ if detail:
315
+ # file is in cache
316
+ detail, fn = detail
317
+ hash, blocks = detail["fn"], detail["blocks"]
318
+ if blocks is True:
319
+ # stored file is complete
320
+ logger.debug("Opening local copy of %s", path)
321
+ return open(fn, mode)
322
+ # TODO: action where partial file exists in read-only cache
323
+ logger.debug("Opening partially cached copy of %s", path)
324
+ else:
325
+ hash = self._mapper(path)
326
+ fn = os.path.join(self.storage[-1], hash)
327
+ blocks = set()
328
+ detail = {
329
+ "original": path,
330
+ "fn": hash,
331
+ "blocks": blocks,
332
+ "time": time.time(),
333
+ "uid": self.fs.ukey(path),
334
+ }
335
+ self._metadata.update_file(path, detail)
336
+ logger.debug("Creating local sparse file for %s", path)
337
+
338
+ # call target filesystems open
339
+ self._mkcache()
340
+ f = self.fs._open(
341
+ path,
342
+ mode=mode,
343
+ block_size=block_size,
344
+ autocommit=autocommit,
345
+ cache_options=cache_options,
346
+ cache_type="none",
347
+ **kwargs,
348
+ )
349
+ if self.compression:
350
+ comp = (
351
+ infer_compression(path)
352
+ if self.compression == "infer"
353
+ else self.compression
354
+ )
355
+ f = compr[comp](f, mode="rb")
356
+ if "blocksize" in detail:
357
+ if detail["blocksize"] != f.blocksize:
358
+ raise BlocksizeMismatchError(
359
+ f"Cached file must be reopened with same block"
360
+ f" size as original (old: {detail['blocksize']},"
361
+ f" new {f.blocksize})"
362
+ )
363
+ else:
364
+ detail["blocksize"] = f.blocksize
365
+
366
+ def _fetch_ranges(ranges):
367
+ return self.fs.cat_ranges(
368
+ [path] * len(ranges),
369
+ [r[0] for r in ranges],
370
+ [r[1] for r in ranges],
371
+ **kwargs,
372
+ )
373
+
374
+ multi_fetcher = None if self.compression else _fetch_ranges
375
+ f.cache = MMapCache(
376
+ f.blocksize, f._fetch_range, f.size, fn, blocks, multi_fetcher=multi_fetcher
377
+ )
378
+ close = f.close
379
+ f.close = lambda: self.close_and_update(f, close)
380
+ self.save_cache()
381
+ return f
382
+
383
+ def _parent(self, path):
384
+ return self.fs._parent(path)
385
+
386
+ def hash_name(self, path: str, *args: Any) -> str:
387
+ # Kept for backward compatibility with downstream libraries.
388
+ # Ignores extra arguments, previously same_name boolean.
389
+ return self._mapper(path)
390
+
391
+ def close_and_update(self, f, close):
392
+ """Called when a file is closing, so store the set of blocks"""
393
+ if f.closed:
394
+ return
395
+ path = self._strip_protocol(f.path)
396
+ self._metadata.on_close_cached_file(f, path)
397
+ try:
398
+ logger.debug("going to save")
399
+ self.save_cache()
400
+ logger.debug("saved")
401
+ except OSError:
402
+ logger.debug("Cache saving failed while closing file")
403
+ except NameError:
404
+ logger.debug("Cache save failed due to interpreter shutdown")
405
+ close()
406
+ f.closed = True
407
+
408
+ def ls(self, path, detail=True):
409
+ return self.fs.ls(path, detail)
410
+
411
+ def __getattribute__(self, item):
412
+ if item in {
413
+ "load_cache",
414
+ "_open",
415
+ "save_cache",
416
+ "close_and_update",
417
+ "__init__",
418
+ "__getattribute__",
419
+ "__reduce__",
420
+ "_make_local_details",
421
+ "open",
422
+ "cat",
423
+ "cat_file",
424
+ "cat_ranges",
425
+ "get",
426
+ "read_block",
427
+ "tail",
428
+ "head",
429
+ "info",
430
+ "ls",
431
+ "exists",
432
+ "isfile",
433
+ "isdir",
434
+ "_check_file",
435
+ "_check_cache",
436
+ "_mkcache",
437
+ "clear_cache",
438
+ "clear_expired_cache",
439
+ "pop_from_cache",
440
+ "local_file",
441
+ "_paths_from_path",
442
+ "get_mapper",
443
+ "open_many",
444
+ "commit_many",
445
+ "hash_name",
446
+ "__hash__",
447
+ "__eq__",
448
+ "to_json",
449
+ "to_dict",
450
+ "cache_size",
451
+ "pipe_file",
452
+ "pipe",
453
+ "start_transaction",
454
+ "end_transaction",
455
+ }:
456
+ # all the methods defined in this class. Note `open` here, since
457
+ # it calls `_open`, but is actually in superclass
458
+ return lambda *args, **kw: getattr(type(self), item).__get__(self)(
459
+ *args, **kw
460
+ )
461
+ if item in ["__reduce_ex__"]:
462
+ raise AttributeError
463
+ if item in ["transaction"]:
464
+ # property
465
+ return type(self).transaction.__get__(self)
466
+ if item in ["_cache", "transaction_type"]:
467
+ # class attributes
468
+ return getattr(type(self), item)
469
+ if item == "__class__":
470
+ return type(self)
471
+ d = object.__getattribute__(self, "__dict__")
472
+ fs = d.get("fs", None) # fs is not immediately defined
473
+ if item in d:
474
+ return d[item]
475
+ elif fs is not None:
476
+ if item in fs.__dict__:
477
+ # attribute of instance
478
+ return fs.__dict__[item]
479
+ # attributes belonging to the target filesystem
480
+ cls = type(fs)
481
+ m = getattr(cls, item)
482
+ if (inspect.isfunction(m) or inspect.isdatadescriptor(m)) and (
483
+ not hasattr(m, "__self__") or m.__self__ is None
484
+ ):
485
+ # instance method
486
+ return m.__get__(fs, cls)
487
+ return m # class method or attribute
488
+ else:
489
+ # attributes of the superclass, while target is being set up
490
+ return super().__getattribute__(item)
491
+
492
+ def __eq__(self, other):
493
+ """Test for equality."""
494
+ if self is other:
495
+ return True
496
+ if not isinstance(other, type(self)):
497
+ return False
498
+ return (
499
+ self.storage == other.storage
500
+ and self.kwargs == other.kwargs
501
+ and self.cache_check == other.cache_check
502
+ and self.check_files == other.check_files
503
+ and self.expiry == other.expiry
504
+ and self.compression == other.compression
505
+ and self._mapper == other._mapper
506
+ and self.target_protocol == other.target_protocol
507
+ )
508
+
509
+ def __hash__(self):
510
+ """Calculate hash."""
511
+ return (
512
+ hash(tuple(self.storage))
513
+ ^ hash(str(self.kwargs))
514
+ ^ hash(self.cache_check)
515
+ ^ hash(self.check_files)
516
+ ^ hash(self.expiry)
517
+ ^ hash(self.compression)
518
+ ^ hash(self._mapper)
519
+ ^ hash(self.target_protocol)
520
+ )
521
+
522
+
523
+ class WholeFileCacheFileSystem(CachingFileSystem):
524
+ """Caches whole remote files on first access
525
+
526
+ This class is intended as a layer over any other file system, and
527
+ will make a local copy of each file accessed, so that all subsequent
528
+ reads are local. This is similar to ``CachingFileSystem``, but without
529
+ the block-wise functionality and so can work even when sparse files
530
+ are not allowed. See its docstring for definition of the init
531
+ arguments.
532
+
533
+ The class still needs access to the remote store for listing files,
534
+ and may refresh cached files.
535
+ """
536
+
537
+ protocol = "filecache"
538
+ local_file = True
539
+
540
+ def open_many(self, open_files, **kwargs):
541
+ paths = [of.path for of in open_files]
542
+ if "r" in open_files.mode:
543
+ self._mkcache()
544
+ else:
545
+ return [
546
+ LocalTempFile(
547
+ self.fs,
548
+ path,
549
+ mode=open_files.mode,
550
+ fn=os.path.join(self.storage[-1], self._mapper(path)),
551
+ **kwargs,
552
+ )
553
+ for path in paths
554
+ ]
555
+
556
+ if self.compression:
557
+ raise NotImplementedError
558
+ details = [self._check_file(sp) for sp in paths]
559
+ downpath = [p for p, d in zip(paths, details) if not d]
560
+ downfn0 = [
561
+ os.path.join(self.storage[-1], self._mapper(p))
562
+ for p, d in zip(paths, details)
563
+ ] # keep these path names for opening later
564
+ downfn = [fn for fn, d in zip(downfn0, details) if not d]
565
+ if downpath:
566
+ # skip if all files are already cached and up to date
567
+ self.fs.get(downpath, downfn)
568
+
569
+ # update metadata - only happens when downloads are successful
570
+ newdetail = [
571
+ {
572
+ "original": path,
573
+ "fn": self._mapper(path),
574
+ "blocks": True,
575
+ "time": time.time(),
576
+ "uid": self.fs.ukey(path),
577
+ }
578
+ for path in downpath
579
+ ]
580
+ for path, detail in zip(downpath, newdetail):
581
+ self._metadata.update_file(path, detail)
582
+ self.save_cache()
583
+
584
+ def firstpart(fn):
585
+ # helper to adapt both whole-file and simple-cache
586
+ return fn[1] if isinstance(fn, tuple) else fn
587
+
588
+ return [
589
+ open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode)
590
+ for fn0, fn1 in zip(details, downfn0)
591
+ ]
592
+
593
+ def commit_many(self, open_files):
594
+ self.fs.put([f.fn for f in open_files], [f.path for f in open_files])
595
+ [f.close() for f in open_files]
596
+ for f in open_files:
597
+ # in case autocommit is off, and so close did not already delete
598
+ try:
599
+ os.remove(f.name)
600
+ except FileNotFoundError:
601
+ pass
602
+ self._cache_size = None
603
+
604
+ def _make_local_details(self, path):
605
+ hash = self._mapper(path)
606
+ fn = os.path.join(self.storage[-1], hash)
607
+ detail = {
608
+ "original": path,
609
+ "fn": hash,
610
+ "blocks": True,
611
+ "time": time.time(),
612
+ "uid": self.fs.ukey(path),
613
+ }
614
+ self._metadata.update_file(path, detail)
615
+ logger.debug("Copying %s to local cache", path)
616
+ return fn
617
+
618
+ def cat(
619
+ self,
620
+ path,
621
+ recursive=False,
622
+ on_error="raise",
623
+ callback=DEFAULT_CALLBACK,
624
+ **kwargs,
625
+ ):
626
+ paths = self.expand_path(
627
+ path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
628
+ )
629
+ getpaths = []
630
+ storepaths = []
631
+ fns = []
632
+ out = {}
633
+ for p in paths.copy():
634
+ try:
635
+ detail = self._check_file(p)
636
+ if not detail:
637
+ fn = self._make_local_details(p)
638
+ getpaths.append(p)
639
+ storepaths.append(fn)
640
+ else:
641
+ detail, fn = detail if isinstance(detail, tuple) else (None, detail)
642
+ fns.append(fn)
643
+ except Exception as e:
644
+ if on_error == "raise":
645
+ raise
646
+ if on_error == "return":
647
+ out[p] = e
648
+ paths.remove(p)
649
+
650
+ if getpaths:
651
+ self.fs.get(getpaths, storepaths)
652
+ self.save_cache()
653
+
654
+ callback.set_size(len(paths))
655
+ for p, fn in zip(paths, fns):
656
+ with open(fn, "rb") as f:
657
+ out[p] = f.read()
658
+ callback.relative_update(1)
659
+ if isinstance(path, str) and len(paths) == 1 and recursive is False:
660
+ out = out[paths[0]]
661
+ return out
662
+
663
+ def _open(self, path, mode="rb", **kwargs):
664
+ path = self._strip_protocol(path)
665
+ if "r" not in mode:
666
+ hash = self._mapper(path)
667
+ fn = os.path.join(self.storage[-1], hash)
668
+ user_specified_kwargs = {
669
+ k: v
670
+ for k, v in kwargs.items()
671
+ # those kwargs were added by open(), we don't want them
672
+ if k not in ["autocommit", "block_size", "cache_options"]
673
+ }
674
+ return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
675
+ detail = self._check_file(path)
676
+ if detail:
677
+ detail, fn = detail
678
+ _, blocks = detail["fn"], detail["blocks"]
679
+ if blocks is True:
680
+ logger.debug("Opening local copy of %s", path)
681
+
682
+ # In order to support downstream filesystems to be able to
683
+ # infer the compression from the original filename, like
684
+ # the `TarFileSystem`, let's extend the `io.BufferedReader`
685
+ # fileobject protocol by adding a dedicated attribute
686
+ # `original`.
687
+ f = open(fn, mode)
688
+ f.original = detail.get("original")
689
+ return f
690
+ else:
691
+ raise ValueError(
692
+ f"Attempt to open partially cached file {path}"
693
+ f" as a wholly cached file"
694
+ )
695
+ else:
696
+ fn = self._make_local_details(path)
697
+ kwargs["mode"] = mode
698
+
699
+ # call target filesystems open
700
+ self._mkcache()
701
+ if self.compression:
702
+ with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
703
+ if isinstance(f, AbstractBufferedFile):
704
+ # want no type of caching if just downloading whole thing
705
+ f.cache = BaseCache(0, f.cache.fetcher, f.size)
706
+ comp = (
707
+ infer_compression(path)
708
+ if self.compression == "infer"
709
+ else self.compression
710
+ )
711
+ f = compr[comp](f, mode="rb")
712
+ data = True
713
+ while data:
714
+ block = getattr(f, "blocksize", 5 * 2**20)
715
+ data = f.read(block)
716
+ f2.write(data)
717
+ else:
718
+ self.fs.get_file(path, fn)
719
+ self.save_cache()
720
+ return self._open(path, mode)
721
+
722
+
723
+ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
724
+ """Caches whole remote files on first access
725
+
726
+ This class is intended as a layer over any other file system, and
727
+ will make a local copy of each file accessed, so that all subsequent
728
+ reads are local. This implementation only copies whole files, and
729
+ does not keep any metadata about the download time or file details.
730
+ It is therefore safer to use in multi-threaded/concurrent situations.
731
+
732
+ This is the only of the caching filesystems that supports write: you will
733
+ be given a real local open file, and upon close and commit, it will be
734
+ uploaded to the target filesystem; the writability of the target URL is
735
+ not checked until that time.
736
+
737
+ """
738
+
739
+ protocol = "simplecache"
740
+ local_file = True
741
+ transaction_type = WriteCachedTransaction
742
+
743
+ def __init__(self, **kwargs):
744
+ kw = kwargs.copy()
745
+ for key in ["cache_check", "expiry_time", "check_files"]:
746
+ kw[key] = False
747
+ super().__init__(**kw)
748
+ for storage in self.storage:
749
+ if not os.path.exists(storage):
750
+ os.makedirs(storage, exist_ok=True)
751
+
752
+ def _check_file(self, path):
753
+ self._check_cache()
754
+ sha = self._mapper(path)
755
+ for storage in self.storage:
756
+ fn = os.path.join(storage, sha)
757
+ if os.path.exists(fn):
758
+ return fn
759
+
760
+ def save_cache(self):
761
+ pass
762
+
763
+ def load_cache(self):
764
+ pass
765
+
766
+ def pipe_file(self, path, value=None, **kwargs):
767
+ if self._intrans:
768
+ with self.open(path, "wb") as f:
769
+ f.write(value)
770
+ else:
771
+ super().pipe_file(path, value)
772
+
773
+ def ls(self, path, detail=True, **kwargs):
774
+ path = self._strip_protocol(path)
775
+ details = []
776
+ try:
777
+ details = self.fs.ls(
778
+ path, detail=True, **kwargs
779
+ ).copy() # don't edit original!
780
+ except FileNotFoundError as e:
781
+ ex = e
782
+ else:
783
+ ex = None
784
+ if self._intrans:
785
+ path1 = path.rstrip("/") + "/"
786
+ for f in self.transaction.files:
787
+ if f.path == path:
788
+ details.append(
789
+ {"name": path, "size": f.size or f.tell(), "type": "file"}
790
+ )
791
+ elif f.path.startswith(path1):
792
+ if f.path.count("/") == path1.count("/"):
793
+ details.append(
794
+ {"name": f.path, "size": f.size or f.tell(), "type": "file"}
795
+ )
796
+ else:
797
+ dname = "/".join(f.path.split("/")[: path1.count("/") + 1])
798
+ details.append({"name": dname, "size": 0, "type": "directory"})
799
+ if ex is not None and not details:
800
+ raise ex
801
+ if detail:
802
+ return details
803
+ return sorted(_["name"] for _ in details)
804
+
805
+ def info(self, path, **kwargs):
806
+ path = self._strip_protocol(path)
807
+ if self._intrans:
808
+ f = [_ for _ in self.transaction.files if _.path == path]
809
+ if f:
810
+ size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
811
+ return {"name": path, "size": size, "type": "file"}
812
+ f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
813
+ if f:
814
+ return {"name": path, "size": 0, "type": "directory"}
815
+ return self.fs.info(path, **kwargs)
816
+
817
+ def pipe(self, path, value=None, **kwargs):
818
+ if isinstance(path, str):
819
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
820
+ elif isinstance(path, dict):
821
+ for k, v in path.items():
822
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
823
+ else:
824
+ raise ValueError("path must be str or dict")
825
+
826
+ def cat_ranges(
827
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
828
+ ):
829
+ lpaths = [self._check_file(p) for p in paths]
830
+ rpaths = [p for l, p in zip(lpaths, paths) if l is False]
831
+ lpaths = [l for l, p in zip(lpaths, paths) if l is False]
832
+ self.fs.get(rpaths, lpaths)
833
+ return super().cat_ranges(
834
+ paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
835
+ )
836
+
837
+ def _open(self, path, mode="rb", **kwargs):
838
+ path = self._strip_protocol(path)
839
+ sha = self._mapper(path)
840
+
841
+ if "r" not in mode:
842
+ fn = os.path.join(self.storage[-1], sha)
843
+ user_specified_kwargs = {
844
+ k: v
845
+ for k, v in kwargs.items()
846
+ if k not in ["autocommit", "block_size", "cache_options"]
847
+ } # those were added by open()
848
+ return LocalTempFile(
849
+ self,
850
+ path,
851
+ mode=mode,
852
+ autocommit=not self._intrans,
853
+ fn=fn,
854
+ **user_specified_kwargs,
855
+ )
856
+ fn = self._check_file(path)
857
+ if fn:
858
+ return open(fn, mode)
859
+
860
+ fn = os.path.join(self.storage[-1], sha)
861
+ logger.debug("Copying %s to local cache", path)
862
+ kwargs["mode"] = mode
863
+
864
+ self._mkcache()
865
+ self._cache_size = None
866
+ if self.compression:
867
+ with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
868
+ if isinstance(f, AbstractBufferedFile):
869
+ # want no type of caching if just downloading whole thing
870
+ f.cache = BaseCache(0, f.cache.fetcher, f.size)
871
+ comp = (
872
+ infer_compression(path)
873
+ if self.compression == "infer"
874
+ else self.compression
875
+ )
876
+ f = compr[comp](f, mode="rb")
877
+ data = True
878
+ while data:
879
+ block = getattr(f, "blocksize", 5 * 2**20)
880
+ data = f.read(block)
881
+ f2.write(data)
882
+ else:
883
+ self.fs.get_file(path, fn)
884
+ return self._open(path, mode)
885
+
886
+
887
+ class LocalTempFile:
888
+ """A temporary local file, which will be uploaded on commit"""
889
+
890
+ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
891
+ self.fn = fn
892
+ self.fh = open(fn, mode)
893
+ self.mode = mode
894
+ if seek:
895
+ self.fh.seek(seek)
896
+ self.path = path
897
+ self.size = None
898
+ self.fs = fs
899
+ self.closed = False
900
+ self.autocommit = autocommit
901
+ self.kwargs = kwargs
902
+
903
+ def __reduce__(self):
904
+ # always open in r+b to allow continuing writing at a location
905
+ return (
906
+ LocalTempFile,
907
+ (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
908
+ )
909
+
910
+ def __enter__(self):
911
+ return self.fh
912
+
913
+ def __exit__(self, exc_type, exc_val, exc_tb):
914
+ self.close()
915
+
916
+ def close(self):
917
+ # self.size = self.fh.tell()
918
+ if self.closed:
919
+ return
920
+ self.fh.close()
921
+ self.closed = True
922
+ if self.autocommit:
923
+ self.commit()
924
+
925
+ def discard(self):
926
+ self.fh.close()
927
+ os.remove(self.fn)
928
+
929
+ def commit(self):
930
+ self.fs.put(self.fn, self.path, **self.kwargs)
931
+ # we do not delete local copy - it's still in the cache
932
+
933
+ @property
934
+ def name(self):
935
+ return self.fn
936
+
937
+ def __repr__(self) -> str:
938
+ return f"LocalTempFile: {self.path}"
939
+
940
+ def __getattr__(self, item):
941
+ return getattr(self.fh, item)
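A minimal usage sketch of the whole-file caching added above, using URL chaining so that the remote file is downloaded once into a local cache directory and re-read locally afterwards. The URL and cache path are placeholders, not part of this diff.

    import fsspec

    # First access copies the whole file into /tmp/fsspec-cache;
    # subsequent opens of the same URL read the local copy.
    of = fsspec.open(
        "simplecache::https://example.com/data.csv",
        simplecache={"cache_storage": "/tmp/fsspec-cache"},
    )
    with of as f:
        header = f.read(1024)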
temp_venv/lib/python3.13/site-packages/fsspec/implementations/dask.py ADDED
@@ -0,0 +1,152 @@
1
+ import dask
2
+ from distributed.client import Client, _get_global_client
3
+ from distributed.worker import Worker
4
+
5
+ from fsspec import filesystem
6
+ from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
7
+ from fsspec.utils import infer_storage_options
8
+
9
+
10
+ def _get_client(client):
11
+ if client is None:
12
+ return _get_global_client()
13
+ elif isinstance(client, Client):
14
+ return client
15
+ else:
16
+ # e.g., connection string
17
+ return Client(client)
18
+
19
+
20
+ def _in_worker():
21
+ return bool(Worker._instances)
22
+
23
+
24
+ class DaskWorkerFileSystem(AbstractFileSystem):
25
+ """View files accessible to a worker as any other remote file-system
26
+
27
+ When instances run on a worker, they use the real filesystem. When
28
+ run on the client, they call the worker to provide information or data.
29
+
30
+ **Warning**: this implementation is experimental and read-only for now.
31
+ """
32
+
33
+ def __init__(
34
+ self, target_protocol=None, target_options=None, fs=None, client=None, **kwargs
35
+ ):
36
+ super().__init__(**kwargs)
37
+ if not (fs is None) ^ (target_protocol is None):
38
+ raise ValueError(
39
+ "Please provide one of filesystem instance (fs) or"
40
+ " target_protocol, not both"
41
+ )
42
+ self.target_protocol = target_protocol
43
+ self.target_options = target_options
44
+ self.worker = None
45
+ self.client = client
46
+ self.fs = fs
47
+ self._determine_worker()
48
+
49
+ @staticmethod
50
+ def _get_kwargs_from_urls(path):
51
+ so = infer_storage_options(path)
52
+ if "host" in so and "port" in so:
53
+ return {"client": f"{so['host']}:{so['port']}"}
54
+ else:
55
+ return {}
56
+
57
+ def _determine_worker(self):
58
+ if _in_worker():
59
+ self.worker = True
60
+ if self.fs is None:
61
+ self.fs = filesystem(
62
+ self.target_protocol, **(self.target_options or {})
63
+ )
64
+ else:
65
+ self.worker = False
66
+ self.client = _get_client(self.client)
67
+ self.rfs = dask.delayed(self)
68
+
69
+ def mkdir(self, *args, **kwargs):
70
+ if self.worker:
71
+ self.fs.mkdir(*args, **kwargs)
72
+ else:
73
+ self.rfs.mkdir(*args, **kwargs).compute()
74
+
75
+ def rm(self, *args, **kwargs):
76
+ if self.worker:
77
+ self.fs.rm(*args, **kwargs)
78
+ else:
79
+ self.rfs.rm(*args, **kwargs).compute()
80
+
81
+ def copy(self, *args, **kwargs):
82
+ if self.worker:
83
+ self.fs.copy(*args, **kwargs)
84
+ else:
85
+ self.rfs.copy(*args, **kwargs).compute()
86
+
87
+ def mv(self, *args, **kwargs):
88
+ if self.worker:
89
+ self.fs.mv(*args, **kwargs)
90
+ else:
91
+ self.rfs.mv(*args, **kwargs).compute()
92
+
93
+ def ls(self, *args, **kwargs):
94
+ if self.worker:
95
+ return self.fs.ls(*args, **kwargs)
96
+ else:
97
+ return self.rfs.ls(*args, **kwargs).compute()
98
+
99
+ def _open(
100
+ self,
101
+ path,
102
+ mode="rb",
103
+ block_size=None,
104
+ autocommit=True,
105
+ cache_options=None,
106
+ **kwargs,
107
+ ):
108
+ if self.worker:
109
+ return self.fs._open(
110
+ path,
111
+ mode=mode,
112
+ block_size=block_size,
113
+ autocommit=autocommit,
114
+ cache_options=cache_options,
115
+ **kwargs,
116
+ )
117
+ else:
118
+ return DaskFile(
119
+ fs=self,
120
+ path=path,
121
+ mode=mode,
122
+ block_size=block_size,
123
+ autocommit=autocommit,
124
+ cache_options=cache_options,
125
+ **kwargs,
126
+ )
127
+
128
+ def fetch_range(self, path, mode, start, end):
129
+ if self.worker:
130
+ with self._open(path, mode) as f:
131
+ f.seek(start)
132
+ return f.read(end - start)
133
+ else:
134
+ return self.rfs.fetch_range(path, mode, start, end).compute()
135
+
136
+
137
+ class DaskFile(AbstractBufferedFile):
138
+ def __init__(self, mode="rb", **kwargs):
139
+ if mode != "rb":
140
+ raise ValueError('Remote dask files can only be opened in "rb" mode')
141
+ super().__init__(**kwargs)
142
+
143
+ def _upload_chunk(self, final=False):
144
+ pass
145
+
146
+ def _initiate_upload(self):
147
+ """Create remote file/upload"""
148
+ pass
149
+
150
+ def _fetch_range(self, start, end):
151
+ """Get the specified set of bytes from remote"""
152
+ return self.fs.fetch_range(self.path, self.mode, start, end)
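As a rough, hedged sketch of how the worker-proxy filesystem above can be driven from the client: it assumes a running dask.distributed cluster whose workers can reach the target storage, and the bucket and paths are placeholders.

    import fsspec
    from distributed import Client

    client = Client()                                  # connect to (or start) a scheduler
    fs = fsspec.filesystem("dask", target_protocol="s3")
    print(fs.ls("my-bucket/data/"))                    # listing is computed on a worker
    with fs.open("my-bucket/data/part-0.bin", "rb") as f:
        chunk = f.read(2**20)                          # bytes arrive via fetch_range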
temp_venv/lib/python3.13/site-packages/fsspec/implementations/data.py ADDED
@@ -0,0 +1,58 @@
1
+ import base64
2
+ import io
3
+ from typing import Optional
4
+ from urllib.parse import unquote
5
+
6
+ from fsspec import AbstractFileSystem
7
+
8
+
9
+ class DataFileSystem(AbstractFileSystem):
10
+ """A handy decoder for data-URLs
11
+
12
+ Example
13
+ -------
14
+ >>> with fsspec.open("data:,Hello%2C%20World%21") as f:
15
+ ... print(f.read())
16
+ b"Hello, World!"
17
+
18
+ See https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs
19
+ """
20
+
21
+ protocol = "data"
22
+
23
+ def __init__(self, **kwargs):
24
+ """No parameters for this filesystem"""
25
+ super().__init__(**kwargs)
26
+
27
+ def cat_file(self, path, start=None, end=None, **kwargs):
28
+ pref, data = path.split(",", 1)
29
+ if pref.endswith("base64"):
30
+ return base64.b64decode(data)[start:end]
31
+ return unquote(data).encode()[start:end]
32
+
33
+ def info(self, path, **kwargs):
34
+ pref, name = path.split(",", 1)
35
+ data = self.cat_file(path)
36
+ mime = pref.split(":", 1)[1].split(";", 1)[0]
37
+ return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
38
+
39
+ def _open(
40
+ self,
41
+ path,
42
+ mode="rb",
43
+ block_size=None,
44
+ autocommit=True,
45
+ cache_options=None,
46
+ **kwargs,
47
+ ):
48
+ if "r" not in mode:
49
+ raise ValueError("Read only filesystem")
50
+ return io.BytesIO(self.cat_file(path))
51
+
52
+ @staticmethod
53
+ def encode(data: bytes, mime: Optional[str] = None):
54
+ """Format the given data into data-URL syntax
55
+
56
+ This version always base64 encodes, even when the data is ascii/url-safe.
57
+ """
58
+ return f"data:{mime or ''};base64,{base64.b64encode(data).decode()}"
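A quick round-trip sketch of the data-URL decoder above, mirroring the class docstring: encode bytes into a data URL, then read them back through fsspec.

    import fsspec
    from fsspec.implementations.data import DataFileSystem

    url = DataFileSystem.encode(b"Hello, World!", mime="text/plain")
    with fsspec.open(url) as f:
        assert f.read() == b"Hello, World!"   # decoded straight from the URL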
temp_venv/lib/python3.13/site-packages/fsspec/implementations/dbfs.py ADDED
@@ -0,0 +1,467 @@
1
+ import base64
2
+ import urllib
3
+
4
+ import requests
5
+ import requests.exceptions
6
+ from requests.adapters import HTTPAdapter, Retry
7
+
8
+ from fsspec import AbstractFileSystem
9
+ from fsspec.spec import AbstractBufferedFile
10
+
11
+
12
+ class DatabricksException(Exception):
13
+ """
14
+ Helper class for exceptions raised in this module.
15
+ """
16
+
17
+ def __init__(self, error_code, message):
18
+ """Create a new DatabricksException"""
19
+ super().__init__(message)
20
+
21
+ self.error_code = error_code
22
+ self.message = message
23
+
24
+
25
+ class DatabricksFileSystem(AbstractFileSystem):
26
+ """
27
+ Get access to the Databricks filesystem implementation over HTTP.
28
+ Can be used inside and outside of a databricks cluster.
29
+ """
30
+
31
+ def __init__(self, instance, token, **kwargs):
32
+ """
33
+ Create a new DatabricksFileSystem.
34
+
35
+ Parameters
36
+ ----------
37
+ instance: str
38
+ The instance URL of the databricks cluster.
39
+ For example for an Azure databricks cluster, this
40
+ has the form adb-<some-number>.<two digits>.azuredatabricks.net.
41
+ token: str
42
+ Your personal token. Find out more
43
+ here: https://docs.databricks.com/dev-tools/api/latest/authentication.html
44
+ """
45
+ self.instance = instance
46
+ self.token = token
47
+ self.session = requests.Session()
48
+ self.retries = Retry(
49
+ total=10,
50
+ backoff_factor=0.05,
51
+ status_forcelist=[408, 429, 500, 502, 503, 504],
52
+ )
53
+
54
+ self.session.mount("https://", HTTPAdapter(max_retries=self.retries))
55
+ self.session.headers.update({"Authorization": f"Bearer {self.token}"})
56
+
57
+ super().__init__(**kwargs)
58
+
59
+ def ls(self, path, detail=True, **kwargs):
60
+ """
61
+ List the contents of the given path.
62
+
63
+ Parameters
64
+ ----------
65
+ path: str
66
+ Absolute path
67
+ detail: bool
68
+ Return not only the list of filenames,
69
+ but also additional information on file sizes
70
+ and types.
71
+ """
72
+ out = self._ls_from_cache(path)
73
+ if not out:
74
+ try:
75
+ r = self._send_to_api(
76
+ method="get", endpoint="list", json={"path": path}
77
+ )
78
+ except DatabricksException as e:
79
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
80
+ raise FileNotFoundError(e.message) from e
81
+
82
+ raise
83
+ files = r["files"]
84
+ out = [
85
+ {
86
+ "name": o["path"],
87
+ "type": "directory" if o["is_dir"] else "file",
88
+ "size": o["file_size"],
89
+ }
90
+ for o in files
91
+ ]
92
+ self.dircache[path] = out
93
+
94
+ if detail:
95
+ return out
96
+ return [o["name"] for o in out]
97
+
98
+ def makedirs(self, path, exist_ok=True):
99
+ """
100
+ Create a given absolute path and all of its parents.
101
+
102
+ Parameters
103
+ ----------
104
+ path: str
105
+ Absolute path to create
106
+ exist_ok: bool
107
+ If false, check whether the folder
108
+ already exists before creating it (and raise a
109
+ FileExistsError if it does)
110
+ """
111
+ if not exist_ok:
112
+ try:
113
+ # If the following succeeds, the path is already present
114
+ self._send_to_api(
115
+ method="get", endpoint="get-status", json={"path": path}
116
+ )
117
+ raise FileExistsError(f"Path {path} already exists")
118
+ except DatabricksException as e:
119
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
120
+ pass
121
+
122
+ try:
123
+ self._send_to_api(method="post", endpoint="mkdirs", json={"path": path})
124
+ except DatabricksException as e:
125
+ if e.error_code == "RESOURCE_ALREADY_EXISTS":
126
+ raise FileExistsError(e.message) from e
127
+
128
+ raise
129
+ self.invalidate_cache(self._parent(path))
130
+
131
+ def mkdir(self, path, create_parents=True, **kwargs):
132
+ """
133
+ Create a given absolute path and all of its parents.
134
+
135
+ Parameters
136
+ ----------
137
+ path: str
138
+ Absolute path to create
139
+ create_parents: bool
140
+ Whether to create all parents or not.
141
+ "False" is not implemented so far.
142
+ """
143
+ if not create_parents:
144
+ raise NotImplementedError
145
+
146
+ self.mkdirs(path, **kwargs)
147
+
148
+ def rm(self, path, recursive=False, **kwargs):
149
+ """
150
+ Remove the file or folder at the given absolute path.
151
+
152
+ Parameters
153
+ ----------
154
+ path: str
155
+ Absolute path what to remove
156
+ recursive: bool
157
+ Recursively delete all files in a folder.
158
+ """
159
+ try:
160
+ self._send_to_api(
161
+ method="post",
162
+ endpoint="delete",
163
+ json={"path": path, "recursive": recursive},
164
+ )
165
+ except DatabricksException as e:
166
+ # This is not really an exception, it just means
167
+ # not everything was deleted so far
168
+ if e.error_code == "PARTIAL_DELETE":
169
+ self.rm(path=path, recursive=recursive)
170
+ elif e.error_code == "IO_ERROR":
171
+ # Using the same exception as the os module would use here
172
+ raise OSError(e.message) from e
173
+
174
+ raise
175
+ self.invalidate_cache(self._parent(path))
176
+
177
+ def mv(
178
+ self, source_path, destination_path, recursive=False, maxdepth=None, **kwargs
179
+ ):
180
+ """
181
+ Move a source to a destination path.
182
+
183
+ A note from the original [databricks API manual]
184
+ (https://docs.databricks.com/dev-tools/api/latest/dbfs.html#move).
185
+
186
+ When moving a large number of files the API call will time out after
187
+ approximately 60s, potentially resulting in partially moved data.
188
+ Therefore, for operations that move more than 10k files, we strongly
189
+ discourage using the DBFS REST API.
190
+
191
+ Parameters
192
+ ----------
193
+ source_path: str
194
+ From where to move (absolute path)
195
+ destination_path: str
196
+ To where to move (absolute path)
197
+ recursive: bool
198
+ Not implemented so far.
199
+ maxdepth:
200
+ Not implemented so far.
201
+ """
202
+ if recursive:
203
+ raise NotImplementedError
204
+ if maxdepth:
205
+ raise NotImplementedError
206
+
207
+ try:
208
+ self._send_to_api(
209
+ method="post",
210
+ endpoint="move",
211
+ json={"source_path": source_path, "destination_path": destination_path},
212
+ )
213
+ except DatabricksException as e:
214
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
215
+ raise FileNotFoundError(e.message) from e
216
+ elif e.error_code == "RESOURCE_ALREADY_EXISTS":
217
+ raise FileExistsError(e.message) from e
218
+
219
+ raise
220
+ self.invalidate_cache(self._parent(source_path))
221
+ self.invalidate_cache(self._parent(destination_path))
222
+
223
+ def _open(self, path, mode="rb", block_size="default", **kwargs):
224
+ """
225
+ Overwrite the base class method to make sure to create a DBFile.
226
+ All arguments are copied from the base method.
227
+
228
+ Only the default blocksize is allowed.
229
+ """
230
+ return DatabricksFile(self, path, mode=mode, block_size=block_size, **kwargs)
231
+
232
+ def _send_to_api(self, method, endpoint, json):
233
+ """
234
+ Send the given json to the DBFS API
235
+ using a get or post request (specified by the argument `method`).
236
+
237
+ Parameters
238
+ ----------
239
+ method: str
240
+ Which http method to use for communication; "get" or "post".
241
+ endpoint: str
242
+ Where to send the request to (last part of the API URL)
243
+ json: dict
244
+ Dictionary of information to send
245
+ """
246
+ if method == "post":
247
+ session_call = self.session.post
248
+ elif method == "get":
249
+ session_call = self.session.get
250
+ else:
251
+ raise ValueError(f"Do not understand method {method}")
252
+
253
+ url = urllib.parse.urljoin(f"https://{self.instance}/api/2.0/dbfs/", endpoint)
254
+
255
+ r = session_call(url, json=json)
256
+
257
+ # The DBFS API will return a json, also in case of an exception.
258
+ # We want to preserve this information as good as possible.
259
+ try:
260
+ r.raise_for_status()
261
+ except requests.HTTPError as e:
262
+ # try to extract json error message
263
+ # if that fails, fall back to the original exception
264
+ try:
265
+ exception_json = e.response.json()
266
+ except Exception:
267
+ raise e from None
268
+
269
+ raise DatabricksException(**exception_json) from e
270
+
271
+ return r.json()
272
+
273
+ def _create_handle(self, path, overwrite=True):
274
+ """
275
+ Internal function to create a handle, which can be used to
276
+ write blocks of a file to DBFS.
277
+ A handle has a unique identifier which needs to be passed
278
+ whenever written during this transaction.
279
+ The handle is active for 10 minutes - after that a new
280
+ write transaction needs to be created.
281
+ Make sure to close the handle after you are finished.
282
+
283
+ Parameters
284
+ ----------
285
+ path: str
286
+ Absolute path for this file.
287
+ overwrite: bool
288
+ If a file already exist at this location, either overwrite
289
+ it or raise an exception.
290
+ """
291
+ try:
292
+ r = self._send_to_api(
293
+ method="post",
294
+ endpoint="create",
295
+ json={"path": path, "overwrite": overwrite},
296
+ )
297
+ return r["handle"]
298
+ except DatabricksException as e:
299
+ if e.error_code == "RESOURCE_ALREADY_EXISTS":
300
+ raise FileExistsError(e.message) from e
301
+
302
+ raise
303
+
304
+ def _close_handle(self, handle):
305
+ """
306
+ Close a handle, which was opened by :func:`_create_handle`.
307
+
308
+ Parameters
309
+ ----------
310
+ handle: str
311
+ Which handle to close.
312
+ """
313
+ try:
314
+ self._send_to_api(method="post", endpoint="close", json={"handle": handle})
315
+ except DatabricksException as e:
316
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
317
+ raise FileNotFoundError(e.message) from e
318
+
319
+ raise
320
+
321
+ def _add_data(self, handle, data):
322
+ """
323
+ Upload data to an already opened file handle
324
+ (opened by :func:`_create_handle`).
325
+ The maximal allowed data size is 1MB after
326
+ conversion to base64.
327
+ Remember to close the handle when you are finished.
328
+
329
+ Parameters
330
+ ----------
331
+ handle: str
332
+ Which handle to upload data to.
333
+ data: bytes
334
+ Block of data to add to the handle.
335
+ """
336
+ data = base64.b64encode(data).decode()
337
+ try:
338
+ self._send_to_api(
339
+ method="post",
340
+ endpoint="add-block",
341
+ json={"handle": handle, "data": data},
342
+ )
343
+ except DatabricksException as e:
344
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
345
+ raise FileNotFoundError(e.message) from e
346
+ elif e.error_code == "MAX_BLOCK_SIZE_EXCEEDED":
347
+ raise ValueError(e.message) from e
348
+
349
+ raise
350
+
351
+ def _get_data(self, path, start, end):
352
+ """
353
+ Download data in bytes from a given absolute path in a block
354
+ from [start, start+length].
355
+ The maximum number of allowed bytes to read is 1MB.
356
+
357
+ Parameters
358
+ ----------
359
+ path: str
360
+ Absolute path to download data from
361
+ start: int
362
+ Start position of the block
363
+ end: int
364
+ End position of the block
365
+ """
366
+ try:
367
+ r = self._send_to_api(
368
+ method="get",
369
+ endpoint="read",
370
+ json={"path": path, "offset": start, "length": end - start},
371
+ )
372
+ return base64.b64decode(r["data"])
373
+ except DatabricksException as e:
374
+ if e.error_code == "RESOURCE_DOES_NOT_EXIST":
375
+ raise FileNotFoundError(e.message) from e
376
+ elif e.error_code in ["INVALID_PARAMETER_VALUE", "MAX_READ_SIZE_EXCEEDED"]:
377
+ raise ValueError(e.message) from e
378
+
379
+ raise
380
+
381
+ def invalidate_cache(self, path=None):
382
+ if path is None:
383
+ self.dircache.clear()
384
+ else:
385
+ self.dircache.pop(path, None)
386
+ super().invalidate_cache(path)
387
+
388
+
389
+ class DatabricksFile(AbstractBufferedFile):
390
+ """
391
+ Helper class for files referenced in the DatabricksFileSystem.
392
+ """
393
+
394
+ DEFAULT_BLOCK_SIZE = 1 * 2**20 # only allowed block size
395
+
396
+ def __init__(
397
+ self,
398
+ fs,
399
+ path,
400
+ mode="rb",
401
+ block_size="default",
402
+ autocommit=True,
403
+ cache_type="readahead",
404
+ cache_options=None,
405
+ **kwargs,
406
+ ):
407
+ """
408
+ Create a new instance of the DatabricksFile.
409
+
410
+ The blocksize needs to be the default one.
411
+ """
412
+ if block_size is None or block_size == "default":
413
+ block_size = self.DEFAULT_BLOCK_SIZE
414
+
415
+ assert block_size == self.DEFAULT_BLOCK_SIZE, (
416
+ f"Only the default block size is allowed, not {block_size}"
417
+ )
418
+
419
+ super().__init__(
420
+ fs,
421
+ path,
422
+ mode=mode,
423
+ block_size=block_size,
424
+ autocommit=autocommit,
425
+ cache_type=cache_type,
426
+ cache_options=cache_options or {},
427
+ **kwargs,
428
+ )
429
+
430
+ def _initiate_upload(self):
431
+ """Internal function to start a file upload"""
432
+ self.handle = self.fs._create_handle(self.path)
433
+
434
+ def _upload_chunk(self, final=False):
435
+ """Internal function to add a chunk of data to a started upload"""
436
+ self.buffer.seek(0)
437
+ data = self.buffer.getvalue()
438
+
439
+ data_chunks = [
440
+ data[start:end] for start, end in self._to_sized_blocks(len(data))
441
+ ]
442
+
443
+ for data_chunk in data_chunks:
444
+ self.fs._add_data(handle=self.handle, data=data_chunk)
445
+
446
+ if final:
447
+ self.fs._close_handle(handle=self.handle)
448
+ return True
449
+
450
+ def _fetch_range(self, start, end):
451
+ """Internal function to download a block of data"""
452
+ return_buffer = b""
453
+ length = end - start
454
+ for chunk_start, chunk_end in self._to_sized_blocks(length, start):
455
+ return_buffer += self.fs._get_data(
456
+ path=self.path, start=chunk_start, end=chunk_end
457
+ )
458
+
459
+ return return_buffer
460
+
461
+ def _to_sized_blocks(self, length, start=0):
462
+ """Helper function to split the range [start, start + length) into block-sized chunks"""
463
+ end = start + length
464
+ for data_chunk in range(start, end, self.blocksize):
465
+ data_start = data_chunk
466
+ data_end = min(end, data_chunk + self.blocksize)
467
+ yield data_start, data_end
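A hedged usage sketch for the DBFS backend above; the instance URL, token and paths are placeholders that must come from your own Databricks workspace.

    import fsspec

    fs = fsspec.filesystem(
        "dbfs",
        instance="adb-1234567890123456.7.azuredatabricks.net",  # placeholder
        token="<personal-access-token>",                        # placeholder
    )
    print(fs.ls("/FileStore"))
    with fs.open("/FileStore/example.txt", "wb") as f:
        f.write(b"uploaded in 1 MB base64-encoded blocks")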
temp_venv/lib/python3.13/site-packages/fsspec/implementations/dirfs.py ADDED
@@ -0,0 +1,388 @@
1
+ from .. import filesystem
2
+ from ..asyn import AsyncFileSystem
3
+
4
+
5
+ class DirFileSystem(AsyncFileSystem):
6
+ """Directory prefix filesystem
7
+
8
+ The DirFileSystem is a filesystem-wrapper. It assumes every path it is dealing with
9
+ is relative to the `path`. After performing the necessary paths operation it
10
+ delegates everything to the wrapped filesystem.
11
+ """
12
+
13
+ protocol = "dir"
14
+
15
+ def __init__(
16
+ self,
17
+ path=None,
18
+ fs=None,
19
+ fo=None,
20
+ target_protocol=None,
21
+ target_options=None,
22
+ **storage_options,
23
+ ):
24
+ """
25
+ Parameters
26
+ ----------
27
+ path: str
28
+ Path to the directory.
29
+ fs: AbstractFileSystem
30
+ An instantiated filesystem to wrap.
31
+ target_protocol, target_options:
32
+ if fs is none, construct it from these
33
+ fo: str
34
+ Alternate for path; do not provide both
35
+ """
36
+ super().__init__(**storage_options)
37
+ if fs is None:
38
+ fs = filesystem(protocol=target_protocol, **(target_options or {}))
39
+ path = path or fo
40
+
41
+ if self.asynchronous and not fs.async_impl:
42
+ raise ValueError("can't use asynchronous with non-async fs")
43
+
44
+ if fs.async_impl and self.asynchronous != fs.asynchronous:
45
+ raise ValueError("both dirfs and fs should be in the same sync/async mode")
46
+
47
+ self.path = fs._strip_protocol(path)
48
+ self.fs = fs
49
+
50
+ def _join(self, path):
51
+ if isinstance(path, str):
52
+ if not self.path:
53
+ return path
54
+ if not path:
55
+ return self.path
56
+ return self.fs.sep.join((self.path, self._strip_protocol(path)))
57
+ if isinstance(path, dict):
58
+ return {self._join(_path): value for _path, value in path.items()}
59
+ return [self._join(_path) for _path in path]
60
+
61
+ def _relpath(self, path):
62
+ if isinstance(path, str):
63
+ if not self.path:
64
+ return path
65
+ # We need to account for S3FileSystem returning paths that do not
66
+ # start with a '/'
67
+ if path == self.path or (
68
+ self.path.startswith(self.fs.sep) and path == self.path[1:]
69
+ ):
70
+ return ""
71
+ prefix = self.path + self.fs.sep
72
+ if self.path.startswith(self.fs.sep) and not path.startswith(self.fs.sep):
73
+ prefix = prefix[1:]
74
+ assert path.startswith(prefix)
75
+ return path[len(prefix) :]
76
+ return [self._relpath(_path) for _path in path]
77
+
78
+ # Wrappers below
79
+
80
+ @property
81
+ def sep(self):
82
+ return self.fs.sep
83
+
84
+ async def set_session(self, *args, **kwargs):
85
+ return await self.fs.set_session(*args, **kwargs)
86
+
87
+ async def _rm_file(self, path, **kwargs):
88
+ return await self.fs._rm_file(self._join(path), **kwargs)
89
+
90
+ def rm_file(self, path, **kwargs):
91
+ return self.fs.rm_file(self._join(path), **kwargs)
92
+
93
+ async def _rm(self, path, *args, **kwargs):
94
+ return await self.fs._rm(self._join(path), *args, **kwargs)
95
+
96
+ def rm(self, path, *args, **kwargs):
97
+ return self.fs.rm(self._join(path), *args, **kwargs)
98
+
99
+ async def _cp_file(self, path1, path2, **kwargs):
100
+ return await self.fs._cp_file(self._join(path1), self._join(path2), **kwargs)
101
+
102
+ def cp_file(self, path1, path2, **kwargs):
103
+ return self.fs.cp_file(self._join(path1), self._join(path2), **kwargs)
104
+
105
+ async def _copy(
106
+ self,
107
+ path1,
108
+ path2,
109
+ *args,
110
+ **kwargs,
111
+ ):
112
+ return await self.fs._copy(
113
+ self._join(path1),
114
+ self._join(path2),
115
+ *args,
116
+ **kwargs,
117
+ )
118
+
119
+ def copy(self, path1, path2, *args, **kwargs):
120
+ return self.fs.copy(
121
+ self._join(path1),
122
+ self._join(path2),
123
+ *args,
124
+ **kwargs,
125
+ )
126
+
127
+ async def _pipe(self, path, *args, **kwargs):
128
+ return await self.fs._pipe(self._join(path), *args, **kwargs)
129
+
130
+ def pipe(self, path, *args, **kwargs):
131
+ return self.fs.pipe(self._join(path), *args, **kwargs)
132
+
133
+ async def _pipe_file(self, path, *args, **kwargs):
134
+ return await self.fs._pipe_file(self._join(path), *args, **kwargs)
135
+
136
+ def pipe_file(self, path, *args, **kwargs):
137
+ return self.fs.pipe_file(self._join(path), *args, **kwargs)
138
+
139
+ async def _cat_file(self, path, *args, **kwargs):
140
+ return await self.fs._cat_file(self._join(path), *args, **kwargs)
141
+
142
+ def cat_file(self, path, *args, **kwargs):
143
+ return self.fs.cat_file(self._join(path), *args, **kwargs)
144
+
145
+ async def _cat(self, path, *args, **kwargs):
146
+ ret = await self.fs._cat(
147
+ self._join(path),
148
+ *args,
149
+ **kwargs,
150
+ )
151
+
152
+ if isinstance(ret, dict):
153
+ return {self._relpath(key): value for key, value in ret.items()}
154
+
155
+ return ret
156
+
157
+ def cat(self, path, *args, **kwargs):
158
+ ret = self.fs.cat(
159
+ self._join(path),
160
+ *args,
161
+ **kwargs,
162
+ )
163
+
164
+ if isinstance(ret, dict):
165
+ return {self._relpath(key): value for key, value in ret.items()}
166
+
167
+ return ret
168
+
169
+ async def _put_file(self, lpath, rpath, **kwargs):
170
+ return await self.fs._put_file(lpath, self._join(rpath), **kwargs)
171
+
172
+ def put_file(self, lpath, rpath, **kwargs):
173
+ return self.fs.put_file(lpath, self._join(rpath), **kwargs)
174
+
175
+ async def _put(
176
+ self,
177
+ lpath,
178
+ rpath,
179
+ *args,
180
+ **kwargs,
181
+ ):
182
+ return await self.fs._put(
183
+ lpath,
184
+ self._join(rpath),
185
+ *args,
186
+ **kwargs,
187
+ )
188
+
189
+ def put(self, lpath, rpath, *args, **kwargs):
190
+ return self.fs.put(
191
+ lpath,
192
+ self._join(rpath),
193
+ *args,
194
+ **kwargs,
195
+ )
196
+
197
+ async def _get_file(self, rpath, lpath, **kwargs):
198
+ return await self.fs._get_file(self._join(rpath), lpath, **kwargs)
199
+
200
+ def get_file(self, rpath, lpath, **kwargs):
201
+ return self.fs.get_file(self._join(rpath), lpath, **kwargs)
202
+
203
+ async def _get(self, rpath, *args, **kwargs):
204
+ return await self.fs._get(self._join(rpath), *args, **kwargs)
205
+
206
+ def get(self, rpath, *args, **kwargs):
207
+ return self.fs.get(self._join(rpath), *args, **kwargs)
208
+
209
+ async def _isfile(self, path):
210
+ return await self.fs._isfile(self._join(path))
211
+
212
+ def isfile(self, path):
213
+ return self.fs.isfile(self._join(path))
214
+
215
+ async def _isdir(self, path):
216
+ return await self.fs._isdir(self._join(path))
217
+
218
+ def isdir(self, path):
219
+ return self.fs.isdir(self._join(path))
220
+
221
+ async def _size(self, path):
222
+ return await self.fs._size(self._join(path))
223
+
224
+ def size(self, path):
225
+ return self.fs.size(self._join(path))
226
+
227
+ async def _exists(self, path):
228
+ return await self.fs._exists(self._join(path))
229
+
230
+ def exists(self, path):
231
+ return self.fs.exists(self._join(path))
232
+
233
+ async def _info(self, path, **kwargs):
234
+ info = await self.fs._info(self._join(path), **kwargs)
235
+ info = info.copy()
236
+ info["name"] = self._relpath(info["name"])
237
+ return info
238
+
239
+ def info(self, path, **kwargs):
240
+ info = self.fs.info(self._join(path), **kwargs)
241
+ info = info.copy()
242
+ info["name"] = self._relpath(info["name"])
243
+ return info
244
+
245
+ async def _ls(self, path, detail=True, **kwargs):
246
+ ret = (await self.fs._ls(self._join(path), detail=detail, **kwargs)).copy()
247
+ if detail:
248
+ out = []
249
+ for entry in ret:
250
+ entry = entry.copy()
251
+ entry["name"] = self._relpath(entry["name"])
252
+ out.append(entry)
253
+ return out
254
+
255
+ return self._relpath(ret)
256
+
257
+ def ls(self, path, detail=True, **kwargs):
258
+ ret = self.fs.ls(self._join(path), detail=detail, **kwargs).copy()
259
+ if detail:
260
+ out = []
261
+ for entry in ret:
262
+ entry = entry.copy()
263
+ entry["name"] = self._relpath(entry["name"])
264
+ out.append(entry)
265
+ return out
266
+
267
+ return self._relpath(ret)
268
+
269
+ async def _walk(self, path, *args, **kwargs):
270
+ async for root, dirs, files in self.fs._walk(self._join(path), *args, **kwargs):
271
+ yield self._relpath(root), dirs, files
272
+
273
+ def walk(self, path, *args, **kwargs):
274
+ for root, dirs, files in self.fs.walk(self._join(path), *args, **kwargs):
275
+ yield self._relpath(root), dirs, files
276
+
277
+ async def _glob(self, path, **kwargs):
278
+ detail = kwargs.get("detail", False)
279
+ ret = await self.fs._glob(self._join(path), **kwargs)
280
+ if detail:
281
+ return {self._relpath(path): info for path, info in ret.items()}
282
+ return self._relpath(ret)
283
+
284
+ def glob(self, path, **kwargs):
285
+ detail = kwargs.get("detail", False)
286
+ ret = self.fs.glob(self._join(path), **kwargs)
287
+ if detail:
288
+ return {self._relpath(path): info for path, info in ret.items()}
289
+ return self._relpath(ret)
290
+
291
+ async def _du(self, path, *args, **kwargs):
292
+ total = kwargs.get("total", True)
293
+ ret = await self.fs._du(self._join(path), *args, **kwargs)
294
+ if total:
295
+ return ret
296
+
297
+ return {self._relpath(path): size for path, size in ret.items()}
298
+
299
+ def du(self, path, *args, **kwargs):
300
+ total = kwargs.get("total", True)
301
+ ret = self.fs.du(self._join(path), *args, **kwargs)
302
+ if total:
303
+ return ret
304
+
305
+ return {self._relpath(path): size for path, size in ret.items()}
306
+
307
+ async def _find(self, path, *args, **kwargs):
308
+ detail = kwargs.get("detail", False)
309
+ ret = await self.fs._find(self._join(path), *args, **kwargs)
310
+ if detail:
311
+ return {self._relpath(path): info for path, info in ret.items()}
312
+ return self._relpath(ret)
313
+
314
+ def find(self, path, *args, **kwargs):
315
+ detail = kwargs.get("detail", False)
316
+ ret = self.fs.find(self._join(path), *args, **kwargs)
317
+ if detail:
318
+ return {self._relpath(path): info for path, info in ret.items()}
319
+ return self._relpath(ret)
320
+
321
+ async def _expand_path(self, path, *args, **kwargs):
322
+ return self._relpath(
323
+ await self.fs._expand_path(self._join(path), *args, **kwargs)
324
+ )
325
+
326
+ def expand_path(self, path, *args, **kwargs):
327
+ return self._relpath(self.fs.expand_path(self._join(path), *args, **kwargs))
328
+
329
+ async def _mkdir(self, path, *args, **kwargs):
330
+ return await self.fs._mkdir(self._join(path), *args, **kwargs)
331
+
332
+ def mkdir(self, path, *args, **kwargs):
333
+ return self.fs.mkdir(self._join(path), *args, **kwargs)
334
+
335
+ async def _makedirs(self, path, *args, **kwargs):
336
+ return await self.fs._makedirs(self._join(path), *args, **kwargs)
337
+
338
+ def makedirs(self, path, *args, **kwargs):
339
+ return self.fs.makedirs(self._join(path), *args, **kwargs)
340
+
341
+ def rmdir(self, path):
342
+ return self.fs.rmdir(self._join(path))
343
+
344
+ def mv(self, path1, path2, **kwargs):
345
+ return self.fs.mv(
346
+ self._join(path1),
347
+ self._join(path2),
348
+ **kwargs,
349
+ )
350
+
351
+ def touch(self, path, **kwargs):
352
+ return self.fs.touch(self._join(path), **kwargs)
353
+
354
+ def created(self, path):
355
+ return self.fs.created(self._join(path))
356
+
357
+ def modified(self, path):
358
+ return self.fs.modified(self._join(path))
359
+
360
+ def sign(self, path, *args, **kwargs):
361
+ return self.fs.sign(self._join(path), *args, **kwargs)
362
+
363
+ def __repr__(self):
364
+ return f"{self.__class__.__qualname__}(path='{self.path}', fs={self.fs})"
365
+
366
+ def open(
367
+ self,
368
+ path,
369
+ *args,
370
+ **kwargs,
371
+ ):
372
+ return self.fs.open(
373
+ self._join(path),
374
+ *args,
375
+ **kwargs,
376
+ )
377
+
378
+ async def open_async(
379
+ self,
380
+ path,
381
+ *args,
382
+ **kwargs,
383
+ ):
384
+ return await self.fs.open_async(
385
+ self._join(path),
386
+ *args,
387
+ **kwargs,
388
+ )
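A small, self-contained sketch of the prefix wrapper above, backed by an in-memory filesystem; the "/project" prefix is an arbitrary example.

    from fsspec.implementations.dirfs import DirFileSystem
    from fsspec.implementations.memory import MemoryFileSystem

    mem = MemoryFileSystem()
    mem.makedirs("/project/data", exist_ok=True)
    with mem.open("/project/data/a.txt", "wb") as f:
        f.write(b"hello")

    dirfs = DirFileSystem(path="/project", fs=mem)
    print(dirfs.ls("data"))              # entries reported relative to /project
    print(dirfs.cat_file("data/a.txt"))  # b"hello"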
temp_venv/lib/python3.13/site-packages/fsspec/implementations/ftp.py ADDED
@@ -0,0 +1,395 @@
1
+ import os
2
+ import sys
3
+ import uuid
4
+ import warnings
5
+ from ftplib import FTP, FTP_TLS, Error, error_perm
6
+ from typing import Any
7
+
8
+ from ..spec import AbstractBufferedFile, AbstractFileSystem
9
+ from ..utils import infer_storage_options, isfilelike
10
+
11
+
12
+ class FTPFileSystem(AbstractFileSystem):
13
+ """A filesystem over classic FTP"""
14
+
15
+ root_marker = "/"
16
+ cachable = False
17
+ protocol = "ftp"
18
+
19
+ def __init__(
20
+ self,
21
+ host,
22
+ port=21,
23
+ username=None,
24
+ password=None,
25
+ acct=None,
26
+ block_size=None,
27
+ tempdir=None,
28
+ timeout=30,
29
+ encoding="utf-8",
30
+ tls=False,
31
+ **kwargs,
32
+ ):
33
+ """
34
+ You can use _get_kwargs_from_urls to get some kwargs from
35
+ a reasonable FTP url.
36
+
37
+ Authentication will be anonymous if username/password are not
38
+ given.
39
+
40
+ Parameters
41
+ ----------
42
+ host: str
43
+ The remote server name/ip to connect to
44
+ port: int
45
+ Port to connect with
46
+ username: str or None
47
+ If authenticating, the user's identifier
48
+ password: str of None
49
+ User's password on the server, if using
50
+ acct: str or None
51
+ Some servers also need an "account" string for auth
52
+ block_size: int or None
53
+ If given, the read-ahead or write buffer size.
54
+ tempdir: str
55
+ Directory on remote to put temporary files when in a transaction
56
+ timeout: int
57
+ Timeout of the ftp connection in seconds
58
+ encoding: str
59
+ Encoding to use for directories and filenames in FTP connection
60
+ tls: bool
61
+ Use FTP-TLS, by default False
62
+ """
63
+ super().__init__(**kwargs)
64
+ self.host = host
65
+ self.port = port
66
+ self.tempdir = tempdir or "/tmp"
67
+ self.cred = username or "", password or "", acct or ""
68
+ self.timeout = timeout
69
+ self.encoding = encoding
70
+ if block_size is not None:
71
+ self.blocksize = block_size
72
+ else:
73
+ self.blocksize = 2**16
74
+ self.tls = tls
75
+ self._connect()
76
+ if self.tls:
77
+ self.ftp.prot_p()
78
+
79
+ def _connect(self):
80
+ if self.tls:
81
+ ftp_cls = FTP_TLS
82
+ else:
83
+ ftp_cls = FTP
84
+ if sys.version_info >= (3, 9):
85
+ self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
86
+ elif self.encoding:
87
+ warnings.warn("`encoding` not supported for python<3.9, ignoring")
88
+ self.ftp = ftp_cls(timeout=self.timeout)
89
+ else:
90
+ self.ftp = ftp_cls(timeout=self.timeout)
91
+ self.ftp.connect(self.host, self.port)
92
+ self.ftp.login(*self.cred)
93
+
94
+ @classmethod
95
+ def _strip_protocol(cls, path):
96
+ return "/" + infer_storage_options(path)["path"].lstrip("/").rstrip("/")
97
+
98
+ @staticmethod
99
+ def _get_kwargs_from_urls(urlpath):
100
+ out = infer_storage_options(urlpath)
101
+ out.pop("path", None)
102
+ out.pop("protocol", None)
103
+ return out
104
+
105
+ def ls(self, path, detail=True, **kwargs):
106
+ path = self._strip_protocol(path)
107
+ out = []
108
+ if path not in self.dircache:
109
+ try:
110
+ try:
111
+ out = [
112
+ (fn, details)
113
+ for (fn, details) in self.ftp.mlsd(path)
114
+ if fn not in [".", ".."]
115
+ and details["type"] not in ["pdir", "cdir"]
116
+ ]
117
+ except error_perm:
118
+ out = _mlsd2(self.ftp, path) # Not platform independent
119
+ for fn, details in out:
120
+ details["name"] = "/".join(
121
+ ["" if path == "/" else path, fn.lstrip("/")]
122
+ )
123
+ if details["type"] == "file":
124
+ details["size"] = int(details["size"])
125
+ else:
126
+ details["size"] = 0
127
+ if details["type"] == "dir":
128
+ details["type"] = "directory"
129
+ self.dircache[path] = out
130
+ except Error:
131
+ try:
132
+ info = self.info(path)
133
+ if info["type"] == "file":
134
+ out = [(path, info)]
135
+ except (Error, IndexError) as exc:
136
+ raise FileNotFoundError(path) from exc
137
+ files = self.dircache.get(path, out)
138
+ if not detail:
139
+ return sorted([fn for fn, details in files])
140
+ return [details for fn, details in files]
141
+
142
+ def info(self, path, **kwargs):
143
+ # implement with direct method
144
+ path = self._strip_protocol(path)
145
+ if path == "/":
146
+ # special case, since this dir has no real entry
147
+ return {"name": "/", "size": 0, "type": "directory"}
148
+ files = self.ls(self._parent(path).lstrip("/"), True)
149
+ try:
150
+ out = next(f for f in files if f["name"] == path)
151
+ except StopIteration as exc:
152
+ raise FileNotFoundError(path) from exc
153
+ return out
154
+
155
+ def get_file(self, rpath, lpath, **kwargs):
156
+ if self.isdir(rpath):
157
+ if not os.path.exists(lpath):
158
+ os.mkdir(lpath)
159
+ return
160
+ if isfilelike(lpath):
161
+ outfile = lpath
162
+ else:
163
+ outfile = open(lpath, "wb")
164
+
165
+ def cb(x):
166
+ outfile.write(x)
167
+
168
+ self.ftp.retrbinary(
169
+ f"RETR {rpath}",
170
+ blocksize=self.blocksize,
171
+ callback=cb,
172
+ )
173
+ if not isfilelike(lpath):
174
+ outfile.close()
175
+
176
+ def cat_file(self, path, start=None, end=None, **kwargs):
177
+ if end is not None:
178
+ return super().cat_file(path, start, end, **kwargs)
179
+ out = []
180
+
181
+ def cb(x):
182
+ out.append(x)
183
+
184
+ try:
185
+ self.ftp.retrbinary(
186
+ f"RETR {path}",
187
+ blocksize=self.blocksize,
188
+ rest=start,
189
+ callback=cb,
190
+ )
191
+ except (Error, error_perm) as orig_exc:
192
+ raise FileNotFoundError(path) from orig_exc
193
+ return b"".join(out)
194
+
195
+ def _open(
196
+ self,
197
+ path,
198
+ mode="rb",
199
+ block_size=None,
200
+ cache_options=None,
201
+ autocommit=True,
202
+ **kwargs,
203
+ ):
204
+ path = self._strip_protocol(path)
205
+ block_size = block_size or self.blocksize
206
+ return FTPFile(
207
+ self,
208
+ path,
209
+ mode=mode,
210
+ block_size=block_size,
211
+ tempdir=self.tempdir,
212
+ autocommit=autocommit,
213
+ cache_options=cache_options,
214
+ )
215
+
216
+ def _rm(self, path):
217
+ path = self._strip_protocol(path)
218
+ self.ftp.delete(path)
219
+ self.invalidate_cache(self._parent(path))
220
+
221
+ def rm(self, path, recursive=False, maxdepth=None):
222
+ paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
223
+ for p in reversed(paths):
224
+ if self.isfile(p):
225
+ self.rm_file(p)
226
+ else:
227
+ self.rmdir(p)
228
+
229
+ def mkdir(self, path: str, create_parents: bool = True, **kwargs: Any) -> None:
230
+ path = self._strip_protocol(path)
231
+ parent = self._parent(path)
232
+ if parent != self.root_marker and not self.exists(parent) and create_parents:
233
+ self.mkdir(parent, create_parents=create_parents)
234
+
235
+ self.ftp.mkd(path)
236
+ self.invalidate_cache(self._parent(path))
237
+
238
+ def makedirs(self, path: str, exist_ok: bool = False) -> None:
239
+ path = self._strip_protocol(path)
240
+ if self.exists(path):
241
+ # NB: "/" does not "exist" as it has no directory entry
242
+ if not exist_ok:
243
+ raise FileExistsError(f"{path} exists without `exist_ok`")
244
+ # exists_ok=True -> no-op
245
+ else:
246
+ self.mkdir(path, create_parents=True)
247
+
248
+ def rmdir(self, path):
249
+ path = self._strip_protocol(path)
250
+ self.ftp.rmd(path)
251
+ self.invalidate_cache(self._parent(path))
252
+
253
+ def mv(self, path1, path2, **kwargs):
254
+ path1 = self._strip_protocol(path1)
255
+ path2 = self._strip_protocol(path2)
256
+ self.ftp.rename(path1, path2)
257
+ self.invalidate_cache(self._parent(path1))
258
+ self.invalidate_cache(self._parent(path2))
259
+
260
+ def __del__(self):
261
+ self.ftp.close()
262
+
263
+ def invalidate_cache(self, path=None):
264
+ if path is None:
265
+ self.dircache.clear()
266
+ else:
267
+ self.dircache.pop(path, None)
268
+ super().invalidate_cache(path)
269
+
270
+
271
+ class TransferDone(Exception):
272
+ """Internal exception to break out of transfer"""
273
+
274
+ pass
275
+
276
+
277
+ class FTPFile(AbstractBufferedFile):
278
+ """Interact with a remote FTP file with read/write buffering"""
279
+
280
+ def __init__(
281
+ self,
282
+ fs,
283
+ path,
284
+ mode="rb",
285
+ block_size="default",
286
+ autocommit=True,
287
+ cache_type="readahead",
288
+ cache_options=None,
289
+ **kwargs,
290
+ ):
291
+ super().__init__(
292
+ fs,
293
+ path,
294
+ mode=mode,
295
+ block_size=block_size,
296
+ autocommit=autocommit,
297
+ cache_type=cache_type,
298
+ cache_options=cache_options,
299
+ **kwargs,
300
+ )
301
+ if not autocommit:
302
+ self.target = self.path
303
+ self.path = "/".join([kwargs["tempdir"], str(uuid.uuid4())])
304
+
305
+ def commit(self):
306
+ self.fs.mv(self.path, self.target)
307
+
308
+ def discard(self):
309
+ self.fs.rm(self.path)
310
+
311
+ def _fetch_range(self, start, end):
312
+ """Get bytes between given byte limits
313
+
314
+ Implemented by raising an exception in the fetch callback when the
315
+ number of bytes received reaches the requested amount.
316
+
317
+ Will fail if the server does not respect the REST command on
318
+ retrieve requests.
319
+ """
320
+ out = []
321
+ total = [0]
322
+
323
+ def callback(x):
324
+ total[0] += len(x)
325
+ if total[0] > end - start:
326
+ out.append(x[: (end - start) - total[0]])
327
+ if end < self.size:
328
+ raise TransferDone
329
+ else:
330
+ out.append(x)
331
+
332
+ if total[0] == end - start and end < self.size:
333
+ raise TransferDone
334
+
335
+ try:
336
+ self.fs.ftp.retrbinary(
337
+ f"RETR {self.path}",
338
+ blocksize=self.blocksize,
339
+ rest=start,
340
+ callback=callback,
341
+ )
342
+ except TransferDone:
343
+ try:
344
+ # stop transfer, we got enough bytes for this block
345
+ self.fs.ftp.abort()
346
+ self.fs.ftp.getmultiline()
347
+ except Error:
348
+ self.fs._connect()
349
+
350
+ return b"".join(out)
351
+
352
+ def _upload_chunk(self, final=False):
353
+ self.buffer.seek(0)
354
+ self.fs.ftp.storbinary(
355
+ f"STOR {self.path}", self.buffer, blocksize=self.blocksize, rest=self.offset
356
+ )
357
+ return True
358
+
359
+
360
+ def _mlsd2(ftp, path="."):
361
+ """
362
+ Fall back to using `dir` instead of `mlsd` if not supported.
363
+
364
+ This parses a Linux style `ls -l` response to `dir`, but the response may
365
+ be platform dependent.
366
+
367
+ Parameters
368
+ ----------
369
+ ftp: ftplib.FTP
370
+ path: str
371
+ Expects to be given path, but defaults to ".".
372
+ """
373
+ lines = []
374
+ minfo = []
375
+ ftp.dir(path, lines.append)
376
+ for line in lines:
377
+ split_line = line.split()
378
+ if len(split_line) < 9:
379
+ continue
380
+ this = (
381
+ split_line[-1],
382
+ {
383
+ "modify": " ".join(split_line[5:8]),
384
+ "unix.owner": split_line[2],
385
+ "unix.group": split_line[3],
386
+ "unix.mode": split_line[0],
387
+ "size": split_line[4],
388
+ },
389
+ )
390
+ if this[1]["unix.mode"][0] == "d":
391
+ this[1]["type"] = "dir"
392
+ else:
393
+ this[1]["type"] = "file"
394
+ minfo.append(this)
395
+ return minfo
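For context, a minimal sketch of the FTP backend above; host and credentials are placeholders (anonymous login is used when none are given).

    import fsspec

    fs = fsspec.filesystem(
        "ftp", host="ftp.example.com", port=21,
        username="user", password="secret",
    )
    print(fs.ls("/pub"))                          # MLSD listing, or the `dir` fallback
    fs.get_file("/pub/readme.txt", "readme.txt")  # streamed locally via RETR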
temp_venv/lib/python3.13/site-packages/fsspec/implementations/git.py ADDED
@@ -0,0 +1,115 @@
1
+ import os
2
+
3
+ import pygit2
4
+
5
+ from fsspec.spec import AbstractFileSystem
6
+
7
+ from .memory import MemoryFile
8
+
9
+
10
+ class GitFileSystem(AbstractFileSystem):
11
+ """Browse the files of a local git repo at any hash/tag/branch
12
+
13
+ (experimental backend)
14
+ """
15
+
16
+ root_marker = ""
17
+ cachable = True
18
+
19
+ def __init__(self, path=None, fo=None, ref=None, **kwargs):
20
+ """
21
+
22
+ Parameters
23
+ ----------
24
+ path: str (optional)
25
+ Local location of the repo (uses current directory if not given).
26
+ May be deprecated in favour of ``fo``. When used with a higher
27
+ level function such as fsspec.open(), may be of the form
28
+ "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
29
+ file path should not contain "@" or ":").
30
+ fo: str (optional)
31
+ Same as ``path``, but passed as part of a chained URL. This one
32
+ takes precedence if both are given.
33
+ ref: str (optional)
34
+ Reference to work with, could be a hash, tag or branch name. Defaults
35
+ to current working tree. Note that ``ls`` and ``open`` also take hash,
36
+ so this becomes the default for those operations
37
+ kwargs
38
+ """
39
+ super().__init__(**kwargs)
40
+ self.repo = pygit2.Repository(fo or path or os.getcwd())
41
+ self.ref = ref or "master"
42
+
43
+ @classmethod
44
+ def _strip_protocol(cls, path):
45
+ path = super()._strip_protocol(path).lstrip("/")
46
+ if ":" in path:
47
+ path = path.split(":", 1)[1]
48
+ if "@" in path:
49
+ path = path.split("@", 1)[1]
50
+ return path.lstrip("/")
51
+
52
+ def _path_to_object(self, path, ref):
53
+ comm, ref = self.repo.resolve_refish(ref or self.ref)
54
+ parts = path.split("/")
55
+ tree = comm.tree
56
+ for part in parts:
57
+ if part and isinstance(tree, pygit2.Tree):
58
+ if part not in tree:
59
+ raise FileNotFoundError(path)
60
+ tree = tree[part]
61
+ return tree
62
+
63
+ @staticmethod
64
+ def _get_kwargs_from_urls(path):
65
+ if path.startswith("git://"):
66
+ path = path[6:]
67
+ out = {}
68
+ if ":" in path:
69
+ out["path"], path = path.split(":", 1)
70
+ if "@" in path:
71
+ out["ref"], path = path.split("@", 1)
72
+ return out
73
+
74
+ @staticmethod
75
+ def _object_to_info(obj, path=None):
76
+ # obj.name and obj.filemode are None for the root tree!
77
+ is_dir = isinstance(obj, pygit2.Tree)
78
+ return {
79
+ "type": "directory" if is_dir else "file",
80
+ "name": (
81
+ "/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
82
+ ),
83
+ "hex": str(obj.id),
84
+ "mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
85
+ "size": 0 if is_dir else obj.size,
86
+ }
87
+
88
+ def ls(self, path, detail=True, ref=None, **kwargs):
89
+ tree = self._path_to_object(self._strip_protocol(path), ref)
90
+ return [
91
+ GitFileSystem._object_to_info(obj, path)
92
+ if detail
93
+ else GitFileSystem._object_to_info(obj, path)["name"]
94
+ for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
95
+ ]
96
+
97
+ def info(self, path, ref=None, **kwargs):
98
+ tree = self._path_to_object(self._strip_protocol(path), ref)
99
+ return GitFileSystem._object_to_info(tree, path)
100
+
101
+ def ukey(self, path, ref=None):
102
+ return self.info(path, ref=ref)["hex"]
103
+
104
+ def _open(
105
+ self,
106
+ path,
107
+ mode="rb",
108
+ block_size=None,
109
+ autocommit=True,
110
+ cache_options=None,
111
+ ref=None,
112
+ **kwargs,
113
+ ):
114
+ obj = self._path_to_object(path, ref or self.ref)
115
+ return MemoryFile(data=obj.data)
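A brief sketch of the git backend above; it assumes pygit2 is installed, a local clone exists at the placeholder path, and "main" is used as an example ref.

    import fsspec

    fs = fsspec.filesystem("git", path="/path/to/repo", ref="main")
    print(fs.ls(""))                  # tree listing at the chosen ref
    with fs.open("README.md", "rb") as f:
        print(f.read(80))             # content is served from a MemoryFile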
temp_venv/lib/python3.13/site-packages/fsspec/implementations/github.py ADDED
@@ -0,0 +1,267 @@
1
+ import base64
2
+
3
+ import requests
4
+
5
+ from ..spec import AbstractFileSystem
6
+ from ..utils import infer_storage_options
7
+ from .memory import MemoryFile
8
+
9
+ # TODO: add GIST backend, would be very similar
10
+
11
+
12
+ class GithubFileSystem(AbstractFileSystem):
13
+ """Interface to files in github
14
+
15
+ An instance of this class provides the files residing within a remote github
16
+ repository. You may specify a point in the repos history, by SHA, branch
17
+ or tag (default is current master).
18
+
19
+ For files less than 1 MB in size, file content is returned directly in a
20
+ MemoryFile. For larger files, or for files tracked by git-lfs, file content
21
+ is returned as an HTTPFile wrapping the ``download_url`` provided by the
22
+ GitHub API.
23
+
24
+ When using fsspec.open, allows URIs of the form:
25
+
26
+ - "github://path/file", in which case you must specify org, repo and
27
+ may specify sha in the extra args
28
+ - 'github://org:repo@/precip/catalog.yml', where the org and repo are
29
+ part of the URI
30
+ - 'github://org:repo@sha/precip/catalog.yml', where the sha is also included
31
+
32
+ ``sha`` can be the full or abbreviated hex of the commit you want to fetch
33
+ from, or a branch or tag name (so long as it doesn't contain special characters
34
+ like "/", "?", which would have to be HTTP-encoded).
35
+
36
+ For authorised access, you must provide username and token, which can be made
37
+ at https://github.com/settings/tokens
38
+ """
39
+
40
+ url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
41
+ content_url = "https://api.github.com/repos/{org}/{repo}/contents/{path}?ref={sha}"
42
+ protocol = "github"
43
+ timeout = (60, 60) # connect, read timeouts
44
+
45
+ def __init__(
46
+ self, org, repo, sha=None, username=None, token=None, timeout=None, **kwargs
47
+ ):
48
+ super().__init__(**kwargs)
49
+ self.org = org
50
+ self.repo = repo
51
+ if (username is None) ^ (token is None):
52
+ raise ValueError("Auth requires both username and token")
53
+ self.username = username
54
+ self.token = token
55
+ if timeout is not None:
56
+ self.timeout = timeout
57
+ if sha is None:
58
+ # look up default branch (not necessarily "master")
59
+ u = "https://api.github.com/repos/{org}/{repo}"
60
+ r = requests.get(
61
+ u.format(org=org, repo=repo), timeout=self.timeout, **self.kw
62
+ )
63
+ r.raise_for_status()
64
+ sha = r.json()["default_branch"]
65
+
66
+ self.root = sha
67
+ self.ls("")
68
+ try:
69
+ from .http import HTTPFileSystem
70
+
71
+ self.http_fs = HTTPFileSystem(**kwargs)
72
+ except ImportError:
73
+ self.http_fs = None
74
+
75
+ @property
76
+ def kw(self):
77
+ if self.username:
78
+ return {"auth": (self.username, self.token)}
79
+ return {}
80
+
81
+ @classmethod
82
+ def repos(cls, org_or_user, is_org=True):
83
+ """List repo names for given org or user
84
+
85
+ This may become the top level of the FS
86
+
87
+ Parameters
88
+ ----------
89
+ org_or_user: str
90
+ Name of the github org or user to query
91
+ is_org: bool (default True)
92
+ Whether the name is an organisation (True) or user (False)
93
+
94
+ Returns
95
+ -------
96
+ List of string
97
+ """
98
+ r = requests.get(
99
+ f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos",
100
+ timeout=cls.timeout,
101
+ )
102
+ r.raise_for_status()
103
+ return [repo["name"] for repo in r.json()]
104
+
105
+ @property
106
+ def tags(self):
107
+ """Names of tags in the repo"""
108
+ r = requests.get(
109
+ f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
110
+ timeout=self.timeout,
111
+ **self.kw,
112
+ )
113
+ r.raise_for_status()
114
+ return [t["name"] for t in r.json()]
115
+
116
+ @property
117
+ def branches(self):
118
+ """Names of branches in the repo"""
119
+ r = requests.get(
120
+ f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
121
+ timeout=self.timeout,
122
+ **self.kw,
123
+ )
124
+ r.raise_for_status()
125
+ return [t["name"] for t in r.json()]
126
+
127
+ @property
128
+ def refs(self):
129
+ """Named references, tags and branches"""
130
+ return {"tags": self.tags, "branches": self.branches}
131
+
132
+ def ls(self, path, detail=False, sha=None, _sha=None, **kwargs):
133
+ """List files at given path
134
+
135
+ Parameters
136
+ ----------
137
+ path: str
138
+ Location to list, relative to repo root
139
+ detail: bool
140
+ If True, returns list of dicts, one per file; if False, returns
141
+ list of full filenames only
142
+ sha: str (optional)
143
+ List at the given point in the repo history, branch or tag name or commit
144
+ SHA
145
+ _sha: str (optional)
146
+ List this specific tree object (used internally to descend into trees)
147
+ """
148
+ path = self._strip_protocol(path)
149
+ if path == "":
150
+ _sha = sha or self.root
151
+ if _sha is None:
152
+ parts = path.rstrip("/").split("/")
153
+ so_far = ""
154
+ _sha = sha or self.root
155
+ for part in parts:
156
+ out = self.ls(so_far, True, sha=sha, _sha=_sha)
157
+ so_far += "/" + part if so_far else part
158
+ out = [o for o in out if o["name"] == so_far]
159
+ if not out:
160
+ raise FileNotFoundError(path)
161
+ out = out[0]
162
+ if out["type"] == "file":
163
+ if detail:
164
+ return [out]
165
+ else:
166
+ return path
167
+ _sha = out["sha"]
168
+ if path not in self.dircache or sha not in [self.root, None]:
169
+ r = requests.get(
170
+ self.url.format(org=self.org, repo=self.repo, sha=_sha),
171
+ timeout=self.timeout,
172
+ **self.kw,
173
+ )
174
+ if r.status_code == 404:
175
+ raise FileNotFoundError(path)
176
+ r.raise_for_status()
177
+ types = {"blob": "file", "tree": "directory"}
178
+ out = [
179
+ {
180
+ "name": path + "/" + f["path"] if path else f["path"],
181
+ "mode": f["mode"],
182
+ "type": types[f["type"]],
183
+ "size": f.get("size", 0),
184
+ "sha": f["sha"],
185
+ }
186
+ for f in r.json()["tree"]
187
+ if f["type"] in types
188
+ ]
189
+ if sha in [self.root, None]:
190
+ self.dircache[path] = out
191
+ else:
192
+ out = self.dircache[path]
193
+ if detail:
194
+ return out
195
+ else:
196
+ return sorted([f["name"] for f in out])
197
+
198
+ def invalidate_cache(self, path=None):
199
+ self.dircache.clear()
200
+
201
+ @classmethod
202
+ def _strip_protocol(cls, path):
203
+ opts = infer_storage_options(path)
204
+ if "username" not in opts:
205
+ return super()._strip_protocol(path)
206
+ return opts["path"].lstrip("/")
207
+
208
+ @staticmethod
209
+ def _get_kwargs_from_urls(path):
210
+ opts = infer_storage_options(path)
211
+ if "username" not in opts:
212
+ return {}
213
+ out = {"org": opts["username"], "repo": opts["password"]}
214
+ if opts["host"]:
215
+ out["sha"] = opts["host"]
216
+ return out
217
+
218
+ def _open(
219
+ self,
220
+ path,
221
+ mode="rb",
222
+ block_size=None,
223
+ cache_options=None,
224
+ sha=None,
225
+ **kwargs,
226
+ ):
227
+ if mode != "rb":
228
+ raise NotImplementedError
229
+
230
+ # construct a url to hit the GitHub API's repo contents API
231
+ url = self.content_url.format(
232
+ org=self.org, repo=self.repo, path=path, sha=sha or self.root
233
+ )
234
+
235
+ # make a request to this API, and parse the response as JSON
236
+ r = requests.get(url, timeout=self.timeout, **self.kw)
237
+ if r.status_code == 404:
238
+ raise FileNotFoundError(path)
239
+ r.raise_for_status()
240
+ content_json = r.json()
241
+
242
+ # if the response's content key is not empty, try to parse it as base64
243
+ if content_json["content"]:
244
+ content = base64.b64decode(content_json["content"])
245
+
246
+ # as long as the content does not start with the string
247
+ # "version https://git-lfs.github.com/"
248
+ # then it is probably not a git-lfs pointer and we can just return
249
+ # the content directly
250
+ if not content.startswith(b"version https://git-lfs.github.com/"):
251
+ return MemoryFile(None, None, content)
252
+
253
+ # we land here if the content was not present in the first response
254
+ # (regular file over 1MB or git-lfs tracked file)
255
+ # in this case, we let the HTTPFileSystem handle the download
256
+ if self.http_fs is None:
257
+ raise ImportError(
258
+ "Please install fsspec[http] to access github files >1 MB "
259
+ "or git-lfs tracked files."
260
+ )
261
+ return self.http_fs.open(
262
+ content_json["download_url"],
263
+ mode=mode,
264
+ block_size=block_size,
265
+ cache_options=cache_options,
266
+ **kwargs,
267
+ )
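
Before moving on, a small, hedged usage sketch of the GithubFileSystem above, following the URI forms described in its docstring. It assumes network access to the GitHub API; the org/repo/branch used here (fsspec/filesystem_spec on master) are purely illustrative and any public repository would do.

    import fsspec

    # URL form "github://org:repo@sha/path": org, repo and ref travel in the URL.
    with fsspec.open("github://fsspec:filesystem_spec@master/README.md", "rb") as f:
        head = f.read(60)

    # Equivalent explicit construction; sha may be a branch, tag or commit hash,
    # and defaults to the repository's default branch when omitted.
    fs = fsspec.filesystem("github", org="fsspec", repo="filesystem_spec")
    print(fs.ls(""))            # top-level listing
    print(fs.info("README.md"))
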
temp_venv/lib/python3.13/site-packages/fsspec/implementations/http.py ADDED
@@ -0,0 +1,880 @@
1
+ import asyncio
2
+ import io
3
+ import logging
4
+ import re
5
+ import weakref
6
+ from copy import copy
7
+ from urllib.parse import urlparse
8
+
9
+ import aiohttp
10
+ import yarl
11
+
12
+ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_wrapper
13
+ from fsspec.callbacks import DEFAULT_CALLBACK
14
+ from fsspec.exceptions import FSTimeoutError
15
+ from fsspec.spec import AbstractBufferedFile
16
+ from fsspec.utils import (
17
+ DEFAULT_BLOCK_SIZE,
18
+ glob_translate,
19
+ isfilelike,
20
+ nullcontext,
21
+ tokenize,
22
+ )
23
+
24
+ from ..caching import AllBytes
25
+
26
+ # https://stackoverflow.com/a/15926317/3821154
27
+ ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)""")
28
+ ex2 = re.compile(r"""(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
29
+ logger = logging.getLogger("fsspec.http")
30
+
31
+
32
+ async def get_client(**kwargs):
33
+ return aiohttp.ClientSession(**kwargs)
34
+
35
+
36
+ class HTTPFileSystem(AsyncFileSystem):
37
+ """
38
+ Simple File-System for fetching data via HTTP(S)
39
+
40
+ ``ls()`` is implemented by loading the parent page and doing a regex
41
+ match on the result. If simple_links=True, anything of the form
42
+ "http(s)://server.com/stuff?thing=other" is treated as a link;
43
+ otherwise only links within HTML href tags will be used.
44
+ """
45
+
46
+ sep = "/"
47
+
48
+ def __init__(
49
+ self,
50
+ simple_links=True,
51
+ block_size=None,
52
+ same_scheme=True,
53
+ size_policy=None,
54
+ cache_type="bytes",
55
+ cache_options=None,
56
+ asynchronous=False,
57
+ loop=None,
58
+ client_kwargs=None,
59
+ get_client=get_client,
60
+ encoded=False,
61
+ **storage_options,
62
+ ):
63
+ """
64
+ NB: if this is called async, you must await set_session
65
+
66
+ Parameters
67
+ ----------
68
+ block_size: int
69
+ Blocks to read bytes; if 0, will default to raw requests file-like
70
+ objects instead of HTTPFile instances
71
+ simple_links: bool
72
+ If True, will consider both HTML <a> tags and anything that looks
73
+ like a URL; if False, will consider only the former.
74
+ same_scheme: True
75
+ When doing ls/glob, if this is True, only consider paths that have
76
+ http/https matching the input URLs.
77
+ size_policy: this argument is deprecated
78
+ client_kwargs: dict
79
+ Passed to aiohttp.ClientSession, see
80
+ https://docs.aiohttp.org/en/stable/client_reference.html
81
+ For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
82
+ get_client: Callable[..., aiohttp.ClientSession]
83
+ A callable which takes keyword arguments and constructs
84
+ an aiohttp.ClientSession. Its state will be managed by
85
+ the HTTPFileSystem class.
86
+ storage_options: key-value
87
+ Any other parameters passed on to requests
88
+ cache_type, cache_options: defaults used in open
89
+ """
90
+ super().__init__(self, asynchronous=asynchronous, loop=loop, **storage_options)
91
+ self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
92
+ self.simple_links = simple_links
93
+ self.same_schema = same_scheme
94
+ self.cache_type = cache_type
95
+ self.cache_options = cache_options
96
+ self.client_kwargs = client_kwargs or {}
97
+ self.get_client = get_client
98
+ self.encoded = encoded
99
+ self.kwargs = storage_options
100
+ self._session = None
101
+
102
+ # Clean caching-related parameters from `storage_options`
103
+ # before propagating them as `request_options` through `self.kwargs`.
104
+ # TODO: Maybe rename `self.kwargs` to `self.request_options` to make
105
+ # it clearer.
106
+ request_options = copy(storage_options)
107
+ self.use_listings_cache = request_options.pop("use_listings_cache", False)
108
+ request_options.pop("listings_expiry_time", None)
109
+ request_options.pop("max_paths", None)
110
+ request_options.pop("skip_instance_cache", None)
111
+ self.kwargs = request_options
112
+
113
+ @property
114
+ def fsid(self):
115
+ return "http"
116
+
117
+ def encode_url(self, url):
118
+ return yarl.URL(url, encoded=self.encoded)
119
+
120
+ @staticmethod
121
+ def close_session(loop, session):
122
+ if loop is not None and loop.is_running():
123
+ try:
124
+ sync(loop, session.close, timeout=0.1)
125
+ return
126
+ except (TimeoutError, FSTimeoutError, NotImplementedError):
127
+ pass
128
+ connector = getattr(session, "_connector", None)
129
+ if connector is not None:
130
+ # close after loop is dead
131
+ connector._close()
132
+
133
+ async def set_session(self):
134
+ if self._session is None:
135
+ self._session = await self.get_client(loop=self.loop, **self.client_kwargs)
136
+ if not self.asynchronous:
137
+ weakref.finalize(self, self.close_session, self.loop, self._session)
138
+ return self._session
139
+
140
+ @classmethod
141
+ def _strip_protocol(cls, path):
142
+ """For HTTP, we always want to keep the full URL"""
143
+ return path
144
+
145
+ @classmethod
146
+ def _parent(cls, path):
147
+ # override, since _strip_protocol is different for URLs
148
+ par = super()._parent(path)
149
+ if len(par) > 7: # "http://..."
150
+ return par
151
+ return ""
152
+
153
+ async def _ls_real(self, url, detail=True, **kwargs):
154
+ # ignoring URL-encoded arguments
155
+ kw = self.kwargs.copy()
156
+ kw.update(kwargs)
157
+ logger.debug(url)
158
+ session = await self.set_session()
159
+ async with session.get(self.encode_url(url), **self.kwargs) as r:
160
+ self._raise_not_found_for_status(r, url)
161
+ try:
162
+ text = await r.text()
163
+ if self.simple_links:
164
+ links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
165
+ else:
166
+ links = [u[2] for u in ex.findall(text)]
167
+ except UnicodeDecodeError:
168
+ links = [] # binary, not HTML
169
+ out = set()
170
+ parts = urlparse(url)
171
+ for l in links:
172
+ if isinstance(l, tuple):
173
+ l = l[1]
174
+ if l.startswith("/") and len(l) > 1:
175
+ # absolute URL on this server
176
+ l = f"{parts.scheme}://{parts.netloc}{l}"
177
+ if l.startswith("http"):
178
+ if self.same_schema and l.startswith(url.rstrip("/") + "/"):
179
+ out.add(l)
180
+ elif l.replace("https", "http").startswith(
181
+ url.replace("https", "http").rstrip("/") + "/"
182
+ ):
183
+ # allowed to cross http <-> https
184
+ out.add(l)
185
+ else:
186
+ if l not in ["..", "../"]:
187
+ # Ignore FTP-like "parent"
188
+ out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
189
+ if not out and url.endswith("/"):
190
+ out = await self._ls_real(url.rstrip("/"), detail=False)
191
+ if detail:
192
+ return [
193
+ {
194
+ "name": u,
195
+ "size": None,
196
+ "type": "directory" if u.endswith("/") else "file",
197
+ }
198
+ for u in out
199
+ ]
200
+ else:
201
+ return sorted(out)
202
+
203
+ async def _ls(self, url, detail=True, **kwargs):
204
+ if self.use_listings_cache and url in self.dircache:
205
+ out = self.dircache[url]
206
+ else:
207
+ out = await self._ls_real(url, detail=detail, **kwargs)
208
+ self.dircache[url] = out
209
+ return out
210
+
211
+ ls = sync_wrapper(_ls)
212
+
213
+ def _raise_not_found_for_status(self, response, url):
214
+ """
215
+ Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
216
+ """
217
+ if response.status == 404:
218
+ raise FileNotFoundError(url)
219
+ response.raise_for_status()
220
+
221
+ async def _cat_file(self, url, start=None, end=None, **kwargs):
222
+ kw = self.kwargs.copy()
223
+ kw.update(kwargs)
224
+ logger.debug(url)
225
+
226
+ if start is not None or end is not None:
227
+ if start == end:
228
+ return b""
229
+ headers = kw.pop("headers", {}).copy()
230
+
231
+ headers["Range"] = await self._process_limits(url, start, end)
232
+ kw["headers"] = headers
233
+ session = await self.set_session()
234
+ async with session.get(self.encode_url(url), **kw) as r:
235
+ out = await r.read()
236
+ self._raise_not_found_for_status(r, url)
237
+ return out
238
+
239
+ async def _get_file(
240
+ self, rpath, lpath, chunk_size=5 * 2**20, callback=DEFAULT_CALLBACK, **kwargs
241
+ ):
242
+ kw = self.kwargs.copy()
243
+ kw.update(kwargs)
244
+ logger.debug(rpath)
245
+ session = await self.set_session()
246
+ async with session.get(self.encode_url(rpath), **kw) as r:
247
+ try:
248
+ size = int(r.headers["content-length"])
249
+ except (ValueError, KeyError):
250
+ size = None
251
+
252
+ callback.set_size(size)
253
+ self._raise_not_found_for_status(r, rpath)
254
+ if isfilelike(lpath):
255
+ outfile = lpath
256
+ else:
257
+ outfile = open(lpath, "wb") # noqa: ASYNC101, ASYNC230
258
+
259
+ try:
260
+ chunk = True
261
+ while chunk:
262
+ chunk = await r.content.read(chunk_size)
263
+ outfile.write(chunk)
264
+ callback.relative_update(len(chunk))
265
+ finally:
266
+ if not isfilelike(lpath):
267
+ outfile.close()
268
+
269
+ async def _put_file(
270
+ self,
271
+ lpath,
272
+ rpath,
273
+ chunk_size=5 * 2**20,
274
+ callback=DEFAULT_CALLBACK,
275
+ method="post",
276
+ mode="overwrite",
277
+ **kwargs,
278
+ ):
279
+ if mode != "overwrite":
280
+ raise NotImplementedError("Exclusive write")
281
+
282
+ async def gen_chunks():
283
+ # Support passing arbitrary file-like objects
284
+ # and use them instead of streams.
285
+ if isinstance(lpath, io.IOBase):
286
+ context = nullcontext(lpath)
287
+ use_seek = False # might not support seeking
288
+ else:
289
+ context = open(lpath, "rb") # noqa: ASYNC101, ASYNC230
290
+ use_seek = True
291
+
292
+ with context as f:
293
+ if use_seek:
294
+ callback.set_size(f.seek(0, 2))
295
+ f.seek(0)
296
+ else:
297
+ callback.set_size(getattr(f, "size", None))
298
+
299
+ chunk = f.read(chunk_size)
300
+ while chunk:
301
+ yield chunk
302
+ callback.relative_update(len(chunk))
303
+ chunk = f.read(chunk_size)
304
+
305
+ kw = self.kwargs.copy()
306
+ kw.update(kwargs)
307
+ session = await self.set_session()
308
+
309
+ method = method.lower()
310
+ if method not in ("post", "put"):
311
+ raise ValueError(
312
+ f"method has to be either 'post' or 'put', not: {method!r}"
313
+ )
314
+
315
+ meth = getattr(session, method)
316
+ async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
317
+ self._raise_not_found_for_status(resp, rpath)
318
+
319
+ async def _exists(self, path, **kwargs):
320
+ kw = self.kwargs.copy()
321
+ kw.update(kwargs)
322
+ try:
323
+ logger.debug(path)
324
+ session = await self.set_session()
325
+ r = await session.get(self.encode_url(path), **kw)
326
+ async with r:
327
+ return r.status < 400
328
+ except aiohttp.ClientError:
329
+ return False
330
+
331
+ async def _isfile(self, path, **kwargs):
332
+ return await self._exists(path, **kwargs)
333
+
334
+ def _open(
335
+ self,
336
+ path,
337
+ mode="rb",
338
+ block_size=None,
339
+ autocommit=None, # XXX: This differs from the base class.
340
+ cache_type=None,
341
+ cache_options=None,
342
+ size=None,
343
+ **kwargs,
344
+ ):
345
+ """Make a file-like object
346
+
347
+ Parameters
348
+ ----------
349
+ path: str
350
+ Full URL with protocol
351
+ mode: string
352
+ must be "rb"
353
+ block_size: int or None
354
+ Bytes to download in one request; use instance value if None. If
355
+ zero, will return a streaming Requests file-like instance.
356
+ kwargs: key-value
357
+ Any other parameters, passed to requests calls
358
+ """
359
+ if mode != "rb":
360
+ raise NotImplementedError
361
+ block_size = block_size if block_size is not None else self.block_size
362
+ kw = self.kwargs.copy()
363
+ kw["asynchronous"] = self.asynchronous
364
+ kw.update(kwargs)
365
+ info = {}
366
+ size = size or info.update(self.info(path, **kwargs)) or info["size"]
367
+ session = sync(self.loop, self.set_session)
368
+ if block_size and size and info.get("partial", True):
369
+ return HTTPFile(
370
+ self,
371
+ path,
372
+ session=session,
373
+ block_size=block_size,
374
+ mode=mode,
375
+ size=size,
376
+ cache_type=cache_type or self.cache_type,
377
+ cache_options=cache_options or self.cache_options,
378
+ loop=self.loop,
379
+ **kw,
380
+ )
381
+ else:
382
+ return HTTPStreamFile(
383
+ self,
384
+ path,
385
+ mode=mode,
386
+ loop=self.loop,
387
+ session=session,
388
+ **kw,
389
+ )
390
+
391
+ async def open_async(self, path, mode="rb", size=None, **kwargs):
392
+ session = await self.set_session()
393
+ if size is None:
394
+ try:
395
+ size = (await self._info(path, **kwargs))["size"]
396
+ except FileNotFoundError:
397
+ pass
398
+ return AsyncStreamFile(
399
+ self,
400
+ path,
401
+ loop=self.loop,
402
+ session=session,
403
+ size=size,
404
+ **kwargs,
405
+ )
406
+
407
+ def ukey(self, url):
408
+ """Unique identifier; assume HTTP files are static, unchanging"""
409
+ return tokenize(url, self.kwargs, self.protocol)
410
+
411
+ async def _info(self, url, **kwargs):
412
+ """Get info of URL
413
+
414
+ Tries to access location via HEAD, and then GET methods, but does
415
+ not fetch the data.
416
+
417
+ It is possible that the server does not supply any size information, in
418
+ which case size will be given as None (and certain operations on the
419
+ corresponding file will not work).
420
+ """
421
+ info = {}
422
+ session = await self.set_session()
423
+
424
+ for policy in ["head", "get"]:
425
+ try:
426
+ info.update(
427
+ await _file_info(
428
+ self.encode_url(url),
429
+ size_policy=policy,
430
+ session=session,
431
+ **self.kwargs,
432
+ **kwargs,
433
+ )
434
+ )
435
+ if info.get("size") is not None:
436
+ break
437
+ except Exception as exc:
438
+ if policy == "get":
439
+ # If get failed, then raise a FileNotFoundError
440
+ raise FileNotFoundError(url) from exc
441
+ logger.debug("", exc_info=exc)
442
+
443
+ return {"name": url, "size": None, **info, "type": "file"}
444
+
445
+ async def _glob(self, path, maxdepth=None, **kwargs):
446
+ """
447
+ Find files by glob-matching.
448
+
449
+ This implementation is identical to the one in AbstractFileSystem,
450
+ but "?" is not considered as a character for globbing, because it is
451
+ so common in URLs, often identifying the "query" part.
452
+ """
453
+ if maxdepth is not None and maxdepth < 1:
454
+ raise ValueError("maxdepth must be at least 1")
455
+ import re
456
+
457
+ ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash
458
+ path = self._strip_protocol(path)
459
+ append_slash_to_dirname = ends_with_slash or path.endswith(("/**", "/*"))
460
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
461
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
462
+
463
+ min_idx = min(idx_star, idx_brace)
464
+
465
+ detail = kwargs.pop("detail", False)
466
+
467
+ if not has_magic(path):
468
+ if await self._exists(path, **kwargs):
469
+ if not detail:
470
+ return [path]
471
+ else:
472
+ return {path: await self._info(path, **kwargs)}
473
+ else:
474
+ if not detail:
475
+ return [] # glob of non-existent returns empty
476
+ else:
477
+ return {}
478
+ elif "/" in path[:min_idx]:
479
+ min_idx = path[:min_idx].rindex("/")
480
+ root = path[: min_idx + 1]
481
+ depth = path[min_idx + 1 :].count("/") + 1
482
+ else:
483
+ root = ""
484
+ depth = path[min_idx + 1 :].count("/") + 1
485
+
486
+ if "**" in path:
487
+ if maxdepth is not None:
488
+ idx_double_stars = path.find("**")
489
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
490
+ depth = depth - depth_double_stars + maxdepth
491
+ else:
492
+ depth = None
493
+
494
+ allpaths = await self._find(
495
+ root, maxdepth=depth, withdirs=True, detail=True, **kwargs
496
+ )
497
+
498
+ pattern = glob_translate(path + ("/" if ends_with_slash else ""))
499
+ pattern = re.compile(pattern)
500
+
501
+ out = {
502
+ (
503
+ p.rstrip("/")
504
+ if not append_slash_to_dirname
505
+ and info["type"] == "directory"
506
+ and p.endswith("/")
507
+ else p
508
+ ): info
509
+ for p, info in sorted(allpaths.items())
510
+ if pattern.match(p.rstrip("/"))
511
+ }
512
+
513
+ if detail:
514
+ return out
515
+ else:
516
+ return list(out)
517
+
518
+ async def _isdir(self, path):
519
+ # override, since all URLs are (also) files
520
+ try:
521
+ return bool(await self._ls(path))
522
+ except (FileNotFoundError, ValueError):
523
+ return False
524
+
525
+ async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
526
+ """
527
+ Write bytes to a remote file over HTTP.
528
+
529
+ Parameters
530
+ ----------
531
+ path : str
532
+ Target URL where the data should be written
533
+ value : bytes
534
+ Data to be written
535
+ mode : str
536
+ How to write to the file - 'overwrite' or 'append'
537
+ **kwargs : dict
538
+ Additional parameters to pass to the HTTP request
539
+ """
540
+ url = self._strip_protocol(path)
541
+ headers = kwargs.pop("headers", {})
542
+ headers["Content-Length"] = str(len(value))
543
+
544
+ session = await self.set_session()
545
+
546
+ async with session.put(url, data=value, headers=headers, **kwargs) as r:
547
+ r.raise_for_status()
548
+
549
+
550
+ class HTTPFile(AbstractBufferedFile):
551
+ """
552
+ A file-like object pointing to a remote HTTP(S) resource
553
+
554
+ Supports only reading, with read-ahead of a predetermined block-size.
555
+
556
+ In the case that the server does not supply the filesize, only reading of
557
+ the complete file in one go is supported.
558
+
559
+ Parameters
560
+ ----------
561
+ url: str
562
+ Full URL of the remote resource, including the protocol
563
+ session: aiohttp.ClientSession or None
564
+ All calls will be made within this session, to avoid restarting
565
+ connections where the server allows this
566
+ block_size: int or None
567
+ The amount of read-ahead to do, in bytes. Default is 5MB, or the value
568
+ configured for the FileSystem creating this file
569
+ size: None or int
570
+ If given, this is the size of the file in bytes, and we don't attempt
571
+ to call the server to find the value.
572
+ kwargs: all other key-values are passed to requests calls.
573
+ """
574
+
575
+ def __init__(
576
+ self,
577
+ fs,
578
+ url,
579
+ session=None,
580
+ block_size=None,
581
+ mode="rb",
582
+ cache_type="bytes",
583
+ cache_options=None,
584
+ size=None,
585
+ loop=None,
586
+ asynchronous=False,
587
+ **kwargs,
588
+ ):
589
+ if mode != "rb":
590
+ raise NotImplementedError("File mode not supported")
591
+ self.asynchronous = asynchronous
592
+ self.loop = loop
593
+ self.url = url
594
+ self.session = session
595
+ self.details = {"name": url, "size": size, "type": "file"}
596
+ super().__init__(
597
+ fs=fs,
598
+ path=url,
599
+ mode=mode,
600
+ block_size=block_size,
601
+ cache_type=cache_type,
602
+ cache_options=cache_options,
603
+ **kwargs,
604
+ )
605
+
606
+ def read(self, length=-1):
607
+ """Read bytes from file
608
+
609
+ Parameters
610
+ ----------
611
+ length: int
612
+ Read up to this many bytes. If negative, read all content to end of
613
+ file. If the server has not supplied the filesize, attempting to
614
+ read only part of the data will raise a ValueError.
615
+ """
616
+ if (
617
+ (length < 0 and self.loc == 0) # explicit read all
618
+ # but not when the size is known and fits into a block anyways
619
+ and not (self.size is not None and self.size <= self.blocksize)
620
+ ):
621
+ self._fetch_all()
622
+ if self.size is None:
623
+ if length < 0:
624
+ self._fetch_all()
625
+ else:
626
+ length = min(self.size - self.loc, length)
627
+ return super().read(length)
628
+
629
+ async def async_fetch_all(self):
630
+ """Read whole file in one shot, without caching
631
+
632
+ This is only called when position is still at zero,
633
+ and read() is called without a byte-count.
634
+ """
635
+ logger.debug(f"Fetch all for {self}")
636
+ if not isinstance(self.cache, AllBytes):
637
+ r = await self.session.get(self.fs.encode_url(self.url), **self.kwargs)
638
+ async with r:
639
+ r.raise_for_status()
640
+ out = await r.read()
641
+ self.cache = AllBytes(
642
+ size=len(out), fetcher=None, blocksize=None, data=out
643
+ )
644
+ self.size = len(out)
645
+
646
+ _fetch_all = sync_wrapper(async_fetch_all)
647
+
648
+ def _parse_content_range(self, headers):
649
+ """Parse the Content-Range header"""
650
+ s = headers.get("Content-Range", "")
651
+ m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s)
652
+ if not m:
653
+ return None, None, None
654
+
655
+ if m[1] == "*":
656
+ start = end = None
657
+ else:
658
+ start, end = [int(x) for x in m[1].split("-")]
659
+ total = None if m[2] == "*" else int(m[2])
660
+ return start, end, total
661
+
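
(Illustration, not part of the file: the regex above turns a typical header into a (start, end, total) tuple; since self is unused in the method body, it can be called unbound for a quick check.)

    from fsspec.implementations.http import HTTPFile

    assert HTTPFile._parse_content_range(
        None, {"Content-Range": "bytes 0-99/1234"}
    ) == (0, 99, 1234)
    # A missing or wildcard header yields (None, None, None):
    assert HTTPFile._parse_content_range(None, {}) == (None, None, None)
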
662
+ async def async_fetch_range(self, start, end):
663
+ """Download a block of data
664
+
665
+ The expectation is that the server returns only the requested bytes,
666
+ with HTTP code 206. If this is not the case, we first check the headers,
667
+ and then stream the output - if the data size is bigger than we
668
+ requested, an exception is raised.
669
+ """
670
+ logger.debug(f"Fetch range for {self}: {start}-{end}")
671
+ kwargs = self.kwargs.copy()
672
+ headers = kwargs.pop("headers", {}).copy()
673
+ headers["Range"] = f"bytes={start}-{end - 1}"
674
+ logger.debug(f"{self.url} : {headers['Range']}")
675
+ r = await self.session.get(
676
+ self.fs.encode_url(self.url), headers=headers, **kwargs
677
+ )
678
+ async with r:
679
+ if r.status == 416:
680
+ # range request outside file
681
+ return b""
682
+ r.raise_for_status()
683
+
684
+ # If the server has handled the range request, it should reply
685
+ # with status 206 (partial content). But we'll guess that a suitable
686
+ # Content-Range header or a Content-Length no more than the
687
+ # requested range also mean we have got the desired range.
688
+ response_is_range = (
689
+ r.status == 206
690
+ or self._parse_content_range(r.headers)[0] == start
691
+ or int(r.headers.get("Content-Length", end + 1)) <= end - start
692
+ )
693
+
694
+ if response_is_range:
695
+ # partial content, as expected
696
+ out = await r.read()
697
+ elif start > 0:
698
+ raise ValueError(
699
+ "The HTTP server doesn't appear to support range requests. "
700
+ "Only reading this file from the beginning is supported. "
701
+ "Open with block_size=0 for a streaming file interface."
702
+ )
703
+ else:
704
+ # Response is not a range, but we want the start of the file,
705
+ # so we can read the required amount anyway.
706
+ cl = 0
707
+ out = []
708
+ while True:
709
+ chunk = await r.content.read(2**20)
710
+ # data size unknown, let's read until we have enough
711
+ if chunk:
712
+ out.append(chunk)
713
+ cl += len(chunk)
714
+ if cl > end - start:
715
+ break
716
+ else:
717
+ break
718
+ out = b"".join(out)[: end - start]
719
+ return out
720
+
721
+ _fetch_range = sync_wrapper(async_fetch_range)
722
+
723
+
724
+ magic_check = re.compile("([*[])")
725
+
726
+
727
+ def has_magic(s):
728
+ match = magic_check.search(s)
729
+ return match is not None
730
+
731
+
732
+ class HTTPStreamFile(AbstractBufferedFile):
733
+ def __init__(self, fs, url, mode="rb", loop=None, session=None, **kwargs):
734
+ self.asynchronous = kwargs.pop("asynchronous", False)
735
+ self.url = url
736
+ self.loop = loop
737
+ self.session = session
738
+ if mode != "rb":
739
+ raise ValueError
740
+ self.details = {"name": url, "size": None}
741
+ super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs)
742
+
743
+ async def cor():
744
+ r = await self.session.get(self.fs.encode_url(url), **kwargs).__aenter__()
745
+ self.fs._raise_not_found_for_status(r, url)
746
+ return r
747
+
748
+ self.r = sync(self.loop, cor)
749
+ self.loop = fs.loop
750
+
751
+ def seek(self, loc, whence=0):
752
+ if loc == 0 and whence == 1:
753
+ return
754
+ if loc == self.loc and whence == 0:
755
+ return
756
+ raise ValueError("Cannot seek streaming HTTP file")
757
+
758
+ async def _read(self, num=-1):
759
+ out = await self.r.content.read(num)
760
+ self.loc += len(out)
761
+ return out
762
+
763
+ read = sync_wrapper(_read)
764
+
765
+ async def _close(self):
766
+ self.r.close()
767
+
768
+ def close(self):
769
+ asyncio.run_coroutine_threadsafe(self._close(), self.loop)
770
+ super().close()
771
+
772
+
773
+ class AsyncStreamFile(AbstractAsyncStreamedFile):
774
+ def __init__(
775
+ self, fs, url, mode="rb", loop=None, session=None, size=None, **kwargs
776
+ ):
777
+ self.url = url
778
+ self.session = session
779
+ self.r = None
780
+ if mode != "rb":
781
+ raise ValueError
782
+ self.details = {"name": url, "size": None}
783
+ self.kwargs = kwargs
784
+ super().__init__(fs=fs, path=url, mode=mode, cache_type="none")
785
+ self.size = size
786
+
787
+ async def read(self, num=-1):
788
+ if self.r is None:
789
+ r = await self.session.get(
790
+ self.fs.encode_url(self.url), **self.kwargs
791
+ ).__aenter__()
792
+ self.fs._raise_not_found_for_status(r, self.url)
793
+ self.r = r
794
+ out = await self.r.content.read(num)
795
+ self.loc += len(out)
796
+ return out
797
+
798
+ async def close(self):
799
+ if self.r is not None:
800
+ self.r.close()
801
+ self.r = None
802
+ await super().close()
803
+
804
+
805
+ async def get_range(session, url, start, end, file=None, **kwargs):
806
+ # explicitly get a range when we know it must be safe
807
+ kwargs = kwargs.copy()
808
+ headers = kwargs.pop("headers", {}).copy()
809
+ headers["Range"] = f"bytes={start}-{end - 1}"
810
+ r = await session.get(url, headers=headers, **kwargs)
811
+ r.raise_for_status()
812
+ async with r:
813
+ out = await r.read()
814
+ if file:
815
+ with open(file, "r+b") as f: # noqa: ASYNC101, ASYNC230
816
+ f.seek(start)
817
+ f.write(out)
818
+ else:
819
+ return out
820
+
821
+
822
+ async def _file_info(url, session, size_policy="head", **kwargs):
823
+ """Call HEAD on the server to get details about the file (size/checksum etc.)
824
+
825
+ Default operation is to explicitly allow redirects and use encoding
826
+ 'identity' (no compression) to get the true size of the target.
827
+ """
828
+ logger.debug("Retrieve file size for %s", url)
829
+ kwargs = kwargs.copy()
830
+ ar = kwargs.pop("allow_redirects", True)
831
+ head = kwargs.get("headers", {}).copy()
832
+ head["Accept-Encoding"] = "identity"
833
+ kwargs["headers"] = head
834
+
835
+ info = {}
836
+ if size_policy == "head":
837
+ r = await session.head(url, allow_redirects=ar, **kwargs)
838
+ elif size_policy == "get":
839
+ r = await session.get(url, allow_redirects=ar, **kwargs)
840
+ else:
841
+ raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
842
+ async with r:
843
+ r.raise_for_status()
844
+
845
+ if "Content-Length" in r.headers:
846
+ # Some servers may choose to ignore Accept-Encoding and return
847
+ # compressed content, in which case the returned size is unreliable.
848
+ if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
849
+ "identity",
850
+ "",
851
+ ]:
852
+ info["size"] = int(r.headers["Content-Length"])
853
+ elif "Content-Range" in r.headers:
854
+ info["size"] = int(r.headers["Content-Range"].split("/")[1])
855
+
856
+ if "Content-Type" in r.headers:
857
+ info["mimetype"] = r.headers["Content-Type"].partition(";")[0]
858
+
859
+ if r.headers.get("Accept-Ranges") == "none":
860
+ # Some servers may explicitly discourage partial content requests, but
861
+ # the lack of "Accept-Ranges" does not always indicate they would fail
862
+ info["partial"] = False
863
+
864
+ info["url"] = str(r.url)
865
+
866
+ for checksum_field in ["ETag", "Content-MD5", "Digest"]:
867
+ if r.headers.get(checksum_field):
868
+ info[checksum_field] = r.headers[checksum_field]
869
+
870
+ return info
871
+
872
+
873
+ async def _file_size(url, session=None, *args, **kwargs):
874
+ if session is None:
875
+ session = await get_client()
876
+ info = await _file_info(url, session=session, *args, **kwargs)
877
+ return info.get("size")
878
+
879
+
880
+ file_size = sync_wrapper(_file_size)
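
To close out this module, a short, hedged usage sketch based on the _open docstring above. The URL is a placeholder and network access plus aiohttp are assumed; with a non-zero block_size and a server that reports a size, open() yields a random-access HTTPFile, while block_size=0 forces the streaming HTTPStreamFile path.

    import fsspec

    url = "https://example.com/data.bin"  # placeholder URL

    # Random-access reads via HTTP Range requests (HTTPFile):
    with fsspec.open(url, "rb") as f:
        first_kb = f.read(1024)

    # Pure streaming, no range requests (HTTPStreamFile):
    fs = fsspec.filesystem("http", block_size=0)
    with fs.open(url, "rb") as f:
        chunk = f.read(2**20)
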
temp_venv/lib/python3.13/site-packages/fsspec/implementations/http_sync.py ADDED
@@ -0,0 +1,931 @@
1
+ """This file is largely copied from http.py"""
2
+
3
+ import io
4
+ import logging
5
+ import re
6
+ import urllib.error
7
+ import urllib.parse
8
+ from copy import copy
9
+ from json import dumps, loads
10
+ from urllib.parse import urlparse
11
+
12
+ try:
13
+ import yarl
14
+ except (ImportError, ModuleNotFoundError, OSError):
15
+ yarl = False
16
+
17
+ from fsspec.callbacks import _DEFAULT_CALLBACK
18
+ from fsspec.registry import register_implementation
19
+ from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
20
+ from fsspec.utils import DEFAULT_BLOCK_SIZE, isfilelike, nullcontext, tokenize
21
+
22
+ from ..caching import AllBytes
23
+
24
+ # https://stackoverflow.com/a/15926317/3821154
25
+ ex = re.compile(r"""<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P<url>[^"']+)""")
26
+ ex2 = re.compile(r"""(?P<url>http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
27
+ logger = logging.getLogger("fsspec.http")
28
+
29
+
30
+ class JsHttpException(urllib.error.HTTPError): ...
31
+
32
+
33
+ class StreamIO(io.BytesIO):
34
+ # fake class, so you can set attributes on it
35
+ # will eventually actually stream
36
+ ...
37
+
38
+
39
+ class ResponseProxy:
40
+ """Looks like a requests response"""
41
+
42
+ def __init__(self, req, stream=False):
43
+ self.request = req
44
+ self.stream = stream
45
+ self._data = None
46
+ self._headers = None
47
+
48
+ @property
49
+ def raw(self):
50
+ if self._data is None:
51
+ b = self.request.response.to_bytes()
52
+ if self.stream:
53
+ self._data = StreamIO(b)
54
+ else:
55
+ self._data = b
56
+ return self._data
57
+
58
+ def close(self):
59
+ if hasattr(self, "_data"):
60
+ del self._data
61
+
62
+ @property
63
+ def headers(self):
64
+ if self._headers is None:
65
+ self._headers = dict(
66
+ [
67
+ _.split(": ")
68
+ for _ in self.request.getAllResponseHeaders().strip().split("\r\n")
69
+ ]
70
+ )
71
+ return self._headers
72
+
73
+ @property
74
+ def status_code(self):
75
+ return int(self.request.status)
76
+
77
+ def raise_for_status(self):
78
+ if not self.ok:
79
+ raise JsHttpException(
80
+ self.url, self.status_code, self.reason, self.headers, None
81
+ )
82
+
83
+ def iter_content(self, chunksize, *_, **__):
84
+ while True:
85
+ out = self.raw.read(chunksize)
86
+ if out:
87
+ yield out
88
+ else:
89
+ break
90
+
91
+ @property
92
+ def reason(self):
93
+ return self.request.statusText
94
+
95
+ @property
96
+ def ok(self):
97
+ return self.status_code < 400
98
+
99
+ @property
100
+ def url(self):
101
+ return self.request.response.responseURL
102
+
103
+ @property
104
+ def text(self):
105
+ # TODO: encoding from headers
106
+ return self.content.decode()
107
+
108
+ @property
109
+ def content(self):
110
+ self.stream = False
111
+ return self.raw
112
+
113
+ def json(self):
114
+ return loads(self.text)
115
+
116
+
117
+ class RequestsSessionShim:
118
+ def __init__(self):
119
+ self.headers = {}
120
+
121
+ def request(
122
+ self,
123
+ method,
124
+ url,
125
+ params=None,
126
+ data=None,
127
+ headers=None,
128
+ cookies=None,
129
+ files=None,
130
+ auth=None,
131
+ timeout=None,
132
+ allow_redirects=None,
133
+ proxies=None,
134
+ hooks=None,
135
+ stream=None,
136
+ verify=None,
137
+ cert=None,
138
+ json=None,
139
+ ):
140
+ from js import Blob, XMLHttpRequest
141
+
142
+ logger.debug("JS request: %s %s", method, url)
143
+
144
+ if cert or verify or proxies or files or cookies or hooks:
145
+ raise NotImplementedError
146
+ if data and json:
147
+ raise ValueError("Use json= or data=, not both")
148
+ req = XMLHttpRequest.new()
149
+ extra = auth if auth else ()
150
+ if params:
151
+ url = f"{url}?{urllib.parse.urlencode(params)}"
152
+ req.open(method, url, False, *extra)
153
+ if timeout:
154
+ req.timeout = timeout
155
+ if headers:
156
+ for k, v in headers.items():
157
+ req.setRequestHeader(k, v)
158
+
159
+ req.setRequestHeader("Accept", "application/octet-stream")
160
+ req.responseType = "arraybuffer"
161
+ if json:
162
+ blob = Blob.new([dumps(json)], {type: "application/json"})
163
+ req.send(blob)
164
+ elif data:
165
+ if isinstance(data, io.IOBase):
166
+ data = data.read()
167
+ blob = Blob.new([data], {type: "application/octet-stream"})
168
+ req.send(blob)
169
+ else:
170
+ req.send(None)
171
+ return ResponseProxy(req, stream=stream)
172
+
173
+ def get(self, url, **kwargs):
174
+ return self.request("GET", url, **kwargs)
175
+
176
+ def head(self, url, **kwargs):
177
+ return self.request("HEAD", url, **kwargs)
178
+
179
+ def post(self, url, **kwargs):
180
+ return self.request("POST", url, **kwargs)
181
+
182
+ def put(self, url, **kwargs):
183
+ return self.request("PUT", url, **kwargs)
184
+
185
+ def patch(self, url, **kwargs):
186
+ return self.request("PATCH", url, **kwargs)
187
+
188
+ def delete(self, url, **kwargs):
189
+ return self.request("DELETE", url, **kwargs)
190
+
191
+
192
+ class HTTPFileSystem(AbstractFileSystem):
193
+ """
194
+ Simple File-System for fetching data via HTTP(S)
195
+
196
+ This is the BLOCKING version of the normal HTTPFileSystem. It uses
197
+ requests in normal python and the JS runtime in pyodide.
198
+
199
+ ***This implementation is extremely experimental, do not use unless
200
+ you are testing pyodide/pyscript integration***
201
+ """
202
+
203
+ protocol = ("http", "https", "sync-http", "sync-https")
204
+ sep = "/"
205
+
206
+ def __init__(
207
+ self,
208
+ simple_links=True,
209
+ block_size=None,
210
+ same_scheme=True,
211
+ cache_type="readahead",
212
+ cache_options=None,
213
+ client_kwargs=None,
214
+ encoded=False,
215
+ **storage_options,
216
+ ):
217
+ """
218
+
219
+ Parameters
220
+ ----------
221
+ block_size: int
222
+ Blocks to read bytes; if 0, will default to raw requests file-like
223
+ objects instead of HTTPFile instances
224
+ simple_links: bool
225
+ If True, will consider both HTML <a> tags and anything that looks
226
+ like a URL; if False, will consider only the former.
227
+ same_scheme: True
228
+ When doing ls/glob, if this is True, only consider paths that have
229
+ http/https matching the input URLs.
230
+ size_policy: this argument is deprecated
231
+ client_kwargs: dict
232
+ Passed to aiohttp.ClientSession, see
233
+ https://docs.aiohttp.org/en/stable/client_reference.html
234
+ For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
235
+ storage_options: key-value
236
+ Any other parameters passed on to requests
237
+ cache_type, cache_options: defaults used in open
238
+ """
239
+ super().__init__(self, **storage_options)
240
+ self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
241
+ self.simple_links = simple_links
242
+ self.same_schema = same_scheme
243
+ self.cache_type = cache_type
244
+ self.cache_options = cache_options
245
+ self.client_kwargs = client_kwargs or {}
246
+ self.encoded = encoded
247
+ self.kwargs = storage_options
248
+
249
+ try:
250
+ import js # noqa: F401
251
+
252
+ logger.debug("Starting JS session")
253
+ self.session = RequestsSessionShim()
254
+ self.js = True
255
+ except Exception as e:
256
+ import requests
257
+
258
+ logger.debug("Starting cpython session because of: %s", e)
259
+ self.session = requests.Session(**(client_kwargs or {}))
260
+ self.js = False
261
+
262
+ request_options = copy(storage_options)
263
+ self.use_listings_cache = request_options.pop("use_listings_cache", False)
264
+ request_options.pop("listings_expiry_time", None)
265
+ request_options.pop("max_paths", None)
266
+ request_options.pop("skip_instance_cache", None)
267
+ self.kwargs = request_options
268
+
269
+ @property
270
+ def fsid(self):
271
+ return "sync-http"
272
+
273
+ def encode_url(self, url):
274
+ if yarl:
275
+ return yarl.URL(url, encoded=self.encoded)
276
+ return url
277
+
278
+ @classmethod
279
+ def _strip_protocol(cls, path: str) -> str:
280
+ """For HTTP, we always want to keep the full URL"""
281
+ path = path.replace("sync-http://", "http://").replace(
282
+ "sync-https://", "https://"
283
+ )
284
+ return path
285
+
286
+ @classmethod
287
+ def _parent(cls, path):
288
+ # override, since _strip_protocol is different for URLs
289
+ par = super()._parent(path)
290
+ if len(par) > 7: # "http://..."
291
+ return par
292
+ return ""
293
+
294
+ def _ls_real(self, url, detail=True, **kwargs):
295
+ # ignoring URL-encoded arguments
296
+ kw = self.kwargs.copy()
297
+ kw.update(kwargs)
298
+ logger.debug(url)
299
+ r = self.session.get(self.encode_url(url), **self.kwargs)
300
+ self._raise_not_found_for_status(r, url)
301
+ text = r.text
302
+ if self.simple_links:
303
+ links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
304
+ else:
305
+ links = [u[2] for u in ex.findall(text)]
306
+ out = set()
307
+ parts = urlparse(url)
308
+ for l in links:
309
+ if isinstance(l, tuple):
310
+ l = l[1]
311
+ if l.startswith("/") and len(l) > 1:
312
+ # absolute URL on this server
313
+ l = parts.scheme + "://" + parts.netloc + l
314
+ if l.startswith("http"):
315
+ if self.same_schema and l.startswith(url.rstrip("/") + "/"):
316
+ out.add(l)
317
+ elif l.replace("https", "http").startswith(
318
+ url.replace("https", "http").rstrip("/") + "/"
319
+ ):
320
+ # allowed to cross http <-> https
321
+ out.add(l)
322
+ else:
323
+ if l not in ["..", "../"]:
324
+ # Ignore FTP-like "parent"
325
+ out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
326
+ if not out and url.endswith("/"):
327
+ out = self._ls_real(url.rstrip("/"), detail=False)
328
+ if detail:
329
+ return [
330
+ {
331
+ "name": u,
332
+ "size": None,
333
+ "type": "directory" if u.endswith("/") else "file",
334
+ }
335
+ for u in out
336
+ ]
337
+ else:
338
+ return sorted(out)
339
+
340
+ def ls(self, url, detail=True, **kwargs):
341
+ if self.use_listings_cache and url in self.dircache:
342
+ out = self.dircache[url]
343
+ else:
344
+ out = self._ls_real(url, detail=detail, **kwargs)
345
+ self.dircache[url] = out
346
+ return out
347
+
348
+ def _raise_not_found_for_status(self, response, url):
349
+ """
350
+ Raises FileNotFoundError for 404s, otherwise uses raise_for_status.
351
+ """
352
+ if response.status_code == 404:
353
+ raise FileNotFoundError(url)
354
+ response.raise_for_status()
355
+
356
+ def cat_file(self, url, start=None, end=None, **kwargs):
357
+ kw = self.kwargs.copy()
358
+ kw.update(kwargs)
359
+ logger.debug(url)
360
+
361
+ if start is not None or end is not None:
362
+ if start == end:
363
+ return b""
364
+ headers = kw.pop("headers", {}).copy()
365
+
366
+ headers["Range"] = self._process_limits(url, start, end)
367
+ kw["headers"] = headers
368
+ r = self.session.get(self.encode_url(url), **kw)
369
+ self._raise_not_found_for_status(r, url)
370
+ return r.content
371
+
372
+ def get_file(
373
+ self, rpath, lpath, chunk_size=5 * 2**20, callback=_DEFAULT_CALLBACK, **kwargs
374
+ ):
375
+ kw = self.kwargs.copy()
376
+ kw.update(kwargs)
377
+ logger.debug(rpath)
378
+ r = self.session.get(self.encode_url(rpath), **kw)
379
+ try:
380
+ size = int(
381
+ r.headers.get("content-length", None)
382
+ or r.headers.get("Content-Length", None)
383
+ )
384
+ except (ValueError, KeyError, TypeError):
385
+ size = None
386
+
387
+ callback.set_size(size)
388
+ self._raise_not_found_for_status(r, rpath)
389
+ if not isfilelike(lpath):
390
+ lpath = open(lpath, "wb")
391
+ for chunk in r.iter_content(chunk_size, decode_unicode=False):
392
+ lpath.write(chunk)
393
+ callback.relative_update(len(chunk))
394
+
395
+ def put_file(
396
+ self,
397
+ lpath,
398
+ rpath,
399
+ chunk_size=5 * 2**20,
400
+ callback=_DEFAULT_CALLBACK,
401
+ method="post",
402
+ **kwargs,
403
+ ):
404
+ def gen_chunks():
405
+ # Support passing arbitrary file-like objects
406
+ # and use them instead of streams.
407
+ if isinstance(lpath, io.IOBase):
408
+ context = nullcontext(lpath)
409
+ use_seek = False # might not support seeking
410
+ else:
411
+ context = open(lpath, "rb")
412
+ use_seek = True
413
+
414
+ with context as f:
415
+ if use_seek:
416
+ callback.set_size(f.seek(0, 2))
417
+ f.seek(0)
418
+ else:
419
+ callback.set_size(getattr(f, "size", None))
420
+
421
+ chunk = f.read(chunk_size)
422
+ while chunk:
423
+ yield chunk
424
+ callback.relative_update(len(chunk))
425
+ chunk = f.read(chunk_size)
426
+
427
+ kw = self.kwargs.copy()
428
+ kw.update(kwargs)
429
+
430
+ method = method.lower()
431
+ if method not in ("post", "put"):
432
+ raise ValueError(
433
+ f"method has to be either 'post' or 'put', not: {method!r}"
434
+ )
435
+
436
+ meth = getattr(self.session, method)
437
+ resp = meth(rpath, data=gen_chunks(), **kw)
438
+ self._raise_not_found_for_status(resp, rpath)
439
+
440
+ def _process_limits(self, url, start, end):
441
+ """Helper for "Range"-based _cat_file"""
442
+ size = None
443
+ suff = False
444
+ if start is not None and start < 0:
445
+ # if start is negative and end None, end is the "suffix length"
446
+ if end is None:
447
+ end = -start
448
+ start = ""
449
+ suff = True
450
+ else:
451
+ size = size or self.info(url)["size"]
452
+ start = size + start
453
+ elif start is None:
454
+ start = 0
455
+ if not suff:
456
+ if end is not None and end < 0:
457
+ if start is not None:
458
+ size = size or self.info(url)["size"]
459
+ end = size + end
460
+ elif end is None:
461
+ end = ""
462
+ if isinstance(end, int):
463
+ end -= 1 # bytes range is inclusive
464
+ return f"bytes={start}-{end}"
465
+
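
(Illustration, not part of the file: the Range strings produced by _process_limits for a few simple cases. These particular calls never hit the network because no file size is needed; the URL is a placeholder and a plain CPython environment with requests installed is assumed.)

    from fsspec.implementations.http_sync import HTTPFileSystem

    fs = HTTPFileSystem()  # the blocking variant defined in this module
    assert fs._process_limits("http://example.com/x", 0, 100) == "bytes=0-99"
    assert fs._process_limits("http://example.com/x", None, 100) == "bytes=0-99"
    assert fs._process_limits("http://example.com/x", 5, None) == "bytes=5-"
    # Negative start with no end means "the last N bytes" (HTTP suffix range):
    assert fs._process_limits("http://example.com/x", -100, None) == "bytes=-100"
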
466
+ def exists(self, path, **kwargs):
467
+ kw = self.kwargs.copy()
468
+ kw.update(kwargs)
469
+ try:
470
+ logger.debug(path)
471
+ r = self.session.get(self.encode_url(path), **kw)
472
+ return r.status_code < 400
473
+ except Exception:
474
+ return False
475
+
476
+ def isfile(self, path, **kwargs):
477
+ return self.exists(path, **kwargs)
478
+
479
+ def _open(
480
+ self,
481
+ path,
482
+ mode="rb",
483
+ block_size=None,
484
+ autocommit=None, # XXX: This differs from the base class.
485
+ cache_type=None,
486
+ cache_options=None,
487
+ size=None,
488
+ **kwargs,
489
+ ):
490
+ """Make a file-like object
491
+
492
+ Parameters
493
+ ----------
494
+ path: str
495
+ Full URL with protocol
496
+ mode: string
497
+ must be "rb"
498
+ block_size: int or None
499
+ Bytes to download in one request; use instance value if None. If
500
+ zero, will return a streaming Requests file-like instance.
501
+ kwargs: key-value
502
+ Any other parameters, passed to requests calls
503
+ """
504
+ if mode != "rb":
505
+ raise NotImplementedError
506
+ block_size = block_size if block_size is not None else self.block_size
507
+ kw = self.kwargs.copy()
508
+ kw.update(kwargs)
509
+ size = size or self.info(path, **kwargs)["size"]
510
+ if block_size and size:
511
+ return HTTPFile(
512
+ self,
513
+ path,
514
+ session=self.session,
515
+ block_size=block_size,
516
+ mode=mode,
517
+ size=size,
518
+ cache_type=cache_type or self.cache_type,
519
+ cache_options=cache_options or self.cache_options,
520
+ **kw,
521
+ )
522
+ else:
523
+ return HTTPStreamFile(
524
+ self,
525
+ path,
526
+ mode=mode,
527
+ session=self.session,
528
+ **kw,
529
+ )
530
+
531
+ def ukey(self, url):
532
+ """Unique identifier; assume HTTP files are static, unchanging"""
533
+ return tokenize(url, self.kwargs, self.protocol)
534
+
535
+ def info(self, url, **kwargs):
536
+ """Get info of URL
537
+
538
+ Tries to access location via HEAD, and then GET methods, but does
539
+ not fetch the data.
540
+
541
+ It is possible that the server does not supply any size information, in
542
+ which case size will be given as None (and certain operations on the
543
+ corresponding file will not work).
544
+ """
545
+ info = {}
546
+ for policy in ["head", "get"]:
547
+ try:
548
+ info.update(
549
+ _file_info(
550
+ self.encode_url(url),
551
+ size_policy=policy,
552
+ session=self.session,
553
+ **self.kwargs,
554
+ **kwargs,
555
+ )
556
+ )
557
+ if info.get("size") is not None:
558
+ break
559
+ except Exception as exc:
560
+ if policy == "get":
561
+ # If get failed, then raise a FileNotFoundError
562
+ raise FileNotFoundError(url) from exc
563
+ logger.debug(str(exc))
564
+
565
+ return {"name": url, "size": None, **info, "type": "file"}
566
+
567
+ def glob(self, path, maxdepth=None, **kwargs):
568
+ """
569
+ Find files by glob-matching.
570
+
571
+ This implementation is identical to the one in AbstractFileSystem,
572
+ but "?" is not considered as a character for globbing, because it is
573
+ so common in URLs, often identifying the "query" part.
574
+ """
575
+ import re
576
+
577
+ ends = path.endswith("/")
578
+ path = self._strip_protocol(path)
579
+ indstar = path.find("*") if path.find("*") >= 0 else len(path)
580
+ indbrace = path.find("[") if path.find("[") >= 0 else len(path)
581
+
582
+ ind = min(indstar, indbrace)
583
+
584
+ detail = kwargs.pop("detail", False)
585
+
586
+ if not has_magic(path):
587
+ root = path
588
+ depth = 1
589
+ if ends:
590
+ path += "/*"
591
+ elif self.exists(path):
592
+ if not detail:
593
+ return [path]
594
+ else:
595
+ return {path: self.info(path)}
596
+ else:
597
+ if not detail:
598
+ return [] # glob of non-existent returns empty
599
+ else:
600
+ return {}
601
+ elif "/" in path[:ind]:
602
+ ind2 = path[:ind].rindex("/")
603
+ root = path[: ind2 + 1]
604
+ depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1
605
+ else:
606
+ root = ""
607
+ depth = None if "**" in path else path[ind + 1 :].count("/") + 1
608
+
609
+ allpaths = self.find(
610
+ root, maxdepth=maxdepth or depth, withdirs=True, detail=True, **kwargs
611
+ )
612
+ # Escape characters special to python regex, leaving our supported
613
+ # special characters in place.
614
+ # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
615
+ # for shell globbing details.
616
+ pattern = (
617
+ "^"
618
+ + (
619
+ path.replace("\\", r"\\")
620
+ .replace(".", r"\.")
621
+ .replace("+", r"\+")
622
+ .replace("//", "/")
623
+ .replace("(", r"\(")
624
+ .replace(")", r"\)")
625
+ .replace("|", r"\|")
626
+ .replace("^", r"\^")
627
+ .replace("$", r"\$")
628
+ .replace("{", r"\{")
629
+ .replace("}", r"\}")
630
+ .rstrip("/")
631
+ )
632
+ + "$"
633
+ )
634
+ pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern)
635
+ pattern = re.sub("[*]", "[^/]*", pattern)
636
+ pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*"))
637
+ out = {
638
+ p: allpaths[p]
639
+ for p in sorted(allpaths)
640
+ if pattern.match(p.replace("//", "/").rstrip("/"))
641
+ }
642
+ if detail:
643
+ return out
644
+ else:
645
+ return list(out)
646
+
647
+ def isdir(self, path):
648
+ # override, since all URLs are (also) files
649
+ try:
650
+ return bool(self.ls(path))
651
+ except (FileNotFoundError, ValueError):
652
+ return False
653
+
654
+
655
+ class HTTPFile(AbstractBufferedFile):
656
+ """
657
+ A file-like object pointing to a remote HTTP(S) resource
658
+
659
+ Supports only reading, with read-ahead of a predetermined block-size.
660
+
661
+ In the case that the server does not supply the filesize, only reading of
662
+ the complete file in one go is supported.
663
+
664
+ Parameters
665
+ ----------
666
+ url: str
667
+ Full URL of the remote resource, including the protocol
668
+ session: requests.Session or None
669
+ All calls will be made within this session, to avoid restarting
670
+ connections where the server allows this
671
+ block_size: int or None
672
+ The amount of read-ahead to do, in bytes. Default is 5MB, or the value
673
+ configured for the FileSystem creating this file
674
+ size: None or int
675
+ If given, this is the size of the file in bytes, and we don't attempt
676
+ to call the server to find the value.
677
+ kwargs: all other key-values are passed to requests calls.
678
+ """
679
+
680
+ def __init__(
681
+ self,
682
+ fs,
683
+ url,
684
+ session=None,
685
+ block_size=None,
686
+ mode="rb",
687
+ cache_type="bytes",
688
+ cache_options=None,
689
+ size=None,
690
+ **kwargs,
691
+ ):
692
+ if mode != "rb":
693
+ raise NotImplementedError("File mode not supported")
694
+ self.url = url
695
+ self.session = session
696
+ self.details = {"name": url, "size": size, "type": "file"}
697
+ super().__init__(
698
+ fs=fs,
699
+ path=url,
700
+ mode=mode,
701
+ block_size=block_size,
702
+ cache_type=cache_type,
703
+ cache_options=cache_options,
704
+ **kwargs,
705
+ )
706
+
707
+ def read(self, length=-1):
708
+ """Read bytes from file
709
+
710
+ Parameters
711
+ ----------
712
+ length: int
713
+ Read up to this many bytes. If negative, read all content to end of
714
+ file. If the server has not supplied the filesize, attempting to
715
+ read only part of the data will raise a ValueError.
716
+ """
717
+ if (
718
+ (length < 0 and self.loc == 0) # explicit read all
719
+ # but not when the size is known and fits into a block anyways
720
+ and not (self.size is not None and self.size <= self.blocksize)
721
+ ):
722
+ self._fetch_all()
723
+ if self.size is None:
724
+ if length < 0:
725
+ self._fetch_all()
726
+ else:
727
+ length = min(self.size - self.loc, length)
728
+ return super().read(length)
729
+
730
+ def _fetch_all(self):
731
+ """Read whole file in one shot, without caching
732
+
733
+ This is only called when position is still at zero,
734
+ and read() is called without a byte-count.
735
+ """
736
+ logger.debug(f"Fetch all for {self}")
737
+ if not isinstance(self.cache, AllBytes):
738
+ r = self.session.get(self.fs.encode_url(self.url), **self.kwargs)
739
+ r.raise_for_status()
740
+ out = r.content
741
+ self.cache = AllBytes(size=len(out), fetcher=None, blocksize=None, data=out)
742
+ self.size = len(out)
743
+
744
+ def _parse_content_range(self, headers):
745
+ """Parse the Content-Range header"""
746
+ s = headers.get("Content-Range", "")
747
+ m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", s)
748
+ if not m:
749
+ return None, None, None
750
+
751
+ if m[1] == "*":
752
+ start = end = None
753
+ else:
754
+ start, end = [int(x) for x in m[1].split("-")]
755
+ total = None if m[2] == "*" else int(m[2])
756
+ return start, end, total
757
+
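# Illustrative sketch (not part of the upstream file): how the parsing above
# treats typical Content-Range values. The header strings below are made up.
import re

def parse_content_range(value):
    # mirrors HTTPFile._parse_content_range for a raw header value
    m = re.match(r"bytes (\d+-\d+|\*)/(\d+|\*)", value)
    if not m:
        return None, None, None
    start, end = (None, None) if m[1] == "*" else [int(x) for x in m[1].split("-")]
    total = None if m[2] == "*" else int(m[2])
    return start, end, total

assert parse_content_range("bytes 0-499/1234") == (0, 499, 1234)   # ranged reply
assert parse_content_range("bytes */1234") == (None, None, 1234)   # size only
assert parse_content_range("") == (None, None, None)               # header absent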
758
+ def _fetch_range(self, start, end):
759
+ """Download a block of data
760
+
761
+ The expectation is that the server returns only the requested bytes,
762
+ with HTTP code 206. If this is not the case, we first check the headers,
763
+ and then stream the output - if the data size is bigger than we
764
+ requested, an exception is raised.
765
+ """
766
+ logger.debug(f"Fetch range for {self}: {start}-{end}")
767
+ kwargs = self.kwargs.copy()
768
+ headers = kwargs.pop("headers", {}).copy()
769
+ headers["Range"] = f"bytes={start}-{end - 1}"
770
+ logger.debug("%s : %s", self.url, headers["Range"])
771
+ r = self.session.get(self.fs.encode_url(self.url), headers=headers, **kwargs)
772
+ if r.status_code == 416:
773
+ # range request outside file
774
+ return b""
775
+ r.raise_for_status()
776
+
777
+ # If the server has handled the range request, it should reply
778
+ # with status 206 (partial content). But we'll guess that a suitable
779
+ # Content-Range header or a Content-Length no more than the
780
+ # requested range also mean we have got the desired range.
781
+ cl = r.headers.get("Content-Length", r.headers.get("content-length", end + 1))
782
+ response_is_range = (
783
+ r.status_code == 206
784
+ or self._parse_content_range(r.headers)[0] == start
785
+ or int(cl) <= end - start
786
+ )
787
+
788
+ if response_is_range:
789
+ # partial content, as expected
790
+ out = r.content
791
+ elif start > 0:
792
+ raise ValueError(
793
+ "The HTTP server doesn't appear to support range requests. "
794
+ "Only reading this file from the beginning is supported. "
795
+ "Open with block_size=0 for a streaming file interface."
796
+ )
797
+ else:
798
+ # Response is not a range, but we want the start of the file,
799
+ # so we can read the required amount anyway.
800
+ cl = 0
801
+ out = []
802
+ for chunk in r.iter_content(2**20, False):
803
+ out.append(chunk)
804
+ cl += len(chunk)
805
+ out = b"".join(out)[: end - start]
806
+ return out
807
+
808
+
809
+ magic_check = re.compile("([*[])")
810
+
811
+
812
+ def has_magic(s):
813
+ match = magic_check.search(s)
814
+ return match is not None
815
+
816
+
817
+ class HTTPStreamFile(AbstractBufferedFile):
818
+ def __init__(self, fs, url, mode="rb", session=None, **kwargs):
819
+ self.url = url
820
+ self.session = session
821
+ if mode != "rb":
822
+ raise ValueError
823
+ self.details = {"name": url, "size": None}
824
+ super().__init__(fs=fs, path=url, mode=mode, cache_type="readahead", **kwargs)
825
+
826
+ r = self.session.get(self.fs.encode_url(url), stream=True, **kwargs)
827
+ self.fs._raise_not_found_for_status(r, url)
828
+ self.it = r.iter_content(1024, False)
829
+ self.leftover = b""
830
+
831
+ self.r = r
832
+
833
+ def seek(self, *args, **kwargs):
834
+ raise ValueError("Cannot seek streaming HTTP file")
835
+
836
+ def read(self, num=-1):
837
+ bufs = [self.leftover]
838
+ leng = len(self.leftover)
839
+ while leng < num or num < 0:
840
+ try:
841
+ out = self.it.__next__()
842
+ except StopIteration:
843
+ break
844
+ if out:
845
+ bufs.append(out)
846
+ else:
847
+ break
848
+ leng += len(out)
849
+ out = b"".join(bufs)
850
+ if num >= 0:
851
+ self.leftover = out[num:]
852
+ out = out[:num]
853
+ else:
854
+ self.leftover = b""
855
+ self.loc += len(out)
856
+ return out
857
+
858
+ def close(self):
859
+ self.r.close()
860
+ self.closed = True
861
+
862
+
863
+ def get_range(session, url, start, end, **kwargs):
864
+ # explicitly get a range when we know it must be safe
865
+ kwargs = kwargs.copy()
866
+ headers = kwargs.pop("headers", {}).copy()
867
+ headers["Range"] = f"bytes={start}-{end - 1}"
868
+ r = session.get(url, headers=headers, **kwargs)
869
+ r.raise_for_status()
870
+ return r.content
871
+
872
+
873
+ def _file_info(url, session, size_policy="head", **kwargs):
874
+ """Call HEAD on the server to get details about the file (size/checksum etc.)
875
+
876
+ Default operation is to explicitly allow redirects and use encoding
877
+ 'identity' (no compression) to get the true size of the target.
878
+ """
879
+ logger.debug("Retrieve file size for %s", url)
880
+ kwargs = kwargs.copy()
881
+ ar = kwargs.pop("allow_redirects", True)
882
+ head = kwargs.get("headers", {}).copy()
883
+ # TODO: not allowed in JS
884
+ # head["Accept-Encoding"] = "identity"
885
+ kwargs["headers"] = head
886
+
887
+ info = {}
888
+ if size_policy == "head":
889
+ r = session.head(url, allow_redirects=ar, **kwargs)
890
+ elif size_policy == "get":
891
+ r = session.get(url, allow_redirects=ar, **kwargs)
892
+ else:
893
+ raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
894
+ r.raise_for_status()
895
+
896
+ # TODO:
897
+ # recognise lack of 'Accept-Ranges',
898
+ # or 'Accept-Ranges': 'none' (not 'bytes')
899
+ # to mean streaming only, no random access => return None
900
+ if "Content-Length" in r.headers:
901
+ info["size"] = int(r.headers["Content-Length"])
902
+ elif "Content-Range" in r.headers:
903
+ info["size"] = int(r.headers["Content-Range"].split("/")[1])
904
+ elif "content-length" in r.headers:
905
+ info["size"] = int(r.headers["content-length"])
906
+ elif "content-range" in r.headers:
907
+ info["size"] = int(r.headers["content-range"].split("/")[1])
908
+
909
+ for checksum_field in ["ETag", "Content-MD5", "Digest"]:
910
+ if r.headers.get(checksum_field):
911
+ info[checksum_field] = r.headers[checksum_field]
912
+
913
+ return info
914
+
915
+
916
+ # importing this is enough to register it
917
+ def register():
918
+ register_implementation("http", HTTPFileSystem, clobber=True)
919
+ register_implementation("https", HTTPFileSystem, clobber=True)
920
+ register_implementation("sync-http", HTTPFileSystem, clobber=True)
921
+ register_implementation("sync-https", HTTPFileSystem, clobber=True)
922
+
923
+
924
+ register()
925
+
926
+
927
+ def unregister():
928
+ from fsspec.implementations.http import HTTPFileSystem
929
+
930
+ register_implementation("http", HTTPFileSystem, clobber=True)
931
+ register_implementation("https", HTTPFileSystem, clobber=True)
temp_venv/lib/python3.13/site-packages/fsspec/implementations/jupyter.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import re
4
+
5
+ import requests
6
+
7
+ import fsspec
8
+
9
+
10
+ class JupyterFileSystem(fsspec.AbstractFileSystem):
11
+ """View of the files as seen by a Jupyter server (notebook or lab)"""
12
+
13
+ protocol = ("jupyter", "jlab")
14
+
15
+ def __init__(self, url, tok=None, **kwargs):
16
+ """
17
+
18
+ Parameters
19
+ ----------
20
+ url : str
21
+ Base URL of the server, like "http://127.0.0.1:8888". May include
22
+ token in the string, which is given by the process when starting up
23
+ tok : str
24
+ If the token is obtained separately, can be given here
25
+ kwargs
26
+ """
27
+ if "?" in url:
28
+ if tok is None:
29
+ try:
30
+ tok = re.findall("token=([a-z0-9]+)", url)[0]
31
+ except IndexError as e:
32
+ raise ValueError("Could not determine token") from e
33
+ url = url.split("?", 1)[0]
34
+ self.url = url.rstrip("/") + "/api/contents"
35
+ self.session = requests.Session()
36
+ if tok:
37
+ self.session.headers["Authorization"] = f"token {tok}"
38
+
39
+ super().__init__(**kwargs)
40
+
41
+ def ls(self, path, detail=True, **kwargs):
42
+ path = self._strip_protocol(path)
43
+ r = self.session.get(f"{self.url}/{path}")
44
+ if r.status_code == 404:
45
+ return FileNotFoundError(path)
46
+ r.raise_for_status()
47
+ out = r.json()
48
+
49
+ if out["type"] == "directory":
50
+ out = out["content"]
51
+ else:
52
+ out = [out]
53
+ for o in out:
54
+ o["name"] = o.pop("path")
55
+ o.pop("content")
56
+ if o["type"] == "notebook":
57
+ o["type"] = "file"
58
+ if detail:
59
+ return out
60
+ return [o["name"] for o in out]
61
+
62
+ def cat_file(self, path, start=None, end=None, **kwargs):
63
+ path = self._strip_protocol(path)
64
+ r = self.session.get(f"{self.url}/{path}")
65
+ if r.status_code == 404:
66
+ return FileNotFoundError(path)
67
+ r.raise_for_status()
68
+ out = r.json()
69
+ if out["format"] == "text":
70
+ # data should be binary
71
+ b = out["content"].encode()
72
+ else:
73
+ b = base64.b64decode(out["content"])
74
+ return b[start:end]
75
+
76
+ def pipe_file(self, path, value, **_):
77
+ path = self._strip_protocol(path)
78
+ json = {
79
+ "name": path.rsplit("/", 1)[-1],
80
+ "path": path,
81
+ "size": len(value),
82
+ "content": base64.b64encode(value).decode(),
83
+ "format": "base64",
84
+ "type": "file",
85
+ }
86
+ self.session.put(f"{self.url}/{path}", json=json)
87
+
88
+ def mkdir(self, path, create_parents=True, **kwargs):
89
+ path = self._strip_protocol(path)
90
+ if create_parents and "/" in path:
91
+ self.mkdir(path.rsplit("/", 1)[0], True)
92
+ json = {
93
+ "name": path.rsplit("/", 1)[-1],
94
+ "path": path,
95
+ "size": None,
96
+ "content": None,
97
+ "type": "directory",
98
+ }
99
+ self.session.put(f"{self.url}/{path}", json=json)
100
+
101
+ def _rm(self, path):
102
+ path = self._strip_protocol(path)
103
+ self.session.delete(f"{self.url}/{path}")
104
+
105
+ def _open(self, path, mode="rb", **kwargs):
106
+ path = self._strip_protocol(path)
107
+ if mode == "rb":
108
+ data = self.cat_file(path)
109
+ return io.BytesIO(data)
110
+ else:
111
+ return SimpleFileWriter(self, path, mode="wb")
112
+
113
+
114
+ class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
115
+ def _upload_chunk(self, final=False):
116
+ """Never uploads a chunk until file is done
117
+
118
+ Not suitable for large files
119
+ """
120
+ if final is False:
121
+ return False
122
+ self.buffer.seek(0)
123
+ data = self.buffer.read()
124
+ self.fs.pipe_file(self.path, data)
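A minimal usage sketch for JupyterFileSystem (the server URL and token below are
placeholders for a locally running Jupyter server):

    fs = JupyterFileSystem("http://127.0.0.1:8888", tok="abc123")
    fs.mkdir("scratch")
    fs.pipe_file("scratch/hello.txt", b"hello")     # PUT with base64-encoded content
    print(fs.ls("scratch", detail=False))
    print(fs.cat_file("scratch/hello.txt"))         # b"hello"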
temp_venv/lib/python3.13/site-packages/fsspec/implementations/libarchive.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import contextmanager
2
+ from ctypes import (
3
+ CFUNCTYPE,
4
+ POINTER,
5
+ c_int,
6
+ c_longlong,
7
+ c_void_p,
8
+ cast,
9
+ create_string_buffer,
10
+ )
11
+
12
+ import libarchive
13
+ import libarchive.ffi as ffi
14
+
15
+ from fsspec import open_files
16
+ from fsspec.archive import AbstractArchiveFileSystem
17
+ from fsspec.implementations.memory import MemoryFile
18
+ from fsspec.utils import DEFAULT_BLOCK_SIZE
19
+
20
+ # Libarchive requires seekable files or memory only for certain archive
21
+ # types. However, since we read the directory first to cache the contents
22
+ # and also allow random access to any file, the file-like object needs
23
+ # to be seekable no matter what.
24
+
25
+ # Seek call-backs (not provided in the libarchive python wrapper)
26
+ SEEK_CALLBACK = CFUNCTYPE(c_longlong, c_int, c_void_p, c_longlong, c_int)
27
+ read_set_seek_callback = ffi.ffi(
28
+ "read_set_seek_callback", [ffi.c_archive_p, SEEK_CALLBACK], c_int, ffi.check_int
29
+ )
30
+ new_api = hasattr(ffi, "NO_OPEN_CB")
31
+
32
+
33
+ @contextmanager
34
+ def custom_reader(file, format_name="all", filter_name="all", block_size=ffi.page_size):
35
+ """Read an archive from a seekable file-like object.
36
+
37
+ The `file` object must support the standard `readinto` and `seek` methods.
38
+ """
39
+ buf = create_string_buffer(block_size)
40
+ buf_p = cast(buf, c_void_p)
41
+
42
+ def read_func(archive_p, context, ptrptr):
43
+ # readinto the buffer, returns number of bytes read
44
+ length = file.readinto(buf)
45
+ # write the address of the buffer into the pointer
46
+ ptrptr = cast(ptrptr, POINTER(c_void_p))
47
+ ptrptr[0] = buf_p
48
+ # tell libarchive how much data was written into the buffer
49
+ return length
50
+
51
+ def seek_func(archive_p, context, offset, whence):
52
+ file.seek(offset, whence)
53
+ # tell libarchvie the current position
54
+ return file.tell()
55
+
56
+ read_cb = ffi.READ_CALLBACK(read_func)
57
+ seek_cb = SEEK_CALLBACK(seek_func)
58
+
59
+ if new_api:
60
+ open_cb = ffi.NO_OPEN_CB
61
+ close_cb = ffi.NO_CLOSE_CB
62
+ else:
63
+ open_cb = libarchive.read.OPEN_CALLBACK(ffi.VOID_CB)
64
+ close_cb = libarchive.read.CLOSE_CALLBACK(ffi.VOID_CB)
65
+
66
+ with libarchive.read.new_archive_read(format_name, filter_name) as archive_p:
67
+ read_set_seek_callback(archive_p, seek_cb)
68
+ ffi.read_open(archive_p, None, open_cb, read_cb, close_cb)
69
+ yield libarchive.read.ArchiveRead(archive_p)
70
+
71
+
72
+ class LibArchiveFileSystem(AbstractArchiveFileSystem):
73
+ """Compressed archives as a file-system (read-only)
74
+
75
+ Supports the following formats:
76
+ tar, pax , cpio, ISO9660, zip, mtree, shar, ar, raw, xar, lha/lzh, rar
77
+ Microsoft CAB, 7-Zip, WARC
78
+
79
+ See the libarchive documentation for further restrictions.
80
+ https://www.libarchive.org/
81
+
82
+ Keeps file object open while instance lives. It only works in seekable
83
+ file-like objects. In case the filesystem does not support this kind of
84
+ file object, it is recommended to cache locally.
85
+
86
+ This class is pickleable, but not necessarily thread-safe (depends on the
87
+ platform). See libarchive documentation for details.
88
+ """
89
+
90
+ root_marker = ""
91
+ protocol = "libarchive"
92
+ cachable = False
93
+
94
+ def __init__(
95
+ self,
96
+ fo="",
97
+ mode="r",
98
+ target_protocol=None,
99
+ target_options=None,
100
+ block_size=DEFAULT_BLOCK_SIZE,
101
+ **kwargs,
102
+ ):
103
+ """
104
+ Parameters
105
+ ----------
106
+ fo: str or file-like
107
+ Contains ZIP, and must exist. If a str, will fetch file using
108
+ :meth:`~fsspec.open_files`, which must return one file exactly.
109
+ mode: str
110
+ Currently, only 'r' accepted
111
+ target_protocol: str (optional)
112
+ If ``fo`` is a string, this value can be used to override the
113
+ FS protocol inferred from a URL
114
+ target_options: dict (optional)
115
+ Kwargs passed when instantiating the target FS, if ``fo`` is
116
+ a string.
117
+ """
118
+ super().__init__(self, **kwargs)
119
+ if mode != "r":
120
+ raise ValueError("Only read from archive files accepted")
121
+ if isinstance(fo, str):
122
+ files = open_files(fo, protocol=target_protocol, **(target_options or {}))
123
+ if len(files) != 1:
124
+ raise ValueError(
125
+ f'Path "{fo}" did not resolve to exactly one file: "{files}"'
126
+ )
127
+ fo = files[0]
128
+ self.of = fo
129
+ self.fo = fo.__enter__() # the whole instance is a context
130
+ self.block_size = block_size
131
+ self.dir_cache = None
132
+
133
+ @contextmanager
134
+ def _open_archive(self):
135
+ self.fo.seek(0)
136
+ with custom_reader(self.fo, block_size=self.block_size) as arc:
137
+ yield arc
138
+
139
+ @classmethod
140
+ def _strip_protocol(cls, path):
141
+ # file paths are always relative to the archive root
142
+ return super()._strip_protocol(path).lstrip("/")
143
+
144
+ def _get_dirs(self):
145
+ fields = {
146
+ "name": "pathname",
147
+ "size": "size",
148
+ "created": "ctime",
149
+ "mode": "mode",
150
+ "uid": "uid",
151
+ "gid": "gid",
152
+ "mtime": "mtime",
153
+ }
154
+
155
+ if self.dir_cache is not None:
156
+ return
157
+
158
+ self.dir_cache = {}
159
+ list_names = []
160
+ with self._open_archive() as arc:
161
+ for entry in arc:
162
+ if not entry.isdir and not entry.isfile:
163
+ # Skip symbolic links, fifo entries, etc.
164
+ continue
165
+ self.dir_cache.update(
166
+ {
167
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
168
+ for dirname in self._all_dirnames(set(entry.name))
169
+ }
170
+ )
171
+ f = {key: getattr(entry, fields[key]) for key in fields}
172
+ f["type"] = "directory" if entry.isdir else "file"
173
+ list_names.append(entry.name)
174
+
175
+ self.dir_cache[f["name"]] = f
176
+ # libarchive does not seem to return an entry for the directories (at least
177
+ # not in all formats), so get the directories names from the files names
178
+ self.dir_cache.update(
179
+ {
180
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
181
+ for dirname in self._all_dirnames(list_names)
182
+ }
183
+ )
184
+
185
+ def _open(
186
+ self,
187
+ path,
188
+ mode="rb",
189
+ block_size=None,
190
+ autocommit=True,
191
+ cache_options=None,
192
+ **kwargs,
193
+ ):
194
+ path = self._strip_protocol(path)
195
+ if mode != "rb":
196
+ raise NotImplementedError
197
+
198
+ data = bytes()
199
+ with self._open_archive() as arc:
200
+ for entry in arc:
201
+ if entry.pathname != path:
202
+ continue
203
+
204
+ if entry.size == 0:
205
+ # empty file, so there are no blocks
206
+ break
207
+
208
+ for block in entry.get_blocks(entry.size):
209
+ data = block
210
+ break
211
+ else:
212
+ raise ValueError
213
+ return MemoryFile(fs=self, path=path, data=data)
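A minimal usage sketch for LibArchiveFileSystem ("archive.tar" and the member name
are placeholders; any seekable, libarchive-supported archive would work):

    fs = LibArchiveFileSystem("archive.tar")
    print(fs.ls(""))                          # listing comes from the cached directory scan
    with fs.open("data/member.txt") as f:     # member is copied into a MemoryFile
        content = f.read()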
temp_venv/lib/python3.13/site-packages/fsspec/implementations/local.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import io
3
+ import logging
4
+ import os
5
+ import os.path as osp
6
+ import shutil
7
+ import stat
8
+ import tempfile
9
+
10
+ from fsspec import AbstractFileSystem
11
+ from fsspec.compression import compr
12
+ from fsspec.core import get_compression
13
+ from fsspec.utils import isfilelike, stringify_path
14
+
15
+ logger = logging.getLogger("fsspec.local")
16
+
17
+
18
+ class LocalFileSystem(AbstractFileSystem):
19
+ """Interface to files on local storage
20
+
21
+ Parameters
22
+ ----------
23
+ auto_mkdir: bool
24
+ Whether, when opening a file, the directory containing it should
25
+ be created (if it doesn't already exist). This is assumed by pyarrow
26
+ code.
27
+ """
28
+
29
+ root_marker = "/"
30
+ protocol = "file", "local"
31
+ local_file = True
32
+
33
+ def __init__(self, auto_mkdir=False, **kwargs):
34
+ super().__init__(**kwargs)
35
+ self.auto_mkdir = auto_mkdir
36
+
37
+ @property
38
+ def fsid(self):
39
+ return "local"
40
+
41
+ def mkdir(self, path, create_parents=True, **kwargs):
42
+ path = self._strip_protocol(path)
43
+ if self.exists(path):
44
+ raise FileExistsError(path)
45
+ if create_parents:
46
+ self.makedirs(path, exist_ok=True)
47
+ else:
48
+ os.mkdir(path, **kwargs)
49
+
50
+ def makedirs(self, path, exist_ok=False):
51
+ path = self._strip_protocol(path)
52
+ os.makedirs(path, exist_ok=exist_ok)
53
+
54
+ def rmdir(self, path):
55
+ path = self._strip_protocol(path)
56
+ os.rmdir(path)
57
+
58
+ def ls(self, path, detail=False, **kwargs):
59
+ path = self._strip_protocol(path)
60
+ path_info = self.info(path)
61
+ infos = []
62
+ if path_info["type"] == "directory":
63
+ with os.scandir(path) as it:
64
+ for f in it:
65
+ try:
66
+ # Only get the info if requested since it is a bit expensive (the stat call inside)
67
+ # The strip_protocol is also used in info() and calls make_path_posix to always return posix paths
68
+ info = self.info(f) if detail else self._strip_protocol(f.path)
69
+ infos.append(info)
70
+ except FileNotFoundError:
71
+ pass
72
+ else:
73
+ infos = [path_info] if detail else [path_info["name"]]
74
+
75
+ return infos
76
+
77
+ def info(self, path, **kwargs):
78
+ if isinstance(path, os.DirEntry):
79
+ # scandir DirEntry
80
+ out = path.stat(follow_symlinks=False)
81
+ link = path.is_symlink()
82
+ if path.is_dir(follow_symlinks=False):
83
+ t = "directory"
84
+ elif path.is_file(follow_symlinks=False):
85
+ t = "file"
86
+ else:
87
+ t = "other"
88
+
89
+ size = out.st_size
90
+ if link:
91
+ try:
92
+ out2 = path.stat(follow_symlinks=True)
93
+ size = out2.st_size
94
+ except OSError:
95
+ size = 0
96
+ path = self._strip_protocol(path.path)
97
+ else:
98
+ # str or path-like
99
+ path = self._strip_protocol(path)
100
+ out = os.stat(path, follow_symlinks=False)
101
+ link = stat.S_ISLNK(out.st_mode)
102
+ if link:
103
+ out = os.stat(path, follow_symlinks=True)
104
+ size = out.st_size
105
+ if stat.S_ISDIR(out.st_mode):
106
+ t = "directory"
107
+ elif stat.S_ISREG(out.st_mode):
108
+ t = "file"
109
+ else:
110
+ t = "other"
111
+ result = {
112
+ "name": path,
113
+ "size": size,
114
+ "type": t,
115
+ "created": out.st_ctime,
116
+ "islink": link,
117
+ }
118
+ for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
119
+ result[field] = getattr(out, f"st_{field}")
120
+ if link:
121
+ result["destination"] = os.readlink(path)
122
+ return result
123
+
124
+ def lexists(self, path, **kwargs):
125
+ return osp.lexists(path)
126
+
127
+ def cp_file(self, path1, path2, **kwargs):
128
+ path1 = self._strip_protocol(path1)
129
+ path2 = self._strip_protocol(path2)
130
+ if self.auto_mkdir:
131
+ self.makedirs(self._parent(path2), exist_ok=True)
132
+ if self.isfile(path1):
133
+ shutil.copyfile(path1, path2)
134
+ elif self.isdir(path1):
135
+ self.mkdirs(path2, exist_ok=True)
136
+ else:
137
+ raise FileNotFoundError(path1)
138
+
139
+ def isfile(self, path):
140
+ path = self._strip_protocol(path)
141
+ return os.path.isfile(path)
142
+
143
+ def isdir(self, path):
144
+ path = self._strip_protocol(path)
145
+ return os.path.isdir(path)
146
+
147
+ def get_file(self, path1, path2, callback=None, **kwargs):
148
+ if isfilelike(path2):
149
+ with open(path1, "rb") as f:
150
+ shutil.copyfileobj(f, path2)
151
+ else:
152
+ return self.cp_file(path1, path2, **kwargs)
153
+
154
+ def put_file(self, path1, path2, callback=None, **kwargs):
155
+ return self.cp_file(path1, path2, **kwargs)
156
+
157
+ def mv(self, path1, path2, **kwargs):
158
+ path1 = self._strip_protocol(path1)
159
+ path2 = self._strip_protocol(path2)
160
+ shutil.move(path1, path2)
161
+
162
+ def link(self, src, dst, **kwargs):
163
+ src = self._strip_protocol(src)
164
+ dst = self._strip_protocol(dst)
165
+ os.link(src, dst, **kwargs)
166
+
167
+ def symlink(self, src, dst, **kwargs):
168
+ src = self._strip_protocol(src)
169
+ dst = self._strip_protocol(dst)
170
+ os.symlink(src, dst, **kwargs)
171
+
172
+ def islink(self, path) -> bool:
173
+ return os.path.islink(self._strip_protocol(path))
174
+
175
+ def rm_file(self, path):
176
+ os.remove(self._strip_protocol(path))
177
+
178
+ def rm(self, path, recursive=False, maxdepth=None):
179
+ if not isinstance(path, list):
180
+ path = [path]
181
+
182
+ for p in path:
183
+ p = self._strip_protocol(p)
184
+ if self.isdir(p):
185
+ if not recursive:
186
+ raise ValueError("Cannot delete directory, set recursive=True")
187
+ if osp.abspath(p) == os.getcwd():
188
+ raise ValueError("Cannot delete current working directory")
189
+ shutil.rmtree(p)
190
+ else:
191
+ os.remove(p)
192
+
193
+ def unstrip_protocol(self, name):
194
+ name = self._strip_protocol(name) # normalise for local/win/...
195
+ return f"file://{name}"
196
+
197
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
198
+ path = self._strip_protocol(path)
199
+ if self.auto_mkdir and "w" in mode:
200
+ self.makedirs(self._parent(path), exist_ok=True)
201
+ return LocalFileOpener(path, mode, fs=self, **kwargs)
202
+
203
+ def touch(self, path, truncate=True, **kwargs):
204
+ path = self._strip_protocol(path)
205
+ if self.auto_mkdir:
206
+ self.makedirs(self._parent(path), exist_ok=True)
207
+ if self.exists(path):
208
+ os.utime(path, None)
209
+ else:
210
+ open(path, "a").close()
211
+ if truncate:
212
+ os.truncate(path, 0)
213
+
214
+ def created(self, path):
215
+ info = self.info(path=path)
216
+ return datetime.datetime.fromtimestamp(
217
+ info["created"], tz=datetime.timezone.utc
218
+ )
219
+
220
+ def modified(self, path):
221
+ info = self.info(path=path)
222
+ return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
223
+
224
+ @classmethod
225
+ def _parent(cls, path):
226
+ path = cls._strip_protocol(path)
227
+ if os.sep == "/":
228
+ # posix native
229
+ return path.rsplit("/", 1)[0] or "/"
230
+ else:
231
+ # NT
232
+ path_ = path.rsplit("/", 1)[0]
233
+ if len(path_) <= 3:
234
+ if path_[1:2] == ":":
235
+ # nt root (something like c:/)
236
+ return path_[0] + ":/"
237
+ # More cases may be required here
238
+ return path_
239
+
240
+ @classmethod
241
+ def _strip_protocol(cls, path):
242
+ path = stringify_path(path)
243
+ if path.startswith("file://"):
244
+ path = path[7:]
245
+ elif path.startswith("file:"):
246
+ path = path[5:]
247
+ elif path.startswith("local://"):
248
+ path = path[8:]
249
+ elif path.startswith("local:"):
250
+ path = path[6:]
251
+
252
+ path = make_path_posix(path)
253
+ if os.sep != "/":
254
+ # This code-path is a stripped down version of
255
+ # > drive, path = ntpath.splitdrive(path)
256
+ if path[1:2] == ":":
257
+ # Absolute drive-letter path, e.g. X:\Windows
258
+ # Relative path with drive, e.g. X:Windows
259
+ drive, path = path[:2], path[2:]
260
+ elif path[:2] == "//":
261
+ # UNC drives, e.g. \\server\share or \\?\UNC\server\share
262
+ # Device drives, e.g. \\.\device or \\?\device
263
+ if (index1 := path.find("/", 2)) == -1 or (
264
+ index2 := path.find("/", index1 + 1)
265
+ ) == -1:
266
+ drive, path = path, ""
267
+ else:
268
+ drive, path = path[:index2], path[index2:]
269
+ else:
270
+ # Relative path, e.g. Windows
271
+ drive = ""
272
+
273
+ path = path.rstrip("/") or cls.root_marker
274
+ return drive + path
275
+
276
+ else:
277
+ return path.rstrip("/") or cls.root_marker
278
+
279
+ def _isfilestore(self):
280
+ # Inheriting from DaskFileSystem makes this False (S3, etc. were
281
+ # the original motivation). But we are a posix-like file system.
282
+ # See https://github.com/dask/dask/issues/5526
283
+ return True
284
+
285
+ def chmod(self, path, mode):
286
+ path = stringify_path(path)
287
+ return os.chmod(path, mode)
288
+
289
+
290
+ def make_path_posix(path):
291
+ """Make path generic and absolute for current OS"""
292
+ if not isinstance(path, str):
293
+ if isinstance(path, (list, set, tuple)):
294
+ return type(path)(make_path_posix(p) for p in path)
295
+ else:
296
+ path = stringify_path(path)
297
+ if not isinstance(path, str):
298
+ raise TypeError(f"could not convert {path!r} to string")
299
+ if os.sep == "/":
300
+ # Native posix
301
+ if path.startswith("/"):
302
+ # most common fast case for posix
303
+ return path
304
+ elif path.startswith("~"):
305
+ return osp.expanduser(path)
306
+ elif path.startswith("./"):
307
+ path = path[2:]
308
+ elif path == ".":
309
+ path = ""
310
+ return f"{os.getcwd()}/{path}"
311
+ else:
312
+ # NT handling
313
+ if path[0:1] == "/" and path[2:3] == ":":
314
+ # path is like "/c:/local/path"
315
+ path = path[1:]
316
+ if path[1:2] == ":":
317
+ # windows full path like "C:\\local\\path"
318
+ if len(path) <= 3:
319
+ # nt root (something like c:/)
320
+ return path[0] + ":/"
321
+ path = path.replace("\\", "/")
322
+ return path
323
+ elif path[0:1] == "~":
324
+ return make_path_posix(osp.expanduser(path))
325
+ elif path.startswith(("\\\\", "//")):
326
+ # windows UNC/DFS-style paths
327
+ return "//" + path[2:].replace("\\", "/")
328
+ elif path.startswith(("\\", "/")):
329
+ # windows relative path with root
330
+ path = path.replace("\\", "/")
331
+ return f"{osp.splitdrive(os.getcwd())[0]}{path}"
332
+ else:
333
+ path = path.replace("\\", "/")
334
+ if path.startswith("./"):
335
+ path = path[2:]
336
+ elif path == ".":
337
+ path = ""
338
+ return f"{make_path_posix(os.getcwd())}/{path}"
339
+
340
+
341
+ def trailing_sep(path):
342
+ """Return True if the path ends with a path separator.
343
+
344
+ A forward slash is always considered a path separator, even on Operating
345
+ Systems that normally use a backslash.
346
+ """
347
+ # TODO: if all incoming paths were posix-compliant then separator would
348
+ # always be a forward slash, simplifying this function.
349
+ # See https://github.com/fsspec/filesystem_spec/pull/1250
350
+ return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
351
+
352
+
353
+ class LocalFileOpener(io.IOBase):
354
+ def __init__(
355
+ self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
356
+ ):
357
+ logger.debug("open file: %s", path)
358
+ self.path = path
359
+ self.mode = mode
360
+ self.fs = fs
361
+ self.f = None
362
+ self.autocommit = autocommit
363
+ self.compression = get_compression(path, compression)
364
+ self.blocksize = io.DEFAULT_BUFFER_SIZE
365
+ self._open()
366
+
367
+ def _open(self):
368
+ if self.f is None or self.f.closed:
369
+ if self.autocommit or "w" not in self.mode:
370
+ self.f = open(self.path, mode=self.mode)
371
+ if self.compression:
372
+ compress = compr[self.compression]
373
+ self.f = compress(self.f, mode=self.mode)
374
+ else:
375
+ # TODO: check if path is writable?
376
+ i, name = tempfile.mkstemp()
377
+ os.close(i) # we want normal open and normal buffered file
378
+ self.temp = name
379
+ self.f = open(name, mode=self.mode)
380
+ if "w" not in self.mode:
381
+ self.size = self.f.seek(0, 2)
382
+ self.f.seek(0)
383
+ self.f.size = self.size
384
+
385
+ def _fetch_range(self, start, end):
386
+ # probably only used by cached FS
387
+ if "r" not in self.mode:
388
+ raise ValueError
389
+ self._open()
390
+ self.f.seek(start)
391
+ return self.f.read(end - start)
392
+
393
+ def __setstate__(self, state):
394
+ self.f = None
395
+ loc = state.pop("loc", None)
396
+ self.__dict__.update(state)
397
+ if "r" in state["mode"]:
398
+ self.f = None
399
+ self._open()
400
+ self.f.seek(loc)
401
+
402
+ def __getstate__(self):
403
+ d = self.__dict__.copy()
404
+ d.pop("f")
405
+ if "r" in self.mode:
406
+ d["loc"] = self.f.tell()
407
+ else:
408
+ if not self.f.closed:
409
+ raise ValueError("Cannot serialise open write-mode local file")
410
+ return d
411
+
412
+ def commit(self):
413
+ if self.autocommit:
414
+ raise RuntimeError("Can only commit if not already set to autocommit")
415
+ shutil.move(self.temp, self.path)
416
+
417
+ def discard(self):
418
+ if self.autocommit:
419
+ raise RuntimeError("Cannot discard if set to autocommit")
420
+ os.remove(self.temp)
421
+
422
+ def readable(self) -> bool:
423
+ return True
424
+
425
+ def writable(self) -> bool:
426
+ return "r" not in self.mode
427
+
428
+ def read(self, *args, **kwargs):
429
+ return self.f.read(*args, **kwargs)
430
+
431
+ def write(self, *args, **kwargs):
432
+ return self.f.write(*args, **kwargs)
433
+
434
+ def tell(self, *args, **kwargs):
435
+ return self.f.tell(*args, **kwargs)
436
+
437
+ def seek(self, *args, **kwargs):
438
+ return self.f.seek(*args, **kwargs)
439
+
440
+ def seekable(self, *args, **kwargs):
441
+ return self.f.seekable(*args, **kwargs)
442
+
443
+ def readline(self, *args, **kwargs):
444
+ return self.f.readline(*args, **kwargs)
445
+
446
+ def readlines(self, *args, **kwargs):
447
+ return self.f.readlines(*args, **kwargs)
448
+
449
+ def close(self):
450
+ return self.f.close()
451
+
452
+ def truncate(self, size=None) -> int:
453
+ return self.f.truncate(size)
454
+
455
+ @property
456
+ def closed(self):
457
+ return self.f.closed
458
+
459
+ def fileno(self):
460
+ return self.raw.fileno()
461
+
462
+ def flush(self) -> None:
463
+ self.f.flush()
464
+
465
+ def __iter__(self):
466
+ return self.f.__iter__()
467
+
468
+ def __getattr__(self, item):
469
+ return getattr(self.f, item)
470
+
471
+ def __enter__(self):
472
+ self._incontext = True
473
+ return self
474
+
475
+ def __exit__(self, exc_type, exc_value, traceback):
476
+ self._incontext = False
477
+ self.f.__exit__(exc_type, exc_value, traceback)
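A minimal usage sketch for LocalFileSystem, writing into a throwaway temporary
directory so nothing outside it is touched:

    import os
    import tempfile

    fs = LocalFileSystem(auto_mkdir=True)
    root = tempfile.mkdtemp()
    target = os.path.join(root, "sub", "file.txt")
    with fs.open(target, "wb") as f:          # auto_mkdir creates "sub/" on demand
        f.write(b"data")
    print(fs.info(target)["size"])            # 4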
temp_venv/lib/python3.13/site-packages/fsspec/implementations/memory.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from errno import ENOTEMPTY
6
+ from io import BytesIO
7
+ from pathlib import PurePath, PureWindowsPath
8
+ from typing import Any, ClassVar
9
+
10
+ from fsspec import AbstractFileSystem
11
+ from fsspec.implementations.local import LocalFileSystem
12
+ from fsspec.utils import stringify_path
13
+
14
+ logger = logging.getLogger("fsspec.memoryfs")
15
+
16
+
17
+ class MemoryFileSystem(AbstractFileSystem):
18
+ """A filesystem based on a dict of BytesIO objects
19
+
20
+ This is a global filesystem so instances of this class all point to the same
21
+ in-memory filesystem.
22
+ """
23
+
24
+ store: ClassVar[dict[str, Any]] = {} # global, do not overwrite!
25
+ pseudo_dirs = [""] # global, do not overwrite!
26
+ protocol = "memory"
27
+ root_marker = "/"
28
+
29
+ @classmethod
30
+ def _strip_protocol(cls, path):
31
+ if isinstance(path, PurePath):
32
+ if isinstance(path, PureWindowsPath):
33
+ return LocalFileSystem._strip_protocol(path)
34
+ else:
35
+ path = stringify_path(path)
36
+
37
+ if path.startswith("memory://"):
38
+ path = path[len("memory://") :]
39
+ if "::" in path or "://" in path:
40
+ return path.rstrip("/")
41
+ path = path.lstrip("/").rstrip("/")
42
+ return "/" + path if path else ""
43
+
44
+ def ls(self, path, detail=True, **kwargs):
45
+ path = self._strip_protocol(path)
46
+ if path in self.store:
47
+ # there is a key with this exact name
48
+ if not detail:
49
+ return [path]
50
+ return [
51
+ {
52
+ "name": path,
53
+ "size": self.store[path].size,
54
+ "type": "file",
55
+ "created": self.store[path].created.timestamp(),
56
+ }
57
+ ]
58
+ paths = set()
59
+ starter = path + "/"
60
+ out = []
61
+ for p2 in tuple(self.store):
62
+ if p2.startswith(starter):
63
+ if "/" not in p2[len(starter) :]:
64
+ # exact child
65
+ out.append(
66
+ {
67
+ "name": p2,
68
+ "size": self.store[p2].size,
69
+ "type": "file",
70
+ "created": self.store[p2].created.timestamp(),
71
+ }
72
+ )
73
+ elif len(p2) > len(starter):
74
+ # implied child directory
75
+ ppath = starter + p2[len(starter) :].split("/", 1)[0]
76
+ if ppath not in paths:
77
+ out = out or []
78
+ out.append(
79
+ {
80
+ "name": ppath,
81
+ "size": 0,
82
+ "type": "directory",
83
+ }
84
+ )
85
+ paths.add(ppath)
86
+ for p2 in self.pseudo_dirs:
87
+ if p2.startswith(starter):
88
+ if "/" not in p2[len(starter) :]:
89
+ # exact child pdir
90
+ if p2 not in paths:
91
+ out.append({"name": p2, "size": 0, "type": "directory"})
92
+ paths.add(p2)
93
+ else:
94
+ # directory implied by deeper pdir
95
+ ppath = starter + p2[len(starter) :].split("/", 1)[0]
96
+ if ppath not in paths:
97
+ out.append({"name": ppath, "size": 0, "type": "directory"})
98
+ paths.add(ppath)
99
+ if not out:
100
+ if path in self.pseudo_dirs:
101
+ # empty dir
102
+ return []
103
+ raise FileNotFoundError(path)
104
+ if detail:
105
+ return out
106
+ return sorted([f["name"] for f in out])
107
+
108
+ def mkdir(self, path, create_parents=True, **kwargs):
109
+ path = self._strip_protocol(path)
110
+ if path in self.store or path in self.pseudo_dirs:
111
+ raise FileExistsError(path)
112
+ if self._parent(path).strip("/") and self.isfile(self._parent(path)):
113
+ raise NotADirectoryError(self._parent(path))
114
+ if create_parents and self._parent(path).strip("/"):
115
+ try:
116
+ self.mkdir(self._parent(path), create_parents, **kwargs)
117
+ except FileExistsError:
118
+ pass
119
+ if path and path not in self.pseudo_dirs:
120
+ self.pseudo_dirs.append(path)
121
+
122
+ def makedirs(self, path, exist_ok=False):
123
+ try:
124
+ self.mkdir(path, create_parents=True)
125
+ except FileExistsError:
126
+ if not exist_ok:
127
+ raise
128
+
129
+ def pipe_file(self, path, value, mode="overwrite", **kwargs):
130
+ """Set the bytes of given file
131
+
132
+ Avoids copies of the data if possible
133
+ """
134
+ mode = "xb" if mode == "create" else "wb"
135
+ self.open(path, mode=mode, data=value)
136
+
137
+ def rmdir(self, path):
138
+ path = self._strip_protocol(path)
139
+ if path == "":
140
+ # silently avoid deleting FS root
141
+ return
142
+ if path in self.pseudo_dirs:
143
+ if not self.ls(path):
144
+ self.pseudo_dirs.remove(path)
145
+ else:
146
+ raise OSError(ENOTEMPTY, "Directory not empty", path)
147
+ else:
148
+ raise FileNotFoundError(path)
149
+
150
+ def info(self, path, **kwargs):
151
+ logger.debug("info: %s", path)
152
+ path = self._strip_protocol(path)
153
+ if path in self.pseudo_dirs or any(
154
+ p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
155
+ ):
156
+ return {
157
+ "name": path,
158
+ "size": 0,
159
+ "type": "directory",
160
+ }
161
+ elif path in self.store:
162
+ filelike = self.store[path]
163
+ return {
164
+ "name": path,
165
+ "size": filelike.size,
166
+ "type": "file",
167
+ "created": getattr(filelike, "created", None),
168
+ }
169
+ else:
170
+ raise FileNotFoundError(path)
171
+
172
+ def _open(
173
+ self,
174
+ path,
175
+ mode="rb",
176
+ block_size=None,
177
+ autocommit=True,
178
+ cache_options=None,
179
+ **kwargs,
180
+ ):
181
+ path = self._strip_protocol(path)
182
+ if "x" in mode and self.exists(path):
183
+ raise FileExistsError
184
+ if path in self.pseudo_dirs:
185
+ raise IsADirectoryError(path)
186
+ parent = path
187
+ while len(parent) > 1:
188
+ parent = self._parent(parent)
189
+ if self.isfile(parent):
190
+ raise FileExistsError(parent)
191
+ if mode in ["rb", "ab", "r+b"]:
192
+ if path in self.store:
193
+ f = self.store[path]
194
+ if mode == "ab":
195
+ # position at the end of file
196
+ f.seek(0, 2)
197
+ else:
198
+ # position at the beginning of file
199
+ f.seek(0)
200
+ return f
201
+ else:
202
+ raise FileNotFoundError(path)
203
+ elif mode in {"wb", "xb"}:
204
+ if mode == "xb" and self.exists(path):
205
+ raise FileExistsError
206
+ m = MemoryFile(self, path, kwargs.get("data"))
207
+ if not self._intrans:
208
+ m.commit()
209
+ return m
210
+ else:
211
+ name = self.__class__.__name__
212
+ raise ValueError(f"unsupported file mode for {name}: {mode!r}")
213
+
214
+ def cp_file(self, path1, path2, **kwargs):
215
+ path1 = self._strip_protocol(path1)
216
+ path2 = self._strip_protocol(path2)
217
+ if self.isfile(path1):
218
+ self.store[path2] = MemoryFile(
219
+ self, path2, self.store[path1].getvalue()
220
+ ) # implicit copy
221
+ elif self.isdir(path1):
222
+ if path2 not in self.pseudo_dirs:
223
+ self.pseudo_dirs.append(path2)
224
+ else:
225
+ raise FileNotFoundError(path1)
226
+
227
+ def cat_file(self, path, start=None, end=None, **kwargs):
228
+ logger.debug("cat: %s", path)
229
+ path = self._strip_protocol(path)
230
+ try:
231
+ return bytes(self.store[path].getbuffer()[start:end])
232
+ except KeyError as e:
233
+ raise FileNotFoundError(path) from e
234
+
235
+ def _rm(self, path):
236
+ path = self._strip_protocol(path)
237
+ try:
238
+ del self.store[path]
239
+ except KeyError as e:
240
+ raise FileNotFoundError(path) from e
241
+
242
+ def modified(self, path):
243
+ path = self._strip_protocol(path)
244
+ try:
245
+ return self.store[path].modified
246
+ except KeyError as e:
247
+ raise FileNotFoundError(path) from e
248
+
249
+ def created(self, path):
250
+ path = self._strip_protocol(path)
251
+ try:
252
+ return self.store[path].created
253
+ except KeyError as e:
254
+ raise FileNotFoundError(path) from e
255
+
256
+ def isfile(self, path):
257
+ path = self._strip_protocol(path)
258
+ return path in self.store
259
+
260
+ def rm(self, path, recursive=False, maxdepth=None):
261
+ if isinstance(path, str):
262
+ path = self._strip_protocol(path)
263
+ else:
264
+ path = [self._strip_protocol(p) for p in path]
265
+ paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
266
+ for p in reversed(paths):
267
+ if self.isfile(p):
268
+ self.rm_file(p)
269
+ # If the expanded path doesn't exist, it is only because the expanded
270
+ # path was a directory that does not exist in self.pseudo_dirs. This
271
+ # is possible if you directly create files without making the
272
+ # directories first.
273
+ elif not self.exists(p):
274
+ continue
275
+ else:
276
+ self.rmdir(p)
277
+
278
+
279
+ class MemoryFile(BytesIO):
280
+ """A BytesIO which can't close and works as a context manager
281
+
282
+ Can initialise with data. Each path should only be active once at any moment.
283
+
284
+ No need to provide fs, path if auto-committing (default)
285
+ """
286
+
287
+ def __init__(self, fs=None, path=None, data=None):
288
+ logger.debug("open file %s", path)
289
+ self.fs = fs
290
+ self.path = path
291
+ self.created = datetime.now(tz=timezone.utc)
292
+ self.modified = datetime.now(tz=timezone.utc)
293
+ if data:
294
+ super().__init__(data)
295
+ self.seek(0)
296
+
297
+ @property
298
+ def size(self):
299
+ return self.getbuffer().nbytes
300
+
301
+ def __enter__(self):
302
+ return self
303
+
304
+ def close(self):
305
+ pass
306
+
307
+ def discard(self):
308
+ pass
309
+
310
+ def commit(self):
311
+ self.fs.store[self.path] = self
312
+ self.modified = datetime.now(tz=timezone.utc)
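A minimal usage sketch for MemoryFileSystem; because the store is a class-level
dict, every instance sees the same in-memory contents:

    fs = MemoryFileSystem()
    fs.pipe_file("/dir/a.bin", b"abc")
    print(fs.ls("/dir", detail=False))        # ["/dir/a.bin"]
    print(fs.cat_file("/dir/a.bin"))          # b"abc"
    fs.rm("/dir", recursive=True)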
temp_venv/lib/python3.13/site-packages/fsspec/implementations/reference.py ADDED
@@ -0,0 +1,1305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import collections
3
+ import io
4
+ import itertools
5
+ import logging
6
+ import math
7
+ import os
8
+ from functools import lru_cache
9
+ from itertools import chain
10
+ from typing import TYPE_CHECKING, Literal
11
+
12
+ import fsspec.core
13
+ from fsspec.spec import AbstractBufferedFile
14
+
15
+ try:
16
+ import ujson as json
17
+ except ImportError:
18
+ if not TYPE_CHECKING:
19
+ import json
20
+
21
+ from fsspec.asyn import AsyncFileSystem
22
+ from fsspec.callbacks import DEFAULT_CALLBACK
23
+ from fsspec.core import filesystem, open, split_protocol
24
+ from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
25
+ from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
26
+
27
+ logger = logging.getLogger("fsspec.reference")
28
+
29
+
30
+ class ReferenceNotReachable(RuntimeError):
31
+ def __init__(self, reference, target, *args):
32
+ super().__init__(*args)
33
+ self.reference = reference
34
+ self.target = target
35
+
36
+ def __str__(self):
37
+ return f'Reference "{self.reference}" failed to fetch target {self.target}'
38
+
39
+
40
+ def _first(d):
41
+ return next(iter(d.values()))
42
+
43
+
44
+ def _prot_in_references(path, references):
45
+ ref = references.get(path)
46
+ if isinstance(ref, (list, tuple)) and isinstance(ref[0], str):
47
+ return split_protocol(ref[0])[0] if ref[0] else ref[0]
48
+
49
+
50
+ def _protocol_groups(paths, references):
51
+ if isinstance(paths, str):
52
+ return {_prot_in_references(paths, references): [paths]}
53
+ out = {}
54
+ for path in paths:
55
+ protocol = _prot_in_references(path, references)
56
+ out.setdefault(protocol, []).append(path)
57
+ return out
58
+
59
+
60
+ class RefsValuesView(collections.abc.ValuesView):
61
+ def __iter__(self):
62
+ for val in self._mapping.zmetadata.values():
63
+ yield json.dumps(val).encode()
64
+ yield from self._mapping._items.values()
65
+ for field in self._mapping.listdir():
66
+ chunk_sizes = self._mapping._get_chunk_sizes(field)
67
+ if len(chunk_sizes) == 0:
68
+ yield self._mapping[field + "/0"]
69
+ continue
70
+ yield from self._mapping._generate_all_records(field)
71
+
72
+
73
+ class RefsItemsView(collections.abc.ItemsView):
74
+ def __iter__(self):
75
+ return zip(self._mapping.keys(), self._mapping.values())
76
+
77
+
78
+ def ravel_multi_index(idx, sizes):
79
+ val = 0
80
+ mult = 1
81
+ for i, s in zip(idx[::-1], sizes[::-1]):
82
+ val += i * mult
83
+ mult *= s
84
+ return val
85
+
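# Illustrative check (not part of the upstream file): ravel_multi_index flattens
# an N-dimensional chunk index into a flat chunk number, row-major, so for a
# 3 x 4 chunk grid the index [1, 2] maps to 1 * 4 + 2 == 6.
assert ravel_multi_index([1, 2], [3, 4]) == 6
assert ravel_multi_index([0, 0], [3, 4]) == 0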
86
+
87
+ class LazyReferenceMapper(collections.abc.MutableMapping):
88
+ """This interface can be used to read/write references from Parquet stores.
89
+ It is not intended for other types of references.
90
+ It can be used with Kerchunk's MultiZarrToZarr method to combine
91
+ references into a parquet store.
92
+ Examples of this use-case can be found here:
93
+ https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
94
+
95
+ # import is class level to prevent numpy dep requirement for fsspec
96
+ @property
97
+ def np(self):
98
+ import numpy as np
99
+
100
+ return np
101
+
102
+ @property
103
+ def pd(self):
104
+ import pandas as pd
105
+
106
+ return pd
107
+
108
+ def __init__(
109
+ self,
110
+ root,
111
+ fs=None,
112
+ out_root=None,
113
+ cache_size=128,
114
+ categorical_threshold=10,
115
+ engine: Literal["fastparquet", "pyarrow"] = "fastparquet",
116
+ ):
117
+ """
118
+
119
+ This instance will be writable, storing changes in memory until full partitions
120
+ are accumulated or .flush() is called.
121
+
122
+ To create an empty lazy store, use .create()
123
+
124
+ Parameters
125
+ ----------
126
+ root : str
127
+ Root of parquet store
128
+ fs : fsspec.AbstractFileSystem
129
+ fsspec filesystem object, default is local filesystem.
130
+ cache_size : int, default=128
131
+ Maximum size of LRU cache, where cache_size*record_size denotes
132
+ the total number of references that can be loaded in memory at once.
133
+ categorical_threshold : int
134
+ Encode urls as pandas.Categorical to reduce memory footprint if the ratio
135
+ of the number of unique urls to total number of refs for each variable
136
+ is greater than or equal to this number. (default 10)
137
+ engine: Literal["fastparquet","pyarrow"]
138
+ Engine choice for reading parquet files. (default is "fastparquet")
139
+ """
140
+
141
+ self.root = root
142
+ self.chunk_sizes = {}
143
+ self.cat_thresh = categorical_threshold
144
+ self.engine = engine
145
+ self.cache_size = cache_size
146
+ self.url = self.root + "/{field}/refs.{record}.parq"
147
+ # TODO: derive fs from `root`
148
+ self.fs = fsspec.filesystem("file") if fs is None else fs
149
+ self.out_root = self.fs.unstrip_protocol(out_root or self.root)
150
+
151
+ from importlib.util import find_spec
152
+
153
+ if self.engine == "pyarrow" and find_spec("pyarrow") is None:
154
+ raise ImportError("engine choice `pyarrow` is not installed.")
155
+
156
+ def __getattr__(self, item):
157
+ if item in ("_items", "record_size", "zmetadata"):
158
+ self.setup()
159
+ # avoid possible recursion if setup fails somehow
160
+ return self.__dict__[item]
161
+ raise AttributeError(item)
162
+
163
+ def setup(self):
164
+ self._items = {}
165
+ self._items[".zmetadata"] = self.fs.cat_file(
166
+ "/".join([self.root, ".zmetadata"])
167
+ )
168
+ met = json.loads(self._items[".zmetadata"])
169
+ self.record_size = met["record_size"]
170
+ self.zmetadata = met["metadata"]
171
+
172
+ # Define function to open and decompress refs
173
+ @lru_cache(maxsize=self.cache_size)
174
+ def open_refs(field, record):
175
+ """cached parquet file loader"""
176
+ path = self.url.format(field=field, record=record)
177
+ data = io.BytesIO(self.fs.cat_file(path))
178
+ try:
179
+ df = self.pd.read_parquet(data, engine=self.engine)
180
+ refs = {c: df[c].to_numpy() for c in df.columns}
181
+ except OSError:
182
+ refs = None
183
+ return refs
184
+
185
+ self.open_refs = open_refs
186
+
187
+ @staticmethod
188
+ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
189
+ """Make empty parquet reference set
190
+
191
+ First deletes the contents of the given directory, if it exists.
192
+
193
+ Parameters
194
+ ----------
195
+ root: str
196
+ Directory to contain the output; will be created
197
+ storage_options: dict | None
198
+ For making the filesystem to use for writing is fs is None
199
+ fs: FileSystem | None
200
+ Filesystem for writing
201
+ record_size: int
202
+ Number of references per parquet file
203
+ kwargs: passed to __init__
204
+
205
+ Returns
206
+ -------
207
+ LazyReferenceMapper instance
208
+ """
209
+ met = {"metadata": {}, "record_size": record_size}
210
+ if fs is None:
211
+ fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
212
+ if fs.exists(root):
213
+ fs.rm(root, recursive=True)
214
+ fs.makedirs(root, exist_ok=True)
215
+ fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
216
+ return LazyReferenceMapper(root, fs, **kwargs)
217
+
218
+ @lru_cache()
219
+ def listdir(self):
220
+ """List top-level directories"""
221
+ dirs = (p.rsplit("/", 1)[0] for p in self.zmetadata if not p.startswith(".z"))
222
+ return set(dirs)
223
+
224
+ def ls(self, path="", detail=True):
225
+ """Shortcut file listings"""
226
+ path = path.rstrip("/")
227
+ pathdash = path + "/" if path else ""
228
+ dirnames = self.listdir()
229
+ dirs = [
230
+ d
231
+ for d in dirnames
232
+ if d.startswith(pathdash) and "/" not in d.lstrip(pathdash)
233
+ ]
234
+ if dirs:
235
+ others = {
236
+ f
237
+ for f in chain(
238
+ [".zmetadata"],
239
+ (name for name in self.zmetadata),
240
+ (name for name in self._items),
241
+ )
242
+ if f.startswith(pathdash) and "/" not in f.lstrip(pathdash)
243
+ }
244
+ if detail is False:
245
+ others.update(dirs)
246
+ return sorted(others)
247
+ dirinfo = [{"name": name, "type": "directory", "size": 0} for name in dirs]
248
+ fileinfo = [
249
+ {
250
+ "name": name,
251
+ "type": "file",
252
+ "size": len(
253
+ json.dumps(self.zmetadata[name])
254
+ if name in self.zmetadata
255
+ else self._items[name]
256
+ ),
257
+ }
258
+ for name in others
259
+ ]
260
+ return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
261
+ field = path
262
+ others = set(
263
+ [name for name in self.zmetadata if name.startswith(f"{path}/")]
264
+ + [name for name in self._items if name.startswith(f"{path}/")]
265
+ )
266
+ fileinfo = [
267
+ {
268
+ "name": name,
269
+ "type": "file",
270
+ "size": len(
271
+ json.dumps(self.zmetadata[name])
272
+ if name in self.zmetadata
273
+ else self._items[name]
274
+ ),
275
+ }
276
+ for name in others
277
+ ]
278
+ keys = self._keys_in_field(field)
279
+
280
+ if detail is False:
281
+ return list(others) + list(keys)
282
+ recs = self._generate_all_records(field)
283
+ recinfo = [
284
+ {"name": name, "type": "file", "size": rec[-1]}
285
+ for name, rec in zip(keys, recs)
286
+ if rec[0] # filters out path==None, deleted/missing
287
+ ]
288
+ return fileinfo + recinfo
289
+
290
+ def _load_one_key(self, key):
291
+ """Get the reference for one key
292
+
293
+ Returns bytes, one-element list or three-element list.
294
+ """
295
+ if key in self._items:
296
+ return self._items[key]
297
+ elif key in self.zmetadata:
298
+ return json.dumps(self.zmetadata[key]).encode()
299
+ elif "/" not in key or self._is_meta(key):
300
+ raise KeyError(key)
301
+ field, _ = key.rsplit("/", 1)
302
+ record, ri, chunk_size = self._key_to_record(key)
303
+ maybe = self._items.get((field, record), {}).get(ri, False)
304
+ if maybe is None:
305
+ # explicitly deleted
306
+ raise KeyError
307
+ elif maybe:
308
+ return maybe
309
+ elif chunk_size == 0:
310
+ return b""
311
+
312
+ # Chunk keys can be loaded from row group and cached in LRU cache
313
+ try:
314
+ refs = self.open_refs(field, record)
315
+ except (ValueError, TypeError, FileNotFoundError) as exc:
316
+ raise KeyError(key) from exc
317
+ columns = ["path", "offset", "size", "raw"]
318
+ selection = [refs[c][ri] if c in refs else None for c in columns]
319
+ raw = selection[-1]
320
+ if raw is not None:
321
+ return raw
322
+ if selection[0] is None:
323
+ raise KeyError("This reference does not exist or has been deleted")
324
+ if selection[1:3] == [0, 0]:
325
+ # URL only
326
+ return selection[:1]
327
+ # URL, offset, size
328
+ return selection[:3]
329
+
330
+ @lru_cache(4096)
331
+ def _key_to_record(self, key):
332
+ """Details needed to construct a reference for one key"""
333
+ field, chunk = key.rsplit("/", 1)
334
+ chunk_sizes = self._get_chunk_sizes(field)
335
+ if len(chunk_sizes) == 0:
336
+ return 0, 0, 0
337
+ chunk_idx = [int(c) for c in chunk.split(".")]
338
+ chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
339
+ record = chunk_number // self.record_size
340
+ ri = chunk_number % self.record_size
341
+ return record, ri, len(chunk_sizes)
342
+
343
+ def _get_chunk_sizes(self, field):
344
+ """The number of chunks along each axis for a given field"""
345
+ if field not in self.chunk_sizes:
346
+ zarray = self.zmetadata[f"{field}/.zarray"]
347
+ size_ratio = [
348
+ math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
349
+ ]
350
+ self.chunk_sizes[field] = size_ratio or [1]
351
+ return self.chunk_sizes[field]
352
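To make the addressing above concrete: a key like ``field/i.j`` is flattened into a single chunk number, which is then split into a parquet record index and a row within that record. A small worked sketch (numpy's ravel stands in for the module-level helper; the numbers are illustrative):

import numpy as np

record_size = 10000        # references per parquet file
chunk_sizes = [100, 50]    # chunks per axis, i.e. ceil(shape / chunks)

key = "field/72.31"
chunk_idx = [int(c) for c in key.split("/")[1].split(".")]
chunk_number = int(np.ravel_multi_index(chunk_idx, chunk_sizes))  # 72*50 + 31 = 3631

record = chunk_number // record_size   # 0    -> stored in refs.0.parq
row = chunk_number % record_size       # 3631 -> row within that file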
+
353
+ def _generate_record(self, field, record):
354
+ """The references for a given parquet file of a given field"""
355
+ refs = self.open_refs(field, record)
356
+ it = iter(zip(*refs.values()))
357
+ if len(refs) == 3:
358
+ # All urls
359
+ return (list(t) for t in it)
360
+ elif len(refs) == 1:
361
+ # All raws
362
+ return refs["raw"]
363
+ else:
364
+ # Mix of urls and raws
365
+ return (list(t[:3]) if not t[3] else t[3] for t in it)
366
+
367
+ def _generate_all_records(self, field):
368
+ """Load all the references within a field by iterating over the parquet files"""
369
+ nrec = 1
370
+ for ch in self._get_chunk_sizes(field):
371
+ nrec *= ch
372
+ nrec = math.ceil(nrec / self.record_size)
373
+ for record in range(nrec):
374
+ yield from self._generate_record(field, record)
375
+
376
+ def values(self):
377
+ return RefsValuesView(self)
378
+
379
+ def items(self):
380
+ return RefsItemsView(self)
381
+
382
+ def __hash__(self):
383
+ return id(self)
384
+
385
+ def __getitem__(self, key):
386
+ return self._load_one_key(key)
387
+
388
+ def __setitem__(self, key, value):
389
+ if "/" in key and not self._is_meta(key):
390
+ field, chunk = key.rsplit("/", 1)
391
+ record, i, _ = self._key_to_record(key)
392
+ subdict = self._items.setdefault((field, record), {})
393
+ subdict[i] = value
394
+ if len(subdict) == self.record_size:
395
+ self.write(field, record)
396
+ else:
397
+ # metadata or top-level
398
+ if hasattr(value, "to_bytes"):
399
+ val = value.to_bytes().decode()
400
+ elif isinstance(value, bytes):
401
+ val = value.decode()
402
+ else:
403
+ val = value
404
+ self._items[key] = val
405
+ new_value = json.loads(val)
406
+ self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
407
+
408
+ @staticmethod
409
+ def _is_meta(key):
410
+ return key.startswith(".z") or "/.z" in key
411
+
412
+ def __delitem__(self, key):
413
+ if key in self._items:
414
+ del self._items[key]
415
+ elif key in self.zmetadata:
416
+ del self.zmetadata[key]
417
+ else:
418
+ if "/" in key and not self._is_meta(key):
419
+ field, _ = key.rsplit("/", 1)
420
+ record, i, _ = self._key_to_record(key)
421
+ subdict = self._items.setdefault((field, record), {})
422
+ subdict[i] = None
423
+ if len(subdict) == self.record_size:
424
+ self.write(field, record)
425
+ else:
426
+ # metadata or top-level
427
+ self._items[key] = None
428
+
429
+ def write(self, field, record, base_url=None, storage_options=None):
430
+ # extra requirements if writing
431
+ import kerchunk.df
432
+ import numpy as np
433
+ import pandas as pd
434
+
435
+ partition = self._items[(field, record)]
436
+ original = False
437
+ if len(partition) < self.record_size:
438
+ try:
439
+ original = self.open_refs(field, record)
440
+ except OSError:
441
+ pass
442
+
443
+ if original:
444
+ paths = original["path"]
445
+ offsets = original["offset"]
446
+ sizes = original["size"]
447
+ raws = original["raw"]
448
+ else:
449
+ paths = np.full(self.record_size, np.nan, dtype="O")
450
+ offsets = np.zeros(self.record_size, dtype="int64")
451
+ sizes = np.zeros(self.record_size, dtype="int64")
452
+ raws = np.full(self.record_size, np.nan, dtype="O")
453
+ for j, data in partition.items():
454
+ if isinstance(data, list):
455
+ if (
456
+ str(paths.dtype) == "category"
457
+ and data[0] not in paths.dtype.categories
458
+ ):
459
+ paths = paths.add_categories(data[0])
460
+ paths[j] = data[0]
461
+ if len(data) > 1:
462
+ offsets[j] = data[1]
463
+ sizes[j] = data[2]
464
+ elif data is None:
465
+ # delete
466
+ paths[j] = None
467
+ offsets[j] = 0
468
+ sizes[j] = 0
469
+ raws[j] = None
470
+ else:
471
+ # this is the only call into kerchunk, could remove
472
+ raws[j] = kerchunk.df._proc_raw(data)
473
+ # TODO: only save needed columns
474
+ df = pd.DataFrame(
475
+ {
476
+ "path": paths,
477
+ "offset": offsets,
478
+ "size": sizes,
479
+ "raw": raws,
480
+ },
481
+ copy=False,
482
+ )
483
+ if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
484
+ df["path"] = df["path"].astype("category")
485
+ object_encoding = {"raw": "bytes", "path": "utf8"}
486
+ has_nulls = ["path", "raw"]
487
+
488
+ fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
489
+ self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
490
+
491
+ if self.engine == "pyarrow":
492
+ df_backend_kwargs = {"write_statistics": False}
493
+ elif self.engine == "fastparquet":
494
+ df_backend_kwargs = {
495
+ "stats": False,
496
+ "object_encoding": object_encoding,
497
+ "has_nulls": has_nulls,
498
+ }
499
+ else:
500
+ raise NotImplementedError(f"{self.engine} not supported")
501
+ df.to_parquet(
502
+ fn,
503
+ engine=self.engine,
504
+ storage_options=storage_options
505
+ or getattr(self.fs, "storage_options", None),
506
+ compression="zstd",
507
+ index=False,
508
+ **df_backend_kwargs,
509
+ )
510
+
511
+ partition.clear()
512
+ self._items.pop((field, record))
513
+
514
+ def flush(self, base_url=None, storage_options=None):
515
+ """Output any modified or deleted keys
516
+
517
+ Parameters
518
+ ----------
519
+ base_url: str
520
+ Location of the output
521
+ """
522
+
523
+ # write what we have so far and clear sub chunks
524
+ for thing in list(self._items):
525
+ if isinstance(thing, tuple):
526
+ field, record = thing
527
+ self.write(
528
+ field,
529
+ record,
530
+ base_url=base_url,
531
+ storage_options=storage_options,
532
+ )
533
+
534
+ # gather .zmetadata from self._items and write that too
535
+ for k in list(self._items):
536
+ if k != ".zmetadata" and ".z" in k:
537
+ self.zmetadata[k] = json.loads(self._items.pop(k))
538
+ met = {"metadata": self.zmetadata, "record_size": self.record_size}
539
+ self._items.clear()
540
+ self._items[".zmetadata"] = json.dumps(met).encode()
541
+ self.fs.pipe(
542
+ "/".join([base_url or self.out_root, ".zmetadata"]),
543
+ self._items[".zmetadata"],
544
+ )
545
+
546
+ # TODO: only clear those that we wrote to?
547
+ self.open_refs.cache_clear()
548
+
549
+ def __len__(self):
550
+ # Caveat: This counts expected references, not actual - but is fast
551
+ count = 0
552
+ for field in self.listdir():
553
+ if field.startswith("."):
554
+ count += 1
555
+ else:
556
+ count += math.prod(self._get_chunk_sizes(field))
557
+ count += len(self.zmetadata) # all metadata keys
558
+ # any other files not in reference partitions
559
+ count += sum(1 for _ in self._items if not isinstance(_, tuple))
560
+ return count
561
+
562
+ def __iter__(self):
563
+ # Caveat: returns only existing keys, so the number of these does not
564
+ # match len(self)
565
+ metas = set(self.zmetadata)
566
+ metas.update(self._items)
567
+ for bit in metas:
568
+ if isinstance(bit, str):
569
+ yield bit
570
+ for field in self.listdir():
571
+ for k in self._keys_in_field(field):
572
+ if k in self:
573
+ yield k
574
+
575
+ def __contains__(self, item):
576
+ try:
577
+ self._load_one_key(item)
578
+ return True
579
+ except KeyError:
580
+ return False
581
+
582
+ def _keys_in_field(self, field):
583
+ """List key names in given field
584
+
585
+ Produces strings like "field/x.y" appropriate from the chunking of the array
586
+ """
587
+ chunk_sizes = self._get_chunk_sizes(field)
588
+ if len(chunk_sizes) == 0:
589
+ yield field + "/0"
590
+ return
591
+ inds = itertools.product(*(range(i) for i in chunk_sizes))
592
+ for ind in inds:
593
+ yield field + "/" + ".".join([str(c) for c in ind])
594
+
595
+
596
+ class ReferenceFileSystem(AsyncFileSystem):
597
+ """View byte ranges of some other file as a file system
598
+ Initial version: single file system target, which must support
599
+ async, and must allow start and end args in _cat_file. Later versions
600
+ may allow multiple arbitrary URLs for the targets.
601
+ This FileSystem is read-only. It is designed to be used with async
602
+ targets (for now). We do not get original file details from the target FS.
603
+ Configuration is by passing a dict of references at init, or a URL to
604
+ a JSON file containing the same; this dict
605
+ can also contain concrete data for some set of paths.
606
+ Reference dict format:
607
+ {path0: bytes_data, path1: (target_url, offset, size)}
608
+ https://github.com/fsspec/kerchunk/blob/main/README.md
609
+ """
610
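An illustrative use of the dict form described above, against the in-memory filesystem (all names are made up):

import fsspec

fsspec.filesystem("memory").pipe_file("/target.bin", b"0123456789")

refs = {
    "a": b"inline data",                 # concrete bytes for path "a"
    "b": ["memory://target.bin", 0, 5],  # first 5 bytes of the target
    "c": ["memory://target.bin"],        # the whole target file
}
fs = fsspec.filesystem("reference", fo=refs, remote_protocol="memory")
print(fs.cat("a"))  # b'inline data'
print(fs.cat("b"))  # b'01234'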
+
611
+ protocol = "reference"
612
+ cachable = False
613
+
614
+ def __init__(
615
+ self,
616
+ fo,
617
+ target=None,
618
+ ref_storage_args=None,
619
+ target_protocol=None,
620
+ target_options=None,
621
+ remote_protocol=None,
622
+ remote_options=None,
623
+ fs=None,
624
+ template_overrides=None,
625
+ simple_templates=True,
626
+ max_gap=64_000,
627
+ max_block=256_000_000,
628
+ cache_size=128,
629
+ **kwargs,
630
+ ):
631
+ """
632
+ Parameters
633
+ ----------
634
+ fo : dict or str
635
+ The set of references to use for this instance, with a structure as above.
636
+ If str referencing a JSON file, will use fsspec.open, in conjunction
637
+ with target_options and target_protocol to open and parse JSON at this
638
+ location. If a directory, then assume references are a set of parquet
639
+ files to be loaded lazily.
640
+ target : str
641
+ For any references having target_url as None, this is the default file
642
+ target to use
643
+ ref_storage_args : dict
644
+ If references is a str, use these kwargs for loading the JSON file.
645
+ Deprecated: use target_options instead.
646
+ target_protocol : str
647
+ Used for loading the reference file, if it is a path. If None, protocol
648
+ will be derived from the given path
649
+ target_options : dict
650
+ Extra FS options for loading the reference file ``fo``, if given as a path
651
+ remote_protocol : str
652
+ The protocol of the filesystem on which the references will be evaluated
653
+ (unless fs is provided). If not given, will be derived from the first
654
+ URL that has a protocol in the templates or in the references, in that
655
+ order.
656
+ remote_options : dict
657
+ kwargs to go with remote_protocol
658
+ fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
659
+ Directly provide a file system(s):
660
+ - a single filesystem instance
661
+ - a dict of protocol:filesystem, where each value is either a filesystem
662
+ instance, or a dict of kwargs that can be used to create an
663
+ instance for the given protocol
664
+
665
+ If this is given, remote_options and remote_protocol are ignored.
666
+ template_overrides : dict
667
+ Swap out any templates in the references file with these - useful for
668
+ testing.
669
+ simple_templates: bool
670
+ Whether templates can be processed with simple replace (True) or if
671
+ jinja is needed (False, much slower). All reference sets produced by
672
+ ``kerchunk`` are simple in this sense, but the spec allows for complex.
673
+ max_gap, max_block: int
674
+ For merging multiple concurrent requests to the same remote file.
675
+ Neighboring byte ranges will only be merged when their
676
+ inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0
677
+ to only merge when it requires no extra bytes. Pass a negative
678
+ number to disable merging, appropriate for local target files.
679
+ Neighboring byte ranges will only be merged when the size of
680
+ the aggregated range is <= ``max_block``. Default is 256MB.
681
+ cache_size : int
682
+ Maximum size of LRU cache, where cache_size*record_size denotes
683
+ the total number of references that can be loaded in memory at once.
684
+ Only used for lazily loaded references.
685
+ kwargs : passed to parent class
686
+ """
687
+ super().__init__(**kwargs)
688
+ self.target = target
689
+ self.template_overrides = template_overrides
690
+ self.simple_templates = simple_templates
691
+ self.templates = {}
692
+ self.fss = {}
693
+ self._dircache = {}
694
+ self.max_gap = max_gap
695
+ self.max_block = max_block
696
+ if isinstance(fo, str):
697
+ dic = dict(
698
+ **(ref_storage_args or target_options or {}), protocol=target_protocol
699
+ )
700
+ ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
701
+ if ref_fs.isfile(fo2):
702
+ # text JSON
703
+ with fsspec.open(fo, "rb", **dic) as f:
704
+ logger.info("Read reference from URL %s", fo)
705
+ text = json.load(f)
706
+ self._process_references(text, template_overrides)
707
+ else:
708
+ # Lazy parquet refs
709
+ logger.info("Open lazy reference dict from URL %s", fo)
710
+ self.references = LazyReferenceMapper(
711
+ fo2,
712
+ fs=ref_fs,
713
+ cache_size=cache_size,
714
+ )
715
+ else:
716
+ # dictionaries
717
+ self._process_references(fo, template_overrides)
718
+ if isinstance(fs, dict):
719
+ self.fss = {
720
+ k: (
721
+ fsspec.filesystem(k.split(":", 1)[0], **opts)
722
+ if isinstance(opts, dict)
723
+ else opts
724
+ )
725
+ for k, opts in fs.items()
726
+ }
727
+ if None not in self.fss:
728
+ self.fss[None] = filesystem("file")
729
+ return
730
+ if fs is not None:
731
+ # single remote FS
732
+ remote_protocol = (
733
+ fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
734
+ )
735
+ self.fss[remote_protocol] = fs
736
+
737
+ if remote_protocol is None:
738
+ # get single protocol from any templates
739
+ for ref in self.templates.values():
740
+ if callable(ref):
741
+ ref = ref()
742
+ protocol, _ = fsspec.core.split_protocol(ref)
743
+ if protocol and protocol not in self.fss:
744
+ fs = filesystem(protocol, **(remote_options or {}))
745
+ self.fss[protocol] = fs
746
+ if remote_protocol is None:
747
+ # get single protocol from references
748
+ # TODO: warning here, since this can be very expensive?
749
+ for ref in self.references.values():
750
+ if callable(ref):
751
+ ref = ref()
752
+ if isinstance(ref, list) and ref[0]:
753
+ protocol, _ = fsspec.core.split_protocol(ref[0])
754
+ if protocol not in self.fss:
755
+ fs = filesystem(protocol, **(remote_options or {}))
756
+ self.fss[protocol] = fs
757
+ # only use first remote URL
758
+ break
759
+
760
+ if remote_protocol and remote_protocol not in self.fss:
761
+ fs = filesystem(remote_protocol, **(remote_options or {}))
762
+ self.fss[remote_protocol] = fs
763
+
764
+ self.fss[None] = fs or filesystem("file") # default one
765
+ # Wrap any non-async filesystems to ensure async methods are available below
766
+ for k, f in self.fss.items():
767
+ if not f.async_impl:
768
+ self.fss[k] = AsyncFileSystemWrapper(f, asynchronous=self.asynchronous)
769
+ elif self.asynchronous ^ f.asynchronous:
770
+ raise ValueError(
771
+ "Reference-FS's target filesystem must have same value"
772
+ "of asynchronous"
773
+ )
774
+
775
+ def _cat_common(self, path, start=None, end=None):
776
+ path = self._strip_protocol(path)
777
+ logger.debug(f"cat: {path}")
778
+ try:
779
+ part = self.references[path]
780
+ except KeyError as exc:
781
+ raise FileNotFoundError(path) from exc
782
+ if isinstance(part, str):
783
+ part = part.encode()
784
+ if hasattr(part, "to_bytes"):
785
+ part = part.to_bytes()
786
+ if isinstance(part, bytes):
787
+ logger.debug(f"Reference: {path}, type bytes")
788
+ if part.startswith(b"base64:"):
789
+ part = base64.b64decode(part[7:])
790
+ return part, None, None
791
+
792
+ if len(part) == 1:
793
+ logger.debug(f"Reference: {path}, whole file => {part}")
794
+ url = part[0]
795
+ start1, end1 = start, end
796
+ else:
797
+ url, start0, size = part
798
+ logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
799
+ end0 = start0 + size
800
+
801
+ if start is not None:
802
+ if start >= 0:
803
+ start1 = start0 + start
804
+ else:
805
+ start1 = end0 + start
806
+ else:
807
+ start1 = start0
808
+ if end is not None:
809
+ if end >= 0:
810
+ end1 = start0 + end
811
+ else:
812
+ end1 = end0 + end
813
+ else:
814
+ end1 = end0
815
+ if url is None:
816
+ url = self.target
817
+ return url, start1, end1
818
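The offset arithmetic above composes a caller's ``start``/``end`` window with the reference's own (offset, size). For example, a reference of (url, 1000, 200) read with ``start=50, end=100`` resolves to bytes 1050:1100 of the target; a sketch of the same calculation:

start0, size = 1000, 200     # reference offset and length in the target file
end0 = start0 + size         # 1200
start, end = 50, 100         # window requested within the reference
start1 = start0 + start if start >= 0 else end0 + start   # 1050
end1 = start0 + end if end >= 0 else end0 + end           # 1100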
+
819
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
820
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
821
+ if isinstance(part_or_url, bytes):
822
+ return part_or_url[start:end]
823
+ protocol, _ = split_protocol(part_or_url)
824
+ try:
825
+ return await self.fss[protocol]._cat_file(
826
+ part_or_url, start=start0, end=end0
827
+ )
828
+ except Exception as e:
829
+ raise ReferenceNotReachable(path, part_or_url) from e
830
+
831
+ def cat_file(self, path, start=None, end=None, **kwargs):
832
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
833
+ if isinstance(part_or_url, bytes):
834
+ return part_or_url[start:end]
835
+ protocol, _ = split_protocol(part_or_url)
836
+ try:
837
+ return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
838
+ except Exception as e:
839
+ raise ReferenceNotReachable(path, part_or_url) from e
840
+
841
+ def pipe_file(self, path, value, **_):
842
+ """Temporarily add binary data or reference as a file"""
843
+ self.references[path] = value
844
+
845
+ async def _get_file(self, rpath, lpath, **kwargs):
846
+ if self.isdir(rpath):
847
+ return os.makedirs(lpath, exist_ok=True)
848
+ data = await self._cat_file(rpath)
849
+ with open(lpath, "wb") as f:
850
+ f.write(data)
851
+
852
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
853
+ if self.isdir(rpath):
854
+ return os.makedirs(lpath, exist_ok=True)
855
+ data = self.cat_file(rpath, **kwargs)
856
+ callback.set_size(len(data))
857
+ if isfilelike(lpath):
858
+ lpath.write(data)
859
+ else:
860
+ with open(lpath, "wb") as f:
861
+ f.write(data)
862
+ callback.absolute_update(len(data))
863
+
864
+ def get(self, rpath, lpath, recursive=False, **kwargs):
865
+ if recursive:
866
+ # trigger directory build
867
+ self.ls("")
868
+ rpath = self.expand_path(rpath, recursive=recursive)
869
+ fs = fsspec.filesystem("file", auto_mkdir=True)
870
+ targets = other_paths(rpath, lpath)
871
+ if recursive:
872
+ data = self.cat([r for r in rpath if not self.isdir(r)])
873
+ else:
874
+ data = self.cat(rpath)
875
+ for remote, local in zip(rpath, targets):
876
+ if remote in data:
877
+ fs.pipe_file(local, data[remote])
878
+
879
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
880
+ if isinstance(path, str) and recursive:
881
+ raise NotImplementedError
882
+ if isinstance(path, list) and (recursive or any("*" in p for p in path)):
883
+ raise NotImplementedError
884
+ # TODO: if references is lazy, pre-fetch all paths in batch before access
885
+ proto_dict = _protocol_groups(path, self.references)
886
+ out = {}
887
+ for proto, paths in proto_dict.items():
888
+ fs = self.fss[proto]
889
+ urls, starts, ends, valid_paths = [], [], [], []
890
+ for p in paths:
891
+ # find references or label not-found. Early exit if any not
892
+ # found and on_error is "raise"
893
+ try:
894
+ u, s, e = self._cat_common(p)
895
+ if not isinstance(u, (bytes, str)):
896
+ # nan/None from parquet
897
+ continue
898
+ except FileNotFoundError as err:
899
+ if on_error == "raise":
900
+ raise
901
+ if on_error != "omit":
902
+ out[p] = err
903
+ else:
904
+ urls.append(u)
905
+ starts.append(s)
906
+ ends.append(e)
907
+ valid_paths.append(p)
908
+
909
+ # process references into form for merging
910
+ urls2 = []
911
+ starts2 = []
912
+ ends2 = []
913
+ paths2 = []
914
+ whole_files = set()
915
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
916
+ if isinstance(u, bytes):
917
+ # data
918
+ out[p] = u
919
+ elif s is None:
920
+ # whole file - limits are None, None, but no further
921
+ # entries take for this file
922
+ whole_files.add(u)
923
+ urls2.append(u)
924
+ starts2.append(s)
925
+ ends2.append(e)
926
+ paths2.append(p)
927
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
928
+ # second run to account for files that are to be loaded whole
929
+ if s is not None and u not in whole_files:
930
+ urls2.append(u)
931
+ starts2.append(s)
932
+ ends2.append(e)
933
+ paths2.append(p)
934
+
935
+ # merge and fetch consolidated ranges
936
+ new_paths, new_starts, new_ends = merge_offset_ranges(
937
+ list(urls2),
938
+ list(starts2),
939
+ list(ends2),
940
+ sort=True,
941
+ max_gap=self.max_gap,
942
+ max_block=self.max_block,
943
+ )
944
+ bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
945
+
946
+ # unbundle from merged bytes - simple approach
947
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
948
+ if p in out:
949
+ continue # was bytes, already handled
950
+ for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
951
+ if np == u and (ns is None or ne is None):
952
+ if isinstance(b, Exception):
953
+ out[p] = b
954
+ else:
955
+ out[p] = b[s:e]
956
+ elif np == u and s >= ns and e <= ne:
957
+ if isinstance(b, Exception):
958
+ out[p] = b
959
+ else:
960
+ out[p] = b[s - ns : (e - ne) or None]
961
+
962
+ for k, v in out.copy().items():
963
+ # these were valid references, but fetch failed, so transform exc
964
+ if isinstance(v, Exception) and k in self.references:
965
+ ex = out[k]
966
+ new_ex = ReferenceNotReachable(k, self.references[k])
967
+ new_ex.__cause__ = ex
968
+ if on_error == "raise":
969
+ raise new_ex
970
+ elif on_error != "omit":
971
+ out[k] = new_ex
972
+
973
+ if len(out) == 1 and isinstance(path, str) and "*" not in path:
974
+ return _first(out)
975
+ return out
976
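The request merging controlled by ``max_gap``/``max_block`` can be observed in isolation with ``fsspec.utils.merge_offset_ranges`` (paths are placeholders):

from fsspec.utils import merge_offset_ranges

paths = ["s3://bucket/a", "s3://bucket/a", "s3://bucket/b"]
starts = [0, 150, 0]
ends = [100, 250, 50]

# The two ranges in file "a" (gap of 50 bytes <= max_gap) merge into one
# request for bytes 0-250; file "b" stays a separate request.
print(merge_offset_ranges(paths, starts, ends, max_gap=64_000, max_block=256_000_000))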
+
977
+ def _process_references(self, references, template_overrides=None):
978
+ vers = references.get("version", None)
979
+ if vers is None:
980
+ self._process_references0(references)
981
+ elif vers == 1:
982
+ self._process_references1(references, template_overrides=template_overrides)
983
+ else:
984
+ raise ValueError(f"Unknown reference spec version: {vers}")
985
+ # TODO: we make dircache by iterating over all entries, but for Spec >= 1,
986
+ # can replace with programmatic. Is it even needed for mapper interface?
987
+
988
+ def _process_references0(self, references):
989
+ """Make reference dict for Spec Version 0"""
990
+ if isinstance(references, dict):
991
+ # do not do this for lazy/parquet backend, which will not make dicts,
992
+ # but must remain writable in the original object
993
+ references = {
994
+ key: json.dumps(val) if isinstance(val, dict) else val
995
+ for key, val in references.items()
996
+ }
997
+ self.references = references
998
+
999
+ def _process_references1(self, references, template_overrides=None):
1000
+ if not self.simple_templates or self.templates:
1001
+ import jinja2
1002
+ self.references = {}
1003
+ self._process_templates(references.get("templates", {}))
1004
+
1005
+ @lru_cache(1000)
1006
+ def _render_jinja(u):
1007
+ return jinja2.Template(u).render(**self.templates)
1008
+
1009
+ for k, v in references.get("refs", {}).items():
1010
+ if isinstance(v, str):
1011
+ if v.startswith("base64:"):
1012
+ self.references[k] = base64.b64decode(v[7:])
1013
+ self.references[k] = v
1014
+ elif isinstance(v, dict):
1015
+ self.references[k] = json.dumps(v)
1016
+ elif self.templates:
1017
+ u = v[0]
1018
+ if "{{" in u:
1019
+ if self.simple_templates:
1020
+ u = (
1021
+ u.replace("{{", "{")
1022
+ .replace("}}", "}")
1023
+ .format(**self.templates)
1024
+ )
1025
+ else:
1026
+ u = _render_jinja(u)
1027
+ self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
1028
+ else:
1029
+ self.references[k] = v
1030
+ self.references.update(self._process_gen(references.get("gen", [])))
1031
+
1032
+ def _process_templates(self, tmp):
1033
+ self.templates = {}
1034
+ if self.template_overrides is not None:
1035
+ tmp.update(self.template_overrides)
1036
+ for k, v in tmp.items():
1037
+ if "{{" in v:
1038
+ import jinja2
1039
+
1040
+ self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
1041
+ temp
1042
+ ).render(**kwargs)
1043
+ else:
1044
+ self.templates[k] = v
1045
+
1046
+ def _process_gen(self, gens):
1047
+ out = {}
1048
+ for gen in gens:
1049
+ dimension = {
1050
+ k: (
1051
+ v
1052
+ if isinstance(v, list)
1053
+ else range(v.get("start", 0), v["stop"], v.get("step", 1))
1054
+ )
1055
+ for k, v in gen["dimensions"].items()
1056
+ }
1057
+ products = (
1058
+ dict(zip(dimension.keys(), values))
1059
+ for values in itertools.product(*dimension.values())
1060
+ )
1061
+ for pr in products:
1062
+ import jinja2
1063
+
1064
+ key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
1065
+ url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
1066
+ if ("offset" in gen) and ("length" in gen):
1067
+ offset = int(
1068
+ jinja2.Template(gen["offset"]).render(**pr, **self.templates)
1069
+ )
1070
+ length = int(
1071
+ jinja2.Template(gen["length"]).render(**pr, **self.templates)
1072
+ )
1073
+ out[key] = [url, offset, length]
1074
+ elif ("offset" in gen) ^ ("length" in gen):
1075
+ raise ValueError(
1076
+ "Both 'offset' and 'length' are required for a "
1077
+ "reference generator entry if either is provided."
1078
+ )
1079
+ else:
1080
+ out[key] = [url]
1081
+ return out
1082
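For orientation, a small version-1 spec exercising ``templates``, ``gen`` and ``refs`` as consumed by the two methods above (URLs, offsets and lengths are invented, following the kerchunk spec examples):

import fsspec

spec = {
    "version": 1,
    "templates": {"u": "server.domain/path"},
    "gen": [
        {
            "key": "gen_key{{i}}",
            "url": "http://{{u}}_{{i}}",
            "offset": "{{(i + 1) * 1000}}",
            "length": "1000",
            "dimensions": {"i": {"stop": 5}},   # yields gen_key0 .. gen_key4
        }
    ],
    "refs": {
        "key0": "data",                           # inline data
        "key1": ["http://target_url", 10000, 100],
        "key2": ["http://{{u}}", 10000, 100],     # simple template substitution
    },
}

# Consuming it needs jinja2 (for "gen") and an HTTP backend such as aiohttp:
fs = fsspec.filesystem("reference", fo=spec, remote_protocol="http")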
+
1083
+ def _dircache_from_items(self):
1084
+ self.dircache = {"": []}
1085
+ it = self.references.items()
1086
+ for path, part in it:
1087
+ if isinstance(part, (bytes, str)) or hasattr(part, "to_bytes"):
1088
+ size = len(part)
1089
+ elif len(part) == 1:
1090
+ size = None
1091
+ else:
1092
+ _, _, size = part
1093
+ par = path.rsplit("/", 1)[0] if "/" in path else ""
1094
+ par0 = par
1095
+ subdirs = [par0]
1096
+ while par0 and par0 not in self.dircache:
1097
+ # collect parent directories
1098
+ par0 = self._parent(par0)
1099
+ subdirs.append(par0)
1100
+
1101
+ subdirs.reverse()
1102
+ for parent, child in zip(subdirs, subdirs[1:]):
1103
+ # register newly discovered directories
1104
+ assert child not in self.dircache
1105
+ assert parent in self.dircache
1106
+ self.dircache[parent].append(
1107
+ {"name": child, "type": "directory", "size": 0}
1108
+ )
1109
+ self.dircache[child] = []
1110
+
1111
+ self.dircache[par].append({"name": path, "type": "file", "size": size})
1112
+
1113
+ def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
1114
+ part_or_url, start0, end0 = self._cat_common(path)
1115
+ # This logic is kept outside `ReferenceFile` to avoid unnecessary redirection.
1116
+ # That does mean `_cat_common` gets called twice if it eventually reaches `ReferenceFile`.
1117
+ if isinstance(part_or_url, bytes):
1118
+ return io.BytesIO(part_or_url[start0:end0])
1119
+
1120
+ protocol, _ = split_protocol(part_or_url)
1121
+ if start0 is None and end0 is None:
1122
+ return self.fss[protocol]._open(
1123
+ part_or_url,
1124
+ mode,
1125
+ block_size=block_size,
1126
+ cache_options=cache_options,
1127
+ **kwargs,
1128
+ )
1129
+
1130
+ return ReferenceFile(
1131
+ self,
1132
+ path,
1133
+ mode,
1134
+ block_size=block_size,
1135
+ cache_options=cache_options,
1136
+ **kwargs,
1137
+ )
1138
+
1139
+ def ls(self, path, detail=True, **kwargs):
1140
+ logger.debug("list %s", path)
1141
+ path = self._strip_protocol(path)
1142
+ if isinstance(self.references, LazyReferenceMapper):
1143
+ try:
1144
+ return self.references.ls(path, detail)
1145
+ except KeyError:
1146
+ pass
1147
+ raise FileNotFoundError(f"'{path}' is not a known key")
1148
+ if not self.dircache:
1149
+ self._dircache_from_items()
1150
+ out = self._ls_from_cache(path)
1151
+ if out is None:
1152
+ raise FileNotFoundError(path)
1153
+ if detail:
1154
+ return out
1155
+ return [o["name"] for o in out]
1156
+
1157
+ def exists(self, path, **kwargs): # overwrite auto-sync version
1158
+ return self.isdir(path) or self.isfile(path)
1159
+
1160
+ def isdir(self, path): # overwrite auto-sync version
1161
+ if self.dircache:
1162
+ return path in self.dircache
1163
+ elif isinstance(self.references, LazyReferenceMapper):
1164
+ return path in self.references.listdir()
1165
+ else:
1166
+ # this may be faster than building dircache for single calls, but
1167
+ # by looping will be slow for many calls; could cache it?
1168
+ return any(_.startswith(f"{path}/") for _ in self.references)
1169
+
1170
+ def isfile(self, path): # overwrite auto-sync version
1171
+ return path in self.references
1172
+
1173
+ async def _ls(self, path, detail=True, **kwargs): # calls fast sync code
1174
+ return self.ls(path, detail, **kwargs)
1175
+
1176
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
1177
+ if withdirs:
1178
+ return super().find(
1179
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
1180
+ )
1181
+ if path:
1182
+ path = self._strip_protocol(path)
1183
+ r = sorted(k for k in self.references if k.startswith(path))
1184
+ else:
1185
+ r = sorted(self.references)
1186
+ if detail:
1187
+ if not self.dircache:
1188
+ self._dircache_from_items()
1189
+ return {k: self._ls_from_cache(k)[0] for k in r}
1190
+ else:
1191
+ return r
1192
+
1193
+ def info(self, path, **kwargs):
1194
+ out = self.references.get(path)
1195
+ if out is not None:
1196
+ if isinstance(out, (str, bytes)):
1197
+ # decode base64 here
1198
+ return {"name": path, "type": "file", "size": len(out)}
1199
+ elif len(out) > 1:
1200
+ return {"name": path, "type": "file", "size": out[2]}
1201
+ else:
1202
+ out0 = [{"name": path, "type": "file", "size": None}]
1203
+ else:
1204
+ out = self.ls(path, True)
1205
+ out0 = [o for o in out if o["name"] == path]
1206
+ if not out0:
1207
+ return {"name": path, "type": "directory", "size": 0}
1208
+ if out0[0]["size"] is None:
1209
+ # if this is a whole remote file, update size using remote FS
1210
+ prot, _ = split_protocol(self.references[path][0])
1211
+ out0[0]["size"] = self.fss[prot].size(self.references[path][0])
1212
+ return out0[0]
1213
+
1214
+ async def _info(self, path, **kwargs): # calls fast sync code
1215
+ return self.info(path)
1216
+
1217
+ async def _rm_file(self, path, **kwargs):
1218
+ self.references.pop(
1219
+ path, None
1220
+ ) # ignores FileNotFound, just as well for directories
1221
+ self.dircache.clear() # this is a bit heavy handed
1222
+
1223
+ async def _pipe_file(self, path, data, mode="overwrite", **kwargs):
1224
+ if mode == "create" and self.exists(path):
1225
+ raise FileExistsError
1226
+ # can be str or bytes
1227
+ self.references[path] = data
1228
+ self.dircache.clear() # this is a bit heavy handed
1229
+
1230
+ async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
1231
+ # puts binary
1232
+ if mode == "create" and self.exists(rpath):
1233
+ raise FileExistsError
1234
+ with open(lpath, "rb") as f:
1235
+ self.references[rpath] = f.read()
1236
+ self.dircache.clear() # this is a bit heavy handed
1237
+
1238
+ def save_json(self, url, **storage_options):
1239
+ """Write modified references into new location"""
1240
+ out = {}
1241
+ for k, v in self.references.items():
1242
+ if isinstance(v, bytes):
1243
+ try:
1244
+ out[k] = v.decode("ascii")
1245
+ except UnicodeDecodeError:
1246
+ out[k] = (b"base64:" + base64.b64encode(v)).decode()
1247
+ else:
1248
+ out[k] = v
1249
+ with fsspec.open(url, "wb", **storage_options) as f:
1250
+ f.write(json.dumps({"version": 1, "refs": out}).encode())
1251
+
1252
+
1253
+ class ReferenceFile(AbstractBufferedFile):
1254
+ def __init__(
1255
+ self,
1256
+ fs,
1257
+ path,
1258
+ mode="rb",
1259
+ block_size="default",
1260
+ autocommit=True,
1261
+ cache_type="readahead",
1262
+ cache_options=None,
1263
+ size=None,
1264
+ **kwargs,
1265
+ ):
1266
+ super().__init__(
1267
+ fs,
1268
+ path,
1269
+ mode=mode,
1270
+ block_size=block_size,
1271
+ autocommit=autocommit,
1272
+ size=size,
1273
+ cache_type=cache_type,
1274
+ cache_options=cache_options,
1275
+ **kwargs,
1276
+ )
1277
+ part_or_url, self.start, self.end = self.fs._cat_common(self.path)
1278
+ protocol, _ = split_protocol(part_or_url)
1279
+ self.src_fs = self.fs.fss[protocol]
1280
+ self.src_path = part_or_url
1281
+ self._f = None
1282
+
1283
+ @property
1284
+ def f(self):
1285
+ if self._f is None or self._f.closed:
1286
+ self._f = self.src_fs._open(
1287
+ self.src_path,
1288
+ mode=self.mode,
1289
+ block_size=self.blocksize,
1290
+ autocommit=self.autocommit,
1291
+ cache_type="none",
1292
+ **self.kwargs,
1293
+ )
1294
+ return self._f
1295
+
1296
+ def close(self):
1297
+ if self._f is not None:
1298
+ self._f.close()
1299
+ return super().close()
1300
+
1301
+ def _fetch_range(self, start, end):
1302
+ start = start + self.start
1303
+ end = min(end + self.start, self.end)
1304
+ self.f.seek(start)
1305
+ return self.f.read(end - start)
temp_venv/lib/python3.13/site-packages/fsspec/implementations/sftp.py ADDED
@@ -0,0 +1,180 @@
1
+ import datetime
2
+ import logging
3
+ import os
4
+ import types
5
+ import uuid
6
+ from stat import S_ISDIR, S_ISLNK
7
+
8
+ import paramiko
9
+
10
+ from .. import AbstractFileSystem
11
+ from ..utils import infer_storage_options
12
+
13
+ logger = logging.getLogger("fsspec.sftp")
14
+
15
+
16
+ class SFTPFileSystem(AbstractFileSystem):
17
+ """Files over SFTP/SSH
18
+
19
+ Peer-to-peer filesystem over SSH using paramiko.
20
+
21
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
22
+ there is no way to tell if a path is relative, so all paths are assumed
23
+ to be absolute.
24
+ """
25
+
26
+ protocol = "sftp", "ssh"
27
+
28
+ def __init__(self, host, **ssh_kwargs):
29
+ """
30
+
31
+ Parameters
32
+ ----------
33
+ host: str
34
+ Hostname or IP as a string
35
+ temppath: str
36
+ Location on the server to put files, when within a transaction
37
+ ssh_kwargs: dict
38
+ Parameters passed on to connection. See details in
39
+ https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
40
+ May include port, username, password...
41
+ """
42
+ if self._cached:
43
+ return
44
+ super().__init__(**ssh_kwargs)
45
+ self.temppath = ssh_kwargs.pop("temppath", "/tmp") # remote temp directory
46
+ self.host = host
47
+ self.ssh_kwargs = ssh_kwargs
48
+ self._connect()
49
+
50
+ def _connect(self):
51
+ logger.debug("Connecting to SFTP server %s", self.host)
52
+ self.client = paramiko.SSHClient()
53
+ self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
54
+ self.client.connect(self.host, **self.ssh_kwargs)
55
+ self.ftp = self.client.open_sftp()
56
+
57
+ @classmethod
58
+ def _strip_protocol(cls, path):
59
+ return infer_storage_options(path)["path"]
60
+
61
+ @staticmethod
62
+ def _get_kwargs_from_urls(urlpath):
63
+ out = infer_storage_options(urlpath)
64
+ out.pop("path", None)
65
+ out.pop("protocol", None)
66
+ return out
67
+
68
+ def mkdir(self, path, create_parents=True, mode=511):
69
+ logger.debug("Creating folder %s", path)
70
+ if self.exists(path):
71
+ raise FileExistsError(f"File exists: {path}")
72
+
73
+ if create_parents:
74
+ self.makedirs(path)
75
+ else:
76
+ self.ftp.mkdir(path, mode)
77
+
78
+ def makedirs(self, path, exist_ok=False, mode=511):
79
+ if self.exists(path) and not exist_ok:
80
+ raise FileExistsError(f"File exists: {path}")
81
+
82
+ parts = path.split("/")
83
+ new_path = "/" if path[:1] == "/" else ""
84
+
85
+ for part in parts:
86
+ if part:
87
+ new_path = f"{new_path}/{part}" if new_path else part
88
+ if not self.exists(new_path):
89
+ self.ftp.mkdir(new_path, mode)
90
+
91
+ def rmdir(self, path):
92
+ logger.debug("Removing folder %s", path)
93
+ self.ftp.rmdir(path)
94
+
95
+ def info(self, path):
96
+ stat = self._decode_stat(self.ftp.stat(path))
97
+ stat["name"] = path
98
+ return stat
99
+
100
+ @staticmethod
101
+ def _decode_stat(stat, parent_path=None):
102
+ if S_ISDIR(stat.st_mode):
103
+ t = "directory"
104
+ elif S_ISLNK(stat.st_mode):
105
+ t = "link"
106
+ else:
107
+ t = "file"
108
+ out = {
109
+ "name": "",
110
+ "size": stat.st_size,
111
+ "type": t,
112
+ "uid": stat.st_uid,
113
+ "gid": stat.st_gid,
114
+ "time": datetime.datetime.fromtimestamp(
115
+ stat.st_atime, tz=datetime.timezone.utc
116
+ ),
117
+ "mtime": datetime.datetime.fromtimestamp(
118
+ stat.st_mtime, tz=datetime.timezone.utc
119
+ ),
120
+ }
121
+ if parent_path:
122
+ out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
123
+ return out
124
+
125
+ def ls(self, path, detail=False):
126
+ logger.debug("Listing folder %s", path)
127
+ stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
128
+ if detail:
129
+ return stats
130
+ else:
131
+ paths = [stat["name"] for stat in stats]
132
+ return sorted(paths)
133
+
134
+ def put(self, lpath, rpath, callback=None, **kwargs):
135
+ logger.debug("Put file %s into %s", lpath, rpath)
136
+ self.ftp.put(lpath, rpath)
137
+
138
+ def get_file(self, rpath, lpath, **kwargs):
139
+ if self.isdir(rpath):
140
+ os.makedirs(lpath, exist_ok=True)
141
+ else:
142
+ self.ftp.get(self._strip_protocol(rpath), lpath)
143
+
144
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
145
+ """
146
+ block_size: int or None
147
+ If 0, no buffering, if 1, line buffering, if >1, buffer that many
148
+ bytes, if None use default from paramiko.
149
+ """
150
+ logger.debug("Opening file %s", path)
151
+ if kwargs.get("autocommit", True) is False:
152
+ # writes to temporary file, move on commit
153
+ path2 = "/".join([self.temppath, str(uuid.uuid4())])
154
+ f = self.ftp.open(path2, mode, bufsize=block_size if block_size else -1)
155
+ f.temppath = path2
156
+ f.targetpath = path
157
+ f.fs = self
158
+ f.commit = types.MethodType(commit_a_file, f)
159
+ f.discard = types.MethodType(discard_a_file, f)
160
+ else:
161
+ f = self.ftp.open(path, mode, bufsize=block_size if block_size else -1)
162
+ return f
163
+
164
+ def _rm(self, path):
165
+ if self.isdir(path):
166
+ self.ftp.rmdir(path)
167
+ else:
168
+ self.ftp.remove(path)
169
+
170
+ def mv(self, old, new):
171
+ logger.debug("Renaming %s into %s", old, new)
172
+ self.ftp.posix_rename(old, new)
173
+
174
+
175
+ def commit_a_file(self):
176
+ self.fs.mv(self.temppath, self.targetpath)
177
+
178
+
179
+ def discard_a_file(self):
180
+ self.fs._rm(self.temppath)
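A brief usage sketch for the filesystem above (host and credentials are placeholders; paramiko must be installed, and paths are treated as absolute):

import fsspec

fs = fsspec.filesystem(
    "sftp",
    host="example.com",   # placeholder
    username="user",
    password="secret",
)

print(fs.ls("/home/user"))                        # sorted names
with fs.open("/home/user/notes.txt", "rb") as f:
    data = f.read()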
temp_venv/lib/python3.13/site-packages/fsspec/implementations/smb.py ADDED
@@ -0,0 +1,416 @@
1
+ """
2
+ This module contains SMBFileSystem class responsible for handling access to
3
+ Windows Samba network shares by using package smbprotocol
4
+ """
5
+
6
+ import datetime
7
+ import re
8
+ import uuid
9
+ from stat import S_ISDIR, S_ISLNK
10
+
11
+ import smbclient
12
+ import smbprotocol.exceptions
13
+
14
+ from .. import AbstractFileSystem
15
+ from ..utils import infer_storage_options
16
+
17
+ # ! pylint: disable=bad-continuation
18
+
19
+
20
+ class SMBFileSystem(AbstractFileSystem):
21
+ """Allow reading and writing to Windows and Samba network shares.
22
+
23
+ When using `fsspec.open()` for getting a file-like object the URI
24
+ should be specified as this format:
25
+ ``smb://workgroup;user:password@server:port/share/folder/file.csv``.
26
+
27
+ Example::
28
+
29
+ >>> import fsspec
30
+ >>> with fsspec.open(
31
+ ... 'smb://myuser:mypassword@myhost.com/' 'share/folder/file.csv'
32
+ ... ) as smbfile:
33
+ ... df = pd.read_csv(smbfile, sep='|', header=None)
34
+
35
+ Note that you need to pass in a valid hostname or IP address for the host
36
+ component of the URL. Do not use the Windows/NetBIOS machine name for the
37
+ host component.
38
+
39
+ The first component of the path in the URL points to the name of the shared
40
+ folder. Subsequent path components will point to the directory/folder/file.
41
+
42
+ The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
43
+ optional.
44
+
45
+ .. note::
46
+
47
+ For this backend to work, `smbprotocol`_ must be installed, e.g.::
48
+
49
+ $ pip install smbprotocol
50
+ # or
51
+ # pip install smbprotocol[kerberos]
52
+
53
+ .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements
54
+
55
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
56
+ there is no way to tell if a path is relative, so all paths are assumed
57
+ to be absolute.
58
+ """
59
+
60
+ protocol = "smb"
61
+
62
+ # pylint: disable=too-many-arguments
63
+ def __init__(
64
+ self,
65
+ host,
66
+ port=None,
67
+ username=None,
68
+ password=None,
69
+ timeout=60,
70
+ encrypt=None,
71
+ share_access=None,
72
+ register_session_retries=4,
73
+ register_session_retry_wait=1,
74
+ register_session_retry_factor=10,
75
+ auto_mkdir=False,
76
+ **kwargs,
77
+ ):
78
+ """
79
+ You can use _get_kwargs_from_urls to get some kwargs from
80
+ a reasonable SMB url.
81
+
82
+ Authentication will be anonymous or integrated if username/password are not
83
+ given.
84
+
85
+ Parameters
86
+ ----------
87
+ host: str
88
+ The remote server name/ip to connect to
89
+ port: int or None
90
+ Port to connect with. Usually 445, sometimes 139.
91
+ username: str or None
92
+ Username to connect with. Required if Kerberos auth is not being used.
93
+ password: str or None
94
+ User's password on the server, if using username
95
+ timeout: int
96
+ Connection timeout in seconds
97
+ encrypt: bool
98
+ Whether to force encryption or not, once this has been set to True
99
+ the session cannot be changed back to False.
100
+ share_access: str or None
101
+ Specifies the default access applied to file open operations
102
+ performed with this file system object.
103
+ This affects whether other processes can concurrently open a handle
104
+ to the same file.
105
+
106
+ - None (the default): exclusively locks the file until closed.
107
+ - 'r': Allow other handles to be opened with read access.
108
+ - 'w': Allow other handles to be opened with write access.
109
+ - 'd': Allow other handles to be opened with delete access.
110
+ register_session_retries: int
111
+ Number of retries to register a session with the server. Retries are not performed
112
+ for authentication errors, as these indicate invalid credentials rather than network
113
+ issues. If set to a negative value, no registration attempts will be performed.
114
+ register_session_retry_wait: int
115
+ Time in seconds to wait between each retry. Number must be non-negative.
116
+ register_session_retry_factor: int
117
+ Base factor for the wait time between each retry. The wait time
118
+ is calculated using exponential function. For factor=1 all wait times
119
+ will be equal to `register_session_retry_wait`. For any number of retries,
120
+ the last wait time will be equal to `register_session_retry_wait` and for retries>1
121
+ the first wait time will be equal to `register_session_retry_wait / factor`.
122
+ Number must be equal to or greater than 1. Optimal factor is 10.
123
+ auto_mkdir: bool
124
+ Whether, when opening a file, the directory containing it should
125
+ be created (if it doesn't already exist). This is assumed by pyarrow
126
+ and zarr-python code.
127
+ """
128
+ super().__init__(**kwargs)
129
+ self.host = host
130
+ self.port = port
131
+ self.username = username
132
+ self.password = password
133
+ self.timeout = timeout
134
+ self.encrypt = encrypt
135
+ self.temppath = kwargs.pop("temppath", "")
136
+ self.share_access = share_access
137
+ self.register_session_retries = register_session_retries
138
+ if register_session_retry_wait < 0:
139
+ raise ValueError(
140
+ "register_session_retry_wait must be a non-negative integer"
141
+ )
142
+ self.register_session_retry_wait = register_session_retry_wait
143
+ if register_session_retry_factor < 1:
144
+ raise ValueError(
145
+ "register_session_retry_factor must be a positive "
146
+ "integer equal to or greater than 1"
147
+ )
148
+ self.register_session_retry_factor = register_session_retry_factor
149
+ self.auto_mkdir = auto_mkdir
150
+ self._connect()
151
+
152
+ @property
153
+ def _port(self):
154
+ return 445 if self.port is None else self.port
155
+
156
+ def _connect(self):
157
+ import time
158
+
159
+ if self.register_session_retries <= -1:
160
+ return
161
+
162
+ retried_errors = []
163
+
164
+ wait_time = self.register_session_retry_wait
165
+ n_waits = (
166
+ self.register_session_retries - 1
167
+ ) # -1 = No wait time after the last retry
168
+ factor = self.register_session_retry_factor
169
+
170
+ # Generate wait times for each retry attempt.
171
+ # Wait times are calculated using exponential function. For factor=1 all wait times
172
+ # will be equal to `wait`. For any number of retries the last wait time will be
173
+ # equal to `wait` and for retries>2 the first wait time will be equal to `wait / factor`.
174
+ wait_times = iter(
175
+ factor ** (n / n_waits - 1) * wait_time for n in range(0, n_waits + 1)
176
+ )
177
+
178
+ for attempt in range(self.register_session_retries + 1):
179
+ try:
180
+ smbclient.register_session(
181
+ self.host,
182
+ username=self.username,
183
+ password=self.password,
184
+ port=self._port,
185
+ encrypt=self.encrypt,
186
+ connection_timeout=self.timeout,
187
+ )
188
+ return
189
+ except (
190
+ smbprotocol.exceptions.SMBAuthenticationError,
191
+ smbprotocol.exceptions.LogonFailure,
192
+ ):
193
+ # These exceptions should not be repeated, as they clearly indicate
194
+ # that the credentials are invalid and not a network issue.
195
+ raise
196
+ except ValueError as exc:
197
+ if re.findall(r"\[Errno -\d+]", str(exc)):
198
+ # This exception is raised by the smbprotocol.transport:Tcp.connect
199
+ # and originates from socket.gaierror (OSError). These exceptions might
200
+ # be raised due to network instability. We will retry to connect.
201
+ retried_errors.append(exc)
202
+ else:
203
+ # All another ValueError exceptions should be raised, as they are not
204
+ # related to network issues.
205
+ raise
206
+ except Exception as exc:
207
+ # Save the exception and retry to connect. This except might be dropped
208
+ # in the future, once all exceptions suited for retry are identified.
209
+ retried_errors.append(exc)
210
+
211
+ if attempt < self.register_session_retries:
212
+ time.sleep(next(wait_times))
213
+
214
+ # Raise last exception to inform user about the connection issues.
215
+ # Note: Should we use ExceptionGroup to raise all exceptions?
216
+ raise retried_errors[-1]
217
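The wait schedule produced by the expression above can be reproduced on its own; with the constructor defaults (4 retries, 1s wait, factor 10) the delays ramp up towards the configured wait:

retries, wait, factor = 4, 1, 10   # register_session_* defaults
n_waits = retries - 1              # no wait after the final attempt

waits = [factor ** (n / n_waits - 1) * wait for n in range(n_waits + 1)]
print([round(w, 3) for w in waits])  # [0.1, 0.215, 0.464, 1.0]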
+
218
+ @classmethod
219
+ def _strip_protocol(cls, path):
220
+ return infer_storage_options(path)["path"]
221
+
222
+ @staticmethod
223
+ def _get_kwargs_from_urls(path):
224
+ # smb://workgroup;user:password@host:port/share/folder/file.csv
225
+ out = infer_storage_options(path)
226
+ out.pop("path", None)
227
+ out.pop("protocol", None)
228
+ return out
229
+
230
+ def mkdir(self, path, create_parents=True, **kwargs):
231
+ wpath = _as_unc_path(self.host, path)
232
+ if create_parents:
233
+ smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
234
+ else:
235
+ smbclient.mkdir(wpath, port=self._port, **kwargs)
236
+
237
+ def makedirs(self, path, exist_ok=False):
238
+ if _share_has_path(path):
239
+ wpath = _as_unc_path(self.host, path)
240
+ smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)
241
+
242
+ def rmdir(self, path):
243
+ if _share_has_path(path):
244
+ wpath = _as_unc_path(self.host, path)
245
+ smbclient.rmdir(wpath, port=self._port)
246
+
247
+ def info(self, path, **kwargs):
248
+ wpath = _as_unc_path(self.host, path)
249
+ stats = smbclient.stat(wpath, port=self._port, **kwargs)
250
+ if S_ISDIR(stats.st_mode):
251
+ stype = "directory"
252
+ elif S_ISLNK(stats.st_mode):
253
+ stype = "link"
254
+ else:
255
+ stype = "file"
256
+ res = {
257
+ "name": path + "/" if stype == "directory" else path,
258
+ "size": stats.st_size,
259
+ "type": stype,
260
+ "uid": stats.st_uid,
261
+ "gid": stats.st_gid,
262
+ "time": stats.st_atime,
263
+ "mtime": stats.st_mtime,
264
+ }
265
+ return res
266
+
267
+ def created(self, path):
268
+ """Return the created timestamp of a file as a datetime.datetime"""
269
+ wpath = _as_unc_path(self.host, path)
270
+ stats = smbclient.stat(wpath, port=self._port)
271
+ return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)
272
+
273
+ def modified(self, path):
274
+ """Return the modified timestamp of a file as a datetime.datetime"""
275
+ wpath = _as_unc_path(self.host, path)
276
+ stats = smbclient.stat(wpath, port=self._port)
277
+ return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)
278
+
279
+ def ls(self, path, detail=True, **kwargs):
280
+ unc = _as_unc_path(self.host, path)
281
+ listed = smbclient.listdir(unc, port=self._port, **kwargs)
282
+ dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
283
+ if detail:
284
+ dirs = [self.info(d) for d in dirs]
285
+ return dirs
286
+
287
+ # pylint: disable=too-many-arguments
288
+ def _open(
289
+ self,
290
+ path,
291
+ mode="rb",
292
+ block_size=-1,
293
+ autocommit=True,
294
+ cache_options=None,
295
+ **kwargs,
296
+ ):
297
+ """
298
+ block_size: int or None
299
+ If 0, no buffering, 1, line buffering, >1, buffer that many bytes
300
+
301
+ Notes
302
+ -----
303
+ By specifying 'share_access' in 'kwargs' it is possible to override the
304
+ default shared access setting applied in the constructor of this object.
305
+ """
306
+ if self.auto_mkdir and "w" in mode:
307
+ self.makedirs(self._parent(path), exist_ok=True)
308
+ bls = block_size if block_size is not None and block_size >= 0 else -1
309
+ wpath = _as_unc_path(self.host, path)
310
+ share_access = kwargs.pop("share_access", self.share_access)
311
+ if "w" in mode and autocommit is False:
312
+ temp = _as_temp_path(self.host, path, self.temppath)
313
+ return SMBFileOpener(
314
+ wpath, temp, mode, port=self._port, block_size=bls, **kwargs
315
+ )
316
+ return smbclient.open_file(
317
+ wpath,
318
+ mode,
319
+ buffering=bls,
320
+ share_access=share_access,
321
+ port=self._port,
322
+ **kwargs,
323
+ )
324
+
325
+ def copy(self, path1, path2, **kwargs):
326
+ """Copy within two locations in the same filesystem"""
327
+ wpath1 = _as_unc_path(self.host, path1)
328
+ wpath2 = _as_unc_path(self.host, path2)
329
+ if self.auto_mkdir:
330
+ self.makedirs(self._parent(path2), exist_ok=True)
331
+ smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
332
+
333
+ def _rm(self, path):
334
+ if _share_has_path(path):
335
+ wpath = _as_unc_path(self.host, path)
336
+ stats = smbclient.stat(wpath, port=self._port)
337
+ if S_ISDIR(stats.st_mode):
338
+ smbclient.rmdir(wpath, port=self._port)
339
+ else:
340
+ smbclient.remove(wpath, port=self._port)
341
+
342
+ def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
343
+ wpath1 = _as_unc_path(self.host, path1)
344
+ wpath2 = _as_unc_path(self.host, path2)
345
+ smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
346
+
347
+
348
+ def _as_unc_path(host, path):
349
+ rpath = path.replace("/", "\\")
350
+ unc = f"\\\\{host}{rpath}"
351
+ return unc
352
+
353
+
354
+ def _as_temp_path(host, path, temppath):
355
+ share = path.split("/")[1]
356
+ temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
357
+ unc = _as_unc_path(host, temp_file)
358
+ return unc
359
+
360
+
361
+ def _share_has_path(path):
362
+ parts = path.count("/")
363
+ if path.endswith("/"):
364
+ return parts > 2
365
+ return parts > 1
366
+
367
+
368
+ class SMBFileOpener:
369
+ """writes to remote temporary file, move on commit"""
370
+
371
+ def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
372
+ self.path = path
373
+ self.temp = temp
374
+ self.mode = mode
375
+ self.block_size = block_size
376
+ self.kwargs = kwargs
377
+ self.smbfile = None
378
+ self._incontext = False
379
+ self.port = port
380
+ self._open()
381
+
382
+ def _open(self):
383
+ if self.smbfile is None or self.smbfile.closed:
384
+ self.smbfile = smbclient.open_file(
385
+ self.temp,
386
+ self.mode,
387
+ port=self.port,
388
+ buffering=self.block_size,
389
+ **self.kwargs,
390
+ )
391
+
392
+ def commit(self):
393
+ """Move temp file to definitive on success."""
394
+ # TODO: use transaction support in SMB protocol
395
+ smbclient.replace(self.temp, self.path, port=self.port)
396
+
397
+ def discard(self):
398
+ """Remove the temp file on failure."""
399
+ smbclient.remove(self.temp, port=self.port)
400
+
401
+ def __fspath__(self):
402
+ return self.path
403
+
404
+ def __iter__(self):
405
+ return self.smbfile.__iter__()
406
+
407
+ def __getattr__(self, item):
408
+ return getattr(self.smbfile, item)
409
+
410
+ def __enter__(self):
411
+ self._incontext = True
412
+ return self.smbfile.__enter__()
413
+
414
+ def __exit__(self, exc_type, exc_value, traceback):
415
+ self._incontext = False
416
+ self.smbfile.__exit__(exc_type, exc_value, traceback)
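Usage note (not part of the diff): a minimal sketch of reading from an SMB share through this filesystem. The host, share name and credentials below are placeholders, and the snippet assumes the smbprotocol dependency is installed and the share is reachable.

import fsspec

# instantiate via the "smb" protocol; paths start with the share name,
# mirroring the UNC layout built by _as_unc_path above
fs = fsspec.filesystem(
    "smb",
    host="fileserver.example.com",  # hypothetical server
    username="alice",               # hypothetical credentials
    password="secret",
)
print(fs.ls("/myshare/data"))
with fs.open("/myshare/data/report.csv", "rb") as f:
    header = f.read(1024)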
temp_venv/lib/python3.13/site-packages/fsspec/implementations/tar.py ADDED
@@ -0,0 +1,124 @@
1
+ import logging
2
+ import tarfile
3
+
4
+ import fsspec
5
+ from fsspec.archive import AbstractArchiveFileSystem
6
+ from fsspec.compression import compr
7
+ from fsspec.utils import infer_compression
8
+
9
+ typemap = {b"0": "file", b"5": "directory"}
10
+
11
+ logger = logging.getLogger("tar")
12
+
13
+
14
+ class TarFileSystem(AbstractArchiveFileSystem):
15
+ """Compressed Tar archives as a file-system (read-only)
16
+
17
+ Supports the following formats:
18
+ tar.gz, tar.bz2, tar.xz
19
+ """
20
+
21
+ root_marker = ""
22
+ protocol = "tar"
23
+ cachable = False
24
+
25
+ def __init__(
26
+ self,
27
+ fo="",
28
+ index_store=None,
29
+ target_options=None,
30
+ target_protocol=None,
31
+ compression=None,
32
+ **kwargs,
33
+ ):
34
+ super().__init__(**kwargs)
35
+ target_options = target_options or {}
36
+
37
+ if isinstance(fo, str):
38
+ self.of = fsspec.open(fo, protocol=target_protocol, **target_options)
39
+ fo = self.of.open() # keep the reference
40
+
41
+ # Try to infer compression.
42
+ if compression is None:
43
+ name = None
44
+
45
+ # Try different ways to get hold of the filename. `fo` might either
46
+ # be a `fsspec.LocalFileOpener`, an `io.BufferedReader` or an
47
+ # `fsspec.AbstractFileSystem` instance.
48
+ try:
49
+ # Amended io.BufferedReader or similar.
50
+ # This uses a "protocol extension" where original filenames are
51
+ # propagated to archive-like filesystems in order to let them
52
+ # infer the right compression appropriately.
53
+ if hasattr(fo, "original"):
54
+ name = fo.original
55
+
56
+ # fsspec.LocalFileOpener
57
+ elif hasattr(fo, "path"):
58
+ name = fo.path
59
+
60
+ # io.BufferedReader
61
+ elif hasattr(fo, "name"):
62
+ name = fo.name
63
+
64
+ # fsspec.AbstractFileSystem
65
+ elif hasattr(fo, "info"):
66
+ name = fo.info()["name"]
67
+
68
+ except Exception as ex:
69
+ logger.warning(
70
+ f"Unable to determine file name, not inferring compression: {ex}"
71
+ )
72
+
73
+ if name is not None:
74
+ compression = infer_compression(name)
75
+ logger.info(f"Inferred compression {compression} from file name {name}")
76
+
77
+ if compression is not None:
78
+ # TODO: tarfile already implements compression with modes like "'r:gz'",
79
+ # but would seeking to an offset in the file still work then?
80
+ fo = compr[compression](fo)
81
+
82
+ self._fo_ref = fo
83
+ self.fo = fo # the whole instance is a context
84
+ self.tar = tarfile.TarFile(fileobj=self.fo)
85
+ self.dir_cache = None
86
+
87
+ self.index_store = index_store
88
+ self.index = None
89
+ self._index()
90
+
91
+ def _index(self):
92
+ # TODO: load and set saved index, if exists
93
+ out = {}
94
+ for ti in self.tar:
95
+ info = ti.get_info()
96
+ info["type"] = typemap.get(info["type"], "file")
97
+ name = ti.get_info()["name"].rstrip("/")
98
+ out[name] = (info, ti.offset_data)
99
+
100
+ self.index = out
101
+ # TODO: save index to self.index_store here, if set
102
+
103
+ def _get_dirs(self):
104
+ if self.dir_cache is not None:
105
+ return
106
+
107
+ # This enables ls to get directories as children as well as files
108
+ self.dir_cache = {
109
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
110
+ for dirname in self._all_dirnames(self.tar.getnames())
111
+ }
112
+ for member in self.tar.getmembers():
113
+ info = member.get_info()
114
+ info["name"] = info["name"].rstrip("/")
115
+ info["type"] = typemap.get(info["type"], "file")
116
+ self.dir_cache[info["name"]] = info
117
+
118
+ def _open(self, path, mode="rb", **kwargs):
119
+ if mode != "rb":
120
+ raise ValueError("Read-only filesystem implementation")
121
+ details, offset = self.index[path]
122
+ if details["type"] != "file":
123
+ raise ValueError("Can only handle regular files")
124
+ return self.tar.extractfile(path)
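Usage note (not part of the diff): a short, hedged sketch of listing and reading members of a gzipped tarball with this read-only filesystem; the archive path and member name are placeholders, with compression inferred from the file name as described in __init__ above.

import fsspec

fs = fsspec.filesystem("tar", fo="data/archive.tar.gz")  # hypothetical archive
print(fs.ls("/"))                           # listing comes from _index/_get_dirs
with fs.open("inner/file.txt", "rb") as f:  # only "rb" is supported
    print(f.read())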
temp_venv/lib/python3.13/site-packages/fsspec/implementations/webhdfs.py ADDED
@@ -0,0 +1,485 @@
1
+ # https://hadoop.apache.org/docs/r1.0.4/webhdfs.html
2
+
3
+ import logging
4
+ import os
5
+ import secrets
6
+ import shutil
7
+ import tempfile
8
+ import uuid
9
+ from contextlib import suppress
10
+ from urllib.parse import quote
11
+
12
+ import requests
13
+
14
+ from ..spec import AbstractBufferedFile, AbstractFileSystem
15
+ from ..utils import infer_storage_options, tokenize
16
+
17
+ logger = logging.getLogger("webhdfs")
18
+
19
+
20
+ class WebHDFS(AbstractFileSystem):
21
+ """
22
+ Interface to HDFS over HTTP using the WebHDFS API. Also supports HttpFS gateways.
23
+
24
+ Four auth mechanisms are supported:
25
+
26
+ insecure: no auth is done, and the user is assumed to be whoever they
27
+ say they are (parameter ``user``), or a predefined value such as
28
+ "dr.who" if not given
29
+ spnego: when kerberos authentication is enabled, auth is negotiated by
30
+ requests_kerberos https://github.com/requests/requests-kerberos .
31
+ This establishes a session based on existing kinit login and/or
32
+ specified principal/password; parameters are passed with ``kerb_kwargs``
33
+ token: uses an existing Hadoop delegation token from another secured
34
+ service. Indeed, this client can also generate such tokens when
35
+ not insecure. Note that tokens expire, but can be renewed (by a
36
+ previously specified user) and may allow for proxying.
37
+ basic-auth: used when both parameter ``user`` and parameter ``password``
38
+ are provided.
39
+
40
+ """
41
+
42
+ tempdir = str(tempfile.gettempdir())
43
+ protocol = "webhdfs", "webHDFS"
44
+
45
+ def __init__(
46
+ self,
47
+ host,
48
+ port=50070,
49
+ kerberos=False,
50
+ token=None,
51
+ user=None,
52
+ password=None,
53
+ proxy_to=None,
54
+ kerb_kwargs=None,
55
+ data_proxy=None,
56
+ use_https=False,
57
+ session_cert=None,
58
+ session_verify=True,
59
+ **kwargs,
60
+ ):
61
+ """
62
+ Parameters
63
+ ----------
64
+ host: str
65
+ Name-node address
66
+ port: int
67
+ Port for webHDFS
68
+ kerberos: bool
69
+ Whether to authenticate with kerberos for this connection
70
+ token: str or None
71
+ If given, use this token on every call to authenticate. A user
72
+ and user-proxy may be encoded in the token and should not also be
73
+ given
74
+ user: str or None
75
+ If given, assert the user name to connect with
76
+ password: str or None
77
+ If given, assert the password to use for basic auth. If password
78
+ is provided, user must be provided also
79
+ proxy_to: str or None
80
+ If given, the user has the authority to proxy, and this value is
81
+ the user in whose name actions are taken
82
+ kerb_kwargs: dict
83
+ Any extra arguments for HTTPKerberosAuth, see
84
+ `<https://github.com/requests/requests-kerberos/blob/master/requests_kerberos/kerberos_.py>`_
85
+ data_proxy: dict, callable or None
86
+ If given, map data-node addresses. This can be necessary if the
87
+ HDFS cluster is behind a proxy, running on Docker or otherwise has
88
+ a mismatch between the host-names given by the name-node and the
89
+ address by which to refer to them from the client. If a dict,
90
+ maps host names ``host->data_proxy[host]``; if a callable, full
91
+ URLs are passed, and function must conform to
92
+ ``url->data_proxy(url)``.
93
+ use_https: bool
94
+ Whether to connect to the Name-node using HTTPS instead of HTTP
95
+ session_cert: str or Tuple[str, str] or None
96
+ Path to a certificate file, or tuple of (cert, key) files to use
97
+ for the requests.Session
98
+ session_verify: str, bool or None
99
+ Path to a certificate file to use for verifying the requests.Session.
100
+ kwargs
101
+ """
102
+ if self._cached:
103
+ return
104
+ super().__init__(**kwargs)
105
+ self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"
106
+ self.kerb = kerberos
107
+ self.kerb_kwargs = kerb_kwargs or {}
108
+ self.pars = {}
109
+ self.proxy = data_proxy or {}
110
+ if token is not None:
111
+ if user is not None or proxy_to is not None:
112
+ raise ValueError(
113
+ "If passing a delegation token, must not set "
114
+ "user or proxy_to, as these are encoded in the"
115
+ " token"
116
+ )
117
+ self.pars["delegation"] = token
118
+ self.user = user
119
+ self.password = password
120
+
121
+ if password is not None:
122
+ if user is None:
123
+ raise ValueError(
124
+ "If passing a password, the user must also be "
125
+ "set in order to set up the basic-auth"
126
+ )
127
+ else:
128
+ if user is not None:
129
+ self.pars["user.name"] = user
130
+
131
+ if proxy_to is not None:
132
+ self.pars["doas"] = proxy_to
133
+ if kerberos and user is not None:
134
+ raise ValueError(
135
+ "If using Kerberos auth, do not specify the "
136
+ "user, this is handled by kinit."
137
+ )
138
+
139
+ self.session_cert = session_cert
140
+ self.session_verify = session_verify
141
+
142
+ self._connect()
143
+
144
+ self._fsid = f"webhdfs_{tokenize(host, port)}"
145
+
146
+ @property
147
+ def fsid(self):
148
+ return self._fsid
149
+
150
+ def _connect(self):
151
+ self.session = requests.Session()
152
+
153
+ if self.session_cert:
154
+ self.session.cert = self.session_cert
155
+
156
+ self.session.verify = self.session_verify
157
+
158
+ if self.kerb:
159
+ from requests_kerberos import HTTPKerberosAuth
160
+
161
+ self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)
162
+
163
+ if self.user is not None and self.password is not None:
164
+ from requests.auth import HTTPBasicAuth
165
+
166
+ self.session.auth = HTTPBasicAuth(self.user, self.password)
167
+
168
+ def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
169
+ path = self._strip_protocol(path) if path is not None else ""
170
+ url = self._apply_proxy(self.url + quote(path, safe="/="))
171
+ args = kwargs.copy()
172
+ args.update(self.pars)
173
+ args["op"] = op.upper()
174
+ logger.debug("sending %s with %s", url, method)
175
+ out = self.session.request(
176
+ method=method.upper(),
177
+ url=url,
178
+ params=args,
179
+ data=data,
180
+ allow_redirects=redirect,
181
+ )
182
+ if out.status_code in [400, 401, 403, 404, 500]:
183
+ try:
184
+ err = out.json()
185
+ msg = err["RemoteException"]["message"]
186
+ exp = err["RemoteException"]["exception"]
187
+ except (ValueError, KeyError):
188
+ pass
189
+ else:
190
+ if exp in ["IllegalArgumentException", "UnsupportedOperationException"]:
191
+ raise ValueError(msg)
192
+ elif exp in ["SecurityException", "AccessControlException"]:
193
+ raise PermissionError(msg)
194
+ elif exp in ["FileNotFoundException"]:
195
+ raise FileNotFoundError(msg)
196
+ else:
197
+ raise RuntimeError(msg)
198
+ out.raise_for_status()
199
+ return out
200
+
201
+ def _open(
202
+ self,
203
+ path,
204
+ mode="rb",
205
+ block_size=None,
206
+ autocommit=True,
207
+ replication=None,
208
+ permissions=None,
209
+ **kwargs,
210
+ ):
211
+ """
212
+
213
+ Parameters
214
+ ----------
215
+ path: str
216
+ File location
217
+ mode: str
218
+ 'rb', 'wb', etc.
219
+ block_size: int
220
+ Client buffer size for read-ahead or write buffer
221
+ autocommit: bool
222
+ If False, writes to temporary file that only gets put in final
223
+ location upon commit
224
+ replication: int
225
+ Number of copies of file on the cluster, write mode only
226
+ permissions: str or int
227
+ posix permissions, write mode only
228
+ kwargs
229
+
230
+ Returns
231
+ -------
232
+ WebHDFile instance
233
+ """
234
+ block_size = block_size or self.blocksize
235
+ return WebHDFile(
236
+ self,
237
+ path,
238
+ mode=mode,
239
+ block_size=block_size,
240
+ tempdir=self.tempdir,
241
+ autocommit=autocommit,
242
+ replication=replication,
243
+ permissions=permissions,
244
+ )
245
+
246
+ @staticmethod
247
+ def _process_info(info):
248
+ info["type"] = info["type"].lower()
249
+ info["size"] = info["length"]
250
+ return info
251
+
252
+ @classmethod
253
+ def _strip_protocol(cls, path):
254
+ return infer_storage_options(path)["path"]
255
+
256
+ @staticmethod
257
+ def _get_kwargs_from_urls(urlpath):
258
+ out = infer_storage_options(urlpath)
259
+ out.pop("path", None)
260
+ out.pop("protocol", None)
261
+ if "username" in out:
262
+ out["user"] = out.pop("username")
263
+ return out
264
+
265
+ def info(self, path):
266
+ out = self._call("GETFILESTATUS", path=path)
267
+ info = out.json()["FileStatus"]
268
+ info["name"] = path
269
+ return self._process_info(info)
270
+
271
+ def ls(self, path, detail=False):
272
+ out = self._call("LISTSTATUS", path=path)
273
+ infos = out.json()["FileStatuses"]["FileStatus"]
274
+ for info in infos:
275
+ self._process_info(info)
276
+ info["name"] = path.rstrip("/") + "/" + info["pathSuffix"]
277
+ if detail:
278
+ return sorted(infos, key=lambda i: i["name"])
279
+ else:
280
+ return sorted(info["name"] for info in infos)
281
+
282
+ def content_summary(self, path):
283
+ """Total numbers of files, directories and bytes under path"""
284
+ out = self._call("GETCONTENTSUMMARY", path=path)
285
+ return out.json()["ContentSummary"]
286
+
287
+ def ukey(self, path):
288
+ """Checksum info of file, giving method and result"""
289
+ out = self._call("GETFILECHECKSUM", path=path, redirect=False)
290
+ if "Location" in out.headers:
291
+ location = self._apply_proxy(out.headers["Location"])
292
+ out2 = self.session.get(location)
293
+ out2.raise_for_status()
294
+ return out2.json()["FileChecksum"]
295
+ else:
296
+ out.raise_for_status()
297
+ return out.json()["FileChecksum"]
298
+
299
+ def home_directory(self):
300
+ """Get user's home directory"""
301
+ out = self._call("GETHOMEDIRECTORY")
302
+ return out.json()["Path"]
303
+
304
+ def get_delegation_token(self, renewer=None):
305
+ """Retrieve token which can give the same authority to other users
306
+
307
+ Parameters
308
+ ----------
309
+ renewer: str or None
310
+ User who may use this token; if None, will be current user
311
+ """
312
+ if renewer:
313
+ out = self._call("GETDELEGATIONTOKEN", renewer=renewer)
314
+ else:
315
+ out = self._call("GETDELEGATIONTOKEN")
316
+ t = out.json()["Token"]
317
+ if t is None:
318
+ raise ValueError("No token available for this user/security context")
319
+ return t["urlString"]
320
+
321
+ def renew_delegation_token(self, token):
322
+ """Make token live longer. Returns new expiry time"""
323
+ out = self._call("RENEWDELEGATIONTOKEN", method="put", token=token)
324
+ return out.json()["long"]
325
+
326
+ def cancel_delegation_token(self, token):
327
+ """Stop the token from being useful"""
328
+ self._call("CANCELDELEGATIONTOKEN", method="put", token=token)
329
+
330
+ def chmod(self, path, mod):
331
+ """Set the permission at path
332
+
333
+ Parameters
334
+ ----------
335
+ path: str
336
+ location to set (file or directory)
337
+ mod: str or int
338
+ posix representation of permission, given as an octal string, e.g. '777'
339
+ or 0o777
340
+ """
341
+ self._call("SETPERMISSION", method="put", path=path, permission=mod)
342
+
343
+ def chown(self, path, owner=None, group=None):
344
+ """Change owning user and/or group"""
345
+ kwargs = {}
346
+ if owner is not None:
347
+ kwargs["owner"] = owner
348
+ if group is not None:
349
+ kwargs["group"] = group
350
+ self._call("SETOWNER", method="put", path=path, **kwargs)
351
+
352
+ def set_replication(self, path, replication):
353
+ """
354
+ Set file replication factor
355
+
356
+ Parameters
357
+ ----------
358
+ path: str
359
+ File location (not for directories)
360
+ replication: int
361
+ Number of copies of file on the cluster. Should be smaller than
362
+ number of data nodes; normally 3 on most systems.
363
+ """
364
+ self._call("SETREPLICATION", path=path, method="put", replication=replication)
365
+
366
+ def mkdir(self, path, **kwargs):
367
+ self._call("MKDIRS", method="put", path=path)
368
+
369
+ def makedirs(self, path, exist_ok=False):
370
+ if exist_ok is False and self.exists(path):
371
+ raise FileExistsError(path)
372
+ self.mkdir(path)
373
+
374
+ def mv(self, path1, path2, **kwargs):
375
+ self._call("RENAME", method="put", path=path1, destination=path2)
376
+
377
+ def rm(self, path, recursive=False, **kwargs):
378
+ self._call(
379
+ "DELETE",
380
+ method="delete",
381
+ path=path,
382
+ recursive="true" if recursive else "false",
383
+ )
384
+
385
+ def rm_file(self, path, **kwargs):
386
+ self.rm(path)
387
+
388
+ def cp_file(self, lpath, rpath, **kwargs):
389
+ with self.open(lpath) as lstream:
390
+ tmp_fname = "/".join([self._parent(rpath), f".tmp.{secrets.token_hex(16)}"])
391
+ # Perform an atomic copy (stream to a temporary file and
392
+ # move it to the actual destination).
393
+ try:
394
+ with self.open(tmp_fname, "wb") as rstream:
395
+ shutil.copyfileobj(lstream, rstream)
396
+ self.mv(tmp_fname, rpath)
397
+ except BaseException:
398
+ with suppress(FileNotFoundError):
399
+ self.rm(tmp_fname)
400
+ raise
401
+
402
+ def _apply_proxy(self, location):
403
+ if self.proxy and callable(self.proxy):
404
+ location = self.proxy(location)
405
+ elif self.proxy:
406
+ # as a dict
407
+ for k, v in self.proxy.items():
408
+ location = location.replace(k, v, 1)
409
+ return location
410
+
411
+
412
+ class WebHDFile(AbstractBufferedFile):
413
+ """A file living in HDFS over webHDFS"""
414
+
415
+ def __init__(self, fs, path, **kwargs):
416
+ super().__init__(fs, path, **kwargs)
417
+ kwargs = kwargs.copy()
418
+ if kwargs.get("permissions", None) is None:
419
+ kwargs.pop("permissions", None)
420
+ if kwargs.get("replication", None) is None:
421
+ kwargs.pop("replication", None)
422
+ self.permissions = kwargs.pop("permissions", 511)
423
+ tempdir = kwargs.pop("tempdir")
424
+ if kwargs.pop("autocommit", False) is False:
425
+ self.target = self.path
426
+ self.path = os.path.join(tempdir, str(uuid.uuid4()))
427
+
428
+ def _upload_chunk(self, final=False):
429
+ """Write one part of a multi-block file upload
430
+
431
+ Parameters
432
+ ==========
433
+ final: bool
434
+ This is the last block, so should complete file, if
435
+ self.autocommit is True.
436
+ """
437
+ out = self.fs.session.post(
438
+ self.location,
439
+ data=self.buffer.getvalue(),
440
+ headers={"content-type": "application/octet-stream"},
441
+ )
442
+ out.raise_for_status()
443
+ return True
444
+
445
+ def _initiate_upload(self):
446
+ """Create remote file/upload"""
447
+ kwargs = self.kwargs.copy()
448
+ if "a" in self.mode:
449
+ op, method = "APPEND", "POST"
450
+ else:
451
+ op, method = "CREATE", "PUT"
452
+ kwargs["overwrite"] = "true"
453
+ out = self.fs._call(op, method, self.path, redirect=False, **kwargs)
454
+ location = self.fs._apply_proxy(out.headers["Location"])
455
+ if "w" in self.mode:
456
+ # create empty file to append to
457
+ out2 = self.fs.session.put(
458
+ location, headers={"content-type": "application/octet-stream"}
459
+ )
460
+ out2.raise_for_status()
461
+ # after creating empty file, change location to append to
462
+ out2 = self.fs._call("APPEND", "POST", self.path, redirect=False, **kwargs)
463
+ self.location = self.fs._apply_proxy(out2.headers["Location"])
464
+
465
+ def _fetch_range(self, start, end):
466
+ start = max(start, 0)
467
+ end = min(self.size, end)
468
+ if start >= end or start >= self.size:
469
+ return b""
470
+ out = self.fs._call(
471
+ "OPEN", path=self.path, offset=start, length=end - start, redirect=False
472
+ )
473
+ out.raise_for_status()
474
+ if "Location" in out.headers:
475
+ location = out.headers["Location"]
476
+ out2 = self.fs.session.get(self.fs._apply_proxy(location))
477
+ return out2.content
478
+ else:
479
+ return out.content
480
+
481
+ def commit(self):
482
+ self.fs.mv(self.path, self.target)
483
+
484
+ def discard(self):
485
+ self.fs.rm(self.path)
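Usage note (not part of the diff): a hedged sketch of talking to a name-node with the insecure (user-name) auth mechanism described in the class docstring; host, port and user are placeholders for a reachable cluster.

import fsspec

fs = fsspec.filesystem(
    "webhdfs",
    host="namenode.example.com",  # hypothetical name-node
    port=9870,                    # __init__ defaults to 50070
    user="hdfs",
)
fs.makedirs("/tmp/demo", exist_ok=True)
with fs.open("/tmp/demo/hello.txt", "wb") as f:
    f.write(b"hello webhdfs")
print(fs.ls("/tmp/demo", detail=True))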
temp_venv/lib/python3.13/site-packages/fsspec/implementations/zip.py ADDED
@@ -0,0 +1,177 @@
1
+ import os
2
+ import zipfile
3
+
4
+ import fsspec
5
+ from fsspec.archive import AbstractArchiveFileSystem
6
+
7
+
8
+ class ZipFileSystem(AbstractArchiveFileSystem):
9
+ """Read/Write contents of ZIP archive as a file-system
10
+
11
+ Keeps file object open while instance lives.
12
+
13
+ This class is pickleable, but not necessarily thread-safe
14
+ """
15
+
16
+ root_marker = ""
17
+ protocol = "zip"
18
+ cachable = False
19
+
20
+ def __init__(
21
+ self,
22
+ fo="",
23
+ mode="r",
24
+ target_protocol=None,
25
+ target_options=None,
26
+ compression=zipfile.ZIP_STORED,
27
+ allowZip64=True,
28
+ compresslevel=None,
29
+ **kwargs,
30
+ ):
31
+ """
32
+ Parameters
33
+ ----------
34
+ fo: str or file-like
35
+ Contains ZIP, and must exist. If a str, will fetch file using
36
+ :meth:`~fsspec.open_files`, which must return one file exactly.
37
+ mode: str
38
+ Accept: "r", "w", "a"
39
+ target_protocol: str (optional)
40
+ If ``fo`` is a string, this value can be used to override the
41
+ FS protocol inferred from a URL
42
+ target_options: dict (optional)
43
+ Kwargs passed when instantiating the target FS, if ``fo`` is
44
+ a string.
45
+ compression, allowZip64, compresslevel: passed to ZipFile
46
+ Only relevant when creating a ZIP
47
+ """
48
+ super().__init__(self, **kwargs)
49
+ if mode not in set("rwa"):
50
+ raise ValueError(f"mode '{mode}' not understood")
51
+ self.mode = mode
52
+ if isinstance(fo, (str, os.PathLike)):
53
+ if mode == "a":
54
+ m = "r+b"
55
+ else:
56
+ m = mode + "b"
57
+ fo = fsspec.open(
58
+ fo, mode=m, protocol=target_protocol, **(target_options or {})
59
+ )
60
+ self.force_zip_64 = allowZip64
61
+ self.of = fo
62
+ self.fo = fo.__enter__() # the whole instance is a context
63
+ self.zip = zipfile.ZipFile(
64
+ self.fo,
65
+ mode=mode,
66
+ compression=compression,
67
+ allowZip64=allowZip64,
68
+ compresslevel=compresslevel,
69
+ )
70
+ self.dir_cache = None
71
+
72
+ @classmethod
73
+ def _strip_protocol(cls, path):
74
+ # zip file paths are always relative to the archive root
75
+ return super()._strip_protocol(path).lstrip("/")
76
+
77
+ def __del__(self):
78
+ if hasattr(self, "zip"):
79
+ self.close()
80
+ del self.zip
81
+
82
+ def close(self):
83
+ """Commits any write changes to the file. Done on ``del`` too."""
84
+ self.zip.close()
85
+
86
+ def _get_dirs(self):
87
+ if self.dir_cache is None or self.mode in set("wa"):
88
+ # when writing, dir_cache is always in the ZipFile's attributes,
89
+ # not read from the file.
90
+ files = self.zip.infolist()
91
+ self.dir_cache = {
92
+ dirname.rstrip("/"): {
93
+ "name": dirname.rstrip("/"),
94
+ "size": 0,
95
+ "type": "directory",
96
+ }
97
+ for dirname in self._all_dirnames(self.zip.namelist())
98
+ }
99
+ for z in files:
100
+ f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
101
+ f.update(
102
+ {
103
+ "name": z.filename.rstrip("/"),
104
+ "size": z.file_size,
105
+ "type": ("directory" if z.is_dir() else "file"),
106
+ }
107
+ )
108
+ self.dir_cache[f["name"]] = f
109
+
110
+ def pipe_file(self, path, value, **kwargs):
111
+ # override upstream, because we know the exact file size in this case
112
+ self.zip.writestr(path, value, **kwargs)
113
+
114
+ def _open(
115
+ self,
116
+ path,
117
+ mode="rb",
118
+ block_size=None,
119
+ autocommit=True,
120
+ cache_options=None,
121
+ **kwargs,
122
+ ):
123
+ path = self._strip_protocol(path)
124
+ if "r" in mode and self.mode in set("wa"):
125
+ if self.exists(path):
126
+ raise OSError("ZipFS can only be open for reading or writing, not both")
127
+ raise FileNotFoundError(path)
128
+ if "r" in self.mode and "w" in mode:
129
+ raise OSError("ZipFS can only be open for reading or writing, not both")
130
+ out = self.zip.open(path, mode.strip("b"), force_zip64=self.force_zip_64)
131
+ if "r" in mode:
132
+ info = self.info(path)
133
+ out.size = info["size"]
134
+ out.name = info["name"]
135
+ return out
136
+
137
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
138
+ if maxdepth is not None and maxdepth < 1:
139
+ raise ValueError("maxdepth must be at least 1")
140
+
141
+ # Remove the leading slash, as the zip file paths are always
142
+ # given without a leading slash
143
+ path = path.lstrip("/")
144
+ path_parts = list(filter(lambda s: bool(s), path.split("/")))
145
+
146
+ def _matching_starts(file_path):
147
+ file_parts = filter(lambda s: bool(s), file_path.split("/"))
148
+ return all(a == b for a, b in zip(path_parts, file_parts))
149
+
150
+ self._get_dirs()
151
+
152
+ result = {}
153
+ # To match posix find, if an exact file name is given, we should
154
+ # return only that file
155
+ if path in self.dir_cache and self.dir_cache[path]["type"] == "file":
156
+ result[path] = self.dir_cache[path]
157
+ return result if detail else [path]
158
+
159
+ for file_path, file_info in self.dir_cache.items():
160
+ if not (path == "" or _matching_starts(file_path)):
161
+ continue
162
+
163
+ if file_info["type"] == "directory":
164
+ if withdirs:
165
+ if file_path not in result:
166
+ result[file_path.strip("/")] = file_info
167
+ continue
168
+
169
+ if file_path not in result:
170
+ result[file_path] = file_info if detail else None
171
+
172
+ if maxdepth:
173
+ path_depth = path.count("/")
174
+ result = {
175
+ k: v for k, v in result.items() if k.count("/") - path_depth < maxdepth
176
+ }
177
+ return result if detail else sorted(result)
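Usage note (not part of the diff): a minimal sketch of writing a ZIP and reading it back through this filesystem; the archive and member names are placeholders.

import fsspec

fs = fsspec.filesystem("zip", fo="bundle.zip", mode="w")  # hypothetical archive
fs.pipe_file("notes/readme.txt", b"hello zip")
fs.close()  # flushes the archive; also happens on __del__

fs = fsspec.filesystem("zip", fo="bundle.zip")  # default mode "r"
print(fs.find("", withdirs=True))
with fs.open("notes/readme.txt") as f:
    print(f.read())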
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/__init__.py ADDED
@@ -0,0 +1,289 @@
1
+ import os
2
+ from hashlib import md5
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import LocalFileSystem
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa: F401
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa: F401
9
+ from fsspec.tests.abstract.open import AbstractOpenTests # noqa: F401
10
+ from fsspec.tests.abstract.pipe import AbstractPipeTests # noqa: F401
11
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa: F401
12
+
13
+
14
+ class BaseAbstractFixtures:
15
+ """
16
+ Abstract base class containing fixtures that are used by but never need to
17
+ be overridden in derived filesystem-specific classes to run the abstract
18
+ tests on such filesystems.
19
+ """
20
+
21
+ @pytest.fixture
22
+ def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
23
+ """
24
+ Scenario on remote filesystem that is used for many cp/get/put tests.
25
+
26
+ Cleans up at the end of each test in which it is used.
27
+ """
28
+ source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
29
+ yield source
30
+ fs.rm(source, recursive=True)
31
+
32
+ @pytest.fixture
33
+ def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
34
+ """
35
+ Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.
36
+
37
+ Cleans up at the end of each test in which it is used.
38
+ """
39
+ source = self._glob_edge_cases_files(fs, fs_join, fs_path)
40
+ yield source
41
+ fs.rm(source, recursive=True)
42
+
43
+ @pytest.fixture
44
+ def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
45
+ """
46
+ Scenario on remote filesystem that is used to check cp/get/put on directory
47
+ and file with the same name prefixes.
48
+
49
+ Cleans up at the end of each test in which it is used.
50
+ """
51
+ source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
52
+ yield source
53
+ fs.rm(source, recursive=True)
54
+
55
+ @pytest.fixture
56
+ def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
57
+ """
58
+ Scenario on remote filesystem that is used to check cp/get/put files order
59
+ when source and destination are lists.
60
+
61
+ Cleans up at the end of each test in which it is used.
62
+ """
63
+ source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
64
+ yield source
65
+ fs.rm(source, recursive=True)
66
+
67
+ @pytest.fixture
68
+ def fs_target(self, fs, fs_join, fs_path):
69
+ """
70
+ Return name of remote directory that does not yet exist to copy into.
71
+
72
+ Cleans up at the end of each test in which it is used.
73
+ """
74
+ target = fs_join(fs_path, "target")
75
+ yield target
76
+ if fs.exists(target):
77
+ fs.rm(target, recursive=True)
78
+
79
+ @pytest.fixture
80
+ def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
81
+ """
82
+ Scenario on local filesystem that is used for many cp/get/put tests.
83
+
84
+ Cleans up at the end of each test in which it is used.
85
+ """
86
+ source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
87
+ yield source
88
+ local_fs.rm(source, recursive=True)
89
+
90
+ @pytest.fixture
91
+ def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
92
+ """
93
+ Scenario on local filesystem that is used for glob edge cases cp/get/put tests.
94
+
95
+ Cleans up at the end of each test in which it is used.
96
+ """
97
+ source = self._glob_edge_cases_files(local_fs, local_join, local_path)
98
+ yield source
99
+ local_fs.rm(source, recursive=True)
100
+
101
+ @pytest.fixture
102
+ def local_dir_and_file_with_same_name_prefix(
103
+ self, local_fs, local_join, local_path
104
+ ):
105
+ """
106
+ Scenario on local filesystem that is used to check cp/get/put on directory
107
+ and file with the same name prefixes.
108
+
109
+ Cleans up at the end of each test in which it is used.
110
+ """
111
+ source = self._dir_and_file_with_same_name_prefix(
112
+ local_fs, local_join, local_path
113
+ )
114
+ yield source
115
+ local_fs.rm(source, recursive=True)
116
+
117
+ @pytest.fixture
118
+ def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
119
+ """
120
+ Scenario on local filesystem that is used to check cp/get/put files order
121
+ when source and destination are lists.
122
+
123
+ Cleans up at the end of each test in which it is used.
124
+ """
125
+ source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
126
+ yield source
127
+ local_fs.rm(source, recursive=True)
128
+
129
+ @pytest.fixture
130
+ def local_target(self, local_fs, local_join, local_path):
131
+ """
132
+ Return name of local directory that does not yet exist to copy into.
133
+
134
+ Cleans up at the end of each test in which it is used.
135
+ """
136
+ target = local_join(local_path, "target")
137
+ yield target
138
+ if local_fs.exists(target):
139
+ local_fs.rm(target, recursive=True)
140
+
141
+ def _glob_edge_cases_files(self, some_fs, some_join, some_path):
142
+ """
143
+ Scenario that is used for glob edge cases cp/get/put tests.
144
+ Creates the following directory and file structure:
145
+
146
+ 📁 source
147
+ ├── 📄 file1
148
+ ├── 📄 file2
149
+ ├── 📁 subdir0
150
+ │ ├── 📄 subfile1
151
+ │ ├── 📄 subfile2
152
+ │ └── 📁 nesteddir
153
+ │ └── 📄 nestedfile
154
+ └── 📁 subdir1
155
+ ├── 📄 subfile1
156
+ ├── 📄 subfile2
157
+ └── 📁 nesteddir
158
+ └── 📄 nestedfile
159
+ """
160
+ source = some_join(some_path, "source")
161
+ some_fs.touch(some_join(source, "file1"))
162
+ some_fs.touch(some_join(source, "file2"))
163
+
164
+ for subdir_idx in range(2):
165
+ subdir = some_join(source, f"subdir{subdir_idx}")
166
+ nesteddir = some_join(subdir, "nesteddir")
167
+ some_fs.makedirs(nesteddir)
168
+ some_fs.touch(some_join(subdir, "subfile1"))
169
+ some_fs.touch(some_join(subdir, "subfile2"))
170
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
171
+
172
+ return source
173
+
174
+ def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
175
+ """
176
+ Scenario that is used for many cp/get/put tests. Creates the following
177
+ directory and file structure:
178
+
179
+ 📁 source
180
+ ├── 📄 file1
181
+ ├── 📄 file2
182
+ └── 📁 subdir
183
+ ├── 📄 subfile1
184
+ ├── 📄 subfile2
185
+ └── 📁 nesteddir
186
+ └── 📄 nestedfile
187
+ """
188
+ source = some_join(some_path, "source")
189
+ subdir = some_join(source, "subdir")
190
+ nesteddir = some_join(subdir, "nesteddir")
191
+ some_fs.makedirs(nesteddir)
192
+ some_fs.touch(some_join(source, "file1"))
193
+ some_fs.touch(some_join(source, "file2"))
194
+ some_fs.touch(some_join(subdir, "subfile1"))
195
+ some_fs.touch(some_join(subdir, "subfile2"))
196
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
197
+ return source
198
+
199
+ def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
200
+ """
201
+ Scenario that is used to check cp/get/put on directory and file with
202
+ the same name prefixes. Creates the following directory and file structure:
203
+
204
+ 📁 source
205
+ ├── 📄 subdir.txt
206
+ └── 📁 subdir
207
+ └── 📄 subfile.txt
208
+ """
209
+ source = some_join(some_path, "source")
210
+ subdir = some_join(source, "subdir")
211
+ file = some_join(source, "subdir.txt")
212
+ subfile = some_join(subdir, "subfile.txt")
213
+ some_fs.makedirs(subdir)
214
+ some_fs.touch(file)
215
+ some_fs.touch(subfile)
216
+ return source
217
+
218
+ def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
219
+ """
220
+ Scenario that is used to check cp/get/put files order when source and
221
+ destination are lists. Creates the following directory and file structure:
222
+
223
+ 📁 source
224
+ └── 📄 {hashed([0-9])}.txt
225
+ """
226
+ source = some_join(some_path, "source")
227
+ for i in range(10):
228
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
229
+ path = some_join(source, f"{hashed_i}.txt")
230
+ some_fs.pipe(path=path, value=f"{i}".encode())
231
+ return source
232
+
233
+
234
+ class AbstractFixtures(BaseAbstractFixtures):
235
+ """
236
+ Abstract base class containing fixtures that may be overridden in derived
237
+ filesystem-specific classes to run the abstract tests on such filesystems.
238
+
239
+ For any particular filesystem some of these fixtures must be overridden,
240
+ such as ``fs`` and ``fs_path``, and others may be overridden if the
241
+ default functions here are not appropriate, such as ``fs_join``.
242
+ """
243
+
244
+ @pytest.fixture
245
+ def fs(self):
246
+ raise NotImplementedError("This function must be overridden in derived classes")
247
+
248
+ @pytest.fixture
249
+ def fs_join(self):
250
+ """
251
+ Return a function that joins its arguments together into a path.
252
+
253
+ Most fsspec implementations join paths in a platform-dependent way,
254
+ but some will override this to always use a forward slash.
255
+ """
256
+ return os.path.join
257
+
258
+ @pytest.fixture
259
+ def fs_path(self):
260
+ raise NotImplementedError("This function must be overridden in derived classes")
261
+
262
+ @pytest.fixture(scope="class")
263
+ def local_fs(self):
264
+ # Maybe need an option for auto_mkdir=False? This is only relevant
265
+ # for certain implementations.
266
+ return LocalFileSystem(auto_mkdir=True)
267
+
268
+ @pytest.fixture
269
+ def local_join(self):
270
+ """
271
+ Return a function that joins its arguments together into a path, on
272
+ the local filesystem.
273
+ """
274
+ return os.path.join
275
+
276
+ @pytest.fixture
277
+ def local_path(self, tmpdir):
278
+ return tmpdir
279
+
280
+ @pytest.fixture
281
+ def supports_empty_directories(self):
282
+ """
283
+ Return whether this implementation supports empty directories.
284
+ """
285
+ return True
286
+
287
+ @pytest.fixture
288
+ def fs_sanitize_path(self):
289
+ return lambda x: x
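Usage note (not part of the diff): a hedged sketch of how a concrete filesystem plugs into these fixtures and the abstract test suites; the memory-backed example below is illustrative of the pattern, not a definitive recipe.

import pytest

import fsspec.tests.abstract as abstract
from fsspec.implementations.memory import MemoryFileSystem


class MemoryFixtures(abstract.AbstractFixtures):
    @pytest.fixture
    def fs(self):
        return MemoryFileSystem()

    @pytest.fixture
    def fs_join(self):
        # memory paths always use forward slashes
        return lambda *args: "/".join(args)

    @pytest.fixture
    def fs_path(self):
        return ""


class TestMemoryCopy(abstract.AbstractCopyTests, MemoryFixtures):
    pass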
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/common.py ADDED
@@ -0,0 +1,175 @@
1
+ GLOB_EDGE_CASES_TESTS = {
2
+ "argnames": ("path", "recursive", "maxdepth", "expected"),
3
+ "argvalues": [
4
+ ("fil?1", False, None, ["file1"]),
5
+ ("fil?1", True, None, ["file1"]),
6
+ ("file[1-2]", False, None, ["file1", "file2"]),
7
+ ("file[1-2]", True, None, ["file1", "file2"]),
8
+ ("*", False, None, ["file1", "file2"]),
9
+ (
10
+ "*",
11
+ True,
12
+ None,
13
+ [
14
+ "file1",
15
+ "file2",
16
+ "subdir0/subfile1",
17
+ "subdir0/subfile2",
18
+ "subdir0/nesteddir/nestedfile",
19
+ "subdir1/subfile1",
20
+ "subdir1/subfile2",
21
+ "subdir1/nesteddir/nestedfile",
22
+ ],
23
+ ),
24
+ ("*", True, 1, ["file1", "file2"]),
25
+ (
26
+ "*",
27
+ True,
28
+ 2,
29
+ [
30
+ "file1",
31
+ "file2",
32
+ "subdir0/subfile1",
33
+ "subdir0/subfile2",
34
+ "subdir1/subfile1",
35
+ "subdir1/subfile2",
36
+ ],
37
+ ),
38
+ ("*1", False, None, ["file1"]),
39
+ (
40
+ "*1",
41
+ True,
42
+ None,
43
+ [
44
+ "file1",
45
+ "subdir1/subfile1",
46
+ "subdir1/subfile2",
47
+ "subdir1/nesteddir/nestedfile",
48
+ ],
49
+ ),
50
+ ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
51
+ (
52
+ "**",
53
+ False,
54
+ None,
55
+ [
56
+ "file1",
57
+ "file2",
58
+ "subdir0/subfile1",
59
+ "subdir0/subfile2",
60
+ "subdir0/nesteddir/nestedfile",
61
+ "subdir1/subfile1",
62
+ "subdir1/subfile2",
63
+ "subdir1/nesteddir/nestedfile",
64
+ ],
65
+ ),
66
+ (
67
+ "**",
68
+ True,
69
+ None,
70
+ [
71
+ "file1",
72
+ "file2",
73
+ "subdir0/subfile1",
74
+ "subdir0/subfile2",
75
+ "subdir0/nesteddir/nestedfile",
76
+ "subdir1/subfile1",
77
+ "subdir1/subfile2",
78
+ "subdir1/nesteddir/nestedfile",
79
+ ],
80
+ ),
81
+ ("**", True, 1, ["file1", "file2"]),
82
+ (
83
+ "**",
84
+ True,
85
+ 2,
86
+ [
87
+ "file1",
88
+ "file2",
89
+ "subdir0/subfile1",
90
+ "subdir0/subfile2",
91
+ "subdir0/nesteddir/nestedfile",
92
+ "subdir1/subfile1",
93
+ "subdir1/subfile2",
94
+ "subdir1/nesteddir/nestedfile",
95
+ ],
96
+ ),
97
+ (
98
+ "**",
99
+ False,
100
+ 2,
101
+ [
102
+ "file1",
103
+ "file2",
104
+ "subdir0/subfile1",
105
+ "subdir0/subfile2",
106
+ "subdir1/subfile1",
107
+ "subdir1/subfile2",
108
+ ],
109
+ ),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
+ (
112
+ "**/*1",
113
+ True,
114
+ None,
115
+ [
116
+ "file1",
117
+ "subdir0/subfile1",
118
+ "subdir1/subfile1",
119
+ "subdir1/subfile2",
120
+ "subdir1/nesteddir/nestedfile",
121
+ ],
122
+ ),
123
+ ("**/*1", True, 1, ["file1"]),
124
+ (
125
+ "**/*1",
126
+ True,
127
+ 2,
128
+ ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
+ ),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
+ ("**/subdir0", False, None, []),
132
+ ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
+ ("**/subdir0/nested*", False, 2, []),
134
+ ("**/subdir0/nested*", True, 2, ["nestedfile"]),
135
+ ("subdir[1-2]", False, None, []),
136
+ ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
137
+ ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
138
+ ("subdir[0-1]", False, None, []),
139
+ (
140
+ "subdir[0-1]",
141
+ True,
142
+ None,
143
+ [
144
+ "subdir0/subfile1",
145
+ "subdir0/subfile2",
146
+ "subdir0/nesteddir/nestedfile",
147
+ "subdir1/subfile1",
148
+ "subdir1/subfile2",
149
+ "subdir1/nesteddir/nestedfile",
150
+ ],
151
+ ),
152
+ (
153
+ "subdir[0-1]/*fil[e]*",
154
+ False,
155
+ None,
156
+ [
157
+ "subdir0/subfile1",
158
+ "subdir0/subfile2",
159
+ "subdir1/subfile1",
160
+ "subdir1/subfile2",
161
+ ],
162
+ ),
163
+ (
164
+ "subdir[0-1]/*fil[e]*",
165
+ True,
166
+ None,
167
+ [
168
+ "subdir0/subfile1",
169
+ "subdir0/subfile2",
170
+ "subdir1/subfile1",
171
+ "subdir1/subfile2",
172
+ ],
173
+ ),
174
+ ],
175
+ }
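Usage note (not part of the diff): the copy/get/put suites below unpack this table with pytest.mark.parametrize; a minimal standalone sketch of the same pattern follows.

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


@pytest.mark.parametrize(
    GLOB_EDGE_CASES_TESTS["argnames"],
    GLOB_EDGE_CASES_TESTS["argvalues"],
)
def test_glob_case(path, recursive, maxdepth, expected):
    # each case is a glob pattern, recursion settings and the relative
    # paths it should match inside the "source" tree built by the fixtures
    assert isinstance(expected, list)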
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/copy.py ADDED
@@ -0,0 +1,557 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractCopyTests:
10
+ def test_copy_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_bulk_operations_scenario_0,
15
+ fs_target,
16
+ supports_empty_directories,
17
+ ):
18
+ # Copy scenario 1a
19
+ source = fs_bulk_operations_scenario_0
20
+
21
+ target = fs_target
22
+ fs.mkdir(target)
23
+ if not supports_empty_directories:
24
+ # Force target directory to exist by adding a dummy file
25
+ fs.touch(fs_join(target, "dummy"))
26
+ assert fs.isdir(target)
27
+
28
+ target_file2 = fs_join(target, "file2")
29
+ target_subfile1 = fs_join(target, "subfile1")
30
+
31
+ # Copy from source directory
32
+ fs.cp(fs_join(source, "file2"), target)
33
+ assert fs.isfile(target_file2)
34
+
35
+ # Copy from sub directory
36
+ fs.cp(fs_join(source, "subdir", "subfile1"), target)
37
+ assert fs.isfile(target_subfile1)
38
+
39
+ # Remove copied files
40
+ fs.rm([target_file2, target_subfile1])
41
+ assert not fs.exists(target_file2)
42
+ assert not fs.exists(target_subfile1)
43
+
44
+ # Repeat with trailing slash on target
45
+ fs.cp(fs_join(source, "file2"), target + "/")
46
+ assert fs.isdir(target)
47
+ assert fs.isfile(target_file2)
48
+
49
+ fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
50
+ assert fs.isfile(target_subfile1)
51
+
52
+ def test_copy_file_to_new_directory(
53
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
54
+ ):
55
+ # Copy scenario 1b
56
+ source = fs_bulk_operations_scenario_0
57
+
58
+ target = fs_target
59
+ fs.mkdir(target)
60
+
61
+ fs.cp(
62
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
63
+ ) # Note trailing slash
64
+ assert fs.isdir(target)
65
+ assert fs.isdir(fs_join(target, "newdir"))
66
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
67
+
68
+ def test_copy_file_to_file_in_existing_directory(
69
+ self,
70
+ fs,
71
+ fs_join,
72
+ fs_bulk_operations_scenario_0,
73
+ fs_target,
74
+ supports_empty_directories,
75
+ ):
76
+ # Copy scenario 1c
77
+ source = fs_bulk_operations_scenario_0
78
+
79
+ target = fs_target
80
+ fs.mkdir(target)
81
+ if not supports_empty_directories:
82
+ # Force target directory to exist by adding a dummy file
83
+ fs.touch(fs_join(target, "dummy"))
84
+ assert fs.isdir(target)
85
+
86
+ fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
87
+ assert fs.isfile(fs_join(target, "newfile"))
88
+
89
+ def test_copy_file_to_file_in_new_directory(
90
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
91
+ ):
92
+ # Copy scenario 1d
93
+ source = fs_bulk_operations_scenario_0
94
+
95
+ target = fs_target
96
+ fs.mkdir(target)
97
+
98
+ fs.cp(
99
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
100
+ )
101
+ assert fs.isdir(fs_join(target, "newdir"))
102
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
103
+
104
+ def test_copy_directory_to_existing_directory(
105
+ self,
106
+ fs,
107
+ fs_join,
108
+ fs_bulk_operations_scenario_0,
109
+ fs_target,
110
+ supports_empty_directories,
111
+ ):
112
+ # Copy scenario 1e
113
+ source = fs_bulk_operations_scenario_0
114
+
115
+ target = fs_target
116
+ fs.mkdir(target)
117
+ if not supports_empty_directories:
118
+ # Force target directory to exist by adding a dummy file
119
+ dummy = fs_join(target, "dummy")
120
+ fs.touch(dummy)
121
+ assert fs.isdir(target)
122
+
123
+ for source_slash, target_slash in zip([False, True], [False, True]):
124
+ s = fs_join(source, "subdir")
125
+ if source_slash:
126
+ s += "/"
127
+ t = target + "/" if target_slash else target
128
+
129
+ # Without recursive does nothing
130
+ fs.cp(s, t)
131
+ assert fs.ls(target, detail=False) == (
132
+ [] if supports_empty_directories else [dummy]
133
+ )
134
+
135
+ # With recursive
136
+ fs.cp(s, t, recursive=True)
137
+ if source_slash:
138
+ assert fs.isfile(fs_join(target, "subfile1"))
139
+ assert fs.isfile(fs_join(target, "subfile2"))
140
+ assert fs.isdir(fs_join(target, "nesteddir"))
141
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
142
+ assert not fs.exists(fs_join(target, "subdir"))
143
+
144
+ fs.rm(
145
+ [
146
+ fs_join(target, "subfile1"),
147
+ fs_join(target, "subfile2"),
148
+ fs_join(target, "nesteddir"),
149
+ ],
150
+ recursive=True,
151
+ )
152
+ else:
153
+ assert fs.isdir(fs_join(target, "subdir"))
154
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
155
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
156
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
157
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
158
+
159
+ fs.rm(fs_join(target, "subdir"), recursive=True)
160
+ assert fs.ls(target, detail=False) == (
161
+ [] if supports_empty_directories else [dummy]
162
+ )
163
+
164
+ # Limit recursive by maxdepth
165
+ fs.cp(s, t, recursive=True, maxdepth=1)
166
+ if source_slash:
167
+ assert fs.isfile(fs_join(target, "subfile1"))
168
+ assert fs.isfile(fs_join(target, "subfile2"))
169
+ assert not fs.exists(fs_join(target, "nesteddir"))
170
+ assert not fs.exists(fs_join(target, "subdir"))
171
+
172
+ fs.rm(
173
+ [
174
+ fs_join(target, "subfile1"),
175
+ fs_join(target, "subfile2"),
176
+ ],
177
+ recursive=True,
178
+ )
179
+ else:
180
+ assert fs.isdir(fs_join(target, "subdir"))
181
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
182
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
183
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
184
+
185
+ fs.rm(fs_join(target, "subdir"), recursive=True)
186
+ assert fs.ls(target, detail=False) == (
187
+ [] if supports_empty_directories else [dummy]
188
+ )
189
+
190
+ def test_copy_directory_to_new_directory(
191
+ self,
192
+ fs,
193
+ fs_join,
194
+ fs_bulk_operations_scenario_0,
195
+ fs_target,
196
+ supports_empty_directories,
197
+ ):
198
+ # Copy scenario 1f
199
+ source = fs_bulk_operations_scenario_0
200
+
201
+ target = fs_target
202
+ fs.mkdir(target)
203
+
204
+ for source_slash, target_slash in zip([False, True], [False, True]):
205
+ s = fs_join(source, "subdir")
206
+ if source_slash:
207
+ s += "/"
208
+ t = fs_join(target, "newdir")
209
+ if target_slash:
210
+ t += "/"
211
+
212
+ # Without recursive does nothing
213
+ fs.cp(s, t)
214
+ if supports_empty_directories:
215
+ assert fs.ls(target) == []
216
+ else:
217
+ with pytest.raises(FileNotFoundError):
218
+ fs.ls(target)
219
+
220
+ # With recursive
221
+ fs.cp(s, t, recursive=True)
222
+ assert fs.isdir(fs_join(target, "newdir"))
223
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
224
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
225
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
226
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
227
+ assert not fs.exists(fs_join(target, "subdir"))
228
+
229
+ fs.rm(fs_join(target, "newdir"), recursive=True)
230
+ assert not fs.exists(fs_join(target, "newdir"))
231
+
232
+ # Limit recursive by maxdepth
233
+ fs.cp(s, t, recursive=True, maxdepth=1)
234
+ assert fs.isdir(fs_join(target, "newdir"))
235
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
236
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
237
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
238
+ assert not fs.exists(fs_join(target, "subdir"))
239
+
240
+ fs.rm(fs_join(target, "newdir"), recursive=True)
241
+ assert not fs.exists(fs_join(target, "newdir"))
242
+
243
+ def test_copy_glob_to_existing_directory(
244
+ self,
245
+ fs,
246
+ fs_join,
247
+ fs_bulk_operations_scenario_0,
248
+ fs_target,
249
+ supports_empty_directories,
250
+ ):
251
+ # Copy scenario 1g
252
+ source = fs_bulk_operations_scenario_0
253
+
254
+ target = fs_target
255
+ fs.mkdir(target)
256
+ if not supports_empty_directories:
257
+ # Force target directory to exist by adding a dummy file
258
+ dummy = fs_join(target, "dummy")
259
+ fs.touch(dummy)
260
+ assert fs.isdir(target)
261
+
262
+ for target_slash in [False, True]:
263
+ t = target + "/" if target_slash else target
264
+
265
+ # Without recursive
266
+ fs.cp(fs_join(source, "subdir", "*"), t)
267
+ assert fs.isfile(fs_join(target, "subfile1"))
268
+ assert fs.isfile(fs_join(target, "subfile2"))
269
+ assert not fs.isdir(fs_join(target, "nesteddir"))
270
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
271
+ assert not fs.exists(fs_join(target, "subdir"))
272
+
273
+ fs.rm(
274
+ [
275
+ fs_join(target, "subfile1"),
276
+ fs_join(target, "subfile2"),
277
+ ],
278
+ recursive=True,
279
+ )
280
+ assert fs.ls(target, detail=False) == (
281
+ [] if supports_empty_directories else [dummy]
282
+ )
283
+
284
+ # With recursive
285
+ for glob, recursive in zip(["*", "**"], [True, False]):
286
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
287
+ assert fs.isfile(fs_join(target, "subfile1"))
288
+ assert fs.isfile(fs_join(target, "subfile2"))
289
+ assert fs.isdir(fs_join(target, "nesteddir"))
290
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
291
+ assert not fs.exists(fs_join(target, "subdir"))
292
+
293
+ fs.rm(
294
+ [
295
+ fs_join(target, "subfile1"),
296
+ fs_join(target, "subfile2"),
297
+ fs_join(target, "nesteddir"),
298
+ ],
299
+ recursive=True,
300
+ )
301
+ assert fs.ls(target, detail=False) == (
302
+ [] if supports_empty_directories else [dummy]
303
+ )
304
+
305
+ # Limit recursive by maxdepth
306
+ fs.cp(
307
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
308
+ )
309
+ assert fs.isfile(fs_join(target, "subfile1"))
310
+ assert fs.isfile(fs_join(target, "subfile2"))
311
+ assert not fs.exists(fs_join(target, "nesteddir"))
312
+ assert not fs.exists(fs_join(target, "subdir"))
313
+
314
+ fs.rm(
315
+ [
316
+ fs_join(target, "subfile1"),
317
+ fs_join(target, "subfile2"),
318
+ ],
319
+ recursive=True,
320
+ )
321
+ assert fs.ls(target, detail=False) == (
322
+ [] if supports_empty_directories else [dummy]
323
+ )
324
+
325
+ def test_copy_glob_to_new_directory(
326
+         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
+     ):
+         # Copy scenario 1h
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         fs.mkdir(target)
+
+         for target_slash in [False, True]:
+             t = fs_join(target, "newdir")
+             if target_slash:
+                 t += "/"
+
+             # Without recursive
+             fs.cp(fs_join(source, "subdir", "*"), t)
+             assert fs.isdir(fs_join(target, "newdir"))
+             assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+             assert fs.isfile(fs_join(target, "newdir", "subfile2"))
+             assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
+             assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
+             assert not fs.exists(fs_join(target, "subdir"))
+             assert not fs.exists(fs_join(target, "newdir", "subdir"))
+
+             fs.rm(fs_join(target, "newdir"), recursive=True)
+             assert not fs.exists(fs_join(target, "newdir"))
+
+             # With recursive
+             for glob, recursive in zip(["*", "**"], [True, False]):
+                 fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
+                 assert fs.isdir(fs_join(target, "newdir"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile2"))
+                 assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
+                 assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
+                 assert not fs.exists(fs_join(target, "subdir"))
+                 assert not fs.exists(fs_join(target, "newdir", "subdir"))
+
+                 fs.rm(fs_join(target, "newdir"), recursive=True)
+                 assert not fs.exists(fs_join(target, "newdir"))
+
+                 # Limit recursive by maxdepth
+                 fs.cp(
+                     fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
+                 )
+                 assert fs.isdir(fs_join(target, "newdir"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+                 assert fs.isfile(fs_join(target, "newdir", "subfile2"))
+                 assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
+                 assert not fs.exists(fs_join(target, "subdir"))
+                 assert not fs.exists(fs_join(target, "newdir", "subdir"))
+
+                 fs.rm(fs_join(target, "newdir"), recursive=True)
+                 assert not fs.exists(fs_join(target, "newdir"))
+
+     @pytest.mark.parametrize(
+         GLOB_EDGE_CASES_TESTS["argnames"],
+         GLOB_EDGE_CASES_TESTS["argvalues"],
+     )
+     def test_copy_glob_edge_cases(
+         self,
+         path,
+         recursive,
+         maxdepth,
+         expected,
+         fs,
+         fs_join,
+         fs_glob_edge_cases_files,
+         fs_target,
+         fs_sanitize_path,
+     ):
+         # Copy scenario 1g
+         source = fs_glob_edge_cases_files
+
+         target = fs_target
+
+         for new_dir, target_slash in product([True, False], [True, False]):
+             fs.mkdir(target)
+
+             t = fs_join(target, "newdir") if new_dir else target
+             t = t + "/" if target_slash else t
+
+             fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
+
+             output = fs.find(target)
+             if new_dir:
+                 prefixed_expected = [
+                     fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
+                 ]
+             else:
+                 prefixed_expected = [
+                     fs_sanitize_path(fs_join(target, p)) for p in expected
+                 ]
+             assert sorted(output) == sorted(prefixed_expected)
+
+             try:
+                 fs.rm(target, recursive=True)
+             except FileNotFoundError:
+                 pass
+
+     def test_copy_list_of_files_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         fs_target,
+         supports_empty_directories,
+     ):
+         # Copy scenario 2a
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         fs.mkdir(target)
+         if not supports_empty_directories:
+             # Force target directory to exist by adding a dummy file
+             dummy = fs_join(target, "dummy")
+             fs.touch(dummy)
+         assert fs.isdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         for target_slash in [False, True]:
+             t = target + "/" if target_slash else target
+
+             fs.cp(source_files, t)
+             assert fs.isfile(fs_join(target, "file1"))
+             assert fs.isfile(fs_join(target, "file2"))
+             assert fs.isfile(fs_join(target, "subfile1"))
+
+             fs.rm(
+                 [
+                     fs_join(target, "file1"),
+                     fs_join(target, "file2"),
+                     fs_join(target, "subfile1"),
+                 ],
+                 recursive=True,
+             )
+             assert fs.ls(target, detail=False) == (
+                 [] if supports_empty_directories else [dummy]
+             )
+
+     def test_copy_list_of_files_to_new_directory(
+         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
+     ):
+         # Copy scenario 2b
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         fs.mkdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         fs.cp(source_files, fs_join(target, "newdir") + "/")  # Note trailing slash
+         assert fs.isdir(fs_join(target, "newdir"))
+         assert fs.isfile(fs_join(target, "newdir", "file1"))
+         assert fs.isfile(fs_join(target, "newdir", "file2"))
+         assert fs.isfile(fs_join(target, "newdir", "subfile1"))
+
+     def test_copy_two_files_new_directory(
+         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
+     ):
+         # This is a duplicate of test_copy_list_of_files_to_new_directory and
+         # can eventually be removed.
+         source = fs_bulk_operations_scenario_0
+
+         target = fs_target
+         assert not fs.exists(target)
+         fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
+
+         assert fs.isdir(target)
+         assert fs.isfile(fs_join(target, "file1"))
+         assert fs.isfile(fs_join(target, "file2"))
+
+     def test_copy_directory_without_files_with_same_name_prefix(
+         self,
+         fs,
+         fs_join,
+         fs_target,
+         fs_dir_and_file_with_same_name_prefix,
+         supports_empty_directories,
+     ):
+         # Create the test dirs
+         source = fs_dir_and_file_with_same_name_prefix
+         target = fs_target
+
+         # Test without glob
+         fs.cp(fs_join(source, "subdir"), target, recursive=True)
+
+         assert fs.isfile(fs_join(target, "subfile.txt"))
+         assert not fs.isfile(fs_join(target, "subdir.txt"))
+
+         fs.rm([fs_join(target, "subfile.txt")])
+         if supports_empty_directories:
+             assert fs.ls(target) == []
+         else:
+             assert not fs.exists(target)
+
+         # Test with glob
+         fs.cp(fs_join(source, "subdir*"), target, recursive=True)
+
+         assert fs.isdir(fs_join(target, "subdir"))
+         assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
+         assert fs.isfile(fs_join(target, "subdir.txt"))
+
+     def test_copy_with_source_and_destination_as_list(
+         self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
+     ):
+         # Create the test dir
+         source = fs_10_files_with_hashed_names
+         target = fs_target
+
+         # Create list of files for source and destination
+         source_files = []
+         destination_files = []
+         for i in range(10):
+             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
+             source_files.append(fs_join(source, f"{hashed_i}.txt"))
+             destination_files.append(fs_join(target, f"{hashed_i}.txt"))
+
+         # Copy and assert order was kept
+         fs.copy(path1=source_files, path2=destination_files)
+
+         for i in range(10):
+             file_content = fs.cat(destination_files[i]).decode("utf-8")
+             assert file_content == str(i)
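Editorial note (not part of the added file): the copy tests above run `fs.cp` within a single filesystem and use project fixtures. A minimal, self-contained sketch of the glob semantics they exercise, assuming only fsspec's in-memory filesystem rather than any fixture, might look like this:

    # Sketch only: glob copy with and without recursion on the in-memory filesystem.
    import fsspec

    fs = fsspec.filesystem("memory")
    fs.mkdir("/src/subdir/nesteddir")
    for p in ["/src/subdir/subfile1", "/src/subdir/subfile2", "/src/subdir/nesteddir/nestedfile"]:
        fs.touch(p)

    fs.cp("/src/subdir/*", "/dst1/")                              # files only, no recursion
    fs.cp("/src/subdir/*", "/dst2/", recursive=True)              # also copies nesteddir/
    fs.cp("/src/subdir/*", "/dst3/", recursive=True, maxdepth=1)  # recursion capped at depth 1
    print(fs.find("/dst1"), fs.find("/dst2"), fs.find("/dst3"))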
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/get.py ADDED
@@ -0,0 +1,587 @@
+ from hashlib import md5
+ from itertools import product
+
+ import pytest
+
+ from fsspec.implementations.local import make_path_posix
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
+
+
+ class AbstractGetTests:
+     def test_get_file_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1a
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+         assert local_fs.isdir(target)
+
+         target_file2 = local_join(target, "file2")
+         target_subfile1 = local_join(target, "subfile1")
+
+         # Copy from source directory
+         fs.get(fs_join(source, "file2"), target)
+         assert local_fs.isfile(target_file2)
+
+         # Copy from sub directory
+         fs.get(fs_join(source, "subdir", "subfile1"), target)
+         assert local_fs.isfile(target_subfile1)
+
+         # Remove copied files
+         local_fs.rm([target_file2, target_subfile1])
+         assert not local_fs.exists(target_file2)
+         assert not local_fs.exists(target_subfile1)
+
+         # Repeat with trailing slash on target
+         fs.get(fs_join(source, "file2"), target + "/")
+         assert local_fs.isdir(target)
+         assert local_fs.isfile(target_file2)
+
+         fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
+         assert local_fs.isfile(target_subfile1)
+
+     def test_get_file_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1b
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         fs.get(
+             fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
+         )  # Note trailing slash
+
+         assert local_fs.isdir(target)
+         assert local_fs.isdir(local_join(target, "newdir"))
+         assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+
+     def test_get_file_to_file_in_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1c
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
+         assert local_fs.isfile(local_join(target, "newfile"))
+
+     def test_get_file_to_file_in_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1d
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         fs.get(
+             fs_join(source, "subdir", "subfile1"),
+             local_join(target, "newdir", "newfile"),
+         )
+         assert local_fs.isdir(local_join(target, "newdir"))
+         assert local_fs.isfile(local_join(target, "newdir", "newfile"))
+
+     def test_get_directory_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1e
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+         assert local_fs.isdir(target)
+
+         for source_slash, target_slash in zip([False, True], [False, True]):
+             s = fs_join(source, "subdir")
+             if source_slash:
+                 s += "/"
+             t = target + "/" if target_slash else target
+
+             # Without recursive does nothing
+             fs.get(s, t)
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             fs.get(s, t, recursive=True)
+             if source_slash:
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert local_fs.isdir(local_join(target, "nesteddir"))
+                 assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                         local_join(target, "nesteddir"),
+                     ],
+                     recursive=True,
+                 )
+             else:
+                 assert local_fs.isdir(local_join(target, "subdir"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
+                 assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
+                 assert local_fs.isfile(
+                     local_join(target, "subdir", "nesteddir", "nestedfile")
+                 )
+
+                 local_fs.rm(local_join(target, "subdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+             # Limit recursive by maxdepth
+             fs.get(s, t, recursive=True, maxdepth=1)
+             if source_slash:
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert not local_fs.exists(local_join(target, "nesteddir"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                     ],
+                     recursive=True,
+                 )
+             else:
+                 assert local_fs.isdir(local_join(target, "subdir"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
+                 assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
+
+                 local_fs.rm(local_join(target, "subdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+     def test_get_directory_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1f
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         for source_slash, target_slash in zip([False, True], [False, True]):
+             s = fs_join(source, "subdir")
+             if source_slash:
+                 s += "/"
+             t = local_join(target, "newdir")
+             if target_slash:
+                 t += "/"
+
+             # Without recursive does nothing
+             fs.get(s, t)
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             fs.get(s, t, recursive=True)
+             assert local_fs.isdir(local_join(target, "newdir"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+             assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
+             assert local_fs.isfile(
+                 local_join(target, "newdir", "nesteddir", "nestedfile")
+             )
+             assert not local_fs.exists(local_join(target, "subdir"))
+
+             local_fs.rm(local_join(target, "newdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+             # Limit recursive by maxdepth
+             fs.get(s, t, recursive=True, maxdepth=1)
+             assert local_fs.isdir(local_join(target, "newdir"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+             assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
+             assert not local_fs.exists(local_join(target, "subdir"))
+
+             local_fs.rm(local_join(target, "newdir"), recursive=True)
+             assert not local_fs.exists(local_join(target, "newdir"))
+
+     def test_get_glob_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1g
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         for target_slash in [False, True]:
+             t = target + "/" if target_slash else target
+
+             # Without recursive
+             fs.get(fs_join(source, "subdir", "*"), t)
+             assert local_fs.isfile(local_join(target, "subfile1"))
+             assert local_fs.isfile(local_join(target, "subfile2"))
+             assert not local_fs.isdir(local_join(target, "nesteddir"))
+             assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
+             assert not local_fs.exists(local_join(target, "subdir"))
+
+             local_fs.rm(
+                 [
+                     local_join(target, "subfile1"),
+                     local_join(target, "subfile2"),
+                 ],
+                 recursive=True,
+             )
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             for glob, recursive in zip(["*", "**"], [True, False]):
+                 fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert local_fs.isdir(local_join(target, "nesteddir"))
+                 assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                         local_join(target, "nesteddir"),
+                     ],
+                     recursive=True,
+                 )
+                 assert local_fs.ls(target) == []
+
+                 # Limit recursive by maxdepth
+                 fs.get(
+                     fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
+                 )
+                 assert local_fs.isfile(local_join(target, "subfile1"))
+                 assert local_fs.isfile(local_join(target, "subfile2"))
+                 assert not local_fs.exists(local_join(target, "nesteddir"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+
+                 local_fs.rm(
+                     [
+                         local_join(target, "subfile1"),
+                         local_join(target, "subfile2"),
+                     ],
+                     recursive=True,
+                 )
+                 assert local_fs.ls(target) == []
+
+     def test_get_glob_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1h
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         for target_slash in [False, True]:
+             t = fs_join(target, "newdir")
+             if target_slash:
+                 t += "/"
+
+             # Without recursive
+             fs.get(fs_join(source, "subdir", "*"), t)
+             assert local_fs.isdir(local_join(target, "newdir"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+             assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+             assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
+             assert not local_fs.exists(
+                 local_join(target, "newdir", "nesteddir", "nestedfile")
+             )
+             assert not local_fs.exists(local_join(target, "subdir"))
+             assert not local_fs.exists(local_join(target, "newdir", "subdir"))
+
+             local_fs.rm(local_join(target, "newdir"), recursive=True)
+             assert local_fs.ls(target) == []
+
+             # With recursive
+             for glob, recursive in zip(["*", "**"], [True, False]):
+                 fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
+                 assert local_fs.isdir(local_join(target, "newdir"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+                 assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
+                 assert local_fs.isfile(
+                     local_join(target, "newdir", "nesteddir", "nestedfile")
+                 )
+                 assert not local_fs.exists(local_join(target, "subdir"))
+                 assert not local_fs.exists(local_join(target, "newdir", "subdir"))
+
+                 local_fs.rm(local_join(target, "newdir"), recursive=True)
+                 assert not local_fs.exists(local_join(target, "newdir"))
+
+                 # Limit recursive by maxdepth
+                 fs.get(
+                     fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
+                 )
+                 assert local_fs.isdir(local_join(target, "newdir"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+                 assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
+                 assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
+                 assert not local_fs.exists(local_join(target, "subdir"))
+                 assert not local_fs.exists(local_join(target, "newdir", "subdir"))
+
+                 local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
+                 assert not local_fs.exists(local_join(target, "newdir"))
+
+     @pytest.mark.parametrize(
+         GLOB_EDGE_CASES_TESTS["argnames"],
+         GLOB_EDGE_CASES_TESTS["argvalues"],
+     )
+     def test_get_glob_edge_cases(
+         self,
+         path,
+         recursive,
+         maxdepth,
+         expected,
+         fs,
+         fs_join,
+         fs_glob_edge_cases_files,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 1g
+         source = fs_glob_edge_cases_files
+
+         target = local_target
+
+         for new_dir, target_slash in product([True, False], [True, False]):
+             local_fs.mkdir(target)
+
+             t = local_join(target, "newdir") if new_dir else target
+             t = t + "/" if target_slash else t
+
+             fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
+
+             output = local_fs.find(target)
+             if new_dir:
+                 prefixed_expected = [
+                     make_path_posix(local_join(target, "newdir", p)) for p in expected
+                 ]
+             else:
+                 prefixed_expected = [
+                     make_path_posix(local_join(target, p)) for p in expected
+                 ]
+             assert sorted(output) == sorted(prefixed_expected)
+
+             try:
+                 local_fs.rm(target, recursive=True)
+             except FileNotFoundError:
+                 pass
+
+     def test_get_list_of_files_to_existing_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 2a
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         for target_slash in [False, True]:
+             t = target + "/" if target_slash else target
+
+             fs.get(source_files, t)
+             assert local_fs.isfile(local_join(target, "file1"))
+             assert local_fs.isfile(local_join(target, "file2"))
+             assert local_fs.isfile(local_join(target, "subfile1"))
+
+             local_fs.rm(
+                 [
+                     local_join(target, "file1"),
+                     local_join(target, "file2"),
+                     local_join(target, "subfile1"),
+                 ],
+                 recursive=True,
+             )
+             assert local_fs.ls(target) == []
+
+     def test_get_list_of_files_to_new_directory(
+         self,
+         fs,
+         fs_join,
+         fs_bulk_operations_scenario_0,
+         local_fs,
+         local_join,
+         local_target,
+     ):
+         # Copy scenario 2b
+         source = fs_bulk_operations_scenario_0
+
+         target = local_target
+         local_fs.mkdir(target)
+
+         source_files = [
+             fs_join(source, "file1"),
+             fs_join(source, "file2"),
+             fs_join(source, "subdir", "subfile1"),
+         ]
+
+         fs.get(source_files, local_join(target, "newdir") + "/")  # Note trailing slash
+         assert local_fs.isdir(local_join(target, "newdir"))
+         assert local_fs.isfile(local_join(target, "newdir", "file1"))
+         assert local_fs.isfile(local_join(target, "newdir", "file2"))
+         assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
+
+     def test_get_directory_recursive(
+         self, fs, fs_join, fs_path, local_fs, local_join, local_target
+     ):
+         # https://github.com/fsspec/filesystem_spec/issues/1062
+         # Recursive cp/get/put of source directory into non-existent target directory.
+         src = fs_join(fs_path, "src")
+         src_file = fs_join(src, "file")
+         fs.mkdir(src)
+         fs.touch(src_file)
+
+         target = local_target
+
+         # get without slash
+         assert not local_fs.exists(target)
+         for loop in range(2):
+             fs.get(src, target, recursive=True)
+             assert local_fs.isdir(target)
+
+             if loop == 0:
+                 assert local_fs.isfile(local_join(target, "file"))
+                 assert not local_fs.exists(local_join(target, "src"))
+             else:
+                 assert local_fs.isfile(local_join(target, "file"))
+                 assert local_fs.isdir(local_join(target, "src"))
+                 assert local_fs.isfile(local_join(target, "src", "file"))
+
+         local_fs.rm(target, recursive=True)
+
+         # get with slash
+         assert not local_fs.exists(target)
+         for loop in range(2):
+             fs.get(src + "/", target, recursive=True)
+             assert local_fs.isdir(target)
+             assert local_fs.isfile(local_join(target, "file"))
+             assert not local_fs.exists(local_join(target, "src"))
+
+     def test_get_directory_without_files_with_same_name_prefix(
+         self,
+         fs,
+         fs_join,
+         local_fs,
+         local_join,
+         local_target,
+         fs_dir_and_file_with_same_name_prefix,
+     ):
+         # Create the test dirs
+         source = fs_dir_and_file_with_same_name_prefix
+         target = local_target
+
+         # Test without glob
+         fs.get(fs_join(source, "subdir"), target, recursive=True)
+
+         assert local_fs.isfile(local_join(target, "subfile.txt"))
+         assert not local_fs.isfile(local_join(target, "subdir.txt"))
+
+         local_fs.rm([local_join(target, "subfile.txt")])
+         assert local_fs.ls(target) == []
+
+         # Test with glob
+         fs.get(fs_join(source, "subdir*"), target, recursive=True)
+
+         assert local_fs.isdir(local_join(target, "subdir"))
+         assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
+         assert local_fs.isfile(local_join(target, "subdir.txt"))
+
+     def test_get_with_source_and_destination_as_list(
+         self,
+         fs,
+         fs_join,
+         local_fs,
+         local_join,
+         local_target,
+         fs_10_files_with_hashed_names,
+     ):
+         # Create the test dir
+         source = fs_10_files_with_hashed_names
+         target = local_target
+
+         # Create list of files for source and destination
+         source_files = []
+         destination_files = []
+         for i in range(10):
+             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
+             source_files.append(fs_join(source, f"{hashed_i}.txt"))
+             destination_files.append(
+                 make_path_posix(local_join(target, f"{hashed_i}.txt"))
+             )
+
+         # Copy and assert order was kept
+         fs.get(rpath=source_files, lpath=destination_files)
+
+         for i in range(10):
+             file_content = local_fs.cat(destination_files[i]).decode("utf-8")
+             assert file_content == str(i)
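Editorial note (not part of the added file): the get tests mirror the copy tests, except the destination is always the local filesystem, so every assertion runs against `local_fs`/`local_join`. A minimal sketch of the round trip they rely on, assuming only the in-memory filesystem and a temporary local directory, might be:

    # Sketch only: fs.get downloads from any filesystem to a local path;
    # a trailing slash or recursive=True controls directory handling.
    import os
    import tempfile
    import fsspec

    fs = fsspec.filesystem("memory")
    fs.makedirs("/data/subdir", exist_ok=True)
    fs.pipe("/data/subdir/subfile1", b"hello")  # create a small source-side file

    target = tempfile.mkdtemp()
    fs.get("/data/subdir/subfile1", target + "/")  # single file into existing dir
    fs.get("/data/subdir", os.path.join(target, "newdir"), recursive=True)  # whole directory

    assert os.path.isfile(os.path.join(target, "subfile1"))
    assert os.path.isfile(os.path.join(target, "newdir", "subfile1"))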
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/mv.py ADDED
@@ -0,0 +1,57 @@
+ import os
+
+ import pytest
+
+ import fsspec
+
+
+ def test_move_raises_error_with_tmpdir(tmpdir):
+     # Create a file in the temporary directory
+     source = tmpdir.join("source_file.txt")
+     source.write("content")
+
+     # Define a destination that simulates a protected or invalid path
+     destination = tmpdir.join("non_existent_directory/destination_file.txt")
+
+     # Instantiate the filesystem (assuming the local file system interface)
+     fs = fsspec.filesystem("file")
+
+     # Use the actual file paths as string
+     with pytest.raises(FileNotFoundError):
+         fs.mv(str(source), str(destination))
+
+
+ @pytest.mark.parametrize("recursive", (True, False))
+ def test_move_raises_error_with_tmpdir_permission(recursive, tmpdir):
+     # Create a file in the temporary directory
+     source = tmpdir.join("source_file.txt")
+     source.write("content")
+
+     # Create a protected directory (non-writable)
+     protected_dir = tmpdir.mkdir("protected_directory")
+     protected_path = str(protected_dir)
+
+     # Set the directory to read-only
+     if os.name == "nt":
+         os.system(f'icacls "{protected_path}" /deny Everyone:(W)')
+     else:
+         os.chmod(protected_path, 0o555)  # Sets the directory to read-only
+
+     # Define a destination inside the protected directory
+     destination = protected_dir.join("destination_file.txt")
+
+     # Instantiate the filesystem (assuming the local file system interface)
+     fs = fsspec.filesystem("file")
+
+     # Try to move the file to the read-only directory, expecting a permission error
+     with pytest.raises(PermissionError):
+         fs.mv(str(source), str(destination), recursive=recursive)
+
+     # Assert the file was not created in the destination
+     assert not os.path.exists(destination)
+
+     # Cleanup: Restore permissions so the directory can be cleaned up
+     if os.name == "nt":
+         os.system(f'icacls "{protected_path}" /remove:d Everyone')
+     else:
+         os.chmod(protected_path, 0o755)  # Restore write permission for cleanup
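Editorial note (not part of the added file): both tests above exercise failure paths only. For completeness, a successful local move is simply (a sketch under the same local-filesystem assumption):

    # Sketch only: a plain successful fs.mv on the local filesystem.
    import os
    import tempfile
    import fsspec

    fs = fsspec.filesystem("file")
    d = tempfile.mkdtemp()
    src = os.path.join(d, "a.txt")
    dst = os.path.join(d, "b.txt")
    with open(src, "w") as f:
        f.write("content")

    fs.mv(src, dst)
    assert not os.path.exists(src) and os.path.exists(dst)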
temp_venv/lib/python3.13/site-packages/fsspec/tests/abstract/open.py ADDED
@@ -0,0 +1,11 @@
+ import pytest
+
+
+ class AbstractOpenTests:
+     def test_open_exclusive(self, fs, fs_target):
+         with fs.open(fs_target, "wb") as f:
+             f.write(b"data")
+         with fs.open(fs_target, "rb") as f:
+             assert f.read() == b"data"
+         with pytest.raises(FileExistsError):
+             fs.open(fs_target, "xb")
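Editorial note (not part of the added file): mode "xb" requests exclusive creation, mirroring Python's built-in open(), so the second open of an existing path is expected to raise, which is what the test asserts. A standalone sketch against the local filesystem:

    # Sketch only: "xb" creates a file only if it does not exist yet.
    import os
    import tempfile
    import fsspec
    import pytest

    fs = fsspec.filesystem("file")
    path = os.path.join(tempfile.mkdtemp(), "exclusive.bin")
    with fs.open(path, "xb") as f:  # first exclusive create succeeds
        f.write(b"data")
    with pytest.raises(FileExistsError):
        fs.open(path, "xb")  # second attempt on an existing path must raise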