import logging
from typing import Iterator, List, Union

import requests
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

from open_webui.env import SRC_LOG_LEVELS

log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])


class ExternalDocumentLoader(BaseLoader):
    """Load a local file by delegating its parsing to an external HTTP service.

    The raw bytes of ``file_path`` are sent with an HTTP ``PUT`` to
    ``{url}/process``; the JSON reply is converted into ``Document`` objects.
    """

    def __init__(
        self,
        file_path,
        url: str,
        api_key: str,
        mime_type=None,
        **kwargs,
    ) -> None:
        """Store the connection settings; no I/O happens here.

        Args:
            file_path: Path of the file whose bytes will be uploaded.
            url: Base URL of the service; ``/process`` is appended in ``load``.
            api_key: Sent as a ``Bearer`` token unless it is ``None``.
            mime_type: Sent as the ``Content-Type`` header unless it is ``None``.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        self.url = url
        self.api_key = api_key
        self.file_path = file_path
        self.mime_type = mime_type

    @staticmethod
    def _to_document(item) -> Document:
        """Build a ``Document`` from one JSON object returned by the service."""
        if not isinstance(item, dict):
            raise Exception("Error loading document: Unexpected response format")

        page_content = item.get("page_content")
        if page_content is None:
            raise Exception("Error loading document: No content returned")

        # A missing or null "metadata" key falls back to an empty dict so that
        # None is never handed to Document.
        return Document(
            page_content=page_content,
            metadata=item.get("metadata") or {},
        )

    def load(self) -> list[Document]:
        """Upload the file and return the documents produced by the service.

        Returns:
            One ``Document`` per JSON object in the reply. A reply that is a
            single object yields a one-element list.

        Raises:
            Exception: If the HTTP status is not OK, the reply body is empty,
                or an item is not an object carrying ``page_content``.
        """
        with open(self.file_path, "rb") as f:
            data = f.read()

        headers = {}
        if self.mime_type is not None:
            headers["Content-Type"] = self.mime_type
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # Strip every trailing slash so the endpoint never contains "//process".
        base_url = self.url.rstrip("/")

        # NOTE(review): no timeout is passed, so a stalled service blocks this
        # call indefinitely; left as-is to keep existing behaviour.
        response = requests.put(f"{base_url}/process", data=data, headers=headers)

        if not response.ok:
            raise Exception(
                f"Error loading document: {response.status_code} {response.text}"
            )

        payload = response.json()
        if not payload:
            raise Exception("Error loading document: No content returned")

        # Accept either a single object or a list of objects.
        items = payload if isinstance(payload, list) else [payload]
        return [self._to_document(item) for item in items]