tommymarto's picture
first attempt to hf spaces
7f7b773
raw
history blame contribute delete
606 Bytes
from typing import Iterator
from langchain.docstore.document import Document
from langchain.document_loaders.parsers import GrobidParser
from langchain.document_loaders.generic import GenericLoader
class GrobidLoader:
    """Load PDF files found under a directory using a supplied parser.

    The parser given at construction time is handed unchanged to
    ``GenericLoader.from_filesystem`` for every ``load_documents`` call.
    """

    def __init__(self, grobid_parser: GrobidParser):
        """Remember the parser to use for all subsequent loads.

        Args:
            grobid_parser: Parser instance forwarded to the underlying
                ``GenericLoader`` as its ``parser`` argument.
        """
        self.parser = grobid_parser

    def load_documents(self, root: str) -> Iterator[Document]:
        """Build a filesystem loader rooted at *root* and start loading.

        Args:
            root: Directory passed to ``GenericLoader.from_filesystem`` as
                the search root.

        Returns:
            The result of the loader's ``lazy_load()`` call, annotated as an
            iterator of ``Document`` objects.
        """
        # Selection is restricted to PDFs twice over: by the recursive glob
        # pattern and by the explicit suffix list.
        loader_options = {
            "glob": "**/*.pdf",
            "show_progress": True,
            "suffixes": [".pdf"],
            "parser": self.parser,
        }
        return GenericLoader.from_filesystem(root, **loader_options).lazy_load()