# Hugging Face Spaces status banner captured with this file: "Runtime error".
import os

import pandas as pd
from datasets import load_dataset
from datasets import Dataset
from huggingface_hub import Repository, upload_file
from langchain.docstore.document import Document as LangchainDocument
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.document_loaders import WebBaseLoader
from sentence_transformers import SentenceTransformer
# Hub access token read from the environment; None when the variable is unset.
# NOTE(review): environment variable names are case-sensitive on Linux --
# confirm the secret is really stored as "HF_Token" rather than "HF_TOKEN".
HF_TOKEN = os.getenv('HF_Token')

# Page whose content is chunked and uploaded by create_vector_db().
url = "https://oxyjon.com/blog/"

# The page is loaded once, at import time; create_vector_db() reads the
# resulting module-level `document` list. WebBaseLoader must be imported from
# langchain_community.document_loaders for this line to resolve.
loader = WebBaseLoader(url)
document = loader.load()
def create_vector_db():
    """Chunk the loaded page and publish the chunks as a Hub dataset.

    Splits the module-level ``document`` list with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=256``,
    ``chunk_overlap=50``), builds a pandas DataFrame with one row per chunk
    (``page_content`` and ``metadata`` columns), converts it to a
    ``datasets.Dataset`` and pushes it to the ``Namitg02/Test`` repository
    using ``HF_TOKEN``. A few sample chunks and rows are printed along the
    way as debug output. Despite the name, no embeddings or vector index
    are computed here.

    Raises:
        ValueError: If splitting produced no chunks, so that an empty
            dataset is never pushed to the Hub.
    """
    # split the document into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=50)
    texts = text_splitter.split_documents(document)
    if not texts:
        raise ValueError("No text chunks were produced from the loaded document.")

    # Debug output: show a few sample chunks. Indices beyond the end of a
    # short document are skipped instead of raising IndexError.
    for sample_index in (1, 3, 17):
        if sample_index < len(texts):
            print(texts[sample_index])

    # Build the columns explicitly. Handing Document objects straight to
    # pd.DataFrame makes pandas iterate each one as (field, value) pairs,
    # which yields integer-named columns of tuples instead of usable data.
    df = pd.DataFrame(
        {
            "page_content": [chunk.page_content for chunk in texts],
            "metadata": [chunk.metadata for chunk in texts],
        }
    )
    column_headers = list(df.columns.values)
    print(column_headers)

    # Preview row 3 when it exists, otherwise the last available row.
    preview_index = min(3, len(df) - 1)

    # Widen the column display only for this preview so the global pandas
    # display options are left untouched for the rest of the process.
    with pd.option_context("display.max_colwidth", 300):
        print(df.iloc[[preview_index]])

    dataset = Dataset.from_pandas(df)
    print("check2b")
    print(dataset[preview_index])
    dataset.push_to_hub("Namitg02/Test", token=HF_TOKEN)
# Script entry point: print a progress marker, then build and upload the dataset.
if __name__ == "__main__":
    print("check31")
    create_vector_db()