---
# MediaWiki Import Configuration
#
# NOTE(review): this file was recovered from a web-page scrape. The original
# text began with the page-banner lines "Spaces:" / "Runtime error" /
# "Runtime error" (not configuration; kept here only as a comment), every
# line was wrapped in table-cell pipes, and all indentation had been lost.
# The nesting below is reconstructed from the section comments and the
# key/value shapes -- confirm it against the code that loads this file.

# Database settings
database:
  sqlite_path: './Databases/media_summary.db'
  chroma_db_path: 'chroma_db'

# Chunking options
chunking:
  default_method: 'sentences'
  default_size: 1000
  default_overlap: 100
  adaptive: true
  language: 'en'
  # default_method above is one of the entries in this list.
  methods:
    - 'sentences'
    - 'words'
    - 'paragraphs'
    - 'tokens'

# Import settings
import:
  batch_size: 1000  # Number of pages to process in a single batch
  default_skip_redirects: true
  default_namespaces: [0]  # Main namespace by default
  single_item_default: false

# Processing options
processing:
  max_workers: 4  # Number of worker threads for async processing

# Embedding settings
embeddings:
  provider: 'openai'  # or 'local' or 'huggingface'
  model: 'text-embedding-ada-002'
  # NOTE(review): placeholder value only. Do not commit a real key to version
  # control; prefer supplying it from the environment or a secret store if
  # the loader supports that -- confirm.
  api_key: 'your_openai_api_key_here'  # Remove if using local embeddings
  local_url: 'http://localhost:8080/embeddings'  # Only for local embeddings

# ChromaDB settings
chromadb:
  collection_prefix: 'mediawiki_'

# Logging settings
logging:
  level: 'INFO'
  file: 'mediawiki_import.log'

# Checkpoint settings
checkpoints:
  enabled: true
  directory: 'import_checkpoints'

# Error handling
error_handling:
  max_retries: 3
  retry_delay: 5  # seconds

# User interface settings
# NOTE(review): default_chunk_size / default_chunk_overlap repeat the values
# of chunking.default_size / chunking.default_overlap; whether the two
# sections are read independently is not visible here -- keep them in sync
# until confirmed.
ui:
  default_chunk_size: 1000
  min_chunk_size: 100
  max_chunk_size: 2000
  default_chunk_overlap: 100