Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	File size: 1,434 Bytes
			
# MediaWiki Import Configuration
# Database settings
# Storage locations used by the import. Both values are relative paths.
# NOTE(review): presumably resolved against the process working directory —
# confirm against the code that loads this file.
database:
  sqlite_path: './Databases/media_summary.db'  # SQLite database file
  chroma_db_path: 'chroma_db'  # presumably the ChromaDB storage directory — confirm
# Chunking options
# Defaults for splitting text into chunks.
# NOTE(review): the unit of default_size / default_overlap is not stated in
# this file; it presumably depends on the selected method — confirm.
chunking:
  default_method: 'sentences'  # also listed under `methods` below
  default_size: 1000
  default_overlap: 100
  adaptive: true
  language: 'en'
  # Chunking methods listed for this import.
  # NOTE(review): presumably the full set the consumer accepts — confirm.
  methods:
    - 'sentences'
    - 'words'
    - 'paragraphs'
    - 'tokens'
# Import settings
# Defaults applied when importing pages from a MediaWiki source.
import:
  # Number of pages to process in a single batch
  batch_size: 1000
  default_skip_redirects: true
  # Main namespace by default
  default_namespaces:
    - 0
  single_item_default: false
# Processing options
# Concurrency setting for the async processing stage.
processing:
  max_workers: 4  # Number of worker threads for async processing
# Embedding settings
# Selects the embedding backend and its connection details.
# WARNING: `api_key` below is a placeholder. Do not commit a real key to
# version control; prefer supplying it from the environment or a secret store
# if the consumer supports that (TODO confirm).
embeddings:
  provider: 'openai'  # or 'local' or 'huggingface'
  model: 'text-embedding-ada-002'
  api_key: 'your_openai_api_key_here'  # Remove if using local embeddings
  local_url: 'http://localhost:8080/embeddings'  # Only for local embeddings
# ChromaDB settings
# NOTE(review): the prefix is presumably prepended to the names of ChromaDB
# collections created by the import — confirm against the consumer.
chromadb:
  collection_prefix: 'mediawiki_'
# Logging settings
# Log verbosity and the log file name (a relative path).
logging:
  level: 'INFO'
  file: 'mediawiki_import.log'
# Checkpoint settings
# Toggles checkpointing and names the checkpoint directory (a relative path).
# NOTE(review): presumably used to resume an interrupted import — confirm.
checkpoints:
  enabled: true
  directory: 'import_checkpoints'
# Error handling
# Retry limit and the delay between attempts.
# NOTE(review): which operations are retried is not visible here — confirm.
error_handling:
  max_retries: 3
  retry_delay: 5  # seconds
# User interface settings
# Default and bounds for the chunk-size control, plus the default overlap.
# The two defaults carry the same values as chunking.default_size (1000) and
# chunking.default_overlap (100); keep them in sync if either changes.
ui:
  default_chunk_size: 1000
  min_chunk_size: 100
  max_chunk_size: 2000
  # Plain integer: a trailing ` |` here would turn the value into the string '100 |'.
  default_chunk_overlap: 100