BioChat2 / bulk_loader_script.py
AYS11231's picture
Upload folder using huggingface_hub
839e960 verified
#!/usr/bin/env python3
"""
Simple bulk loader for raw text summaries and reports.
Drop your .txt files into the data/ paths used in main() and run this script.
"""
from app import Me
import os
def main():
    """Load text content into the RAG knowledge base from several sources.

    Runs the following steps in order against a single ``Me`` instance:

    1. Load ``data/summary.txt`` as one document, if it is a regular file.
    2. Load ``data/reports`` via ``load_directory``, if it is a directory.
    3. Load whichever entries of a fixed list of ``data/*.txt`` paths are
       regular files.
    4. Load a hard-coded sample paragraph (always runs).
    5. Print a hint about reloading the ``me/`` directory.

    Paths that are missing (or of the wrong kind) are skipped silently.
    Finishes by calling ``get_knowledge_stats()`` and printing a completion
    message.
    """
    # Initialize the RAG system
    me = Me()

    print("=== Simple RAG Text Loader ===\n")
    print("ℹ️ Note: All files in me/ directory are automatically loaded on startup!")
    print(" Just add .txt, .pdf, or .md files to me/ and restart the app.\n")

    # Method 1: Load a single text file/summary/report.
    # isfile (not exists) so that a directory with this name is skipped
    # instead of making open() fail.
    single_file = "data/summary.txt"
    if os.path.isfile(single_file):
        print(f"Loading single file: {single_file}")
        with open(single_file, 'r', encoding='utf-8') as f:
            content = f.read()
        me.bulk_load_text_content(content, "summary_report")

    # Method 2: Load all .txt files from a directory.
    # isdir (not exists) so that a plain file with this name is never handed
    # to load_directory.
    text_directory = "data/reports"
    if os.path.isdir(text_directory):
        print(f"Loading all text files from: {text_directory}")
        me.load_directory(text_directory)

    # Method 3: Load specific files
    specific_files = [
        "data/project_summary.txt",
        "data/technical_report.txt",
        "data/meeting_notes.txt",
    ]
    # Keep only the entries that are actual files on disk.
    existing_files = [f for f in specific_files if os.path.isfile(f)]
    if existing_files:
        print(f"Loading {len(existing_files)} specific files...")
        me.load_text_files(existing_files)

    # Method 4: Load raw text directly (for testing)
    sample_text = """
Alexandre completed a major project involving AI implementation
for a Fortune 500 company. The project improved efficiency by 40%
and was delivered 2 weeks ahead of schedule. Technologies used
included Python, TensorFlow, and cloud deployment on AWS.
"""
    print("Loading sample text content...")
    me.bulk_load_text_content(sample_text, "sample_project_info")

    # Method 5: Reload me/ directory if you added new files
    print("\n💡 If you added new files to me/, you can reload them:")
    print(" me.reload_me_directory()")

    # Show final stats
    print("\n=== Knowledge Base Stats ===")
    # NOTE(review): the return value is discarded; presumably
    # get_knowledge_stats() prints the stats itself - confirm.
    me.get_knowledge_stats()

    print("\n✅ Raw text loading completed!")
    print("Your RAG system now has the text content available for chat.")
# Run the loader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()