# Hugging Face Space app. (The "Spaces: Sleeping" banner captured above this
# file is page-extraction residue from the Space's status UI, not code.)
# Suppress warnings - must be set before any heavyweight imports
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'        # silence TensorFlow C++ logging
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # avoid tokenizers fork warning
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'       # suppress oneDNN startup notice

import warnings
import logging

# Blanket suppression; this already covers UserWarning, DeprecationWarning,
# FutureWarning and message-specific filters, so no extra filters are needed.
warnings.filterwarnings('ignore')

# Quiet the root logger and TensorFlow's logger below ERROR.
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)

import json
from typing import Dict, List

import chromadb
from chromadb.utils import embedding_functions
def clean_text(text: str) -> str:
    """Normalize whitespace and trim a leading partial sentence.

    Vector-DB chunks often begin mid-sentence, so everything before the
    first period is dropped to start the passage on a sentence boundary.

    Args:
        text (str): Raw passage text.

    Returns:
        str: Cleaned text. If the text is a single complete sentence
        (dropping the leading fragment would leave nothing), the whole
        text is kept instead of being emptied out.
    """
    # split()/join collapses every run of whitespace, newlines included.
    text = ' '.join(text.split())
    if '.' in text:
        # Drop everything up to and including the first period
        # (assumed to be a partial sentence from mid-chunk splitting).
        remainder = text.split('.', 1)[1].strip()
        if remainder:
            # Re-terminate with a period, without doubling an existing one.
            text = remainder if remainder.endswith('.') else remainder + '.'
    return text
def get_answer_from_osho(question: str, n_results: int = 5) -> Dict:
    """Query the Osho vector store and return the best-matching passages.

    Args:
        question (str): The question to ask.
        n_results (int): Number of relevant passages to return.

    Returns:
        Dict: Keys "question", "answer_passages" and "total_passages";
        each passage carries its 1-based number, source book and cleaned text.
    """
    # Prefer a local DB directory; otherwise pull the prebuilt one from
    # the Hugging Face Hub.
    db_dir = os.path.join(os.getcwd(), "vector_db")
    if not os.path.exists(db_dir):
        from huggingface_hub import snapshot_download
        db_dir = snapshot_download(repo_id="harithapliyal/osho-vector-db")

    client = chromadb.PersistentClient(path=db_dir)
    embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2"
    )
    collection = client.get_collection(
        name="osho_books",
        embedding_function=embedder,
    )

    # Single-question query; results come back as lists-of-lists, one inner
    # list per query text, so index [0] below.
    hits = collection.query(query_texts=[question], n_results=n_results)

    passages = [
        {
            "passage_number": idx,
            "book": meta['book'],
            "text": clean_text(doc.strip()),
        }
        for idx, (doc, meta) in enumerate(
            zip(hits['documents'][0], hits['metadatas'][0]), start=1
        )
    ]

    return {
        "question": question,
        "answer_passages": passages,
        "total_passages": len(passages),
    }
def save_qa_to_file(qa_response: Dict, output_file: str = None) -> str:
    """Save the Q&A response to a JSON file.

    Args:
        qa_response (Dict): The Q&A response to save; must contain 'question'.
        output_file (str): Optional output file path. If None, a filename is
            derived from the question and placed under ./answers/.

    Returns:
        str: The path the response was written to.
    """
    if output_file is None:
        # Create answers directory if it doesn't exist
        answers_dir = os.path.join(os.getcwd(), "answers")
        os.makedirs(answers_dir, exist_ok=True)
        # Build a filesystem-safe slug from the question: keep alphanumerics,
        # map everything else (spaces, '?', '/', ':', ...) to '_' so the name
        # is always a valid single filename on every platform.
        stem = qa_response['question'][:30].lower()
        slug = ''.join(ch if ch.isalnum() else '_' for ch in stem)
        output_file = os.path.join(answers_dir, f"answer_{slug}.json")
    # Save to file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(qa_response, f, ensure_ascii=False, indent=2)
    return output_file
if __name__ == "__main__":
    # Demo run: query once, persist the result, then pretty-print it.
    demo_question = "What is the nature of consciousness?"

    response = get_answer_from_osho(demo_question)
    saved_path = save_qa_to_file(response)

    divider = "-" * 80
    print(f"\nQuestion: {response['question']}\n")
    for item in response['answer_passages']:
        print(f"\nPassage {item['passage_number']}:")
        print(f"Book: {item['book']}")
        print(f"Text: {item['text'][:200]}...")
        print(divider)
    print(f"\nResponse saved to: {saved_path}")