# NOTE(review): the three lines below were Hugging Face Spaces page-scrape
# residue ("Spaces:" / "Sleeping" / "Sleeping") — kept as a comment so the
# file remains valid Python.
# Suppress warnings - must be set before any ML-library imports,
# since TensorFlow reads TF_CPP_MIN_LOG_LEVEL at import time.
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'        # silence TF C++ logging (3 = errors only)
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # avoid tokenizers fork warning
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'       # disable oneDNN numeric-difference notice

import warnings
import logging

# Blanket suppression first, then targeted filters for known-noisy messages.
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', message='.*benefit from vacuuming.*')
warnings.filterwarnings('ignore', message='.*sparse_softmax_cross_entropy.*')

# Raise logging thresholds: root logger and TensorFlow's Python logger.
# (The duplicate TF_CPP_MIN_LOG_LEVEL assignment that used to follow here
# was redundant — the value is already set above, before any TF import.)
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)

import json
from typing import Dict, List

import chromadb
from chromadb.utils import embedding_functions
def clean_text(text: str) -> str:
    """Normalize whitespace and drop the (possibly partial) first sentence.

    Args:
        text (str): Raw passage text, possibly containing newlines and a
            truncated leading sentence from chunked retrieval.

    Returns:
        str: Cleaned text ending with a single period, or '' if nothing
            remains after cleaning.
    """
    # Collapse all whitespace runs (including newlines) into single spaces.
    # This also handles line breaks, so no separate '\n' replacement is needed.
    text = ' '.join(text.split())
    if '.' in text:
        sentences = text.split('.')
        # Drop the first fragment only when more than one exists — it is
        # assumed to be a sentence cut off at the chunk boundary.
        if len(sentences) > 1:
            text = '.'.join(sentences[1:]).strip()
            # Re-append the terminating period only when it is missing.
            # (The previous version appended unconditionally, producing a
            # doubled '..' whenever the input already ended with a period.)
            if text and not text.endswith('.'):
                text += '.'
    return text
def get_answer_from_osho(question: str, n_results: int = 5) -> Dict:
    """
    Retrieve relevant passages from Osho's books for a question.

    Args:
        question (str): The question to ask
        n_results (int): Number of relevant passages to return

    Returns:
        Dict: A dictionary containing the question and formatted answer with sources
    """
    # Prefer a local vector store; otherwise download the published snapshot.
    db_dir = os.path.join(os.getcwd(), "vector_db")
    if not os.path.exists(db_dir):
        from huggingface_hub import snapshot_download
        db_dir = snapshot_download(repo_id="harithapliyal/osho-vector-db")

    embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2"
    )
    collection = chromadb.PersistentClient(path=db_dir).get_collection(
        name="osho_books",
        embedding_function=embedding_function,
    )

    # Semantic search over the collection for the top-n passages.
    results = collection.query(query_texts=[question], n_results=n_results)

    # Pair each retrieved document with its metadata; 1-indexed for display.
    answer_parts = [
        {
            "passage_number": idx,
            "book": meta['book'],
            "text": clean_text(doc.strip()),
        }
        for idx, (doc, meta) in enumerate(
            zip(results['documents'][0], results['metadatas'][0]), start=1
        )
    ]

    return {
        "question": question,
        "answer_passages": answer_parts,
        "total_passages": len(answer_parts),
    }
def save_qa_to_file(qa_response: Dict, output_file: str = None):
    """
    Save the Q&A response to a JSON file.

    Args:
        qa_response (Dict): The Q&A response to save; must contain a
            'question' key when output_file is None.
        output_file (str): Optional output file path. If None, a filename is
            derived from the question and placed in an "answers" directory
            under the current working directory.

    Returns:
        str: The path the response was written to.
    """
    if output_file is None:
        answers_dir = os.path.join(os.getcwd(), "answers")
        os.makedirs(answers_dir, exist_ok=True)
        # Build a filesystem-safe slug: keep alphanumerics, map everything
        # else (spaces, '?', '/', ':', ...) to underscores. The previous
        # version only replaced spaces, so a question containing '/' would
        # break the path and '?' is invalid on Windows.
        stem = qa_response['question'][:30].lower()
        slug = ''.join(ch if ch.isalnum() else '_' for ch in stem)
        output_file = os.path.join(answers_dir, f"answer_{slug}.json")

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(qa_response, f, ensure_ascii=False, indent=2)
    return output_file
if __name__ == "__main__":
    # Demo run: query the corpus, persist the result, then print a preview.
    demo_question = "What is the nature of consciousness?"
    response = get_answer_from_osho(demo_question)
    saved_path = save_qa_to_file(response)

    print(f"\nQuestion: {response['question']}\n")
    for passage in response['answer_passages']:
        print(f"\nPassage {passage['passage_number']}:")
        print(f"Book: {passage['book']}")
        print(f"Text: {passage['text'][:200]}...")
        print("-" * 80)
    print(f"\nResponse saved to: {saved_path}")