# ask-osho / osho_qa_service.py
# (Hugging Face Space file; commit 7e1608f — "Update Hugging Face username
# in vector db loading", by harithapliyal)
# ---------------------------------------------------------------------------
# Environment / warning / logging setup.
# The TF_* variables must be exported BEFORE TensorFlow is imported
# (it is pulled in transitively by sentence-transformers), so this block
# must stay at the very top of the file, ahead of all other imports.
# ---------------------------------------------------------------------------
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'        # silence TF C++ log spam (info/warn/error)
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # avoid HF tokenizers fork warning
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'       # suppress oneDNN startup notice
import warnings
import logging
# Blanket suppression first, then targeted filters for messages that some
# libraries emit in ways the blanket filter can miss.
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', message='.*benefit from vacuuming.*')
warnings.filterwarnings('ignore', message='.*sparse_softmax_cross_entropy.*')
# Raise the logging threshold globally and for TensorFlow specifically.
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import json
from typing import Dict, List
import chromadb
from chromadb.utils import embedding_functions
def clean_text(text: str) -> str:
    """Clean the text by removing extra spaces and formatting.

    Collapses all whitespace runs to single spaces, then drops everything
    before the first period (a heuristic to strip a leading partial
    sentence from a retrieved passage) and re-terminates the text with a
    single period.

    Args:
        text (str): Raw passage text.

    Returns:
        str: The cleaned text (possibly empty).
    """
    # Collapse all whitespace (including newlines) to single spaces.
    text = ' '.join(text.split())
    # Heuristic: treat the text up to the first '.' as a partial sentence
    # carried over from chunking, and drop it.
    if '.' in text:
        sentences = text.split('.')
        if len(sentences) > 1:  # Only if there are multiple sentences
            sentences = sentences[1:]  # Remove first (possibly partial) part
            text = '.'.join(sentences)
            text = text.strip()
            # Re-terminate with exactly one period. The endswith check fixes
            # the old double-period bug: input already ending in '.' left a
            # trailing empty split element, so joining restored the period
            # and the unconditional append produced '..'.
            if text and not text.endswith('.'):
                text += '.'
    return text
def get_answer_from_osho(question: str, n_results: int = 5) -> Dict:
    """
    Get answer from Osho's books based on the question.

    Args:
        question (str): The question to ask
        n_results (int): Number of relevant passages to return

    Returns:
        Dict: A dictionary containing the question and formatted answer with sources
    """
    # Locate the vector DB: prefer a local copy; otherwise pull a snapshot
    # of the prebuilt database from the Hugging Face Hub.
    db_path = os.path.join(os.getcwd(), "vector_db")
    if not os.path.exists(db_path):
        from huggingface_hub import snapshot_download
        db_path = snapshot_download(repo_id="harithapliyal/osho-vector-db")

    chroma = chromadb.PersistentClient(path=db_path)

    # Same embedding model the collection was built with.
    embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2"
    )
    collection = chroma.get_collection(
        name="osho_books",
        embedding_function=embedder
    )

    # Semantic search over the stored passages.
    hits = collection.query(query_texts=[question], n_results=n_results)

    # Pair each returned document with its metadata, numbering from 1.
    passages = [
        {
            "passage_number": idx,
            "book": meta['book'],
            "text": clean_text(doc.strip()),
        }
        for idx, (doc, meta) in enumerate(
            zip(hits['documents'][0], hits['metadatas'][0]), start=1
        )
    ]

    return {
        "question": question,
        "answer_passages": passages,
        "total_passages": len(passages),
    }
def save_qa_to_file(qa_response: Dict, output_file: str = None) -> str:
    """
    Save the Q&A response to a JSON file.

    Args:
        qa_response (Dict): The Q&A response to save
        output_file (str): Optional output file path. If None, a filename is
            derived from the first 30 characters of the question.

    Returns:
        str: The path the response was written to.
    """
    if output_file is None:
        # Create answers directory if it doesn't exist
        answers_dir = os.path.join(os.getcwd(), "answers")
        os.makedirs(answers_dir, exist_ok=True)
        # Build a filesystem-safe slug from the question. The old code only
        # replaced spaces, so punctuation such as '?', ':' or '/' leaked into
        # the filename (invalid on Windows, path-breaking elsewhere). Map
        # every non-alphanumeric character to '_' instead.
        stem = qa_response['question'][:30].lower()
        slug = ''.join(ch if ch.isalnum() else '_' for ch in stem)
        output_file = os.path.join(answers_dir, f"answer_{slug}.json")
    # Save to file (UTF-8, human-readable, keep non-ASCII characters as-is).
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(qa_response, f, ensure_ascii=False, indent=2)
    return output_file
if __name__ == "__main__":
    # Demo run: query the vector DB, persist the result, pretty-print it.
    demo_question = "What is the nature of consciousness?"
    result = get_answer_from_osho(demo_question)
    saved_path = save_qa_to_file(result)

    print(f"\nQuestion: {result['question']}\n")
    for part in result['answer_passages']:
        print(f"\nPassage {part['passage_number']}:")
        print(f"Book: {part['book']}")
        print(f"Text: {part['text'][:200]}...")
        print("-" * 80)
    print(f"\nResponse saved to: {saved_path}")