import os
import json
import re
import redis
from typing import List, Dict

from openai import OpenAI

from components.indexers.news_indexer import get_upstash_vector_store
from llama_index.core import VectorStoreIndex

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# Key the generated feed is cached under ("DAILY_FEED_REDIS_KEY" is an assumed
# variable name; the Upstash auth token belongs in the connection URL, not here).
REDIS_KEY = os.environ.get("DAILY_FEED_REDIS_KEY", "daily_feed")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# ✅ Redis client
try:
    redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
except Exception as e:
    print("❌ [Redis Init Error]", e)
    raise

# 📰 Topic list
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]
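# e.g. "India news" -> "india"; these keys name the topic buckets and the cached feed entries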

# 🧠 Summarization prompt
BASE_PROMPT = (
    "You are Nuse’s editorial summarizer. Read the excerpts below and extract the most important stories. "
    "Return up to 3 punchy headlines, each under 20 words, written like a premium editorial bulletin."
)
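
# The reply is parsed line-by-line in summarize_topic(), so the prompt asks for
# short standalone headlines rather than flowing prose.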

# 📥 Load topic-wise documents from Upstash vector store
def load_documents_by_topic() -> Dict[str, List[str]]:
    try:
        vector_store = get_upstash_vector_store()
        # Wrap the existing vector store in an index view; nothing is (re)inserted.
        index = VectorStoreIndex.from_vector_store(vector_store)
        retriever = index.as_retriever(similarity_top_k=10)

        topic_docs = {}
        for topic, key in zip(TOPICS, TOPIC_KEYS):
            try:
                # Retrieve raw nodes directly; a full query engine would also
                # synthesize an answer that is discarded here anyway.
                nodes = retriever.retrieve(topic)
                topic_docs[key] = [
                    node.get_content().strip() for node in nodes if node.get_content()
                ]
            except Exception as e:
                print(f"❌ [Topic Retrieval Error: {key}]", e)
                topic_docs[key] = []
        return topic_docs
    except Exception as e:
        print("❌ [load_documents_by_topic Error]", e)
        return {}
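
# Returned shape: {"india": [excerpt, ...], "world": [...], ...}; topics whose
# retrieval failed map to empty lists.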

# 🧪 Summarize one topic at a time using OpenAI GPT-4
def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
    if not docs:
        print(f"⚠️ No docs found for topic: {topic_key}")
        return []
    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
        # Rough trim to avoid context overflow: ~12k chars is about 3k tokens,
        # leaving headroom for the 512-token reply within gpt-4's 8k window.
        content = "\n\n---\n\n".join(docs)[:12000]
        print(f"🧠 Summarizing topic via OpenAI: {topic_key}")
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
        )
        text = (completion.choices[0].message.content or "").strip()
        summaries = []
        for line in text.splitlines():
            # Drop leading bullet or numbering markers the model tends to emit
            # (e.g. "- ", "• ", "1. ") before keeping the headline.
            line = re.sub(r"^\s*(?:[-–‒•]\s*|\d+[.)]\s*)", "", line).strip()
            if line:
                summaries.append({
                    "summary": line,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": f"https://google.com/search?q={topic_key}+news",
                })
        return summaries
    except Exception as e:
        print(f"❌ [OpenAI Summarization Error: {topic_key}]", e)
        return []
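
# Each feed item has the shape:
#   {"summary": <headline>, "image_url": <stock image>, "article_link": <search link>}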

# 🚀 Main callable
def generate_and_cache_daily_feed():
    try:
        print("🆕 Running OpenAI-powered daily feed generator...")
        topic_docs = load_documents_by_topic()
        feed_map = {}
        for topic_key in TOPIC_KEYS:
            try:
                summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []))
                feed_map[topic_key] = summaries
            except Exception as e:
                print(f"❌ [Topic Loop Error: {topic_key}]", e)
                feed_map[topic_key] = []

        final_feed = [{"topic": topic, "feed": feed_map[topic]} for topic in TOPIC_KEYS]

        try:
            redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
            print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
        except Exception as e:
            print("❌ [Redis Cache Error]", e)

        return final_feed
    except Exception as e:
        print("❌ [generate_and_cache_daily_feed Error]", e)
        return []
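
# Cached payload shape: [{"topic": "india", "feed": [...]}, ...] for every topic key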

# 📦 Get cached data
def get_cached_daily_feed():
    try:
        cached = redis_client.get(REDIS_KEY)
        return json.loads(cached) if cached else []
    except Exception as e:
        print("❌ [get_cached_daily_feed Error]", e)
        return []
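
# Minimal manual run; assumes the environment variables above are set and the
# Upstash index is already populated.
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))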