import os
import json
import redis
from typing import List, Dict
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.llms import CustomLLM, LLMMetadata, CompletionResponse, CompletionResponseGen
from llama_index.core.settings import Settings
from components.LLMs.Mistral import call_mistral
from components.indexers.news_indexer import get_upstash_vector_store

# ✅ Dummy LLM config to allow a higher context window in LlamaIndex.
# CustomLLM only requires complete/stream_complete/metadata, so no-op stubs are enough here.
class DummyLLM(CustomLLM):
    def complete(self, prompt: str, **kwargs) -> CompletionResponse:
        return CompletionResponse(text="")

    def stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
        yield CompletionResponse(text="")

    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            context_window=8192,
            num_output=1024,
            is_chat_model=False,
        )


Settings.llm = DummyLLM()
# πŸ” Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
INDEX_DIR = os.environ.get("INDEX_DIR", "storage/index")

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topic list
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]
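# TOPIC_KEYS derived from TOPICS above: ["india", "world", "tech", "finance", "sports"]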

# 🧠 Summarization prompt
BASE_PROMPT = (
    "You are Nuse’s official news summarizer — fast, sharp, and never generic.\n"
    "Your task is to read the following **collection of news excerpts** and extract the most important stories.\n"
    "\n"
    "For each distinct news item you find, write a punchy summary — exactly one line, no more than 20 words. Aim for 15–20 words per summary.\n"
    "\n"
    "Formatting rules:\n"
    "- Each summary must begin with a dash (-)\n"
    "- Do **not** number the summaries\n"
    "- Do **not** include emojis or hashtags\n"
    "- Do **not** add the source name or publication\n"
    "\n"
    "If a person is mentioned, include their designation in brackets. Examples:\n"
    "- Jeff Bezos (Amazon founder)\n"
    "- Narendra Modi (Prime Minister of India)\n"
    "- NATO Chief Jens Stoltenberg\n"
    "\n"
    "✅ Good examples:\n"
    "- India stuns Australia in last-ball World Cup thriller, secures spot in finals\n"
    "- U.S. imposes tariffs on Chinese tech giants, shaking global investor confidence\n"
    "- Ceasefire breakthrough as Netanyahu (Israeli PM) relents under diplomatic pressure\n"
    "\n"
    "❌ Avoid:\n"
    "- Source mentions like (The New York Times), (Reuters)\n"
    "- Introductory fluff or meta comments\n"
    "- Repeating prompt instructions or context\n"
    "\n"
    "You are generating sharp, editorial-style headlines. Only output the summaries. Nothing else."
)

# 📥 Load topic-wise documents from Upstash vector store
def load_documents_by_topic() -> Dict[str, List[str]]:
    vector_store = get_upstash_vector_store()
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex([], storage_context=storage_context)
    retriever = index.as_retriever(similarity_top_k=10)
    query_engine = RetrieverQueryEngine(retriever=retriever)
    topic_docs = {}
    for topic, key in zip(TOPICS, TOPIC_KEYS):
        response = query_engine.query(topic)
        doc_texts = [str(node.get_content()).strip() for node in response.source_nodes if node.get_content()]
        topic_docs[key] = doc_texts
    return topic_docs
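
# Illustrative return shape (keys come from TOPIC_KEYS, values are raw excerpt strings):
# {"india": ["<excerpt>", ...], "world": [...], "tech": [...], "finance": [...], "sports": [...]}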

# 🧪 Summarize one topic at a time
def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
    if not docs:
        return []
    merged_text = "\n\n---\n\n".join(docs)
    print(f"🧠 Summarizing topic: {topic_key}")
    summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)
    summaries = []
    if summary_block:
        for line in summary_block.splitlines():
            line = line.strip()
            if line.startswith("-"):
                clean = line.lstrip("-–").strip()
                if clean:
                    summaries.append({
                        "summary": clean,
                        "image_url": "https://source.unsplash.com/800x600/?news",
                        "article_link": f"https://google.com/search?q={topic_key}+news",
                    })
    return summaries
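
# Illustrative output of summarize_topic("india", [...]) (values are examples only):
# [
#     {
#         "summary": "India stuns Australia in last-ball World Cup thriller, secures spot in finals",
#         "image_url": "https://source.unsplash.com/800x600/?news",
#         "article_link": "https://google.com/search?q=india+news",
#     },
# ]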

# 🚀 Main callable
def generate_and_cache_daily_feed():
    topic_docs = load_documents_by_topic()
    feed_map = {}
    for topic_key in TOPIC_KEYS:
        summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []))
        feed_map[topic_key] = summaries
    final_feed = [{"topic": topic, "feed": feed_map[topic]} for topic in TOPIC_KEYS]
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed

# 📦 Get cached data
def get_cached_daily_feed():
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []
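

# Minimal usage sketch (an assumption, not part of the deployed pipeline): running the module
# directly regenerates the feed and prints it. Assumes UPSTASH_REDIS_URL, UPSTASH_REDIS_TOKEN,
# and the vector store credentials used by get_upstash_vector_store() are set in the environment.
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))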