import os
import json
from typing import List, Dict

import redis

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.settings import Settings

from components.LLMs.Mistral import call_mistral
from components.indexers.news_indexer import get_upstash_vector_store


# ✅ Dummy LLM so LlamaIndex accepts a larger context window without calling a real model.
# CustomLLM only requires `metadata`, `complete`, and `stream_complete` to be implemented.
class DummyLLM(CustomLLM):
    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            context_window=8192,
            num_output=1024,
            is_chat_model=False,
        )

    def complete(self, prompt: str, **kwargs) -> CompletionResponse:
        return CompletionResponse(text="")

    def stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
        yield CompletionResponse(text="")


Settings.llm = DummyLLM()

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")  # Redis key under which the daily feed is cached
INDEX_DIR = os.environ.get("INDEX_DIR", "storage/index")

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topic list
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]

# 🧠 Summarization prompt
BASE_PROMPT = (
    "You are Nuse's official news summarizer: fast, sharp, and never generic.\n"
    "Your task is to read the following **collection of news excerpts** and extract the most important stories.\n"
    "\n"
    "For each distinct news item you find, write a punchy summary: exactly one line, no more than 20 words. Aim for 15–20 words per summary.\n"
    "\n"
    "Formatting rules:\n"
    "- Each summary must begin with a dash (-)\n"
    "- Do **not** number the summaries\n"
    "- Do **not** include emojis or hashtags\n"
    "- Do **not** add the source name or publication\n"
    "\n"
    "If a person is mentioned, include their designation in brackets. Examples:\n"
    "- Jeff Bezos (Amazon founder)\n"
    "- Narendra Modi (Prime Minister of India)\n"
    "- NATO Chief Jens Stoltenberg\n"
    "\n"
    "✅ Good examples:\n"
    "- India stuns Australia in last-ball World Cup thriller, secures spot in finals\n"
    "- U.S. imposes tariffs on Chinese tech giants, shaking global investor confidence\n"
    "- Ceasefire breakthrough as Netanyahu (Israeli PM) relents under diplomatic pressure\n"
    "\n"
    "❌ Avoid:\n"
    "- Source mentions like (The New York Times), (Reuters)\n"
    "- Introductory fluff or meta comments\n"
    "- Repeating prompt instructions or context\n"
    "\n"
    "You are generating sharp, editorial-style headlines. Only output the summaries. Nothing else."
)


# 📥 Load topic-wise documents from the Upstash vector store
def load_documents_by_topic() -> Dict[str, List[str]]:
    vector_store = get_upstash_vector_store()
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex([], storage_context=storage_context)
    retriever = index.as_retriever(similarity_top_k=10)
    query_engine = RetrieverQueryEngine(retriever=retriever)

    topic_docs = {}
    for topic, key in zip(TOPICS, TOPIC_KEYS):
        response = query_engine.query(topic)
        doc_texts = [
            str(node.get_content()).strip()
            for node in response.source_nodes
            if node.get_content()
        ]
        topic_docs[key] = doc_texts
    return topic_docs


# 🧪 Summarize one topic at a time
def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
    if not docs:
        return []

    merged_text = "\n\n---\n\n".join(docs)
    print(f"🧠 Summarizing topic: {topic_key}")
    summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)

    summaries = []
    if summary_block:
        for line in summary_block.splitlines():
            line = line.strip()
            if line.startswith("-"):
                clean = line.lstrip("-–").strip()
                if clean:
                    summaries.append({
                        "summary": clean,
                        "image_url": "https://source.unsplash.com/800x600/?news",
                        "article_link": f"https://google.com/search?q={topic_key}+news",
                    })
    return summaries


# 🚀 Main callable
def generate_and_cache_daily_feed():
    topic_docs = load_documents_by_topic()
    feed_map = {}
    for topic_key in TOPIC_KEYS:
        summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []))
        feed_map[topic_key] = summaries

    final_feed = [{"topic": topic, "feed": feed_map[topic]} for topic in TOPIC_KEYS]
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed


# 📦 Get cached data
def get_cached_daily_feed():
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []
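

# Example local run: a minimal sketch assuming UPSTASH_REDIS_URL and UPSTASH_REDIS_TOKEN
# are set in the environment and the Upstash vector index is already populated.
if __name__ == "__main__":
    feed = generate_and_cache_daily_feed()
    print(json.dumps(feed, indent=2, ensure_ascii=False))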