"""Build, categorize, and cache a one-day news feed summarized by an LLM.

Pipeline: merge all document texts -> single Mistral summarization call ->
parse dash-prefixed summary lines -> bucket each line into a topic ->
cache the final feed as JSON in Redis.
"""

import json
import os
import re
from typing import Dict, List

import redis
from llama_index.core.schema import Document

from components.LLMs.Mistral import call_mistral

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the cache *key name* is read from UPSTASH_REDIS_TOKEN, which by
# its name looks like an auth token, not a key — confirm intent with the deploy
# config. A fallback is provided so redis_client.set() is never called with a
# None key (redis-py raises DataError on a None key).
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN") or "daily_news_feed"

# ✅ Redis client (from_url is lazy: no connection until the first command)
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topic list
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# 🔧 Flattened topic keys for JSON output, e.g. "India news" -> "india"
TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]

# 🧠 Summarization prompt
BASE_PROMPT = (
    "You are Nuse’s official news summarizer — fast, sharp, and never generic.\n"
    "Your task is to read the following **collection of news excerpts** and extract the most important stories.\n"
    "\n"
    "For each distinct news item you find, write a punchy summary — exactly one line, no more than 20 words. Aim for 15–20 words per summary.\n"
    "\n"
    "Formatting rules:\n"
    "- Each summary must begin with a dash (-)\n"
    "- Do **not** number the summaries\n"
    "- Do **not** include emojis or hashtags\n"
    "- Do **not** add the source name or publication\n"
    "\n"
    "If a person is mentioned, include their designation in brackets. Examples:\n"
    "- Jeff Bezos (Amazon founder)\n"
    "- Narendra Modi (Prime Minister of India)\n"
    "- NATO Chief Jens Stoltenberg\n"
    "\n"
    "✅ Good examples:\n"
    "- India stuns Australia in last-ball World Cup thriller, secures spot in finals\n"
    "- U.S. imposes tariffs on Chinese tech giants, shaking global investor confidence\n"
    "- Ceasefire breakthrough as Netanyahu (Israeli PM) relents under diplomatic pressure\n"
    "\n"
    "❌ Avoid:\n"
    "- Source mentions like (The New York Times), (Reuters)\n"
    "- Introductory fluff or meta comments\n"
    "- Repeating prompt instructions or context\n"
    "\n"
    "You are generating sharp, editorial-style headlines. Only output the summaries. Nothing else."
)

# Keyword sets per topic, matched against whole words only. The previous
# substring matching produced false positives ("us" inside "business",
# "ai" inside "said", "fed" inside "suffered"); "indian" is included so the
# common adjectival form still routes to the India bucket. Checked in order;
# first hit wins; anything unmatched falls back to "world".
_TOPIC_KEYWORDS: Dict[str, frozenset] = {
    "india": frozenset({"india", "indian", "modi"}),
    "world": frozenset({
        "us", "uk", "gaza", "china", "russia", "bangladesh",
        "israel", "trump", "biden", "world",
    }),
    "tech": frozenset({
        "ai", "tech", "space", "innovation", "startup", "software", "device",
    }),
    "finance": frozenset({
        "market", "stock", "inflation", "finance", "fed", "reserve",
        "earnings", "revenue", "economy",
    }),
    "sports": frozenset({
        "cricket", "football", "nba", "nfl", "sports", "match",
        "league", "tournament",
    }),
}

# Compiled once; extracts lowercase alphabetic word tokens.
_WORD_RE = re.compile(r"[a-z]+")


# 🧠 Categorize a summary line into a topic
def categorize_summary(summary: str) -> str:
    """Map a one-line summary to one of TOPIC_KEYS.

    Args:
        summary: A single summary line produced by the LLM.

    Returns:
        The first topic (in india/world/tech/finance/sports order) whose
        keyword set intersects the summary's word tokens; "world" otherwise.
    """
    words = frozenset(_WORD_RE.findall(summary.lower()))
    for topic in ("india", "world", "tech", "finance", "sports"):
        if words & _TOPIC_KEYWORDS[topic]:
            return topic
    return "world"


# 🧪 Summarize the entire day's documents in one LLM pass
def summarize_all_documents(documents: List[Document]) -> Dict[str, List[Dict]]:
    """Summarize all documents with a single LLM call and bucket the results.

    Args:
        documents: Documents whose ``.text`` bodies are merged (separated by
            "---" dividers) into one prompt tail; empty texts are skipped.

    Returns:
        Mapping of every topic key in TOPIC_KEYS to a (possibly empty) list of
        ``{"summary", "image_url", "article_link"}`` dicts.
    """
    stripped = [doc.text.strip() for doc in documents]
    merged_text = "\n\n---\n\n".join(text for text in stripped if text)

    print("\n🧠 Sending merged prompt to summarizer...\n")
    summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)

    categorized_feed: Dict[str, List[Dict]] = {key: [] for key in TOPIC_KEYS}

    if summary_block:
        for line in summary_block.splitlines():
            line = line.strip()
            # Only dash-prefixed lines are summaries per the prompt contract.
            if not line.startswith("-"):
                continue
            # Strip leading hyphens/en-dashes the model may emit.
            clean = line.lstrip("-–").strip()
            if not clean:
                continue
            topic_key = categorize_summary(clean)
            categorized_feed[topic_key].append({
                "summary": clean,
                # Placeholder media/link values until real sources are wired in.
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": f"https://google.com/search?q={topic_key}+news",
            })

    return categorized_feed


# 🚀 Final callable to build and cache the feed
def generate_and_cache_daily_feed(documents: List[Document]):
    """Summarize ``documents``, cache the feed in Redis, and return it.

    Returns:
        A list of ``{"topic": <key>, "feed": [...]}`` dicts, one per topic in
        TOPIC_KEYS order (also stored as JSON under REDIS_KEY).
    """
    all_feed = summarize_all_documents(documents)
    final_feed = [{"topic": topic, "feed": all_feed[topic]} for topic in TOPIC_KEYS]

    # ensure_ascii=False keeps non-ASCII headline text readable in the cache.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed


# 📦 Utility to read cached data
def get_cached_daily_feed():
    """Return the cached feed from Redis, or [] when no cache exists.

    Fixes a bug where a bare ``return`` preceded the parse expression, so the
    function always returned None and the JSON decode was dead code.
    """
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []