"""Build a per-topic news feed and cache it in Redis.

Documents are indexed with llama_index, the nodes retrieved for each topic are
summarized by a Mistral model behind a Hugging Face inference endpoint, and
the resulting feed is stored as a single JSON value in Redis.
"""

import json
import os
import sys
from typing import Dict, List, Optional
from urllib.parse import quote_plus

import redis
import requests
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings

# ✅ Disable implicit LLM usage
Settings.llm = None

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the cache key name is read from UPSTASH_REDIS_TOKEN and is
# echoed in a log line below. If that variable really holds a credential it
# should not double as a key name — confirm against the deployment config.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# 📄 Headers for HF endpoint
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}

# Marker that closes the instruction section of a Mistral-style prompt.
_INST_END = "[/INST]"

# Characters accepted as a bullet prefix in the model output (dash, en dash).
_BULLET_PREFIXES = ("-", "–")


# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    """Return the ``[INST]...[/INST]`` prompt asking for bullet summaries.

    Args:
        content: Raw text to summarize; surrounding whitespace is stripped.
        topic: Topic label placed on a ``Topic:`` line above the content.

    Returns:
        The full prompt string: fixed instructions, a blank line, the topic
        line and the content, wrapped in ``[INST]`` / ``[/INST]``.
    """
    # Every example bullet ends with its own newline so that each one starts
    # at the beginning of a line with a dash, as the instructions require.
    base_instruction = (
        "You are Nuse’s official news summarizer — insightful, punchy, and always on point. 🧠✨\n"
        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words), add 1–2 fitting emojis, and make it pop.\n"
        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
        "\n"
        "Example format:\n"
        "- India stuns Australia in a last-ball thriller at the World Cup finals 🏏🇮🇳 (15–20 words)\n"
        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets 📉🇺🇸 (15–20 words)\n"
        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation 🔥🕊️ (15–20 words)\n"
        "\n"
        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
        "Return only the final summary block — no extra commentary, no prompt repetition."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    return f"[INST]{base_instruction}\n\n{tail}{_INST_END}"


# 🔁 Call Mistral using HF Inference Endpoint
def call_mistral(prompt: str) -> Optional[str]:
    """POST ``prompt`` to the endpoint at ``MISTRAL_URL`` and return its text.

    Args:
        prompt: Prompt string, sent as the ``inputs`` field of the JSON body.

    Returns:
        The stripped ``generated_text`` from the response, reduced to the part
        after the last ``[/INST]`` marker when the marker is present. ``None``
        when the request fails, the body is not valid JSON, or the payload
        does not have the expected shape. The result may be an empty string.
    """
    if not MISTRAL_URL:
        print("⚠️ Mistral error: MISTRAL_URL is not set")
        return None

    payload = {"inputs": prompt}
    try:
        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=20)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Best-effort call: network, HTTP and JSON-decoding failures are
        # reported and turned into "no summary" instead of propagating.
        print(f"⚠️ Mistral error: {e}")
        return None

    # The payload is handled both as a list of result objects and as a single
    # result object.
    if isinstance(data, list) and data:
        result = data[0]
    elif isinstance(data, dict):
        result = data
    else:
        return None

    if not isinstance(result, dict):
        print("⚠️ Mistral error: unexpected response shape")
        return None

    raw_output = result.get("generated_text", "")
    if not isinstance(raw_output, str):
        print("⚠️ Mistral error: 'generated_text' is not a string")
        return None

    # ✅ Keep only the portion after the last [/INST] marker; when the marker
    # is absent, rpartition leaves the whole text in the final element.
    return raw_output.rpartition(_INST_END)[2].strip()


# ✂️ Summarize top N documents
def summarize_topic(docs: List[str], topic: str, max_docs: int = 5) -> List[Dict]:
    """Summarize the first ``max_docs`` documents into feed entries.

    Args:
        docs: Document texts; only the first ``max_docs`` are processed.
        topic: Topic label used in the prompt and in each entry's search link.
        max_docs: Upper bound on the number of documents sent to the model.
            Negative values are treated as zero.

    Returns:
        One dict per bullet line found in the model output, with the keys
        ``summary``, ``image_url`` and ``article_link``. Documents for which
        the model call yields nothing contribute no entries.
    """
    # Loop-invariant; quote_plus also escapes characters such as "&" and "#"
    # that a plain space-to-plus replacement would leave in the query string.
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed: List[Dict] = []
    for doc in docs[: max(max_docs, 0)]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")

        summary_block = call_mistral(prompt)
        if not summary_block:
            continue

        # Keep only lines that start with a dash or an en dash.
        for line in summary_block.splitlines():
            line = line.strip()
            if not line.startswith(_BULLET_PREFIXES):
                continue
            clean_summary = line.lstrip("-–").strip()
            if clean_summary:
                feed.append({
                    "summary": clean_summary,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": article_link,
                })
    return feed


# ⚡ Generate and cache daily feed
def generate_and_cache_daily_feed(documents: List[Document]) -> List[Dict]:
    """Build the feed for every entry in ``TOPICS`` and store it in Redis.

    Args:
        documents: Documents to index and retrieve from.

    Returns:
        A list with one ``{"topic": ..., "feed": [...]}`` dict per topic, in
        ``TOPICS`` order. The same list is written to Redis under
        ``REDIS_KEY`` as JSON.

    Raises:
        redis.exceptions.DataError: If ``REDIS_KEY`` is not configured.
    """
    # Fail before indexing and calling the model: without a key the final
    # redis SET would be rejected anyway, after all the work had been done.
    if REDIS_KEY is None:
        raise redis.exceptions.DataError(
            "UPSTASH_REDIS_TOKEN is not set; cannot cache the daily feed"
        )

    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)

    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]

        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed,
        })

    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed


# 📦 For testing or API access
def get_cached_daily_feed() -> List[Dict]:
    """Return the feed stored under ``REDIS_KEY``, or ``[]`` if none is cached."""
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []