"""Generate, summarize, and cache a daily news feed.

Pipeline: retrieve topic-relevant documents via a LlamaIndex vector
retriever, summarize each with a Mistral model served on a Hugging Face
inference endpoint, and cache the resulting feed JSON in Redis.
"""

import os
import sys
import json
from typing import Dict, List, Optional  # Optional was missing: `-> Optional[str]` raised NameError at import time

import requests
import redis

from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings

# ✅ Disable implicit LLM usage (we call Mistral over HTTP ourselves)
Settings.llm = None

# 🔐 Environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): REDIS_KEY doubles as the cache key name and may be None if
# UPSTASH_REDIS_TOKEN is unset — redis_client.set(None, ...) would fail.
# Confirm the env var is guaranteed in deployment.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")

# ✅ Redis client (decode_responses=True so get() returns str, not bytes)
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# 📄 Headers for HF endpoint (shared by every call to the endpoint)
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}


# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    """Wrap *content* and *topic* in a Mistral [INST]...[/INST] prompt.

    The instruction text is a fixed summarization brief; the article
    content is stripped of surrounding whitespace before insertion.
    """
    base_instruction = (
        "You are Nuse’s official news summarizer — factual, concise, and engaging.\n"
        "Summarize the following article in 25–30 words with 1–2 emojis.\n"
        "The given content might contain multiple new items, so summarise each news item in 25-30 words and arranage them one line after the other starting them with a -"
        "For example:"
        " -India wins the biggest...."
        " -The U.S trade tarrifs...."
        " -Netanyahu agrees for a ceasefire...."
        "Return only the summary."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    return f"[INST]{base_instruction}\n\n{tail}[/INST]"


# 🔁 Call Mistral using HF Inference Endpoint
def call_mistral(prompt: str) -> Optional[str]:
    """POST *prompt* to the HF inference endpoint; return the summary text.

    Returns None on any request/parse failure (best-effort: errors are
    logged, never raised, so one bad article doesn't abort the feed).
    """
    payload = {"inputs": prompt}
    try:
        # Reuse the module-level HEADERS instead of rebuilding an identical dict.
        response = requests.post(MISTRAL_URL, headers=HEADERS, json=payload, timeout=20)
        response.raise_for_status()
        data = response.json()

        # HF endpoints return either a list of generations or a single dict.
        if isinstance(data, list) and data:
            raw_output = data[0].get("generated_text", "")
        elif isinstance(data, dict):
            raw_output = data.get("generated_text", "")
        else:
            return None

        # ✅ Extract only the portion after the [/INST] marker
        # (some endpoints echo the prompt back in generated_text).
        if "[/INST]" in raw_output:
            return raw_output.split("[/INST]")[-1].strip()
        return raw_output.strip()

    except Exception as e:
        print(f"⚠️ Mistral error: {e}")
        return None


# ✂️ Summarize top N documents
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize up to the first 5 *docs* for *topic*.

    Returns a list of feed entries; documents whose summarization fails
    (call_mistral returns None) are silently skipped.
    """
    feed = []
    for doc in docs[:5]:
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary = call_mistral(prompt)
        if summary:
            feed.append({
                "summary": summary,
                # Placeholder image/link — no per-article metadata is available here.
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
            })
    return feed


# ⚡ Generate and cache daily feed
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build per-topic summaries from *documents* and cache them in Redis.

    Indexes the documents, retrieves per-topic source nodes, summarizes
    them, then stores the whole feed as one JSON string under REDIS_KEY.
    Returns the feed (list of {"topic": ..., "feed": [...]}).
    """
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)

    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        # Summarize the retrieved source documents, not the (disabled) LLM response.
        docs = [str(node.get_content()) for node in response.source_nodes]
        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed
        })

    # ensure_ascii=False keeps emojis/unicode readable in the cached JSON.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed


# 📦 For testing or API access
def get_cached_daily_feed():
    """Return the cached feed from Redis, or [] if nothing is cached."""
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []