Spaces:

nuseAI
/

fastAPIv2

Sleeping

File size: 4,429 Bytes

import os
import json
import redis
from typing import List, Dict
from llama_index.core.schema import Document
from components.LLMs.Mistral import call_mistral

# 🔐 Connection settings, read from the environment at import time
REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the cache key is taken from UPSTASH_REDIS_TOKEN and is None
# when that variable is unset — confirm this is the intended key source.
REDIS_KEY = os.getenv("UPSTASH_REDIS_TOKEN")

# ✅ Shared Redis client (decode_responses=True: replies are decoded to str)
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Human-readable topic labels
TOPICS = [
    "India news",
    "World news",
    "Tech news",
    "Finance news",
    "Sports news",
]

# 🔧 Short lowercase keys for the JSON output ("India news" -> "india")
TOPIC_KEYS = [label.lower().replace(" news", "") for label in TOPICS]

# 🧠 Summarization prompt
# Instruction text passed to call_mistral as `base_prompt`; the merged document
# text is supplied separately as `tail_prompt`. The response parser only keeps
# lines that start with a dash, so the "begin with a dash" rule below is what
# makes the model output parseable.
BASE_PROMPT = (
    "You are Nuse’s official news summarizer — fast, sharp, and never generic.\n"
    "Your task is to read the following **collection of news excerpts** and extract the most important stories.\n"
    "\n"
    "For each distinct news item you find, write a punchy summary — exactly one line, no more than 20 words. Aim for 15–20 words per summary.\n"
    "\n"
    "Formatting rules:\n"
    "- Each summary must begin with a dash (-)\n"
    "- Do **not** number the summaries\n"
    "- Do **not** include emojis or hashtags\n"
    "- Do **not** add the source name or publication\n"
    "\n"
    "If a person is mentioned, include their designation in brackets. Examples:\n"
    "- Jeff Bezos (Amazon founder)\n"
    "- Narendra Modi (Prime Minister of India)\n"
    "- NATO Chief Jens Stoltenberg\n"
    "\n"
    "✅ Good examples:\n"
    "- India stuns Australia in last-ball World Cup thriller, secures spot in finals\n"
    "- U.S. imposes tariffs on Chinese tech giants, shaking global investor confidence\n"
    "- Ceasefire breakthrough as Netanyahu (Israeli PM) relents under diplomatic pressure\n"
    "\n"
    "❌ Avoid:\n"
    "- Source mentions like (The New York Times), (Reuters)\n"
    "- Introductory fluff or meta comments\n"
    "- Repeating prompt instructions or context\n"
    "\n"
    "You are generating sharp, editorial-style headlines. Only output the summaries. Nothing else."
)

# 🧠 Categorize summary line into topic
# Ordered rules: (topic key, substring keywords, whole-word keywords).
# Short or ambiguous terms are whole-word only, because as raw substrings they
# hit unrelated words ("us" in "focus", "ai" in "said", "modi" in "commodities").
_TOPIC_RULES = (
    ("india", ("india",), ("modi",)),
    (
        "world",
        # "ukraine" is listed explicitly: it used to match only through "uk".
        # "u.s." / "u.k." cover the dotted spellings, which tokenize to single
        # letters and would otherwise never match "us" / "uk".
        (
            "gaza", "china", "russia", "bangladesh", "israel", "trump", "biden",
            "world", "ukraine", "u.s.", "u.k.",
        ),
        ("us", "uk"),
    ),
    (
        "tech",
        ("tech", "space", "innovation", "startup", "software", "device"),
        ("ai",),
    ),
    (
        "finance",
        (
            "market", "stock", "inflation", "finance", "reserve", "earnings",
            "revenue", "economy",
        ),
        ("fed",),
    ),
    (
        "sports",
        ("cricket", "football", "sports", "match", "league", "tournament"),
        ("nba", "nfl"),
    ),
)


def categorize_summary(summary: str) -> str:
    """Return the topic key for a one-line summary.

    Rules are checked in priority order (india, world, tech, finance, sports)
    and the first rule with a keyword hit wins. Matching is case-insensitive.
    Longer keywords match anywhere in the text (so "indian" and "fintech"
    still count); short keywords must appear as a standalone word.

    Args:
        summary: Summary text to classify.

    Returns:
        One of "india", "world", "tech", "finance" or "sports". Summaries
        with no keyword hit (including the empty string) fall back to "world".
    """
    s = summary.lower()
    # Split on every non-alphanumeric character so "Modi's" yields "modi".
    words = set("".join(ch if ch.isalnum() else " " for ch in s).split())

    for topic, substrings, whole_words in _TOPIC_RULES:
        if any(keyword in s for keyword in substrings):
            return topic
        if not words.isdisjoint(whole_words):
            return topic
    return "world"

# 🧪 Summarize the entire day’s documents in one LLM pass
def summarize_all_documents(documents: List[Document]) -> Dict[str, List[Dict]]:
    """Summarize all documents with one LLM call and bucket the lines by topic.

    The non-empty document texts are joined with a ``---`` separator and sent
    to ``call_mistral`` together with ``BASE_PROMPT``. Every response line that
    starts with a dash is treated as one summary and filed under the topic
    returned by ``categorize_summary``.

    Args:
        documents: Documents whose ``text`` is merged into the prompt.

    Returns:
        A dict with one entry per key in ``TOPIC_KEYS``. Each value is a list
        of dicts with the keys ``summary``, ``image_url`` and ``article_link``;
        the list is empty when no summary landed in that topic.
    """
    categorized_feed = {key: [] for key in TOPIC_KEYS}

    stripped_texts = (doc.text.strip() for doc in documents)
    merged_text = "\n\n---\n\n".join(text for text in stripped_texts if text)
    if not merged_text:
        # Nothing to summarize: skip the LLM call rather than prompt on empty input.
        return categorized_feed

    print("\n🧠 Sending merged prompt to summarizer...\n")
    summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)
    if not summary_block:
        return categorized_feed

    for line in summary_block.splitlines():
        line = line.strip()
        # Accept hyphen, en dash and em dash bullets; the model does not always
        # emit a plain ASCII hyphen, and such lines used to be dropped silently.
        if not line.startswith(("-", "–", "—")):
            continue
        clean = line.lstrip("-–—").strip()
        if not clean:
            continue
        topic_key = categorize_summary(clean)
        categorized_feed[topic_key].append({
            "summary": clean,
            "image_url": "https://source.unsplash.com/800x600/?news",
            "article_link": f"https://google.com/search?q={topic_key}+news",
        })
    return categorized_feed

# 🚀 Final callable to build and cache the feed
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build the daily feed from ``documents`` and store it in Redis as JSON.

    Args:
        documents: Documents forwarded to ``summarize_all_documents``.

    Returns:
        A list of ``{"topic": <key>, "feed": [...]}`` dicts, one per entry in
        ``TOPIC_KEYS`` and in that order. The same structure is what gets
        cached under ``REDIS_KEY``.
    """
    all_feed = summarize_all_documents(documents)
    final_feed = [{"topic": topic, "feed": all_feed[topic]} for topic in TOPIC_KEYS]

    # ensure_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    # REDIS_KEY is read from the UPSTASH_REDIS_TOKEN environment variable, so
    # its value is deliberately kept out of the log output.
    print("✅ Cached daily feed in Redis")
    return final_feed

# 📦 Utility to read cached data
def get_cached_daily_feed():
    """Return the feed stored under ``REDIS_KEY``, or ``[]`` if nothing is cached.

    The cached value is parsed with ``json.loads``; a missing or empty value
    yields an empty list.
    """
    raw_feed = redis_client.get(REDIS_KEY)
    if not raw_feed:
        return []
    return json.loads(raw_feed)