import os
import sys
import json
import requests
import redis
from typing import List, Dict, Optional
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.schema import Document
from llama_index.core.settings import Settings
from components.LLMs.Mistral import call_mistral
# Disable implicit LLM usage inside llama_index — retrieval only here;
# summarization goes through call_mistral explicitly.
Settings.llm = None

# Environment configuration.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the *token* env var is used as the Redis key name — confirm intentional.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")

# Redis client; decode_responses=True so get() returns str rather than bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Topics the daily feed is generated for.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
# Base summarization prompt shared by all topics (mojibake repaired:
# smart quote / en dash / em dash were garbled in the original encoding).
BASE_PROMPT = (
    "You are Nuse's official news summarizer — insightful, punchy, and always on point.\n"
    "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words). Avoid using any emojis.\n"
    "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
    "\n"
    "Example format:\n"
    "- India stuns Australia in a last-ball thriller at the World Cup finals\n"
    "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets\n"
    "- Ceasefire breakthrough: Netanyahu (Prime minister of Israel) bows to pressure after week-long escalation\n"
    "\n"
    "If you are mentioning a person, include their designation in brackets. For example: Jeff Bezos (Amazon CEO), Narendra Modi (Prime minister of India).\n"
    "If you're referencing a post like 'NATO Chief', also include the name of the person who holds the post.\n"
    "If you don't find anything useful, don't return anything for that news item.\n"
    "Skim through the content and write summaries that are compelling, include essential facts, and feel like strong hook lines.\n"
    "Be sharp. Be brief. No fluff. No preambles. Avoid source citations like (U.S. Security Council) or (The New York Times).\n"
    "Return only the summary block — no extra commentary, no prompt repetition."
)
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize retrieved documents for one topic into feed items.

    Args:
        docs: Raw document texts retrieved for the topic.
        topic: Human-readable topic name, e.g. "India news".

    Returns:
        A list of dicts with "summary", "image_url" and "article_link" keys.
        Empty when ``docs`` is empty or the summarizer returns nothing usable.
    """
    feed: List[Dict] = []
    if not docs:
        return feed

    # Merge all docs with separators so a single LLM call sees everything.
    merged_context = "\n\n---\n\n".join(doc.strip() for doc in docs)
    tail_prompt = f"Topic: {topic}\n\n{merged_context}"
    print(f"\nPrompt tail for summarization:\n{tail_prompt[:500]}...\n")

    # Single summarizer call; one "- summary" line per news item is expected back.
    summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=tail_prompt)
    if not summary_block:
        return feed

    for line in summary_block.splitlines():
        line = line.strip()
        # Accept ASCII hyphen as well as en/em-dash bullets from the model
        # (the original check used a mojibaked dash character).
        if line.startswith(("-", "–", "—")):
            clean_summary = line.lstrip("-–— ").strip()
            if clean_summary:
                feed.append({
                    "summary": clean_summary,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    # Placeholder link: a web search for the topic.
                    "article_link": f"https://google.com/search?q={topic.replace(' ', '+')}",
                })
    return feed
# Generate and cache the daily feed across all topics.
def generate_and_cache_daily_feed(documents: List[Document]):
    """Build a per-topic news feed from ``documents`` and cache it in Redis.

    Args:
        documents: llama_index ``Document`` objects to index and retrieve from.

    Returns:
        The full feed: a list of ``{"topic": <slug>, "feed": [...]}`` dicts,
        one entry per topic in ``TOPICS``.
    """
    # Build an in-memory vector index over the supplied documents.
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
    query_engine = RetrieverQueryEngine(retriever=retriever)

    final_feed = []
    for topic in TOPICS:
        print(f"\nGenerating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]
        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            # "India news" -> "india", etc.
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed,
        })

    # Cache the whole feed as one JSON blob; ensure_ascii=False keeps
    # non-ASCII text human-readable in Redis.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    # Original print here had an unterminated f-string (mojibaked emoji split
    # across lines) — a SyntaxError; repaired below.
    print(f"Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
# Accessor for tests and the API layer.
def get_cached_daily_feed():
    """Return the cached daily feed, or an empty list when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)