Spaces:

nuseAI
/

fastAPIv2

Running

App Files Files Community

fastAPIv2 / components /generators /daily_feed.py

ragV98

Prompt revision 7

d220492 about 2 months ago

raw

history blame

5.44 kB

	import os
	import sys
	import json
	import requests
	import redis
	from typing import List, Dict, Optional
	from llama_index.core import VectorStoreIndex
	from llama_index.core.query_engine import RetrieverQueryEngine
	from llama_index.core.schema import Document
	from llama_index.core.settings import Settings

	# Disable implicit LLM usage inside llama_index for this module.
	Settings.llm = None

	# Runtime configuration, read once from the process environment.
	# NOTE(review): REDIS_KEY is populated from UPSTASH_REDIS_TOKEN, yet the
	# functions in this file pass it to Redis as the *key* of the cached feed.
	# It is also None when the variable is unset -- confirm this is intended.
	REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "redis://localhost:6379")
	REDIS_KEY = os.getenv("UPSTASH_REDIS_TOKEN")
	MISTRAL_URL = os.getenv("MISTRAL_URL")
	HF_TOKEN = os.getenv("HF_TOKEN")

	# Shared Redis connection; responses are decoded to str rather than bytes.
	redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

	# Topics the daily feed is generated for, in output order.
	TOPICS = [
	    f"{subject} news"
	    for subject in ("India", "World", "Tech", "Finance", "Sports")
	]

	# Default HTTP headers for the Hugging Face inference endpoint.
	HEADERS = {
	    "Authorization": "Bearer {}".format(HF_TOKEN),
	    "Content-Type": "application/json",
	}

	# 🧠 Build Mistral-style instruction prompt
	def build_prompt(content: str, topic: str) -> str:
	    """Build the Mistral instruction prompt for one piece of news content.

	    The prompt is the fixed summarizer instruction, followed by a
	    ``Topic:`` line and the stripped content, wrapped as
	    ``<s>[INST]...[/INST]</s>``.

	    Args:
	        content: Raw text to summarize; surrounding whitespace is stripped.
	        topic: Topic label inserted into the ``Topic:`` line.

	    Returns:
	        The full prompt string.
	    """
	    # Adjacent string literals are concatenated verbatim, so every fragment
	    # that must end a line carries its own "\n". Without them the example
	    # bullets and the trailing instructions run into each other.
	    base_instruction = (
	        "You are Nuse’s official news summarizer — insightful, punchy, and always on point.\n"
	        "Your job is to scan the content below and extract the key news items. For each item, craft a crisp summary (15–20 words).\n"
	        "List each summary on a new line starting with a dash (-) and no numbers. This is how Nuse keeps it clean and scannable.\n"
	        "\n"
	        "Example format:\n"
	        "- India stuns Australia in a last-ball thriller at the World Cup finals \n (15–20 words)\n"
	        "- U.S. imposes sweeping tariffs on Chinese tech giants, rattling global markets \n (15–20 words)\n"
	        "- Ceasefire breakthrough: Netanyahu bows to pressure after week-long escalation \n (15–20 words)\n"
	        "\n"
	        "If you are mentioning a person, make sure you include who that person is in brackets next to their name. For example: Jeff Bezos (Amazon CEO), Narendra Modi (Prime minister of India)\n"
	        "If you don't find anything useful, don't return anything for that news item\n"
	        "Skim through the news item and form the summary in a way to make it hookable, add essentials data points and meat, in short, the summary should be a hook line.\n"
	        "Be sharp. Be brief. No fluff. No preambles. Just the summaries.\n"
	        "Return only the final summary block — no extra commentary, no prompt repetition."
	    )
	    tail = f"Topic: {topic}\n\n{content.strip()}"
	    # The closing "[/INST]</s>" is left unchanged: the response parser in
	    # this file splits the generated text on exactly that marker.
	    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]</s>"


	# 🔁 Call Mistral using HF Inference Endpoint
	def call_mistral(prompt: str) -> Optional[str]:
	    """Send ``prompt`` to the endpoint at ``MISTRAL_URL`` and return its text.

	    The JSON response may be a non-empty list (first element is used) or a
	    dict; its ``generated_text`` field is read. If that text contains the
	    ``[/INST]</s>`` marker, only the part after its last occurrence is
	    kept.

	    Args:
	        prompt: Prompt string sent as the ``inputs`` field of the request.

	    Returns:
	        The stripped generated text, or ``None`` when the response has an
	        unexpected shape or any exception is raised along the way.
	    """
	    marker = "[/INST]</s>"
	    request_headers = {
	        "Authorization": f"Bearer {HF_TOKEN}",
	        "Content-Type": "application/json",
	    }
	    body = {"inputs": prompt}

	    try:
	        reply = requests.post(
	            MISTRAL_URL, headers=request_headers, json=body, timeout=20
	        )
	        reply.raise_for_status()
	        parsed = reply.json()

	        # Pick the record that carries the generated text.
	        if isinstance(parsed, list) and parsed:
	            record = parsed[0]
	        elif isinstance(parsed, dict):
	            record = parsed
	        else:
	            return None
	        generated = record.get("generated_text", "")

	        # Drop an echoed prompt: keep only what follows the marker.
	        if marker in generated:
	            generated = generated.split(marker)[-1]
	        return generated.strip()

	    except Exception as e:
	        # Best-effort call: report the failure and let the caller skip it.
	        print(f"⚠️ Mistral error: {e}")
	        return None

	# ✂️ Summarize top N documents
	def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
	    """Summarize up to the first five documents of a topic into feed items.

	    Each document is turned into a prompt and sent to the model. Every
	    line of the reply that starts with a dash ("-") or en dash ("–")
	    becomes one feed item; other lines are ignored.

	    Args:
	        docs: Document texts; only ``docs[:5]`` are processed.
	        topic: Topic label used in the prompt and in the search link.

	    Returns:
	        A list of dicts with ``summary``, ``image_url`` and
	        ``article_link`` keys. Empty when no usable summary was produced.
	    """
	    from urllib.parse import quote_plus

	    # Same link for every item of this topic, so build it once. quote_plus
	    # still maps spaces to "+" but also escapes characters such as "&" or
	    # "#" that would otherwise corrupt the query string.
	    article_link = "https://google.com/search?q=" + quote_plus(topic)

	    feed = []
	    for doc in docs[:5]:
	        prompt = build_prompt(doc, topic)
	        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
	        summary_block = call_mistral(prompt)
	        if not summary_block:
	            # Failed or empty reply: skip this document.
	            continue

	        for line in summary_block.splitlines():
	            line = line.strip()
	            if not line.startswith(("-", "–")):
	                continue
	            clean_summary = line.lstrip("-–").strip()
	            if clean_summary:
	                feed.append({
	                    "summary": clean_summary,
	                    "image_url": "https://source.unsplash.com/800x600/?news",
	                    "article_link": article_link,
	                })

	    return feed


	# ⚡ Generate and cache daily feed
	def generate_and_cache_daily_feed(documents: List[Document]):
	    """Build the feed for every entry in ``TOPICS`` and store it in Redis.

	    A vector index is built over ``documents``. Each topic is run through
	    a retriever-backed query engine, and the content of the returned
	    source nodes is summarized. The combined result is JSON-encoded and
	    written to Redis under ``REDIS_KEY``.

	    Args:
	        documents: Documents to index.

	    Returns:
	        A list with one ``{"topic": ..., "feed": [...]}`` dict per topic,
	        where the topic name is lowercased with " news" removed.
	    """
	    engine = RetrieverQueryEngine(
	        retriever=VectorStoreIndex.from_documents(documents).as_retriever()
	    )

	    sections = []
	    for topic in TOPICS:
	        print(f"\n🔍 Generating for: {topic}")
	        hits = engine.query(topic).source_nodes
	        passages = []
	        for node in hits:
	            passages.append(str(node.get_content()))

	        items = summarize_topic(passages, topic)
	        label = topic.lower().replace(" news", "")
	        sections.append({"topic": label, "feed": items})

	    # ensure_ascii=False keeps non-ASCII characters unescaped in the JSON.
	    serialized = json.dumps(sections, ensure_ascii=False)
	    redis_client.set(REDIS_KEY, serialized)
	    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
	    return sections

	# 📦 For testing or API access
	def get_cached_daily_feed():
	    """Return the feed stored in Redis under ``REDIS_KEY``.

	    Returns:
	        The JSON-decoded cached value, or an empty list when nothing (or
	        an empty value) is stored under the key.
	    """
	    stored = redis_client.get(REDIS_KEY)
	    if not stored:
	        return []
	    return json.loads(stored)