revert to by topic
components/generators/daily_feed.py
@@ -2,15 +2,15 @@ import os
 import json
 import redis
 from typing import List, Dict
-from llama_index.core import VectorStoreIndex, StorageContext
-from llama_index.core.schema import Document
+from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.core.query_engine import RetrieverQueryEngine
-from components.LLMs.Mistral import call_mistral
-from components.indexers.news_indexer import load_news_index,get_upstash_vector_store
-from llama_index.core.settings import Settings
 from llama_index.llms.base import LLM, LLMMetadata
 from llama_index.core.llms import CompletionResponse
+from llama_index.core.settings import Settings
+from components.LLMs.Mistral import call_mistral
+from components.indexers.news_indexer import get_upstash_vector_store
 
+# ✅ Dummy LLM config to allow higher context window in LlamaIndex
 class DummyLLM(LLM):
     def complete(self, prompt: str, **kwargs) -> CompletionResponse:
         return CompletionResponse(text="")
@@ -68,73 +68,54 @@ BASE_PROMPT = (
     "You are generating sharp, editorial-style headlines. Only output the summaries. Nothing else."
 )
 
-#
-def
-    s = summary.lower()
-    if "india" in s or "modi" in s:
-        return "india"
-    elif any(x in s for x in ["us", "uk", "gaza", "china", "russia", "bangladesh", "israel", "trump", "biden", "world"]):
-        return "world"
-    elif any(x in s for x in ["ai", "tech", "space", "innovation", "startup", "software", "device"]):
-        return "tech"
-    elif any(x in s for x in ["market", "stock", "inflation", "finance", "fed", "reserve", "earnings", "revenue", "economy"]):
-        return "finance"
-    elif any(x in s for x in ["cricket", "football", "nba", "nfl", "sports", "match", "league", "tournament"]):
-        return "sports"
-    else:
-        return "world"
-
-# 📥 Load all documents from the vector store
-def load_all_documents() -> List[str]:
+# 📥 Load topic-wise documents from Upstash vector store
+def load_documents_by_topic() -> Dict[str, List[str]]:
     vector_store = get_upstash_vector_store()
     storage_context = StorageContext.from_defaults(vector_store=vector_store)
-
-    # Rebuild index using storage context directly (not from_disk)
     index = VectorStoreIndex([], storage_context=storage_context)
-
     retriever = index.as_retriever(similarity_top_k=10)
     query_engine = RetrieverQueryEngine(retriever=retriever)
 
-
-    for topic in TOPICS:
+    topic_docs = {}
+    for topic, key in zip(TOPICS, TOPIC_KEYS):
         response = query_engine.query(topic)
-        for node in response.source_nodes:
-
-
-
-
-
-
-
+        doc_texts = [str(node.get_content()).strip() for node in response.source_nodes if node.get_content()]
+        topic_docs[key] = doc_texts
+    return topic_docs
+
+# 🧪 Summarize one topic at a time
+def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
+    if not docs:
+        return []
+
     merged_text = "\n\n---\n\n".join(docs)
-    print("
+    print(f"🧠 Summarizing topic: {topic_key}")
     summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)
 
-
+    summaries = []
     if summary_block:
         for line in summary_block.splitlines():
             line = line.strip()
             if line.startswith("-"):
                 clean = line.lstrip("-–").strip()
                 if clean:
-
-                    categorized_feed[topic_key].append({
+                    summaries.append({
                         "summary": clean,
                         "image_url": "https://source.unsplash.com/800x600/?news",
                         "article_link": f"https://google.com/search?q={topic_key}+news"
                     })
-    return
+    return summaries
 
 # 🚀 Main callable
 def generate_and_cache_daily_feed():
-    docs = load_all_documents()
-    if not docs:
-        print("⚠️ No documents found in vector store.")
-        return []
+    topic_docs = load_documents_by_topic()
+    feed_map = {}
 
-
-
+    for topic_key in TOPIC_KEYS:
+        summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []))
+        feed_map[topic_key] = summaries
 
+    final_feed = [{"topic": topic, "feed": feed_map[topic]} for topic in TOPIC_KEYS]
     redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
     print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
     return final_feed
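Note: load_documents_by_topic() depends on TOPICS and TOPIC_KEYS staying aligned, because zip() pairs the two lists positionally. Neither list appears in these hunks, so the values below are hypothetical; the sketch only illustrates the pairing behavior:

    # Hypothetical stand-ins -- the real lists are defined above the first hunk.
    TOPICS = ["india news", "world news", "technology news", "finance news", "sports news"]
    TOPIC_KEYS = ["india", "world", "tech", "finance", "sports"]

    for topic, key in zip(TOPICS, TOPIC_KEYS):
        print(f"query {topic!r} -> bucket {key!r}")

zip() stops at the shorter list, so if one list gains an entry and the other does not, the extra topic is silently dropped rather than raising an error.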
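The bullet parsing in summarize_topic() can be checked in isolation. A minimal sketch, using made-up model output rather than a real call_mistral() response:

    # Mirrors the parsing loop in summarize_topic(); the sample block is illustrative.
    summary_block = "- Markets rally on rate-cut hopes\n-– Chip stocks lead the gains\nIntro line without a dash"

    summaries = []
    for line in summary_block.splitlines():
        line = line.strip()
        if line.startswith("-"):               # keep only dash-bulleted lines
            clean = line.lstrip("-–").strip()  # drop leading hyphens and en dashes
            if clean:
                summaries.append(clean)

    print(summaries)  # ['Markets rally on rate-cut hopes', 'Chip stocks lead the gains']

Note that the startswith("-") guard skips bullets that open with a bare en dash ("– ..."), even though lstrip("-–") would clean them; that behavior is the same on both sides of this diff.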
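Downstream consumers read the cached feed back under the same key. A minimal sketch, assuming a local Redis and a key name of "daily_feed" (the real REDIS_KEY and redis_client configuration are defined above these hunks and not shown here):

    import json
    import redis

    REDIS_KEY = "daily_feed"  # assumption: the real key is defined near the top of daily_feed.py
    redis_client = redis.Redis(host="localhost", port=6379, decode_responses=True)  # assumption: local instance

    raw = redis_client.get(REDIS_KEY)
    feed = json.loads(raw) if raw else []

    # Each entry mirrors what generate_and_cache_daily_feed() stores:
    # {"topic": <topic_key>, "feed": [{"summary": ..., "image_url": ..., "article_link": ...}, ...]}
    for entry in feed:
        print(f"{entry['topic']}: {len(entry['feed'])} summaries")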