ragV98 committed on
Commit 3f4bef7 · 1 Parent(s): 430007a

revert to by topic

Files changed (1):
components/generators/daily_feed.py  +28 -47
components/generators/daily_feed.py CHANGED
@@ -2,15 +2,15 @@ import os
 import json
 import redis
 from typing import List, Dict
-from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
-from llama_index.core.schema import Document
+from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.core.query_engine import RetrieverQueryEngine
-from components.LLMs.Mistral import call_mistral
-from components.indexers.news_indexer import load_news_index,get_upstash_vector_store
-from llama_index.core.settings import Settings
 from llama_index.llms.base import LLM, LLMMetadata
 from llama_index.core.llms import CompletionResponse
+from llama_index.core.settings import Settings
+from components.LLMs.Mistral import call_mistral
+from components.indexers.news_indexer import get_upstash_vector_store
 
+# ✅ Dummy LLM config to allow higher context window in LlamaIndex
 class DummyLLM(LLM):
     def complete(self, prompt: str, **kwargs) -> CompletionResponse:
         return CompletionResponse(text="")
@@ -68,73 +68,54 @@ BASE_PROMPT = (
     "You are generating sharp, editorial-style headlines. Only output the summaries. Nothing else."
 )
 
-# 🧠 Categorize summary line into topic
-def categorize_summary(summary: str) -> str:
-    s = summary.lower()
-    if "india" in s or "modi" in s:
-        return "india"
-    elif any(x in s for x in ["us", "uk", "gaza", "china", "russia", "bangladesh", "israel", "trump", "biden", "world"]):
-        return "world"
-    elif any(x in s for x in ["ai", "tech", "space", "innovation", "startup", "software", "device"]):
-        return "tech"
-    elif any(x in s for x in ["market", "stock", "inflation", "finance", "fed", "reserve", "earnings", "revenue", "economy"]):
-        return "finance"
-    elif any(x in s for x in ["cricket", "football", "nba", "nfl", "sports", "match", "league", "tournament"]):
-        return "sports"
-    else:
-        return "world"
-
-# 📥 Load all documents from the vector store
-def load_all_documents() -> List[str]:
+# 📥 Load topic-wise documents from Upstash vector store
+def load_documents_by_topic() -> Dict[str, List[str]]:
     vector_store = get_upstash_vector_store()
     storage_context = StorageContext.from_defaults(vector_store=vector_store)
-
-    # Rebuild index using storage context directly (not from_disk)
     index = VectorStoreIndex([], storage_context=storage_context)
-
     retriever = index.as_retriever(similarity_top_k=10)
     query_engine = RetrieverQueryEngine(retriever=retriever)
 
-    combined_docs = []
-    for topic in TOPICS:
+    topic_docs = {}
+    for topic, key in zip(TOPICS, TOPIC_KEYS):
         response = query_engine.query(topic)
-        for node in response.source_nodes:
-            doc_text = str(node.get_content()).strip()
-            if doc_text:
-                combined_docs.append(doc_text)
-    return combined_docs
-
-# 🧪 Summarize entire day's content in one call
-def summarize_and_categorize(docs: List[str]) -> Dict[str, List[Dict]]:
+        doc_texts = [str(node.get_content()).strip() for node in response.source_nodes if node.get_content()]
+        topic_docs[key] = doc_texts
+    return topic_docs
+
+# 🧪 Summarize one topic at a time
+def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
+    if not docs:
+        return []
+
     merged_text = "\n\n---\n\n".join(docs)
-    print("\n🧠 Sending merged prompt to summarizer...\n")
+    print(f"🧠 Summarizing topic: {topic_key}")
     summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)
 
-    categorized_feed = {key: [] for key in TOPIC_KEYS}
+    summaries = []
     if summary_block:
        for line in summary_block.splitlines():
            line = line.strip()
            if line.startswith("-"):
                clean = line.lstrip("-–").strip()
                if clean:
-                    topic_key = categorize_summary(clean)
-                    categorized_feed[topic_key].append({
+                    summaries.append({
                         "summary": clean,
                         "image_url": "https://source.unsplash.com/800x600/?news",
                         "article_link": f"https://google.com/search?q={topic_key}+news"
                     })
-    return categorized_feed
+    return summaries
 
 # 🚀 Main callable
 def generate_and_cache_daily_feed():
-    docs = load_all_documents()
-    if not docs:
-        print("⚠️ No documents found in vector store.")
-        return []
+    topic_docs = load_documents_by_topic()
+    feed_map = {}
 
-    feed_map = summarize_and_categorize(docs)
-    final_feed = [{"topic": topic, "feed": feed_map[topic]} for topic in TOPIC_KEYS]
+    for topic_key in TOPIC_KEYS:
+        summaries = summarize_topic(topic_key, topic_docs.get(topic_key, []))
+        feed_map[topic_key] = summaries
 
+    final_feed = [{"topic": topic, "feed": feed_map[topic]} for topic in TOPIC_KEYS]
     redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
     print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
     return final_feed
 
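For reference, a minimal sketch of driving the reverted per-topic flow end to end. It assumes the module-level TOPICS, TOPIC_KEYS, REDIS_KEY, and redis_client (all defined outside the hunks shown above) are configured and that a Redis instance is reachable; the "india" key in the comment is an illustrative assumption, not necessarily an actual entry of TOPIC_KEYS.

from components.generators.daily_feed import generate_and_cache_daily_feed

# Build the per-topic feed and cache it in Redis under REDIS_KEY.
# The return value mirrors the cached JSON: one block per TOPIC_KEYS entry,
# each shaped like {"topic": <key>, "feed": [{"summary": ..., "image_url": ...,
# "article_link": ...}, ...]}.
final_feed = generate_and_cache_daily_feed()

for block in final_feed:  # e.g. block["topic"] might be "india" (assumed key)
    print(f"{block['topic']}: {len(block['feed'])} summaries")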