vishurdx committed on
Commit
71dd4c8
·
verified ·
1 Parent(s): c7726c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dataclasses import dataclass, field, replace
from datetime import datetime
from typing import Any, List, Optional

import numpy as np
import pandas as pd
import streamlit as st
from eventbrite_scrapper import Eventbrite
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ # Dataclasses for event structure
12
@dataclass(frozen=True)
class EventAddress:
    """Immutable address record attached to an event venue.

    Every field is optional and defaults to ``None``.
    """

    latitude: Optional[float] = None
    longitude: Optional[float] = None
    region: Optional[str] = None
    postal_code: Optional[str] = None
    address_1: Optional[str] = None
19
+
20
@dataclass(frozen=True)
class EventVenue:
    """Immutable venue record: identifier, name, URL and address.

    Scalar fields default to ``None``; ``address`` defaults to an empty
    ``EventAddress`` so attribute access on it is safe for a default venue.
    """

    id: Optional[str] = None
    name: Optional[str] = None
    url: Optional[str] = None
    # The class itself is the factory; no wrapping lambda is needed.
    address: EventAddress = field(default_factory=EventAddress)
26
+
27
@dataclass(frozen=True)
class EventImage:
    """Immutable holder for an event image URL (``None`` when absent)."""

    url: Optional[str] = None
30
+
31
@dataclass(frozen=True)
class EventTag:
    """Immutable tag attached to an event; ``text`` is ``None`` when unset."""

    text: Optional[str] = None
34
+
35
@dataclass(frozen=True)
class Event:
    """Immutable description of a single event.

    All scalar fields are optional. Nested records (``primary_venue``,
    ``image``) default to empty instances and the tag collections default
    to empty tuples.
    """

    id: Optional[str] = None
    name: Optional[str] = None
    url: Optional[str] = None
    is_online_event: bool = False
    short_description: Optional[str] = None
    published_datetime: Optional[datetime] = None
    start_datetime: Optional[datetime] = None
    end_datetime: Optional[datetime] = None
    timezone: Optional[str] = None
    hide_start_date: bool = False
    hide_end_date: bool = False
    parent_event_url: Optional[str] = None
    series_id: Optional[str] = None
    primary_venue: EventVenue = field(default_factory=EventVenue)
    tickets_url: Optional[str] = None
    checkout_flow: Optional[str] = None
    language: Optional[str] = None
    image: EventImage = field(default_factory=EventImage)
    tags_categories: tuple = field(default_factory=tuple)
    tags_formats: tuple = field(default_factory=tuple)
    tags_by_organizer: tuple = field(default_factory=tuple)

    def __hash__(self):
        """Hash on ``id`` when it is set, else on a few identifying fields.

        Because ``__hash__`` is defined explicitly, the dataclass decorator
        leaves it in place instead of generating one over every field.
        """
        if self.id:
            return hash(self.id)
        # Tolerate an explicit ``primary_venue=None`` instead of raising.
        venue = self.primary_venue
        venue_name = venue.name if venue is not None else None
        return hash((self.name, self.is_online_event, self.start_datetime, venue_name))
61
+
62
+ # Event Retrieval Pipeline
63
class EventbriteRAGPipeline:
    """Rank a fixed list of events against free-text queries.

    Each event is flattened to one text string and embedded once at
    construction time; queries are embedded on demand and compared to the
    stored event embeddings by cosine similarity.
    """

    def __init__(self, events: List["Event"], embedding_model: str = 'all-MiniLM-L6-v2'):
        """Copy the events with tuple-typed tags and embed them.

        Args:
            events: Event records to index.
            embedding_model: Model name passed to ``SentenceTransformer``.
        """
        # Store the tag collections as tuples; ``or ()`` tolerates a
        # collection that is None.
        self.events = [
            replace(
                event,
                tags_categories=tuple(event.tags_categories or ()),
                tags_formats=tuple(event.tags_formats or ()),
                tags_by_organizer=tuple(event.tags_by_organizer or ()),
            )
            for event in events
        ]
        self.model = SentenceTransformer(embedding_model)
        self.event_embeddings = self._compute_embeddings()

    @staticmethod
    def _event_to_text(event: "Event") -> str:
        """Join the searchable text fields of *event* with single spaces.

        Empty and ``None`` parts are skipped, including tags without text
        and a missing venue or address.
        """

        def tag_text(tags) -> str:
            return ' '.join(tag.text for tag in (tags or ()) if tag.text)

        venue = event.primary_venue
        address = venue.address if venue is not None else None
        text_parts = [
            event.name,
            event.short_description,
            tag_text(event.tags_categories),
            tag_text(event.tags_formats),
            tag_text(event.tags_by_organizer),
            venue.name if venue is not None else None,
            address.region if address is not None else None,
            event.language,
        ]
        return ' '.join(part for part in text_parts if part)

    def _compute_embeddings(self) -> np.ndarray:
        """Encode every stored event's text with the embedding model.

        Returns whatever ``self.model.encode`` yields for a list of strings
        (presumably one row per event -- confirm against the model docs).
        """
        return self.model.encode([self._event_to_text(event) for event in self.events])

    def query_events(self, query: str, top_k: int = 5) -> List["Event"]:
        """Return up to *top_k* events, most similar to *query* first.

        Returns an empty list when there are no events or ``top_k`` is not
        positive. Without this guard ``[-0:]`` would select every event and
        an empty index would reach ``cosine_similarity`` with zero samples.
        """
        if top_k <= 0 or not self.events:
            return []
        query_embedding = self.model.encode(query).reshape(1, -1)
        similarities = cosine_similarity(query_embedding, self.event_embeddings)[0]
        # Ascending argsort, reversed: highest similarity first.
        top_indices = similarities.argsort()[::-1][:top_k]
        return [self.events[idx] for idx in top_indices]
98
+
99
+ # Event Evaluator
100
class EventEvaluator:
    """Turn pipeline search hits into plain dictionaries for display."""

    def __init__(self, pipeline: "EventbriteRAGPipeline") -> None:
        """Store the pipeline whose ``query_events`` method is queried."""
        self.pipeline = pipeline

    @staticmethod
    def _event_to_row(event) -> dict:
        """Build one result row from *event*, tolerating a missing venue."""
        venue = event.primary_venue
        address = venue.address if venue is not None else None
        return {
            "Event Name": event.name,
            "Online Event": event.is_online_event,
            "Start Time": event.start_datetime,
            "Venue Address": address.address_1 if address is not None else None,
            "Venue Name": venue.name if venue is not None else None,
            "Description": event.short_description,
            "Tickets URL": event.tickets_url,
            "Language": event.language,
            "Categories": [tag.text for tag in (event.tags_categories or ())],
        }

    def evaluate_query(self, query: str, top_k: int = 5) -> List[dict]:
        """Evaluate a single query and return results.

        Args:
            query: Free-text search string forwarded to the pipeline.
            top_k: Maximum number of events to request from the pipeline.

        Returns:
            One dictionary per matching event, in the pipeline's order.
        """
        top_events = self.pipeline.query_events(query, top_k=top_k)
        return [self._event_to_row(event) for event in top_events]
122
+
123
# Fetch events from Eventbrite API
# Streamlit re-executes this script from the top on every widget interaction,
# so the network fetch and the embedding-model load are wrapped in
# st.cache_resource and run once per server process rather than per rerun.
@st.cache_resource
def _fetch_events():
    """Fetch the events used to build the search index."""
    client = Eventbrite()
    return client.search_events.get_results(
        region="ca--los-angeles",
        dt_start="2024-11-28",
        dt_end="2024-12-25",
        max_pages=4,
    )


@st.cache_resource
def _build_pipeline():
    """Build the retrieval pipeline over the fetched events."""
    return EventbriteRAGPipeline(_fetch_events())


events = _fetch_events()

# Initialize pipeline and evaluator
rag_pipeline = _build_pipeline()
evaluator = EventEvaluator(rag_pipeline)

# Streamlit UI
st.title("🎟️ Event Search App")

st.write("Find events based on your interests!")

query = st.text_input("🔎 Enter your search query:")
if query:
    results = evaluator.evaluate_query(query)

    if results:
        df = pd.DataFrame(results)
        st.dataframe(df)  # Display results as a formatted table
    else:
        st.warning("No results found.")