"""Streamlit app that searches Eventbrite events by semantic similarity.

Events are fetched through ``eventbrite_scrapper``, embedded with a
SentenceTransformer model, and ranked against the user's free-text query
using cosine similarity.
"""
import streamlit as st
import pandas as pd
from eventbrite_scrapper import Eventbrite
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from datetime import datetime
from dataclasses import dataclass, field, replace
from typing import Any, Dict, Iterable, List, Optional, Tuple

# Parameters of the Eventbrite search that feeds the index.
SEARCH_REGION = "ca--los-angeles"
SEARCH_START_DATE = "2025-02-26"
SEARCH_END_DATE = "2025-02-28"
SEARCH_MAX_PAGES = 6


# Dataclasses for event structure
@dataclass(frozen=True)
class EventAddress:
    """Physical location of a venue; every field is optional."""

    latitude: Optional[float] = None
    longitude: Optional[float] = None
    region: Optional[str] = None
    postal_code: Optional[str] = None
    address_1: Optional[str] = None


@dataclass(frozen=True)
class EventVenue:
    """Venue hosting an event."""

    id: Optional[str] = None
    name: Optional[str] = None
    url: Optional[str] = None
    address: EventAddress = field(default_factory=EventAddress)


@dataclass(frozen=True)
class EventImage:
    """Image attached to an event."""

    url: Optional[str] = None


@dataclass(frozen=True)
class EventTag:
    """Single tag; ``text`` is the label used for embedding and display."""

    text: Optional[str] = None


@dataclass(frozen=True)
class Event:
    """Immutable description of one event."""

    id: Optional[str] = None
    name: Optional[str] = None
    url: Optional[str] = None
    is_online_event: bool = False
    short_description: Optional[str] = None
    published_datetime: Optional[datetime] = None
    start_datetime: Optional[datetime] = None
    end_datetime: Optional[datetime] = None
    timezone: Optional[str] = None
    hide_start_date: bool = False
    hide_end_date: bool = False
    parent_event_url: Optional[str] = None
    series_id: Optional[str] = None
    primary_venue: EventVenue = field(default_factory=EventVenue)
    tickets_url: Optional[str] = None
    checkout_flow: Optional[str] = None
    language: Optional[str] = None
    image: EventImage = field(default_factory=EventImage)
    tags_categories: Tuple[EventTag, ...] = field(default_factory=tuple)
    tags_formats: Tuple[EventTag, ...] = field(default_factory=tuple)
    tags_by_organizer: Tuple[EventTag, ...] = field(default_factory=tuple)

    def __hash__(self):
        # Hash on the id when there is one; otherwise fall back to a few
        # identifying fields. Equal events (all fields equal) still hash
        # equally, so this stays consistent with the generated __eq__.
        if self.id:
            return hash(self.id)
        return hash((self.name, self.is_online_event, self.start_datetime, self.primary_venue.name))


def _join_tag_texts(tags: Optional[Iterable[Any]]) -> str:
    """Join the non-empty ``text`` of each tag with spaces.

    Tags whose text is ``None`` or empty are skipped so ``str.join`` never
    receives a non-string; a ``None`` collection yields an empty string.
    """
    return ' '.join(tag.text for tag in (tags or ()) if tag.text)


# Event Retrieval Pipeline
class EventbriteRAGPipeline:
    """Embeds events once and retrieves the ones most similar to a query."""

    def __init__(self, events: List[Event], embedding_model: str = 'all-MiniLM-L6-v2'):
        """Copy *events* with tuple-typed tag fields, load the model, embed.

        Args:
            events: Dataclass event instances to index.
            embedding_model: Name passed to ``SentenceTransformer``.
        """
        self.events = [
            replace(
                event,
                # ``or ()`` tolerates a tag collection that is None.
                tags_categories=tuple(event.tags_categories or ()),
                tags_formats=tuple(event.tags_formats or ()),
                tags_by_organizer=tuple(event.tags_by_organizer or ()),
            )
            for event in events
        ]
        self.model = SentenceTransformer(embedding_model)
        self.event_embeddings = self._compute_embeddings()

    @staticmethod
    def _event_to_text(event: Event) -> str:
        """Flatten the searchable fields of *event* into one string."""
        venue = event.primary_venue
        address = venue.address if venue is not None else None
        text_parts = [
            event.name,
            event.short_description,
            _join_tag_texts(event.tags_categories),
            _join_tag_texts(event.tags_formats),
            _join_tag_texts(event.tags_by_organizer),
            venue.name if venue is not None else None,
            address.region if address is not None else None,
            event.language,
        ]
        # Drop None/empty parts so the text has no doubled separators.
        return ' '.join(part for part in text_parts if part)

    def _compute_embeddings(self) -> np.ndarray:
        """Return the model's encoding of every indexed event's text.

        With no events the model is not called and an empty array is
        returned; ``query_events`` never compares against it.
        """
        if not self.events:
            return np.empty((0, 0), dtype=np.float32)
        return self.model.encode([self._event_to_text(event) for event in self.events])

    def query_events(self, query: str, top_k: int = 5) -> List[Event]:
        """Return up to *top_k* distinct events, most similar to *query* first.

        Events sharing a non-None ``id`` are treated as duplicates and only
        the best-ranked one is kept. Events without an id are never merged
        with each other. Returns an empty list when there is nothing
        indexed or ``top_k`` is not positive.
        """
        if top_k <= 0 or not self.events:
            return []

        query_embedding = self.model.encode(query).reshape(1, -1)
        similarities = cosine_similarity(query_embedding, self.event_embeddings)[0]

        seen_ids = set()
        ranked: List[Event] = []
        # Walk the full ranking (best first) rather than a fixed-size window,
        # so duplicates cannot starve the result of top_k unique events.
        for idx in similarities.argsort()[::-1]:
            event = self.events[idx]
            if event.id is not None:
                if event.id in seen_ids:
                    continue
                seen_ids.add(event.id)
            ranked.append(event)
            if len(ranked) == top_k:
                break
        return ranked


# Event Evaluator
class EventEvaluator:
    """Turns pipeline results into display-ready rows."""

    def __init__(self, pipeline):
        self.pipeline = pipeline

    def evaluate_query(self, query):
        """Evaluate a single query and return results.

        Returns a list of dicts, one per unique event returned by
        ``self.pipeline.query_events(query)``, in the pipeline's order.
        """
        results: List[Dict[str, Any]] = []
        seen = set()
        for event in self.pipeline.query_events(query):
            # Ensure unique events; id-less events cannot be compared by id,
            # so they are always kept.
            if event.id is not None:
                if event.id in seen:
                    continue
                seen.add(event.id)
            venue = event.primary_venue
            address = venue.address if venue is not None else None
            results.append({
                "Event Name": event.name,
                "Online Event": event.is_online_event,
                "Start Time": event.start_datetime,
                "Venue Address": address.address_1 if address is not None else None,
                "Venue Name": venue.name if venue is not None else None,
                "Description": event.short_description,
                "Tickets URL": event.tickets_url,
                "Language": event.language,
                "Categories": [tag.text for tag in (event.tags_categories or ())],
            })
        return results


@st.cache_resource
def _load_evaluator() -> EventEvaluator:
    """Fetch events, build the pipeline, and wrap it in an evaluator.

    Cached as a Streamlit resource: the script is re-executed on each user
    interaction, and without caching every query would repeat the Eventbrite
    fetch, the model load, and the embedding of all events.
    """
    # Fetch events from Eventbrite API
    client = Eventbrite()
    events = client.search_events.get_results(
        region=SEARCH_REGION,
        dt_start=SEARCH_START_DATE,
        dt_end=SEARCH_END_DATE,
        max_pages=SEARCH_MAX_PAGES,
    )
    return EventEvaluator(EventbriteRAGPipeline(events))


# Initialize pipeline and evaluator
evaluator = _load_evaluator()
rag_pipeline = evaluator.pipeline

# Streamlit UI
st.title("🎟️ Event Search App")
st.write("Find events based on your interests!")

query = st.text_input("🔎 Enter your search query:")

if query:
    print(f"🔍 Processing query: {query}")  # Debugging query input
    results = evaluator.evaluate_query(query)
    if results:
        df = pd.DataFrame(results)
        st.dataframe(df)  # Display results as a formatted table
    else:
        st.warning("No results found.")