From 06f3d219b4b0f66d8246e79d9362edafbb400cdb Mon Sep 17 00:00:00 2001 From: Broque Thomas Date: Wed, 1 Oct 2025 19:21:11 -0700 Subject: [PATCH] Add Beatport genre Top 10 lists API and UI integration Introduces backend scraping and API endpoint for Beatport genre Top 10 and Hype Top 10 tracks, with 1-hour caching. Updates frontend to load and display these lists in the genre browser modal, matching main page structure and click handling for chart discovery. --- beatport_unified_scraper.py | 128 ++++++++++++++ web_server.py | 71 ++++++++ webui/static/script.js | 331 +++++++++++++++++++++++++++++++++++- 3 files changed, 529 insertions(+), 1 deletion(-) diff --git a/beatport_unified_scraper.py b/beatport_unified_scraper.py index caaec451..5a566845 100644 --- a/beatport_unified_scraper.py +++ b/beatport_unified_scraper.py @@ -3539,6 +3539,134 @@ class BeatportUnifiedScraper: print(f" 📊 Successfully extracted {len(releases_data)} hero releases") return releases_data + def scrape_genre_top10_tracks(self, genre_slug, genre_id): + """Scrape Top 10 tracks lists from genre page (Beatport Top 10 + Hype Top 10 if available)""" + print(f"🎵 Scraping Top 10 tracks for {genre_slug} (ID: {genre_id})") + + genre_url = f"https://www.beatport.com/genre/{genre_slug}/{genre_id}" + + response = self.session.get(genre_url) + response.raise_for_status() + soup = BeautifulSoup(response.content, 'html.parser') + + # Find all tracks-list-item elements + track_items = soup.find_all(attrs={'data-testid': 'tracks-list-item'}) + + if not track_items: + print(f"❌ No tracks-list-item elements found on {genre_url}") + return { + 'beatport_top10': [], + 'hype_top10': [], + 'total_tracks': 0, + 'has_hype_section': False + } + + print(f"📊 Found {len(track_items)} total track items") + + # Extract track data from all items + all_tracks = [] + for index, item in enumerate(track_items): + track_data = self.extract_track_data_from_item(item, index + 1) + if track_data: + all_tracks.append(track_data) + + # Separate into Beatport Top 10 and Hype Top 10 with proper ranking + beatport_top10 = [] + hype_top10 = [] + + for i, track in enumerate(all_tracks): + if i < 10: + # First 10 tracks = Beatport Top 10 (ranks 1-10) + track_copy = track.copy() + track_copy['rank'] = i + 1 + beatport_top10.append(track_copy) + else: + # Remaining tracks = Hype Top 10 (ranks 1-10, not continuing from 11) + track_copy = track.copy() + track_copy['rank'] = (i - 10) + 1 # Reset ranking for Hype (1, 2, 3...) + hype_top10.append(track_copy) + + has_hype_section = len(all_tracks) > 10 + + print(f"✅ Extracted {len(beatport_top10)} Beatport Top 10 + {len(hype_top10)} Hype Top 10 tracks") + + return { + 'beatport_top10': beatport_top10, + 'hype_top10': hype_top10, + 'total_tracks': len(all_tracks), + 'has_hype_section': has_hype_section + } + + def extract_track_data_from_item(self, track_item, rank): + """Extract structured data from a tracks-list-item element""" + try: + # Extract title + title_elem = track_item.find('a') or track_item.find(class_=re.compile(r'title', re.I)) + title = title_elem.get_text(strip=True) if title_elem else "Unknown Title" + + # Extract URL + url = None + if title_elem and title_elem.name == 'a': + url = title_elem.get('href', '') + if url and not url.startswith('http'): + url = urljoin("https://www.beatport.com", url) + + # Extract artists + artist_links = track_item.find_all('a', href=re.compile(r'/artist/')) + artists = [] + artists_string = "" + + if artist_links: + for artist_link in artist_links: + artist_name = artist_link.get_text(strip=True) + artist_url = artist_link.get('href', '') + if not artist_url.startswith('http'): + artist_url = urljoin("https://www.beatport.com", artist_url) + + if artist_name: + artists.append({ + 'name': artist_name, + 'url': artist_url + }) + + artists_string = ', '.join([a['name'] for a in artists]) + else: + # Fallback: try to find artist text without links + artist_elem = track_item.find(class_=re.compile(r'artist', re.I)) + artists_string = artist_elem.get_text(strip=True) if artist_elem else "Unknown Artist" + + # Extract label + label_elem = track_item.find('a', href=re.compile(r'/label/')) + label = label_elem.get_text(strip=True) if label_elem else "Unknown Label" + + # Extract artwork + img_elem = track_item.find('img') + artwork_url = None + if img_elem: + artwork_url = img_elem.get('src') or img_elem.get('data-src', '') + if artwork_url and not artwork_url.startswith('http'): + artwork_url = urljoin("https://www.beatport.com", artwork_url) + + # Extract any additional metadata + classes = track_item.get('class', []) + + return { + 'title': title, + 'artist': artists_string, + 'artists': artists, + 'label': label, + 'url': url, + 'artwork_url': artwork_url, + 'rank': rank, + 'type': 'track', + 'source': 'genre_page', + 'classes': classes + } + + except Exception as e: + print(f"❌ Error extracting track data: {e}") + return None + def extract_hero_release_data(self, release_element) -> Dict: """Extract structured data from a hero release element""" data = { diff --git a/web_server.py b/web_server.py index cd9d44f2..68370e02 100644 --- a/web_server.py +++ b/web_server.py @@ -13208,6 +13208,77 @@ def get_beatport_genre_hero(genre_slug, genre_id): "genre_id": genre_id }), 500 +@app.route('/api/beatport/genre///top-10-lists', methods=['GET']) +def get_beatport_genre_top10_lists(genre_slug, genre_id): + """Get Top 10 lists (Beatport + Hype) for a specific genre with 1-hour caching""" + try: + logger.info(f"🎵 API request for {genre_slug} Top 10 lists (ID: {genre_id})") + + # Check cache first (1-hour TTL) + cached_data = get_cached_beatport_data('genre', 'top_10_lists', genre_slug) + if cached_data: + logger.info(f"✅ Returning cached Top 10 lists for {genre_slug}") + cached_data['success'] = True + cached_data['cached'] = True + return jsonify(cached_data) + + # Initialize the Beatport scraper + scraper = BeatportUnifiedScraper() + + # Scrape Top 10 lists from genre page + top10_data = scraper.scrape_genre_top10_tracks(genre_slug, genre_id) + + if not top10_data['beatport_top10'] and not top10_data['hype_top10']: + return jsonify({ + "success": False, + "error": "No Top 10 tracks found for this genre", + "beatport_top10": [], + "hype_top10": [], + "beatport_count": 0, + "hype_count": 0, + "has_hype_section": False, + "genre_slug": genre_slug, + "genre_id": genre_id, + "cached": False + }) + + # Prepare response data + response_data = { + "beatport_top10": top10_data['beatport_top10'], + "hype_top10": top10_data['hype_top10'], + "beatport_count": len(top10_data['beatport_top10']), + "hype_count": len(top10_data['hype_top10']), + "has_hype_section": top10_data['has_hype_section'], + "total_tracks": top10_data['total_tracks'], + "genre_slug": genre_slug, + "genre_id": genre_id, + "cached": False, + "cache_ttl": 3600 # 1 hour + } + + # Cache the data (1-hour TTL) + set_cached_beatport_data('genre', 'top_10_lists', response_data, genre_slug) + + logger.info(f"✅ Successfully fetched {response_data['beatport_count']} Beatport + {response_data['hype_count']} Hype Top 10 tracks for {genre_slug}") + + response_data['success'] = True + return jsonify(response_data) + + except Exception as e: + logger.error(f"❌ Error fetching Top 10 lists for {genre_slug}: {e}") + return jsonify({ + "success": False, + "error": str(e), + "beatport_top10": [], + "hype_top10": [], + "beatport_count": 0, + "hype_count": 0, + "has_hype_section": False, + "genre_slug": genre_slug, + "genre_id": genre_id, + "cached": False + }), 500 + @app.route('/api/beatport/genre///sections', methods=['GET']) def get_beatport_genre_sections(genre_slug, genre_id): """Discover all available sections for a specific Beatport genre""" diff --git a/webui/static/script.js b/webui/static/script.js index fb1d91ff..18849fa4 100644 --- a/webui/static/script.js +++ b/webui/static/script.js @@ -21253,7 +21253,11 @@ async function handleGenreBrowserCardClick(genreSlug, genreId, genreName) { showGenrePageView(genreSlug, genreId, genreName); // Load the hero slider data - await loadGenreHeroSlider(genreSlug, genreId, genreName); + // Load hero slider and Top 10 lists in parallel + await Promise.all([ + loadGenreHeroSlider(genreSlug, genreId, genreName), + loadGenreTop10Lists(genreSlug, genreId, genreName) + ]); } catch (error) { console.error(`❌ Error loading genre page for ${genreName}:`, error); @@ -21309,6 +21313,12 @@ function showGenrePageView(genreSlug, genreId, genreName) { +
+
+
+

🎵 Loading Top 10 lists...

+
+
`; modal.querySelector('.genre-browser-modal-content').appendChild(genrePageContent); @@ -21679,6 +21689,325 @@ function startGenreHeroSliderAutoPlay() { console.log(`▶️ Started auto-play for genre hero slider (${window.genreHeroSliderState.totalSlides} slides)`); } +/** + * Load Top 10 lists for a specific genre (Beatport + Hype) + */ +async function loadGenreTop10Lists(genreSlug, genreId, genreName) { + console.log(`🎵 Loading Top 10 lists for ${genreName}...`); + + const container = document.getElementById('genre-top10-lists-container'); + if (!container) { + console.error('❌ Genre Top 10 lists container not found'); + return; + } + + try { + const response = await fetch(`/api/beatport/genre/${genreSlug}/${genreId}/top-10-lists`); + const data = await response.json(); + + if (!data.success) { + throw new Error(data.error || 'Failed to load Top 10 lists'); + } + + console.log(`✅ Loaded ${data.beatport_count} Beatport + ${data.hype_count} Hype Top 10 tracks for ${genreName}`); + + // Generate HTML using exact same structure as main page (but unique IDs) + const top10ListsHTML = createGenreTop10ListsHTML(data, genreName); + container.innerHTML = top10ListsHTML; + + // Add container-level click handlers exactly like main page + addGenreTop10ClickHandlers(); + + console.log(`✅ Successfully populated genre Top 10 lists for ${genreName}`); + + } catch (error) { + console.error(`❌ Error loading Top 10 lists for ${genreName}:`, error); + + // Show error state + container.innerHTML = ` +
+

❌ Error Loading Top 10 Lists

+

Could not load Top 10 tracks for ${genreName}

+

${error.message}

+
+ `; + } +} + +/** + * Create HTML for genre Top 10 lists (exact structure as main page, unique IDs) + */ +function createGenreTop10ListsHTML(data, genreName) { + const { beatport_top10, hype_top10, has_hype_section } = data; + + // Use exact same structure as main page but with genre-specific IDs + let html = ` +
+
+

🏆 ${genreName} Top 10 Lists

+

Current trending ${genreName.toLowerCase()} tracks

+
+ +
+ +
+
+

🎵 Beatport Top 10

+

Most popular ${genreName.toLowerCase()} tracks

+
+
+ `; + + // Add Beatport Top 10 tracks (same classes as main page) + beatport_top10.forEach((track, index) => { + const cleanTitle = cleanTrackText(track.title || 'Unknown Title'); + const cleanArtist = cleanTrackText(track.artist || 'Unknown Artist'); + const cleanLabel = cleanTrackText(track.label || 'Unknown Label'); + + html += ` +
+
${track.rank || index + 1}
+
+ ${track.artwork_url ? + `${cleanTitle}` : + '
🎵
' + } +
+
+

${cleanTitle}

+

${cleanArtist}

+

${cleanLabel}

+
+
+ `; + }); + + html += ` +
+
+ `; + + // Add Hype Top 10 section (same classes, unique ID) + if (has_hype_section && hype_top10.length > 0) { + html += ` + +
+
+

🔥 Hype Top 10

+

Editor's trending ${genreName.toLowerCase()} picks

+
+
+ `; + + // Add Hype Top 10 tracks (same classes as main page) + hype_top10.forEach((track, index) => { + const cleanTitle = cleanTrackText(track.title || 'Unknown Title'); + const cleanArtist = cleanTrackText(track.artist || 'Unknown Artist'); + const cleanLabel = cleanTrackText(track.label || 'Unknown Label'); + + html += ` +
+
${track.rank || index + 1}
+
+ ${track.artwork_url ? + `${cleanTitle}` : + '
🔥
' + } +
+
+

${cleanTitle}

+

${cleanArtist}

+

${cleanLabel}

+
+
+ `; + }); + + html += ` +
+
+ `; + } else { + // Add empty hype placeholder to maintain side-by-side layout + html += ` + +
+
+

🔥 Hype Top 10

+

No trending picks available

+
+
+
+

No Hype Top 10 tracks for this genre

+
+
+
+ `; + } + + html += ` +
+
+ `; + + return html; +} + +/** + * Add container-level click handlers for genre Top 10 lists (exact parity with main page) + */ +function addGenreTop10ClickHandlers() { + console.log('🔗 Adding container-level click handlers for genre Top 10 lists...'); + + // Add container-level click handler for Beatport Top 10 (exact match to main page) + const beatportContainer = document.getElementById('genre-beatport-top10-list'); + if (beatportContainer) { + beatportContainer.addEventListener('click', () => { + console.log('🎵 Genre Beatport Top 10 container clicked'); + handleGenreBeatportTop10Click(); + }); + console.log('✅ Added Beatport Top 10 container click handler'); + } + + // Add container-level click handler for Hype Top 10 (exact match to main page) + const hypeContainer = document.getElementById('genre-beatport-hype10-list'); + if (hypeContainer) { + hypeContainer.addEventListener('click', () => { + console.log('🔥 Genre Hype Top 10 container clicked'); + handleGenreHypeTop10Click(); + }); + console.log('✅ Added Hype Top 10 container click handler'); + } + + console.log(`✅ Set up container-level click handlers for genre Top 10 lists`); +} + +/** + * Handle genre Beatport Top 10 container click (exact parity with main page) + */ +async function handleGenreBeatportTop10Click() { + console.log('🎵 Handling Genre Beatport Top 10 click'); + + // Use exact same pattern as main page + await handleGenreChartClick('genre_beatport_top10', 'Genre Beatport Top 10', 'genre_beatport_top10'); +} + +/** + * Handle genre Hype Top 10 container click (exact parity with main page) + */ +async function handleGenreHypeTop10Click() { + console.log('🔥 Handling Genre Hype Top 10 click'); + + // Use exact same pattern as main page + await handleGenreChartClick('genre_hype_top10', 'Genre Hype Top 10', 'genre_hype_top10'); +} + +/** + * Handle genre chart click (based on main page handleRebuildChartClick) + */ +async function handleGenreChartClick(trackDataKey, chartName, chartType) { + try { + // Create chart hash (following main page pattern) + const chartHash = `${chartType}_${Date.now()}`; + + // Check if we already have an existing state (following main page pattern) + const existingState = Object.values(beatportChartStates).find(state => + state.chart && state.chart.name === chartName && state.chart.chart_type === chartType + ); + + if (existingState) { + console.log(`🔄 Found existing ${chartName} card, opening existing modal`); + // Use existing card click handler (following main page pattern) + handleBeatportCardClick(existingState.chart.hash); + return; + } + + // Extract track data from DOM cards (exact same pattern as main page) + const trackData = await getGenrePageTrackData(trackDataKey); + if (!trackData || trackData.length === 0) { + throw new Error(`No track data found for ${chartName}`); + } + + // Transform DOM data to Browse Charts format EXACTLY like main page + const chartData = { + hash: chartHash, + name: chartName, + chart_type: chartType, + track_count: trackData.length, + tracks: trackData.map(track => ({ + name: cleanTrackText(track.title || 'Unknown Title'), + artists: [cleanTrackText(track.artist || 'Unknown Artist')], + album: chartName, + duration_ms: 0, + external_urls: { beatport: track.url || '' }, + source: 'beatport' + })) + }; + + // Follow main page pattern EXACTLY: + // 1. Add card to container (creates playlist card) + console.log(`🃏 Creating Beatport playlist card for: ${chartData.name}`); + addBeatportCardToContainer(chartData); + + // 2. Automatically open discovery modal (like when you click a card in fresh state) + handleBeatportCardClick(chartHash); + + console.log(`✅ Created ${chartName} card and opened discovery modal`); + + } catch (error) { + console.error(`❌ Error handling ${chartName} click:`, error); + showToast(`Error loading ${chartName}: ${error.message}`, 'error'); + } +} + +/** + * Extract track data from genre page DOM (based on main page getRebuildPageTrackData) + */ +async function getGenrePageTrackData(trackDataKey) { + console.log(`🔍 Extracting ${trackDataKey} data from genre page DOM`); + + let containerSelector, cardSelector; + if (trackDataKey === 'genre_beatport_top10') { + containerSelector = '#genre-beatport-top10-list'; + cardSelector = '.beatport-top10-card[data-url]'; + } else if (trackDataKey === 'genre_hype_top10') { + containerSelector = '#genre-beatport-hype10-list'; + cardSelector = '.beatport-hype10-card[data-url]'; + } else { + throw new Error(`Unknown track data key: ${trackDataKey}`); + } + + const container = document.querySelector(containerSelector); + if (!container) { + throw new Error(`Container ${containerSelector} not found`); + } + + const trackCards = container.querySelectorAll(cardSelector); + if (trackCards.length === 0) { + throw new Error(`No track cards found in ${containerSelector}`); + } + + // Extract track data from DOM cards (exact same pattern as main page) + const tracks = Array.from(trackCards).map(card => { + const title = card.querySelector('.beatport-top10-card-title, .beatport-hype10-card-title')?.textContent?.trim() || 'Unknown Title'; + const artist = card.querySelector('.beatport-top10-card-artist, .beatport-hype10-card-artist')?.textContent?.trim() || 'Unknown Artist'; + const label = card.querySelector('.beatport-top10-card-label, .beatport-hype10-card-label')?.textContent?.trim() || 'Unknown Label'; + const url = card.getAttribute('data-url') || ''; + const rank = card.querySelector('.beatport-top10-card-rank, .beatport-hype10-card-rank')?.textContent?.trim() || ''; + + return { + title: title, + artist: artist, + label: label, + url: url, + rank: rank + }; + }); + + console.log(`📋 Extracted ${tracks.length} tracks from ${containerSelector}`); + return tracks; +} + /** * Handle genre-specific Top 100 button click - create discovery process for genre top 100 tracks */