From 5834177f826e02d581b0a6f1bcb94c2f587c36be Mon Sep 17 00:00:00 2001
From: Broque Thomas
Date: Wed, 1 Oct 2025 19:52:22 -0700
Subject: [PATCH] beatport progress

---
 beatport_unified_scraper.py |  99 +++++++++++++++
 web_server.py               |  61 +++++++++
 webui/static/script.js      | 240 +++++++++++++++++++++++++++++++++++-
 3 files changed, 398 insertions(+), 2 deletions(-)

diff --git a/beatport_unified_scraper.py b/beatport_unified_scraper.py
index 5a566845..ab02715d 100644
--- a/beatport_unified_scraper.py
+++ b/beatport_unified_scraper.py
@@ -3667,6 +3667,105 @@ class BeatportUnifiedScraper:
             print(f"❌ Error extracting track data: {e}")
             return None
 
+    def scrape_genre_top10_releases(self, genre_slug, genre_id):
+        """Scrape the Top 10 releases shown on a Beatport genre page.
+
+        Turns every ``.partial-artwork`` element of the genre page into a
+        release dict ranked 1..N in page order; returns [] when there is none.
+        Errors from the request or raise_for_status() are not caught here.
+        """
+        print(f"💿 Scraping Top 10 releases for {genre_slug} (ID: {genre_id})")
+        genre_url = f"https://www.beatport.com/genre/{genre_slug}/{genre_id}"
+        # Bound the request so a stalled connection cannot block the caller forever.
+        response = self.session.get(genre_url, timeout=30)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, 'html.parser')
+
+        # Find all .partial-artwork elements (should return exactly 10)
+        partial_artwork_elements = soup.find_all(class_='partial-artwork')
+        if not partial_artwork_elements:
+            print(f"❌ No .partial-artwork elements found on {genre_url}")
+            return []
+
+        print(f"📊 Found {len(partial_artwork_elements)} .partial-artwork elements")
+
+        # Rank is the 1-based page position; elements that fail to parse are skipped.
+        releases = []
+        for rank, element in enumerate(partial_artwork_elements, start=1):
+            release_data = self.extract_release_data_from_partial_artwork(element, rank)
+            if release_data:
+                releases.append(release_data)
+
+        print(f"✅ Extracted {len(releases)} Top 10 releases")
+        return releases
+
+    def extract_release_data_from_partial_artwork(self, artwork_element, rank):
+        """Build a release dict from one ``.partial-artwork`` element.
+
+        Title: image alt text, else the first link's title attribute. Artist and
+        label: text of the first /artist/ and /label/ link. Fields that are not
+        found keep their "Unknown ..." placeholder. Relative URLs are resolved
+        against https://www.beatport.com. Returns None if parsing raised.
+        """
+        try:
+            title = "Unknown Release"
+            image_url = None
+            img_elem = artwork_element.find('img')
+            if img_elem:
+                # "or None" reports a missing image as None instead of ''.
+                image_url = img_elem.get('src') or img_elem.get('data-src') or None
+                if image_url and not image_url.startswith('http'):
+                    image_url = urljoin("https://www.beatport.com", image_url)
+
+                # Extract title from img alt attribute (proven method)
+                alt_text = img_elem.get('alt', '').strip()
+                if alt_text:
+                    title = alt_text
+
+            # Extract URL from main link
+            url = None
+            link_elem = artwork_element.find('a')
+            if link_elem:
+                href = link_elem.get('href', '')
+                if href:
+                    url = urljoin("https://www.beatport.com", href)
+
+                # If no title from img alt, try title attribute from link
+                if title == "Unknown Release":
+                    title = link_elem.get('title', '').strip() or title
+
+            # Extract artist from the first artist link (main artist)
+            artist = "Unknown Artist"
+            artist_link = artwork_element.find('a', href=re.compile(r'/artist/'))
+            if artist_link:
+                artist = artist_link.get_text(strip=True) or artist
+
+            # Extract label from label links
+            label = "Unknown Label"
+            label_link = artwork_element.find('a', href=re.compile(r'/label/'))
+            if label_link:
+                label = label_link.get_text(strip=True) or label
+
+            # Clean real values only; placeholders are passed through untouched.
+            title = self.clean_beatport_text(title) if title != "Unknown Release" else title
+            artist = self.clean_beatport_text(artist) if artist != "Unknown Artist" else artist
+            label = self.clean_beatport_text(label) if label != "Unknown Label" else label
+
+            print(f" 📦 Release #{rank}: '{title}' by '{artist}' [{label}]")
+            return {
+                'title': title,
+                'artist': artist,
+                'label': label,
+                'url': url,
+                'image_url': image_url,
+                'rank': rank,
+                'type': 'release',
+                'source': 'genre_partial_artwork'
+            }
+        except Exception as e:
+            print(f"❌ Error extracting release data from .partial-artwork: {e}")
+            return None
+
     def extract_hero_release_data(self, release_element) -> Dict:
         """Extract
structured data from a hero release element"""
         data = {
diff --git a/web_server.py b/web_server.py
index 68370e02..0632b156 100644
--- a/web_server.py
+++ b/web_server.py
@@ -13279,6 +13279,67 @@ def get_beatport_genre_top10_lists(genre_slug, genre_id):
             "cached": False
         }), 500
 
+@app.route('/api/beatport/genre/<genre_slug>/<genre_id>/top-10-releases', methods=['GET'])
+def get_beatport_genre_top10_releases(genre_slug, genre_id):
+    """Return a genre's Top 10 releases as JSON, served from cache when present.
+
+    Responds 200 with "success": false when nothing was scraped, 500 on errors.
+    """
+    try:
+        logger.info(f"💿 API request for {genre_slug} Top 10 releases (ID: {genre_id})")
+
+        # Check cache first (1-hour TTL)
+        cached_data = get_cached_beatport_data('genre', 'top_10_releases', genre_slug)
+        if cached_data:
+            logger.info(f"✅ Returning cached Top 10 releases for {genre_slug}")
+            # Merge into a new dict so the cached entry itself is not mutated.
+            return jsonify({**cached_data, 'success': True, 'cached': True})
+
+        # Scrape Top 10 releases from genre page
+        scraper = BeatportUnifiedScraper()
+        releases = scraper.scrape_genre_top10_releases(genre_slug, genre_id)
+
+        if not releases:
+            return jsonify({
+                "success": False,
+                "error": "No Top 10 releases found for this genre",
+                "releases": [],
+                "releases_count": 0,
+                "genre_slug": genre_slug,
+                "genre_id": genre_id,
+                "cached": False
+            })
+
+        # Prepare response data
+        response_data = {
+            "releases": releases,
+            "releases_count": len(releases),
+            "genre_slug": genre_slug,
+            "genre_id": genre_id,
+            "cached": False,
+            "cache_ttl": 3600  # 1 hour
+        }
+
+        # Cache the data (1-hour TTL)
+        set_cached_beatport_data('genre', 'top_10_releases', response_data, genre_slug)
+
+        logger.info(f"✅ Successfully fetched {response_data['releases_count']} Top 10 releases for {genre_slug}")
+
+        response_data['success'] = True
+        return jsonify(response_data)
+
+    except Exception as e:
+        logger.error(f"❌ Error fetching Top 10 releases for {genre_slug}: {e}")
+        return jsonify({
+            "success": False,
+            "error": str(e),
+            "releases": [],
+            "releases_count": 0,
+            "genre_slug": genre_slug,
+            "genre_id": genre_id,
+            "cached": False
+        }), 500
+
 @app.route('/api/beatport/genre/<genre_slug>/<genre_id>/sections', methods=['GET'])
 def get_beatport_genre_sections(genre_slug, genre_id):
     """Discover all available sections for a specific Beatport genre"""
diff --git a/webui/static/script.js b/webui/static/script.js
index 1c422d2f..58308659 100644
--- a/webui/static/script.js
+++ b/webui/static/script.js
@@ -21253,10 +21253,11 @@ async function handleGenreBrowserCardClick(genreSlug, genreId, genreName) {
         showGenrePageView(genreSlug, genreId, genreName);
 
         // Load the hero slider data
-        // Load hero slider and Top 10 lists in parallel
+        // Load hero slider, Top 10 lists, and Top 10 releases in parallel
         await Promise.all([
             loadGenreHeroSlider(genreSlug, genreId, genreName),
-            loadGenreTop10Lists(genreSlug, genreId, genreName)
+            loadGenreTop10Lists(genreSlug, genreId, genreName),
+            loadGenreTop10Releases(genreSlug, genreId, genreName)
         ]);
 
     } catch (error) {
@@ -21319,6 +21320,12 @@ function showGenrePageView(genreSlug, genreId, genreName) {

🎵 Loading Top 10 lists...

+
+
+
+

💿 Loading Top 10 releases...

+
+
`; modal.querySelector('.genre-browser-modal-content').appendChild(genrePageContent); @@ -22088,6 +22095,235 @@ async function handleGenreTop100Click(genreSlug, genreId, genreName) { } } +/** + * Load Top 10 releases for a specific genre + */ +async function loadGenreTop10Releases(genreSlug, genreId, genreName) { + console.log(`💿 Loading Top 10 releases for ${genreName}...`); + + const container = document.getElementById('genre-top10-releases-container'); + if (!container) { + console.error('❌ Genre Top 10 releases container not found'); + return; + } + + try { + const response = await fetch(`/api/beatport/genre/${genreSlug}/${genreId}/top-10-releases`); + const data = await response.json(); + + if (!data.success) { + throw new Error(data.error || 'Failed to load Top 10 releases'); + } + + console.log(`💿 Loaded ${data.releases.length} Top 10 releases for ${genreName}`); + createGenreTop10ReleasesHTML(data.releases, genreName); + + } catch (error) { + console.error(`❌ Error loading Top 10 releases for ${genreName}:`, error); + showGenreTop10ReleasesError(error.message || 'Failed to load Top 10 releases'); + } +} + +/** + * Create HTML for genre Top 10 releases section (exact parity with main page) + */ +function createGenreTop10ReleasesHTML(releases, genreName) { + const container = document.getElementById('genre-top10-releases-container'); + if (!container || !releases || releases.length === 0) return; + + // Create section with unique ID but exact same structure as main page + const sectionHtml = ` +
+
+

💿 Top 10 ${genreName} Releases

+

Most popular albums and EPs for ${genreName}

+
+
+
+ ${createGenreTop10ReleasesCardsHTML(releases)} +
+
+
+ `; + + container.innerHTML = sectionHtml; + + // Add background images and click handlers + addGenreTop10ReleasesInteractivity(releases); +} + +/** + * Create release cards HTML for genre Top 10 releases + */ +function createGenreTop10ReleasesCardsHTML(releases) { + let cardsHtml = '
'; + + releases.forEach((release, index) => { + cardsHtml += ` +
+
${release.rank || index + 1}
+
+ ${release.image_url ? + `${release.title}` : + '
💿
' + } +
+
+

${release.title || 'Unknown Title'}

+

${release.artist || 'Unknown Artist'}

+

${release.label || 'Unknown Label'}

+
+
+ `; + }); + + cardsHtml += '
'; + return cardsHtml; +} + +/** + * Add interactivity to genre Top 10 releases cards + */ +function addGenreTop10ReleasesInteractivity(releases) { + const container = document.getElementById('genre-beatport-releases-top10-list'); + if (!container) return; + + // Set background images for cards + const cards = container.querySelectorAll('.beatport-releases-top10-card[data-bg-image]'); + cards.forEach(card => { + const bgImage = card.getAttribute('data-bg-image'); + if (bgImage) { + // Transform image URL from 95x95 to 500x500 for higher quality background + const highResImage = bgImage.replace('/image_size/95x95/', '/image_size/500x500/'); + card.style.backgroundImage = `linear-gradient(rgba(0,0,0,0.7), rgba(0,0,0,0.8)), url('${highResImage}')`; + card.style.backgroundSize = 'cover'; + card.style.backgroundPosition = 'center'; + } + }); + + // Add click handlers for individual release discovery (exact same pattern as main page) + const releaseCards = container.querySelectorAll('.beatport-releases-top10-card[data-url]'); + releaseCards.forEach((card, index) => { + card.addEventListener('click', () => handleGenreReleaseCardClick(card, releases[index])); + card.style.cursor = 'pointer'; + }); +} + +/** + * Handle click on individual genre Top 10 Release card (exact parity with main page) + */ +async function handleGenreReleaseCardClick(cardElement, release) { + console.log(`💿 Individual genre release card clicked: ${release.title} by ${release.artist}`); + + if (!release.url || release.url === '#') { + showToast('No release URL available', 'error'); + return; + } + + try { + // Create unique identifiers for this release + const releaseHash = `genre_release_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + const chartName = `${release.title} - ${release.artist}`; + + showToast(`Loading ${release.title}...`, 'info'); + showLoadingOverlay(`Getting tracks from ${release.title}...`); + + // Check if we already have a card for this release + const existingState = 
Object.values(beatportChartStates).find(state => + state.chart && + state.chart.name === chartName && + state.chart.chart_type === 'individual_release' + ); + + if (existingState) { + console.log(`🔄 Found existing card for ${release.title}, opening existing modal`); + hideLoadingOverlay(); + handleBeatportCardClick(existingState.chart.hash); + return; + } + + // Get track data from this single release (exact same API call as main page) + console.log(`🎵 Fetching tracks from release: ${release.url}`); + const response = await fetch('/api/beatport/scrape-releases', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + release_urls: [release.url], + source_name: `Genre Top 10 Release: ${release.title}` + }) + }); + + const data = await response.json(); + + if (!data.success || !data.tracks || data.tracks.length === 0) { + throw new Error('No tracks found in this release'); + } + + console.log(`✅ Successfully fetched ${data.tracks.length} tracks from ${release.title}`); + + // Transform to standard chart format (exact same pattern as main page) + const chartData = { + hash: releaseHash, + name: chartName, + chart_type: 'individual_release', + track_count: data.tracks.length, + tracks: data.tracks.map(track => ({ + name: cleanTrackText(track.title || 'Unknown Title'), + artists: [cleanTrackText(track.artist || 'Unknown Artist')], + album: chartName, + duration_ms: 0, + external_urls: { beatport: track.url || '' }, + source: 'beatport', + // Include release metadata + release_title: release.title, + release_artist: release.artist, + release_label: release.label, + release_image: release.image_url + })) + }; + + // Create Beatport playlist card (exact same pattern as main page) + addBeatportCardToContainer(chartData); + + // Automatically open discovery modal (exact same pattern as main page) + hideLoadingOverlay(); + handleBeatportCardClick(releaseHash); + + console.log(`✅ Created individual release card and opened discovery 
modal for ${release.title}`); + + } catch (error) { + console.error(`❌ Error handling release click for ${release.title}:`, error); + hideLoadingOverlay(); + showToast(`Error loading ${release.title}: ${error.message}`, 'error'); + } +} + +/** + * Show error message for genre Top 10 releases + */ +function showGenreTop10ReleasesError(errorMessage) { + const container = document.getElementById('genre-top10-releases-container'); + + const errorHtml = ` +
+
+

💿 Top 10 Releases

+

Error loading releases

+
+
+
+

❌ Error Loading Releases

+

${errorMessage}

+
+
+
+ `; + + if (container) container.innerHTML = errorHtml; +} + // Initialize the Genre Browser Modal when the page loads document.addEventListener('DOMContentLoaded', () => { initializeGenreBrowserModal();