diff --git a/core/auto_import_worker.py b/core/auto_import_worker.py
index 1f3d6591..910f6808 100644
--- a/core/auto_import_worker.py
+++ b/core/auto_import_worker.py
@@ -1533,6 +1533,17 @@ class AutoImportWorker:
         processed = 0
         errors = []
         all_matches = list(match_result.get('matches', []))
+
+        # Album total duration — sum of every matched track's duration.
+        # Mirrors `SoulSyncAlbum.duration` in soulsync_client (which is
+        # `sum(t.duration for t in self._tracks)`). Without this, the
+        # album row gets whatever the FIRST imported track's duration
+        # was — random per album (would be track 1 for a normal in-
+        # order import, but no guarantee).
+        album_total_duration_ms = sum(
+            int(m.get('track', {}).get('duration_ms', 0) or 0)
+            for m in all_matches
+        )
         # Ensure an active-import entry exists for this candidate.
         # Callers from `_process_one_candidate` already registered, but
         # tests invoke `_process_matches` directly without going
@@ -1658,6 +1669,13 @@ class AutoImportWorker:
                     'images': album_data.get('images', [{'url': image_url}] if image_url else []),
                     'artists': [{'name': artist_name, 'id': identification.get('artist_id') or ''}],
                     'album_type': album_data.get('album_type', 'album'),
+                    # Album total duration in ms (sum of every
+                    # matched track). Read by side_effects to
+                    # populate the album row's `duration` column —
+                    # without this the album row gets whatever
+                    # the first-imported track's duration happened
+                    # to be.
+                    'duration_ms': album_total_duration_ms,
                },
                'track_info': {
                    'name': track_name,
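Editor's note (not part of the patch): a quick sanity sketch of the album-total computation added above. The `sum(...)` expression is verbatim from the hunk; the match payloads are invented to show how the `.get` defaults and the `or 0` guard absorb missing keys and None durations.

```python
# Hypothetical match payloads — shape inferred from the worker hunk above.
all_matches = [
    {"track": {"duration_ms": 200_000}},
    {"track": {"duration_ms": None}},   # `or 0` turns None into 0
    {"track": {}},                      # missing key -> .get default 0
    {"track": {"duration_ms": 2_300_000}},
]
album_total_duration_ms = sum(
    int(m.get('track', {}).get('duration_ms', 0) or 0)
    for m in all_matches
)
assert album_total_duration_ms == 2_500_000
```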
+ """ + allowed = _SOULSYNC_FILLABLE_COLUMNS.get(table, frozenset()) + safe_fields = {col: val for col, val in fields.items() if col in allowed} + if not safe_fields: + return + # Read current values so we can decide per-column whether a fill + # is needed. Single SELECT instead of one-per-column saves + # round-trips. + col_list = ", ".join(safe_fields.keys()) + try: + cursor.execute(f"SELECT {col_list} FROM {table} WHERE id = ?", (row_id,)) + except Exception as e: + logger.debug("fill-empty SELECT on %s failed: %s", table, e) + return + row = cursor.fetchone() + if not row: + return + set_clauses: list[str] = [] + values: list[Any] = [] + for col, new_value in safe_fields.items(): + # Skip when payload itself is empty — no point writing NULL → NULL. + # For numeric columns (year, duration, track_count) 0 means + # "unknown" so treat as no-op too. + if new_value in (None, "", 0): + continue + # Read current value; only fill when it's empty/zero. + try: + current = row[col] + except (KeyError, IndexError): + continue + if current not in (None, "", 0): + continue + set_clauses.append(f"{col} = ?") + values.append(new_value) + if not set_clauses: + return + values.append(row_id) + try: + cursor.execute( + f"UPDATE {table} SET {', '.join(set_clauses)}, updated_at = CURRENT_TIMESTAMP WHERE id = ?", + values, + ) + except Exception as e: + logger.debug("fill-empty UPDATE on %s failed: %s", table, e) + + +def _fill_empty_source_id(cursor, table: str, column: str, value: str, row_id: Any) -> None: + """Single-column variant of _fill_empty_columns for the + `__id` columns whose names come from + `get_library_source_id_columns(source)`.""" + if column not in _SOULSYNC_FILLABLE_COLUMNS.get(table, frozenset()): + logger.debug("skipping non-allowlisted source-id column %s.%s", table, column) + return + if not value: + return + try: + cursor.execute(f"SELECT {column} FROM {table} WHERE id = ?", (row_id,)) + row = cursor.fetchone() + except Exception as e: + logger.debug("fill-empty source-id SELECT on %s.%s failed: %s", table, column, e) + return + if not row: + return + try: + current = row[column] + except (KeyError, IndexError): + return + if current not in (None, ""): + return + try: + cursor.execute( + f"UPDATE {table} SET {column} = ? WHERE id = ?", + (value, row_id), + ) + except Exception as e: + logger.debug("fill-empty source-id UPDATE on %s.%s failed: %s", table, column, e) + + def emit_track_downloaded(context: Dict[str, Any], automation_engine=None) -> None: """Emit the track_downloaded automation event.""" try: @@ -352,71 +461,136 @@ def record_soulsync_library_entry(context: Dict[str, Any], artist_context: Dict[ album_id = _stable_soulsync_id(f"{artist_name}::{album_name}".lower().strip()) track_id = _stable_soulsync_id(final_path) total_tracks = album_ctx.get("total_tracks", 0) or 0 + # Album total duration — auto-import passes the sum of every + # matched track's duration via `album.duration_ms`, mirroring + # what soulsync_client's deep scan computes. Falls back to + # the per-track duration for callers that don't provide an + # album total (legacy direct-download flow). + album_total_duration_ms = int( + album_ctx.get("duration_ms") or duration_ms or 0 + ) db = get_database() with db._get_connection() as conn: cursor = conn.cursor() - cursor.execute("SELECT id FROM artists WHERE id = ? 
AND server_source = 'soulsync'", (artist_id,)) - if not cursor.fetchone(): + # ── Artist row: insert-or-fill-empty-fields ──────────── + # + # Pre-refactor was insert-only: subsequent imports of the + # same artist (same name, second album) found the existing + # row via the name-fallback SELECT and skipped completely. + # That meant artist genres / thumb / source-id reflected + # whatever the FIRST imported album supplied, never + # refreshing as more albums by that artist landed. + # + # Conservative fix: when an existing row matches, run an + # UPDATE that only fills NULL/empty fields (`thumb_url IS + # NULL OR thumb_url = ''`). Never overwrites populated + # values — protects manual edits + enrichment-worker + # writes. + artist_source_col = source_columns.get("artist") + + cursor.execute( + "SELECT id FROM artists WHERE id = ? AND server_source = 'soulsync'", + (artist_id,), + ) + row = cursor.fetchone() + if not row: cursor.execute( "SELECT id FROM artists WHERE name COLLATE NOCASE = ? AND server_source = 'soulsync' LIMIT 1", (artist_name,), ) - existing_by_name = cursor.fetchone() - if existing_by_name: - artist_id = existing_by_name[0] - else: - cursor.execute("SELECT id FROM artists WHERE id = ?", (artist_id,)) - if cursor.fetchone(): - artist_id = _stable_soulsync_id(artist_name.lower().strip() + "::soulsync") - cursor.execute( - """ - INSERT INTO artists (id, name, genres, thumb_url, server_source, created_at, updated_at) - VALUES (?, ?, ?, ?, 'soulsync', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - """, - (artist_id, artist_name, genres_json, image_url), - ) - artist_source_col = source_columns.get("artist") - if artist_source_col and artist_source_id: - try: - cursor.execute( - f"UPDATE artists SET {artist_source_col} = ? WHERE id = ?", - (artist_source_id, artist_id), - ) - except Exception as e: - logger.debug("artist source-id update failed: %s", e) + row = cursor.fetchone() + if row: + artist_id = row[0] + + if row: + _fill_empty_columns( + cursor, + table="artists", + row_id=artist_id, + fields={ + "thumb_url": image_url, + "genres": genres_json, + }, + ) + if artist_source_col and artist_source_id: + _fill_empty_source_id(cursor, "artists", artist_source_col, artist_source_id, artist_id) + else: + # Hash collision protection — if the stable ID is + # already in use by a different server's row, mint a + # soulsync-suffixed ID so we don't trample. + cursor.execute("SELECT id FROM artists WHERE id = ?", (artist_id,)) + if cursor.fetchone(): + artist_id = _stable_soulsync_id(artist_name.lower().strip() + "::soulsync") + cursor.execute( + """ + INSERT INTO artists (id, name, genres, thumb_url, server_source, created_at, updated_at) + VALUES (?, ?, ?, ?, 'soulsync', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """, + (artist_id, artist_name, genres_json, image_url), + ) + if artist_source_col and artist_source_id: + try: + cursor.execute( + f"UPDATE artists SET {artist_source_col} = ? WHERE id = ?", + (artist_source_id, artist_id), + ) + except Exception as e: + logger.debug("artist source-id update failed: %s", e) - cursor.execute("SELECT id FROM albums WHERE id = ? AND server_source = 'soulsync'", (album_id,)) - if not cursor.fetchone(): + # ── Album row: same insert-or-fill-empty-fields shape ── + album_source_col = source_columns.get("album") + + cursor.execute( + "SELECT id FROM albums WHERE id = ? AND server_source = 'soulsync'", + (album_id,), + ) + row = cursor.fetchone() + if not row: cursor.execute( "SELECT id FROM albums WHERE title COLLATE NOCASE = ? AND artist_id = ? 
AND server_source = 'soulsync' LIMIT 1", (album_name, artist_id), ) - existing_album_by_name = cursor.fetchone() - if existing_album_by_name: - album_id = existing_album_by_name[0] - else: - cursor.execute("SELECT id FROM albums WHERE id = ?", (album_id,)) - if cursor.fetchone(): - album_id = _stable_soulsync_id(f"{artist_name}::{album_name}::soulsync".lower().strip()) - cursor.execute( - """ - INSERT INTO albums (id, artist_id, title, year, thumb_url, genres, track_count, - duration, server_source, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'soulsync', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - """, - (album_id, artist_id, album_name, year, image_url, genres_json, total_tracks, duration_ms), - ) - album_source_col = source_columns.get("album") - if album_source_col and album_source_id: - try: - cursor.execute( - f"UPDATE albums SET {album_source_col} = ? WHERE id = ?", - (album_source_id, album_id), - ) - except Exception as e: - logger.debug("album source-id update failed: %s", e) + row = cursor.fetchone() + if row: + album_id = row[0] + + if row: + _fill_empty_columns( + cursor, + table="albums", + row_id=album_id, + fields={ + "thumb_url": image_url, + "genres": genres_json, + "year": year, + "track_count": total_tracks, + "duration": album_total_duration_ms, + }, + ) + if album_source_col and album_source_id: + _fill_empty_source_id(cursor, "albums", album_source_col, album_source_id, album_id) + else: + cursor.execute("SELECT id FROM albums WHERE id = ?", (album_id,)) + if cursor.fetchone(): + album_id = _stable_soulsync_id(f"{artist_name}::{album_name}::soulsync".lower().strip()) + cursor.execute( + """ + INSERT INTO albums (id, artist_id, title, year, thumb_url, genres, track_count, + duration, server_source, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'soulsync', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """, + (album_id, artist_id, album_name, year, image_url, genres_json, total_tracks, album_total_duration_ms), + ) + if album_source_col and album_source_id: + try: + cursor.execute( + f"UPDATE albums SET {album_source_col} = ? WHERE id = ?", + (album_source_id, album_id), + ) + except Exception as e: + logger.debug("album source-id update failed: %s", e) track_artist = None track_artists_list = track_info.get("artists", []) or original_search.get("artists", []) diff --git a/tests/imports/test_import_side_effects.py b/tests/imports/test_import_side_effects.py index 92e00706..455d2d4f 100644 --- a/tests/imports/test_import_side_effects.py +++ b/tests/imports/test_import_side_effects.py @@ -364,6 +364,269 @@ def test_library_history_labels_auto_import(monkeypatch): assert captured["title"] == "Auto-Imported Track" +# --------------------------------------------------------------------------- +# Album duration parity — must equal sum of all track durations, not whatever +# the first imported track happened to be. +# --------------------------------------------------------------------------- + + +def test_album_duration_uses_album_total_not_single_track(tmp_path, monkeypatch): + """Pre-fix `record_soulsync_library_entry` wrote + `track_info.duration_ms` (one track's duration) into the album row's + `duration` column. SoulSync standalone scanner sums every track's + duration to populate that column — mirror it. 
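Editor's note (not part of the patch): the `_fill_empty_columns` docstring's SQLite claim is easy to verify standalone. A minimal sketch — table and values invented — showing why a single SQL-side emptiness check misbehaves across text and integer columns, which is what pushed the empty-check into Python:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (thumb_url TEXT, duration INTEGER)")
conn.execute("INSERT INTO t VALUES ('', 0)")  # both "empty" by the fill rules

# NULLIF uses = semantics: the TEXT value '' never equals 0, so the text
# column slips through a NULLIF(col, 0)-style emptiness check, while the
# INTEGER column collapses to NULL as intended.
print(conn.execute("SELECT NULLIF(thumb_url, 0), NULLIF(duration, 0) FROM t").fetchone())
# -> ('', None)
```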
diff --git a/tests/imports/test_import_side_effects.py b/tests/imports/test_import_side_effects.py
index 92e00706..455d2d4f 100644
--- a/tests/imports/test_import_side_effects.py
+++ b/tests/imports/test_import_side_effects.py
@@ -364,6 +364,269 @@ def test_library_history_labels_auto_import(monkeypatch):
     assert captured["title"] == "Auto-Imported Track"
 
 
+# ---------------------------------------------------------------------------
+# Album duration parity — must equal sum of all track durations, not whatever
+# the first imported track happened to be.
+# ---------------------------------------------------------------------------
+
+
+def test_album_duration_uses_album_total_not_single_track(tmp_path, monkeypatch):
+    """Pre-fix, `record_soulsync_library_entry` wrote
+    `track_info.duration_ms` (one track's duration) into the album row's
+    `duration` column. The SoulSync standalone scanner sums every track's
+    duration to populate that column — mirror it.
+
+    This test passes `album.duration_ms` explicitly on the context (the
+    worker computes it as `sum(match['track']['duration_ms'])`) and
+    verifies the album row reads it instead of falling back to the
+    per-track value."""
+    conn = _make_soulsync_db()
+    fake_db = _FakeDB(conn)
+    final_path = tmp_path / "track5.flac"
+    final_path.write_bytes(b"audio")
+
+    monkeypatch.setattr(side_effects, "get_database", lambda: fake_db)
+    monkeypatch.setattr(
+        side_effects,
+        "_get_config_manager",
+        lambda: SimpleNamespace(get_active_media_server=lambda: "soulsync"),
+    )
+    import core.genre_filter as genre_filter
+    monkeypatch.setattr(genre_filter, "filter_genres", lambda genres, _cfg: genres)
+
+    context = {
+        "source": "spotify",
+        "artist": {"id": "sp-artist", "name": "Artist"},
+        "album": {
+            "id": "sp-album",
+            "name": "Long Album",
+            "release_date": "2024-01-01",
+            "total_tracks": 12,
+            # Sum across the album — the worker computes this from
+            # match_result.matches. Single-track payload below is
+            # 200_000 ms but the album total is 2_500_000.
+            "duration_ms": 2_500_000,
+        },
+        "track_info": {
+            "id": "sp-track-5",
+            "name": "Track 5",
+            "track_number": 5,
+            "duration_ms": 200_000,
+            "artists": [{"name": "Artist"}],
+        },
+        "original_search_result": {"title": "Track 5"},
+        "_final_processed_path": str(final_path),
+    }
+    artist_context = {"name": "Artist", "genres": []}
+    album_info = {"is_album": True, "album_name": "Long Album", "track_number": 5}
+
+    side_effects.record_soulsync_library_entry(context, artist_context, album_info)
+
+    album_row = conn.execute("SELECT duration FROM albums").fetchone()
+    track_row = conn.execute("SELECT duration FROM tracks").fetchone()
+    assert album_row["duration"] == 2_500_000, (
+        f"Album duration must equal album total, got {album_row['duration']}. "
+        f"Bug: it's writing the single track's duration (200_000) instead."
+    )
+    assert track_row["duration"] == 200_000
+
+
+# ---------------------------------------------------------------------------
+# Conservative UPDATE path — second import refreshes empty fields without
+# clobbering populated ones.
+# ---------------------------------------------------------------------------
+
+
+def test_re_import_fills_empty_artist_fields(tmp_path, monkeypatch):
+    """First import lands an artist row with no thumb_url + no genres
+    (e.g. genre tags were absent on those tracks). Second import for
+    the SAME artist comes in with thumb_url + genres present — those
+    must land on the existing row instead of being silently ignored
+    (the pre-fix behaviour was insert-only)."""
+    conn = _make_soulsync_db()
+    fake_db = _FakeDB(conn)
+    final_path1 = tmp_path / "first.flac"
+    final_path1.write_bytes(b"audio")
+    final_path2 = tmp_path / "second.flac"
+    final_path2.write_bytes(b"audio")
+
+    monkeypatch.setattr(side_effects, "get_database", lambda: fake_db)
+    monkeypatch.setattr(
+        side_effects,
+        "_get_config_manager",
+        lambda: SimpleNamespace(get_active_media_server=lambda: "soulsync"),
+    )
+    import core.genre_filter as genre_filter
+    monkeypatch.setattr(genre_filter, "filter_genres", lambda genres, _cfg: genres)
+
+    # First import — artist with no thumb_url + no genres
+    ctx1 = {
+        "source": "spotify",
+        "artist": {"id": "sp-artist", "name": "Same Artist"},
+        "album": {"id": "sp-album-1", "name": "First Album", "total_tracks": 1},
+        "track_info": {"id": "sp-track-1", "name": "T1", "track_number": 1,
+                       "duration_ms": 200000, "artists": [{"name": "Same Artist"}]},
+        "original_search_result": {},
+        "_final_processed_path": str(final_path1),
+    }
+    side_effects.record_soulsync_library_entry(
+        ctx1,
+        {"name": "Same Artist", "genres": []},  # NO genres
+        {"is_album": True, "album_name": "First Album", "track_number": 1},
+    )
+
+    artist_row = conn.execute("SELECT id, thumb_url, genres FROM artists").fetchone()
+    artist_id_first = artist_row["id"]
+    assert artist_row["thumb_url"] in (None, "")
+    assert artist_row["genres"] in (None, "")
+
+    # Second import — artist with thumb + genres present
+    ctx2 = dict(ctx1)
+    ctx2["album"] = {"id": "sp-album-2", "name": "Second Album", "total_tracks": 1,
+                     "image_url": "https://img.example/cover2.jpg"}
+    ctx2["track_info"] = {"id": "sp-track-2", "name": "T2", "track_number": 1,
+                          "duration_ms": 200000, "artists": [{"name": "Same Artist"}]}
+    ctx2["_final_processed_path"] = str(final_path2)
+    side_effects.record_soulsync_library_entry(
+        ctx2,
+        {"name": "Same Artist", "genres": ["Hip-Hop", "Rap"]},
+        {"is_album": True, "album_name": "Second Album", "track_number": 1},
+    )
+
+    # Same artist row updated — empty fields filled
+    artist_row2 = conn.execute("SELECT id, thumb_url, genres FROM artists").fetchone()
+    assert artist_row2["id"] == artist_id_first, "Should reuse existing artist row"
+    assert artist_row2["thumb_url"] == "https://img.example/cover2.jpg", (
+        "Empty thumb_url should be filled from second import"
+    )
+    assert "Hip-Hop" in (artist_row2["genres"] or ""), (
+        "Empty genres should be filled from second import"
+    )
+    # Two album rows now (different albums for same artist)
+    album_count = conn.execute("SELECT COUNT(*) FROM albums").fetchone()[0]
+    assert album_count == 2
+
+
+def test_re_import_does_not_clobber_populated_artist_fields(tmp_path, monkeypatch):
+    """First import with rich genres + thumb. Second import with
+    DIFFERENT (worse) genres + DIFFERENT thumb. Existing populated
+    values must be preserved, not overwritten — protects manual
+    edits + enrichment-worker writes."""
+    conn = _make_soulsync_db()
+    fake_db = _FakeDB(conn)
+    final_path1 = tmp_path / "first.flac"
+    final_path1.write_bytes(b"audio")
+    final_path2 = tmp_path / "second.flac"
+    final_path2.write_bytes(b"audio")
+
+    monkeypatch.setattr(side_effects, "get_database", lambda: fake_db)
+    monkeypatch.setattr(
+        side_effects,
+        "_get_config_manager",
+        lambda: SimpleNamespace(get_active_media_server=lambda: "soulsync"),
+    )
+    import core.genre_filter as genre_filter
+    monkeypatch.setattr(genre_filter, "filter_genres", lambda genres, _cfg: genres)
+
+    ctx1 = {
+        "source": "spotify",
+        "artist": {"id": "sp-artist", "name": "Stable Artist"},
+        "album": {"id": "sp-album-1", "name": "A1", "total_tracks": 1,
+                  "image_url": "https://img.example/original.jpg"},
+        "track_info": {"id": "sp-track-1", "name": "T1", "track_number": 1,
+                       "duration_ms": 200000, "artists": [{"name": "Stable Artist"}]},
+        "original_search_result": {},
+        "_final_processed_path": str(final_path1),
+    }
+    side_effects.record_soulsync_library_entry(
+        ctx1,
+        {"name": "Stable Artist", "genres": ["Hip-Hop", "Rap", "Trap"]},
+        {"is_album": True, "album_name": "A1", "track_number": 1},
+    )
+
+    # Second import with worse / different metadata
+    ctx2 = dict(ctx1)
+    ctx2["album"] = {"id": "sp-album-2", "name": "A2", "total_tracks": 1,
+                     "image_url": "https://img.example/replacement.jpg"}
+    ctx2["track_info"] = {"id": "sp-track-2", "name": "T2", "track_number": 1,
+                          "duration_ms": 200000, "artists": [{"name": "Stable Artist"}]}
+    ctx2["_final_processed_path"] = str(final_path2)
+    side_effects.record_soulsync_library_entry(
+        ctx2,
+        {"name": "Stable Artist", "genres": ["Pop"]},  # Different + worse
+        {"is_album": True, "album_name": "A2", "track_number": 1},
+    )
+
+    artist_row = conn.execute("SELECT thumb_url, genres FROM artists").fetchone()
+    # Original values preserved
+    assert artist_row["thumb_url"] == "https://img.example/original.jpg", (
+        "Existing thumb_url must NOT be clobbered by re-import"
+    )
+    assert "Hip-Hop" in artist_row["genres"], (
+        "Existing genres must NOT be clobbered by re-import"
+    )
+    # NEW values must NOT have replaced the originals
+    assert "replacement" not in (artist_row["thumb_url"] or "")
+    assert "Pop" not in (artist_row["genres"] or "")
+
+
+def test_re_import_fills_empty_source_id_when_missing(tmp_path, monkeypatch):
+    """First import via fingerprint identification — no spotify_artist_id
+    on the artist row. Second import (same artist) via tag-based match
+    that DOES carry a spotify_artist_id. The fill-empty UPDATE must
+    populate the column."""
+    conn = _make_soulsync_db()
+    fake_db = _FakeDB(conn)
+    final_path1 = tmp_path / "first.flac"
+    final_path1.write_bytes(b"audio")
+    final_path2 = tmp_path / "second.flac"
+    final_path2.write_bytes(b"audio")
+
+    monkeypatch.setattr(side_effects, "get_database", lambda: fake_db)
+    monkeypatch.setattr(
+        side_effects,
+        "_get_config_manager",
+        lambda: SimpleNamespace(get_active_media_server=lambda: "soulsync"),
+    )
+    import core.genre_filter as genre_filter
+    monkeypatch.setattr(genre_filter, "filter_genres", lambda genres, _cfg: genres)
+
+    # First import — no source artist ID
+    ctx1 = {
+        "source": "spotify",
+        "artist": {"id": "", "name": "Same Artist"},  # No ID
+        "album": {"id": "sp-album-1", "name": "A1", "total_tracks": 1},
+        "track_info": {"id": "sp-track-1", "name": "T1", "track_number": 1,
+                       "duration_ms": 200000, "artists": [{"name": "Same Artist"}]},
+        "original_search_result": {},
+        "_final_processed_path": str(final_path1),
+    }
+    side_effects.record_soulsync_library_entry(
+        ctx1, {"name": "Same Artist", "genres": []},
+        {"is_album": True, "album_name": "A1", "track_number": 1},
+    )
+
+    artist_row = conn.execute("SELECT spotify_artist_id FROM artists").fetchone()
+    assert artist_row["spotify_artist_id"] in (None, "")
+
+    # Second import — now carries a valid source ID
+    ctx2 = dict(ctx1)
+    ctx2["artist"] = {"id": "sp-artist-real", "name": "Same Artist"}
+    ctx2["album"] = {"id": "sp-album-2", "name": "A2", "total_tracks": 1}
+    ctx2["track_info"] = {"id": "sp-track-2", "name": "T2", "track_number": 1,
+                          "duration_ms": 200000, "artists": [{"name": "Same Artist"}]}
+    ctx2["_final_processed_path"] = str(final_path2)
+    side_effects.record_soulsync_library_entry(
+        ctx2, {"name": "Same Artist", "genres": []},
+        {"is_album": True, "album_name": "A2", "track_number": 1},
+    )
+
+    artist_row2 = conn.execute("SELECT spotify_artist_id FROM artists").fetchone()
+    assert artist_row2["spotify_artist_id"] == "sp-artist-real", (
+        "Empty spotify_artist_id should be filled by the second import. "
+        "This is what makes the watchlist scanner recognise the artist "
+        "as already in library by stable source ID."
+    )
+
+
 def test_provenance_labels_auto_import(monkeypatch):
     """Same gate for provenance: `_download_username='auto_import'` must
     register the provenance row as `auto_import` (lowercase /
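Editor's note (not part of the patch): the re-import tests above only work because row IDs are deterministic across imports. A small sketch of the stable-ID scheme, copied from the `_stable_soulsync_id` context line in the side_effects hunk; the input strings are invented:

```python
import hashlib

def stable_soulsync_id(text: str) -> str:
    # md5 of the normalised text, folded into a 9-digit decimal string —
    # same construction as _stable_soulsync_id in side_effects.py.
    return str(abs(int(hashlib.md5(text.encode("utf-8", errors="replace")).hexdigest(), 16)) % (10 ** 9))

# Re-importing the same artist/album normalises to the same row ID,
# which is what lets the fill-empty path find the earlier row at all.
assert stable_soulsync_id("same artist::first album") == stable_soulsync_id("same artist::first album")

# The collision fallback in the patch re-mints with a "::soulsync"
# suffix, which hashes to a different 9-digit ID for the soulsync row.
print(stable_soulsync_id("same artist::first album"))
print(stable_soulsync_id("same artist::first album::soulsync"))
```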
diff --git a/webui/static/helper.js b/webui/static/helper.js
index eb882149..5d686cb2 100644
--- a/webui/static/helper.js
+++ b/webui/static/helper.js
@@ -3416,6 +3416,7 @@ const WHATS_NEW = {
     '2.4.3': [
         // --- post-release patch work on the 2.4.3 line — entries hidden by _getLatestWhatsNewVersion until the build version bumps ---
         { date: 'Unreleased — 2.4.3 patch work' },
+        { title: 'Auto-Import: Album Duration Is Album Total + Re-Imports Fill Metadata Gaps', desc: 'two more parity gaps closed in the soulsync standalone library write path. (1) album row\'s `duration` column was being written with the FIRST imported track\'s duration instead of the album total — pre-existing bug that survived the prior parity commit. soulsync_client deep scan computes `sum(t.duration for t in self._tracks)` for each album; auto-import now mirrors that by computing the sum across every matched track in the worker and threading it through context to the album INSERT. (2) `record_soulsync_library_entry` was insert-only on artists + albums — once a row existed (matched by id OR name fallback), subsequent imports of the same artist or album skipped completely. meant: artist genres / thumb / source-id reflected ONLY whatever the FIRST imported album supplied, never refreshing as more albums by that artist landed (ten more deezer/spotify imports later, artist row still had whatever the first random import wrote). new conservative UPDATE path: when an existing row matches, fill ONLY the columns whose current value is NULL or empty — never overwrites populated values. protects manual edits + enrichment-worker writes the same way scanner UPDATEs preserve enrichment columns. f-string column names are validated against an allowlist (`_SOULSYNC_FILLABLE_COLUMNS`) before interpolation — defensive against accidental misuse adding columns without an allowlist update. 4 new tests pin: album duration uses sum not single-track, re-import fills empty thumb + genres on existing artist row, re-import does NOT clobber populated values, re-import fills empty source-id columns when later import has them.', page: 'import' },
         { title: 'Auto-Import: Genre Tags Land On The Artists Row + ISRC/MBID Type Hardening', desc: 'small followup to the standalone-library parity commit. (1) auto-import now reads the GENRE tag from each matched audio file (mutagen easy mode, supports flac / mp3 / m4a) and aggregates the deduped set across the album onto the new artists row\'s genres column. matches what soulsync_client._scan_transfer would have written if you\'d done a fresh deep scan after the import — your imported artists no longer feel hollow compared to plex / jellyfin / navidrome scans. dedup is case-insensitive but preserves original casing + insertion order so the json column reads naturally ("Hip-Hop, Rap, Trap" not "hip-hop, rap, trap"). (2) defensive `str()` cast on the worker\'s isrc + mbid extraction. metadata source clients all coerce to string today via `_build_album_track_entry`, but if a future source ever returned int / None for either id the side-effects layer would crash on `.strip()`. cheap insurance. 3 new tests pin: genre aggregation produces deduped insertion-order list, empty when no GENRE tags, isrc/mbid hostile-type input (int, None) coerced to safe string before propagation.', page: 'import' },
         { title: 'Auto-Import: SoulSync Standalone Library Now Gets Full Server-Quality Rows', desc: 'soulsync standalone is meant to be a full replacement for plex / jellyfin / navidrome — the imported tracks should land in the db with the same field richness a media server scan would write. they weren\'t. the auto-import context dict (the payload it handed to the post-process pipeline) had no `source` field anywhere, so `record_soulsync_library_entry` couldn\'t pick the right source-id column on the new tracks/albums/artists rows. result: every auto-imported track landed with NULL on `spotify_track_id` / `deezer_id` / `itunes_track_id` / etc. — watchlist scans (which match by stable source IDs) couldn\'t recognise these tracks as already in library and would re-download them on the next pass. fixed by threading `identification[\'source\']` onto the top-level context, plus per-recording IDs (`isrc`, `musicbrainz_recording_id`) onto track_info so picard-tagged libraries land their per-recording metadata directly. also extracted the artist source ID from the metadata source\'s search response (`_search_metadata_source` and `_search_single_track` now pull `best_result.artists[0][\'id\']`) and threaded it through identification → context → standalone library write, so the artists row finally gets its source-ID column populated instead of staying NULL forever. also added `_download_username=\'auto_import\'` so library history shows "Auto-Import" instead of mislabeling every staging import as "Soulseek" (the fallback default), and an "auto_import" → "Auto-Import" mapping in the source-map dicts at side_effects.py to honour it. record_soulsync_library_entry tracks INSERT now also writes `musicbrainz_recording_id` + `isrc` columns directly (matches the navidrome scanner write path). 17 new tests pin: auto-import context carries source for every metadata source (spotify/deezer/itunes/discogs), `_download_username=auto_import`, isrc + mbid pass-through to track_info, album-id back-reference on track_info, artist source-id flows from identification → context (and not from album_id, the prior copy-paste bug), `_search_metadata_source` extracts artist_id from search response, soulsync library writes mbid + isrc to dedicated columns, deezer source maps to deezer_id column, library history + provenance use Auto-Import / auto_import labels.', page: 'import' },
         { title: 'Auto-Import: Process Multiple Albums At Once', desc: 'auto-import used to process one album at a time. drop 5 albums into staging → wait for the first to fully finish (identify + match + every track post-processed) before the second one even starts. on a slow network or with a big batch this means 30+ minutes of staring at "Processing AlbumOne" while the others sit untouched. now there\'s a small bounded thread pool (3 workers by default, configurable) — up to 3 albums process in parallel, the queue moves through the rest as workers free up. clicking "Scan Now" multiple times no longer spawns extra unbounded scan threads — every trigger (timer + manual button) routes through one shared scan lock so duplicate triggers no-op instead of stacking up. live progress widget on the auto-import card now lists EACH in-flight album with its own track index/total/name instead of one shared scalar that the parallel workers used to stomp on each other. graceful shutdown: stopping the worker waits for in-flight pool work to finish before reporting stopped — no half-moved files or partial DB writes mid-album. stats counters (`scanned` / `auto_processed` / `pending_review` / `failed`) now use a lock so parallel workers don\'t lose increments under load. 17 new tests pin: pool size config, scan lock dedup, executor dispatch + bounded parallelism, cross-trigger candidate dedup, graceful shutdown, per-candidate UI state isolation across parallel workers, stats counter thread-safety, and snapshot consistency.', page: 'import' },
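Editor's note (not part of the patch): the "Genre Tags" changelog entry above describes a case-insensitive dedup that preserves original casing and insertion order. The worker-side implementation isn't in this diff, so this is a hypothetical sketch of the behaviour that entry describes — `dedupe_genres` is an invented name:

```python
def dedupe_genres(genres: list[str]) -> list[str]:
    seen: set[str] = set()
    out: list[str] = []
    for g in genres:
        key = g.casefold()          # case-insensitive dedup key
        if key not in seen:
            seen.add(key)
            out.append(g)           # keep the first-seen original casing
    return out

# "Hip-Hop, Rap, Trap", not "hip-hop, rap, trap"
assert dedupe_genres(["Hip-Hop", "hip-hop", "Rap", "Trap", "rap"]) == ["Hip-Hop", "Rap", "Trap"]
```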