diff --git a/core/auto_import_worker.py b/core/auto_import_worker.py index 3144a994..10f1bb73 100644 --- a/core/auto_import_worker.py +++ b/core/auto_import_worker.py @@ -1027,6 +1027,11 @@ class AutoImportWorker: 'album_id': result.get('album_id') if result else None, 'album_name': album or (result.get('album_name') if result else None) or title, 'artist_name': artist, + # Carry the metadata-source artist ID forward when the + # search result had one — without this the standalone + # library write can't populate the source-id column on + # the artists row even though we know the ID. + 'artist_id': result.get('artist_id', '') if result else '', 'track_name': title, 'image_url': result.get('image_url', '') if result else '', 'release_date': tags.get('year', '') or (result.get('release_date', '') if result else ''), @@ -1102,12 +1107,17 @@ class AutoImportWorker: return None r_artist = '' + r_artist_id = '' r_album = '' r_album_id = '' r_image = '' if hasattr(best_result, 'artists') and best_result.artists: a = best_result.artists[0] - r_artist = a.get('name', str(a)) if isinstance(a, dict) else str(a) + if isinstance(a, dict): + r_artist = a.get('name', str(a)) + r_artist_id = str(a.get('id', '') or '') + else: + r_artist = str(a) # Extract image — try direct image_url first (Deezer), then album.images (Spotify) r_image = getattr(best_result, 'image_url', '') or '' @@ -1131,6 +1141,7 @@ class AutoImportWorker: 'album_id': r_album_id or None, 'album_name': r_album or title, 'artist_name': r_artist or artist or '', + 'artist_id': r_artist_id, 'track_name': getattr(best_result, 'name', '') or title, 'track_id': getattr(best_result, 'id', ''), 'image_url': r_image, @@ -1284,9 +1295,20 @@ class AutoImportWorker: image_url = img.get('url', '') if isinstance(img, dict) else str(img) r_artist = '' + r_artist_id = '' if hasattr(best_result, 'artists') and best_result.artists: a = best_result.artists[0] - r_artist = a.get('name', str(a)) if isinstance(a, dict) else str(a) + if isinstance(a, dict): + r_artist = a.get('name', str(a)) + # Surface the metadata-source artist ID so the + # standalone-library write can land it on the right + # `_artist_id` column. Without this the + # artists row gets created but with NULL on the + # source-id, and watchlist scans can't recognise + # the artist as already in library by stable ID. + r_artist_id = str(a.get('id', '') or '') + else: + r_artist = str(a) # Get release date release_date = getattr(best_result, 'release_date', '') or '' @@ -1295,6 +1317,7 @@ class AutoImportWorker: 'album_id': best_result.id, 'album_name': best_result.name, 'artist_name': r_artist or artist or '', + 'artist_id': r_artist_id, 'image_url': image_url, 'release_date': release_date, 'total_tracks': getattr(best_result, 'total_tracks', 0), diff --git a/tests/imports/test_auto_import_context_shape.py b/tests/imports/test_auto_import_context_shape.py index 37155f0c..3cb6995a 100644 --- a/tests/imports/test_auto_import_context_shape.py +++ b/tests/imports/test_auto_import_context_shape.py @@ -253,3 +253,112 @@ def test_track_info_includes_album_id_back_reference(worker_with_capture, tmp_pa ti = worker_with_capture._captured[0]["track_info"] assert ti.get("album_id") == "ALBUM-ID-FROM-SOURCE" + + +# --------------------------------------------------------------------------- +# Artist source-id propagation — identification dict → context → DB write +# --------------------------------------------------------------------------- + + +def test_context_artist_id_uses_identification_artist_id(worker_with_capture, tmp_path): + """When `identification` carries `artist_id` (from the metadata + source's search response), it must end up on + `context['spotify_artist']['id']` so the standalone library write + populates the `_artist_id` column on the artists row. + + Before this fix the worker put `identification['album_id']` into + that field — a copy-paste bug that wrote the album ID into the + artist's source-ID column. Honest pin: artist_id flows from + identification through to context, no falsey fallback.""" + f = tmp_path / "01.flac" + f.write_bytes(b"audio") + cand = _FakeCandidate(path=str(tmp_path), name="Album") + ident = _make_identification("spotify") + ident["artist_id"] = "SPOTIFY-ARTIST-ID-XYZ" + ident["album_id"] = "SPOTIFY-ALBUM-ID-DIFFERENT" + mr = _make_match_result("spotify", 1) + mr["matches"] = [{ + "track": {"id": "t1", "name": "Track", "track_number": 1, + "disc_number": 1, "duration_ms": 200000, + "artists": [{"name": "A"}]}, + "file": str(f), "confidence": 0.95, + }] + + worker_with_capture._process_matches(cand, ident, mr) + + ctx = worker_with_capture._captured[0] + assert ctx["spotify_artist"]["id"] == "SPOTIFY-ARTIST-ID-XYZ", ( + "spotify_artist['id'] should hold the artist's source ID, NOT " + "the album_id (regression case for the prior copy-paste bug)." + ) + # Album artists list must also carry the artist source ID so + # `get_import_source_ids` can resolve it via the album→artists + # fallback path. + assert ctx["spotify_album"]["artists"][0]["id"] == "SPOTIFY-ARTIST-ID-XYZ" + + +def test_context_artist_id_is_empty_when_identification_missing_it(worker_with_capture, tmp_path): + """When the identification dict doesn't surface artist_id (e.g. + filename-only identification fallback), context falls back to + empty string — NOT to album_id (the prior wrong fallback).""" + f = tmp_path / "01.flac" + f.write_bytes(b"audio") + cand = _FakeCandidate(path=str(tmp_path), name="Album") + ident = _make_identification("spotify") + ident.pop("artist_id", None) # force no artist_id + ident["album_id"] = "SOME-ALBUM-ID" + mr = _make_match_result("spotify", 1) + mr["matches"] = [{ + "track": {"id": "t1", "name": "Track", "track_number": 1, + "disc_number": 1, "duration_ms": 200000, + "artists": [{"name": "A"}]}, + "file": str(f), "confidence": 0.95, + }] + + worker_with_capture._process_matches(cand, ident, mr) + + ctx = worker_with_capture._captured[0] + assert ctx["spotify_artist"]["id"] == "", ( + "spotify_artist['id'] must be empty (NULL on the artists row) " + "when the identification dict has no artist_id. It must NEVER " + "fall back to album_id — that was the bug this PR fixed." + ) + + +def test_search_metadata_source_extracts_artist_id_from_dict_artist(): + """`_search_metadata_source` must extract the artist source ID + from `best_result.artists[0]['id']` so identification carries it + forward. Without this, the worker's context-shape contract above + is satisfied syntactically but the DB always sees empty.""" + from core.auto_import_worker import AutoImportWorker, FolderCandidate + from unittest.mock import patch, MagicMock + + fake_album = MagicMock() + fake_album.id = "ALBUM-ID" + fake_album.name = "Test Album" + fake_album.artists = [{"id": "ARTIST-SRC-ID", "name": "Test Artist"}] + fake_album.image_url = "https://img.example/cover.jpg" + fake_album.release_date = "2024-01-01" + fake_album.total_tracks = 10 + + fake_client = MagicMock() + fake_client.search_albums.return_value = [fake_album] + + candidate = FolderCandidate( + path="/staging/album", name="Test Album", + audio_files=[f"/staging/album/0{i}.flac" for i in range(1, 11)], + ) + + worker = AutoImportWorker(database=MagicMock(), process_callback=lambda *a, **k: None) + with patch("core.metadata_service.get_primary_source", return_value="spotify"), \ + patch("core.metadata_service.get_client_for_source", return_value=fake_client): + result = worker._search_metadata_source( + "Test Artist", "Test Album", "tags", candidate, + ) + + assert result is not None + assert result.get("artist_id") == "ARTIST-SRC-ID", ( + "_search_metadata_source must extract artist_id from " + "best_result.artists[0]['id'] so the rest of the pipeline " + "can write it to the artists table." + ) diff --git a/webui/static/helper.js b/webui/static/helper.js index 1bf44555..67127e05 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,7 +3416,7 @@ const WHATS_NEW = { '2.4.3': [ // --- post-release patch work on the 2.4.3 line — entries hidden by _getLatestWhatsNewVersion until the build version bumps --- { date: 'Unreleased — 2.4.3 patch work' }, - { title: 'Auto-Import: SoulSync Standalone Library Now Gets Full Server-Quality Rows', desc: 'soulsync standalone is meant to be a full replacement for plex / jellyfin / navidrome — the imported tracks should land in the db with the same field richness a media server scan would write. they weren\'t. the auto-import context dict (the payload it handed to the post-process pipeline) had no `source` field anywhere, so `record_soulsync_library_entry` couldn\'t pick the right source-id column on the new tracks/albums/artists rows. result: every auto-imported track landed with NULL on `spotify_track_id` / `deezer_id` / `itunes_track_id` / etc. — watchlist scans (which match by stable source IDs) couldn\'t recognise these tracks as already in library and would re-download them on the next pass. fixed by threading `identification[\'source\']` onto the top-level context, plus per-recording IDs (`isrc`, `musicbrainz_recording_id`) onto track_info so picard-tagged libraries land their per-recording metadata directly. also added `_download_username=\'auto_import\'` so library history shows "Auto-Import" instead of mislabeling every staging import as "Soulseek" (the fallback default), and an "auto_import" → "Auto-Import" mapping in the source-map dicts at side_effects.py to honour it. record_soulsync_library_entry tracks INSERT now also writes `musicbrainz_recording_id` + `isrc` columns directly (matches the navidrome scanner write path). 14 new tests pin: auto-import context carries source for every metadata source (spotify/deezer/itunes/discogs), `_download_username=auto_import`, isrc + mbid pass-through to track_info, album-id back-reference on track_info, soulsync library writes mbid + isrc to dedicated columns, deezer source maps to deezer_id column, library history + provenance use Auto-Import / auto_import labels.', page: 'import' }, + { title: 'Auto-Import: SoulSync Standalone Library Now Gets Full Server-Quality Rows', desc: 'soulsync standalone is meant to be a full replacement for plex / jellyfin / navidrome — the imported tracks should land in the db with the same field richness a media server scan would write. they weren\'t. the auto-import context dict (the payload it handed to the post-process pipeline) had no `source` field anywhere, so `record_soulsync_library_entry` couldn\'t pick the right source-id column on the new tracks/albums/artists rows. result: every auto-imported track landed with NULL on `spotify_track_id` / `deezer_id` / `itunes_track_id` / etc. — watchlist scans (which match by stable source IDs) couldn\'t recognise these tracks as already in library and would re-download them on the next pass. fixed by threading `identification[\'source\']` onto the top-level context, plus per-recording IDs (`isrc`, `musicbrainz_recording_id`) onto track_info so picard-tagged libraries land their per-recording metadata directly. also extracted the artist source ID from the metadata source\'s search response (`_search_metadata_source` and `_search_single_track` now pull `best_result.artists[0][\'id\']`) and threaded it through identification → context → standalone library write, so the artists row finally gets its source-ID column populated instead of staying NULL forever. also added `_download_username=\'auto_import\'` so library history shows "Auto-Import" instead of mislabeling every staging import as "Soulseek" (the fallback default), and an "auto_import" → "Auto-Import" mapping in the source-map dicts at side_effects.py to honour it. record_soulsync_library_entry tracks INSERT now also writes `musicbrainz_recording_id` + `isrc` columns directly (matches the navidrome scanner write path). 17 new tests pin: auto-import context carries source for every metadata source (spotify/deezer/itunes/discogs), `_download_username=auto_import`, isrc + mbid pass-through to track_info, album-id back-reference on track_info, artist source-id flows from identification → context (and not from album_id, the prior copy-paste bug), `_search_metadata_source` extracts artist_id from search response, soulsync library writes mbid + isrc to dedicated columns, deezer source maps to deezer_id column, library history + provenance use Auto-Import / auto_import labels.', page: 'import' }, { title: 'Auto-Import: Process Multiple Albums At Once', desc: 'auto-import used to process one album at a time. drop 5 albums into staging → wait for the first to fully finish (identify + match + every track post-processed) before the second one even starts. on a slow network or with a big batch this means 30+ minutes of staring at "Processing AlbumOne" while the others sit untouched. now there\'s a small bounded thread pool (3 workers by default, configurable) — up to 3 albums process in parallel, the queue moves through the rest as workers free up. clicking "Scan Now" multiple times no longer spawns extra unbounded scan threads — every trigger (timer + manual button) routes through one shared scan lock so duplicate triggers no-op instead of stacking up. live progress widget on the auto-import card now lists EACH in-flight album with its own track index/total/name instead of one shared scalar that the parallel workers used to stomp on each other. graceful shutdown: stopping the worker waits for in-flight pool work to finish before reporting stopped — no half-moved files or partial DB writes mid-album. stats counters (`scanned` / `auto_processed` / `pending_review` / `failed`) now use a lock so parallel workers don\'t lose increments under load. 17 new tests pin: pool size config, scan lock dedup, executor dispatch + bounded parallelism, cross-trigger candidate dedup, graceful shutdown, per-candidate UI state isolation across parallel workers, stats counter thread-safety, and snapshot consistency.', page: 'import' }, { title: 'Manual Search In The Failed-Track Candidates Modal', desc: 'when a download fails or returns "not found" the user can already click the status cell to open a modal showing whatever search candidates the auto-search left over and pick a different one. that modal now ALSO has a manual search bar. type any query, hit search, get a fresh round of results from the download sources without having to start the whole download flow over from the search page. solves the case where the auto-query was bad (featured artist not in title, parentheticals like "(remastered 2019)" tripping the matcher, slight artist-name variants) but the file genuinely exists on the source. source picker is smart per download mode: single-source mode (soulseek-only / youtube-only / etc) shows a "searching X" label, no dropdown; hybrid mode shows a dropdown with "all sources" default plus every configured source — picking "all" runs parallel searches across all of them and tags each result row with its source badge. only configured sources show up; unconfigured ones are hidden. results stream in as each source completes via NDJSON instead of blocking on the slowest source — the table starts populating the moment the first source returns. clicking a result reuses the existing retry-download flow → same path, same acoustid verification on the file when it lands, no shortcut around the safety net. additive in the truest sense: the existing modal layout / candidates table / download buttons are byte-identical when the user doesn\'t use manual search. backend extends the candidates endpoint with `download_mode` + `available_sources` + a `source` field per candidate (purely additive — old fields untouched), and adds a new `POST /api/downloads/task//manual-search` that streams NDJSON (one header line, one source_results line per source as completed, one done terminator) so the frontend renderer can append rows incrementally. 11 tests pin the streaming contract: query length / source whitelist / task 404 validation, single-source dispatch, parallel "all" dispatch, one-event-per-source streaming shape, unconfigured-source skip + reject, header metadata, and per-source exception isolation (one source raising emits a `source_error` event but doesn\'t fail the stream).', page: 'downloads' }, { title: 'Manual Picks Don\'t Auto-Retry Anymore (And The Modal Always Opens)', desc: 'three follow-on fixes to the manual-search feature once people started actually using it. (1) when the user picked a candidate and that download failed (e.g. soundcloud 404 on a stale track url), the auto-retry monitor would treat it like any other failed auto-attempt — yank the task back to "searching" and pick a different candidate. felt completely wrong from the user\'s perspective: "i picked THIS one, why is it searching for something else?" now manual picks are tagged with a `_user_manual_pick` flag and the auto-retry path bails on it. failure surfaces to the user instead of getting silently fallen-back. (2) non-soulseek manual picks (youtube / tidal / qobuz / hifi / deezer / soundcloud / lidarr) were getting stuck at "downloading 0%" forever even after their engine reported terminal failure. cause: status polling went into a "let monitor handle retry" branch that never fired because manual picks bail on retry — task was orphaned in downloading state. fix: when the engine reports Errored on a manual pick, mark the task failed directly, don\'t defer to the monitor. plus an engine-state fallback path covers the rare race where the orchestrator\'s pre-populated transfer lookup is missing the entry. (3) failed / not_found rows were only clickable when the auto-search had cached candidates — but the whole point of opening the modal now is to RUN a manual search, which doesn\'t need pre-existing candidates. now every failed / not_found / cancelled row opens the modal regardless. (4) one nasty deadlock fix in the process: the new "mark failed" path was synchronously calling `on_download_completed` while holding `tasks_lock`, which itself re-acquires the same lock — `threading.Lock` is non-reentrant so the polling thread wedged forever. while wedged the lock was held → every other endpoint that needed it (including /candidates → can\'t open OTHER modals) hung waiting. moved completion callbacks onto a daemon thread so the lock releases first. (5) manual download worker now runs on its own dedicated thread instead of competing with the batch\'s 3-worker `missing_download_executor` pool — saturated batches no longer queue manual picks indefinitely. all changes are scoped to manual picks only via the `_user_manual_pick` flag — auto-attempt flow is byte-identical to before. 17 unit tests pin the gate behavior (status engine fallback / monitor retry skip / IF-branch failure transition / auto-attempt skip).', page: 'downloads' },