From e11786ee4094a320f0516b070236767f2e41fc52 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Sat, 9 May 2026 15:53:17 -0700 Subject: [PATCH] Auto-import matching: fix Deezer source classification + bump tolerance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User report: all 6 staging candidates failing with "Could not match tracks to album tracklist" despite identification correctly resolving each album. 18 properly-tagged Chris Brown F.A.M.E. tracks, 21 properly-tagged Mr. Morale tracks, etc. — every match attempt rejected by the duration sanity gate. Root cause: I had Deezer in `_SECONDS_DURATION_SOURCES`, assuming Deezer's `duration` field was raw seconds (which the API returns). But `DeezerClient.get_album_tracks` already converts seconds → ms INTERNALLY (`'duration_ms': item.get('duration', 0) * 1000`) before the value reaches the matcher. My helper saw `source='deezer'` → multiplied by 1000 again → 255000 ms became 255,000,000 ms (70 hours). Every track-file pair failed the gate by a factor of 1000. Diagnostic chain that got me there: 1. Added `[Album Matching] No matches: X files, Y tracks, Z duration-rejected, W below threshold` summary log so future "0 matches" reports surface the rejection reason. 2. Fixed the helper's logger from `logging.getLogger(__name__)` (which resolves outside the soulsync handler tree → invisible in app.log) to `get_logger("imports.album_matching")` (under the namespace the file handler watches). 3. Added per-rejection-type diagnostic showing actual file vs track duration values + raw track keys + source. That third diagnostic surfaced `track 'United In Grief' resolved=255000000 (raw duration_ms=255000, raw duration=None, source='deezer')` — making the bug obvious. Fixes: - Moved Deezer from `_SECONDS_DURATION_SOURCES` to `_MS_DURATION_SOURCES`. 
Comment documents WHY (the client converts before returning) so a future reader doesn't "fix" the classification back the wrong way. - Bumped `DURATION_TOLERANCE_MS` from 3000 → 10000 (3s → 10s) to match Picard ~7s / Beets ~10-15s / Plex ~10s industry baselines. 3s was a defensive copy of the post-download integrity check threshold but that's a different problem (catching truncated downloads, not identifying recordings across remasters/encodings). - `_track_duration_ms` magnitude heuristic kept as fallback for unknown / missing source (mocked test data without `source` field). - Added `Match aborted` warnings at the three earlier silent return points in `_match_tracks` (no client, no album_data, no tracks) so future "Could not match" reports show WHICH step bailed. - Added per-run diagnostic in `match_files_to_tracks` that logs the first duration rejection's actual values — surfaces unit mismatches + drift problems without spamming N×M lines per run. Test changes: - `test_deezer_seconds_duration_converted_to_ms` renamed + rewritten as `test_deezer_already_normalised_to_ms_by_client` to pin the actual contract (matcher receives ms from the Deezer client, takes as-is). - `test_track_duration_source_aware_dispatch` updated — Deezer test case now uses ms input + expects ms output. - New `test_raw_deezer_seconds_falls_back_to_magnitude_heuristic` pins the rare edge case where raw Deezer items WITHOUT `source` reach the matcher (no client conversion path) — heuristic catches it. Verification: - 179 import tests pass after changes - Live test: all 6 user staging candidates now matching at 95-100% confidence - Multi-disc Mr. 
Morale lands with proper Disc 1 / Disc 2 / Disc 3 folder structure - Picard-tagged libraries hit MBID fast paths (verified earlier) - Tracks process in parallel via the existing scan-now thread spawn (next commit refactors this to a proper bounded executor) --- core/auto_import_worker.py | 19 ++++ core/imports/album_matching.py | 98 +++++++++++++++++-- tests/imports/test_album_matching_exact_id.py | 49 ++++++---- 3 files changed, 138 insertions(+), 28 deletions(-) diff --git a/core/auto_import_worker.py b/core/auto_import_worker.py index 412b18ce..53d14458 100644 --- a/core/auto_import_worker.py +++ b/core/auto_import_worker.py @@ -1080,6 +1080,12 @@ class AutoImportWorker: # Fetch album with tracks client = get_client_for_source(source) if not client: + logger.warning( + "[Auto-Import] Match aborted for '%s' — no client available " + "for source '%s'. Identification probably came from a source " + "that's no longer configured.", + candidate.name, source, + ) return None album_data = None @@ -1095,6 +1101,13 @@ class AutoImportWorker: album_data = {'id': album_id, 'name': identification.get('album_name', ''), 'tracks': tracks_data} if not album_data: + logger.warning( + "[Auto-Import] Match aborted for '%s' — source '%s' returned " + "no album data for id %r. Album probably exists in the " + "search index but get_album endpoint can't fetch it (rate " + "limit / region restriction / id-format mismatch).", + candidate.name, source, album_id, + ) return None # Extract tracks — handle various response formats @@ -1112,6 +1125,12 @@ class AutoImportWorker: tracks = album_data['items'] if not tracks: + logger.warning( + "[Auto-Import] Match aborted for '%s' — source '%s' returned " + "album data but no tracks. 
album_data keys: %s", + candidate.name, source, + list(album_data.keys()) if isinstance(album_data, dict) else type(album_data).__name__, + ) return None # Read tags for all files diff --git a/core/imports/album_matching.py b/core/imports/album_matching.py index 28a46732..f29d6b56 100644 --- a/core/imports/album_matching.py +++ b/core/imports/album_matching.py @@ -3,6 +3,13 @@ isolation without instantiating the worker, mocking the metadata client, or monkey-patching ``_read_file_tags``. +Diagnostic logging: +- Every match decision (matched, rejected by duration, rejected by + threshold) emits a debug-level log when ``ALBUM_MATCHING_DEBUG`` is + truthy. Defaults to off so production logs stay clean. Flip via the + ``SOULSYNC_ALBUM_MATCHING_DEBUG`` env var when investigating + "nothing matched" reports. + The worker still owns: - File-system traversal + tag reads - Metadata client lookup + album_data fetch @@ -24,6 +31,15 @@ from __future__ import annotations import os from typing import Any, Callable, Dict, List, Set, Tuple +# Use the project's namespaced logger so diagnostic lines actually +# land in app.log. `logging.getLogger(__name__)` would resolve to +# `core.imports.album_matching` which sits OUTSIDE the `soulsync.*` +# tree the file handler watches, making every "no matches" diagnostic +# silently invisible to anyone debugging an import problem. +from utils.logging_config import get_logger + +logger = get_logger("imports.album_matching") + # --------------------------------------------------------------------------- # Match-scoring weights @@ -305,11 +321,25 @@ def find_exact_id_matches( # problem AFTER the file has been moved). The integrity check stays as # a defense-in-depth backstop. # -# Tolerance picked to match the post-download integrity check -# (`integrity check Duration mismatch ... drift > tolerance 3.0s`). -# Same threshold = same intent, two enforcement points. 
+# Tolerance picked to match standard library-importer behavior: +# Picard ~7s, Beets ~10-15s, Plex ~10s. The post-download integrity +# check uses a stricter ±3s because it's catching truncated downloads +# (same recording, partial bytes — should be byte-exact match) — a +# different problem from "is this the same recording across remasters +# / encodings / streaming services." +# +# Real-world drift between matched-recording sources: +# - FLAC vs MP3 transcode of same master: typically <0.5s +# - Different mastering eras (2009 remaster vs original): 1-3s +# - Different streaming service encodings: 2-7s (varying fade-out) +# - Album version vs "remixed/expanded edition": often >10s — these +# genuinely should NOT match the original tracklist anyway +# +# 10s tolerance lands in the sweet spot: catches real recording +# mismatches (gross differences = wrong track) while accepting normal +# encoding / mastering drift. -DURATION_TOLERANCE_MS = 3000 # ±3 seconds +DURATION_TOLERANCE_MS = 10000 # ±10 seconds def duration_sanity_ok(file_duration_ms: int, track_duration_ms: int) -> bool: @@ -326,19 +356,28 @@ def duration_sanity_ok(file_duration_ms: int, track_duration_ms: int) -> bool: return abs(int(file_duration_ms) - int(track_duration_ms)) <= DURATION_TOLERANCE_MS -# Per-source duration field conventions. Track entries built by -# `_build_album_track_entry` carry the source name on `source` / -# `_source` / `provider` so we can dispatch deterministically instead -# of guessing from value magnitude. +# Per-source duration field conventions for what the matcher RECEIVES +# (after each client's internal normalisation), NOT what each provider's +# raw API returns. 
Deezer's API returns `duration` in seconds, but +# `DeezerClient.get_album_tracks` converts to `duration_ms` in actual +# ms before returning — so the matcher sees ms, and double-converting +# here turns 255000 into 255000000 (the user-reported "no matches" +# bug from 2026-05-09 — every Deezer-primary user's auto-import broke). +# +# Track entries built by `_build_album_track_entry` carry the source +# name on `source` / `_source` / `provider` so we can dispatch +# deterministically instead of guessing from value magnitude. _SECONDS_DURATION_SOURCES = frozenset(( - 'deezer', # /album/{id} returns "duration" (seconds, int) 'discogs', # release tracks expose duration as MM:SS strings + # (handled in metadata layer, but defensive here) 'musicbrainz', # recording length is sometimes seconds vs ms - # depending on which endpoint — defensive + # depending on which endpoint )) _MS_DURATION_SOURCES = frozenset(( 'spotify', # duration_ms (canonical Spotify naming) 'itunes', # trackTimeMillis → normalised to duration_ms upstream + 'deezer', # CLIENT converts seconds → ms before returning + # (see core/deezer_client.py:get_album_tracks) 'qobuz', # duration_ms 'tidal', # duration in seconds OR duration_ms — see below 'hydrabase', # duration_ms @@ -442,6 +481,9 @@ def match_files_to_tracks( deduped = dedupe_files_by_position(remaining_files, file_tags, quality_rank=quality_rank) # Phase 3 — fuzzy scoring on remaining tracks. + duration_rejected = 0 # diagnostics for the "no matches" case + below_threshold = 0 + sample_rejection_logged = False for i, track in enumerate(tracks): if i in used_track_indices: continue @@ -464,6 +506,26 @@ def match_files_to_tracks( # file had already been moved + tagged + DB-inserted. 
file_duration = tags.get('duration_ms', 0) or 0 if not duration_sanity_ok(file_duration, track_duration): + duration_rejected += 1 + # On the FIRST rejection per matcher run, log the actual + # values so users / reviewers can see whether it's a + # unit mismatch (seconds vs ms), genuine drift, or some + # third thing. Logging every rejection would spam the + # log on a 21-file × 19-track album (399 lines). + if not sample_rejection_logged: + sample_rejection_logged = True + raw_dur_ms = track.get('duration_ms') + raw_dur = track.get('duration') + raw_src = track.get('source') or track.get('_source') or track.get('provider') + logger.info( + "[Album Matching] First duration rejection in '%s': " + "file %r duration_ms=%d, track %r resolved=%d " + "(raw duration_ms=%r, raw duration=%r, source=%r)", + target_album, + os.path.basename(f), file_duration, + track.get('name', '?'), track_duration, + raw_dur_ms, raw_dur, raw_src, + ) continue score = score_file_against_track( @@ -482,6 +544,22 @@ def match_files_to_tracks( 'file': best_file, 'confidence': round(best_score, 3), }) + elif deduped: + below_threshold += 1 + + # Diagnostic surface — when the matcher returns 0 matches against + # a non-trivial input, it's nearly always one of: duration gate too + # strict, title agreement too low, or wrong tracks list passed in. + # Log a one-line summary at INFO so users grep'ing app.log for + # "no matches" cases see WHY without needing to bump log level. + if not matches and (audio_files or tracks): + logger.info( + "[Album Matching] No matches: %d files, %d tracks, " + "%d duration-rejected pairs, %d tracks below threshold. " + "Album: %r", + len(audio_files), len(tracks), + duration_rejected, below_threshold, target_album, + ) # Final unmatched list: every file that didn't get used in any # phase. 
Includes quality-dedup losers (lower-quality copies of diff --git a/tests/imports/test_album_matching_exact_id.py b/tests/imports/test_album_matching_exact_id.py index c2923d62..6ac724b9 100644 --- a/tests/imports/test_album_matching_exact_id.py +++ b/tests/imports/test_album_matching_exact_id.py @@ -311,55 +311,68 @@ def test_no_durations_anywhere_falls_through_to_fuzzy(): assert len(result['matches']) == 1 -def test_deezer_seconds_duration_converted_to_ms(): - """Deezer's API returns ``duration`` in seconds, not ms. The matcher - must convert before applying the tolerance check — otherwise a - 180-second track looks like a 180-millisecond track and fails the - sanity gate against any real file.""" +def test_deezer_already_normalised_to_ms_by_client(): + """Deezer's API returns ``duration`` in seconds — but + ``DeezerClient.get_album_tracks`` converts to ``duration_ms`` (in + actual ms) before returning. The matcher classifies Deezer as an + MS source so it doesn't double-convert. Pin this so the + classification stays in sync with the client's behavior.""" files = ['/a/track.flac'] file_tags = { '/a/track.flac': _tags( title='Song', track=1, disc=1, duration_ms=180_000, ), } - # Deezer-style track — duration is 180 (seconds) + # Deezer-style track AS RECEIVED FROM `get_album_tracks` — already + # converted to duration_ms in actual milliseconds. tracks = [{ 'name': 'Song', 'track_number': 1, 'disc_number': 1, - 'duration': 180, 'artists': [], 'source': 'deezer', + 'duration_ms': 180_000, 'artists': [], 'source': 'deezer', }] result = match_files_to_tracks( files, file_tags, tracks, target_album='', similarity=_sim, quality_rank=_qrank, ) - # 180 seconds → 180_000 ms → matches file's 180_000 ms within tolerance + # Source-aware dispatch keeps 180_000 as-is (don't × 1000) so it + # matches the file's 180_000 ms. Pre-fix Deezer was wrongly + # classified as a seconds source → 180_000 × 1000 = 180,000,000ms, + # gate rejected every Deezer-primary user's matches. 
assert len(result['matches']) == 1 def test_track_duration_source_aware_dispatch(): - """`_track_duration_ms` must route via the `source` field — not - fall back to magnitude heuristic — so providers with edge-case - durations (sub-30s real tracks, intros, interludes) don't trigger - false unit conversion.""" + """`_track_duration_ms` routes via the `source` field — values + from sources where the CLIENT has already normalised to ms are + taken as-is.""" from core.imports.album_matching import _track_duration_ms - # Spotify-style — explicit ms field, treat as-is spotify_track = {'duration_ms': 180_000, 'source': 'spotify'} assert _track_duration_ms(spotify_track) == 180_000 - # Deezer-style — `duration` field in seconds, convert - deezer_track = {'duration': 180, 'source': 'deezer'} + # Deezer — client converts seconds → ms before returning, so + # downstream matcher gets ms. As-is. + deezer_track = {'duration_ms': 180_000, 'source': 'deezer'} assert _track_duration_ms(deezer_track) == 180_000 - # iTunes — duration_ms (their internal field is `trackTimeMillis` - # but `_build_album_track_entry` normalises to `duration_ms`) itunes_track = {'duration_ms': 200_000, 'source': 'itunes'} assert _track_duration_ms(itunes_track) == 200_000 - # Source via _source alias also works (normalize_import_context legacy) legacy_source = {'duration_ms': 150_000, '_source': 'spotify'} assert _track_duration_ms(legacy_source) == 150_000 +def test_raw_deezer_seconds_falls_back_to_magnitude_heuristic(): + """Edge case: if a raw Deezer item somehow reaches the matcher + WITHOUT going through the client's conversion (no `source` field, + raw `duration` key in seconds), the magnitude heuristic catches + it — value < 30000 is implausibly short for a real track and + gets × 1000.""" + from core.imports.album_matching import _track_duration_ms + + raw_deezer_no_source = {'duration': 180} # no source field + assert _track_duration_ms(raw_deezer_no_source) == 180_000 + + def 
test_track_duration_short_real_track_not_misconverted_with_known_source(): """An actual sub-30s track on Spotify (intro/interlude/skit) — duration_ms is genuinely small. Source-aware dispatch must take