diff --git a/core/auto_import_worker.py b/core/auto_import_worker.py index 42190f9e..412b18ce 100644 --- a/core/auto_import_worker.py +++ b/core/auto_import_worker.py @@ -182,7 +182,13 @@ class AutoImportWorker: # State self._folder_snapshots: Dict[str, float] = {} # path -> mtime_sum - self._processing_paths: set = set() # Paths currently being processed (skip on rescan) + # Candidates currently being processed (skip on rescan). Keyed + # on folder_hash, NOT path — multiple candidates can share a + # path (each loose-file group at staging root has the same + # parent directory but a distinct hash from its own audio + # files). Path-keyed dedup would treat siblings as duplicates + # and silently skip all but the first. + self._processing_hashes: set = set() self._current_folder = '' self._current_status = 'idle' # 'idle' | 'scanning' | 'processing' # Live per-track progress so the UI can show "Processing Speak Now @@ -296,8 +302,12 @@ class AutoImportWorker: self._current_folder = candidate.name - # Skip folders currently being processed by a previous scan cycle - if candidate.path in self._processing_paths: + # Skip candidates currently being processed by a previous + # scan cycle. Keyed on folder_hash because multiple + # candidates can share a path (loose-file groups at the + # same directory level each get their own candidate but + # share the parent directory). + if candidate.folder_hash in self._processing_hashes: logger.debug(f"[Auto-Import] Skipping {candidate.name} — still processing from previous cycle") continue @@ -312,8 +322,8 @@ class AutoImportWorker: self._stats['scanned'] += 1 logger.info(f"[Auto-Import] Processing folder: {candidate.name} ({len(candidate.audio_files)} files)") - # Mark as in-progress so next scan cycle skips this folder - self._processing_paths.add(candidate.path) + # Mark as in-progress so next scan cycle skips this candidate + self._processing_hashes.add(candidate.folder_hash) try: # Phase 3: Identify identification = self._identify_folder(candidate) @@ -403,7 +413,7 @@ class AutoImportWorker: self._record_result(candidate, 'failed', 0.0, error_message=str(e)) self._stats['failed'] += 1 finally: - self._processing_paths.discard(candidate.path) + self._processing_hashes.discard(candidate.folder_hash) # Defensive: if the inner code path didn't reset live # progress (early raise, etc.), clear it so the UI # doesn't show stale "processing track 3/14" forever. @@ -636,14 +646,20 @@ class AutoImportWorker: )) def _is_folder_stable(self, candidate: FolderCandidate) -> bool: - """Check if folder contents have stopped changing.""" + """Check if the candidate's audio files have stopped changing. + + Keyed on folder_hash, NOT path — multiple candidates can share + a path (loose-file groups at the same directory level) so + path-keyed snapshots would overwrite each other's mtimes and + make stability checks unreliable for sibling candidates. + """ try: current_mtime = sum(os.path.getmtime(f) for f in candidate.audio_files if os.path.exists(f)) except OSError: return False - prev = self._folder_snapshots.get(candidate.path) - self._folder_snapshots[candidate.path] = current_mtime + prev = self._folder_snapshots.get(candidate.folder_hash) + self._folder_snapshots[candidate.folder_hash] = current_mtime if prev is None: return False # First scan — wait for next cycle to confirm stability diff --git a/tests/imports/test_auto_import_scanner_grouping.py b/tests/imports/test_auto_import_scanner_grouping.py index 1d13c831..2267525c 100644 --- a/tests/imports/test_auto_import_scanner_grouping.py +++ b/tests/imports/test_auto_import_scanner_grouping.py @@ -242,6 +242,39 @@ def test_disc_folder_with_no_matching_loose_group_becomes_standalone(worker, tmp # --------------------------------------------------------------------------- +def test_sibling_candidates_have_unique_folder_hashes(worker, tmp_path): + """Pin the dedup-collision bug: when loose files at one level + produce multiple candidates (one per album group), each must have + a unique folder_hash. The runtime's `_processing_hashes` set + the + DB's `auto_import_history.folder_hash` index both key on this — + if siblings collide, only the first one gets processed and the + rest silently skip as "still processing from previous cycle." + + Reported on 2026-05-09 — multi-album loose root produced 3 + candidates with the same path. Path-keyed dedup treated them as + duplicates and skipped two of three. Fixed by switching dedup to + folder_hash + ensuring each candidate's hash is unique.""" + for i in range(1, 4): + _write_flac( + str(tmp_path / f'A_{i}.flac'), + album='Album A', track=i, title=f'A {i}', + ) + for i in range(1, 4): + _write_flac( + str(tmp_path / f'B_{i}.flac'), + album='Album B', track=i, title=f'B {i}', + ) + + candidates = [] + worker._scan_directory(str(tmp_path), candidates, staging_root=str(tmp_path)) + + hashes = [c.folder_hash for c in candidates] + assert len(hashes) == len(set(hashes)), ( + f"Sibling candidates collided on folder_hash: {hashes}. " + f"Path-keyed dedup would silently skip duplicates." + ) + + def test_disc_only_directory_still_works(worker, tmp_path): """No loose files, only Disc 1/Disc 2 subfolders → treat parent directory as the album candidate. Pre-existing behavior preserved."""