You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/library/duplicate_keep.py

58 lines
2.2 KiB

"""Choosing which copy of a duplicate track to keep.

The keeper is the highest-quality copy, and **format/lossless is the primary
criterion**: a lossless FLAC must beat a lossy MP3 regardless of the recorded
bitrate number — a FLAC whose bitrate the library scan never populated (a
common gap) still has to win over a 282 kbps MP3. Only *within* the same format
tier do bitrate, then duration, then track number break the tie.

This was lifted out of the repair worker so the deletion path and the UI's
"Keep Best" recommendation rank identically (previously both ranked by bitrate
first, which kept the MP3 when the FLAC's bitrate was missing), and so the
ranking is unit-testable in isolation.
"""
from __future__ import annotations
import os
from typing import Dict, List, Optional, Tuple
# Lower-cased file extension -> format quality rank; a larger number is a
# better format. Mirrors ``auto_import_worker._quality_rank`` so lossless
# always outranks lossy.
_FORMAT_RANK = {
    ".flac": 10,
    ".wav": 9,
    ".aiff": 9,
    ".aif": 9,
    ".ape": 8,
    ".m4a": 7,
    ".ogg": 6,
    ".opus": 6,
    ".mp3": 5,
    ".aac": 5,
    ".wma": 3,
}


def format_rank_for_path(file_path: Optional[str]) -> int:
    """Return the format quality rank implied by a file's extension.

    The extension is compared case-insensitively against ``_FORMAT_RANK``.
    A missing/empty path, a path with no extension, or an extension that is
    not in the table all yield the lowest rank, ``1``.
    """
    if file_path:
        # ``str()`` lets non-string path values go through ``splitext``.
        _, suffix = os.path.splitext(str(file_path))
        return _FORMAT_RANK.get(suffix.lower(), 1)
    return 1
def duplicate_keep_sort_key(track: Dict) -> Tuple[int, int, float, int]:
    """Build the comparison key used to choose the keeper; the larger wins.

    The tuple compares, in order: format tier (from the ``file_path``
    extension), ``bitrate``, ``duration``, and ``track_number`` (so a real
    number beats a placeholder ``01``). A field that is absent or falsy
    counts as ``0``.

    Format comes first on purpose: it lets a FLAC outrank an MP3 even when
    the FLAC's bitrate is 0 or missing in the DB.
    """
    format_tier = format_rank_for_path(track.get("file_path"))
    bitrate = track.get("bitrate") or 0
    duration = track.get("duration") or 0
    track_number = track.get("track_number") or 0
    return format_tier, bitrate, duration, track_number
def pick_duplicate_to_keep(tracks: List[Dict]) -> Optional[Dict]:
    """Pick the copy to keep from a group of duplicate tracks.

    Returns the track whose ``duplicate_keep_sort_key`` is highest; when
    several tie, the earliest of them in ``tracks`` is returned. An empty
    (or otherwise falsy) group gives ``None``.
    """
    return max(tracks, key=duplicate_keep_sort_key) if tracks else None