review fixes

This commit is contained in:
Alex Shnitman
2026-06-16 21:35:07 +03:00
parent 5429200fba
commit 5aa7d033e2
13 changed files with 441 additions and 101 deletions
+32 -5
View File
@@ -130,6 +130,10 @@ class Config:
)
sys.exit(1)
self._validate_int('MAX_CONCURRENT_DOWNLOADS', minimum=1)
self._validate_int('PORT', minimum=1, maximum=65535)
self._validate_int('CLEAR_COMPLETED_AFTER', minimum=0)
self._runtime_overrides = {}
success,_ = self.load_ytdl_options()
@@ -139,6 +143,20 @@ class Config:
if not success:
sys.exit(1)
def _validate_int(self, key, *, minimum=None, maximum=None):
raw = getattr(self, key)
try:
value = int(raw)
except (TypeError, ValueError):
log.error('Environment variable "%s" must be an integer, got "%s"', key, raw)
sys.exit(1)
if minimum is not None and value < minimum:
log.error('Environment variable "%s" must be >= %d, got "%s"', key, minimum, raw)
sys.exit(1)
if maximum is not None and value > maximum:
log.error('Environment variable "%s" must be <= %d, got "%s"', key, maximum, raw)
sys.exit(1)
def set_runtime_override(self, key, value):
self._runtime_overrides[key] = value
self.YTDL_OPTIONS[key] = value
@@ -241,7 +259,13 @@ logging.getLogger().setLevel(parseLogLevel(str(config.LOGLEVEL)) or logging.INFO
class ObjectSerializer(json.JSONEncoder):
def default(self, obj):
# First try to use __dict__ for custom objects
# Prefer an explicit client-facing view when the object provides one
# (e.g. DownloadInfo / SubscriptionInfo) so server-only or bulky fields
# are never broadcast to browser clients.
to_public = getattr(obj, 'to_public_dict', None)
if callable(to_public):
return to_public()
# Fall back to __dict__ for other custom objects
if hasattr(obj, '__dict__'):
return obj.__dict__
# Convert iterables (generators, dict_items, etc.) to lists
@@ -827,10 +851,7 @@ async def cancel_add(request):
@routes.post(config.URL_PREFIX + 'subscribe')
async def subscribe(request):
post = await _read_json_request(request)
try:
o = parse_download_options(post)
except web.HTTPBadRequest:
raise
o = parse_download_options(post)
cic = post.get('check_interval_minutes')
if cic is None:
cic = config.SUBSCRIPTION_DEFAULT_CHECK_INTERVAL
@@ -964,6 +985,12 @@ async def upload_cookies(request):
tmp_cookie_path = f"{COOKIES_PATH}.tmp"
with open(tmp_cookie_path, 'wb') as f:
f.write(content)
# Cookies are sensitive auth material; restrict to owner read/write only
# (the container's default umask would otherwise leave them group/world readable).
try:
os.chmod(tmp_cookie_path, 0o600)
except OSError as exc:
log.warning(f'Could not restrict permissions on cookies file: {exc}')
os.replace(tmp_cookie_path, COOKIES_PATH)
config.set_runtime_override('cookiefile', COOKIES_PATH)
log.info(f'Cookies file uploaded ({size} bytes)')
+17
View File
@@ -312,6 +312,7 @@ class SubscriptionManager:
self._subs: dict[str, SubscriptionInfo] = {}
self._url_index: dict[str, str] = {} # normalized url -> id
self._pending_urls: set[str] = set()
self._checks_in_flight: set[str] = set() # subscription ids being checked right now
self._lock = asyncio.Lock()
self._loop_task: Optional[asyncio.Task] = None
self._load_all()
@@ -677,6 +678,22 @@ class SubscriptionManager:
return {"status": "ok"}
async def _check_one_unlocked(self, sub: SubscriptionInfo) -> None:
sid = sub.id
# Prevent overlapping checks for the same subscription (e.g. the periodic
# loop and a manual check-now firing together), which could double-queue
# entries and drop seen_ids via a read-modify-write race.
async with self._lock:
if sid in self._checks_in_flight:
log.info("Subscription check already in progress for %s, skipping", sub.name)
return
self._checks_in_flight.add(sid)
try:
await self._check_one_inner(sub)
finally:
async with self._lock:
self._checks_in_flight.discard(sid)
async def _check_one_inner(self, sub: SubscriptionInfo) -> None:
sid = sub.id
scan = int(getattr(self.config, "SUBSCRIPTION_SCAN_PLAYLIST_END", 50))
log.info("Checking subscription: %s", sub.name)
+23
View File
@@ -123,6 +123,29 @@ class ConfigTests(unittest.TestCase):
with self.assertRaises(SystemExit):
Config()
def test_invalid_max_concurrent_downloads_exits(self):
for bad in ("0", "-1", "abc"):
with patch.dict(os.environ, _base_env(MAX_CONCURRENT_DOWNLOADS=bad), clear=False):
with self.assertRaises(SystemExit):
Config()
def test_invalid_port_exits(self):
for bad in ("0", "70000", "notaport"):
with patch.dict(os.environ, _base_env(PORT=bad), clear=False):
with self.assertRaises(SystemExit):
Config()
def test_invalid_clear_completed_after_exits(self):
for bad in ("-5", "soon"):
with patch.dict(os.environ, _base_env(CLEAR_COMPLETED_AFTER=bad), clear=False):
with self.assertRaises(SystemExit):
Config()
def test_clear_completed_after_zero_allowed(self):
with patch.dict(os.environ, _base_env(CLEAR_COMPLETED_AFTER="0"), clear=False):
c = Config()
self.assertEqual(c.CLEAR_COMPLETED_AFTER, "0")
def test_runtime_override_roundtrip(self):
with patch.dict(os.environ, _base_env(), clear=False):
c = Config()
+60
View File
@@ -627,3 +627,63 @@ def test_seconds_until_next_probe_none_when_empty(dq_env):
notifier = AsyncMock()
dq = DownloadQueue(dq_env, notifier)
assert dq._seconds_until_next_probe() is None
def test_calc_download_path_allows_subfolder(dq_env):
notifier = AsyncMock()
dq = DownloadQueue(dq_env, notifier)
path, err = dq._DownloadQueue__calc_download_path("video", "sub/dir")
assert err is None
assert os.path.realpath(path) == os.path.join(os.path.realpath(dq_env.DOWNLOAD_DIR), "sub", "dir")
def test_calc_download_path_rejects_sibling_prefix_escape(dq_env):
"""A folder resolving to a sibling sharing a name prefix must be rejected.
Regression test: ``startswith`` would have accepted ``../downloads-secret``
when the base directory is ``.../downloads``.
"""
notifier = AsyncMock()
base = os.path.realpath(dq_env.DOWNLOAD_DIR)
sibling = base + "-secret"
os.makedirs(sibling, exist_ok=True)
dq = DownloadQueue(dq_env, notifier)
escape_folder = os.path.join("..", os.path.basename(sibling), "x")
path, err = dq._DownloadQueue__calc_download_path("video", escape_folder)
assert path is None
assert err is not None and err["status"] == "error"
def test_calc_download_path_rejects_parent_escape(dq_env):
notifier = AsyncMock()
dq = DownloadQueue(dq_env, notifier)
path, err = dq._DownloadQueue__calc_download_path("video", "../../etc")
assert path is None
assert err is not None and err["status"] == "error"
def test_download_info_to_public_dict_excludes_server_only_fields():
info = DownloadInfo(
id="vid1",
title="Test Video",
url="https://example.com/watch?v=1",
quality="best",
download_type="video",
codec="auto",
format="any",
folder="",
custom_name_prefix="",
error=None,
entry={"id": "vid1", "huge": "x" * 100000},
playlist_item_limit=0,
split_by_chapters=False,
chapter_template="",
)
info.subtitle_files = [{"filename": "a.srt", "size": 10}]
public = info.to_public_dict()
assert "entry" not in public
assert "subtitle_files" not in public
# Client-facing fields are still present.
assert public["url"] == "https://example.com/watch?v=1"
assert public["title"] == "Test Video"
assert public["status"] == "pending"
+52 -9
View File
@@ -232,6 +232,20 @@ class DownloadInfo:
self.live_release_timestamp = live_release_timestamp
self.subtitle_files = []
# Fields that are useful server-side but must not be broadcast to browser
# clients: ``entry`` is the full yt-dlp info-dict (potentially large and
# re-sent on every progress tick) and ``subtitle_files`` is only used
# internally to derive the primary caption ``filename``.
_PUBLIC_EXCLUDED_FIELDS = ("entry", "subtitle_files")
def to_public_dict(self) -> dict:
"""Return the client-facing view, omitting server-only/bulky fields."""
return {
k: v
for k, v in self.__dict__.items()
if k not in self._PUBLIC_EXCLUDED_FIELDS
}
def __setstate__(self, state):
"""BACKWARD COMPATIBILITY: migrate old DownloadInfo from persistent queue files."""
self.__dict__.update(state)
@@ -584,7 +598,10 @@ class Download:
self.info.filename = rel_name
self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None
if getattr(self.info, 'download_type', '') == 'thumbnail':
self.info.filename = re.sub(r'\.webm$', '.jpg', self.info.filename)
# The thumbnail convertor always emits a .jpg, but yt-dlp may
# report the pre-conversion media/thumbnail extension
# (.webm/.mp4/.png/.webp/...). Normalise to .jpg regardless.
self.info.filename = os.path.splitext(self.info.filename)[0] + '.jpg'
# Handle chapter files
log.debug(f"Update status for {self.info.title}: {status}")
@@ -647,8 +664,8 @@ class PersistentQueue:
def __init__(self, name, path):
self.identifier = name
pdir = os.path.dirname(path)
if not os.path.isdir(pdir):
os.mkdir(pdir)
if pdir and not os.path.isdir(pdir):
os.makedirs(pdir, exist_ok=True)
self.legacy_path = path
self.path = f"{path}.json"
self.store = AtomicJsonStore(self.path, kind=f"persistent_queue:{name}")
@@ -1026,7 +1043,16 @@ class DownloadQueue:
return None, {'status': 'error', 'msg': 'A folder for the download was specified but CUSTOM_DIRS is not true in the configuration.'}
dldirectory = os.path.realpath(os.path.join(base_directory, folder))
real_base_directory = os.path.realpath(base_directory)
if not dldirectory.startswith(real_base_directory):
# Use commonpath rather than startswith so that a sibling directory
# sharing a name prefix (e.g. base "/downloads" vs "/downloads-secret")
# cannot be reached via "../downloads-secret".
try:
inside_base = os.path.commonpath([real_base_directory, dldirectory]) == real_base_directory
except ValueError:
# Raised when paths are on different drives (Windows) or mix
# absolute/relative; treat as outside the base directory.
inside_base = False
if not inside_base:
return None, {'status': 'error', 'msg': f'Folder "{folder}" must resolve inside the base download directory "{real_base_directory}"'}
if not os.path.isdir(dldirectory):
if not self.config.CREATE_CUSTOM_DIRS:
@@ -1387,11 +1413,28 @@ class DownloadQueue:
continue
if self.config.DELETE_FILE_ON_TRASHCAN:
dl = self.done.get(id)
try:
dldirectory, _ = self.__calc_download_path(dl.info.download_type, dl.info.folder)
os.remove(os.path.join(dldirectory, dl.info.filename))
except Exception as e:
log.warning(f'deleting file for download {id} failed with error message {e!r}')
dldirectory, calc_error = self.__calc_download_path(dl.info.download_type, dl.info.folder)
if calc_error is not None or not dldirectory:
log.warning(f'deleting files for download {id} skipped: could not resolve download directory')
else:
# Remove the primary output plus any per-chapter / per-subtitle
# outputs. Each filename is relative to the download directory.
rel_names = []
if getattr(dl.info, 'filename', None):
rel_names.append(dl.info.filename)
for extra in (getattr(dl.info, 'chapter_files', None) or []):
if isinstance(extra, dict) and extra.get('filename'):
rel_names.append(extra['filename'])
for extra in (getattr(dl.info, 'subtitle_files', None) or []):
if isinstance(extra, dict) and extra.get('filename'):
rel_names.append(extra['filename'])
for rel_name in rel_names:
try:
os.remove(os.path.join(dldirectory, rel_name))
except FileNotFoundError:
pass
except OSError as e:
log.warning(f'deleting file "{rel_name}" for download {id} failed with error message {e!r}')
self.done.delete(id)
await self.notifier.cleared(id)
return {'status': 'ok'}