diff --git a/app/main.py b/app/main.py
index 283de01..e566bab 100644
--- a/app/main.py
+++ b/app/main.py
@@ -645,6 +645,7 @@ async def subscribe(request):
         subtitle_mode=o['subtitle_mode'],
         ytdl_options_presets=o['ytdl_options_presets'],
         ytdl_options_overrides=o['ytdl_options_overrides'],
+        title_regex=post.get('title_regex'),
     )
     return web.Response(text=serializer.encode(result))
 
@@ -660,7 +661,11 @@ async def subscriptions_update(request):
     sub_id = post.get('id')
     if not sub_id:
         raise web.HTTPBadRequest(reason='missing subscription id')
-    changes = {k: v for k, v in post.items() if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name')}
+    changes = {
+        k: v
+        for k, v in post.items()
+        if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name', 'title_regex')
+    }
     if not changes:
         raise web.HTTPBadRequest(reason='no valid fields to update')
     log.info("Subscription update requested for %s: %s", sub_id, sorted(changes.keys()))
diff --git a/app/subscriptions.py b/app/subscriptions.py
index 08d76c6..170859c 100644
--- a/app/subscriptions.py
+++ b/app/subscriptions.py
@@ -6,6 +6,7 @@ import asyncio
 import copy
 import logging
 import os
+import re
 import time
 import types
 import uuid
@@ -147,6 +148,7 @@ class SubscriptionInfo:
     subtitle_mode: str = "prefer_manual"
     ytdl_options_presets: list[str] = field(default_factory=list)
     ytdl_options_overrides: dict[str, Any] = field(default_factory=dict)
+    title_regex: str = ""
     last_checked: Optional[float] = None
     seen_ids: list[str] = field(default_factory=list)
     error: Optional[str] = None
@@ -167,6 +169,7 @@ class SubscriptionInfo:
             "format": self.format,
             "quality": self.quality,
             "folder": self.folder,
+            "title_regex": self.title_regex,
             "last_checked": self.last_checked,
             "seen_count": len(self.seen_ids),
             "error": self.error,
@@ -194,6 +197,7 @@ def _subscription_to_record(sub: SubscriptionInfo) -> dict[str, Any]:
         "subtitle_mode": sub.subtitle_mode,
         "ytdl_options_presets": list(sub.ytdl_options_presets),
         "ytdl_options_overrides": sub.ytdl_options_overrides,
+        "title_regex": sub.title_regex,
         "last_checked": sub.last_checked,
         "seen_ids": list(sub.seen_ids),
         "error": sub.error,
@@ -231,6 +235,32 @@ def _subscription_from_record(record: Any) -> Optional[SubscriptionInfo]:
         return None
 
 
+def _normalize_title_regex_value(value: Any) -> str:
+    """Return ``value`` as a whitespace-stripped string; ``None`` becomes ``""``."""
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value.strip()
+    return str(value).strip()
+
+
+def validate_title_regex(value: Any) -> str:
+    """Return the normalized title regex, raising ``re.error`` if it cannot compile.
+
+    An empty result is returned as-is and is never compiled.
+    """
+    s = _normalize_title_regex_value(value)
+    if s:
+        try:
+            re.compile(s)
+        except (OverflowError, RecursionError, ValueError) as exc:
+            # re.compile() reports oversized repeat counts (e.g. "a{99999999999}")
+            # as OverflowError and very deep nesting as RecursionError; callers
+            # only handle re.error, so surface those as a validation failure too.
+            raise re.error(str(exc)) from exc
+    return s
+
+
 def _coerce_bool(value: Any) -> bool:
     """Accept JSON booleans and common string forms used by API clients."""
     if isinstance(value, bool):
@@ -448,10 +478,15 @@ class SubscriptionManager:
         subtitle_mode: str,
         ytdl_options_presets: Optional[list[str]] = None,
         ytdl_options_overrides: Optional[dict[str, Any]] = None,
+        title_regex: Any = None,
     ) -> dict:
         url = self._normalize_url(url)
         if not url:
             return {"status": "error", "msg": "Missing URL"}
+        try:
+            title_regex_stored = validate_title_regex(title_regex)
+        except re.error as exc:
+            return {"status": "error", "msg": f"Invalid title_regex: {exc}"}
 
         async with self._lock:
             if url in self._url_index or url in self._pending_urls:
@@ -509,6 +544,7 @@ class SubscriptionManager:
                 subtitle_mode=subtitle_mode,
                 ytdl_options_presets=list(ytdl_options_presets or []),
                 ytdl_options_overrides=dict(ytdl_options_overrides or {}),
+                title_regex=title_regex_stored,
                 last_checked=time.time(),
                 seen_ids=list(dict.fromkeys(all_ids)),
                 error=None,
@@ -555,6 +591,13 @@ class SubscriptionManager:
         return {"status": "ok"}
 
     async def update_subscription(self, sub_id: str, changes: dict) -> dict:
+        validated_tr: Optional[str] = None
+        if "title_regex" in changes:
+            try:
+                validated_tr = validate_title_regex(changes["title_regex"])
+            except re.error as exc:
+                return {"status": "error", "msg": f"Invalid title_regex: {exc}"}
+
         async with self._lock:
             sub = self._subs.get(sub_id)
             if not sub:
@@ -568,6 +611,8 @@ class SubscriptionManager:
                 sub.check_interval_minutes = max(1, int(changes["check_interval_minutes"]))
             if "name" in changes and changes["name"]:
                 sub.name = str(changes["name"])
+            if validated_tr is not None:
+                sub.title_regex = validated_tr
 
             try:
                 self._save_locked()
@@ -659,9 +704,9 @@ class SubscriptionManager:
             dl_submode = cur.subtitle_mode
             dl_ytdl_presets = list(cur.ytdl_options_presets)
             dl_ytdl_overrides = dict(cur.ytdl_options_overrides)
+            dl_title_regex = cur.title_regex or ""
 
         new_entries: list[dict] = []
-        new_ids: list[str] = []
         for ent in entries:
             eid = _entry_id(ent)
             if not eid:
@@ -669,10 +714,31 @@ class SubscriptionManager:
             if eid in seen and ent.get("live_status") != "is_live":
                 continue
             new_entries.append(ent)
-            new_ids.append(eid)
+
+        pattern_re: Optional[re.Pattern[str]] = None
+        if dl_title_regex:
+            try:
+                pattern_re = re.compile(dl_title_regex)
+            except re.error:
+                log.warning(
+                    "Invalid stored title_regex on subscription %s, ignoring filter",
+                    sub.name,
+                )
+
+        queue_entries: list[dict] = []
+        filtered_ids: list[str] = []
+        for ent in new_entries:
+            eid = _entry_id(ent)
+            if pattern_re is not None:
+                title = str(ent.get("title") or "")
+                if not pattern_re.search(title):
+                    if eid:
+                        filtered_ids.append(eid)
+                    continue
+            queue_entries.append(ent)
 
         queued_ids, queue_errors = await self._queue_subscription_entries(
-            new_entries,
+            queue_entries,
             download_type=dl_type,
             codec=dl_codec,
             format=dl_format,
@@ -689,14 +755,15 @@ class SubscriptionManager:
             ytdl_options_overrides=dl_ytdl_overrides,
         )
         log.info(
-            "Subscription check finished for %s: %d new, %d queued, %d failed",
+            "Subscription check finished for %s: %d new, %d filtered, %d queued, %d failed",
             sub.name,
             len(new_entries),
+            len(filtered_ids),
             len(queued_ids),
             len(queue_errors),
         )
 
-        merged = list(dict.fromkeys(queued_ids + seen_ids_snapshot))
+        merged = list(dict.fromkeys(queued_ids + filtered_ids + seen_ids_snapshot))
         max_seen = int(getattr(self.config, "SUBSCRIPTION_MAX_SEEN_IDS", 50000))
         if len(merged) > max_seen:
             merged = merged[:max_seen]
diff --git a/app/tests/test_subscriptions.py b/app/tests/test_subscriptions.py
index e3f6229..a5bde09 100644
--- a/app/tests/test_subscriptions.py
+++ b/app/tests/test_subscriptions.py
@@ -453,6 +453,283 @@ class SubscriptionPersistenceTests(unittest.IsolatedAsyncioTestCase):
             with self.assertRaises(ValueError):
                 await mgr.update_subscription(sub_id, {"enabled": "maybe"})
 
+    async def test_add_subscription_rejects_invalid_title_regex(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier())
+            with patch(
+                "subscriptions.extract_flat_playlist",
+                return_value=(
+                    {"_type": "channel", "title": "Channel"},
+                    [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
+                ),
+            ):
+                result = await mgr.add_subscription(
+                    "https://example.com/channel",
+                    check_interval_minutes=60,
+                    download_type="video",
+                    codec="auto",
+                    format="any",
+                    quality="best",
+                    folder="",
+                    custom_name_prefix="",
+                    auto_start=True,
+                    playlist_item_limit=0,
+                    split_by_chapters=False,
+                    chapter_template="",
+                    subtitle_language="en",
+                    subtitle_mode="prefer_manual",
+                    title_regex="[",
+                )
+            self.assertEqual(result["status"], "error")
+            self.assertIn("title_regex", result["msg"].lower())
+            self.assertEqual(mgr.list_all(), [])
+
+    async def test_add_subscription_stores_and_exposes_title_regex(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            queue = _Queue()
+            mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
+            with patch(
+                "subscriptions.extract_flat_playlist",
+                return_value=(
+                    {"_type": "channel", "title": "Channel"},
+                    [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
+                ),
+            ):
+                result = await mgr.add_subscription(
+                    "https://example.com/channel",
+                    check_interval_minutes=60,
+                    download_type="video",
+                    codec="auto",
+                    format="any",
+                    quality="best",
+                    folder="",
+                    custom_name_prefix="",
+                    auto_start=True,
+                    playlist_item_limit=0,
+                    split_by_chapters=False,
+                    chapter_template="",
+                    subtitle_language="en",
+                    subtitle_mode="prefer_manual",
+                    title_regex="EPISODE",
+                )
+            self.assertEqual(result["status"], "ok")
+            self.assertEqual(result["subscription"]["title_regex"], "EPISODE")
+            self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE")
+
+    async def test_check_now_title_regex_queues_only_matches_and_marks_unmatched_seen(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            queue = _Queue()
+            mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
+            with patch(
+                "subscriptions.extract_flat_playlist",
+                side_effect=[
+                    (
+                        {"_type": "channel", "title": "Channel"},
+                        [{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}],
+                    ),
+                    (
+                        {"_type": "channel", "title": "Channel"},
+                        [
+                            {
+                                "id": "v2",
+                                "title": "Minecraft | EPISODE 1",
+                                "webpage_url": "https://example.com/v2",
+                            },
+                            {
+                                "id": "v3",
+                                "title": "Unrelated IRL",
+                                "webpage_url": "https://example.com/v3",
+                            },
+                            {
+                                "id": "v1",
+                                "title": "Old",
+                                "webpage_url": "https://example.com/v1",
+                            },
+                        ],
+                    ),
+                ],
+            ):
+                result = await mgr.add_subscription(
+                    "https://example.com/channel",
+                    check_interval_minutes=60,
+                    download_type="video",
+                    codec="auto",
+                    format="any",
+                    quality="best",
+                    folder="",
+                    custom_name_prefix="",
+                    auto_start=True,
+                    playlist_item_limit=0,
+                    split_by_chapters=False,
+                    chapter_template="",
+                    subtitle_language="en",
+                    subtitle_mode="prefer_manual",
+                    title_regex="EPISODE",
+                )
+                await mgr.check_now([result["subscription"]["id"]])
+            self.assertEqual([e["webpage_url"] for e, _, _ in queue.entries], ["https://example.com/v2"])
+            sub = mgr.list_all()[0]
+            self.assertEqual(sub.seen_ids[:3], ["v2", "v3", "v1"])
+
+    async def test_check_now_title_regex_queue_failure_keeps_matched_id_unseen(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            queue = _Queue()
+            mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
+            with patch(
+                "subscriptions.extract_flat_playlist",
+                side_effect=[
+                    (
+                        {"_type": "channel", "title": "Channel"},
+                        [{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}],
+                    ),
+                    (
+                        {"_type": "channel", "title": "Channel"},
+                        [
+                            {
+                                "id": "v2",
+                                "title": "Show | EPISODE 1",
+                                "webpage_url": "https://example.com/v2",
+                            },
+                            {
+                                "id": "v3",
+                                "title": "Other",
+                                "webpage_url": "https://example.com/v3",
+                            },
+                        ],
+                    ),
+                ],
+            ):
+                result = await mgr.add_subscription(
+                    "https://example.com/channel",
+                    check_interval_minutes=60,
+                    download_type="video",
+                    codec="auto",
+                    format="any",
+                    quality="best",
+                    folder="",
+                    custom_name_prefix="",
+                    auto_start=True,
+                    playlist_item_limit=0,
+                    split_by_chapters=False,
+                    chapter_template="",
+                    subtitle_language="en",
+                    subtitle_mode="prefer_manual",
+                    title_regex="EPISODE",
+                )
+                queue.fail = True
+                await mgr.check_now([result["subscription"]["id"]])
+            sub = mgr.list_all()[0]
+            self.assertEqual(sub.error, "queue failed")
+            self.assertEqual(set(sub.seen_ids), {"v1", "v3"})
+            self.assertNotIn("v2", sub.seen_ids)
+
+    async def test_update_subscription_rejects_invalid_title_regex(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            queue = _Queue()
+            mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
+            with patch(
+                "subscriptions.extract_flat_playlist",
+                return_value=(
+                    {"_type": "channel", "title": "Channel"},
+                    [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
+                ),
+            ):
+                result = await mgr.add_subscription(
+                    "https://example.com/channel",
+                    check_interval_minutes=60,
+                    download_type="video",
+                    codec="auto",
+                    format="any",
+                    quality="best",
+                    folder="",
+                    custom_name_prefix="",
+                    auto_start=True,
+                    playlist_item_limit=0,
+                    split_by_chapters=False,
+                    chapter_template="",
+                    subtitle_language="en",
+                    subtitle_mode="prefer_manual",
+                )
+            sub_id = result["subscription"]["id"]
+            upd = await mgr.update_subscription(sub_id, {"title_regex": "("})
+            self.assertEqual(upd["status"], "error")
+            self.assertEqual(mgr.list_all()[0].title_regex, "")
+
+    async def test_update_subscription_persists_valid_title_regex(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            queue = _Queue()
+            mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
+            with patch(
+                "subscriptions.extract_flat_playlist",
+                return_value=(
+                    {"_type": "channel", "title": "Channel"},
+                    [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
+                ),
+            ):
+                result = await mgr.add_subscription(
+                    "https://example.com/channel",
+                    check_interval_minutes=60,
+                    download_type="video",
+                    codec="auto",
+                    format="any",
+                    quality="best",
+                    folder="",
+                    custom_name_prefix="",
+                    auto_start=True,
+                    playlist_item_limit=0,
+                    split_by_chapters=False,
+                    chapter_template="",
+                    subtitle_language="en",
+                    subtitle_mode="prefer_manual",
+                )
+            sub_id = result["subscription"]["id"]
+            upd = await mgr.update_subscription(sub_id, {"title_regex": "foo|bar"})
+            self.assertEqual(upd["status"], "ok")
+            self.assertEqual(upd["subscription"]["title_regex"], "foo|bar")
+            self.assertEqual(mgr.list_all()[0].title_regex, "foo|bar")
+
+    def test_persistence_includes_title_regex(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            json_path = os.path.join(tmp, "subscriptions.json")
+            with open(json_path, "w", encoding="utf-8") as f:
+                json.dump(
+                    {
+                        "schema_version": 2,
+                        "kind": "subscriptions",
+                        "items": [
+                            {
+                                "id": "sub-1",
+                                "name": "Channel",
+                                "url": "https://example.com/channel",
+                                "enabled": True,
+                                "check_interval_minutes": 60,
+                                "download_type": "video",
+                                "codec": "auto",
+                                "format": "any",
+                                "quality": "best",
+                                "folder": "",
+                                "custom_name_prefix": "",
+                                "auto_start": True,
+                                "playlist_item_limit": 0,
+                                "split_by_chapters": False,
+                                "chapter_template": "",
+                                "subtitle_language": "en",
+                                "subtitle_mode": "prefer_manual",
+                                "ytdl_options_presets": [],
+                                "ytdl_options_overrides": {},
+                                "title_regex": "EPISODE",
+                                "last_checked": None,
+                                "seen_ids": [],
+                                "error": None,
+                            }
+                        ],
+                    },
+                    f,
+                )
+            mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier())
+            self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE")
+
+
 class ExtractFlatPlaylistTests(unittest.TestCase):
     def test_descends_one_level_when_root_entries_are_nested_collections(self):
         responses = iter(
diff --git a/ui/src/app/app.html b/ui/src/app/app.html
index 9efb6a9..cf4986c 100644
--- a/ui/src/app/app.html
+++ b/ui/src/app/app.html
@@ -475,6 +475,18 @@
                    ngbTooltip="How often to poll subscriptions for new videos">
+