From 91ee8312bf44e6a557d0a623837e016657f0fe4a Mon Sep 17 00:00:00 2001 From: Alex Shnitman Date: Sun, 26 Apr 2026 22:51:48 +0300 Subject: [PATCH] title filter for subscriptions (closes #968) --- app/main.py | 7 +- app/subscriptions.py | 67 ++++- app/tests/test_subscriptions.py | 277 +++++++++++++++++++ ui/src/app/app.html | 40 +++ ui/src/app/app.spec.ts | 33 ++- ui/src/app/app.ts | 46 +++ ui/src/app/interfaces/subscription.ts | 1 + ui/src/app/services/subscriptions.service.ts | 7 +- 8 files changed, 470 insertions(+), 8 deletions(-) diff --git a/app/main.py b/app/main.py index 283de01..e566bab 100644 --- a/app/main.py +++ b/app/main.py @@ -645,6 +645,7 @@ async def subscribe(request): subtitle_mode=o['subtitle_mode'], ytdl_options_presets=o['ytdl_options_presets'], ytdl_options_overrides=o['ytdl_options_overrides'], + title_regex=post.get('title_regex'), ) return web.Response(text=serializer.encode(result)) @@ -660,7 +661,11 @@ async def subscriptions_update(request): sub_id = post.get('id') if not sub_id: raise web.HTTPBadRequest(reason='missing subscription id') - changes = {k: v for k, v in post.items() if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name')} + changes = { + k: v + for k, v in post.items() + if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name', 'title_regex') + } if not changes: raise web.HTTPBadRequest(reason='no valid fields to update') log.info("Subscription update requested for %s: %s", sub_id, sorted(changes.keys())) diff --git a/app/subscriptions.py b/app/subscriptions.py index 08d76c6..170859c 100644 --- a/app/subscriptions.py +++ b/app/subscriptions.py @@ -6,6 +6,7 @@ import asyncio import copy import logging import os +import re import time import types import uuid @@ -147,6 +148,7 @@ class SubscriptionInfo: subtitle_mode: str = "prefer_manual" ytdl_options_presets: list[str] = field(default_factory=list) ytdl_options_overrides: dict[str, Any] = field(default_factory=dict) + title_regex: str = "" last_checked: Optional[float] = None seen_ids: list[str] = field(default_factory=list) error: Optional[str] = None @@ -167,6 +169,7 @@ class SubscriptionInfo: "format": self.format, "quality": self.quality, "folder": self.folder, + "title_regex": self.title_regex, "last_checked": self.last_checked, "seen_count": len(self.seen_ids), "error": self.error, @@ -194,6 +197,7 @@ def _subscription_to_record(sub: SubscriptionInfo) -> dict[str, Any]: "subtitle_mode": sub.subtitle_mode, "ytdl_options_presets": list(sub.ytdl_options_presets), "ytdl_options_overrides": sub.ytdl_options_overrides, + "title_regex": sub.title_regex, "last_checked": sub.last_checked, "seen_ids": list(sub.seen_ids), "error": sub.error, @@ -231,6 +235,22 @@ def _subscription_from_record(record: Any) -> Optional[SubscriptionInfo]: return None +def _normalize_title_regex_value(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value.strip() + return str(value).strip() + + +def validate_title_regex(value: Any) -> str: + """Return stored title regex string; non-empty values must compile (re.error on failure).""" + s = _normalize_title_regex_value(value) + if s: + re.compile(s) + return s + + def _coerce_bool(value: Any) -> bool: """Accept JSON booleans and common string forms used by API clients.""" if isinstance(value, bool): @@ -448,10 +468,15 @@ class SubscriptionManager: subtitle_mode: str, ytdl_options_presets: Optional[list[str]] = None, ytdl_options_overrides: Optional[dict[str, Any]] = None, + title_regex: Any = None, ) -> dict: url = self._normalize_url(url) if not url: return {"status": "error", "msg": "Missing URL"} + try: + title_regex_stored = validate_title_regex(title_regex) + except re.error as exc: + return {"status": "error", "msg": f"Invalid title_regex: {exc}"} async with self._lock: if url in self._url_index or url in self._pending_urls: @@ -509,6 +534,7 @@ class SubscriptionManager: subtitle_mode=subtitle_mode, ytdl_options_presets=list(ytdl_options_presets or []), ytdl_options_overrides=dict(ytdl_options_overrides or {}), + title_regex=title_regex_stored, last_checked=time.time(), seen_ids=list(dict.fromkeys(all_ids)), error=None, @@ -555,6 +581,13 @@ class SubscriptionManager: return {"status": "ok"} async def update_subscription(self, sub_id: str, changes: dict) -> dict: + validated_tr: Optional[str] = None + if "title_regex" in changes: + try: + validated_tr = validate_title_regex(changes["title_regex"]) + except re.error as exc: + return {"status": "error", "msg": f"Invalid title_regex: {exc}"} + async with self._lock: sub = self._subs.get(sub_id) if not sub: @@ -568,6 +601,8 @@ class SubscriptionManager: sub.check_interval_minutes = max(1, int(changes["check_interval_minutes"])) if "name" in changes and changes["name"]: sub.name = str(changes["name"]) + if validated_tr is not None: + sub.title_regex = validated_tr try: self._save_locked() @@ -659,9 +694,9 @@ class SubscriptionManager: dl_submode = cur.subtitle_mode dl_ytdl_presets = list(cur.ytdl_options_presets) dl_ytdl_overrides = dict(cur.ytdl_options_overrides) + dl_title_regex = cur.title_regex or "" new_entries: list[dict] = [] - new_ids: list[str] = [] for ent in entries: eid = _entry_id(ent) if not eid: @@ -669,10 +704,31 @@ class SubscriptionManager: if eid in seen and ent.get("live_status") != "is_live": continue new_entries.append(ent) - new_ids.append(eid) + + pattern_re: Optional[re.Pattern[str]] = None + if dl_title_regex: + try: + pattern_re = re.compile(dl_title_regex) + except re.error: + log.warning( + "Invalid stored title_regex on subscription %s, ignoring filter", + sub.name, + ) + + queue_entries: list[dict] = [] + filtered_ids: list[str] = [] + for ent in new_entries: + eid = _entry_id(ent) + if pattern_re is not None: + title = str(ent.get("title") or "") + if not pattern_re.search(title): + if eid: + filtered_ids.append(eid) + continue + queue_entries.append(ent) queued_ids, queue_errors = await self._queue_subscription_entries( - new_entries, + queue_entries, download_type=dl_type, codec=dl_codec, format=dl_format, @@ -689,14 +745,15 @@ class SubscriptionManager: ytdl_options_overrides=dl_ytdl_overrides, ) log.info( - "Subscription check finished for %s: %d new, %d queued, %d failed", + "Subscription check finished for %s: %d new, %d filtered, %d queued, %d failed", sub.name, len(new_entries), + len(filtered_ids), len(queued_ids), len(queue_errors), ) - merged = list(dict.fromkeys(queued_ids + seen_ids_snapshot)) + merged = list(dict.fromkeys(queued_ids + filtered_ids + seen_ids_snapshot)) max_seen = int(getattr(self.config, "SUBSCRIPTION_MAX_SEEN_IDS", 50000)) if len(merged) > max_seen: merged = merged[:max_seen] diff --git a/app/tests/test_subscriptions.py b/app/tests/test_subscriptions.py index e3f6229..a5bde09 100644 --- a/app/tests/test_subscriptions.py +++ b/app/tests/test_subscriptions.py @@ -453,6 +453,283 @@ class SubscriptionPersistenceTests(unittest.IsolatedAsyncioTestCase): with self.assertRaises(ValueError): await mgr.update_subscription(sub_id, {"enabled": "maybe"}) + async def test_add_subscription_rejects_invalid_title_regex(self): + with tempfile.TemporaryDirectory() as tmp: + mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier()) + with patch( + "subscriptions.extract_flat_playlist", + return_value=( + {"_type": "channel", "title": "Channel"}, + [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}], + ), + ): + result = await mgr.add_subscription( + "https://example.com/channel", + check_interval_minutes=60, + download_type="video", + codec="auto", + format="any", + quality="best", + folder="", + custom_name_prefix="", + auto_start=True, + playlist_item_limit=0, + split_by_chapters=False, + chapter_template="", + subtitle_language="en", + subtitle_mode="prefer_manual", + title_regex="[", + ) + self.assertEqual(result["status"], "error") + self.assertIn("title_regex", result["msg"].lower()) + self.assertEqual(mgr.list_all(), []) + + async def test_add_subscription_stores_and_exposes_title_regex(self): + with tempfile.TemporaryDirectory() as tmp: + queue = _Queue() + mgr = SubscriptionManager(_Config(tmp), queue, _Notifier()) + with patch( + "subscriptions.extract_flat_playlist", + return_value=( + {"_type": "channel", "title": "Channel"}, + [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}], + ), + ): + result = await mgr.add_subscription( + "https://example.com/channel", + check_interval_minutes=60, + download_type="video", + codec="auto", + format="any", + quality="best", + folder="", + custom_name_prefix="", + auto_start=True, + playlist_item_limit=0, + split_by_chapters=False, + chapter_template="", + subtitle_language="en", + subtitle_mode="prefer_manual", + title_regex="EPISODE", + ) + self.assertEqual(result["status"], "ok") + self.assertEqual(result["subscription"]["title_regex"], "EPISODE") + self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE") + + async def test_check_now_title_regex_queues_only_matches_and_marks_unmatched_seen(self): + with tempfile.TemporaryDirectory() as tmp: + queue = _Queue() + mgr = SubscriptionManager(_Config(tmp), queue, _Notifier()) + with patch( + "subscriptions.extract_flat_playlist", + side_effect=[ + ( + {"_type": "channel", "title": "Channel"}, + [{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}], + ), + ( + {"_type": "channel", "title": "Channel"}, + [ + { + "id": "v2", + "title": "Minecraft | EPISODE 1", + "webpage_url": "https://example.com/v2", + }, + { + "id": "v3", + "title": "Unrelated IRL", + "webpage_url": "https://example.com/v3", + }, + { + "id": "v1", + "title": "Old", + "webpage_url": "https://example.com/v1", + }, + ], + ), + ], + ): + result = await mgr.add_subscription( + "https://example.com/channel", + check_interval_minutes=60, + download_type="video", + codec="auto", + format="any", + quality="best", + folder="", + custom_name_prefix="", + auto_start=True, + playlist_item_limit=0, + split_by_chapters=False, + chapter_template="", + subtitle_language="en", + subtitle_mode="prefer_manual", + title_regex="EPISODE", + ) + await mgr.check_now([result["subscription"]["id"]]) + self.assertEqual([e["webpage_url"] for e, _, _ in queue.entries], ["https://example.com/v2"]) + sub = mgr.list_all()[0] + self.assertEqual(sub.seen_ids[:3], ["v2", "v3", "v1"]) + + async def test_check_now_title_regex_queue_failure_keeps_matched_id_unseen(self): + with tempfile.TemporaryDirectory() as tmp: + queue = _Queue() + mgr = SubscriptionManager(_Config(tmp), queue, _Notifier()) + with patch( + "subscriptions.extract_flat_playlist", + side_effect=[ + ( + {"_type": "channel", "title": "Channel"}, + [{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}], + ), + ( + {"_type": "channel", "title": "Channel"}, + [ + { + "id": "v2", + "title": "Show | EPISODE 1", + "webpage_url": "https://example.com/v2", + }, + { + "id": "v3", + "title": "Other", + "webpage_url": "https://example.com/v3", + }, + ], + ), + ], + ): + result = await mgr.add_subscription( + "https://example.com/channel", + check_interval_minutes=60, + download_type="video", + codec="auto", + format="any", + quality="best", + folder="", + custom_name_prefix="", + auto_start=True, + playlist_item_limit=0, + split_by_chapters=False, + chapter_template="", + subtitle_language="en", + subtitle_mode="prefer_manual", + title_regex="EPISODE", + ) + queue.fail = True + await mgr.check_now([result["subscription"]["id"]]) + sub = mgr.list_all()[0] + self.assertEqual(sub.error, "queue failed") + self.assertEqual(set(sub.seen_ids), {"v1", "v3"}) + self.assertNotIn("v2", sub.seen_ids) + + async def test_update_subscription_rejects_invalid_title_regex(self): + with tempfile.TemporaryDirectory() as tmp: + queue = _Queue() + mgr = SubscriptionManager(_Config(tmp), queue, _Notifier()) + with patch( + "subscriptions.extract_flat_playlist", + return_value=( + {"_type": "channel", "title": "Channel"}, + [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}], + ), + ): + result = await mgr.add_subscription( + "https://example.com/channel", + check_interval_minutes=60, + download_type="video", + codec="auto", + format="any", + quality="best", + folder="", + custom_name_prefix="", + auto_start=True, + playlist_item_limit=0, + split_by_chapters=False, + chapter_template="", + subtitle_language="en", + subtitle_mode="prefer_manual", + ) + sub_id = result["subscription"]["id"] + upd = await mgr.update_subscription(sub_id, {"title_regex": "("}) + self.assertEqual(upd["status"], "error") + self.assertEqual(mgr.list_all()[0].title_regex, "") + + async def test_update_subscription_persists_valid_title_regex(self): + with tempfile.TemporaryDirectory() as tmp: + queue = _Queue() + mgr = SubscriptionManager(_Config(tmp), queue, _Notifier()) + with patch( + "subscriptions.extract_flat_playlist", + return_value=( + {"_type": "channel", "title": "Channel"}, + [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}], + ), + ): + result = await mgr.add_subscription( + "https://example.com/channel", + check_interval_minutes=60, + download_type="video", + codec="auto", + format="any", + quality="best", + folder="", + custom_name_prefix="", + auto_start=True, + playlist_item_limit=0, + split_by_chapters=False, + chapter_template="", + subtitle_language="en", + subtitle_mode="prefer_manual", + ) + sub_id = result["subscription"]["id"] + upd = await mgr.update_subscription(sub_id, {"title_regex": "foo|bar"}) + self.assertEqual(upd["status"], "ok") + self.assertEqual(upd["subscription"]["title_regex"], "foo|bar") + self.assertEqual(mgr.list_all()[0].title_regex, "foo|bar") + + def test_persistence_includes_title_regex(self): + with tempfile.TemporaryDirectory() as tmp: + json_path = os.path.join(tmp, "subscriptions.json") + with open(json_path, "w", encoding="utf-8") as f: + json.dump( + { + "schema_version": 2, + "kind": "subscriptions", + "items": [ + { + "id": "sub-1", + "name": "Channel", + "url": "https://example.com/channel", + "enabled": True, + "check_interval_minutes": 60, + "download_type": "video", + "codec": "auto", + "format": "any", + "quality": "best", + "folder": "", + "custom_name_prefix": "", + "auto_start": True, + "playlist_item_limit": 0, + "split_by_chapters": False, + "chapter_template": "", + "subtitle_language": "en", + "subtitle_mode": "prefer_manual", + "ytdl_options_presets": [], + "ytdl_options_overrides": {}, + "title_regex": "EPISODE", + "last_checked": None, + "seen_ids": [], + "error": None, + } + ], + }, + f, + ) + mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier()) + self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE") + + class ExtractFlatPlaylistTests(unittest.TestCase): def test_descends_one_level_when_root_entries_are_nested_collections(self): responses = iter( diff --git a/ui/src/app/app.html b/ui/src/app/app.html index 9efb6a9..cf4986c 100644 --- a/ui/src/app/app.html +++ b/ui/src/app/app.html @@ -475,6 +475,18 @@ ngbTooltip="How often to poll subscriptions for new videos"> +
+
+ Subscription Title Filter + +
+
@@ -887,6 +899,8 @@ Name URL + Sub. title filter Interval (min) Last checked Status @@ -905,6 +919,32 @@ {{ entry[1].name }} {{ entry[1].url }} + + @if (editingTitleRegexId === entry[0]) { +
+ + + +
+ } @else { +
+ {{ entry[1].title_regex || '—' }} + +
+ } + {{ entry[1].check_interval_minutes }} @if (entry[1].last_checked !== null) { diff --git a/ui/src/app/app.spec.ts b/ui/src/app/app.spec.ts index 3b83aeb..0e09bd7 100644 --- a/ui/src/app/app.spec.ts +++ b/ui/src/app/app.spec.ts @@ -63,8 +63,10 @@ class DownloadsServiceStub { class SubscriptionsServiceStub { subscriptions = new Map(); subscriptionsChanged = new Subject(); + subscribeCalls: unknown[] = []; - subscribe() { + subscribe(payload: unknown) { + this.subscribeCalls.push(payload); return of({ status: 'ok' as const }); } @@ -72,6 +74,10 @@ class SubscriptionsServiceStub { return of({}); } + update() { + return of({ status: 'ok' as const }); + } + refreshList() { return of([]); } @@ -175,4 +181,29 @@ describe('App', () => { expect(payload.ytdlOptionsOverrides).toBe(''); }); + + it('includes titleRegex in subscribe payload', () => { + const fixture = TestBed.createComponent(App); + const app = fixture.componentInstance; + const subs = TestBed.inject(SubscriptionsService) as unknown as SubscriptionsServiceStub; + app.addUrl = 'https://example.com/channel'; + app.titleRegex = 'EPISODE'; + app.addSubscription(); + expect(subs.subscribeCalls.length).toBe(1); + const payload = subs.subscribeCalls[0] as { titleRegex: string }; + expect(payload.titleRegex).toBe('EPISODE'); + }); + + it('blocks subscribe with invalid title regex', () => { + const alertSpy = vi.spyOn(window, 'alert').mockImplementation(() => undefined); + const fixture = TestBed.createComponent(App); + const app = fixture.componentInstance; + const subs = TestBed.inject(SubscriptionsService) as unknown as SubscriptionsServiceStub; + app.addUrl = 'https://example.com/channel'; + app.titleRegex = '['; + app.addSubscription(); + expect(subs.subscribeCalls.length).toBe(0); + expect(alertSpy).toHaveBeenCalledWith('Invalid subscription title filter (regex)'); + alertSpy.mockRestore(); + }); }); diff --git a/ui/src/app/app.ts b/ui/src/app/app.ts index cd88283..b7f1f6b 100644 --- a/ui/src/app/app.ts +++ b/ui/src/app/app.ts @@ -90,6 +90,9 @@ export class App implements AfterViewInit, OnInit, OnDestroy { cancelRequested = false; subscribeInProgress = false; checkIntervalMinutes = 60; + titleRegex = ''; + editingTitleRegexId: string | null = null; + titleRegexEditDraft = ''; cachedSubs: [string, SubscriptionRow][] = []; selectedSubscriptionIds = new Set(); checkingSubscriptionIds = new Set(); @@ -560,6 +563,15 @@ export class App implements AfterViewInit, OnInit, OnDestroy { alert('Please enter a URL'); return; } + const tr = (this.titleRegex || '').trim(); + if (tr) { + try { + void RegExp(tr); + } catch { + alert('Invalid subscription title filter (regex)'); + return; + } + } if (payload.splitByChapters && !payload.chapterTemplate.includes('%(section_number)')) { alert('Chapter template must include %(section_number)'); return; @@ -572,6 +584,7 @@ export class App implements AfterViewInit, OnInit, OnDestroy { .subscribe({ ...payload, checkIntervalMinutes: this.checkIntervalMinutes, + titleRegex: tr, }) .pipe( takeUntilDestroyed(this.destroyRef), @@ -587,11 +600,44 @@ export class App implements AfterViewInit, OnInit, OnDestroy { alert(r.msg || 'Subscribe failed'); } else { this.addUrl = ''; + this.titleRegex = ''; } }, }); } + beginEditTitleRegex(id: string, current: string | undefined) { + this.editingTitleRegexId = id; + this.titleRegexEditDraft = current ?? ''; + this.cdr.markForCheck(); + } + + cancelEditTitleRegex() { + this.editingTitleRegexId = null; + this.titleRegexEditDraft = ''; + this.cdr.markForCheck(); + } + + saveTitleRegex(id: string) { + const raw = (this.titleRegexEditDraft || '').trim(); + if (raw) { + try { + void RegExp(raw); + } catch { + alert('Invalid subscription title filter (regex)'); + return; + } + } + this.subscriptionsSvc.update(id, { title_regex: raw }).subscribe((res) => { + const error = this.getStatusError(res); + if (error) { + alert(error || 'Update subscription failed'); + return; + } + this.cancelEditTitleRegex(); + }); + } + deleteSubscription(id: string) { this.subscriptionsSvc.delete([id]).subscribe((res) => { const error = this.getStatusError(res); diff --git a/ui/src/app/interfaces/subscription.ts b/ui/src/app/interfaces/subscription.ts index 85bc794..f5ab68e 100644 --- a/ui/src/app/interfaces/subscription.ts +++ b/ui/src/app/interfaces/subscription.ts @@ -9,6 +9,7 @@ export interface SubscriptionRow { format: string; quality: string; folder: string; + title_regex?: string; last_checked: number | null; seen_count: number; error: string | null; diff --git a/ui/src/app/services/subscriptions.service.ts b/ui/src/app/services/subscriptions.service.ts index 2a95fb2..4db08cf 100644 --- a/ui/src/app/services/subscriptions.service.ts +++ b/ui/src/app/services/subscriptions.service.ts @@ -10,6 +10,7 @@ import { AddDownloadPayload } from './downloads.service'; export interface SubscribePayload extends AddDownloadPayload { checkIntervalMinutes: number; + titleRegex: string; } @Injectable({ @@ -97,6 +98,7 @@ export class SubscriptionsService { ytdl_options_presets: payload.ytdlOptionsPresets, ytdl_options_overrides: payload.ytdlOptionsOverrides, check_interval_minutes: payload.checkIntervalMinutes, + title_regex: payload.titleRegex, }) .pipe(catchError((err) => this.handleHTTPError(err))); } @@ -105,7 +107,10 @@ export class SubscriptionsService { return this.http.post('subscriptions/delete', { ids }).pipe(catchError((err) => this.handleHTTPError(err))); } - update(id: string, changes: Partial>) { + update( + id: string, + changes: Partial>, + ) { return this.http .post('subscriptions/update', { id, ...changes }) .pipe(catchError((err) => this.handleHTTPError(err)));