title filter for subscriptions (closes #968)

This commit is contained in:
Alex Shnitman
2026-04-26 22:51:48 +03:00
parent d89a5ddbe5
commit 91ee8312bf
8 changed files with 470 additions and 8 deletions
+6 -1
View File
@@ -645,6 +645,7 @@ async def subscribe(request):
subtitle_mode=o['subtitle_mode'],
ytdl_options_presets=o['ytdl_options_presets'],
ytdl_options_overrides=o['ytdl_options_overrides'],
title_regex=post.get('title_regex'),
)
return web.Response(text=serializer.encode(result))
@@ -660,7 +661,11 @@ async def subscriptions_update(request):
sub_id = post.get('id')
if not sub_id:
raise web.HTTPBadRequest(reason='missing subscription id')
changes = {k: v for k, v in post.items() if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name')}
changes = {
k: v
for k, v in post.items()
if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name', 'title_regex')
}
if not changes:
raise web.HTTPBadRequest(reason='no valid fields to update')
log.info("Subscription update requested for %s: %s", sub_id, sorted(changes.keys()))
+62 -5
View File
@@ -6,6 +6,7 @@ import asyncio
import copy
import logging
import os
import re
import time
import types
import uuid
@@ -147,6 +148,7 @@ class SubscriptionInfo:
subtitle_mode: str = "prefer_manual"
ytdl_options_presets: list[str] = field(default_factory=list)
ytdl_options_overrides: dict[str, Any] = field(default_factory=dict)
title_regex: str = ""
last_checked: Optional[float] = None
seen_ids: list[str] = field(default_factory=list)
error: Optional[str] = None
@@ -167,6 +169,7 @@ class SubscriptionInfo:
"format": self.format,
"quality": self.quality,
"folder": self.folder,
"title_regex": self.title_regex,
"last_checked": self.last_checked,
"seen_count": len(self.seen_ids),
"error": self.error,
@@ -194,6 +197,7 @@ def _subscription_to_record(sub: SubscriptionInfo) -> dict[str, Any]:
"subtitle_mode": sub.subtitle_mode,
"ytdl_options_presets": list(sub.ytdl_options_presets),
"ytdl_options_overrides": sub.ytdl_options_overrides,
"title_regex": sub.title_regex,
"last_checked": sub.last_checked,
"seen_ids": list(sub.seen_ids),
"error": sub.error,
@@ -231,6 +235,22 @@ def _subscription_from_record(record: Any) -> Optional[SubscriptionInfo]:
return None
def _normalize_title_regex_value(value: Any) -> str:
    """Coerce a raw ``title_regex`` input into a whitespace-stripped string.

    ``None`` maps to the empty string; any non-string value is converted with
    ``str()`` before stripping.
    """
    if value is None:
        return ""
    text = value if isinstance(value, str) else str(value)
    return text.strip()


def validate_title_regex(value: Any) -> str:
    """Return the title regex string to store, verifying that it compiles.

    The value is normalized first (``None`` -> ``""``, surrounding whitespace
    removed). An empty result means "no filter" and is returned without being
    compiled.

    Raises:
        re.error: if the non-empty pattern cannot be compiled. This includes
            failures that ``re.compile`` reports with a different exception
            type (``OverflowError`` for an oversized repeat count such as
            ``a{99999999999999999999}``, ``RecursionError`` for pathological
            nesting), so callers only need to handle ``re.error``.
    """
    pattern = _normalize_title_regex_value(value)
    if not pattern:
        return pattern
    try:
        re.compile(pattern)
    except (OverflowError, RecursionError) as exc:
        # Callers catch re.error only; translate so user input cannot escape as an
        # unhandled exception.
        raise re.error(str(exc)) from exc
    return pattern
def _coerce_bool(value: Any) -> bool:
"""Accept JSON booleans and common string forms used by API clients."""
if isinstance(value, bool):
@@ -448,10 +468,15 @@ class SubscriptionManager:
subtitle_mode: str,
ytdl_options_presets: Optional[list[str]] = None,
ytdl_options_overrides: Optional[dict[str, Any]] = None,
title_regex: Any = None,
) -> dict:
url = self._normalize_url(url)
if not url:
return {"status": "error", "msg": "Missing URL"}
try:
title_regex_stored = validate_title_regex(title_regex)
except re.error as exc:
return {"status": "error", "msg": f"Invalid title_regex: {exc}"}
async with self._lock:
if url in self._url_index or url in self._pending_urls:
@@ -509,6 +534,7 @@ class SubscriptionManager:
subtitle_mode=subtitle_mode,
ytdl_options_presets=list(ytdl_options_presets or []),
ytdl_options_overrides=dict(ytdl_options_overrides or {}),
title_regex=title_regex_stored,
last_checked=time.time(),
seen_ids=list(dict.fromkeys(all_ids)),
error=None,
@@ -555,6 +581,13 @@ class SubscriptionManager:
return {"status": "ok"}
async def update_subscription(self, sub_id: str, changes: dict) -> dict:
validated_tr: Optional[str] = None
if "title_regex" in changes:
try:
validated_tr = validate_title_regex(changes["title_regex"])
except re.error as exc:
return {"status": "error", "msg": f"Invalid title_regex: {exc}"}
async with self._lock:
sub = self._subs.get(sub_id)
if not sub:
@@ -568,6 +601,8 @@ class SubscriptionManager:
sub.check_interval_minutes = max(1, int(changes["check_interval_minutes"]))
if "name" in changes and changes["name"]:
sub.name = str(changes["name"])
if validated_tr is not None:
sub.title_regex = validated_tr
try:
self._save_locked()
@@ -659,9 +694,9 @@ class SubscriptionManager:
dl_submode = cur.subtitle_mode
dl_ytdl_presets = list(cur.ytdl_options_presets)
dl_ytdl_overrides = dict(cur.ytdl_options_overrides)
dl_title_regex = cur.title_regex or ""
new_entries: list[dict] = []
new_ids: list[str] = []
for ent in entries:
eid = _entry_id(ent)
if not eid:
@@ -669,10 +704,31 @@ class SubscriptionManager:
if eid in seen and ent.get("live_status") != "is_live":
continue
new_entries.append(ent)
new_ids.append(eid)
pattern_re: Optional[re.Pattern[str]] = None
if dl_title_regex:
try:
pattern_re = re.compile(dl_title_regex)
except re.error:
log.warning(
"Invalid stored title_regex on subscription %s, ignoring filter",
sub.name,
)
queue_entries: list[dict] = []
filtered_ids: list[str] = []
for ent in new_entries:
eid = _entry_id(ent)
if pattern_re is not None:
title = str(ent.get("title") or "")
if not pattern_re.search(title):
if eid:
filtered_ids.append(eid)
continue
queue_entries.append(ent)
queued_ids, queue_errors = await self._queue_subscription_entries(
new_entries,
queue_entries,
download_type=dl_type,
codec=dl_codec,
format=dl_format,
@@ -689,14 +745,15 @@ class SubscriptionManager:
ytdl_options_overrides=dl_ytdl_overrides,
)
log.info(
"Subscription check finished for %s: %d new, %d queued, %d failed",
"Subscription check finished for %s: %d new, %d filtered, %d queued, %d failed",
sub.name,
len(new_entries),
len(filtered_ids),
len(queued_ids),
len(queue_errors),
)
merged = list(dict.fromkeys(queued_ids + seen_ids_snapshot))
merged = list(dict.fromkeys(queued_ids + filtered_ids + seen_ids_snapshot))
max_seen = int(getattr(self.config, "SUBSCRIPTION_MAX_SEEN_IDS", 50000))
if len(merged) > max_seen:
merged = merged[:max_seen]
+277
View File
@@ -453,6 +453,283 @@ class SubscriptionPersistenceTests(unittest.IsolatedAsyncioTestCase):
with self.assertRaises(ValueError):
await mgr.update_subscription(sub_id, {"enabled": "maybe"})
async def test_add_subscription_rejects_invalid_title_regex(self):
    """add_subscription returns an error mentioning title_regex and stores no subscription."""
    with tempfile.TemporaryDirectory() as tmp:
        mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier())
        # Stub the playlist extraction so no network access is attempted.
        with patch(
            "subscriptions.extract_flat_playlist",
            return_value=(
                {"_type": "channel", "title": "Channel"},
                [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
            ),
        ):
            result = await mgr.add_subscription(
                "https://example.com/channel",
                check_interval_minutes=60,
                download_type="video",
                codec="auto",
                format="any",
                quality="best",
                folder="",
                custom_name_prefix="",
                auto_start=True,
                playlist_item_limit=0,
                split_by_chapters=False,
                chapter_template="",
                subtitle_language="en",
                subtitle_mode="prefer_manual",
                # "[" is an unterminated character set and does not compile.
                title_regex="[",
            )
        self.assertEqual(result["status"], "error")
        self.assertIn("title_regex", result["msg"].lower())
        # Nothing must have been registered for the rejected request.
        self.assertEqual(mgr.list_all(), [])
async def test_add_subscription_stores_and_exposes_title_regex(self):
    """A valid title_regex is kept on the subscription and included in the returned payload."""
    with tempfile.TemporaryDirectory() as tmp:
        queue = _Queue()
        mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
        # Stub the playlist extraction so no network access is attempted.
        with patch(
            "subscriptions.extract_flat_playlist",
            return_value=(
                {"_type": "channel", "title": "Channel"},
                [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
            ),
        ):
            result = await mgr.add_subscription(
                "https://example.com/channel",
                check_interval_minutes=60,
                download_type="video",
                codec="auto",
                format="any",
                quality="best",
                folder="",
                custom_name_prefix="",
                auto_start=True,
                playlist_item_limit=0,
                split_by_chapters=False,
                chapter_template="",
                subtitle_language="en",
                subtitle_mode="prefer_manual",
                title_regex="EPISODE",
            )
        self.assertEqual(result["status"], "ok")
        # The filter must be visible both in the API result and on the stored object.
        self.assertEqual(result["subscription"]["title_regex"], "EPISODE")
        self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE")
async def test_check_now_title_regex_queues_only_matches_and_marks_unmatched_seen(self):
    """check_now queues only entries whose title matches and records filtered ids as seen."""
    with tempfile.TemporaryDirectory() as tmp:
        queue = _Queue()
        mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
        # side_effect yields one return value per call, in order: the first listing is
        # consumed while adding the subscription, the second by the check_now call below.
        with patch(
            "subscriptions.extract_flat_playlist",
            side_effect=[
                (
                    {"_type": "channel", "title": "Channel"},
                    [{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}],
                ),
                (
                    {"_type": "channel", "title": "Channel"},
                    [
                        {
                            "id": "v2",
                            "title": "Minecraft | EPISODE 1",
                            "webpage_url": "https://example.com/v2",
                        },
                        {
                            "id": "v3",
                            "title": "Unrelated IRL",
                            "webpage_url": "https://example.com/v3",
                        },
                        {
                            "id": "v1",
                            "title": "Old",
                            "webpage_url": "https://example.com/v1",
                        },
                    ],
                ),
            ],
        ):
            result = await mgr.add_subscription(
                "https://example.com/channel",
                check_interval_minutes=60,
                download_type="video",
                codec="auto",
                format="any",
                quality="best",
                folder="",
                custom_name_prefix="",
                auto_start=True,
                playlist_item_limit=0,
                split_by_chapters=False,
                chapter_template="",
                subtitle_language="en",
                subtitle_mode="prefer_manual",
                title_regex="EPISODE",
            )
            await mgr.check_now([result["subscription"]["id"]])
        # Only v2 contains "EPISODE"; v3 is filtered out and v1 was already known.
        self.assertEqual([e["webpage_url"] for e, _, _ in queue.entries], ["https://example.com/v2"])
        sub = mgr.list_all()[0]
        # Expected order: queued ids, then filtered ids, then the previously seen ids.
        self.assertEqual(sub.seen_ids[:3], ["v2", "v3", "v1"])
async def test_check_now_title_regex_queue_failure_keeps_matched_id_unseen(self):
    """A matching entry that fails to queue is not marked seen, while a filtered one is."""
    with tempfile.TemporaryDirectory() as tmp:
        queue = _Queue()
        mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
        # side_effect yields one return value per call, in order: the first listing is
        # consumed while adding the subscription, the second by the check_now call below.
        with patch(
            "subscriptions.extract_flat_playlist",
            side_effect=[
                (
                    {"_type": "channel", "title": "Channel"},
                    [{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}],
                ),
                (
                    {"_type": "channel", "title": "Channel"},
                    [
                        {
                            "id": "v2",
                            "title": "Show | EPISODE 1",
                            "webpage_url": "https://example.com/v2",
                        },
                        {
                            "id": "v3",
                            "title": "Other",
                            "webpage_url": "https://example.com/v3",
                        },
                    ],
                ),
            ],
        ):
            result = await mgr.add_subscription(
                "https://example.com/channel",
                check_interval_minutes=60,
                download_type="video",
                codec="auto",
                format="any",
                quality="best",
                folder="",
                custom_name_prefix="",
                auto_start=True,
                playlist_item_limit=0,
                split_by_chapters=False,
                chapter_template="",
                subtitle_language="en",
                subtitle_mode="prefer_manual",
                title_regex="EPISODE",
            )
            # NOTE(review): presumably makes the _Queue stub reject entries with
            # "queue failed" -- confirm against the stub definition.
            queue.fail = True
            await mgr.check_now([result["subscription"]["id"]])
        sub = mgr.list_all()[0]
        self.assertEqual(sub.error, "queue failed")
        # v3 was filtered (so it is seen); v2 matched but was not queued, so it must
        # stay unseen.
        self.assertEqual(set(sub.seen_ids), {"v1", "v3"})
        self.assertNotIn("v2", sub.seen_ids)
async def test_update_subscription_rejects_invalid_title_regex(self):
    """update_subscription returns an error for a bad pattern and leaves the stored filter empty."""
    with tempfile.TemporaryDirectory() as tmp:
        queue = _Queue()
        mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
        # Stub the playlist extraction so no network access is attempted.
        with patch(
            "subscriptions.extract_flat_playlist",
            return_value=(
                {"_type": "channel", "title": "Channel"},
                [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
            ),
        ):
            # No title_regex is passed, so the subscription starts without a filter.
            result = await mgr.add_subscription(
                "https://example.com/channel",
                check_interval_minutes=60,
                download_type="video",
                codec="auto",
                format="any",
                quality="best",
                folder="",
                custom_name_prefix="",
                auto_start=True,
                playlist_item_limit=0,
                split_by_chapters=False,
                chapter_template="",
                subtitle_language="en",
                subtitle_mode="prefer_manual",
            )
        sub_id = result["subscription"]["id"]
        # "(" is an unbalanced group and does not compile.
        upd = await mgr.update_subscription(sub_id, {"title_regex": "("})
        self.assertEqual(upd["status"], "error")
        self.assertEqual(mgr.list_all()[0].title_regex, "")
async def test_update_subscription_persists_valid_title_regex(self):
    """update_subscription accepts a valid pattern and exposes it in the result and on the object."""
    with tempfile.TemporaryDirectory() as tmp:
        queue = _Queue()
        mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
        # Stub the playlist extraction so no network access is attempted.
        with patch(
            "subscriptions.extract_flat_playlist",
            return_value=(
                {"_type": "channel", "title": "Channel"},
                [{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
            ),
        ):
            # No title_regex is passed, so the subscription starts without a filter.
            result = await mgr.add_subscription(
                "https://example.com/channel",
                check_interval_minutes=60,
                download_type="video",
                codec="auto",
                format="any",
                quality="best",
                folder="",
                custom_name_prefix="",
                auto_start=True,
                playlist_item_limit=0,
                split_by_chapters=False,
                chapter_template="",
                subtitle_language="en",
                subtitle_mode="prefer_manual",
            )
        sub_id = result["subscription"]["id"]
        upd = await mgr.update_subscription(sub_id, {"title_regex": "foo|bar"})
        self.assertEqual(upd["status"], "ok")
        self.assertEqual(upd["subscription"]["title_regex"], "foo|bar")
        self.assertEqual(mgr.list_all()[0].title_regex, "foo|bar")
def test_persistence_includes_title_regex(self):
    """A title_regex stored in subscriptions.json is present on the subscription after loading."""
    with tempfile.TemporaryDirectory() as tmp:
        # Write a persisted state file by hand; the manager constructed below is
        # expected to read it from the same directory.
        json_path = os.path.join(tmp, "subscriptions.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "schema_version": 2,
                    "kind": "subscriptions",
                    "items": [
                        {
                            "id": "sub-1",
                            "name": "Channel",
                            "url": "https://example.com/channel",
                            "enabled": True,
                            "check_interval_minutes": 60,
                            "download_type": "video",
                            "codec": "auto",
                            "format": "any",
                            "quality": "best",
                            "folder": "",
                            "custom_name_prefix": "",
                            "auto_start": True,
                            "playlist_item_limit": 0,
                            "split_by_chapters": False,
                            "chapter_template": "",
                            "subtitle_language": "en",
                            "subtitle_mode": "prefer_manual",
                            "ytdl_options_presets": [],
                            "ytdl_options_overrides": {},
                            "title_regex": "EPISODE",
                            "last_checked": None,
                            "seen_ids": [],
                            "error": None,
                        }
                    ],
                },
                f,
            )
        mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier())
        self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE")
class ExtractFlatPlaylistTests(unittest.TestCase):
def test_descends_one_level_when_root_entries_are_nested_collections(self):
responses = iter(
+40
View File
@@ -475,6 +475,18 @@
ngbTooltip="How often to poll subscriptions for new videos">
</div>
</div>
<div class="col-md-6">
<div class="input-group">
<span class="input-group-text">Subscription Title Filter</span>
<input type="text"
class="form-control"
name="titleRegex"
[(ngModel)]="titleRegex"
[disabled]="addInProgress || subscribeInProgress || downloads.loading"
placeholder="Optional regex"
ngbTooltip="In subscriptions, only titles matching this Python-style regex are queued. Empty = all. Case-sensitive; use (?i) in the pattern for case-insensitive.">
</div>
</div>
</div>
<!-- yt-dlp -->
@@ -887,6 +899,8 @@
</th>
<th scope="col">Name</th>
<th scope="col">URL</th>
<th scope="col" class="text-nowrap"
ngbTooltip="Subscriptions only — which new video titles to queue when this feed is checked. Does not affect manual downloads.">Sub. title filter</th>
<th scope="col" class="text-nowrap">Interval (min)</th>
<th scope="col" class="text-nowrap">Last checked</th>
<th scope="col">Status</th>
@@ -905,6 +919,32 @@
</td>
<td>{{ entry[1].name }}</td>
<td class="text-break"><a [href]="entry[1].url" target="_blank" rel="noopener">{{ entry[1].url }}</a></td>
<td>
@if (editingTitleRegexId === entry[0]) {
<div class="d-flex flex-wrap gap-1 align-items-center">
<input type="text"
class="form-control form-control-sm flex-grow-1"
[name]="'subTitleRegex' + entry[0]"
[(ngModel)]="titleRegexEditDraft"
[disabled]="downloads.loading" />
<button type="button" class="btn btn-sm btn-outline-secondary"
(click)="saveTitleRegex(entry[0])"
[disabled]="downloads.loading">Save</button>
<button type="button" class="btn btn-sm btn-outline-secondary"
(click)="cancelEditTitleRegex()"
[disabled]="downloads.loading">Cancel</button>
</div>
} @else {
<div class="d-flex flex-wrap gap-1 align-items-center">
<span class="text-muted small text-break"
[class.text-secondary]="!entry[1].title_regex">{{ entry[1].title_regex || '—' }}</span>
<button type="button" class="btn btn-link btn-sm p-0"
(click)="beginEditTitleRegex(entry[0], entry[1].title_regex)"
[disabled]="downloads.loading"
ngbTooltip="Edit subscription title filter (subscriptions only; not for one-off downloads)">Edit</button>
</div>
}
</td>
<td>{{ entry[1].check_interval_minutes }}</td>
<td class="text-nowrap">
@if (entry[1].last_checked !== null) {
+32 -1
View File
@@ -63,8 +63,10 @@ class DownloadsServiceStub {
class SubscriptionsServiceStub {
subscriptions = new Map();
subscriptionsChanged = new Subject<void>();
subscribeCalls: unknown[] = [];
subscribe() {
subscribe(payload: unknown) {
this.subscribeCalls.push(payload);
return of({ status: 'ok' as const });
}
@@ -72,6 +74,10 @@ class SubscriptionsServiceStub {
return of({});
}
update() {
return of({ status: 'ok' as const });
}
refreshList() {
return of([]);
}
@@ -175,4 +181,29 @@ describe('App', () => {
expect(payload.ytdlOptionsOverrides).toBe('');
});
// The title filter typed into the add form must be forwarded to the subscriptions service.
it('includes titleRegex in subscribe payload', () => {
  const fixture = TestBed.createComponent(App);
  const app = fixture.componentInstance;
  // The injected service is the stub, which records every subscribe() payload.
  const subs = TestBed.inject(SubscriptionsService) as unknown as SubscriptionsServiceStub;
  app.addUrl = 'https://example.com/channel';
  app.titleRegex = 'EPISODE';
  app.addSubscription();
  expect(subs.subscribeCalls.length).toBe(1);
  const payload = subs.subscribeCalls[0] as { titleRegex: string };
  expect(payload.titleRegex).toBe('EPISODE');
});
// A filter that fails the component's client-side RegExp check must raise an alert and
// never reach the subscriptions service.
it('blocks subscribe with invalid title regex', () => {
  // Silence the real alert dialog while still recording its argument.
  const alertSpy = vi.spyOn(window, 'alert').mockImplementation(() => undefined);
  const fixture = TestBed.createComponent(App);
  const app = fixture.componentInstance;
  const subs = TestBed.inject(SubscriptionsService) as unknown as SubscriptionsServiceStub;
  app.addUrl = 'https://example.com/channel';
  // '[' is an unterminated character class, so constructing a RegExp from it throws.
  app.titleRegex = '[';
  app.addSubscription();
  expect(subs.subscribeCalls.length).toBe(0);
  expect(alertSpy).toHaveBeenCalledWith('Invalid subscription title filter (regex)');
  alertSpy.mockRestore();
});
});
+46
View File
@@ -90,6 +90,9 @@ export class App implements AfterViewInit, OnInit, OnDestroy {
cancelRequested = false;
subscribeInProgress = false;
checkIntervalMinutes = 60;
titleRegex = '';
editingTitleRegexId: string | null = null;
titleRegexEditDraft = '';
cachedSubs: [string, SubscriptionRow][] = [];
selectedSubscriptionIds = new Set<string>();
checkingSubscriptionIds = new Set<string>();
@@ -560,6 +563,15 @@ export class App implements AfterViewInit, OnInit, OnDestroy {
alert('Please enter a URL');
return;
}
const tr = (this.titleRegex || '').trim();
if (tr) {
try {
void RegExp(tr);
} catch {
alert('Invalid subscription title filter (regex)');
return;
}
}
if (payload.splitByChapters && !payload.chapterTemplate.includes('%(section_number)')) {
alert('Chapter template must include %(section_number)');
return;
@@ -572,6 +584,7 @@ export class App implements AfterViewInit, OnInit, OnDestroy {
.subscribe({
...payload,
checkIntervalMinutes: this.checkIntervalMinutes,
titleRegex: tr,
})
.pipe(
takeUntilDestroyed(this.destroyRef),
@@ -587,11 +600,44 @@ export class App implements AfterViewInit, OnInit, OnDestroy {
alert(r.msg || 'Subscribe failed');
} else {
this.addUrl = '';
this.titleRegex = '';
}
},
});
}
/**
 * Put the subscription row `id` into inline edit mode for its title filter.
 *
 * The edit draft is seeded with the row's current filter, or an empty string when the
 * row has none, and a change-detection pass is requested.
 */
beginEditTitleRegex(id: string, current: string | undefined) {
  const draft = current ?? '';
  this.titleRegexEditDraft = draft;
  this.editingTitleRegexId = id;
  this.cdr.markForCheck();
}
/**
 * Leave inline edit mode for the title filter, discarding the current draft, and
 * request a change-detection pass.
 */
cancelEditTitleRegex() {
  this.titleRegexEditDraft = '';
  this.editingTitleRegexId = null;
  this.cdr.markForCheck();
}
/**
 * Persist the inline-edited title filter for subscription `id`.
 *
 * The draft is trimmed; an empty value clears the filter. The pattern is deliberately
 * NOT pre-validated with JavaScript `RegExp`: the backend compiles it with Python `re`,
 * and valid Python patterns such as `(?i)episode` or `(?P<n>\d+)` are syntax errors in
 * JavaScript, so a client-side check would reject them before they reach the server.
 * The backend validates the pattern and answers with an error status when it does not
 * compile; that message is shown to the user and the editor stays open.
 */
saveTitleRegex(id: string) {
  const pattern = (this.titleRegexEditDraft || '').trim();
  this.subscriptionsSvc.update(id, { title_regex: pattern }).subscribe((res) => {
    const error = this.getStatusError(res);
    if (error) {
      // Keep the row in edit mode so the user can correct the pattern.
      alert(error);
      return;
    }
    this.cancelEditTitleRegex();
  });
}
deleteSubscription(id: string) {
this.subscriptionsSvc.delete([id]).subscribe((res) => {
const error = this.getStatusError(res);
+1
View File
@@ -9,6 +9,7 @@ export interface SubscriptionRow {
format: string;
quality: string;
folder: string;
title_regex?: string;
last_checked: number | null;
seen_count: number;
error: string | null;
+6 -1
View File
@@ -10,6 +10,7 @@ import { AddDownloadPayload } from './downloads.service';
export interface SubscribePayload extends AddDownloadPayload {
checkIntervalMinutes: number;
titleRegex: string;
}
@Injectable({
@@ -97,6 +98,7 @@ export class SubscriptionsService {
ytdl_options_presets: payload.ytdlOptionsPresets,
ytdl_options_overrides: payload.ytdlOptionsOverrides,
check_interval_minutes: payload.checkIntervalMinutes,
title_regex: payload.titleRegex,
})
.pipe(catchError((err) => this.handleHTTPError(err)));
}
@@ -105,7 +107,10 @@ export class SubscriptionsService {
return this.http.post('subscriptions/delete', { ids }).pipe(catchError((err) => this.handleHTTPError(err)));
}
update(id: string, changes: Partial<Pick<SubscriptionRow, 'enabled' | 'check_interval_minutes' | 'name'>>) {
update(
id: string,
changes: Partial<Pick<SubscriptionRow, 'enabled' | 'check_interval_minutes' | 'name' | 'title_regex'>>,
) {
return this.http
.post('subscriptions/update', { id, ...changes })
.pipe(catchError((err) => this.handleHTTPError(err)));