mirror of
https://github.com/alexta69/metube.git
synced 2026-06-13 16:40:05 +00:00
title filter for subscriptions (closes #968)
This commit is contained in:
+6
-1
@@ -645,6 +645,7 @@ async def subscribe(request):
|
||||
subtitle_mode=o['subtitle_mode'],
|
||||
ytdl_options_presets=o['ytdl_options_presets'],
|
||||
ytdl_options_overrides=o['ytdl_options_overrides'],
|
||||
title_regex=post.get('title_regex'),
|
||||
)
|
||||
return web.Response(text=serializer.encode(result))
|
||||
|
||||
@@ -660,7 +661,11 @@ async def subscriptions_update(request):
|
||||
sub_id = post.get('id')
|
||||
if not sub_id:
|
||||
raise web.HTTPBadRequest(reason='missing subscription id')
|
||||
changes = {k: v for k, v in post.items() if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name')}
|
||||
changes = {
|
||||
k: v
|
||||
for k, v in post.items()
|
||||
if k != 'id' and k in ('enabled', 'check_interval_minutes', 'name', 'title_regex')
|
||||
}
|
||||
if not changes:
|
||||
raise web.HTTPBadRequest(reason='no valid fields to update')
|
||||
log.info("Subscription update requested for %s: %s", sub_id, sorted(changes.keys()))
|
||||
|
||||
+62
-5
@@ -6,6 +6,7 @@ import asyncio
|
||||
import copy
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import types
|
||||
import uuid
|
||||
@@ -147,6 +148,7 @@ class SubscriptionInfo:
|
||||
subtitle_mode: str = "prefer_manual"
|
||||
ytdl_options_presets: list[str] = field(default_factory=list)
|
||||
ytdl_options_overrides: dict[str, Any] = field(default_factory=dict)
|
||||
title_regex: str = ""
|
||||
last_checked: Optional[float] = None
|
||||
seen_ids: list[str] = field(default_factory=list)
|
||||
error: Optional[str] = None
|
||||
@@ -167,6 +169,7 @@ class SubscriptionInfo:
|
||||
"format": self.format,
|
||||
"quality": self.quality,
|
||||
"folder": self.folder,
|
||||
"title_regex": self.title_regex,
|
||||
"last_checked": self.last_checked,
|
||||
"seen_count": len(self.seen_ids),
|
||||
"error": self.error,
|
||||
@@ -194,6 +197,7 @@ def _subscription_to_record(sub: SubscriptionInfo) -> dict[str, Any]:
|
||||
"subtitle_mode": sub.subtitle_mode,
|
||||
"ytdl_options_presets": list(sub.ytdl_options_presets),
|
||||
"ytdl_options_overrides": sub.ytdl_options_overrides,
|
||||
"title_regex": sub.title_regex,
|
||||
"last_checked": sub.last_checked,
|
||||
"seen_ids": list(sub.seen_ids),
|
||||
"error": sub.error,
|
||||
@@ -231,6 +235,22 @@ def _subscription_from_record(record: Any) -> Optional[SubscriptionInfo]:
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_title_regex_value(value: Any) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, str):
|
||||
return value.strip()
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
def validate_title_regex(value: Any) -> str:
|
||||
"""Return stored title regex string; non-empty values must compile (re.error on failure)."""
|
||||
s = _normalize_title_regex_value(value)
|
||||
if s:
|
||||
re.compile(s)
|
||||
return s
|
||||
|
||||
|
||||
def _coerce_bool(value: Any) -> bool:
|
||||
"""Accept JSON booleans and common string forms used by API clients."""
|
||||
if isinstance(value, bool):
|
||||
@@ -448,10 +468,15 @@ class SubscriptionManager:
|
||||
subtitle_mode: str,
|
||||
ytdl_options_presets: Optional[list[str]] = None,
|
||||
ytdl_options_overrides: Optional[dict[str, Any]] = None,
|
||||
title_regex: Any = None,
|
||||
) -> dict:
|
||||
url = self._normalize_url(url)
|
||||
if not url:
|
||||
return {"status": "error", "msg": "Missing URL"}
|
||||
try:
|
||||
title_regex_stored = validate_title_regex(title_regex)
|
||||
except re.error as exc:
|
||||
return {"status": "error", "msg": f"Invalid title_regex: {exc}"}
|
||||
|
||||
async with self._lock:
|
||||
if url in self._url_index or url in self._pending_urls:
|
||||
@@ -509,6 +534,7 @@ class SubscriptionManager:
|
||||
subtitle_mode=subtitle_mode,
|
||||
ytdl_options_presets=list(ytdl_options_presets or []),
|
||||
ytdl_options_overrides=dict(ytdl_options_overrides or {}),
|
||||
title_regex=title_regex_stored,
|
||||
last_checked=time.time(),
|
||||
seen_ids=list(dict.fromkeys(all_ids)),
|
||||
error=None,
|
||||
@@ -555,6 +581,13 @@ class SubscriptionManager:
|
||||
return {"status": "ok"}
|
||||
|
||||
async def update_subscription(self, sub_id: str, changes: dict) -> dict:
|
||||
validated_tr: Optional[str] = None
|
||||
if "title_regex" in changes:
|
||||
try:
|
||||
validated_tr = validate_title_regex(changes["title_regex"])
|
||||
except re.error as exc:
|
||||
return {"status": "error", "msg": f"Invalid title_regex: {exc}"}
|
||||
|
||||
async with self._lock:
|
||||
sub = self._subs.get(sub_id)
|
||||
if not sub:
|
||||
@@ -568,6 +601,8 @@ class SubscriptionManager:
|
||||
sub.check_interval_minutes = max(1, int(changes["check_interval_minutes"]))
|
||||
if "name" in changes and changes["name"]:
|
||||
sub.name = str(changes["name"])
|
||||
if validated_tr is not None:
|
||||
sub.title_regex = validated_tr
|
||||
|
||||
try:
|
||||
self._save_locked()
|
||||
@@ -659,9 +694,9 @@ class SubscriptionManager:
|
||||
dl_submode = cur.subtitle_mode
|
||||
dl_ytdl_presets = list(cur.ytdl_options_presets)
|
||||
dl_ytdl_overrides = dict(cur.ytdl_options_overrides)
|
||||
dl_title_regex = cur.title_regex or ""
|
||||
|
||||
new_entries: list[dict] = []
|
||||
new_ids: list[str] = []
|
||||
for ent in entries:
|
||||
eid = _entry_id(ent)
|
||||
if not eid:
|
||||
@@ -669,10 +704,31 @@ class SubscriptionManager:
|
||||
if eid in seen and ent.get("live_status") != "is_live":
|
||||
continue
|
||||
new_entries.append(ent)
|
||||
new_ids.append(eid)
|
||||
|
||||
pattern_re: Optional[re.Pattern[str]] = None
|
||||
if dl_title_regex:
|
||||
try:
|
||||
pattern_re = re.compile(dl_title_regex)
|
||||
except re.error:
|
||||
log.warning(
|
||||
"Invalid stored title_regex on subscription %s, ignoring filter",
|
||||
sub.name,
|
||||
)
|
||||
|
||||
queue_entries: list[dict] = []
|
||||
filtered_ids: list[str] = []
|
||||
for ent in new_entries:
|
||||
eid = _entry_id(ent)
|
||||
if pattern_re is not None:
|
||||
title = str(ent.get("title") or "")
|
||||
if not pattern_re.search(title):
|
||||
if eid:
|
||||
filtered_ids.append(eid)
|
||||
continue
|
||||
queue_entries.append(ent)
|
||||
|
||||
queued_ids, queue_errors = await self._queue_subscription_entries(
|
||||
new_entries,
|
||||
queue_entries,
|
||||
download_type=dl_type,
|
||||
codec=dl_codec,
|
||||
format=dl_format,
|
||||
@@ -689,14 +745,15 @@ class SubscriptionManager:
|
||||
ytdl_options_overrides=dl_ytdl_overrides,
|
||||
)
|
||||
log.info(
|
||||
"Subscription check finished for %s: %d new, %d queued, %d failed",
|
||||
"Subscription check finished for %s: %d new, %d filtered, %d queued, %d failed",
|
||||
sub.name,
|
||||
len(new_entries),
|
||||
len(filtered_ids),
|
||||
len(queued_ids),
|
||||
len(queue_errors),
|
||||
)
|
||||
|
||||
merged = list(dict.fromkeys(queued_ids + seen_ids_snapshot))
|
||||
merged = list(dict.fromkeys(queued_ids + filtered_ids + seen_ids_snapshot))
|
||||
max_seen = int(getattr(self.config, "SUBSCRIPTION_MAX_SEEN_IDS", 50000))
|
||||
if len(merged) > max_seen:
|
||||
merged = merged[:max_seen]
|
||||
|
||||
@@ -453,6 +453,283 @@ class SubscriptionPersistenceTests(unittest.IsolatedAsyncioTestCase):
|
||||
with self.assertRaises(ValueError):
|
||||
await mgr.update_subscription(sub_id, {"enabled": "maybe"})
|
||||
|
||||
async def test_add_subscription_rejects_invalid_title_regex(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier())
|
||||
with patch(
|
||||
"subscriptions.extract_flat_playlist",
|
||||
return_value=(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
|
||||
),
|
||||
):
|
||||
result = await mgr.add_subscription(
|
||||
"https://example.com/channel",
|
||||
check_interval_minutes=60,
|
||||
download_type="video",
|
||||
codec="auto",
|
||||
format="any",
|
||||
quality="best",
|
||||
folder="",
|
||||
custom_name_prefix="",
|
||||
auto_start=True,
|
||||
playlist_item_limit=0,
|
||||
split_by_chapters=False,
|
||||
chapter_template="",
|
||||
subtitle_language="en",
|
||||
subtitle_mode="prefer_manual",
|
||||
title_regex="[",
|
||||
)
|
||||
self.assertEqual(result["status"], "error")
|
||||
self.assertIn("title_regex", result["msg"].lower())
|
||||
self.assertEqual(mgr.list_all(), [])
|
||||
|
||||
async def test_add_subscription_stores_and_exposes_title_regex(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
queue = _Queue()
|
||||
mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
|
||||
with patch(
|
||||
"subscriptions.extract_flat_playlist",
|
||||
return_value=(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
|
||||
),
|
||||
):
|
||||
result = await mgr.add_subscription(
|
||||
"https://example.com/channel",
|
||||
check_interval_minutes=60,
|
||||
download_type="video",
|
||||
codec="auto",
|
||||
format="any",
|
||||
quality="best",
|
||||
folder="",
|
||||
custom_name_prefix="",
|
||||
auto_start=True,
|
||||
playlist_item_limit=0,
|
||||
split_by_chapters=False,
|
||||
chapter_template="",
|
||||
subtitle_language="en",
|
||||
subtitle_mode="prefer_manual",
|
||||
title_regex="EPISODE",
|
||||
)
|
||||
self.assertEqual(result["status"], "ok")
|
||||
self.assertEqual(result["subscription"]["title_regex"], "EPISODE")
|
||||
self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE")
|
||||
|
||||
async def test_check_now_title_regex_queues_only_matches_and_marks_unmatched_seen(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
queue = _Queue()
|
||||
mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
|
||||
with patch(
|
||||
"subscriptions.extract_flat_playlist",
|
||||
side_effect=[
|
||||
(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}],
|
||||
),
|
||||
(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[
|
||||
{
|
||||
"id": "v2",
|
||||
"title": "Minecraft | EPISODE 1",
|
||||
"webpage_url": "https://example.com/v2",
|
||||
},
|
||||
{
|
||||
"id": "v3",
|
||||
"title": "Unrelated IRL",
|
||||
"webpage_url": "https://example.com/v3",
|
||||
},
|
||||
{
|
||||
"id": "v1",
|
||||
"title": "Old",
|
||||
"webpage_url": "https://example.com/v1",
|
||||
},
|
||||
],
|
||||
),
|
||||
],
|
||||
):
|
||||
result = await mgr.add_subscription(
|
||||
"https://example.com/channel",
|
||||
check_interval_minutes=60,
|
||||
download_type="video",
|
||||
codec="auto",
|
||||
format="any",
|
||||
quality="best",
|
||||
folder="",
|
||||
custom_name_prefix="",
|
||||
auto_start=True,
|
||||
playlist_item_limit=0,
|
||||
split_by_chapters=False,
|
||||
chapter_template="",
|
||||
subtitle_language="en",
|
||||
subtitle_mode="prefer_manual",
|
||||
title_regex="EPISODE",
|
||||
)
|
||||
await mgr.check_now([result["subscription"]["id"]])
|
||||
self.assertEqual([e["webpage_url"] for e, _, _ in queue.entries], ["https://example.com/v2"])
|
||||
sub = mgr.list_all()[0]
|
||||
self.assertEqual(sub.seen_ids[:3], ["v2", "v3", "v1"])
|
||||
|
||||
async def test_check_now_title_regex_queue_failure_keeps_matched_id_unseen(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
queue = _Queue()
|
||||
mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
|
||||
with patch(
|
||||
"subscriptions.extract_flat_playlist",
|
||||
side_effect=[
|
||||
(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[{"id": "v1", "title": "Old", "webpage_url": "https://example.com/v1"}],
|
||||
),
|
||||
(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[
|
||||
{
|
||||
"id": "v2",
|
||||
"title": "Show | EPISODE 1",
|
||||
"webpage_url": "https://example.com/v2",
|
||||
},
|
||||
{
|
||||
"id": "v3",
|
||||
"title": "Other",
|
||||
"webpage_url": "https://example.com/v3",
|
||||
},
|
||||
],
|
||||
),
|
||||
],
|
||||
):
|
||||
result = await mgr.add_subscription(
|
||||
"https://example.com/channel",
|
||||
check_interval_minutes=60,
|
||||
download_type="video",
|
||||
codec="auto",
|
||||
format="any",
|
||||
quality="best",
|
||||
folder="",
|
||||
custom_name_prefix="",
|
||||
auto_start=True,
|
||||
playlist_item_limit=0,
|
||||
split_by_chapters=False,
|
||||
chapter_template="",
|
||||
subtitle_language="en",
|
||||
subtitle_mode="prefer_manual",
|
||||
title_regex="EPISODE",
|
||||
)
|
||||
queue.fail = True
|
||||
await mgr.check_now([result["subscription"]["id"]])
|
||||
sub = mgr.list_all()[0]
|
||||
self.assertEqual(sub.error, "queue failed")
|
||||
self.assertEqual(set(sub.seen_ids), {"v1", "v3"})
|
||||
self.assertNotIn("v2", sub.seen_ids)
|
||||
|
||||
async def test_update_subscription_rejects_invalid_title_regex(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
queue = _Queue()
|
||||
mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
|
||||
with patch(
|
||||
"subscriptions.extract_flat_playlist",
|
||||
return_value=(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
|
||||
),
|
||||
):
|
||||
result = await mgr.add_subscription(
|
||||
"https://example.com/channel",
|
||||
check_interval_minutes=60,
|
||||
download_type="video",
|
||||
codec="auto",
|
||||
format="any",
|
||||
quality="best",
|
||||
folder="",
|
||||
custom_name_prefix="",
|
||||
auto_start=True,
|
||||
playlist_item_limit=0,
|
||||
split_by_chapters=False,
|
||||
chapter_template="",
|
||||
subtitle_language="en",
|
||||
subtitle_mode="prefer_manual",
|
||||
)
|
||||
sub_id = result["subscription"]["id"]
|
||||
upd = await mgr.update_subscription(sub_id, {"title_regex": "("})
|
||||
self.assertEqual(upd["status"], "error")
|
||||
self.assertEqual(mgr.list_all()[0].title_regex, "")
|
||||
|
||||
async def test_update_subscription_persists_valid_title_regex(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
queue = _Queue()
|
||||
mgr = SubscriptionManager(_Config(tmp), queue, _Notifier())
|
||||
with patch(
|
||||
"subscriptions.extract_flat_playlist",
|
||||
return_value=(
|
||||
{"_type": "channel", "title": "Channel"},
|
||||
[{"id": "v1", "title": "One", "webpage_url": "https://example.com/v1"}],
|
||||
),
|
||||
):
|
||||
result = await mgr.add_subscription(
|
||||
"https://example.com/channel",
|
||||
check_interval_minutes=60,
|
||||
download_type="video",
|
||||
codec="auto",
|
||||
format="any",
|
||||
quality="best",
|
||||
folder="",
|
||||
custom_name_prefix="",
|
||||
auto_start=True,
|
||||
playlist_item_limit=0,
|
||||
split_by_chapters=False,
|
||||
chapter_template="",
|
||||
subtitle_language="en",
|
||||
subtitle_mode="prefer_manual",
|
||||
)
|
||||
sub_id = result["subscription"]["id"]
|
||||
upd = await mgr.update_subscription(sub_id, {"title_regex": "foo|bar"})
|
||||
self.assertEqual(upd["status"], "ok")
|
||||
self.assertEqual(upd["subscription"]["title_regex"], "foo|bar")
|
||||
self.assertEqual(mgr.list_all()[0].title_regex, "foo|bar")
|
||||
|
||||
def test_persistence_includes_title_regex(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
json_path = os.path.join(tmp, "subscriptions.json")
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(
|
||||
{
|
||||
"schema_version": 2,
|
||||
"kind": "subscriptions",
|
||||
"items": [
|
||||
{
|
||||
"id": "sub-1",
|
||||
"name": "Channel",
|
||||
"url": "https://example.com/channel",
|
||||
"enabled": True,
|
||||
"check_interval_minutes": 60,
|
||||
"download_type": "video",
|
||||
"codec": "auto",
|
||||
"format": "any",
|
||||
"quality": "best",
|
||||
"folder": "",
|
||||
"custom_name_prefix": "",
|
||||
"auto_start": True,
|
||||
"playlist_item_limit": 0,
|
||||
"split_by_chapters": False,
|
||||
"chapter_template": "",
|
||||
"subtitle_language": "en",
|
||||
"subtitle_mode": "prefer_manual",
|
||||
"ytdl_options_presets": [],
|
||||
"ytdl_options_overrides": {},
|
||||
"title_regex": "EPISODE",
|
||||
"last_checked": None,
|
||||
"seen_ids": [],
|
||||
"error": None,
|
||||
}
|
||||
],
|
||||
},
|
||||
f,
|
||||
)
|
||||
mgr = SubscriptionManager(_Config(tmp), _Queue(), _Notifier())
|
||||
self.assertEqual(mgr.list_all()[0].title_regex, "EPISODE")
|
||||
|
||||
|
||||
class ExtractFlatPlaylistTests(unittest.TestCase):
|
||||
def test_descends_one_level_when_root_entries_are_nested_collections(self):
|
||||
responses = iter(
|
||||
|
||||
Reference in New Issue
Block a user