From a4d42573ef63bdb9b02d202696549063014e3bfd Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 13 Apr 2023 17:29:17 +0700 Subject: [PATCH 01/23] move DurationConverter to separate module --- tubearchivist/home/src/download/queue.py | 3 +- tubearchivist/home/src/index/video.py | 3 +- tubearchivist/home/src/index/video_streams.py | 54 +++++++++++++++++++ tubearchivist/home/src/ta/helper.py | 52 ------------------ 4 files changed, 58 insertions(+), 54 deletions(-) create mode 100644 tubearchivist/home/src/index/video_streams.py diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 5a453ec..3309f51 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -16,8 +16,9 @@ from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.playlist import YoutubePlaylist from home.src.index.video_constants import VideoTypeEnum +from home.src.index.video_streams import DurationConverter from home.src.ta.config import AppConfig -from home.src.ta.helper import DurationConverter, is_shorts +from home.src.ta.helper import is_shorts from home.src.ta.ta_redis import RedisQueue diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 406ad66..f9afffa 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -16,7 +16,8 @@ from home.src.index import playlist as ta_playlist from home.src.index.generic import YouTubeItem from home.src.index.subtitle import YoutubeSubtitle from home.src.index.video_constants import VideoTypeEnum -from home.src.ta.helper import DurationConverter, clean_string, randomizor +from home.src.index.video_streams import DurationConverter +from home.src.ta.helper import clean_string, randomizor from home.src.ta.ta_redis import RedisArchivist from ryd_client import ryd_client diff --git 
a/tubearchivist/home/src/index/video_streams.py b/tubearchivist/home/src/index/video_streams.py new file mode 100644 index 0000000..01873a8 --- /dev/null +++ b/tubearchivist/home/src/index/video_streams.py @@ -0,0 +1,54 @@ +"""extract metadata from video streams""" + +import subprocess + + +class DurationConverter: + """ + using ffmpeg to get and parse duration from filepath + """ + + @staticmethod + def get_sec(file_path): + """read duration from file""" + duration = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", + file_path, + ], + capture_output=True, + check=True, + ) + duration_raw = duration.stdout.decode().strip() + if duration_raw == "N/A": + return 0 + + duration_sec = int(float(duration_raw)) + return duration_sec + + @staticmethod + def get_str(duration_sec): + """takes duration in sec and returns clean string""" + if not duration_sec: + # failed to extract + return "NA" + + hours = duration_sec // 3600 + minutes = (duration_sec - (hours * 3600)) // 60 + secs = duration_sec - (hours * 3600) - (minutes * 60) + + duration_str = str() + if hours: + duration_str = str(hours).zfill(2) + ":" + if minutes: + duration_str = duration_str + str(minutes).zfill(2) + ":" + else: + duration_str = duration_str + "00:" + duration_str = duration_str + str(secs).zfill(2) + return duration_str diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index 1fdf15a..c1a6024 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -8,7 +8,6 @@ import os import random import re import string -import subprocess import unicodedata from datetime import datetime from urllib.parse import urlparse @@ -163,54 +162,3 @@ def ta_host_parser(ta_host): csrf_trusted_origins.append(f"{parsed.scheme}://{parsed.hostname}") return allowed_hosts, csrf_trusted_origins - - -class DurationConverter: - """ - using ffmpeg to get and parse 
duration from filepath - """ - - @staticmethod - def get_sec(file_path): - """read duration from file""" - duration = subprocess.run( - [ - "ffprobe", - "-v", - "error", - "-show_entries", - "format=duration", - "-of", - "default=noprint_wrappers=1:nokey=1", - file_path, - ], - capture_output=True, - check=True, - ) - duration_raw = duration.stdout.decode().strip() - if duration_raw == "N/A": - return 0 - - duration_sec = int(float(duration_raw)) - return duration_sec - - @staticmethod - def get_str(duration_sec): - """takes duration in sec and returns clean string""" - if not duration_sec: - # failed to extract - return "NA" - - hours = duration_sec // 3600 - minutes = (duration_sec - (hours * 3600)) // 60 - secs = duration_sec - (hours * 3600) - (minutes * 60) - - duration_str = str() - if hours: - duration_str = str(hours).zfill(2) + ":" - if minutes: - duration_str = duration_str + str(minutes).zfill(2) + ":" - else: - duration_str = duration_str + "00:" - duration_str = duration_str + str(secs).zfill(2) - return duration_str From a17f05ef21ff3719fb98488260a1dfc3c357825b Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 15 Apr 2023 17:45:20 +0700 Subject: [PATCH 02/23] index streams and file size --- tubearchivist/home/src/es/index_mapping.json | 27 +++++++ tubearchivist/home/src/index/video.py | 17 +++- tubearchivist/home/src/index/video_streams.py | 78 +++++++++++++++++++ tubearchivist/home/templates/home/video.html | 42 ++++++---- tubearchivist/static/css/style.css | 4 + 5 files changed, 153 insertions(+), 15 deletions(-) diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index b47ea44..6641159 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -146,6 +146,9 @@ "type": "keyword", "index": false }, + "media_size": { + "type": "long" + }, "tags": { "type": "text", "analyzer": "english", @@ -239,6 +242,30 @@ } } }, + "streams": { + "properties": { + 
"type": { + "type": "keyword", + "index": false + }, + "index": { + "type": "short", + "index": false + }, + "codec": { + "type": "text" + }, + "width": { + "type": "short" + }, + "height": { + "type": "short" + }, + "bitrate": { + "type": "integer" + } + } + }, "sponsorblock": { "properties": { "last_refresh": { diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index f9afffa..f3ade1c 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -16,7 +16,10 @@ from home.src.index import playlist as ta_playlist from home.src.index.generic import YouTubeItem from home.src.index.subtitle import YoutubeSubtitle from home.src.index.video_constants import VideoTypeEnum -from home.src.index.video_streams import DurationConverter +from home.src.index.video_streams import ( + DurationConverter, + MediaStreamExtractor, +) from home.src.ta.helper import clean_string, randomizor from home.src.ta.ta_redis import RedisArchivist from ryd_client import ryd_client @@ -153,6 +156,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self._add_stats() self.add_file_path() self.add_player(media_path) + self.add_streams(media_path) if self.config["downloads"]["integrate_ryd"]: self._get_ryd_stats() @@ -254,6 +258,17 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): } ) + def add_streams(self, media_path=False): + """add stream metadata""" + vid_path = self._get_vid_path(media_path) + media = MediaStreamExtractor(vid_path) + self.json_data.update( + { + "streams": media.extract_metadata(), + "media_size": media.get_file_size(), + } + ) + def _get_vid_path(self, media_path=False): """get path of media file""" if media_path: diff --git a/tubearchivist/home/src/index/video_streams.py b/tubearchivist/home/src/index/video_streams.py index 01873a8..e477760 100644 --- a/tubearchivist/home/src/index/video_streams.py +++ b/tubearchivist/home/src/index/video_streams.py @@ -1,6 +1,8 @@ """extract metadata from video 
streams""" +import json import subprocess +from os import stat class DurationConverter: @@ -52,3 +54,79 @@ class DurationConverter: duration_str = duration_str + "00:" duration_str = duration_str + str(secs).zfill(2) return duration_str + + +class MediaStreamExtractor: + """extract stream metadata""" + + def __init__(self, media_path): + self.media_path = media_path + self.metadata = [] + + def extract_metadata(self): + """entry point to extract metadata""" + + cmd = [ + "ffprobe", + "-v", + "quiet", + "-print_format", + "json", + "-show_streams", + "-show_format", + self.media_path, + ] + result = subprocess.run( + cmd, capture_output=True, text=True, check=False + ) + + if result.returncode != 0: + return self.metadata + + streams = json.loads(result.stdout).get("streams") + for stream in streams: + self.process_stream(stream) + + return self.metadata + + def process_stream(self, stream): + """parse stream to metadata""" + codec_type = stream.get("codec_type") + if codec_type == "video": + self._extract_video_metadata(stream) + elif codec_type == "audio": + self._extract_audio_metadata(stream) + else: + return + + def _extract_video_metadata(self, stream): + """parse video metadata""" + if "bit_rate" not in stream: + # is probably thumbnail + return + + self.metadata.append( + { + "type": "video", + "index": stream["index"], + "codec": stream["codec_name"], + "width": stream["width"], + "height": stream["height"], + "bitrate": int(stream["bit_rate"]), + } + ) + + def _extract_audio_metadata(self, stream): + """extract audio metadata""" + self.metadata.append( + { + "type": "audio", + "index": stream["index"], + "codec": stream["codec_name"], + "bitrate": int(stream["bit_rate"]), + } + ) + + def get_file_size(self): + """get filesize in bytes""" + return stat(self.media_path).st_size diff --git a/tubearchivist/home/templates/home/video.html b/tubearchivist/home/templates/home/video.html index a500a9c..698d2a8 100644 --- 
a/tubearchivist/home/templates/home/video.html +++ b/tubearchivist/home/templates/home/video.html @@ -56,20 +56,6 @@ {% else %}

Youtube: Deactivated

{% endif %} - {% if reindex %} -

Reindex scheduled

- {% else %} -
- -
- {% endif %} -
- - -
- Are you sure? -
-
@@ -89,6 +75,34 @@
+
+
+
+ {% if reindex %} +

Reindex scheduled

+ {% else %} +
+ +
+ {% endif %} + + +
+ Are you sure? +
+
+
+
+ {% if video.media_size %} +

File size: {{ video.media_size|filesizeformat }}

+ {% endif %} + {% if video.streams %} + {% for stream in video.streams %} +

{{ stream.type|title }}: {{ stream.codec }} {{ stream.bitrate|filesizeformat }}/s{% if stream.width %} | {{ stream.width }}x{{ stream.height}}{% endif %}

+ {% endfor %} + {% endif %} +
+
{% if video.tags %}
diff --git a/tubearchivist/static/css/style.css b/tubearchivist/static/css/style.css index 4119946..07b59c0 100644 --- a/tubearchivist/static/css/style.css +++ b/tubearchivist/static/css/style.css @@ -660,6 +660,10 @@ video:-webkit-full-screen { background-color: var(--highlight-bg); } +.info-box-item p { + width: 100%; +} + .description-text { width: 100%; } From 306323663443578aaa76d39b006514f003e84476 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 15 Apr 2023 18:27:03 +0700 Subject: [PATCH 03/23] add media stream index startup migration --- .../config/management/commands/ta_startup.py | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tubearchivist/config/management/commands/ta_startup.py b/tubearchivist/config/management/commands/ta_startup.py index bba4eff..80c6d9c 100644 --- a/tubearchivist/config/management/commands/ta_startup.py +++ b/tubearchivist/config/management/commands/ta_startup.py @@ -8,9 +8,10 @@ import os from time import sleep from django.core.management.base import BaseCommand, CommandError -from home.src.es.connect import ElasticWrap +from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.es.index_setup import ElasitIndexWrap from home.src.es.snapshot import ElasticSnapshot +from home.src.index.video_streams import MediaStreamExtractor from home.src.ta.config import AppConfig, ReleaseVersion from home.src.ta.helper import clear_dl_cache from home.src.ta.ta_redis import RedisArchivist @@ -42,6 +43,7 @@ class Command(BaseCommand): self._mig_index_setup() self._mig_snapshot_check() self._mig_set_vid_type() + self._mig_set_streams() def _sync_redis_state(self): """make sure redis gets new config.json values""" @@ -193,3 +195,44 @@ class Command(BaseCommand): self.stdout.write(response) sleep(60) raise CommandError(message) + + def _mig_set_streams(self): + """migration: update from 0.3.5 to 0.3.6, set streams and media_size""" + self.stdout.write("[MIGRATION] index streams and media size") + 
videos = AppConfig().config["application"]["videos"] + data = { + "query": { + "bool": {"must_not": [{"exists": {"field": "streams"}}]} + }, + "_source": ["media_url", "youtube_id"], + } + all_missing = IndexPaginate("ta_video", data).get_results() + if not all_missing: + self.stdout.write(" no videos need updating") + return + + total = len(all_missing) + for idx, missing in enumerate(all_missing): + media_url = missing["media_url"] + youtube_id = missing["youtube_id"] + media_path = os.path.join(videos, media_url) + if not os.path.exists(media_path): + self.stdout.errors(f" file not found: {media_path}") + continue + + media = MediaStreamExtractor(media_path) + vid_data = { + "doc": { + "streams": media.extract_metadata(), + "media_size": media.get_file_size(), + } + } + path = f"ta_video/_update/{youtube_id}" + response, status_code = ElasticWrap(path).post(data=vid_data) + if not status_code == 200: + self.stdout.errors( + f" update failed: {path}, {response}, {status_code}" + ) + + if idx % 100 == 0: + self.stdout.write(f" progress {idx}/{total}") From 4067b6c182afd9525a5391626a3ddd1854563031 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 15 Apr 2023 22:55:30 +0700 Subject: [PATCH 04/23] add channel aggs --- tubearchivist/home/src/frontend/searching.py | 58 ++++--------------- tubearchivist/home/src/index/video_streams.py | 6 +- .../home/templates/home/channel_id.html | 10 ++-- tubearchivist/home/views.py | 12 +++- 4 files changed, 26 insertions(+), 60 deletions(-) diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py index a082f05..3e7894b 100644 --- a/tubearchivist/home/src/frontend/searching.py +++ b/tubearchivist/home/src/frontend/searching.py @@ -11,6 +11,7 @@ from datetime import datetime from home.src.download.thumbnails import ThumbManager from home.src.es.connect import ElasticWrap +from home.src.index.video_streams import DurationConverter from home.src.ta.config import AppConfig @@ -19,6 +20,7 @@ 
class SearchHandler: def __init__(self, path, config, data=False): self.max_hits = None + self.aggs = None self.path = path self.config = config self.data = data @@ -34,62 +36,22 @@ class SearchHandler: # simulate list for single result to reuse rest of class return_value = [response] - # stop if empty if not return_value: return False - all_videos = [] - all_channels = [] for idx, hit in enumerate(return_value): return_value[idx] = self.hit_cleanup(hit) - if hit["_index"] == "ta_video": - video_dict, channel_dict = self.vid_cache_link(hit) - if video_dict not in all_videos: - all_videos.append(video_dict) - if channel_dict not in all_channels: - all_channels.append(channel_dict) - elif hit["_index"] == "ta_channel": - channel_dict = self.channel_cache_link(hit) - if channel_dict not in all_channels: - all_channels.append(channel_dict) + + if response.get("aggregations"): + self.aggs = response["aggregations"] + if "total_duration" in self.aggs: + duration_sec = self.aggs["total_duration"]["value"] + self.aggs["total_duration"].update( + {"value_str": DurationConverter().get_str(duration_sec)} + ) return return_value - @staticmethod - def vid_cache_link(hit): - """download thumbnails into cache""" - vid_thumb = hit["source"]["vid_thumb_url"] - youtube_id = hit["source"]["youtube_id"] - channel_id_hit = hit["source"]["channel"]["channel_id"] - chan_thumb = hit["source"]["channel"]["channel_thumb_url"] - try: - chan_banner = hit["source"]["channel"]["channel_banner_url"] - except KeyError: - chan_banner = False - video_dict = {"youtube_id": youtube_id, "vid_thumb": vid_thumb} - channel_dict = { - "channel_id": channel_id_hit, - "chan_thumb": chan_thumb, - "chan_banner": chan_banner, - } - return video_dict, channel_dict - - @staticmethod - def channel_cache_link(hit): - """build channel thumb links""" - channel_id_hit = hit["source"]["channel_id"] - chan_thumb = hit["source"]["channel_thumb_url"] - try: - chan_banner = hit["source"]["channel_banner_url"] - except 
KeyError: - chan_banner = False - channel_dict = { - "channel_id": channel_id_hit, - "chan_thumb": chan_thumb, - "chan_banner": chan_banner, - } - return channel_dict - @staticmethod def hit_cleanup(hit): """clean up and parse data from a single hit""" diff --git a/tubearchivist/home/src/index/video_streams.py b/tubearchivist/home/src/index/video_streams.py index e477760..5379df9 100644 --- a/tubearchivist/home/src/index/video_streams.py +++ b/tubearchivist/home/src/index/video_streams.py @@ -41,9 +41,9 @@ class DurationConverter: # failed to extract return "NA" - hours = duration_sec // 3600 - minutes = (duration_sec - (hours * 3600)) // 60 - secs = duration_sec - (hours * 3600) - (minutes * 60) + hours = int(duration_sec // 3600) + minutes = int((duration_sec - (hours * 3600)) // 60) + secs = int(duration_sec - (hours * 3600) - (minutes * 60)) duration_str = str() if hours: diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html index fc85d0f..8948772 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -45,12 +45,10 @@
-
- {% if max_hits %} -

Total Videos: {{ max_hits }}

- - {% endif %} -
+ {% if aggs %} +

{{ aggs.total_items.value }} videos | {{ aggs.total_duration.value_str }} playback | Total size {{ aggs.total_size.value|filesizeformat }}

+ + {% endif %}
diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 527e18b..9f891c4 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -148,8 +148,8 @@ class ArchivistViewConfig(View): class ArchivistResultsView(ArchivistViewConfig): """View class to inherit from when searching data in es""" - view_origin = False - es_search = False + view_origin = "" + es_search = "" def __init__(self): super().__init__(self.view_origin) @@ -259,6 +259,7 @@ class ArchivistResultsView(ArchivistViewConfig): self.pagination_handler.validate(search.max_hits) self.context["max_hits"] = search.max_hits self.context["pagination"] = self.pagination_handler.pagination + self.context["aggs"] = search.aggs class MinView(View): @@ -613,6 +614,11 @@ class ChannelIdView(ChannelIdBaseView): ] } } + self.data["aggs"] = { + "total_items": {"value_count": {"field": "youtube_id"}}, + "total_size": {"sum": {"field": "media_size"}}, + "total_duration": {"sum": {"field": "player.duration"}}, + } self.data["sort"].append({"title.keyword": {"order": "asc"}}) if self.context["hide_watched"]: @@ -982,7 +988,7 @@ class SearchView(ArchivistResultsView): """ view_origin = "home" - es_search = False + es_search = "" def get(self, request): """handle get request""" From e385331f6cb48ed47ca367cfd02432b02c046330 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 16 Apr 2023 13:59:15 +0700 Subject: [PATCH 05/23] add typing --- tubearchivist/home/src/ta/helper.py | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index c1a6024..9934598 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -15,20 +15,20 @@ from urllib.parse import urlparse import requests -def clean_string(file_name): +def clean_string(file_name: str) -> str: """clean string to only asci characters""" whitelist = "-_.() " + string.ascii_letters + 
string.digits normalized = unicodedata.normalize("NFKD", file_name) ascii_only = normalized.encode("ASCII", "ignore").decode().strip() - white_listed = "".join(c for c in ascii_only if c in whitelist) - cleaned = re.sub(r"[ ]{2,}", " ", white_listed) + white_listed: str = "".join(c for c in ascii_only if c in whitelist) + cleaned: str = re.sub(r"[ ]{2,}", " ", white_listed) return cleaned -def ignore_filelist(filelist): +def ignore_filelist(filelist: list[str]) -> list[str]: """ignore temp files for os.listdir sanitizer""" to_ignore = ["Icon\r\r", "Temporary Items", "Network Trash Folder"] - cleaned = [] + cleaned: list[str] = [] for file_name in filelist: if file_name.startswith(".") or file_name in to_ignore: continue @@ -38,13 +38,13 @@ def ignore_filelist(filelist): return cleaned -def randomizor(length): +def randomizor(length: int) -> str: """generate random alpha numeric string""" - pool = string.digits + string.ascii_letters + pool: str = string.digits + string.ascii_letters return "".join(random.choice(pool) for i in range(length)) -def requests_headers(): +def requests_headers() -> dict[str, str]: """build header with random user agent for requests outside of yt-dlp""" chrome_versions = ( @@ -96,7 +96,7 @@ def requests_headers(): return {"User-Agent": template} -def date_praser(timestamp): +def date_praser(timestamp: int | str) -> str: """return formatted date string""" if isinstance(timestamp, int): date_obj = datetime.fromtimestamp(timestamp) @@ -106,7 +106,7 @@ def date_praser(timestamp): return datetime.strftime(date_obj, "%d %b, %Y") -def time_parser(timestamp): +def time_parser(timestamp: str) -> float: """return seconds from timestamp, false on empty""" if not timestamp: return False @@ -118,7 +118,7 @@ def time_parser(timestamp): return int(hours) * 60 * 60 + int(minutes) * 60 + float(seconds) -def clear_dl_cache(config): +def clear_dl_cache(config: dict) -> int: """clear leftover files from dl cache""" print("clear download cache") cache_dir = 
os.path.join(config["application"]["cache_dir"], "download") @@ -130,15 +130,15 @@ def clear_dl_cache(config): return len(leftover_files) -def get_mapping(): +def get_mapping() -> dict: """read index_mapping.json and get expected mapping and settings""" with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f: - index_config = json.load(f).get("index_config") + index_config: dict = json.load(f).get("index_config") return index_config -def is_shorts(youtube_id): +def is_shorts(youtube_id: str) -> bool: """check if youtube_id is a shorts video, bot not it it's not a shorts""" shorts_url = f"https://www.youtube.com/shorts/{youtube_id}" response = requests.head( @@ -148,10 +148,10 @@ def is_shorts(youtube_id): return response.status_code == 200 -def ta_host_parser(ta_host): +def ta_host_parser(ta_host: str) -> tuple[list[str], list[str]]: """parse ta_host env var for ALLOWED_HOSTS and CSRF_TRUSTED_ORIGINS""" - allowed_hosts = [] - csrf_trusted_origins = [] + allowed_hosts: list[str] = [] + csrf_trusted_origins: list[str] = [] for host in ta_host.split(): host_clean = host.strip() if not host_clean.startswith("http"): From 3a091ac28744e3beb877d69214e92f217c3e7262 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 16 Apr 2023 18:30:39 +0700 Subject: [PATCH 06/23] implement auto_start indexing --- .../config/management/commands/ta_startup.py | 32 +++++++++++++++++++ tubearchivist/home/config.json | 4 +-- tubearchivist/home/src/download/queue.py | 10 ++++-- .../home/src/download/subscriptions.py | 1 + tubearchivist/home/src/es/index_mapping.json | 3 ++ tubearchivist/home/tasks.py | 10 +++--- 6 files changed, 52 insertions(+), 8 deletions(-) diff --git a/tubearchivist/config/management/commands/ta_startup.py b/tubearchivist/config/management/commands/ta_startup.py index 80c6d9c..e204bf2 100644 --- a/tubearchivist/config/management/commands/ta_startup.py +++ b/tubearchivist/config/management/commands/ta_startup.py @@ -44,6 +44,7 @@ class Command(BaseCommand): 
self._mig_snapshot_check() self._mig_set_vid_type() self._mig_set_streams() + self._mig_set_autostart() def _sync_redis_state(self): """make sure redis gets new config.json values""" @@ -236,3 +237,34 @@ class Command(BaseCommand): if idx % 100 == 0: self.stdout.write(f" progress {idx}/{total}") + + def _mig_set_autostart(self): + """migration: update from 0.3.5 to 0.3.6 set auto_start to false""" + self.stdout.write("[MIGRATION] set default download auto_start") + data = { + "query": { + "bool": {"must_not": [{"exists": {"field": "auto_start"}}]} + }, + "script": {"source": "ctx._source['auto_start'] = false"}, + } + path = "ta_download/_update_by_query" + response, status_code = ElasticWrap(path).post(data=data) + if status_code == 200: + updated = response.get("updated", 0) + if not updated: + self.stdout.write( + " no videos needed updating in ta_download" + ) + + self.stdout.write( + self.style.SUCCESS( + f" ✓ {updated} videos updated in ta_download" + ) + ) + return + + message = " 🗙 ta_download auto_start update failed" + self.stdout.write(self.style.ERROR(message)) + self.stdout.write(response) + sleep(60) + raise CommandError(message) diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index 08f7645..8235133 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -12,11 +12,11 @@ "grid_items": 3 }, "subscriptions": { - "auto_search": false, "auto_download": false, "channel_size": 50, "live_channel_size": 50, - "shorts_channel_size": 50 + "shorts_channel_size": 50, + "auto_start": false }, "downloads": { "limit_count": false, diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 3309f51..325f8fd 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -236,7 +236,7 @@ class PendingList(PendingIndex): # match vid_type later self._add_video(video_id, VideoTypeEnum.UNKNOWN) - def add_to_pending(self, 
status="pending"): + def add_to_pending(self, status="pending", auto_start=False): """add missing videos to pending list""" self.get_channels() bulk_list = [] @@ -252,7 +252,13 @@ class PendingList(PendingIndex): if not video_details: continue - video_details["status"] = status + video_details.update( + { + "status": status, + "auto_start": auto_start, + } + ) + action = {"create": {"_id": youtube_id, "_index": "ta_download"}} bulk_list.append(json.dumps(action)) bulk_list.append(json.dumps(video_details)) diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index b006f84..760b303 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -284,6 +284,7 @@ class SubscriptionScanner: def __init__(self, task=False): self.task = task self.missing_videos = False + self.auto_start = AppConfig().config["subscriptions"].get("auto_start") def scan(self): """scan channels and playlists""" diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index 6641159..c748204 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -357,6 +357,9 @@ }, "vid_type": { "type": "keyword" + }, + "auto_start": { + "type": "boolean" } }, "expected_set": { diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 2196468..2166db7 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -171,10 +171,12 @@ def update_subscribed(self): return manager.init(self) - missing_videos = SubscriptionScanner(task=self).scan() + handler = SubscriptionScanner(task=self) + missing_videos = handler.scan() + auto_start = handler.auto_start if missing_videos: print(missing_videos) - extrac_dl.delay(missing_videos) + extrac_dl.delay(missing_videos, auto_start=auto_start) @shared_task(name="download_pending", bind=True, base=BaseTask) @@ -194,12 +196,12 
@@ def download_pending(self, from_queue=True): @shared_task(name="extract_download", bind=True, base=BaseTask) -def extrac_dl(self, youtube_ids): +def extrac_dl(self, youtube_ids, auto_start=False): """parse list passed and add to pending""" TaskManager().init(self) pending_handler = PendingList(youtube_ids=youtube_ids, task=self) pending_handler.parse_url_list() - pending_handler.add_to_pending() + pending_handler.add_to_pending(auto_start=auto_start) @shared_task(bind=True, name="check_reindex", base=BaseTask) From 5cd845e55dd4b5af10fcadb078c6843faf7a7b63 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 21 Apr 2023 16:09:12 +0700 Subject: [PATCH 07/23] fix type compatibility --- tubearchivist/api/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 85b44b0..22de8b7 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -38,8 +38,8 @@ class ApiBaseView(APIView): authentication_classes = [SessionAuthentication, TokenAuthentication] permission_classes = [IsAuthenticated] - search_base = False - data = False + search_base = "" + data = "" def __init__(self): super().__init__() From 1b6b219e02a54ba416b83aab1da830480bcf7344 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 21 Apr 2023 16:11:37 +0700 Subject: [PATCH 08/23] modify _get_next to for auto_only attr --- .../home/src/download/yt_dlp_handler.py | 96 ++++++------------- tubearchivist/home/tasks.py | 7 +- 2 files changed, 32 insertions(+), 71 deletions(-) diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 529526f..7ed11f9 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -6,14 +6,13 @@ functionality: - move to archive """ -import json import os import shutil from datetime import datetime from home.src.download.queue import PendingList from 
home.src.download.subscriptions import PlaylistSubscription -from home.src.download.yt_dlp_base import CookieHandler, YtWrap +from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.channel import YoutubeChannel from home.src.index.comments import CommentList @@ -22,7 +21,6 @@ from home.src.index.video import YoutubeVideo, index_new_video from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig from home.src.ta.helper import clean_string, ignore_filelist -from home.src.ta.ta_redis import RedisQueue class DownloadPostProcess: @@ -159,29 +157,17 @@ class VideoDownloader: self.channels = set() self.videos = set() - def run_queue(self): + def run_queue(self, auto_only=False): """setup download queue in redis loop until no more items""" - self._setup_queue() - queue = RedisQueue(queue_name="dl_queue") - - limit_queue = self.config["downloads"]["limit_count"] - if limit_queue: - queue.trim(limit_queue - 1) - + self._get_overwrites() while True: - youtube_data = queue.get_next() - if self.task.is_stopped() or not youtube_data: - queue.clear() + video_data = self._get_next(auto_only) + if self.task.is_stopped() or not video_data: break - youtube_data = json.loads(youtube_data) - youtube_id = youtube_data.get("youtube_id") - - tmp_vid_type = youtube_data.get( - "vid_type", VideoTypeEnum.VIDEOS.value - ) - video_type = VideoTypeEnum(tmp_vid_type) - print(f"{youtube_id}: Downloading type: {video_type}") + youtube_id = video_data.get("youtube_id") + video_type = VideoTypeEnum(video_data["vid_type"]) + print(f"{youtube_id}: Downloading type: {video_type.value}") success = self._dl_single_vid(youtube_id) if not success: @@ -212,61 +198,39 @@ class VideoDownloader: ) self.move_to_archive(vid_dict) - - if queue.has_item(): - message = "Continue with next video." - else: - message = "Download queue is finished." 
- - if self.task: - self.task.send_progress([message]) - self._delete_from_pending(youtube_id) # post processing self._add_subscribed_channels() DownloadPostProcess(self).run() - def _setup_queue(self): - """setup required and validate""" - if self.config["downloads"]["cookie_import"]: - valid = CookieHandler(self.config).validate() - if not valid: - return + def _get_next(self, auto_only): + """get next item in queue""" + must_list = [{"term": {"status": {"value": "pending"}}}] + if auto_only: + must_list.append({"term": {"auto_start": {"value": True}}}) + data = { + "size": 1, + "query": {"bool": {"must": must_list}}, + "sort": [ + {"auto_start": {"order": "desc"}}, + {"timestamp": {"order": "asc"}}, + ], + } + path = "ta_download/_search" + response, _ = ElasticWrap(path).get(data=data) + if not response["hits"]["hits"]: + return False + + return response["hits"]["hits"][0]["_source"] + + def _get_overwrites(self): + """get channel overwrites""" pending = PendingList() - pending.get_download() pending.get_channels() self.video_overwrites = pending.video_overwrites - def add_pending(self): - """add pending videos to download queue""" - if self.task: - self.task.send_progress(["Scanning your download queue."]) - - pending = PendingList() - pending.get_download() - to_add = [ - json.dumps( - { - "youtube_id": i["youtube_id"], - # Using .value in default val to match what would be - # decoded when parsing json if not set - "vid_type": i.get("vid_type", VideoTypeEnum.VIDEOS.value), - } - ) - for i in pending.all_pending - ] - if not to_add: - # there is nothing pending - print("download queue is empty") - if self.task: - self.task.send_progress(["Download queue is empty."]) - - return - - RedisQueue(queue_name="dl_queue").add_list(to_add) - def _progress_hook(self, response): """process the progress_hooks from yt_dlp""" progress = False diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 2166db7..fee5986 100644 --- 
a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -180,7 +180,7 @@ def update_subscribed(self): @shared_task(name="download_pending", bind=True, base=BaseTask) -def download_pending(self, from_queue=True): +def download_pending(self, auto_only=False): """download latest pending videos""" manager = TaskManager() if manager.is_pending(self): @@ -189,10 +189,7 @@ def download_pending(self, from_queue=True): return manager.init(self) - downloader = VideoDownloader(task=self) - if from_queue: - downloader.add_pending() - downloader.run_queue() + VideoDownloader(task=self).run_queue(auto_only) @shared_task(name="extract_download", bind=True, base=BaseTask) From 89779ec13be02cd25762daeec013086aaee44102 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 21 Apr 2023 17:25:04 +0700 Subject: [PATCH 09/23] consolidate update status priority --- tubearchivist/api/views.py | 10 ++-------- tubearchivist/home/src/download/queue.py | 19 ++++++------------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 22de8b7..be9e712 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -12,13 +12,12 @@ from home.src.index.generic import Pagination from home.src.index.reindex import ReindexProgress from home.src.index.video import SponsorBlock, YoutubeVideo from home.src.ta.config import AppConfig -from home.src.ta.ta_redis import RedisArchivist, RedisQueue +from home.src.ta.ta_redis import RedisArchivist from home.src.ta.task_manager import TaskCommand, TaskManager from home.src.ta.urlparser import Parser from home.tasks import ( BaseTask, check_reindex, - download_pending, extrac_dl, subscribe_to, ) @@ -436,12 +435,7 @@ class DownloadApiView(ApiBaseView): return Response({"message": message}, status=404) print(f"{video_id}: change status to {item_status}") - if item_status == "priority": - PendingInteract(youtube_id=video_id).prioritize() - download_pending.delay(from_queue=False) - 
else: - PendingInteract(video_id, item_status).update_status() - RedisQueue(queue_name="dl_queue").clear_item(video_id) + PendingInteract(video_id, item_status).update_status() return Response(request.data) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 325f8fd..b276f21 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -19,7 +19,6 @@ from home.src.index.video_constants import VideoTypeEnum from home.src.index.video_streams import DurationConverter from home.src.ta.config import AppConfig from home.src.ta.helper import is_shorts -from home.src.ta.ta_redis import RedisQueue class PendingIndex: @@ -113,21 +112,15 @@ class PendingInteract: _, _ = ElasticWrap(path).post(data=data) def update_status(self): - """update status field of pending item""" - data = {"doc": {"status": self.status}} + """update status of pending item""" + if self.status == "priority": + data = {"doc": {"status": "pending", "auto_start": True}} + else: + data = {"doc": {"status": self.status}} + path = f"ta_download/_update/{self.youtube_id}" _, _ = ElasticWrap(path).post(data=data) - def prioritize(self): - """prioritize pending item in redis queue""" - pending_video, _ = self.get_item() - vid_type = pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value) - to_add = { - "youtube_id": pending_video["youtube_id"], - "vid_type": vid_type, - } - RedisQueue(queue_name="dl_queue").add_priority(to_add) - def get_item(self): """return pending item dict""" path = f"ta_download/_doc/{self.youtube_id}" From 790ba3d20e234485028ad79c7a72eee021f17338 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 22 Apr 2023 11:19:44 +0700 Subject: [PATCH 10/23] fix linter --- tubearchivist/api/views.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index be9e712..2e3f142 100644 --- a/tubearchivist/api/views.py +++ 
b/tubearchivist/api/views.py @@ -15,12 +15,7 @@ from home.src.ta.config import AppConfig from home.src.ta.ta_redis import RedisArchivist from home.src.ta.task_manager import TaskCommand, TaskManager from home.src.ta.urlparser import Parser -from home.tasks import ( - BaseTask, - check_reindex, - extrac_dl, - subscribe_to, -) +from home.tasks import BaseTask, check_reindex, extrac_dl, subscribe_to from rest_framework.authentication import ( SessionAuthentication, TokenAuthentication, From 76535c6304c4697aaddc48a7c6de414d6aeaed38 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 22 Apr 2023 11:57:09 +0700 Subject: [PATCH 11/23] move add to queue to api call --- tubearchivist/home/tasks.py | 8 ++++- .../home/templates/home/downloads.html | 4 +-- tubearchivist/home/views.py | 33 ++----------------- tubearchivist/static/script.js | 14 ++++++++ 4 files changed, 25 insertions(+), 34 deletions(-) diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index fee5986..66a9c58 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -25,6 +25,7 @@ from home.src.index.reindex import Reindex, ReindexManual, ReindexPopulate from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder from home.src.ta.ta_redis import RedisArchivist from home.src.ta.task_manager import TaskManager +from home.src.ta.urlparser import Parser CONFIG = AppConfig().config REDIS_HOST = os.environ.get("REDIS_HOST") @@ -196,7 +197,12 @@ def download_pending(self, auto_only=False): def extrac_dl(self, youtube_ids, auto_start=False): """parse list passed and add to pending""" TaskManager().init(self) - pending_handler = PendingList(youtube_ids=youtube_ids, task=self) + if isinstance(youtube_ids, str): + to_add = Parser(youtube_ids).parse() + else: + to_add = youtube_ids + + pending_handler = PendingList(youtube_ids=to_add, task=self) pending_handler.parse_url_list() pending_handler.add_to_pending(auto_start=auto_start) diff --git 
a/tubearchivist/home/templates/home/downloads.html b/tubearchivist/home/templates/home/downloads.html index 35ed3d9..a1d9be2 100644 --- a/tubearchivist/home/templates/home/downloads.html +++ b/tubearchivist/home/templates/home/downloads.html @@ -20,10 +20,10 @@ add-icon

Add to download queue

-
+ {% csrf_token %} {{ add_form }} - +
diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 9f891c4..142c6bc 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -41,8 +41,7 @@ from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder from home.src.ta.helper import time_parser from home.src.ta.ta_redis import RedisArchivist -from home.src.ta.urlparser import Parser -from home.tasks import extrac_dl, index_channel_playlists, subscribe_to +from home.tasks import index_channel_playlists, subscribe_to from rest_framework.authtoken.models import Token @@ -368,7 +367,7 @@ class AboutView(MinView): class DownloadView(ArchivistResultsView): """resolves to /download/ - takes POST for downloading youtube links + handle the download queue """ view_origin = "downloads" @@ -452,34 +451,6 @@ class DownloadView(ArchivistResultsView): return buckets_sorted - @staticmethod - def post(request): - """handle post requests""" - to_queue = AddToQueueForm(data=request.POST) - if to_queue.is_valid(): - url_str = request.POST.get("vid_url") - print(url_str) - try: - youtube_ids = Parser(url_str).parse() - except ValueError: - # failed to process - key = "message:add" - print(f"failed to parse: {url_str}") - mess_dict = { - "status": key, - "level": "error", - "title": "Failed to extract links.", - "message": "Not a video, channel or playlist ID or URL", - } - RedisArchivist().set_message(key, mess_dict, expire=True) - return redirect("downloads") - - print(youtube_ids) - extrac_dl.delay(youtube_ids) - - sleep(2) - return redirect("downloads", permanent=True) - class ChannelIdBaseView(ArchivistResultsView): """base class for all channel-id views""" diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 5df3f08..5916144 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -160,6 +160,20 @@ function dlPending() { }, 500); } +function addToQueue() { + 
let textArea = document.getElementById('id_vid_url'); + if (textArea.value === '') { + return + } + let toPost = {data: [{youtube_id: textArea.value, status: 'pending'}]}; + let apiEndpoint = '/api/download/'; + apiRequest(apiEndpoint, 'POST', toPost); + textArea.value = ""; + setTimeout(function () { + checkMessages(); + }, 500); +} + function toIgnore(button) { let youtube_id = button.getAttribute('data-id'); let apiEndpoint = '/api/download/' + youtube_id + '/'; From bc39561606007f2b013720e4f6cf6b9c91999d7a Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 22 Apr 2023 13:42:54 +0700 Subject: [PATCH 12/23] consolidate dl queue notification to method --- .../home/src/download/yt_dlp_handler.py | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 7ed11f9..329b30f 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -166,37 +166,24 @@ class VideoDownloader: break youtube_id = video_data.get("youtube_id") - video_type = VideoTypeEnum(video_data["vid_type"]) - print(f"{youtube_id}: Downloading type: {video_type.value}") + print(f"{youtube_id}: Downloading video") + self._notify(video_data, "Validate download format") success = self._dl_single_vid(youtube_id) if not success: continue - if self.task: - self.task.send_progress( - [ - f"Processing video {youtube_id}", - "Add video metadata to index.", - ] - ) + self._notify(video_data, "Add video metadata to index") vid_dict = index_new_video( youtube_id, video_overwrites=self.video_overwrites, - video_type=video_type, + video_type=VideoTypeEnum(video_data["vid_type"]), ) self.channels.add(vid_dict["channel"]["channel_id"]) self.videos.add(vid_dict["youtube_id"]) - if self.task: - self.task.send_progress( - [ - f"Processing video {youtube_id}", - "Move downloaded file to archive.", - ] - ) - + 
self._notify(video_data, "Move downloaded file to archive") self.move_to_archive(vid_dict) self._delete_from_pending(youtube_id) @@ -204,6 +191,15 @@ class VideoDownloader: self._add_subscribed_channels() DownloadPostProcess(self).run() + def _notify(self, video_data, message): + """send progress notification to task""" + if not self.task: + return + + typ = VideoTypeEnum(video_data["vid_type"]).value.rstrip("s").title() + title = video_data.get("title") + self.task.send_progress([f"Processing {typ}: {title}", message]) + def _get_next(self, auto_only): """get next item in queue""" must_list = [{"term": {"status": {"value": "pending"}}}] @@ -228,6 +224,7 @@ class VideoDownloader: def _get_overwrites(self): """get channel overwrites""" pending = PendingList() + pending.get_download() pending.get_channels() self.video_overwrites = pending.video_overwrites @@ -386,7 +383,7 @@ class VideoDownloader: @staticmethod def _delete_from_pending(youtube_id): """delete downloaded video from pending index if its there""" - path = f"ta_download/_doc/{youtube_id}" + path = f"ta_download/_doc/{youtube_id}?refresh=true" _, _ = ElasticWrap(path).delete() def _add_subscribed_channels(self): From 77900f89e3a5239a0bc25892c82436246d699fa7 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 23 Apr 2023 13:59:32 +0700 Subject: [PATCH 13/23] remove legacy limit_count config --- tubearchivist/home/config.json | 1 - tubearchivist/home/src/frontend/forms.py | 1 - tubearchivist/home/templates/home/settings.html | 5 ----- 3 files changed, 7 deletions(-) diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index 8235133..bf8d3a1 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -19,7 +19,6 @@ "auto_start": false }, "downloads": { - "limit_count": false, "limit_speed": false, "sleep_interval": 3, "autodelete_days": false, diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py index 85c132a..88c3c1a 
100644 --- a/tubearchivist/home/src/frontend/forms.py +++ b/tubearchivist/home/src/frontend/forms.py @@ -107,7 +107,6 @@ class ApplicationSettingsForm(forms.Form): subscriptions_shorts_channel_size = forms.IntegerField( required=False, min_value=0 ) - downloads_limit_count = forms.IntegerField(required=False) downloads_limit_speed = forms.IntegerField(required=False) downloads_throttledratelimit = forms.IntegerField(required=False) downloads_sleep_interval = forms.IntegerField(required=False) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 51012cc..0d893b1 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -52,11 +52,6 @@

Downloads

-
-

Current download limit: {{ config.downloads.limit_count }}

- Limit the number of videos getting downloaded on every run. 0 (zero) to deactivate.
- {{ app_form.downloads_limit_count }} -

Current download speed limit in KB/s: {{ config.downloads.limit_speed }}

Limit download speed. 0 (zero) to deactivate, e.g. 1000 (1MB/s). Speeds are in KB/s. Setting takes effect on new download jobs or application restart.
From 06bbe2e4006dc4c83dee1f4e627d6b348f2fbdf3 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 27 Apr 2023 22:06:41 +0700 Subject: [PATCH 14/23] fix playlist sub parser --- tubearchivist/home/src/download/subscriptions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index 760b303..6325cb4 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -175,10 +175,7 @@ class PlaylistSubscription: def process_url_str(self, new_playlists, subscribed=True): """process playlist subscribe form url_str""" - data = { - "query": {"match_all": {}}, - "sort": [{"published": {"order": "desc"}}], - } + data = {"query": {"match_all": {}}, "_source": ["youtube_id"]} all_indexed = IndexPaginate("ta_video", data).get_results() all_youtube_ids = [i["youtube_id"] for i in all_indexed] From 9a38aff03dbbea6aa52d0ca8d49c9bd922342597 Mon Sep 17 00:00:00 2001 From: Matthew Glinski Date: Thu, 27 Apr 2023 23:20:49 -0400 Subject: [PATCH 15/23] Accept members-only/paid-content playlist IDs/URLs (#469) This adds a new check to the playlist url_id check to allow playlist urls from subscription only content on youtube channels. The channel I tested on had an ID length of 26 (https://www.youtube.com/playlist?list=UUMOvk0KB4Ue0vfPqvDzjIAwiQ), I am not sure if other lengths are valid here also. I have tested this on my local install and it is downloading the members only playlist correctly as long as I have valid youtube cookies. 
--- tubearchivist/home/src/ta/urlparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/ta/urlparser.py b/tubearchivist/home/src/ta/urlparser.py index 24dfc68..04429fa 100644 --- a/tubearchivist/home/src/ta/urlparser.py +++ b/tubearchivist/home/src/ta/urlparser.py @@ -92,7 +92,7 @@ class Parser: item_type = "video" elif len_id_str == 24: item_type = "channel" - elif len_id_str in (34, 18): + elif len_id_str in (34, 26, 18): item_type = "playlist" else: raise ValueError(f"not a valid id_str: {id_str}") From 8f3b832069d12f1dfdc6a68fad81663b8bdb373f Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 12:55:12 +0700 Subject: [PATCH 16/23] [API] add dl autostart query param --- tubearchivist/api/views.py | 3 ++- tubearchivist/home/tasks.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 2e3f142..5b2bf7f 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -483,6 +483,7 @@ class DownloadApiListView(ApiBaseView): def post(request): """add list of videos to download queue""" data = request.data + auto_start = bool(request.GET.get("autostart")) try: to_add = data["data"] except KeyError: @@ -499,7 +500,7 @@ class DownloadApiListView(ApiBaseView): print(message) return Response({"message": message}, status=400) - extrac_dl.delay(youtube_ids) + extrac_dl.delay(youtube_ids, auto_start=auto_start) return Response(data) diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 66a9c58..1251891 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -190,7 +190,7 @@ def download_pending(self, auto_only=False): return manager.init(self) - VideoDownloader(task=self).run_queue(auto_only) + VideoDownloader(task=self).run_queue(auto_only=auto_only) @shared_task(name="extract_download", bind=True, base=BaseTask) @@ -206,6 +206,9 @@ def extrac_dl(self, youtube_ids, auto_start=False): 
pending_handler.parse_url_list() pending_handler.add_to_pending(auto_start=auto_start) + if auto_start: + download_pending.delay(auto_only=True) + @shared_task(bind=True, name="check_reindex", base=BaseTask) def check_reindex(self, data=False, extract_videos=False): From efca460e9d9927c4334f4da28062e92f1a492872 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 13:22:10 +0700 Subject: [PATCH 17/23] download now task handling --- tubearchivist/api/views.py | 10 +++++++++- tubearchivist/home/src/download/queue.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 5b2bf7f..15fde1d 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -15,7 +15,13 @@ from home.src.ta.config import AppConfig from home.src.ta.ta_redis import RedisArchivist from home.src.ta.task_manager import TaskCommand, TaskManager from home.src.ta.urlparser import Parser -from home.tasks import BaseTask, check_reindex, extrac_dl, subscribe_to +from home.tasks import ( + BaseTask, + check_reindex, + download_pending, + extrac_dl, + subscribe_to, +) from rest_framework.authentication import ( SessionAuthentication, TokenAuthentication, @@ -431,6 +437,8 @@ class DownloadApiView(ApiBaseView): print(f"{video_id}: change status to {item_status}") PendingInteract(video_id, item_status).update_status() + if item_status == "priority": + download_pending.delay(auto_only=True) return Response(request.data) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index b276f21..1ff7917 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -118,7 +118,7 @@ class PendingInteract: else: data = {"doc": {"status": self.status}} - path = f"ta_download/_update/{self.youtube_id}" + path = f"ta_download/_update/{self.youtube_id}/?refresh=true" _, _ = ElasticWrap(path).post(data=data) def get_item(self): From 
280c773441dbd4be3e67b4adf96feb1ad7881ac3 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 14:17:36 +0700 Subject: [PATCH 18/23] add download now to form --- tubearchivist/home/src/download/queue.py | 2 +- tubearchivist/home/templates/home/downloads.html | 7 ++++--- tubearchivist/static/css/style.css | 4 ++++ tubearchivist/static/script.js | 8 ++++++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 1ff7917..006b7a0 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -273,7 +273,7 @@ class PendingList(PendingIndex): # add last newline bulk_list.append("\n") query_str = "\n".join(bulk_list) - _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True) + _, _ = ElasticWrap("_bulk?refresh=true").post(query_str, ndjson=True) def _notify_add(self, idx, total): """send notification for adding videos to download queue""" diff --git a/tubearchivist/home/templates/home/downloads.html b/tubearchivist/home/templates/home/downloads.html index a1d9be2..59f4f03 100644 --- a/tubearchivist/home/templates/home/downloads.html +++ b/tubearchivist/home/templates/home/downloads.html @@ -20,11 +20,12 @@ add-icon

Add to download queue

-
+
{% csrf_token %} {{ add_form }} - - + + +
diff --git a/tubearchivist/static/css/style.css b/tubearchivist/static/css/style.css index 07b59c0..9ece987 100644 --- a/tubearchivist/static/css/style.css +++ b/tubearchivist/static/css/style.css @@ -369,6 +369,10 @@ button:hover { display: none; } +#hidden-form button { + margin-right: 1rem; +} + #text-reveal { height: 0; overflow: hidden; diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 5916144..cc77074 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -160,18 +160,22 @@ function dlPending() { }, 500); } -function addToQueue() { +function addToQueue(autostart=false) { let textArea = document.getElementById('id_vid_url'); if (textArea.value === '') { return } let toPost = {data: [{youtube_id: textArea.value, status: 'pending'}]}; let apiEndpoint = '/api/download/'; + if (autostart) { + apiEndpoint = `${apiEndpoint}?autostart=true`; + } apiRequest(apiEndpoint, 'POST', toPost); - textArea.value = ""; + textArea.value = ''; setTimeout(function () { checkMessages(); }, 500); + showForm(); } function toIgnore(button) { From 1c0b407f3f119c18be745a65a891db41d96d5031 Mon Sep 17 00:00:00 2001 From: Dominik Sander Date: Sat, 29 Apr 2023 11:32:52 +0200 Subject: [PATCH 19/23] Allow to configure yt-dlp `--format-sort` argument (#471) * Allow to configure yt-dlp `--format-sort` argument This exposes the [`--format-sort`][1] yt-dlp option to the user. 
Implements parts of #316 [1]: https://github.com/yt-dlp/yt-dlp#sorting-formats * Trim split values of format_sort, obey black * Add `format_sort` to default configuration * Add note about codec compatibility to settings page --- tubearchivist/home/config.json | 1 + tubearchivist/home/src/download/yt_dlp_handler.py | 4 ++++ tubearchivist/home/src/frontend/forms.py | 1 + tubearchivist/home/templates/home/settings.html | 13 +++++++++++++ 4 files changed, 19 insertions(+) diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index 08f7645..f2fa66e 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -24,6 +24,7 @@ "sleep_interval": 3, "autodelete_days": false, "format": false, + "format_sort": false, "add_metadata": false, "add_thumbnail": false, "subtitle": false, diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 529526f..2a57d03 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -312,6 +312,10 @@ class VideoDownloader: """build user customized options""" if self.config["downloads"]["format"]: self.obs["format"] = self.config["downloads"]["format"] + if self.config["downloads"]["format_sort"]: + format_sort = self.config["downloads"]["format_sort"] + format_sort_list = [i.strip() for i in format_sort.split(",")] + self.obs["format_sort"] = format_sort_list if self.config["downloads"]["limit_speed"]: self.obs["ratelimit"] = ( self.config["downloads"]["limit_speed"] * 1024 diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py index 85c132a..e06fda8 100644 --- a/tubearchivist/home/src/frontend/forms.py +++ b/tubearchivist/home/src/frontend/forms.py @@ -113,6 +113,7 @@ class ApplicationSettingsForm(forms.Form): downloads_sleep_interval = forms.IntegerField(required=False) downloads_autodelete_days = forms.IntegerField(required=False) 
downloads_format = forms.CharField(required=False) + downloads_format_sort = forms.CharField(required=False) downloads_add_metadata = forms.ChoiceField( widget=forms.Select, choices=METADATA_CHOICES, required=False ) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 51012cc..eaccae7 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -95,6 +95,19 @@ {{ app_form.downloads_format }}
+
+

Force sort order to have precedence over all yt-dlp fields.
+ Currently: {{ config.downloads.format_sort }} +

+

Example configurations:

+
    +
  • res,codec:av1: prefer AV1 over all other video codecs.
  • +
  • 0: deactivate and keep the default as decided by yt-dlp.
  • +
+ Not all codecs are supported by all browsers. The default value ensures best compatibility. Check out the documentation for valid configurations.
+ {{ app_form.downloads_format_sort }} +
+

Current metadata embed setting: {{ config.downloads.add_metadata }}

Metadata is not embedded into the downloaded files by default.
From 63021bd3139318419497fe00fd6efbcca686b81d Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 16:54:54 +0700 Subject: [PATCH 20/23] add autostart subs to config form --- tubearchivist/home/src/frontend/forms.py | 9 +++++++++ tubearchivist/home/templates/home/settings.html | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py index a643fb8..19b3058 100644 --- a/tubearchivist/home/src/frontend/forms.py +++ b/tubearchivist/home/src/frontend/forms.py @@ -44,6 +44,12 @@ class UserSettingsForm(forms.Form): class ApplicationSettingsForm(forms.Form): """handle all application settings""" + AUTOSTART_CHOICES = [ + ("", "-- change subscription autostart --"), + ("0", "disable auto start"), + ("1", "enable auto start"), + ] + METADATA_CHOICES = [ ("", "-- change metadata embed --"), ("0", "don't embed metadata"), @@ -107,6 +113,9 @@ class ApplicationSettingsForm(forms.Form): subscriptions_shorts_channel_size = forms.IntegerField( required=False, min_value=0 ) + subscriptions_auto_start = forms.ChoiceField( + widget=forms.Select, choices=AUTOSTART_CHOICES, required=False + ) downloads_limit_speed = forms.IntegerField(required=False) downloads_throttledratelimit = forms.IntegerField(required=False) downloads_sleep_interval = forms.IntegerField(required=False) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 55ea208..e739fde 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -49,6 +49,11 @@ Shorts Videos to scan to find new items for the Rescan subscriptions task, max recommended 50.
{{ app_form.subscriptions_shorts_channel_size }}
+
+

Auto start download from your subscriptions: {{ config.subscriptions.auto_start}}

+ Enabling this will automatically start and prioritize videos from your subscriptions.
+ {{ app_form.subscriptions_auto_start }} +

Downloads

From e866bb3be58c030eb9ed55f1dcde09de5b1d0c4d Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 17:08:13 +0700 Subject: [PATCH 21/23] bump libs --- tubearchivist/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index 345d8d7..54cad2c 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,12 +1,12 @@ beautifulsoup4==4.12.2 celery==5.2.7 Django==4.2 -django-auth-ldap==4.2.0 +django-auth-ldap==4.3.0 django-cors-headers==3.14.0 djangorestframework==3.14.0 Pillow==9.5.0 redis==4.5.4 -requests==2.28.2 +requests==2.29.0 ryd-client==0.0.6 uWSGI==2.0.21 whitenoise==6.4.0 From 7c01ad88b2ec134e25c984e2ee4f7771c0d238d1 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 18:37:54 +0700 Subject: [PATCH 22/23] fix startup MediaStreamExtractor error handling --- tubearchivist/config/management/commands/ta_startup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubearchivist/config/management/commands/ta_startup.py b/tubearchivist/config/management/commands/ta_startup.py index e204bf2..a1c4adf 100644 --- a/tubearchivist/config/management/commands/ta_startup.py +++ b/tubearchivist/config/management/commands/ta_startup.py @@ -218,7 +218,7 @@ class Command(BaseCommand): youtube_id = missing["youtube_id"] media_path = os.path.join(videos, media_url) if not os.path.exists(media_path): - self.stdout.errors(f" file not found: {media_path}") + self.stdout.write(f" file not found: {media_path}") continue media = MediaStreamExtractor(media_path) @@ -255,13 +255,13 @@ class Command(BaseCommand): self.stdout.write( " no videos needed updating in ta_download" ) + return self.stdout.write( self.style.SUCCESS( f" ✓ {updated} videos updated in ta_download" ) ) - return message = " 🗙 ta_download auto_start update failed" self.stdout.write(self.style.ERROR(message)) From 5cc642098d253b147f7bac1e5809696c824d970a Mon Sep 17 
00:00:00 2001 From: simon Date: Sat, 29 Apr 2023 18:42:29 +0700 Subject: [PATCH 23/23] remove old migration --- .../config/management/commands/ta_startup.py | 50 ------------------- 1 file changed, 50 deletions(-) diff --git a/tubearchivist/config/management/commands/ta_startup.py b/tubearchivist/config/management/commands/ta_startup.py index a1c4adf..c65b39e 100644 --- a/tubearchivist/config/management/commands/ta_startup.py +++ b/tubearchivist/config/management/commands/ta_startup.py @@ -42,7 +42,6 @@ class Command(BaseCommand): self._version_check() self._mig_index_setup() self._mig_snapshot_check() - self._mig_set_vid_type() self._mig_set_streams() self._mig_set_autostart() @@ -148,55 +147,6 @@ class Command(BaseCommand): self.stdout.write("[MIGRATION] setup snapshots") ElasticSnapshot().setup() - def _mig_set_vid_type(self): - """migration: update 0.3.0 to 0.3.1 set vid_type default""" - self.stdout.write("[MIGRATION] set default vid_type") - index_list = ["ta_video", "ta_download"] - data = { - "query": { - "bool": { - "should": [ - { - "bool": { - "must_not": [{"exists": {"field": "vid_type"}}] - } - }, - {"term": {"vid_type": {"value": "unknown"}}}, - ] - } - }, - "script": {"source": "ctx._source['vid_type'] = 'videos'"}, - } - - for index_name in index_list: - path = f"{index_name}/_update_by_query" - response, status_code = ElasticWrap(path).post(data=data) - if status_code == 503: - message = f" 🗙 {index_name} retry failed migration." 
- self.stdout.write(self.style.ERROR(message)) - sleep(10) - response, status_code = ElasticWrap(path).post(data=data) - - if status_code == 200: - updated = response.get("updated", 0) - if not updated: - self.stdout.write( - f" no videos needed updating in {index_name}" - ) - continue - - self.stdout.write( - self.style.SUCCESS( - f" ✓ {updated} videos updated in {index_name}" - ) - ) - else: - message = f" 🗙 {index_name} vid_type update failed" - self.stdout.write(self.style.ERROR(message)) - self.stdout.write(response) - sleep(60) - raise CommandError(message) - def _mig_set_streams(self): """migration: update from 0.3.5 to 0.3.6, set streams and media_size""" self.stdout.write("[MIGRATION] index streams and media size")