Index media metadata, #build

Changed:
- Added stream and codec details
- Added channel aggregations
- Added autostart download
- Added members-only playlist support
- Added yt-dlp format sort option
This commit is contained in:
simon 2023-04-29 18:44:14 +07:00
commit cf37800c2b
22 changed files with 473 additions and 353 deletions

View File

@ -12,7 +12,7 @@ from home.src.index.generic import Pagination
from home.src.index.reindex import ReindexProgress
from home.src.index.video import SponsorBlock, YoutubeVideo
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.task_manager import TaskCommand, TaskManager
from home.src.ta.urlparser import Parser
from home.tasks import (
@ -38,8 +38,8 @@ class ApiBaseView(APIView):
authentication_classes = [SessionAuthentication, TokenAuthentication]
permission_classes = [IsAuthenticated]
search_base = False
data = False
search_base = ""
data = ""
def __init__(self):
super().__init__()
@ -436,12 +436,9 @@ class DownloadApiView(ApiBaseView):
return Response({"message": message}, status=404)
print(f"{video_id}: change status to {item_status}")
PendingInteract(video_id, item_status).update_status()
if item_status == "priority":
PendingInteract(youtube_id=video_id).prioritize()
download_pending.delay(from_queue=False)
else:
PendingInteract(video_id, item_status).update_status()
RedisQueue(queue_name="dl_queue").clear_item(video_id)
download_pending.delay(auto_only=True)
return Response(request.data)
@ -494,6 +491,7 @@ class DownloadApiListView(ApiBaseView):
def post(request):
"""add list of videos to download queue"""
data = request.data
auto_start = bool(request.GET.get("autostart"))
try:
to_add = data["data"]
except KeyError:
@ -510,7 +508,7 @@ class DownloadApiListView(ApiBaseView):
print(message)
return Response({"message": message}, status=400)
extrac_dl.delay(youtube_ids)
extrac_dl.delay(youtube_ids, auto_start=auto_start)
return Response(data)
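Taken together, the list endpoint now accepts an autostart flag as a query parameter. A minimal sketch of the new call, assuming a local instance; host, video ID and token are illustrative:

import requests

response = requests.post(
    "http://localhost:8000/api/download/?autostart=true",
    json={"data": [{"youtube_id": "dQw4w9WgXcQ", "status": "pending"}]},
    headers={"Authorization": "Token <api-token>"},  # TokenAuthentication, see ApiBaseView
    timeout=10,
)
print(response.json())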

View File

@ -8,9 +8,10 @@ import os
from time import sleep
from django.core.management.base import BaseCommand, CommandError
from home.src.es.connect import ElasticWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.es.index_setup import ElasitIndexWrap
from home.src.es.snapshot import ElasticSnapshot
from home.src.index.video_streams import MediaStreamExtractor
from home.src.ta.config import AppConfig, ReleaseVersion
from home.src.ta.helper import clear_dl_cache
from home.src.ta.ta_redis import RedisArchivist
@ -41,7 +42,8 @@ class Command(BaseCommand):
self._version_check()
self._mig_index_setup()
self._mig_snapshot_check()
self._mig_set_vid_type()
self._mig_set_streams()
self._mig_set_autostart()
def _sync_redis_state(self):
"""make sure redis gets new config.json values"""
@ -145,51 +147,74 @@ class Command(BaseCommand):
self.stdout.write("[MIGRATION] setup snapshots")
ElasticSnapshot().setup()
def _mig_set_vid_type(self):
"""migration: update 0.3.0 to 0.3.1 set vid_type default"""
self.stdout.write("[MIGRATION] set default vid_type")
index_list = ["ta_video", "ta_download"]
def _mig_set_streams(self):
"""migration: update from 0.3.5 to 0.3.6, set streams and media_size"""
self.stdout.write("[MIGRATION] index streams and media size")
videos = AppConfig().config["application"]["videos"]
data = {
"query": {
"bool": {
"should": [
{
"bool": {
"must_not": [{"exists": {"field": "vid_type"}}]
}
},
{"term": {"vid_type": {"value": "unknown"}}},
]
}
"bool": {"must_not": [{"exists": {"field": "streams"}}]}
},
"script": {"source": "ctx._source['vid_type'] = 'videos'"},
"_source": ["media_url", "youtube_id"],
}
all_missing = IndexPaginate("ta_video", data).get_results()
if not all_missing:
self.stdout.write(" no videos need updating")
return
for index_name in index_list:
path = f"{index_name}/_update_by_query"
response, status_code = ElasticWrap(path).post(data=data)
if status_code == 503:
message = f" 🗙 {index_name} retry failed migration."
self.stdout.write(self.style.ERROR(message))
sleep(10)
response, status_code = ElasticWrap(path).post(data=data)
total = len(all_missing)
for idx, missing in enumerate(all_missing):
media_url = missing["media_url"]
youtube_id = missing["youtube_id"]
media_path = os.path.join(videos, media_url)
if not os.path.exists(media_path):
self.stdout.write(f" file not found: {media_path}")
continue
if status_code == 200:
updated = response.get("updated", 0)
if not updated:
self.stdout.write(
f" no videos needed updating in {index_name}"
)
continue
self.stdout.write(
self.style.SUCCESS(
f"{updated} videos updated in {index_name}"
)
media = MediaStreamExtractor(media_path)
vid_data = {
"doc": {
"streams": media.extract_metadata(),
"media_size": media.get_file_size(),
}
}
path = f"ta_video/_update/{youtube_id}"
response, status_code = ElasticWrap(path).post(data=vid_data)
if status_code != 200:
self.stderr.write(
f" update failed: {path}, {response}, {status_code}"
)
else:
message = f" 🗙 {index_name} vid_type update failed"
self.stdout.write(self.style.ERROR(message))
self.stdout.write(response)
sleep(60)
raise CommandError(message)
if idx % 100 == 0:
self.stdout.write(f" progress {idx}/{total}")
def _mig_set_autostart(self):
"""migration: update from 0.3.5 to 0.3.6 set auto_start to false"""
self.stdout.write("[MIGRATION] set default download auto_start")
data = {
"query": {
"bool": {"must_not": [{"exists": {"field": "auto_start"}}]}
},
"script": {"source": "ctx._source['auto_start'] = false"},
}
path = "ta_download/_update_by_query"
response, status_code = ElasticWrap(path).post(data=data)
if status_code == 200:
updated = response.get("updated", 0)
if not updated:
self.stdout.write(
" no videos needed updating in ta_download"
)
return
self.stdout.write(
self.style.SUCCESS(
f"{updated} videos updated in ta_download"
)
)
message = " 🗙 ta_download auto_start update failed"
self.stdout.write(self.style.ERROR(message))
self.stdout.write(response)
sleep(60)
raise CommandError(message)

View File

@ -12,18 +12,18 @@
"grid_items": 3
},
"subscriptions": {
"auto_search": false,
"auto_download": false,
"channel_size": 50,
"live_channel_size": 50,
"shorts_channel_size": 50
"shorts_channel_size": 50,
"auto_start": false
},
"downloads": {
"limit_count": false,
"limit_speed": false,
"sleep_interval": 3,
"autodelete_days": false,
"format": false,
"format_sort": false,
"add_metadata": false,
"add_thumbnail": false,
"subtitle": false,

View File

@ -16,9 +16,9 @@ from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.index.video_streams import DurationConverter
from home.src.ta.config import AppConfig
from home.src.ta.helper import DurationConverter, is_shorts
from home.src.ta.ta_redis import RedisQueue
from home.src.ta.helper import is_shorts
class PendingIndex:
@ -112,20 +112,14 @@ class PendingInteract:
_, _ = ElasticWrap(path).post(data=data)
def update_status(self):
"""update status field of pending item"""
data = {"doc": {"status": self.status}}
path = f"ta_download/_update/{self.youtube_id}"
_, _ = ElasticWrap(path).post(data=data)
"""update status of pending item"""
if self.status == "priority":
data = {"doc": {"status": "pending", "auto_start": True}}
else:
data = {"doc": {"status": self.status}}
def prioritize(self):
"""prioritize pending item in redis queue"""
pending_video, _ = self.get_item()
vid_type = pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value)
to_add = {
"youtube_id": pending_video["youtube_id"],
"vid_type": vid_type,
}
RedisQueue(queue_name="dl_queue").add_priority(to_add)
path = f"ta_download/_update/{self.youtube_id}/?refresh=true"
_, _ = ElasticWrap(path).post(data=data)
def get_item(self):
"""return pending item dict"""
@ -235,7 +229,7 @@ class PendingList(PendingIndex):
# match vid_type later
self._add_video(video_id, VideoTypeEnum.UNKNOWN)
def add_to_pending(self, status="pending"):
def add_to_pending(self, status="pending", auto_start=False):
"""add missing videos to pending list"""
self.get_channels()
bulk_list = []
@ -251,7 +245,13 @@ class PendingList(PendingIndex):
if not video_details:
continue
video_details["status"] = status
video_details.update(
{
"status": status,
"auto_start": auto_start,
}
)
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(video_details))
@ -273,7 +273,7 @@ class PendingList(PendingIndex):
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
_, _ = ElasticWrap("_bulk?refresh=true").post(query_str, ndjson=True)
def _notify_add(self, idx, total):
"""send notification for adding videos to download queue"""

View File

@ -175,10 +175,7 @@ class PlaylistSubscription:
def process_url_str(self, new_playlists, subscribed=True):
"""process playlist subscribe form url_str"""
data = {
"query": {"match_all": {}},
"sort": [{"published": {"order": "desc"}}],
}
data = {"query": {"match_all": {}}, "_source": ["youtube_id"]}
all_indexed = IndexPaginate("ta_video", data).get_results()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
@ -284,6 +281,7 @@ class SubscriptionScanner:
def __init__(self, task=False):
self.task = task
self.missing_videos = False
self.auto_start = AppConfig().config["subscriptions"].get("auto_start")
def scan(self):
"""scan channels and playlists"""

View File

@ -6,14 +6,13 @@ functionality:
- move to archive
"""
import json
import os
import shutil
from datetime import datetime
from home.src.download.queue import PendingList
from home.src.download.subscriptions import PlaylistSubscription
from home.src.download.yt_dlp_base import CookieHandler, YtWrap
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.comments import CommentList
@ -22,7 +21,6 @@ from home.src.index.video import YoutubeVideo, index_new_video
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisQueue
class DownloadPostProcess:
@ -159,114 +157,77 @@ class VideoDownloader:
self.channels = set()
self.videos = set()
def run_queue(self):
def run_queue(self, auto_only=False):
"""setup download queue in redis loop until no more items"""
self._setup_queue()
queue = RedisQueue(queue_name="dl_queue")
limit_queue = self.config["downloads"]["limit_count"]
if limit_queue:
queue.trim(limit_queue - 1)
self._get_overwrites()
while True:
youtube_data = queue.get_next()
if self.task.is_stopped() or not youtube_data:
queue.clear()
video_data = self._get_next(auto_only)
if self.task.is_stopped() or not video_data:
break
youtube_data = json.loads(youtube_data)
youtube_id = youtube_data.get("youtube_id")
tmp_vid_type = youtube_data.get(
"vid_type", VideoTypeEnum.VIDEOS.value
)
video_type = VideoTypeEnum(tmp_vid_type)
print(f"{youtube_id}: Downloading type: {video_type}")
youtube_id = video_data.get("youtube_id")
print(f"{youtube_id}: Downloading video")
self._notify(video_data, "Validate download format")
success = self._dl_single_vid(youtube_id)
if not success:
continue
if self.task:
self.task.send_progress(
[
f"Processing video {youtube_id}",
"Add video metadata to index.",
]
)
self._notify(video_data, "Add video metadata to index")
vid_dict = index_new_video(
youtube_id,
video_overwrites=self.video_overwrites,
video_type=video_type,
video_type=VideoTypeEnum(video_data["vid_type"]),
)
self.channels.add(vid_dict["channel"]["channel_id"])
self.videos.add(vid_dict["youtube_id"])
if self.task:
self.task.send_progress(
[
f"Processing video {youtube_id}",
"Move downloaded file to archive.",
]
)
self._notify(video_data, "Move downloaded file to archive")
self.move_to_archive(vid_dict)
if queue.has_item():
message = "Continue with next video."
else:
message = "Download queue is finished."
if self.task:
self.task.send_progress([message])
self._delete_from_pending(youtube_id)
# post processing
self._add_subscribed_channels()
DownloadPostProcess(self).run()
def _setup_queue(self):
"""setup required and validate"""
if self.config["downloads"]["cookie_import"]:
valid = CookieHandler(self.config).validate()
if not valid:
return
def _notify(self, video_data, message):
"""send progress notification to task"""
if not self.task:
return
typ = VideoTypeEnum(video_data["vid_type"]).value.rstrip("s").title()
title = video_data.get("title")
self.task.send_progress([f"Processing {typ}: {title}", message])
def _get_next(self, auto_only):
"""get next item in queue"""
must_list = [{"term": {"status": {"value": "pending"}}}]
if auto_only:
must_list.append({"term": {"auto_start": {"value": True}}})
data = {
"size": 1,
"query": {"bool": {"must": must_list}},
"sort": [
{"auto_start": {"order": "desc"}},
{"timestamp": {"order": "asc"}},
],
}
path = "ta_download/_search"
response, _ = ElasticWrap(path).get(data=data)
if not response["hits"]["hits"]:
return False
return response["hits"]["hits"][0]["_source"]
def _get_overwrites(self):
"""get channel overwrites"""
pending = PendingList()
pending.get_download()
pending.get_channels()
self.video_overwrites = pending.video_overwrites
def add_pending(self):
"""add pending videos to download queue"""
if self.task:
self.task.send_progress(["Scanning your download queue."])
pending = PendingList()
pending.get_download()
to_add = [
json.dumps(
{
"youtube_id": i["youtube_id"],
# Using .value in default val to match what would be
# decoded when parsing json if not set
"vid_type": i.get("vid_type", VideoTypeEnum.VIDEOS.value),
}
)
for i in pending.all_pending
]
if not to_add:
# there is nothing pending
print("download queue is empty")
if self.task:
self.task.send_progress(["Download queue is empty."])
return
RedisQueue(queue_name="dl_queue").add_list(to_add)
def _progress_hook(self, response):
"""process the progress_hooks from yt_dlp"""
progress = False
@ -312,6 +273,10 @@ class VideoDownloader:
"""build user customized options"""
if self.config["downloads"]["format"]:
self.obs["format"] = self.config["downloads"]["format"]
if self.config["downloads"]["format_sort"]:
format_sort = self.config["downloads"]["format_sort"]
format_sort_list = [i.strip() for i in format_sort.split(",")]
self.obs["format_sort"] = format_sort_list
if self.config["downloads"]["limit_speed"]:
self.obs["ratelimit"] = (
self.config["downloads"]["limit_speed"] * 1024
@ -422,7 +387,7 @@ class VideoDownloader:
@staticmethod
def _delete_from_pending(youtube_id):
"""delete downloaded video from pending index if its there"""
path = f"ta_download/_doc/{youtube_id}"
path = f"ta_download/_doc/{youtube_id}?refresh=true"
_, _ = ElasticWrap(path).delete()
def _add_subscribed_channels(self):
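The new format_sort setting maps directly onto yt-dlp's format sorting. A minimal sketch of what the parsed option amounts to, assuming the example value from the settings page further below:

from yt_dlp import YoutubeDL

obs = {"format_sort": ["res", "codec:av1"]}  # "res,codec:av1" split into sort fields
with YoutubeDL(obs) as ydl:
    ydl.download(["https://www.youtube.com/watch?v=dQw4w9WgXcQ"])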

View File

@ -146,6 +146,9 @@
"type": "keyword",
"index": false
},
"media_size": {
"type": "long"
},
"tags": {
"type": "text",
"analyzer": "english",
@ -239,6 +242,30 @@
}
}
},
"streams": {
"properties": {
"type": {
"type": "keyword",
"index": false
},
"index": {
"type": "short",
"index": false
},
"codec": {
"type": "text"
},
"width": {
"type": "short"
},
"height": {
"type": "short"
},
"bitrate": {
"type": "integer"
}
}
},
"sponsorblock": {
"properties": {
"last_refresh": {
@ -330,6 +357,9 @@
},
"vid_type": {
"type": "keyword"
},
"auto_start": {
"type": "boolean"
}
},
"expected_set": {

View File

@ -44,6 +44,12 @@ class UserSettingsForm(forms.Form):
class ApplicationSettingsForm(forms.Form):
"""handle all application settings"""
AUTOSTART_CHOICES = [
("", "-- change subscription autostart --"),
("0", "disable auto start"),
("1", "enable auto start"),
]
METADATA_CHOICES = [
("", "-- change metadata embed --"),
("0", "don't embed metadata"),
@ -107,12 +113,15 @@ class ApplicationSettingsForm(forms.Form):
subscriptions_shorts_channel_size = forms.IntegerField(
required=False, min_value=0
)
downloads_limit_count = forms.IntegerField(required=False)
subscriptions_auto_start = forms.ChoiceField(
widget=forms.Select, choices=AUTOSTART_CHOICES, required=False
)
downloads_limit_speed = forms.IntegerField(required=False)
downloads_throttledratelimit = forms.IntegerField(required=False)
downloads_sleep_interval = forms.IntegerField(required=False)
downloads_autodelete_days = forms.IntegerField(required=False)
downloads_format = forms.CharField(required=False)
downloads_format_sort = forms.CharField(required=False)
downloads_add_metadata = forms.ChoiceField(
widget=forms.Select, choices=METADATA_CHOICES, required=False
)

View File

@ -11,6 +11,7 @@ from datetime import datetime
from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap
from home.src.index.video_streams import DurationConverter
from home.src.ta.config import AppConfig
@ -19,6 +20,7 @@ class SearchHandler:
def __init__(self, path, config, data=False):
self.max_hits = None
self.aggs = None
self.path = path
self.config = config
self.data = data
@ -34,62 +36,22 @@ class SearchHandler:
# simulate list for single result to reuse rest of class
return_value = [response]
# stop if empty
if not return_value:
return False
all_videos = []
all_channels = []
for idx, hit in enumerate(return_value):
return_value[idx] = self.hit_cleanup(hit)
if hit["_index"] == "ta_video":
video_dict, channel_dict = self.vid_cache_link(hit)
if video_dict not in all_videos:
all_videos.append(video_dict)
if channel_dict not in all_channels:
all_channels.append(channel_dict)
elif hit["_index"] == "ta_channel":
channel_dict = self.channel_cache_link(hit)
if channel_dict not in all_channels:
all_channels.append(channel_dict)
if response.get("aggregations"):
self.aggs = response["aggregations"]
if "total_duration" in self.aggs:
duration_sec = self.aggs["total_duration"]["value"]
self.aggs["total_duration"].update(
{"value_str": DurationConverter().get_str(duration_sec)}
)
return return_value
@staticmethod
def vid_cache_link(hit):
"""download thumbnails into cache"""
vid_thumb = hit["source"]["vid_thumb_url"]
youtube_id = hit["source"]["youtube_id"]
channel_id_hit = hit["source"]["channel"]["channel_id"]
chan_thumb = hit["source"]["channel"]["channel_thumb_url"]
try:
chan_banner = hit["source"]["channel"]["channel_banner_url"]
except KeyError:
chan_banner = False
video_dict = {"youtube_id": youtube_id, "vid_thumb": vid_thumb}
channel_dict = {
"channel_id": channel_id_hit,
"chan_thumb": chan_thumb,
"chan_banner": chan_banner,
}
return video_dict, channel_dict
@staticmethod
def channel_cache_link(hit):
"""build channel thumb links"""
channel_id_hit = hit["source"]["channel_id"]
chan_thumb = hit["source"]["channel_thumb_url"]
try:
chan_banner = hit["source"]["channel_banner_url"]
except KeyError:
chan_banner = False
channel_dict = {
"channel_id": channel_id_hit,
"chan_thumb": chan_thumb,
"chan_banner": chan_banner,
}
return channel_dict
@staticmethod
def hit_cleanup(hit):
"""clean up and parse data from a single hit"""

View File

@ -16,7 +16,11 @@ from home.src.index import playlist as ta_playlist
from home.src.index.generic import YouTubeItem
from home.src.index.subtitle import YoutubeSubtitle
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.helper import DurationConverter, clean_string, randomizor
from home.src.index.video_streams import (
DurationConverter,
MediaStreamExtractor,
)
from home.src.ta.helper import clean_string, randomizor
from home.src.ta.ta_redis import RedisArchivist
from ryd_client import ryd_client
@ -152,6 +156,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
self._add_stats()
self.add_file_path()
self.add_player(media_path)
self.add_streams(media_path)
if self.config["downloads"]["integrate_ryd"]:
self._get_ryd_stats()
@ -253,6 +258,17 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
}
)
def add_streams(self, media_path=False):
"""add stream metadata"""
vid_path = self._get_vid_path(media_path)
media = MediaStreamExtractor(vid_path)
self.json_data.update(
{
"streams": media.extract_metadata(),
"media_size": media.get_file_size(),
}
)
def _get_vid_path(self, media_path=False):
"""get path of media file"""
if media_path:

View File

@ -0,0 +1,132 @@
"""extract metadata from video streams"""
import json
import subprocess
from os import stat
class DurationConverter:
"""
using ffprobe to get and parse duration from filepath
"""
@staticmethod
def get_sec(file_path):
"""read duration from file"""
duration = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"default=noprint_wrappers=1:nokey=1",
file_path,
],
capture_output=True,
check=True,
)
duration_raw = duration.stdout.decode().strip()
if duration_raw == "N/A":
return 0
duration_sec = int(float(duration_raw))
return duration_sec
@staticmethod
def get_str(duration_sec):
"""takes duration in sec and returns clean string"""
if not duration_sec:
# failed to extract
return "NA"
hours = int(duration_sec // 3600)
minutes = int((duration_sec - (hours * 3600)) // 60)
secs = int(duration_sec - (hours * 3600) - (minutes * 60))
duration_str = str()
if hours:
duration_str = str(hours).zfill(2) + ":"
if minutes:
duration_str = duration_str + str(minutes).zfill(2) + ":"
else:
duration_str = duration_str + "00:"
duration_str = duration_str + str(secs).zfill(2)
return duration_str
class MediaStreamExtractor:
"""extract stream metadata"""
def __init__(self, media_path):
self.media_path = media_path
self.metadata = []
def extract_metadata(self):
"""entry point to extract metadata"""
cmd = [
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
"-show_format",
self.media_path,
]
result = subprocess.run(
cmd, capture_output=True, text=True, check=False
)
if result.returncode != 0:
return self.metadata
streams = json.loads(result.stdout).get("streams")
for stream in streams:
self.process_stream(stream)
return self.metadata
def process_stream(self, stream):
"""parse stream to metadata"""
codec_type = stream.get("codec_type")
if codec_type == "video":
self._extract_video_metadata(stream)
elif codec_type == "audio":
self._extract_audio_metadata(stream)
else:
return
def _extract_video_metadata(self, stream):
"""parse video metadata"""
if "bit_rate" not in stream:
# is probably thumbnail
return
self.metadata.append(
{
"type": "video",
"index": stream["index"],
"codec": stream["codec_name"],
"width": stream["width"],
"height": stream["height"],
"bitrate": int(stream["bit_rate"]),
}
)
def _extract_audio_metadata(self, stream):
"""extract audio metadata"""
self.metadata.append(
{
"type": "audio",
"index": stream["index"],
"codec": stream["codec_name"],
"bitrate": int(stream["bit_rate"]),
}
)
def get_file_size(self):
"""get filesize in bytes"""
return stat(self.media_path).st_size
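Usage is two calls on a local file path; ffprobe needs to be on PATH for both classes in this module. The path below is illustrative:

media = MediaStreamExtractor("/youtube/channel/video.mp4")
streams = media.extract_metadata()  # list of stream dicts, empty list if ffprobe fails
size = media.get_file_size()  # file size in bytes via os.stat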

View File

@ -8,7 +8,6 @@ import os
import random
import re
import string
import subprocess
import unicodedata
from datetime import datetime
from urllib.parse import urlparse
@ -16,20 +15,20 @@ from urllib.parse import urlparse
import requests
def clean_string(file_name):
def clean_string(file_name: str) -> str:
"""clean string to only asci characters"""
whitelist = "-_.() " + string.ascii_letters + string.digits
normalized = unicodedata.normalize("NFKD", file_name)
ascii_only = normalized.encode("ASCII", "ignore").decode().strip()
white_listed = "".join(c for c in ascii_only if c in whitelist)
cleaned = re.sub(r"[ ]{2,}", " ", white_listed)
white_listed: str = "".join(c for c in ascii_only if c in whitelist)
cleaned: str = re.sub(r"[ ]{2,}", " ", white_listed)
return cleaned
def ignore_filelist(filelist):
def ignore_filelist(filelist: list[str]) -> list[str]:
"""ignore temp files for os.listdir sanitizer"""
to_ignore = ["Icon\r\r", "Temporary Items", "Network Trash Folder"]
cleaned = []
cleaned: list[str] = []
for file_name in filelist:
if file_name.startswith(".") or file_name in to_ignore:
continue
@ -39,13 +38,13 @@ def ignore_filelist(filelist):
return cleaned
def randomizor(length):
def randomizor(length: int) -> str:
"""generate random alpha numeric string"""
pool = string.digits + string.ascii_letters
pool: str = string.digits + string.ascii_letters
return "".join(random.choice(pool) for i in range(length))
def requests_headers():
def requests_headers() -> dict[str, str]:
"""build header with random user agent for requests outside of yt-dlp"""
chrome_versions = (
@ -97,7 +96,7 @@ def requests_headers():
return {"User-Agent": template}
def date_praser(timestamp):
def date_praser(timestamp: int | str) -> str:
"""return formatted date string"""
if isinstance(timestamp, int):
date_obj = datetime.fromtimestamp(timestamp)
@ -107,7 +106,7 @@ def date_praser(timestamp):
return datetime.strftime(date_obj, "%d %b, %Y")
def time_parser(timestamp):
def time_parser(timestamp: str) -> float:
"""return seconds from timestamp, false on empty"""
if not timestamp:
return False
@ -119,7 +118,7 @@ def time_parser(timestamp):
return int(hours) * 60 * 60 + int(minutes) * 60 + float(seconds)
def clear_dl_cache(config):
def clear_dl_cache(config: dict) -> int:
"""clear leftover files from dl cache"""
print("clear download cache")
cache_dir = os.path.join(config["application"]["cache_dir"], "download")
@ -131,15 +130,15 @@ def clear_dl_cache(config):
return len(leftover_files)
def get_mapping():
def get_mapping() -> dict:
"""read index_mapping.json and get expected mapping and settings"""
with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f:
index_config = json.load(f).get("index_config")
index_config: dict = json.load(f).get("index_config")
return index_config
def is_shorts(youtube_id):
def is_shorts(youtube_id: str) -> bool:
"""check if youtube_id is a shorts video, bot not it it's not a shorts"""
shorts_url = f"https://www.youtube.com/shorts/{youtube_id}"
response = requests.head(
@ -149,10 +148,10 @@ def is_shorts(youtube_id):
return response.status_code == 200
def ta_host_parser(ta_host):
def ta_host_parser(ta_host: str) -> tuple[list[str], list[str]]:
"""parse ta_host env var for ALLOWED_HOSTS and CSRF_TRUSTED_ORIGINS"""
allowed_hosts = []
csrf_trusted_origins = []
allowed_hosts: list[str] = []
csrf_trusted_origins: list[str] = []
for host in ta_host.split():
host_clean = host.strip()
if not host_clean.startswith("http"):
@ -163,54 +162,3 @@ def ta_host_parser(ta_host):
csrf_trusted_origins.append(f"{parsed.scheme}://{parsed.hostname}")
return allowed_hosts, csrf_trusted_origins
class DurationConverter:
"""
using ffmpeg to get and parse duration from filepath
"""
@staticmethod
def get_sec(file_path):
"""read duration from file"""
duration = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"default=noprint_wrappers=1:nokey=1",
file_path,
],
capture_output=True,
check=True,
)
duration_raw = duration.stdout.decode().strip()
if duration_raw == "N/A":
return 0
duration_sec = int(float(duration_raw))
return duration_sec
@staticmethod
def get_str(duration_sec):
"""takes duration in sec and returns clean string"""
if not duration_sec:
# failed to extract
return "NA"
hours = duration_sec // 3600
minutes = (duration_sec - (hours * 3600)) // 60
secs = duration_sec - (hours * 3600) - (minutes * 60)
duration_str = str()
if hours:
duration_str = str(hours).zfill(2) + ":"
if minutes:
duration_str = duration_str + str(minutes).zfill(2) + ":"
else:
duration_str = duration_str + "00:"
duration_str = duration_str + str(secs).zfill(2)
return duration_str

View File

@ -92,7 +92,7 @@ class Parser:
item_type = "video"
elif len_id_str == 24:
item_type = "channel"
elif len_id_str in (34, 18):
elif len_id_str in (34, 26, 18):
item_type = "playlist"
else:
raise ValueError(f"not a valid id_str: {id_str}")

View File

@ -25,6 +25,7 @@ from home.src.index.reindex import Reindex, ReindexManual, ReindexPopulate
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.task_manager import TaskManager
from home.src.ta.urlparser import Parser
CONFIG = AppConfig().config
REDIS_HOST = os.environ.get("REDIS_HOST")
@ -171,14 +172,16 @@ def update_subscribed(self):
return
manager.init(self)
missing_videos = SubscriptionScanner(task=self).scan()
handler = SubscriptionScanner(task=self)
missing_videos = handler.scan()
auto_start = handler.auto_start
if missing_videos:
print(missing_videos)
extrac_dl.delay(missing_videos)
extrac_dl.delay(missing_videos, auto_start=auto_start)
@shared_task(name="download_pending", bind=True, base=BaseTask)
def download_pending(self, from_queue=True):
def download_pending(self, auto_only=False):
"""download latest pending videos"""
manager = TaskManager()
if manager.is_pending(self):
@ -187,19 +190,24 @@ def download_pending(self, from_queue=True):
return
manager.init(self)
downloader = VideoDownloader(task=self)
if from_queue:
downloader.add_pending()
downloader.run_queue()
VideoDownloader(task=self).run_queue(auto_only=auto_only)
@shared_task(name="extract_download", bind=True, base=BaseTask)
def extrac_dl(self, youtube_ids):
def extrac_dl(self, youtube_ids, auto_start=False):
"""parse list passed and add to pending"""
TaskManager().init(self)
pending_handler = PendingList(youtube_ids=youtube_ids, task=self)
if isinstance(youtube_ids, str):
to_add = Parser(youtube_ids).parse()
else:
to_add = youtube_ids
pending_handler = PendingList(youtube_ids=to_add, task=self)
pending_handler.parse_url_list()
pending_handler.add_to_pending()
pending_handler.add_to_pending(auto_start=auto_start)
if auto_start:
download_pending.delay(auto_only=True)
@shared_task(bind=True, name="check_reindex", base=BaseTask)

View File

@ -45,12 +45,10 @@
</div>
</div>
<div class="info-box-item">
<div>
{% if max_hits %}
<p>Total Videos: {{ max_hits }}</p>
<button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button>
{% endif %}
</div>
{% if aggs %}
<p>{{ aggs.total_items.value }} videos <span class="space-carrot">|</span> {{ aggs.total_duration.value_str }} playback <span class="space-carrot">|</span> Total size {{ aggs.total_size.value|filesizeformat }}</p>
<button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button>
{% endif %}
</div>
</div>
</div>

View File

@ -20,11 +20,12 @@
<img id="animate-icon" onclick="showForm()" src="{% static 'img/icon-add.svg' %}" alt="add-icon">
<p>Add to download queue</p>
<div class="show-form">
<form id='hidden-form' action="/downloads/" method="post">
<div id='hidden-form' novalidate>
{% csrf_token %}
{{ add_form }}
<button type="submit">Add to download queue</button>
</form>
<button onclick="addToQueue()">Add to queue</button>
<button onclick="addToQueue(true)">Download now</button>
</div>
</div>
</div>
</div>

View File

@ -49,14 +49,14 @@
<i>Shorts Videos to scan to find new items for the <b>Rescan subscriptions</b> task, max recommended 50.</i><br>
{{ app_form.subscriptions_shorts_channel_size }}
</div>
<div class="settings-item">
<p>Auto start downloads from your subscriptions: <span class="settings-current">{{ config.subscriptions.auto_start }}</span></p>
<i>Enabling this will automatically start and prioritize downloads from your subscriptions.</i><br>
{{ app_form.subscriptions_auto_start }}
</div>
</div>
<div class="settings-group">
<h2 id="downloads">Downloads</h2>
<div class="settings-item">
<p>Current download limit: <span class="settings-current">{{ config.downloads.limit_count }}</span></p>
<i>Limit the number of videos getting downloaded on every run. 0 (zero) to deactivate.</i><br>
{{ app_form.downloads_limit_count }}
</div>
<div class="settings-item">
<p>Current download speed limit in KB/s: <span class="settings-current">{{ config.downloads.limit_speed }}</span></p>
<i>Limit download speed. 0 (zero) to deactivate, e.g. 1000 (1MB/s). Speeds are in KB/s. Setting takes effect on new download jobs or application restart.</i><br>
@ -95,6 +95,19 @@
{{ app_form.downloads_format }}
<br>
</div>
<div class="settings-item">
<p>Force sort order to have precedence over all yt-dlp fields.<br>
Currently: <span class="settings-current">{{ config.downloads.format_sort }}</span>
</p>
<p>Example configurations:</p>
<ul>
<li><span class="settings-current">res,codec:av1</span>: prefer AV1 over all other video codecs.</li>
<li><span class="settings-current">0</span>: deactivate and keep the default as decided by yt-dlp.</li>
</ul>
<i>Not all codecs are supported by all browsers. The default value ensures best compatibility. Check out the <a href="https://github.com/yt-dlp/yt-dlp#sorting-formats" target="_blank">documentation</a> for valid configurations.</i><br>
{{ app_form.downloads_format_sort }}
<br>
</div>
<div class="settings-item">
<p>Current metadata embed setting: <span class="settings-current">{{ config.downloads.add_metadata }}</span></p>
<i>Metadata is not embedded into the downloaded files by default.</i><br>

View File

@ -56,20 +56,6 @@
{% else %}
<p>Youtube: Deactivated</p>
{% endif %}
{% if reindex %}
<p>Reindex scheduled</p>
{% else %}
<div id="reindex-button" class="button-box">
<button data-id="{{ video.youtube_id }}" data-type="video" onclick="reindex(this)" title="Reindex {{ video.title }}">Reindex</button>
</div>
{% endif %}
<div class="button-box">
<a download="" href="/media/{{ video.media_url }}"><button id="download-item">Download File</button></a>
<button onclick="deleteConfirm()" id="delete-item">Delete Video</button>
<div class="delete-confirm" id="delete-button">
<span>Are you sure? </span><button class="danger-button" onclick="deleteVideo(this)" data-id="{{ video.youtube_id }}" data-redirect = "{{ video.channel.channel_id }}">Delete</button> <button onclick="cancelDelete()">Cancel</button>
</div>
</div>
</div>
</div>
<div class="info-box-item">
@ -89,6 +75,34 @@
</div>
</div>
</div>
<div class="info-box info-box-2">
<div class="info-box-item">
<div class="button-box">
{% if reindex %}
<p>Reindex scheduled</p>
{% else %}
<div id="reindex-button" class="button-box">
<button data-id="{{ video.youtube_id }}" data-type="video" onclick="reindex(this)" title="Reindex {{ video.title }}">Reindex</button>
</div>
{% endif %}
<a download="" href="/media/{{ video.media_url }}"><button id="download-item">Download File</button></a>
<button onclick="deleteConfirm()" id="delete-item">Delete Video</button>
<div class="delete-confirm" id="delete-button">
<span>Are you sure? </span><button class="danger-button" onclick="deleteVideo(this)" data-id="{{ video.youtube_id }}" data-redirect = "{{ video.channel.channel_id }}">Delete</button> <button onclick="cancelDelete()">Cancel</button>
</div>
</div>
</div>
<div class="info-box-item">
{% if video.media_size %}
<p>File size: {{ video.media_size|filesizeformat }}</p>
{% endif %}
{% if video.streams %}
{% for stream in video.streams %}
<p>{{ stream.type|title }}: {{ stream.codec }} {{ stream.bitrate|filesizeformat }}/s{% if stream.width %} <span class="space-carrot">|</span> {{ stream.width }}x{{ stream.height }}{% endif %}</p>
{% endfor %}
{% endif %}
</div>
</div>
{% if video.tags %}
<div class="description-box">
<div class="video-tag-box">

View File

@ -41,8 +41,7 @@ from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.helper import time_parser
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.urlparser import Parser
from home.tasks import extrac_dl, index_channel_playlists, subscribe_to
from home.tasks import index_channel_playlists, subscribe_to
from rest_framework.authtoken.models import Token
@ -148,8 +147,8 @@ class ArchivistViewConfig(View):
class ArchivistResultsView(ArchivistViewConfig):
"""View class to inherit from when searching data in es"""
view_origin = False
es_search = False
view_origin = ""
es_search = ""
def __init__(self):
super().__init__(self.view_origin)
@ -259,6 +258,7 @@ class ArchivistResultsView(ArchivistViewConfig):
self.pagination_handler.validate(search.max_hits)
self.context["max_hits"] = search.max_hits
self.context["pagination"] = self.pagination_handler.pagination
self.context["aggs"] = search.aggs
class MinView(View):
@ -367,7 +367,7 @@ class AboutView(MinView):
class DownloadView(ArchivistResultsView):
"""resolves to /download/
takes POST for downloading youtube links
handle the download queue
"""
view_origin = "downloads"
@ -451,34 +451,6 @@ class DownloadView(ArchivistResultsView):
return buckets_sorted
@staticmethod
def post(request):
"""handle post requests"""
to_queue = AddToQueueForm(data=request.POST)
if to_queue.is_valid():
url_str = request.POST.get("vid_url")
print(url_str)
try:
youtube_ids = Parser(url_str).parse()
except ValueError:
# failed to process
key = "message:add"
print(f"failed to parse: {url_str}")
mess_dict = {
"status": key,
"level": "error",
"title": "Failed to extract links.",
"message": "Not a video, channel or playlist ID or URL",
}
RedisArchivist().set_message(key, mess_dict, expire=True)
return redirect("downloads")
print(youtube_ids)
extrac_dl.delay(youtube_ids)
sleep(2)
return redirect("downloads", permanent=True)
class ChannelIdBaseView(ArchivistResultsView):
"""base class for all channel-id views"""
@ -613,6 +585,11 @@ class ChannelIdView(ChannelIdBaseView):
]
}
}
self.data["aggs"] = {
"total_items": {"value_count": {"field": "youtube_id"}},
"total_size": {"sum": {"field": "media_size"}},
"total_duration": {"sum": {"field": "player.duration"}},
}
self.data["sort"].append({"title.keyword": {"order": "asc"}})
if self.context["hide_watched"]:
@ -982,7 +959,7 @@ class SearchView(ArchivistResultsView):
"""
view_origin = "home"
es_search = False
es_search = ""
def get(self, request):
"""handle get request"""

View File

@ -1,12 +1,12 @@
beautifulsoup4==4.12.2
celery==5.2.7
Django==4.2
django-auth-ldap==4.2.0
django-auth-ldap==4.3.0
django-cors-headers==3.14.0
djangorestframework==3.14.0
Pillow==9.5.0
redis==4.5.4
requests==2.28.2
requests==2.29.0
ryd-client==0.0.6
uWSGI==2.0.21
whitenoise==6.4.0

View File

@ -369,6 +369,10 @@ button:hover {
display: none;
}
#hidden-form button {
margin-right: 1rem;
}
#text-reveal {
height: 0;
overflow: hidden;
@ -660,6 +664,10 @@ video:-webkit-full-screen {
background-color: var(--highlight-bg);
}
.info-box-item p {
width: 100%;
}
.description-text {
width: 100%;
}

View File

@ -160,6 +160,24 @@ function dlPending() {
}, 500);
}
function addToQueue(autostart=false) {
let textArea = document.getElementById('id_vid_url');
if (textArea.value === '') {
return;
}
let toPost = {data: [{youtube_id: textArea.value, status: 'pending'}]};
let apiEndpoint = '/api/download/';
if (autostart) {
apiEndpoint = `${apiEndpoint}?autostart=true`;
}
apiRequest(apiEndpoint, 'POST', toPost);
textArea.value = '';
setTimeout(function () {
checkMessages();
}, 500);
showForm();
}
function toIgnore(button) {
let youtube_id = button.getAttribute('data-id');
let apiEndpoint = '/api/download/' + youtube_id + '/';