Index media metadata, #build

Changed:
- Added stream and codec details
- Added channel aggregations
- Added autostart download
- Added members-only playlist support
- Added yt-dlp format sort option
This commit is contained in:
simon 2023-04-29 18:44:14 +07:00
commit cf37800c2b
22 changed files with 473 additions and 353 deletions

View File

@ -12,7 +12,7 @@ from home.src.index.generic import Pagination
from home.src.index.reindex import ReindexProgress
from home.src.index.video import SponsorBlock, YoutubeVideo
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.task_manager import TaskCommand, TaskManager
from home.src.ta.urlparser import Parser
from home.tasks import (
@ -38,8 +38,8 @@ class ApiBaseView(APIView):
authentication_classes = [SessionAuthentication, TokenAuthentication]
permission_classes = [IsAuthenticated]
search_base = False
data = False
search_base = ""
data = ""
def __init__(self):
super().__init__()
@ -436,12 +436,9 @@ class DownloadApiView(ApiBaseView):
return Response({"message": message}, status=404)
print(f"{video_id}: change status to {item_status}")
PendingInteract(video_id, item_status).update_status()
if item_status == "priority":
PendingInteract(youtube_id=video_id).prioritize()
download_pending.delay(from_queue=False)
else:
PendingInteract(video_id, item_status).update_status()
RedisQueue(queue_name="dl_queue").clear_item(video_id)
download_pending.delay(auto_only=True)
return Response(request.data)
@ -494,6 +491,7 @@ class DownloadApiListView(ApiBaseView):
def post(request):
"""add list of videos to download queue"""
data = request.data
auto_start = bool(request.GET.get("autostart"))
try:
to_add = data["data"]
except KeyError:
@ -510,7 +508,7 @@ class DownloadApiListView(ApiBaseView):
print(message)
return Response({"message": message}, status=400)
extrac_dl.delay(youtube_ids)
extrac_dl.delay(youtube_ids, auto_start=auto_start)
return Response(data)
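Taken together, the list endpoint now accepts an autostart flag as a query parameter. A minimal sketch of the new call, assuming a local instance; host, video ID and token are illustrative:

import requests

response = requests.post(
    "http://localhost:8000/api/download/?autostart=true",
    json={"data": [{"youtube_id": "dQw4w9WgXcQ", "status": "pending"}]},
    headers={"Authorization": "Token <api-token>"},  # TokenAuthentication, see ApiBaseView
    timeout=10,
)
print(response.json())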

View File

@ -8,9 +8,10 @@ import os
from time import sleep
from django.core.management.base import BaseCommand, CommandError
from home.src.es.connect import ElasticWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.es.index_setup import ElasitIndexWrap
from home.src.es.snapshot import ElasticSnapshot
from home.src.index.video_streams import MediaStreamExtractor
from home.src.ta.config import AppConfig, ReleaseVersion
from home.src.ta.helper import clear_dl_cache
from home.src.ta.ta_redis import RedisArchivist
@ -41,7 +42,8 @@ class Command(BaseCommand):
self._version_check()
self._mig_index_setup()
self._mig_snapshot_check()
self._mig_set_vid_type()
self._mig_set_streams()
self._mig_set_autostart()
def _sync_redis_state(self):
"""make sure redis gets new config.json values"""
@ -145,51 +147,74 @@ class Command(BaseCommand):
self.stdout.write("[MIGRATION] setup snapshots")
ElasticSnapshot().setup()
def _mig_set_vid_type(self):
"""migration: update 0.3.0 to 0.3.1 set vid_type default"""
self.stdout.write("[MIGRATION] set default vid_type")
index_list = ["ta_video", "ta_download"]
def _mig_set_streams(self):
"""migration: update from 0.3.5 to 0.3.6, set streams and media_size"""
self.stdout.write("[MIGRATION] index streams and media size")
videos = AppConfig().config["application"]["videos"]
data = {
"query": {
"bool": {
"should": [
{
"bool": {
"must_not": [{"exists": {"field": "vid_type"}}]
}
},
{"term": {"vid_type": {"value": "unknown"}}},
]
}
"bool": {"must_not": [{"exists": {"field": "streams"}}]}
},
"script": {"source": "ctx._source['vid_type'] = 'videos'"},
"_source": ["media_url", "youtube_id"],
}
all_missing = IndexPaginate("ta_video", data).get_results()
if not all_missing:
self.stdout.write(" no videos need updating")
return
for index_name in index_list:
path = f"{index_name}/_update_by_query"
response, status_code = ElasticWrap(path).post(data=data)
if status_code == 503:
message = f" 🗙 {index_name} retry failed migration."
self.stdout.write(self.style.ERROR(message))
sleep(10)
response, status_code = ElasticWrap(path).post(data=data)
total = len(all_missing)
for idx, missing in enumerate(all_missing):
media_url = missing["media_url"]
youtube_id = missing["youtube_id"]
media_path = os.path.join(videos, media_url)
if not os.path.exists(media_path):
self.stdout.write(f" file not found: {media_path}")
continue
if status_code == 200:
updated = response.get("updated", 0)
if not updated:
self.stdout.write(
f" no videos needed updating in {index_name}"
)
continue
self.stdout.write(
self.style.SUCCESS(
f"{updated} videos updated in {index_name}"
)
media = MediaStreamExtractor(media_path)
vid_data = {
"doc": {
"streams": media.extract_metadata(),
"media_size": media.get_file_size(),
}
}
path = f"ta_video/_update/{youtube_id}"
response, status_code = ElasticWrap(path).post(data=vid_data)
if status_code != 200:
self.stderr.write(
f" update failed: {path}, {response}, {status_code}"
)
else:
message = f" 🗙 {index_name} vid_type update failed"
self.stdout.write(self.style.ERROR(message))
self.stdout.write(response)
sleep(60)
raise CommandError(message)
if idx % 100 == 0:
self.stdout.write(f" progress {idx}/{total}")
def _mig_set_autostart(self):
"""migration: update from 0.3.5 to 0.3.6 set auto_start to false"""
self.stdout.write("[MIGRATION] set default download auto_start")
data = {
"query": {
"bool": {"must_not": [{"exists": {"field": "auto_start"}}]}
},
"script": {"source": "ctx._source['auto_start'] = false"},
}
path = "ta_download/_update_by_query"
response, status_code = ElasticWrap(path).post(data=data)
if status_code == 200:
updated = response.get("updated", 0)
if not updated:
self.stdout.write(
" no videos needed updating in ta_download"
)
return
self.stdout.write(
self.style.SUCCESS(
f"{updated} videos updated in ta_download"
)
)
message = " 🗙 ta_download auto_start update failed"
self.stdout.write(self.style.ERROR(message))
self.stdout.write(response)
sleep(60)
raise CommandError(message)

View File

@ -12,18 +12,18 @@
"grid_items": 3
},
"subscriptions": {
"auto_search": false,
"auto_download": false,
"channel_size": 50,
"live_channel_size": 50,
"shorts_channel_size": 50
"shorts_channel_size": 50,
"auto_start": false
},
"downloads": {
"limit_count": false,
"limit_speed": false,
"sleep_interval": 3,
"autodelete_days": false,
"format": false,
"format_sort": false,
"add_metadata": false,
"add_thumbnail": false,
"subtitle": false,

View File

@ -16,9 +16,9 @@ from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.index.video_streams import DurationConverter
from home.src.ta.config import AppConfig
from home.src.ta.helper import DurationConverter, is_shorts
from home.src.ta.ta_redis import RedisQueue
from home.src.ta.helper import is_shorts
class PendingIndex:
@ -112,20 +112,14 @@ class PendingInteract:
_, _ = ElasticWrap(path).post(data=data)
def update_status(self):
"""update status field of pending item"""
data = {"doc": {"status": self.status}}
path = f"ta_download/_update/{self.youtube_id}"
_, _ = ElasticWrap(path).post(data=data)
"""update status of pending item"""
if self.status == "priority":
data = {"doc": {"status": "pending", "auto_start": True}}
else:
data = {"doc": {"status": self.status}}
def prioritize(self):
"""prioritize pending item in redis queue"""
pending_video, _ = self.get_item()
vid_type = pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value)
to_add = {
"youtube_id": pending_video["youtube_id"],
"vid_type": vid_type,
}
RedisQueue(queue_name="dl_queue").add_priority(to_add)
path = f"ta_download/_update/{self.youtube_id}/?refresh=true"
_, _ = ElasticWrap(path).post(data=data)
def get_item(self):
"""return pending item dict"""
@ -235,7 +229,7 @@ class PendingList(PendingIndex):
# match vid_type later
self._add_video(video_id, VideoTypeEnum.UNKNOWN)
def add_to_pending(self, status="pending"):
def add_to_pending(self, status="pending", auto_start=False):
"""add missing videos to pending list"""
self.get_channels()
bulk_list = []
@ -251,7 +245,13 @@ class PendingList(PendingIndex):
if not video_details:
continue
video_details["status"] = status
video_details.update(
{
"status": status,
"auto_start": auto_start,
}
)
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(video_details))
@ -273,7 +273,7 @@ class PendingList(PendingIndex):
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
_, _ = ElasticWrap("_bulk?refresh=true").post(query_str, ndjson=True)
def _notify_add(self, idx, total):
"""send notification for adding videos to download queue"""

View File

@ -175,10 +175,7 @@ class PlaylistSubscription:
def process_url_str(self, new_playlists, subscribed=True):
"""process playlist subscribe form url_str"""
data = {
"query": {"match_all": {}},
"sort": [{"published": {"order": "desc"}}],
}
data = {"query": {"match_all": {}}, "_source": ["youtube_id"]}
all_indexed = IndexPaginate("ta_video", data).get_results()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
@ -284,6 +281,7 @@ class SubscriptionScanner:
def __init__(self, task=False):
self.task = task
self.missing_videos = False
self.auto_start = AppConfig().config["subscriptions"].get("auto_start")
def scan(self):
"""scan channels and playlists"""

View File

@ -6,14 +6,13 @@ functionality:
- move to archive
"""
import json
import os
import shutil
from datetime import datetime
from home.src.download.queue import PendingList
from home.src.download.subscriptions import PlaylistSubscription
from home.src.download.yt_dlp_base import CookieHandler, YtWrap
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.comments import CommentList
@ -22,7 +21,6 @@ from home.src.index.video import YoutubeVideo, index_new_video
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisQueue
class DownloadPostProcess:
@ -159,114 +157,77 @@ class VideoDownloader:
self.channels = set()
self.videos = set()
def run_queue(self):
def run_queue(self, auto_only=False):
"""setup download queue in redis loop until no more items"""
self._setup_queue()
queue = RedisQueue(queue_name="dl_queue")
limit_queue = self.config["downloads"]["limit_count"]
if limit_queue:
queue.trim(limit_queue - 1)
self._get_overwrites()
while True:
youtube_data = queue.get_next()
if self.task.is_stopped() or not youtube_data:
queue.clear()
video_data = self._get_next(auto_only)
if self.task.is_stopped() or not video_data:
break
youtube_data = json.loads(youtube_data)
youtube_id = youtube_data.get("youtube_id")
tmp_vid_type = youtube_data.get(
"vid_type", VideoTypeEnum.VIDEOS.value
)
video_type = VideoTypeEnum(tmp_vid_type)
print(f"{youtube_id}: Downloading type: {video_type}")
youtube_id = video_data.get("youtube_id")
print(f"{youtube_id}: Downloading video")
self._notify(video_data, "Validate download format")
success = self._dl_single_vid(youtube_id)
if not success:
continue
if self.task:
self.task.send_progress(
[
f"Processing video {youtube_id}",
"Add video metadata to index.",
]
)
self._notify(video_data, "Add video metadata to index")
vid_dict = index_new_video(
youtube_id,
video_overwrites=self.video_overwrites,
video_type=video_type,
video_type=VideoTypeEnum(video_data["vid_type"]),
)
self.channels.add(vid_dict["channel"]["channel_id"])
self.videos.add(vid_dict["youtube_id"])
if self.task:
self.task.send_progress(
[
f"Processing video {youtube_id}",
"Move downloaded file to archive.",
]
)
self._notify(video_data, "Move downloaded file to archive")
self.move_to_archive(vid_dict)
if queue.has_item():
message = "Continue with next video."
else:
message = "Download queue is finished."
if self.task:
self.task.send_progress([message])
self._delete_from_pending(youtube_id)
# post processing
self._add_subscribed_channels()
DownloadPostProcess(self).run()
def _setup_queue(self):
"""setup required and validate"""
if self.config["downloads"]["cookie_import"]:
valid = CookieHandler(self.config).validate()
if not valid:
return
def _notify(self, video_data, message):
"""send progress notification to task"""
if not self.task:
return
typ = VideoTypeEnum(video_data["vid_type"]).value.rstrip("s").title()
title = video_data.get("title")
self.task.send_progress([f"Processing {typ}: {title}", message])
def _get_next(self, auto_only):
"""get next item in queue"""
must_list = [{"term": {"status": {"value": "pending"}}}]
if auto_only:
must_list.append({"term": {"auto_start": {"value": True}}})
data = {
"size": 1,
"query": {"bool": {"must": must_list}},
"sort": [
{"auto_start": {"order": "desc"}},
{"timestamp": {"order": "asc"}},
],
}
path = "ta_download/_search"
response, _ = ElasticWrap(path).get(data=data)
if not response["hits"]["hits"]:
return False
return response["hits"]["hits"][0]["_source"]
def _get_overwrites(self):
"""get channel overwrites"""
pending = PendingList()
pending.get_download()
pending.get_channels()
self.video_overwrites = pending.video_overwrites
def add_pending(self):
"""add pending videos to download queue"""
if self.task:
self.task.send_progress(["Scanning your download queue."])
pending = PendingList()
pending.get_download()
to_add = [
json.dumps(
{
"youtube_id": i["youtube_id"],
# Using .value in default val to match what would be
# decoded when parsing json if not set
"vid_type": i.get("vid_type", VideoTypeEnum.VIDEOS.value),
}
)
for i in pending.all_pending
]
if not to_add:
# there is nothing pending
print("download queue is empty")
if self.task:
self.task.send_progress(["Download queue is empty."])
return
RedisQueue(queue_name="dl_queue").add_list(to_add)
def _progress_hook(self, response):
"""process the progress_hooks from yt_dlp"""
progress = False
@ -312,6 +273,10 @@ class VideoDownloader:
"""build user customized options"""
if self.config["downloads"]["format"]:
self.obs["format"] = self.config["downloads"]["format"]
if self.config["downloads"]["format_sort"]:
format_sort = self.config["downloads"]["format_sort"]
format_sort_list = [i.strip() for i in format_sort.split(",")]
self.obs["format_sort"] = format_sort_list
if self.config["downloads"]["limit_speed"]:
self.obs["ratelimit"] = (
self.config["downloads"]["limit_speed"] * 1024
@ -422,7 +387,7 @@ class VideoDownloader:
@staticmethod
def _delete_from_pending(youtube_id):
"""delete downloaded video from pending index if its there"""
path = f"ta_download/_doc/{youtube_id}"
path = f"ta_download/_doc/{youtube_id}?refresh=true"
_, _ = ElasticWrap(path).delete()
def _add_subscribed_channels(self):
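The new format_sort setting maps directly onto yt-dlp's format sorting. A minimal sketch of what the parsed option amounts to, assuming the example value from the settings page further below:

from yt_dlp import YoutubeDL

obs = {"format_sort": ["res", "codec:av1"]}  # "res,codec:av1" split into sort fields
with YoutubeDL(obs) as ydl:
    ydl.download(["https://www.youtube.com/watch?v=dQw4w9WgXcQ"])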

View File

@ -146,6 +146,9 @@
"type": "keyword",
"index": false
},
"media_size": {
"type": "long"
},
"tags": {
"type": "text",
"analyzer": "english",
@ -239,6 +242,30 @@
}
}
},
"streams": {
"properties": {
"type": {
"type": "keyword",
"index": false
},
"index": {
"type": "short",
"index": false
},
"codec": {
"type": "text"
},
"width": {
"type": "short"
},
"height": {
"type": "short"
},
"bitrate": {
"type": "integer"
}
}
},
"sponsorblock": {
"properties": {
"last_refresh": {
@ -330,6 +357,9 @@
},
"vid_type": {
"type": "keyword"
},
"auto_start": {
"type": "boolean"
}
},
"expected_set": {

View File

@ -44,6 +44,12 @@ class UserSettingsForm(forms.Form):
class ApplicationSettingsForm(forms.Form):
"""handle all application settings"""
AUTOSTART_CHOICES = [
("", "-- change subscription autostart --"),
("0", "disable auto start"),
("1", "enable auto start"),
]
METADATA_CHOICES = [
("", "-- change metadata embed --"),
("0", "don't embed metadata"),
@ -107,12 +113,15 @@ class ApplicationSettingsForm(forms.Form):
subscriptions_shorts_channel_size = forms.IntegerField(
required=False, min_value=0
)
downloads_limit_count = forms.IntegerField(required=False)
subscriptions_auto_start = forms.ChoiceField(
widget=forms.Select, choices=AUTOSTART_CHOICES, required=False
)
downloads_limit_speed = forms.IntegerField(required=False)
downloads_throttledratelimit = forms.IntegerField(required=False)
downloads_sleep_interval = forms.IntegerField(required=False)
downloads_autodelete_days = forms.IntegerField(required=False)
downloads_format = forms.CharField(required=False)
downloads_format_sort = forms.CharField(required=False)
downloads_add_metadata = forms.ChoiceField(
widget=forms.Select, choices=METADATA_CHOICES, required=False
)

View File

@ -11,6 +11,7 @@ from datetime import datetime
from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap
from home.src.index.video_streams import DurationConverter
from home.src.ta.config import AppConfig
@ -19,6 +20,7 @@ class SearchHandler:
def __init__(self, path, config, data=False):
self.max_hits = None
self.aggs = None
self.path = path
self.config = config
self.data = data
@ -34,62 +36,22 @@ class SearchHandler:
# simulate list for single result to reuse rest of class
return_value = [response]
# stop if empty
if not return_value:
return False
all_videos = []
all_channels = []
for idx, hit in enumerate(return_value):
return_value[idx] = self.hit_cleanup(hit)
if hit["_index"] == "ta_video":
video_dict, channel_dict = self.vid_cache_link(hit)
if video_dict not in all_videos:
all_videos.append(video_dict)
if channel_dict not in all_channels:
all_channels.append(channel_dict)
elif hit["_index"] == "ta_channel":
channel_dict = self.channel_cache_link(hit)
if channel_dict not in all_channels:
all_channels.append(channel_dict)
if response.get("aggregations"):
self.aggs = response["aggregations"]
if "total_duration" in self.aggs:
duration_sec = self.aggs["total_duration"]["value"]
self.aggs["total_duration"].update(
{"value_str": DurationConverter().get_str(duration_sec)}
)
return return_value
@staticmethod
def vid_cache_link(hit):
"""download thumbnails into cache"""
vid_thumb = hit["source"]["vid_thumb_url"]
youtube_id = hit["source"]["youtube_id"]
channel_id_hit = hit["source"]["channel"]["channel_id"]
chan_thumb = hit["source"]["channel"]["channel_thumb_url"]
try:
chan_banner = hit["source"]["channel"]["channel_banner_url"]
except KeyError:
chan_banner = False
video_dict = {"youtube_id": youtube_id, "vid_thumb": vid_thumb}
channel_dict = {
"channel_id": channel_id_hit,
"chan_thumb": chan_thumb,
"chan_banner": chan_banner,
}
return video_dict, channel_dict
@staticmethod
def channel_cache_link(hit):
"""build channel thumb links"""
channel_id_hit = hit["source"]["channel_id"]
chan_thumb = hit["source"]["channel_thumb_url"]
try:
chan_banner = hit["source"]["channel_banner_url"]
except KeyError:
chan_banner = False
channel_dict = {
"channel_id": channel_id_hit,
"chan_thumb": chan_thumb,
"chan_banner": chan_banner,
}
return channel_dict
@staticmethod
def hit_cleanup(hit):
"""clean up and parse data from a single hit"""

View File

@ -16,7 +16,11 @@ from home.src.index import playlist as ta_playlist
from home.src.index.generic import YouTubeItem
from home.src.index.subtitle import YoutubeSubtitle
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.helper import DurationConverter, clean_string, randomizor
from home.src.index.video_streams import (
DurationConverter,
MediaStreamExtractor,
)
from home.src.ta.helper import clean_string, randomizor
from home.src.ta.ta_redis import RedisArchivist
from ryd_client import ryd_client
@ -152,6 +156,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
self._add_stats()
self.add_file_path()
self.add_player(media_path)
self.add_streams(media_path)
if self.config["downloads"]["integrate_ryd"]:
self._get_ryd_stats()
@ -253,6 +258,17 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
}
)
def add_streams(self, media_path=False):
"""add stream metadata"""
vid_path = self._get_vid_path(media_path)
media = MediaStreamExtractor(vid_path)
self.json_data.update(
{
"streams": media.extract_metadata(),
"media_size": media.get_file_size(),
}
)
def _get_vid_path(self, media_path=False):
"""get path of media file"""
if media_path:

View File

@ -0,0 +1,132 @@
"""extract metadata from video streams"""
import json
import subprocess
from os import stat
class DurationConverter:
"""
using ffprobe to get and parse duration from filepath
"""
@staticmethod
def get_sec(file_path):
"""read duration from file"""
duration = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"default=noprint_wrappers=1:nokey=1",
file_path,
],
capture_output=True,
check=True,
)
duration_raw = duration.stdout.decode().strip()
if duration_raw == "N/A":
return 0
duration_sec = int(float(duration_raw))
return duration_sec
@staticmethod
def get_str(duration_sec):
"""takes duration in sec and returns clean string"""
if not duration_sec:
# failed to extract
return "NA"
hours = int(duration_sec // 3600)
minutes = int((duration_sec - (hours * 3600)) // 60)
secs = int(duration_sec - (hours * 3600) - (minutes * 60))
duration_str = str()
if hours:
duration_str = str(hours).zfill(2) + ":"
if minutes:
duration_str = duration_str + str(minutes).zfill(2) + ":"
else:
duration_str = duration_str + "00:"
duration_str = duration_str + str(secs).zfill(2)
return duration_str
class MediaStreamExtractor:
"""extract stream metadata"""
def __init__(self, media_path):
self.media_path = media_path
self.metadata = []
def extract_metadata(self):
"""entry point to extract metadata"""
cmd = [
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
"-show_format",
self.media_path,
]
result = subprocess.run(
cmd, capture_output=True, text=True, check=False
)
if result.returncode != 0:
return self.metadata
streams = json.loads(result.stdout).get("streams")
for stream in streams:
self.process_stream(stream)
return self.metadata
def process_stream(self, stream):
"""parse stream to metadata"""
codec_type = stream.get("codec_type")
if codec_type == "video":
self._extract_video_metadata(stream)
elif codec_type == "audio":
self._extract_audio_metadata(stream)
else:
return
def _extract_video_metadata(self, stream):
"""parse video metadata"""
if "bit_rate" not in stream:
# is probably thumbnail
return
self.metadata.append(
{
"type": "video",
"index": stream["index"],
"codec": stream["codec_name"],
"width": stream["width"],
"height": stream["height"],
"bitrate": int(stream["bit_rate"]),
}
)
def _extract_audio_metadata(self, stream):
"""extract audio metadata"""
self.metadata.append(
{
"type": "audio",
"index": stream["index"],
"codec": stream["codec_name"],
"bitrate": int(stream["bit_rate"]),
}
)
def get_file_size(self):
"""get filesize in bytes"""
return stat(self.media_path).st_size
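Usage is two calls on a local file path; ffprobe needs to be on PATH for both classes in this module. The path below is illustrative:

media = MediaStreamExtractor("/youtube/channel/video.mp4")
streams = media.extract_metadata()  # list of stream dicts, empty list if ffprobe fails
size = media.get_file_size()  # file size in bytes via os.stat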

View File

@ -8,7 +8,6 @@ import os
import random
import re
import string
import subprocess
import unicodedata
from datetime import datetime
from urllib.parse import urlparse
@ -16,20 +15,20 @@ from urllib.parse import urlparse
import requests
def clean_string(file_name):
def clean_string(file_name: str) -> str:
"""clean string to only asci characters"""
whitelist = "-_.() " + string.ascii_letters + string.digits
normalized = unicodedata.normalize("NFKD", file_name)
ascii_only = normalized.encode("ASCII", "ignore").decode().strip()
white_listed = "".join(c for c in ascii_only if c in whitelist)
cleaned = re.sub(r"[ ]{2,}", " ", white_listed)
white_listed: str = "".join(c for c in ascii_only if c in whitelist)
cleaned: str = re.sub(r"[ ]{2,}", " ", white_listed)
return cleaned
def ignore_filelist(filelist):
def ignore_filelist(filelist: list[str]) -> list[str]:
"""ignore temp files for os.listdir sanitizer"""
to_ignore = ["Icon\r\r", "Temporary Items", "Network Trash Folder"]
cleaned = []
cleaned: list[str] = []
for file_name in filelist:
if file_name.startswith(".") or file_name in to_ignore:
continue
@ -39,13 +38,13 @@ def ignore_filelist(filelist):
return cleaned
def randomizor(length):
def randomizor(length: int) -> str:
"""generate random alpha numeric string"""
pool = string.digits + string.ascii_letters
pool: str = string.digits + string.ascii_letters
return "".join(random.choice(pool) for i in range(length))
def requests_headers():
def requests_headers() -> dict[str, str]:
"""build header with random user agent for requests outside of yt-dlp"""
chrome_versions = (
@ -97,7 +96,7 @@ def requests_headers():
return {"User-Agent": template}
def date_praser(timestamp):
def date_praser(timestamp: int | str) -> str:
"""return formatted date string"""
if isinstance(timestamp, int):
date_obj = datetime.fromtimestamp(timestamp)
@ -107,7 +106,7 @@ def date_praser(timestamp):
return datetime.strftime(date_obj, "%d %b, %Y")
def time_parser(timestamp):
def time_parser(timestamp: str) -> float:
"""return seconds from timestamp, false on empty"""
if not timestamp:
return False
@ -119,7 +118,7 @@ def time_parser(timestamp):
return int(hours) * 60 * 60 + int(minutes) * 60 + float(seconds)
def clear_dl_cache(config):
def clear_dl_cache(config: dict) -> int:
"""clear leftover files from dl cache"""
print("clear download cache")
cache_dir = os.path.join(config["application"]["cache_dir"], "download")
@ -131,15 +130,15 @@ def clear_dl_cache(config):
return len(leftover_files)
def get_mapping():
def get_mapping() -> dict:
"""read index_mapping.json and get expected mapping and settings"""
with open("home/src/es/index_mapping.json", "r", encoding="utf-8") as f:
index_config = json.load(f).get("index_config")
index_config: dict = json.load(f).get("index_config")
return index_config
def is_shorts(youtube_id):
def is_shorts(youtube_id: str) -> bool:
"""check if youtube_id is a shorts video, bot not it it's not a shorts"""
shorts_url = f"https://www.youtube.com/shorts/{youtube_id}"
response = requests.head(
@ -149,10 +148,10 @@ def is_shorts(youtube_id):
return response.status_code == 200
def ta_host_parser(ta_host):
def ta_host_parser(ta_host: str) -> tuple[list[str], list[str]]:
"""parse ta_host env var for ALLOWED_HOSTS and CSRF_TRUSTED_ORIGINS"""
allowed_hosts = []
csrf_trusted_origins = []
allowed_hosts: list[str] = []
csrf_trusted_origins: list[str] = []
for host in ta_host.split():
host_clean = host.strip()
if not host_clean.startswith("http"):
@ -163,54 +162,3 @@ def ta_host_parser(ta_host):
csrf_trusted_origins.append(f"{parsed.scheme}://{parsed.hostname}")
return allowed_hosts, csrf_trusted_origins
class DurationConverter:
"""
using ffmpeg to get and parse duration from filepath
"""
@staticmethod
def get_sec(file_path):
"""read duration from file"""
duration = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"default=noprint_wrappers=1:nokey=1",
file_path,
],
capture_output=True,
check=True,
)
duration_raw = duration.stdout.decode().strip()
if duration_raw == "N/A":
return 0
duration_sec = int(float(duration_raw))
return duration_sec
@staticmethod
def get_str(duration_sec):
"""takes duration in sec and returns clean string"""
if not duration_sec:
# failed to extract
return "NA"
hours = duration_sec // 3600
minutes = (duration_sec - (hours * 3600)) // 60
secs = duration_sec - (hours * 3600) - (minutes * 60)
duration_str = str()
if hours:
duration_str = str(hours).zfill(2) + ":"
if minutes:
duration_str = duration_str + str(minutes).zfill(2) + ":"
else:
duration_str = duration_str + "00:"
duration_str = duration_str + str(secs).zfill(2)
return duration_str

View File

@ -92,7 +92,7 @@ class Parser:
item_type = "video"
elif len_id_str == 24:
item_type = "channel"
elif len_id_str in (34, 18):
elif len_id_str in (34, 26, 18):
item_type = "playlist"
else:
raise ValueError(f"not a valid id_str: {id_str}")

View File

@ -25,6 +25,7 @@ from home.src.index.reindex import Reindex, ReindexManual, ReindexPopulate
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.task_manager import TaskManager
from home.src.ta.urlparser import Parser
CONFIG = AppConfig().config
REDIS_HOST = os.environ.get("REDIS_HOST")
@ -171,14 +172,16 @@ def update_subscribed(self):
return
manager.init(self)
missing_videos = SubscriptionScanner(task=self).scan()
handler = SubscriptionScanner(task=self)
missing_videos = handler.scan()
auto_start = handler.auto_start
if missing_videos:
print(missing_videos)
extrac_dl.delay(missing_videos)
extrac_dl.delay(missing_videos, auto_start=auto_start)
@shared_task(name="download_pending", bind=True, base=BaseTask)
def download_pending(self, from_queue=True):
def download_pending(self, auto_only=False):
"""download latest pending videos"""
manager = TaskManager()
if manager.is_pending(self):
@ -187,19 +190,24 @@ def download_pending(self, from_queue=True):
return
manager.init(self)
downloader = VideoDownloader(task=self)
if from_queue:
downloader.add_pending()
downloader.run_queue()
VideoDownloader(task=self).run_queue(auto_only=auto_only)
@shared_task(name="extract_download", bind=True, base=BaseTask)
def extrac_dl(self, youtube_ids):
def extrac_dl(self, youtube_ids, auto_start=False):
"""parse list passed and add to pending"""
TaskManager().init(self)
pending_handler = PendingList(youtube_ids=youtube_ids, task=self)
if isinstance(youtube_ids, str):
to_add = Parser(youtube_ids).parse()
else:
to_add = youtube_ids
pending_handler = PendingList(youtube_ids=to_add, task=self)
pending_handler.parse_url_list()
pending_handler.add_to_pending()
pending_handler.add_to_pending(auto_start=auto_start)
if auto_start:
download_pending.delay(auto_only=True)
@shared_task(bind=True, name="check_reindex", base=BaseTask)

View File

@ -45,12 +45,10 @@
</div>
</div>
<div class="info-box-item">
<div>
{% if max_hits %}
<p>Total Videos: {{ max_hits }}</p>
<button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button>
{% endif %}
</div>
{% if aggs %}
<p>{{ aggs.total_items.value }} videos <span class="space-carrot">|</span> {{ aggs.total_duration.value_str }} playback <span class="space-carrot">|</span> Total size {{ aggs.total_size.value|filesizeformat }}</p>
<button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button>
{% endif %}
</div>
</div>
</div>

View File

@ -20,11 +20,12 @@
<img id="animate-icon" onclick="showForm()" src="{% static 'img/icon-add.svg' %}" alt="add-icon">
<p>Add to download queue</p>
<div class="show-form">
<form id='hidden-form' action="/downloads/" method="post">
<div id='hidden-form' novalidate>
{% csrf_token %}
{{ add_form }}
<button type="submit">Add to download queue</button>
</form>
<button onclick="addToQueue()">Add to queue</button>
<button onclick="addToQueue(true)">Download now</button>
</div>
</div>
</div>
</div>

View File

@ -49,14 +49,14 @@
<i>Shorts Videos to scan to find new items for the <b>Rescan subscriptions</b> task, max recommended 50.</i><br>
{{ app_form.subscriptions_shorts_channel_size }}
</div>
<div class="settings-item">
<p>Auto start downloads from your subscriptions: <span class="settings-current">{{ config.subscriptions.auto_start }}</span></p>
<i>Enabling this will automatically start and prioritize downloads from your subscriptions.</i><br>
{{ app_form.subscriptions_auto_start }}
</div>
</div>
<div class="settings-group">
<h2 id="downloads">Downloads</h2>
<div class="settings-item">
<p>Current download limit: <span class="settings-current">{{ config.downloads.limit_count }}</span></p>
<i>Limit the number of videos getting downloaded on every run. 0 (zero) to deactivate.</i><br>
{{ app_form.downloads_limit_count }}
</div>
<div class="settings-item">
<p>Current download speed limit in KB/s: <span class="settings-current">{{ config.downloads.limit_speed }}</span></p>
<i>Limit download speed. 0 (zero) to deactivate, e.g. 1000 (1MB/s). Speeds are in KB/s. Setting takes effect on new download jobs or application restart.</i><br>
@ -95,6 +95,19 @@
{{ app_form.downloads_format }}
<br>
</div>
<div class="settings-item">
<p>Force sort order to have precedence over all yt-dlp fields.<br>
Currently: <span class="settings-current">{{ config.downloads.format_sort }}</span>
</p>
<p>Example configurations:</p>
<ul>
<li><span class="settings-current">res,codec:av1</span>: prefer AV1 over all other video codecs.</li>
<li><span class="settings-current">0</span>: deactivate and keep the default as decided by yt-dlp.</li>
</ul>
<i>Not all codecs are supported by all browsers. The default value ensures best compatibility. Check out the <a href="https://github.com/yt-dlp/yt-dlp#sorting-formats" target="_blank">documentation</a> for valid configurations.</i><br>
{{ app_form.downloads_format_sort }}
<br>
</div>
<div class="settings-item">
<p>Current metadata embed setting: <span class="settings-current">{{ config.downloads.add_metadata }}</span></p>
<i>Metadata is not embedded into the downloaded files by default.</i><br>

View File

@ -56,20 +56,6 @@
{% else %}
<p>Youtube: Deactivated</p>
{% endif %}
{% if reindex %}
<p>Reindex scheduled</p>
{% else %}
<div id="reindex-button" class="button-box">
<button data-id="{{ video.youtube_id }}" data-type="video" onclick="reindex(this)" title="Reindex {{ video.title }}">Reindex</button>
</div>
{% endif %}
<div class="button-box">
<a download="" href="/media/{{ video.media_url }}"><button id="download-item">Download File</button></a>
<button onclick="deleteConfirm()" id="delete-item">Delete Video</button>
<div class="delete-confirm" id="delete-button">
<span>Are you sure? </span><button class="danger-button" onclick="deleteVideo(this)" data-id="{{ video.youtube_id }}" data-redirect = "{{ video.channel.channel_id }}">Delete</button> <button onclick="cancelDelete()">Cancel</button>
</div>
</div>
</div>
</div>
<div class="info-box-item">
@ -89,6 +75,34 @@
</div>
</div>
</div>
<div class="info-box info-box-2">
<div class="info-box-item">
<div class="button-box">
{% if reindex %}
<p>Reindex scheduled</p>
{% else %}
<div id="reindex-button" class="button-box">
<button data-id="{{ video.youtube_id }}" data-type="video" onclick="reindex(this)" title="Reindex {{ video.title }}">Reindex</button>
</div>
{% endif %}
<a download="" href="/media/{{ video.media_url }}"><button id="download-item">Download File</button></a>
<button onclick="deleteConfirm()" id="delete-item">Delete Video</button>
<div class="delete-confirm" id="delete-button">
<span>Are you sure? </span><button class="danger-button" onclick="deleteVideo(this)" data-id="{{ video.youtube_id }}" data-redirect = "{{ video.channel.channel_id }}">Delete</button> <button onclick="cancelDelete()">Cancel</button>
</div>
</div>
</div>
<div class="info-box-item">
{% if video.media_size %}
<p>File size: {{ video.media_size|filesizeformat }}</p>
{% endif %}
{% if video.streams %}
{% for stream in video.streams %}
<p>{{ stream.type|title }}: {{ stream.codec }} {{ stream.bitrate|filesizeformat }}/s{% if stream.width %} <span class="space-carrot">|</span> {{ stream.width }}x{{ stream.height }}{% endif %}</p>
{% endfor %}
{% endif %}
</div>
</div>
{% if video.tags %}
<div class="description-box">
<div class="video-tag-box">

View File

@ -41,8 +41,7 @@ from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.helper import time_parser
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.urlparser import Parser
from home.tasks import extrac_dl, index_channel_playlists, subscribe_to
from home.tasks import index_channel_playlists, subscribe_to
from rest_framework.authtoken.models import Token
@ -148,8 +147,8 @@ class ArchivistViewConfig(View):
class ArchivistResultsView(ArchivistViewConfig):
"""View class to inherit from when searching data in es"""
view_origin = False
es_search = False
view_origin = ""
es_search = ""
def __init__(self):
super().__init__(self.view_origin)
@ -259,6 +258,7 @@ class ArchivistResultsView(ArchivistViewConfig):
self.pagination_handler.validate(search.max_hits)
self.context["max_hits"] = search.max_hits
self.context["pagination"] = self.pagination_handler.pagination
self.context["aggs"] = search.aggs
class MinView(View):
@ -367,7 +367,7 @@ class AboutView(MinView):
class DownloadView(ArchivistResultsView):
"""resolves to /download/
takes POST for downloading youtube links
handle the download queue
"""
view_origin = "downloads"
@ -451,34 +451,6 @@ class DownloadView(ArchivistResultsView):
return buckets_sorted
@staticmethod
def post(request):
"""handle post requests"""
to_queue = AddToQueueForm(data=request.POST)
if to_queue.is_valid():
url_str = request.POST.get("vid_url")
print(url_str)
try:
youtube_ids = Parser(url_str).parse()
except ValueError:
# failed to process
key = "message:add"
print(f"failed to parse: {url_str}")
mess_dict = {
"status": key,
"level": "error",
"title": "Failed to extract links.",
"message": "Not a video, channel or playlist ID or URL",
}
RedisArchivist().set_message(key, mess_dict, expire=True)
return redirect("downloads")
print(youtube_ids)
extrac_dl.delay(youtube_ids)
sleep(2)
return redirect("downloads", permanent=True)
class ChannelIdBaseView(ArchivistResultsView):
"""base class for all channel-id views"""
@ -613,6 +585,11 @@ class ChannelIdView(ChannelIdBaseView):
]
}
}
self.data["aggs"] = {
"total_items": {"value_count": {"field": "youtube_id"}},
"total_size": {"sum": {"field": "media_size"}},
"total_duration": {"sum": {"field": "player.duration"}},
}
self.data["sort"].append({"title.keyword": {"order": "asc"}})
if self.context["hide_watched"]:
@ -982,7 +959,7 @@ class SearchView(ArchivistResultsView):
"""
view_origin = "home"
es_search = False
es_search = ""
def get(self, request):
"""handle get request"""

View File

@ -1,12 +1,12 @@
beautifulsoup4==4.12.2
celery==5.2.7
Django==4.2
django-auth-ldap==4.2.0
django-auth-ldap==4.3.0
django-cors-headers==3.14.0
djangorestframework==3.14.0
Pillow==9.5.0
redis==4.5.4
requests==2.28.2
requests==2.29.0
ryd-client==0.0.6
uWSGI==2.0.21
whitenoise==6.4.0

View File

@ -369,6 +369,10 @@ button:hover {
display: none;
}
#hidden-form button {
margin-right: 1rem;
}
#text-reveal {
height: 0;
overflow: hidden;
@ -660,6 +664,10 @@ video:-webkit-full-screen {
background-color: var(--highlight-bg);
}
.info-box-item p {
width: 100%;
}
.description-text {
width: 100%;
}

View File

@ -160,6 +160,24 @@ function dlPending() {
}, 500);
}
function addToQueue(autostart=false) {
let textArea = document.getElementById('id_vid_url');
if (textArea.value === '') {
return;
}
let toPost = {data: [{youtube_id: textArea.value, status: 'pending'}]};
let apiEndpoint = '/api/download/';
if (autostart) {
apiEndpoint = `${apiEndpoint}?autostart=true`;
}
apiRequest(apiEndpoint, 'POST', toPost);
textArea.value = '';
setTimeout(function () {
checkMessages();
}, 500);
showForm();
}
function toIgnore(button) {
let youtube_id = button.getAttribute('data-id');
let apiEndpoint = '/api/download/' + youtube_id + '/';