diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index fe07637..d1165c2 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -3,7 +3,7 @@ from api.src.search_processor import SearchProcess from api.src.task_processor import TaskHandler from home.src.download.queue import PendingInteract -from home.src.download.yt_cookie import CookieHandler +from home.src.download.yt_dlp_base import CookieHandler from home.src.es.connect import ElasticWrap from home.src.index.generic import Pagination from home.src.index.video import SponsorBlock @@ -484,6 +484,7 @@ class CookieView(ApiBaseView): def post(request): """handle post request""" # pylint: disable=unused-argument - validated = CookieHandler().validate() + config = AppConfig().config + validated = CookieHandler(config).validate() return Response({"cookie_validated": validated}) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 27217c4..36e3d1e 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -7,13 +7,12 @@ Functionality: import json from datetime import datetime -import yt_dlp from home.src.download.subscriptions import ( ChannelSubscription, PlaylistSubscription, ) from home.src.download.thumbnails import ThumbManager -from home.src.download.yt_cookie import CookieHandler +from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.playlist import YoutubePlaylist from home.src.ta.config import AppConfig @@ -133,28 +132,11 @@ class PendingList(PendingIndex): def __init__(self, youtube_ids=False): super().__init__() - self.process_config() + self.config = AppConfig().config self.youtube_ids = youtube_ids self.to_skip = False self.missing_videos = False - def process_config(self): - """add user config to yt_obs""" - config = AppConfig().config - if config["downloads"]["cookie_import"]: - cookie_path = CookieHandler().use() - self.yt_obs.update({"cookiefile": cookie_path}) - - def close_config(self): - """remove config after task finished""" - config = AppConfig().config - if config["downloads"]["cookie_import"]: - CookieHandler().hide() - try: - del self.yt_obs["cookiefile"] - except KeyError: - pass - def parse_url_list(self): """extract youtube ids from list""" self.missing_videos = [] @@ -235,8 +217,6 @@ class PendingList(PendingIndex): query_str = "\n".join(bulk_list) _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True) - self.close_config() - def _notify_add(self, idx): """send notification for adding videos to download queue""" progress = f"{idx + 1}/{len(self.missing_videos)}" @@ -256,11 +236,10 @@ class PendingList(PendingIndex): def get_youtube_details(self, youtube_id): """get details from youtubedl for single pending video""" - try: - vid = yt_dlp.YoutubeDL(self.yt_obs).extract_info(youtube_id) - except yt_dlp.utils.DownloadError: - print(f"{youtube_id}: failed to extract info") + vid = YtWrap(self.yt_obs, self.config).extract(youtube_id) + if not vid: return False + if vid.get("id") != youtube_id: # skip premium videos with different id print(f"{youtube_id}: skipping premium video, id not matching") diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index 75aede2..973c76e 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -4,8 +4,8 @@ Functionality: - handle playlist subscriptions """ -import yt_dlp from home.src.download import queue # partial import +from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import IndexPaginate from home.src.index.channel import YoutubeChannel from home.src.index.playlist import YoutubePlaylist @@ -17,10 +17,7 @@ class ChannelSubscription: """manage the list of channels subscribed""" def __init__(self): - config = AppConfig().config - self.es_url = config["application"]["es_url"] - self.es_auth = config["application"]["es_auth"] - self.channel_size = config["subscriptions"]["channel_size"] + self.config = AppConfig().config @staticmethod def get_channels(subscribed_only=True): @@ -39,23 +36,18 @@ class ChannelSubscription: def get_last_youtube_videos(self, channel_id, limit=True): """get a list of last videos from channel""" - url = f"https://www.youtube.com/channel/{channel_id}/videos" obs = { - "default_search": "ytsearch", - "quiet": True, "skip_download": True, "extract_flat": True, } if limit: - obs["playlistend"] = self.channel_size + obs["playlistend"] = self.config["subscriptions"]["channel_size"] - try: - chan = yt_dlp.YoutubeDL(obs).extract_info(url, download=False) - except yt_dlp.utils.DownloadError: - print(f"{channel_id}: failed to extract videos, skipping.") + channel = YtWrap(obs, self.config).extract(channel_id) + if not channel: return False - last_videos = [(i["id"], i["title"]) for i in chan["entries"]] + last_videos = [(i["id"], i["title"]) for i in channel["entries"]] return last_videos def find_missing(self): diff --git a/tubearchivist/home/src/download/yt_cookie.py b/tubearchivist/home/src/download/yt_cookie.py deleted file mode 100644 index 3a7244e..0000000 --- a/tubearchivist/home/src/download/yt_cookie.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -functionality: -- import yt cookie from filesystem -- make cookie available for yt-dlp -""" - -import os - -import yt_dlp -from home.src.ta.config import AppConfig -from home.src.ta.ta_redis import RedisArchivist - - -class CookieHandler: - """handle youtube cookie for yt-dlp""" - - CONFIG = AppConfig().config - CACHE_PATH = CONFIG["application"]["cache_dir"] - COOKIE_FILE_NAME = "cookies.google.txt" - COOKIE_KEY = "cookie" - COOKIE_PATH = "cookie.txt" - - def import_cookie(self): - """import cookie from file""" - import_path = os.path.join( - self.CACHE_PATH, "import", self.COOKIE_FILE_NAME - ) - with open(import_path, encoding="utf-8") as cookie_file: - cookie = cookie_file.read() - - RedisArchivist().set_message(self.COOKIE_KEY, cookie, expire=False) - - os.remove(import_path) - print("cookie: import successfully") - - def use(self): - """make cookie available in FS""" - cookie = RedisArchivist().get_message(self.COOKIE_KEY) - if isinstance(cookie, dict): - print("no cookie imported") - raise FileNotFoundError - - if os.path.exists(self.COOKIE_PATH): - return self.COOKIE_PATH - - with open(self.COOKIE_PATH, "w", encoding="utf-8") as cookie_file: - cookie_file.write(cookie) - - print("cookie: made available") - return self.COOKIE_PATH - - def hide(self): - """hide cookie file if not in use""" - try: - os.remove(self.COOKIE_PATH) - except FileNotFoundError: - print("cookie: not available") - return - - print("cookie: hidden") - - def revoke(self): - """revoke cookie""" - self.hide() - RedisArchivist().del_message(self.COOKIE_KEY) - print("cookie: revoked") - - def validate(self): - """validate cookie using the liked videos playlist""" - try: - _ = self.use() - except FileNotFoundError: - return False - - url = "https://www.youtube.com/playlist?list=LL" - yt_obs = { - "quiet": True, - "skip_download": True, - "extract_flat": True, - "cookiefile": self.COOKIE_PATH, - } - try: - response = yt_dlp.YoutubeDL(yt_obs).extract_info(url) - except yt_dlp.utils.DownloadError: - print("failed to validate cookie") - response = False - - return bool(response) diff --git a/tubearchivist/home/src/download/yt_dlp_base.py b/tubearchivist/home/src/download/yt_dlp_base.py new file mode 100644 index 0000000..b32d536 --- /dev/null +++ b/tubearchivist/home/src/download/yt_dlp_base.py @@ -0,0 +1,101 @@ +""" +functionality: +- base class to make all calls to yt-dlp +- handle yt-dlp errors +""" + +import os +from io import StringIO + +import yt_dlp +from home.src.ta.ta_redis import RedisArchivist + + +class YtWrap: + """wrap calls to yt""" + + OBS_BASE = { + "default_search": "ytsearch", + "quiet": True, + "check_formats": "selected", + "socket_timeout": 2, + } + + def __init__(self, obs_request, config=False): + self.obs_request = obs_request + self.config = config + self.build_obs() + + def build_obs(self): + """build yt-dlp obs""" + self.obs = self.OBS_BASE.copy() + self.obs.update(self.obs_request) + self.add_cookie() + + def add_cookie(self): + """add cookie if enabled""" + if self.config["downloads"]["cookie_import"]: + cookie_io = CookieHandler(self.config).get() + self.obs["cookiefile"] = cookie_io + + def download(self, url): + """make download request""" + with yt_dlp.YoutubeDL(self.obs) as ydl: + try: + ydl.download([url]) + except yt_dlp.utils.DownloadError: + print(f"{url}: failed to download.") + return False + + return True + + def extract(self, url): + """make extract request""" + try: + response = yt_dlp.YoutubeDL(self.obs).extract_info(url) + except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError): + print(f"{url}: failed to get info from youtube") + response = False + + return response + + +class CookieHandler: + """handle youtube cookie for yt-dlp""" + + def __init__(self, config): + self.cookie_io = False + self.config = config + + def get(self): + """get cookie io stream""" + cookie = RedisArchivist().get_message("cookie") + self.cookie_io = StringIO(cookie) + return self.cookie_io + + def import_cookie(self): + """import cookie from file""" + cache_path = self.config["application"]["cache_dir"] + import_path = os.path.join(cache_path, "import", "cookies.google.txt") + with open(import_path, encoding="utf-8") as cookie_file: + cookie = cookie_file.read() + + RedisArchivist().set_message("cookie", cookie, expire=False) + + os.remove(import_path) + print("cookie: import successful") + + @staticmethod + def revoke(): + """revoke cookie""" + RedisArchivist().del_message("cookie") + print("cookie: revoked") + + def validate(self): + """validate cookie using the liked videos playlist""" + obs_request = { + "skip_download": True, + "extract_flat": True, + } + response = YtWrap(obs_request, self.config).extract("LL") + return bool(response) diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 7ec06b5..e82b6c5 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -9,12 +9,10 @@ functionality: import os import shutil from datetime import datetime -from time import sleep -import yt_dlp from home.src.download.queue import PendingList from home.src.download.subscriptions import PlaylistSubscription -from home.src.download.yt_cookie import CookieHandler +from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.channel import YoutubeChannel from home.src.index.playlist import YoutubePlaylist @@ -41,7 +39,6 @@ class DownloadPostProcess: self.auto_delete_all() self.auto_delete_overwrites() self.validate_playlists() - self.pending.close_config() def auto_delete_all(self): """handle auto delete""" @@ -174,11 +171,10 @@ class VideoDownloader: if not youtube_id: break - try: - self._dl_single_vid(youtube_id) - except yt_dlp.utils.DownloadError: - print("failed to download " + youtube_id) + success = self._dl_single_vid(youtube_id) + if not success: continue + vid_dict = index_new_video( youtube_id, video_overwrites=self.video_overwrites ) @@ -293,9 +289,6 @@ class VideoDownloader: self.obs["ratelimit"] = ( self.config["downloads"]["limit_speed"] * 1024 ) - if self.config["downloads"]["cookie_import"]: - cookie_path = CookieHandler().use() - self.obs["cookiefile"] = cookie_path throttle = self.config["downloads"]["throttledratelimit"] if throttle: @@ -359,13 +352,7 @@ class VideoDownloader: if youtube_id in file_name: obs["outtmpl"] = os.path.join(dl_cache, file_name) - with yt_dlp.YoutubeDL(obs) as ydl: - try: - ydl.download([youtube_id]) - except yt_dlp.utils.DownloadError: - print("retry failed download: " + youtube_id) - sleep(10) - ydl.download([youtube_id]) + success = YtWrap(obs, self.config).download(youtube_id) if self.obs["writethumbnail"]: # webp files don't get cleaned up automatically @@ -375,6 +362,8 @@ class VideoDownloader: file_path = os.path.join(dl_cache, file_name) os.remove(file_path) + return success + def move_to_archive(self, vid_dict): """move downloaded video from cache to archive""" videos = self.config["application"]["videos"] diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 06d0086..4761cd0 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -10,10 +10,10 @@ import re from datetime import datetime import requests -import yt_dlp from bs4 import BeautifulSoup from home.src.download import queue # partial import from home.src.download.thumbnails import ThumbManager +from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.generic import YouTubeItem from home.src.index.playlist import YoutubePlaylist @@ -314,12 +314,8 @@ class YoutubeChannel(YouTubeItem): f"https://www.youtube.com/channel/{self.youtube_id}" + "/playlists?view=1&sort=dd&shelf_id=0" ) - obs = { - "quiet": True, - "skip_download": True, - "extract_flat": True, - } - playlists = yt_dlp.YoutubeDL(obs).extract_info(url) + obs = {"skip_download": True, "extract_flat": True} + playlists = YtWrap(obs, self.config).extract(url) all_entries = [(i["id"], i["title"]) for i in playlists["entries"]] self.all_playlists = all_entries diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index 93a1bc4..c6f1caa 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -12,7 +12,6 @@ import shutil import subprocess from home.src.download.queue import PendingList -from home.src.download.yt_cookie import CookieHandler from home.src.download.yt_dlp_handler import VideoDownloader from home.src.es.connect import ElasticWrap from home.src.index.reindex import Reindex @@ -309,12 +308,6 @@ def scan_filesystem(): def reindex_old_documents(): """daily refresh of old documents""" handler = Reindex() - if handler.config["downloads"]["cookie_import"]: - CookieHandler().use() - try: - handler.check_outdated() - handler.reindex() - RedisArchivist().set_message("last_reindex", handler.now, expire=False) - finally: - if handler.config["downloads"]["cookie_import"]: - CookieHandler().hide() + handler.check_outdated() + handler.reindex() + RedisArchivist().set_message("last_reindex", handler.now, expire=False) diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py index 0940258..15e1264 100644 --- a/tubearchivist/home/src/index/generic.py +++ b/tubearchivist/home/src/index/generic.py @@ -5,8 +5,7 @@ functionality: import math -import yt_dlp -from home.src.download.yt_cookie import CookieHandler +from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap from home.src.ta.config import AppConfig from home.src.ta.ta_redis import RedisArchivist @@ -19,42 +18,22 @@ class YouTubeItem: index_name = False yt_base = False yt_obs = { - "quiet": True, - "default_search": "ytsearch", "skip_download": True, - "check_formats": "selected", - "socket_timeout": 3, "noplaylist": True, } def __init__(self, youtube_id): self.youtube_id = youtube_id - self.config = False - self.app_conf = False - self.youtube_meta = False - self.json_data = False - self._get_conf() - - def _get_conf(self): - """read user conf""" self.config = AppConfig().config self.app_conf = self.config["application"] - if self.config["downloads"]["cookie_import"]: - cookie_path = CookieHandler().use() - self.yt_obs.update({"cookiefile": cookie_path}) + self.youtube_meta = False + self.json_data = False def get_from_youtube(self): """use yt-dlp to get meta data from youtube""" print(f"{self.youtube_id}: get metadata from youtube") - try: - yt_item = yt_dlp.YoutubeDL(self.yt_obs) - response = yt_item.extract_info(self.yt_base + self.youtube_id) - except ( - yt_dlp.utils.ExtractorError, - yt_dlp.utils.DownloadError, - ): - print(f"{self.youtube_id}: failed to get info from youtube") - response = False + url = self.yt_base + self.youtube_id + response = YtWrap(self.yt_obs, self.config).extract(url) self.youtube_meta = response diff --git a/tubearchivist/home/src/index/playlist.py b/tubearchivist/home/src/index/playlist.py index fce019d..69dc5db 100644 --- a/tubearchivist/home/src/index/playlist.py +++ b/tubearchivist/home/src/index/playlist.py @@ -18,12 +18,7 @@ class YoutubePlaylist(YouTubeItem): es_path = False index_name = "ta_playlist" - yt_obs = { - "default_search": "ytsearch", - "quiet": True, - "skip_download": True, - "extract_flat": True, - } + yt_obs = {"extract_flat": True} yt_base = "https://www.youtube.com/playlist?list=" def __init__(self, youtube_id): diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index 5731115..3a7164d 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -11,7 +11,7 @@ import unicodedata from datetime import datetime from urllib.parse import parse_qs, urlparse -import yt_dlp +from home.src.download.yt_dlp_base import YtWrap def clean_string(file_name): @@ -184,14 +184,12 @@ class UrlListParser: @staticmethod def extract_channel_name(url): """find channel id from channel name with yt-dlp help""" - obs = { - "default_search": "ytsearch", - "quiet": True, + obs_request = { "skip_download": True, "extract_flat": True, "playlistend": 0, } - url_info = yt_dlp.YoutubeDL(obs).extract_info(url, download=False) + url_info = YtWrap(obs_request).extract(url) try: channel_id = url_info["channel_id"] except KeyError as error: diff --git a/tubearchivist/home/src/ta/ta_redis.py b/tubearchivist/home/src/ta/ta_redis.py index 0e9c23f..e13a997 100644 --- a/tubearchivist/home/src/ta/ta_redis.py +++ b/tubearchivist/home/src/ta/ta_redis.py @@ -8,7 +8,6 @@ import json import os import redis -from home.src.ta.helper import ignore_filelist class RedisBase: @@ -95,27 +94,6 @@ class RedisArchivist(RedisBase): return all_messages - @staticmethod - def monitor_cache_dir(cache_dir): - """ - look at download cache dir directly as alternative progress info - """ - dl_cache = os.path.join(cache_dir, "download") - all_cache_file = os.listdir(dl_cache) - cache_file = ignore_filelist(all_cache_file) - if cache_file: - filename = cache_file[0][12:].replace("_", " ").split(".")[0] - mess_dict = { - "status": "message:download", - "level": "info", - "title": "Downloading: " + filename, - "message": "", - } - else: - return False - - return mess_dict - class RedisQueue(RedisBase): """dynamically interact with the download queue in redis""" diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 22d219a..bd5a2ea 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -14,7 +14,7 @@ from django.contrib.auth.forms import AuthenticationForm from django.http import JsonResponse from django.shortcuts import redirect, render from django.views import View -from home.src.download.yt_cookie import CookieHandler +from home.src.download.yt_dlp_base import CookieHandler from home.src.es.index_setup import get_available_backups from home.src.frontend.api_calls import PostData from home.src.frontend.forms import ( @@ -795,18 +795,19 @@ class SettingsView(View): def post(self, request): """handle form post to update settings""" user_form = UserSettingsForm(request.POST) + config_handler = AppConfig() if user_form.is_valid(): user_form_post = user_form.cleaned_data if any(user_form_post.values()): - AppConfig().set_user_config(user_form_post, request.user.id) + config_handler.set_user_config(user_form_post, request.user.id) app_form = ApplicationSettingsForm(request.POST) if app_form.is_valid(): app_form_post = app_form.cleaned_data if app_form_post: print(app_form_post) - updated = AppConfig().update_config(app_form_post) - self.post_process_updated(updated) + updated = config_handler.update_config(app_form_post) + self.post_process_updated(updated, config_handler.config) scheduler_form = SchedulerSettingsForm(request.POST) if scheduler_form.is_valid(): @@ -819,7 +820,7 @@ class SettingsView(View): return redirect("settings", permanent=True) @staticmethod - def post_process_updated(updated): + def post_process_updated(updated, config): """apply changes for config""" if not updated: return @@ -827,9 +828,9 @@ class SettingsView(View): for config_value, updated_value in updated: if config_value == "cookie_import": if updated_value: - CookieHandler().import_cookie() + CookieHandler(config).import_cookie() else: - CookieHandler().revoke() + CookieHandler(config).revoke() def progress(request):