refactor new YtWrap class for yt-dlp calls, #build

Changed:
- new yt-dlp base class
- new cookie class using io_stream
- MetadataFromField postprocessors
This commit is contained in:
simon 2022-05-25 16:15:38 +07:00
commit 88e526f5e5
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
13 changed files with 145 additions and 231 deletions

View File

@ -3,7 +3,7 @@
from api.src.search_processor import SearchProcess from api.src.search_processor import SearchProcess
from api.src.task_processor import TaskHandler from api.src.task_processor import TaskHandler
from home.src.download.queue import PendingInteract from home.src.download.queue import PendingInteract
from home.src.download.yt_cookie import CookieHandler from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.connect import ElasticWrap from home.src.es.connect import ElasticWrap
from home.src.index.generic import Pagination from home.src.index.generic import Pagination
from home.src.index.video import SponsorBlock from home.src.index.video import SponsorBlock
@ -484,6 +484,7 @@ class CookieView(ApiBaseView):
def post(request): def post(request):
"""handle post request""" """handle post request"""
# pylint: disable=unused-argument # pylint: disable=unused-argument
validated = CookieHandler().validate() config = AppConfig().config
validated = CookieHandler(config).validate()
return Response({"cookie_validated": validated}) return Response({"cookie_validated": validated})

View File

@ -7,13 +7,12 @@ Functionality:
import json import json
from datetime import datetime from datetime import datetime
import yt_dlp
from home.src.download.subscriptions import ( from home.src.download.subscriptions import (
ChannelSubscription, ChannelSubscription,
PlaylistSubscription, PlaylistSubscription,
) )
from home.src.download.thumbnails import ThumbManager from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_cookie import CookieHandler from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.playlist import YoutubePlaylist from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig from home.src.ta.config import AppConfig
@ -133,28 +132,11 @@ class PendingList(PendingIndex):
def __init__(self, youtube_ids=False): def __init__(self, youtube_ids=False):
super().__init__() super().__init__()
self.process_config() self.config = AppConfig().config
self.youtube_ids = youtube_ids self.youtube_ids = youtube_ids
self.to_skip = False self.to_skip = False
self.missing_videos = False self.missing_videos = False
def process_config(self):
"""add user config to yt_obs"""
config = AppConfig().config
if config["downloads"]["cookie_import"]:
cookie_path = CookieHandler().use()
self.yt_obs.update({"cookiefile": cookie_path})
def close_config(self):
"""remove config after task finished"""
config = AppConfig().config
if config["downloads"]["cookie_import"]:
CookieHandler().hide()
try:
del self.yt_obs["cookiefile"]
except KeyError:
pass
def parse_url_list(self): def parse_url_list(self):
"""extract youtube ids from list""" """extract youtube ids from list"""
self.missing_videos = [] self.missing_videos = []
@ -235,8 +217,6 @@ class PendingList(PendingIndex):
query_str = "\n".join(bulk_list) query_str = "\n".join(bulk_list)
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True) _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
self.close_config()
def _notify_add(self, idx): def _notify_add(self, idx):
"""send notification for adding videos to download queue""" """send notification for adding videos to download queue"""
progress = f"{idx + 1}/{len(self.missing_videos)}" progress = f"{idx + 1}/{len(self.missing_videos)}"
@ -256,11 +236,10 @@ class PendingList(PendingIndex):
def get_youtube_details(self, youtube_id): def get_youtube_details(self, youtube_id):
"""get details from youtubedl for single pending video""" """get details from youtubedl for single pending video"""
try: vid = YtWrap(self.yt_obs, self.config).extract(youtube_id)
vid = yt_dlp.YoutubeDL(self.yt_obs).extract_info(youtube_id) if not vid:
except yt_dlp.utils.DownloadError:
print(f"{youtube_id}: failed to extract info")
return False return False
if vid.get("id") != youtube_id: if vid.get("id") != youtube_id:
# skip premium videos with different id # skip premium videos with different id
print(f"{youtube_id}: skipping premium video, id not matching") print(f"{youtube_id}: skipping premium video, id not matching")

View File

@ -4,8 +4,8 @@ Functionality:
- handle playlist subscriptions - handle playlist subscriptions
""" """
import yt_dlp
from home.src.download import queue # partial import from home.src.download import queue # partial import
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import IndexPaginate from home.src.es.connect import IndexPaginate
from home.src.index.channel import YoutubeChannel from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist from home.src.index.playlist import YoutubePlaylist
@ -17,10 +17,7 @@ class ChannelSubscription:
"""manage the list of channels subscribed""" """manage the list of channels subscribed"""
def __init__(self): def __init__(self):
config = AppConfig().config self.config = AppConfig().config
self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.channel_size = config["subscriptions"]["channel_size"]
@staticmethod @staticmethod
def get_channels(subscribed_only=True): def get_channels(subscribed_only=True):
@ -39,23 +36,18 @@ class ChannelSubscription:
def get_last_youtube_videos(self, channel_id, limit=True): def get_last_youtube_videos(self, channel_id, limit=True):
"""get a list of last videos from channel""" """get a list of last videos from channel"""
url = f"https://www.youtube.com/channel/{channel_id}/videos"
obs = { obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True, "skip_download": True,
"extract_flat": True, "extract_flat": True,
} }
if limit: if limit:
obs["playlistend"] = self.channel_size obs["playlistend"] = self.config["subscriptions"]["channel_size"]
try: channel = YtWrap(obs, self.config).extract(channel_id)
chan = yt_dlp.YoutubeDL(obs).extract_info(url, download=False) if not channel:
except yt_dlp.utils.DownloadError:
print(f"{channel_id}: failed to extract videos, skipping.")
return False return False
last_videos = [(i["id"], i["title"]) for i in chan["entries"]] last_videos = [(i["id"], i["title"]) for i in channel["entries"]]
return last_videos return last_videos
def find_missing(self): def find_missing(self):

View File

@ -1,88 +0,0 @@
"""
functionality:
- import yt cookie from filesystem
- make cookie available for yt-dlp
"""
import os
import yt_dlp
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
class CookieHandler:
    """Handle the youtube cookie for yt-dlp (legacy filesystem version).

    Keeps the cookie text in redis and materializes it as a file on
    disk whenever yt-dlp needs a cookiefile, removing ("hiding") the
    file again when the task is done.
    """

    # NOTE(review): evaluated at class-definition (import) time — config
    # changes made after import are never picked up by this class
    CONFIG = AppConfig().config
    CACHE_PATH = CONFIG["application"]["cache_dir"]
    # filename expected in <cache>/import/ when importing a new cookie
    COOKIE_FILE_NAME = "cookies.google.txt"
    # redis key the raw cookie text is stored under
    COOKIE_KEY = "cookie"
    # NOTE(review): relative path — resolves against the process cwd,
    # so the on-disk location depends on where the worker was started
    COOKIE_PATH = "cookie.txt"

    def import_cookie(self):
        """Read cookie text from the import folder into redis.

        Deletes the import file after a successful read; raises
        FileNotFoundError if no import file exists.
        """
        import_path = os.path.join(
            self.CACHE_PATH, "import", self.COOKIE_FILE_NAME
        )
        with open(import_path, encoding="utf-8") as cookie_file:
            cookie = cookie_file.read()

        RedisArchivist().set_message(self.COOKIE_KEY, cookie, expire=False)
        os.remove(import_path)
        print("cookie: import successfully")

    def use(self):
        """Write the stored cookie to COOKIE_PATH and return that path.

        Raises FileNotFoundError when no cookie has been imported
        (get_message returns a dict in that case, not a string).
        """
        cookie = RedisArchivist().get_message(self.COOKIE_KEY)
        if isinstance(cookie, dict):
            print("no cookie imported")
            raise FileNotFoundError

        # already materialized by a previous call — reuse it
        if os.path.exists(self.COOKIE_PATH):
            return self.COOKIE_PATH

        with open(self.COOKIE_PATH, "w", encoding="utf-8") as cookie_file:
            cookie_file.write(cookie)

        print("cookie: made available")
        return self.COOKIE_PATH

    def hide(self):
        """Remove the on-disk cookie file if present; redis copy is kept."""
        try:
            os.remove(self.COOKIE_PATH)
        except FileNotFoundError:
            print("cookie: not available")
            return

        print("cookie: hidden")

    def revoke(self):
        """Remove the cookie from disk and delete it from redis."""
        self.hide()
        RedisArchivist().del_message(self.COOKIE_KEY)
        print("cookie: revoked")

    def validate(self):
        """Validate the cookie against the liked-videos playlist (LL).

        Returns True when yt-dlp can extract the playlist with the
        cookie attached, False when no cookie is imported or the
        extraction fails.
        """
        try:
            _ = self.use()
        except FileNotFoundError:
            return False

        # "LL" is the authenticated liked-videos playlist — only
        # reachable with a valid login cookie
        url = "https://www.youtube.com/playlist?list=LL"
        yt_obs = {
            "quiet": True,
            "skip_download": True,
            "extract_flat": True,
            "cookiefile": self.COOKIE_PATH,
        }
        try:
            response = yt_dlp.YoutubeDL(yt_obs).extract_info(url)
        except yt_dlp.utils.DownloadError:
            print("failed to validate cookie")
            response = False

        return bool(response)

View File

@ -0,0 +1,101 @@
"""
functionality:
- base class to make all calls to yt-dlp
- handle yt-dlp errors
"""
import os
from io import StringIO
import yt_dlp
from home.src.ta.ta_redis import RedisArchivist
class YtWrap:
    """Wrap all calls to yt-dlp with shared base options and error handling."""

    # defaults merged under every request; obs_request keys win on conflict
    OBS_BASE = {
        "default_search": "ytsearch",
        "quiet": True,
        "check_formats": "selected",
        "socket_timeout": 2,
    }

    def __init__(self, obs_request, config=False):
        """
        obs_request: dict of yt-dlp options for this specific call
        config: AppConfig().config dict, or False when no app config is
            available (e.g. anonymous lookups that never need cookies)
        """
        self.obs_request = obs_request
        self.config = config
        self.build_obs()

    def build_obs(self):
        """build the effective yt-dlp options dict in self.obs"""
        self.obs = self.OBS_BASE.copy()
        self.obs.update(self.obs_request)
        self.add_cookie()

    def add_cookie(self):
        """attach cookie io stream when cookie_import is enabled"""
        # guard against config=False (no config passed): subscripting a
        # bool would raise TypeError — callers like UrlListParser build
        # YtWrap without a config on purpose
        if self.config and self.config["downloads"]["cookie_import"]:
            cookie_io = CookieHandler(self.config).get()
            self.obs["cookiefile"] = cookie_io

    def download(self, url):
        """download a single url, return True on success, False on failure"""
        with yt_dlp.YoutubeDL(self.obs) as ydl:
            try:
                ydl.download([url])
            except yt_dlp.utils.DownloadError:
                print(f"{url}: failed to download.")
                return False

        return True

    def extract(self, url):
        """extract metadata for url, return the info dict or False on failure"""
        try:
            response = yt_dlp.YoutubeDL(self.obs).extract_info(url)
        except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError):
            print(f"{url}: failed to get info from youtube")
            response = False

        return response
class CookieHandler:
    """Handle the youtube cookie stored in redis for yt-dlp."""

    def __init__(self, config):
        """config: AppConfig().config dict, used to locate the cache dir"""
        self.cookie_io = False
        self.config = config

    def get(self):
        """Return the cookie as an in-memory stream for yt-dlp's cookiefile.

        Raises FileNotFoundError when no cookie has been imported.
        """
        cookie = RedisArchivist().get_message("cookie")
        # get_message returns a dict when the key is missing; passing
        # that to StringIO would raise a confusing TypeError — fail
        # explicitly instead, matching the previous implementation
        if isinstance(cookie, dict):
            print("no cookie imported")
            raise FileNotFoundError

        self.cookie_io = StringIO(cookie)
        return self.cookie_io

    def import_cookie(self):
        """Read cookie text from the import folder into redis.

        Deletes the import file after a successful read; raises
        FileNotFoundError if no import file exists.
        """
        cache_path = self.config["application"]["cache_dir"]
        import_path = os.path.join(cache_path, "import", "cookies.google.txt")
        with open(import_path, encoding="utf-8") as cookie_file:
            cookie = cookie_file.read()

        RedisArchivist().set_message("cookie", cookie, expire=False)
        os.remove(import_path)
        print("cookie: import successful")

    @staticmethod
    def revoke():
        """Delete the stored cookie from redis."""
        RedisArchivist().del_message("cookie")
        print("cookie: revoked")

    def validate(self):
        """Validate the cookie by extracting the liked-videos playlist.

        "LL" resolves to the authenticated liked-videos playlist, only
        reachable with a valid login cookie. Returns bool.
        """
        obs_request = {
            "skip_download": True,
            "extract_flat": True,
        }
        response = YtWrap(obs_request, self.config).extract("LL")
        return bool(response)

View File

@ -9,12 +9,10 @@ functionality:
import os import os
import shutil import shutil
from datetime import datetime from datetime import datetime
from time import sleep
import yt_dlp
from home.src.download.queue import PendingList from home.src.download.queue import PendingList
from home.src.download.subscriptions import PlaylistSubscription from home.src.download.subscriptions import PlaylistSubscription
from home.src.download.yt_cookie import CookieHandler from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.channel import YoutubeChannel from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist from home.src.index.playlist import YoutubePlaylist
@ -41,7 +39,6 @@ class DownloadPostProcess:
self.auto_delete_all() self.auto_delete_all()
self.auto_delete_overwrites() self.auto_delete_overwrites()
self.validate_playlists() self.validate_playlists()
self.pending.close_config()
def auto_delete_all(self): def auto_delete_all(self):
"""handle auto delete""" """handle auto delete"""
@ -174,11 +171,10 @@ class VideoDownloader:
if not youtube_id: if not youtube_id:
break break
try: success = self._dl_single_vid(youtube_id)
self._dl_single_vid(youtube_id) if not success:
except yt_dlp.utils.DownloadError:
print("failed to download " + youtube_id)
continue continue
vid_dict = index_new_video( vid_dict = index_new_video(
youtube_id, video_overwrites=self.video_overwrites youtube_id, video_overwrites=self.video_overwrites
) )
@ -293,9 +289,6 @@ class VideoDownloader:
self.obs["ratelimit"] = ( self.obs["ratelimit"] = (
self.config["downloads"]["limit_speed"] * 1024 self.config["downloads"]["limit_speed"] * 1024
) )
if self.config["downloads"]["cookie_import"]:
cookie_path = CookieHandler().use()
self.obs["cookiefile"] = cookie_path
throttle = self.config["downloads"]["throttledratelimit"] throttle = self.config["downloads"]["throttledratelimit"]
if throttle: if throttle:
@ -359,13 +352,7 @@ class VideoDownloader:
if youtube_id in file_name: if youtube_id in file_name:
obs["outtmpl"] = os.path.join(dl_cache, file_name) obs["outtmpl"] = os.path.join(dl_cache, file_name)
with yt_dlp.YoutubeDL(obs) as ydl: success = YtWrap(obs, self.config).download(youtube_id)
try:
ydl.download([youtube_id])
except yt_dlp.utils.DownloadError:
print("retry failed download: " + youtube_id)
sleep(10)
ydl.download([youtube_id])
if self.obs["writethumbnail"]: if self.obs["writethumbnail"]:
# webp files don't get cleaned up automatically # webp files don't get cleaned up automatically
@ -375,6 +362,8 @@ class VideoDownloader:
file_path = os.path.join(dl_cache, file_name) file_path = os.path.join(dl_cache, file_name)
os.remove(file_path) os.remove(file_path)
return success
def move_to_archive(self, vid_dict): def move_to_archive(self, vid_dict):
"""move downloaded video from cache to archive""" """move downloaded video from cache to archive"""
videos = self.config["application"]["videos"] videos = self.config["application"]["videos"]

View File

@ -10,10 +10,10 @@ import re
from datetime import datetime from datetime import datetime
import requests import requests
import yt_dlp
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from home.src.download import queue # partial import from home.src.download import queue # partial import
from home.src.download.thumbnails import ThumbManager from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.generic import YouTubeItem from home.src.index.generic import YouTubeItem
from home.src.index.playlist import YoutubePlaylist from home.src.index.playlist import YoutubePlaylist
@ -314,12 +314,8 @@ class YoutubeChannel(YouTubeItem):
f"https://www.youtube.com/channel/{self.youtube_id}" f"https://www.youtube.com/channel/{self.youtube_id}"
+ "/playlists?view=1&sort=dd&shelf_id=0" + "/playlists?view=1&sort=dd&shelf_id=0"
) )
obs = { obs = {"skip_download": True, "extract_flat": True}
"quiet": True, playlists = YtWrap(obs, self.config).extract(url)
"skip_download": True,
"extract_flat": True,
}
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]] all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
self.all_playlists = all_entries self.all_playlists = all_entries

View File

@ -12,7 +12,6 @@ import shutil
import subprocess import subprocess
from home.src.download.queue import PendingList from home.src.download.queue import PendingList
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_handler import VideoDownloader from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.es.connect import ElasticWrap from home.src.es.connect import ElasticWrap
from home.src.index.reindex import Reindex from home.src.index.reindex import Reindex
@ -309,12 +308,6 @@ def scan_filesystem():
def reindex_old_documents(): def reindex_old_documents():
"""daily refresh of old documents""" """daily refresh of old documents"""
handler = Reindex() handler = Reindex()
if handler.config["downloads"]["cookie_import"]: handler.check_outdated()
CookieHandler().use() handler.reindex()
try: RedisArchivist().set_message("last_reindex", handler.now, expire=False)
handler.check_outdated()
handler.reindex()
RedisArchivist().set_message("last_reindex", handler.now, expire=False)
finally:
if handler.config["downloads"]["cookie_import"]:
CookieHandler().hide()

View File

@ -5,8 +5,7 @@ functionality:
import math import math
import yt_dlp from home.src.download.yt_dlp_base import YtWrap
from home.src.download.yt_cookie import CookieHandler
from home.src.es.connect import ElasticWrap from home.src.es.connect import ElasticWrap
from home.src.ta.config import AppConfig from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist from home.src.ta.ta_redis import RedisArchivist
@ -19,42 +18,22 @@ class YouTubeItem:
index_name = False index_name = False
yt_base = False yt_base = False
yt_obs = { yt_obs = {
"quiet": True,
"default_search": "ytsearch",
"skip_download": True, "skip_download": True,
"check_formats": "selected",
"socket_timeout": 3,
"noplaylist": True, "noplaylist": True,
} }
def __init__(self, youtube_id): def __init__(self, youtube_id):
self.youtube_id = youtube_id self.youtube_id = youtube_id
self.config = False
self.app_conf = False
self.youtube_meta = False
self.json_data = False
self._get_conf()
def _get_conf(self):
"""read user conf"""
self.config = AppConfig().config self.config = AppConfig().config
self.app_conf = self.config["application"] self.app_conf = self.config["application"]
if self.config["downloads"]["cookie_import"]: self.youtube_meta = False
cookie_path = CookieHandler().use() self.json_data = False
self.yt_obs.update({"cookiefile": cookie_path})
def get_from_youtube(self): def get_from_youtube(self):
"""use yt-dlp to get meta data from youtube""" """use yt-dlp to get meta data from youtube"""
print(f"{self.youtube_id}: get metadata from youtube") print(f"{self.youtube_id}: get metadata from youtube")
try: url = self.yt_base + self.youtube_id
yt_item = yt_dlp.YoutubeDL(self.yt_obs) response = YtWrap(self.yt_obs, self.config).extract(url)
response = yt_item.extract_info(self.yt_base + self.youtube_id)
except (
yt_dlp.utils.ExtractorError,
yt_dlp.utils.DownloadError,
):
print(f"{self.youtube_id}: failed to get info from youtube")
response = False
self.youtube_meta = response self.youtube_meta = response

View File

@ -18,12 +18,7 @@ class YoutubePlaylist(YouTubeItem):
es_path = False es_path = False
index_name = "ta_playlist" index_name = "ta_playlist"
yt_obs = { yt_obs = {"extract_flat": True}
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
yt_base = "https://www.youtube.com/playlist?list=" yt_base = "https://www.youtube.com/playlist?list="
def __init__(self, youtube_id): def __init__(self, youtube_id):

View File

@ -11,7 +11,7 @@ import unicodedata
from datetime import datetime from datetime import datetime
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
import yt_dlp from home.src.download.yt_dlp_base import YtWrap
def clean_string(file_name): def clean_string(file_name):
@ -184,14 +184,12 @@ class UrlListParser:
@staticmethod @staticmethod
def extract_channel_name(url): def extract_channel_name(url):
"""find channel id from channel name with yt-dlp help""" """find channel id from channel name with yt-dlp help"""
obs = { obs_request = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True, "skip_download": True,
"extract_flat": True, "extract_flat": True,
"playlistend": 0, "playlistend": 0,
} }
url_info = yt_dlp.YoutubeDL(obs).extract_info(url, download=False) url_info = YtWrap(obs_request).extract(url)
try: try:
channel_id = url_info["channel_id"] channel_id = url_info["channel_id"]
except KeyError as error: except KeyError as error:

View File

@ -8,7 +8,6 @@ import json
import os import os
import redis import redis
from home.src.ta.helper import ignore_filelist
class RedisBase: class RedisBase:
@ -95,27 +94,6 @@ class RedisArchivist(RedisBase):
return all_messages return all_messages
@staticmethod
def monitor_cache_dir(cache_dir):
"""
look at download cache dir directly as alternative progress info
"""
dl_cache = os.path.join(cache_dir, "download")
all_cache_file = os.listdir(dl_cache)
cache_file = ignore_filelist(all_cache_file)
if cache_file:
filename = cache_file[0][12:].replace("_", " ").split(".")[0]
mess_dict = {
"status": "message:download",
"level": "info",
"title": "Downloading: " + filename,
"message": "",
}
else:
return False
return mess_dict
class RedisQueue(RedisBase): class RedisQueue(RedisBase):
"""dynamically interact with the download queue in redis""" """dynamically interact with the download queue in redis"""

View File

@ -14,7 +14,7 @@ from django.contrib.auth.forms import AuthenticationForm
from django.http import JsonResponse from django.http import JsonResponse
from django.shortcuts import redirect, render from django.shortcuts import redirect, render
from django.views import View from django.views import View
from home.src.download.yt_cookie import CookieHandler from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.index_setup import get_available_backups from home.src.es.index_setup import get_available_backups
from home.src.frontend.api_calls import PostData from home.src.frontend.api_calls import PostData
from home.src.frontend.forms import ( from home.src.frontend.forms import (
@ -795,18 +795,19 @@ class SettingsView(View):
def post(self, request): def post(self, request):
"""handle form post to update settings""" """handle form post to update settings"""
user_form = UserSettingsForm(request.POST) user_form = UserSettingsForm(request.POST)
config_handler = AppConfig()
if user_form.is_valid(): if user_form.is_valid():
user_form_post = user_form.cleaned_data user_form_post = user_form.cleaned_data
if any(user_form_post.values()): if any(user_form_post.values()):
AppConfig().set_user_config(user_form_post, request.user.id) config_handler.set_user_config(user_form_post, request.user.id)
app_form = ApplicationSettingsForm(request.POST) app_form = ApplicationSettingsForm(request.POST)
if app_form.is_valid(): if app_form.is_valid():
app_form_post = app_form.cleaned_data app_form_post = app_form.cleaned_data
if app_form_post: if app_form_post:
print(app_form_post) print(app_form_post)
updated = AppConfig().update_config(app_form_post) updated = config_handler.update_config(app_form_post)
self.post_process_updated(updated) self.post_process_updated(updated, config_handler.config)
scheduler_form = SchedulerSettingsForm(request.POST) scheduler_form = SchedulerSettingsForm(request.POST)
if scheduler_form.is_valid(): if scheduler_form.is_valid():
@ -819,7 +820,7 @@ class SettingsView(View):
return redirect("settings", permanent=True) return redirect("settings", permanent=True)
@staticmethod @staticmethod
def post_process_updated(updated): def post_process_updated(updated, config):
"""apply changes for config""" """apply changes for config"""
if not updated: if not updated:
return return
@ -827,9 +828,9 @@ class SettingsView(View):
for config_value, updated_value in updated: for config_value, updated_value in updated:
if config_value == "cookie_import": if config_value == "cookie_import":
if updated_value: if updated_value:
CookieHandler().import_cookie() CookieHandler(config).import_cookie()
else: else:
CookieHandler().revoke() CookieHandler(config).revoke()
def progress(request): def progress(request):