From ca3ea20283e77f82750800f0862f37ca64e0082e Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 09:48:52 +0700 Subject: [PATCH 01/10] add socket_timeout when passing check_formats to yt-dlp --- tubearchivist/home/src/download/queue.py | 1 + tubearchivist/home/src/download/yt_dlp_handler.py | 1 + tubearchivist/home/src/index/generic.py | 1 + 3 files changed, 3 insertions(+) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 5ec4a19..57d520b 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -128,6 +128,7 @@ class PendingList(PendingIndex): "noplaylist": True, "writethumbnail": True, "simulate": True, + "socket_timeout": 3, } def __init__(self, youtube_ids=False): diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 9712f95..816e58c 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -281,6 +281,7 @@ class VideoDownloader: "writethumbnail": False, "noplaylist": True, "check_formats": "selected", + "socket_timeout": 3, } def _build_obs_user(self): diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py index c4dcac5..f384ec7 100644 --- a/tubearchivist/home/src/index/generic.py +++ b/tubearchivist/home/src/index/generic.py @@ -23,6 +23,7 @@ class YouTubeItem: "default_search": "ytsearch", "skip_download": True, "check_formats": "selected", + "socket_timeout": 3, "noplaylist": True, } From c4107fc3708b0de7081942694ba5f6d9bc7f8e07 Mon Sep 17 00:00:00 2001 From: Nathan DeTar Date: Sun, 1 May 2022 20:41:42 -0700 Subject: [PATCH 02/10] Fix to prevent holding onto SB info. 
(#236) --- tubearchivist/static/script.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 4287100..69caac7 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -350,7 +350,10 @@ function createPlayer(button) { `; } } + } else { + sponsorBlock = null; } + var videoProgress = getVideoProgress(videoId).position; var videoName = videoData.data.title; From 6fbac12314a5fd4e1c1e6217b1c7bf7463b041be Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 11:27:50 +0700 Subject: [PATCH 03/10] fix channel_validate in PlaylistSubscription, #223 --- tubearchivist/home/src/download/subscriptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index a460af9..75aede2 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -168,7 +168,7 @@ class PlaylistSubscription: def channel_validate(channel_id): """make sure channel of playlist is there""" channel = YoutubeChannel(channel_id) - channel.build_json() + channel.build_json(upload=True) @staticmethod def change_subscribe(playlist_id, subscribe_status): From 34c18e75f5fe5e5532a7a04d1d4b897bb325c6db Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 11:30:38 +0700 Subject: [PATCH 04/10] add LL and WL as valid playlists with cookie --- tubearchivist/home/src/ta/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index c572ccc..5731115 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -173,7 +173,7 @@ class UrlListParser: id_type = "video" elif str_len == 24: id_type = "channel" - elif str_len in [34, 18]: + elif str_len in [34, 18] or id_str in ["LL", "WL"]: id_type = "playlist" else: # unable to 
parse From 34a1fe9e8ec9b14acac5316bbff8798c5c266749 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 17:38:20 +0700 Subject: [PATCH 05/10] fix thumb re-embed task, #231 --- tubearchivist/home/src/download/thumbnails.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index d25f4d1..35fc100 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -296,6 +296,7 @@ class ThumbManager: def get_thumb_list(self): """get list of mediafiles and matching thumbnails""" pending = queue.PendingList() + pending.get_download() pending.get_indexed() video_list = [] From ca5b00a373f31dd3b5c3fc34cd2ad39d1102ef85 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 18:20:56 +0700 Subject: [PATCH 06/10] refresh for PendingInteract delete to avoid race condition, #217 --- tubearchivist/home/src/download/queue.py | 2 +- tubearchivist/home/src/es/connect.py | 4 +++- tubearchivist/home/src/index/generic.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 57d520b..6be7f82 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -103,7 +103,7 @@ class PendingInteract: def delete_item(self): """delete single item from pending""" path = f"ta_download/_doc/{self.video_id}" - _, _ = ElasticWrap(path).delete() + _, _ = ElasticWrap(path).delete(refresh=True) def delete_by_status(self): """delete all matching item by status""" diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index f976943..e61ae14 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -75,8 +75,10 @@ class ElasticWrap: return response.json(), response.status_code - def delete(self, data=False): + def delete(self, data=False, 
refresh=False): """delete document from es""" + if refresh: + self.url = f"{self.url}/?refresh=true" if data: response = requests.delete(self.url, json=data, auth=self.auth) else: diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py index f384ec7..0940258 100644 --- a/tubearchivist/home/src/index/generic.py +++ b/tubearchivist/home/src/index/generic.py @@ -86,7 +86,7 @@ class YouTubeItem: def del_in_es(self): """delete item from elastic search""" print(f"{self.youtube_id}: delete from es") - _, _ = ElasticWrap(self.es_path).delete() + _, _ = ElasticWrap(self.es_path).delete(refresh=True) class Pagination: From 8a4c50779acfdd511c740457ea966508919ca5c7 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 18:39:54 +0700 Subject: [PATCH 07/10] standardize prints for PostData mapper --- tubearchivist/home/src/frontend/api_calls.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tubearchivist/home/src/frontend/api_calls.py b/tubearchivist/home/src/frontend/api_calls.py index 26b1730..a734996 100644 --- a/tubearchivist/home/src/frontend/api_calls.py +++ b/tubearchivist/home/src/frontend/api_calls.py @@ -111,7 +111,7 @@ class PostData: def _ignore(self): """ignore from download queue""" video_id = self.exec_val - print(f"ignore video {video_id}") + print(f"{video_id}: ignore video from download queue") PendingInteract(video_id=video_id, status="ignore").update_status() # also clear from redis queue RedisQueue().clear_item(video_id) @@ -123,7 +123,7 @@ class PostData: print("download pending") running = download_pending.delay() task_id = running.id - print("set task id: " + task_id) + print(f"{task_id}: set task id") RedisArchivist().set_message("dl_queue_id", task_id, expire=False) return {"success": True} @@ -146,7 +146,7 @@ class PostData: def _unsubscribe(self): """unsubscribe from channels or playlists""" id_unsub = self.exec_val - print("unsubscribe from " + id_unsub) + 
print(f"{id_unsub}: unsubscribe") to_unsub_list = UrlListParser(id_unsub).process_list() for to_unsub in to_unsub_list: unsub_type = to_unsub["type"] @@ -167,7 +167,7 @@ class PostData: def _subscribe(self): """subscribe to channel or playlist, called from js buttons""" id_sub = self.exec_val - print("subscribe to " + id_sub) + print(f"{id_sub}: subscribe") subscribe_to.delay(id_sub) return {"success": True} @@ -203,7 +203,7 @@ class PostData: def _dlnow(self): """start downloading single vid now""" youtube_id = self.exec_val - print("downloading: " + youtube_id) + print(f"{youtube_id}: downloading now") running = download_single.delay(youtube_id=youtube_id) task_id = running.id print("set task id: " + task_id) @@ -222,14 +222,14 @@ class PostData: def _forget_ignore(self): """delete from ta_download index""" video_id = self.exec_val - print(f"forgetting from download index: {video_id}") + print(f"{video_id}: forget from download") PendingInteract(video_id=video_id).delete_item() return {"success": True} def _add_single(self): """add single youtube_id to download queue""" video_id = self.exec_val - print(f"add vid to dl queue: {video_id}") + print(f"{video_id}: add single vid to download queue") PendingInteract(video_id=video_id).delete_item() video_ids = UrlListParser(video_id).process_list() extrac_dl.delay(video_ids) From 83beb53860c4c028d5a530c867440f7634e08fe3 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 May 2022 21:38:03 +0700 Subject: [PATCH 08/10] simplify _add_single to call update_status in PendingInteract directly --- tubearchivist/home/src/frontend/api_calls.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tubearchivist/home/src/frontend/api_calls.py b/tubearchivist/home/src/frontend/api_calls.py index a734996..8b0d9b3 100644 --- a/tubearchivist/home/src/frontend/api_calls.py +++ b/tubearchivist/home/src/frontend/api_calls.py @@ -19,7 +19,6 @@ from home.src.ta.ta_redis import RedisArchivist, RedisQueue from home.tasks import 
( download_pending, download_single, - extrac_dl, index_channel_playlists, kill_dl, re_sync_thumbs, @@ -230,9 +229,7 @@ class PostData: """add single youtube_id to download queue""" video_id = self.exec_val print(f"{video_id}: add single vid to download queue") - PendingInteract(video_id=video_id).delete_item() - video_ids = UrlListParser(video_id).process_list() - extrac_dl.delay(video_ids) + PendingInteract(video_id=video_id, status="pending").update_status() return {"success": True} def _delete_queue(self): From 1613b71942ecd5f2c6959b053d6dade24b597481 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 3 May 2022 17:32:32 +0700 Subject: [PATCH 09/10] hide cookie after rescan and download process --- tubearchivist/home/src/download/yt_dlp_handler.py | 6 ++++++ tubearchivist/home/src/index/filesystem.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 816e58c..fd91432 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -41,6 +41,7 @@ class DownloadPostProcess: self.auto_delete_all() self.auto_delete_overwrites() self.validate_playlists() + self.clear_cookie() def auto_delete_all(self): """handle auto delete""" @@ -140,6 +141,11 @@ class DownloadPostProcess: else: RedisArchivist().set_message("message:download", mess_dict) + def clear_cookie(self): + """hide cookie file""" + if self.download.config["downloads"]["cookie_import"]: + CookieHandler().hide() + class VideoDownloader: """ diff --git a/tubearchivist/home/src/index/filesystem.py b/tubearchivist/home/src/index/filesystem.py index c6f1caa..93a1bc4 100644 --- a/tubearchivist/home/src/index/filesystem.py +++ b/tubearchivist/home/src/index/filesystem.py @@ -12,6 +12,7 @@ import shutil import subprocess from home.src.download.queue import PendingList +from home.src.download.yt_cookie import CookieHandler from 
home.src.download.yt_dlp_handler import VideoDownloader from home.src.es.connect import ElasticWrap from home.src.index.reindex import Reindex @@ -308,6 +309,12 @@ def scan_filesystem(): def reindex_old_documents(): """daily refresh of old documents""" handler = Reindex() - handler.check_outdated() - handler.reindex() - RedisArchivist().set_message("last_reindex", handler.now, expire=False) + if handler.config["downloads"]["cookie_import"]: + CookieHandler().use() + try: + handler.check_outdated() + handler.reindex() + RedisArchivist().set_message("last_reindex", handler.now, expire=False) + finally: + if handler.config["downloads"]["cookie_import"]: + CookieHandler().hide() From fcac10a6ced1d236c701d4331981897dc335d813 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 3 May 2022 18:17:36 +0700 Subject: [PATCH 10/10] add cookie documentation --- docs/Settings.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/Settings.md b/docs/Settings.md index 50d1d4c..b048fbe 100644 --- a/docs/Settings.md +++ b/docs/Settings.md @@ -32,6 +32,27 @@ Additional settings passed to yt-dlp. - **Source Settings**: User created subtitles are provided from the uploader and are usually the video script. Auto generated is from YouTube, quality varies, particularly for auto translated tracks. - **Index Settings**: Enabling subtitle indexing will add the lines to Elasticsearch and will make subtitles searchable. This will increase the index size and is not recommended on low-end hardware. +## Cookie +Importing your YouTube Cookie into Tube Archivist allows yt-dlp to bypass age restrictions, gives access to private videos and your *watch later* or *liked videos*. + +### Security concerns +Cookies are used to store your session and contain your access token to your google account. Treat that file with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will export the file container internally if it's needed. 
+ +### Export your cookie +- Install the **Cookies.txt** addon for [chrome](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid) or [firefox](https://addons.mozilla.org/firefox/addon/cookies-txt). +- Visit YouTube and log in with whichever YouTube account you wish to use to generate the cookies. +- Click on the extension icon in the toolbar - it will drop down showing the active cookies for YT. +- Click Export to export the cookies; the filename is *cookies.google.txt* by default. + +### Import your cookie +Place the file *cookies.google.txt* into the */cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will get imported and stored internally. + +### Use your cookie +Once imported, in addition to the advantages above, your [Watch Later](https://www.youtube.com/playlist?list=WL) and [Liked Videos](https://www.youtube.com/playlist?list=LL) become regular playlists you can download and subscribe to like any other [playlist](Playlists). + +### Limitation +There is only one cookie per Tube Archivist instance; it is shared between all users. + ## Integrations All third party integrations of TubeArchivist will **always** be *opt in*. - **API**: Your access token for the Tube Archivist API.