From 4eba36f7a03d076b1688b051c21733b9cbd50d1d Mon Sep 17 00:00:00 2001 From: Nathan DeTar Date: Wed, 4 May 2022 03:07:01 -0700 Subject: [PATCH 1/7] Cookie Validation Button (#242) * Added cookie validate button & message placeholder * Added handleCookieValidate(), postCookieValidate() * show validation only if enabled, add processing message for immediate feedback Co-authored-by: simon --- .../home/templates/home/settings.html | 7 ++++++- tubearchivist/static/script.js | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 522e488..7475171 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -119,7 +119,12 @@

Import YouTube cookie: {{ config.downloads.cookie_import }}

Place your cookie file named cookies.google.txt in /cache/import before enabling.
- {{ app_form.downloads_cookie_import }} + {{ app_form.downloads_cookie_import }}
+ {% if config.downloads.cookie_import %} +
+ +
+ {% endif %}
diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 69caac7..e20d31c 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -699,6 +699,24 @@ function postSponsorSegmentVote(videoId, uuid, vote) { apiRequest(apiEndpoint, "POST", data); } +function handleCookieValidate() { + document.getElementById("cookieButton").remove(); + var cookieMessageElement = document.getElementById("cookieMessage"); + cookieMessageElement.innerHTML = `Processing.`; + response = postCookieValidate(); + if (response.cookie_validated == true) { + cookieMessageElement.innerHTML = `The cookie file is valid.`; + } else { + cookieMessageElement.innerHTML = `Warning, the cookie file is invalid.`; + } +} + +// Check youtube cookie settings +function postCookieValidate() { + var apiEndpoint = "/api/cookie/"; + return apiRequest(apiEndpoint, "POST"); +} + // Makes api requests when passed an endpoint and method ("GET", "POST", "DELETE") function apiRequest(apiEndpoint, method, data) { const xhttp = new XMLHttpRequest(); From 7fa0d0f33149b048a347dfa28712a65c543239ce Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 5 May 2022 09:11:37 +0700 Subject: [PATCH 2/7] untangle delete subtitles from delete media file, #195 --- tubearchivist/home/src/index/video.py | 46 ++++++++++++++++----------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 290e0ce..67f2b8f 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -151,6 +151,23 @@ class YoutubeSubtitle: """send subtitle to es for indexing""" _, _ = ElasticWrap("_bulk").post(data=query_str, ndjson=True) + def delete(self): + """delete subtitles from index and filesystem""" + youtube_id = self.video.youtube_id + # delete files + videos_base = self.video.config["application"]["videos"] + files = [i["media_url"] for i in self.video.json_data["subtitles"]] + for 
file_name in files: + file_path = os.path.join(videos_base, file_name) + try: + os.remove(file_path) + except FileNotFoundError: + print(f"{youtube_id}: {file_path} failed to delete") + # delete from index + path = "ta_subtitle/_delete_by_query?refresh=true" + data = {"query": {"term": {"youtube_id": {"value": youtube_id}}}} + _, _ = ElasticWrap(path).post(data=data) + class SubtitleParser: """parse subtitle str from youtube""" @@ -542,22 +559,21 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): """delete video file, meta data""" self.get_from_es() video_base = self.app_conf["videos"] - to_del = [self.json_data.get("media_url")] - - all_subtitles = self.json_data.get("subtitles") - if all_subtitles: - to_del = to_del + [i.get("media_url") for i in all_subtitles] - - for media_url in to_del: - file_path = os.path.join(video_base, media_url) - try: - os.remove(file_path) - except FileNotFoundError: - print(f"{self.youtube_id}: failed {media_url}, continue.") + media_url = self.json_data.get("media_url") + file_path = os.path.join(video_base, media_url) + try: + os.remove(file_path) + except FileNotFoundError: + print(f"{self.youtube_id}: failed {media_url}, continue.") self.del_in_es() self.delete_subtitles() + def delete_subtitles(self): + """delete indexed subtitles""" + print(f"{self.youtube_id}: delete subtitles") + YoutubeSubtitle(self).delete() + def _get_ryd_stats(self): """get optional stats from returnyoutubedislikeapi.com""" try: @@ -592,12 +608,6 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self.json_data["subtitles"] = subtitles handler.download_subtitles(relevant_subtitles=subtitles) - def delete_subtitles(self): - """delete indexed subtitles""" - path = "ta_subtitle/_delete_by_query?refresh=true" - data = {"query": {"term": {"youtube_id": {"value": self.youtube_id}}}} - _, _ = ElasticWrap(path).post(data=data) - def index_new_video(youtube_id, video_overwrites=False): """combined classes to create new video in index""" From 
7a513689558b74eed87ee96cf199890bd16daef9 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 5 May 2022 22:56:22 +0700 Subject: [PATCH 3/7] fix skip deleting non-existing subtitles, add update_media_url --- tubearchivist/home/src/index/video.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 67f2b8f..7b951d8 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -156,6 +156,9 @@ class YoutubeSubtitle: youtube_id = self.video.youtube_id # delete files videos_base = self.video.config["application"]["videos"] + if not self.video.json_data.get("subtitles"): + return + files = [i["media_url"] for i in self.video.json_data["subtitles"]] for file_name in files: file_path = os.path.join(videos_base, file_name) @@ -608,6 +611,12 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self.json_data["subtitles"] = subtitles handler.download_subtitles(relevant_subtitles=subtitles) + def update_media_url(self): + """update only media_url in es for reindex channel rename""" + data = {"doc": {"media_url": self.json_data["media_url"]}} + path = f"{self.index_name}/_update/{self.youtube_id}" + _, _ = ElasticWrap(path).post(data=data) + def index_new_video(youtube_id, video_overwrites=False): """combined classes to create new video in index""" From 9a2b29c2ae0eb2ecf456fdf7f2503a08286557af Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 5 May 2022 22:59:56 +0700 Subject: [PATCH 4/7] implement channel name change in reindex, #211 --- tubearchivist/home/src/index/reindex.py | 69 ++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index ed29e89..5fbdeee 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -4,12 +4,15 @@ functionality: - index and update in es """ +import os +import shutil from
datetime import datetime from math import ceil from time import sleep from home.src.download.queue import PendingList from home.src.download.thumbnails import ThumbManager +from home.src.download.yt_dlp_handler import VideoDownloader from home.src.es.connect import ElasticWrap from home.src.index.channel import YoutubeChannel from home.src.index.playlist import YoutubePlaylist @@ -211,7 +214,12 @@ class Reindex: # videos print(f"reindexing {len(self.all_youtube_ids)} videos") for youtube_id in self.all_youtube_ids: - self._reindex_single_video(youtube_id) + try: + self._reindex_single_video(youtube_id) + except FileNotFoundError: + # handle channel name change here + ChannelUrlFixer(youtube_id, self.config).run() + self._reindex_single_video(youtube_id) if sleep_interval: sleep(sleep_interval) # channels @@ -231,3 +239,62 @@ class Reindex: self._reindex_single_playlist(playlist_id, all_indexed_ids) if sleep_interval: sleep(sleep_interval) + + +class ChannelUrlFixer: + """fix not matching channel names in reindex""" + + def __init__(self, youtube_id, config): + self.youtube_id = youtube_id + self.config = config + self.video = False + + def run(self): + """check and run if needed""" + print(f"{self.youtube_id}: failed to build channel path, try to fix.") + video_path_is, video_folder_is = self.get_as_is() + if not os.path.exists(video_path_is): + print(f"giving up reindex, video in video: {self.video.json_data}") + raise ValueError + + _, video_folder_should = self.get_as_should() + + if video_folder_is != video_folder_should: + self.process(video_path_is) + else: + print(f"{self.youtube_id}: skip channel url fixer") + + def get_as_is(self): + """get video object as is""" + self.video = YoutubeVideo(self.youtube_id) + self.video.get_from_es() + video_path_is = os.path.join( + self.config["application"]["videos"], + self.video.json_data["media_url"], + ) + video_folder_is = os.path.split(video_path_is)[0] + + return video_path_is, video_folder_is + + def 
get_as_should(self): + """add fresh metadata from remote""" + self.video.get_from_youtube() + self.video.add_file_path() + + video_path_should = os.path.join( + self.config["application"]["videos"], + self.video.json_data["media_url"], + ) + video_folder_should = os.path.split(video_path_should)[0] + return video_path_should, video_folder_should + + def process(self, video_path_is): + """fix filepath""" + print(f"{self.youtube_id}: fixing channel rename.") + cache_dir = self.config["application"]["cache_dir"] + new_file_path = os.path.join( + cache_dir, "download", self.youtube_id + ".mp4" + ) + shutil.move(video_path_is, new_file_path) + VideoDownloader().move_to_archive(self.video.json_data) + self.video.update_media_url() From 94fa191fb85e1ee62f28d523f64862a7ecccc714 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 5 May 2022 23:38:10 +0700 Subject: [PATCH 5/7] skip premium videos, clean exit for empty bulk_list, #237 --- tubearchivist/home/src/download/queue.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 6be7f82..8b4322e 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -219,10 +219,11 @@ class PendingList(PendingIndex): thumb_handler.download_vid(thumb_needed) self._notify_add(idx) - # add last newline - bulk_list.append("\n") - query_str = "\n".join(bulk_list) - _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True) + if bulk_list: + # add last newline + bulk_list.append("\n") + query_str = "\n".join(bulk_list) + _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True) def _notify_add(self, idx): """send notification for adding videos to download queue""" @@ -246,7 +247,11 @@ class PendingList(PendingIndex): try: vid = yt_dlp.YoutubeDL(self.yt_obs).extract_info(youtube_id) except yt_dlp.utils.DownloadError: - print("failed to extract info for: " + youtube_id) + 
print(f"{youtube_id}: failed to extract info") + return False + if vid.get("id") != youtube_id: + # skip premium videos with different id + print(f"{youtube_id}: skipping premium video, id not matching") return False # stop if video is streaming live now if vid["is_live"]: From 891911e56b9f67bc62e95b1de13731e8aba4e3da Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 May 2022 08:02:34 +0700 Subject: [PATCH 6/7] note about cookie validation --- docs/Settings.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/Settings.md b/docs/Settings.md index b048fbe..6bbf27c 100644 --- a/docs/Settings.md +++ b/docs/Settings.md @@ -36,7 +36,7 @@ Additional settings passed to yt-dlp. Importing your YouTube Cookie into Tube Archivist allows yt-dlp to bypass age restrictions, gives access to private videos and your *watch later* or *liked videos*. ### Security concerns -Cookies are used to store your session and contain your access token to your google account. Treat that file with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will export the file container internally if it's needed. +Cookies are used to store your session and contain your access token to your Google account; this file can be used to take over your account. Treat that file with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will export the file container internally if it's needed. ### Export your cookie - Install **Cookies.txt** addon for [chrome](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid) or [firefox](https://addons.mozilla.org/firefox/addon/cookies-txt). @@ -45,7 +45,9 @@ Cookies are used to store your session and contain your access token to your goo - Click Export to export the cookies, filename is by default *cookies.google.txt*.
### Import your cookie -Place the file *cookies.google.txt* into the */cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will get imported and stored internally. +Place the file *cookies.google.txt* into the *cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will get imported and stored internally. + +Once imported, a **Validate Cookie File** button will show, where you can confirm if your cookie is working or not. ### Use your cookie Once imported, additionally to the advantages above, your [Watch Later](https://www.youtube.com/playlist?list=WL) and [Liked Videos](https://www.youtube.com/playlist?list=LL) become a regular playlist you can download and subscribe to as any other [playlist](Playlists). From 160c23c36c4085cd3bd98f362e80d78fb4553754 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 May 2022 08:09:19 +0700 Subject: [PATCH 7/7] conform import path style --- tubearchivist/home/templates/home/settings.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 7475171..c529443 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -118,7 +118,7 @@

Cookie

Import YouTube cookie: {{ config.downloads.cookie_import }}

- Place your cookie file named cookies.google.txt in /cache/import before enabling.
+ Place your cookie file named cookies.google.txt in cache/import before enabling.
{{ app_form.downloads_cookie_import }}
{% if config.downloads.cookie_import %}