diff --git a/docs/Settings.md b/docs/Settings.md
index b048fbe..6bbf27c 100644
--- a/docs/Settings.md
+++ b/docs/Settings.md
@@ -36,7 +36,7 @@ Additional settings passed to yt-dlp.
 Importing your YouTube Cookie into Tube Archivist allows yt-dlp to bypass age restrictions, gives access to private videos and your *watch later* or *liked videos*.
 
 ### Security concerns
-Cookies are used to store your session and contain your access token to your google account. Treat that file with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will export the file container internally if it's needed.
+Cookies are used to store your session and contain the access token to your Google account; this file can be used to take over your account. Treat that file with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will export the file internally if it's needed.
 
 ### Export your cookie
 - Install **Cookies.txt** addon for [chrome](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid) or [firefox](https://addons.mozilla.org/firefox/addon/cookies-txt).
@@ -45,7 +45,9 @@ Cookies are used to store your session and contain your access token to your goo
 - Click Export to export the cookies, filename is by default *cookies.google.txt*.
 
 ### Import your cookie
-Place the file *cookies.google.txt* into the */cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will get imported and stored internally.
+Place the file *cookies.google.txt* into the *cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will be imported and stored internally.
+
+Once imported, a **Validate Cookie File** button will appear, where you can confirm whether your cookie is working.
 
 ### Use your cookie
 Once imported, additionally to the advantages above, your [Watch Later](https://www.youtube.com/playlist?list=WL) and [Liked Videos](https://www.youtube.com/playlist?list=LL) become a regular playlist you can download and subscribe to as any other [playlist](Playlists).
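As background for why the exported file is both sensitive and useful, here is a minimal sketch of how yt-dlp consumes a Netscape-format cookie export. It assumes `cookies.google.txt` sits in the working directory and uses the Watch Later playlist linked above; the `cookiefile` and `extract_flat` option names are yt-dlp's own.

```python
# Minimal sketch: check that an exported cookies.google.txt is accepted by
# yt-dlp by listing the otherwise-private Watch Later playlist.
import yt_dlp

opts = {
    "cookiefile": "cookies.google.txt",  # Netscape-format export from the browser addon
    "extract_flat": True,                # only list entries, don't resolve each video
    "quiet": True,
}

with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info(
        "https://www.youtube.com/playlist?list=WL", download=False
    )
    entries = list(info.get("entries") or [])
    print(f"cookie accepted, Watch Later has {len(entries)} videos")
```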
diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py
index 6be7f82..8b4322e 100644
--- a/tubearchivist/home/src/download/queue.py
+++ b/tubearchivist/home/src/download/queue.py
@@ -219,10 +219,11 @@ class PendingList(PendingIndex):
             thumb_handler.download_vid(thumb_needed)
             self._notify_add(idx)
 
-        # add last newline
-        bulk_list.append("\n")
-        query_str = "\n".join(bulk_list)
-        _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
+        if bulk_list:
+            # add last newline
+            bulk_list.append("\n")
+            query_str = "\n".join(bulk_list)
+            _, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
 
     def _notify_add(self, idx):
         """send notification for adding videos to download queue"""
@@ -246,7 +247,11 @@ class PendingList(PendingIndex):
         try:
             vid = yt_dlp.YoutubeDL(self.yt_obs).extract_info(youtube_id)
         except yt_dlp.utils.DownloadError:
-            print("failed to extract info for: " + youtube_id)
+            print(f"{youtube_id}: failed to extract info")
             return False
+        if vid.get("id") != youtube_id:
+            # skip premium videos with different id
+            print(f"{youtube_id}: skipping premium video, id not matching")
+            return False
         # stop if video is streaming live now
         if vid["is_live"]:
diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py
index ed29e89..5fbdeee 100644
--- a/tubearchivist/home/src/index/reindex.py
+++ b/tubearchivist/home/src/index/reindex.py
@@ -4,12 +4,15 @@ functionality:
 - index and update in es
 """
 
+import os
+import shutil
 from datetime import datetime
 from math import ceil
 from time import sleep
 
 from home.src.download.queue import PendingList
 from home.src.download.thumbnails import ThumbManager
+from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.connect import ElasticWrap
 from home.src.index.channel import YoutubeChannel
 from home.src.index.playlist import YoutubePlaylist
@@ -211,7 +214,12 @@ class Reindex:
         # videos
         print(f"reindexing {len(self.all_youtube_ids)} videos")
         for youtube_id in self.all_youtube_ids:
-            self._reindex_single_video(youtube_id)
+            try:
+                self._reindex_single_video(youtube_id)
+            except FileNotFoundError:
+                # handle channel name change here
+                ChannelUrlFixer(youtube_id, self.config).run()
+                self._reindex_single_video(youtube_id)
             if sleep_interval:
                 sleep(sleep_interval)
         # channels
@@ -231,3 +239,62 @@
             self._reindex_single_playlist(playlist_id, all_indexed_ids)
             if sleep_interval:
                 sleep(sleep_interval)
+
+
+class ChannelUrlFixer:
+    """fix not matching channel names in reindex"""
+
+    def __init__(self, youtube_id, config):
+        self.youtube_id = youtube_id
+        self.config = config
+        self.video = False
+
+    def run(self):
+        """check and run if needed"""
+        print(f"{self.youtube_id}: failed to build channel path, try to fix.")
+        video_path_is, video_folder_is = self.get_as_is()
+        if not os.path.exists(video_path_is):
+            print(f"giving up reindex, video file not found: {self.video.json_data}")
+            raise ValueError
+
+        _, video_folder_should = self.get_as_should()
+
+        if video_folder_is != video_folder_should:
+            self.process(video_path_is)
+        else:
+            print(f"{self.youtube_id}: skip channel url fixer")
+
+    def get_as_is(self):
+        """get video object as is"""
+        self.video = YoutubeVideo(self.youtube_id)
+        self.video.get_from_es()
+        video_path_is = os.path.join(
+            self.config["application"]["videos"],
+            self.video.json_data["media_url"],
+        )
+        video_folder_is = os.path.split(video_path_is)[0]
+
+        return video_path_is, video_folder_is
+
+    def get_as_should(self):
+        """add fresh metadata from remote"""
+        self.video.get_from_youtube()
+        self.video.add_file_path()
+
+        video_path_should = os.path.join(
+            self.config["application"]["videos"],
+            self.video.json_data["media_url"],
+        )
+        video_folder_should = os.path.split(video_path_should)[0]
+        return video_path_should, video_folder_should
+
+    def process(self, video_path_is):
+        """fix filepath"""
+        print(f"{self.youtube_id}: fixing channel rename.")
+        cache_dir = self.config["application"]["cache_dir"]
+        new_file_path = os.path.join(
+            cache_dir, "download", self.youtube_id + ".mp4"
+        )
+        shutil.move(video_path_is, new_file_path)
+        VideoDownloader().move_to_archive(self.video.json_data)
+        self.video.update_media_url()
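The new `if bulk_list:` guard in queue.py matters because Elasticsearch's `_bulk` endpoint rejects an empty body, and a valid ndjson payload must end with a newline. Below is a self-contained sketch of the payload shape the loop builds; the `ta_download` index name follows the codebase, while the ids and document fields are illustrative.

```python
# Sketch of the ndjson body posted to _bulk: one action line plus one
# document line per video, newline separated, with a trailing newline.
import json

bulk_list = []
for youtube_id in ["abc123xyz00", "def456uvw11"]:  # made-up ids
    bulk_list.append(json.dumps({"create": {"_index": "ta_download", "_id": youtube_id}}))
    bulk_list.append(json.dumps({"youtube_id": youtube_id}))  # illustrative doc body

if bulk_list:  # skip the request entirely when nothing was queued
    bulk_list.append("\n")  # _bulk requires the payload to end with a newline
    query_str = "\n".join(bulk_list)
    # ElasticWrap("_bulk").post(query_str, ndjson=True) would send this
    print(query_str)
```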
remote""" + self.video.get_from_youtube() + self.video.add_file_path() + + video_path_should = os.path.join( + self.config["application"]["videos"], + self.video.json_data["media_url"], + ) + video_folder_should = os.path.split(video_path_should)[0] + return video_path_should, video_folder_should + + def process(self, video_path_is): + """fix filepath""" + print(f"{self.youtube_id}: fixing channel rename.") + cache_dir = self.config["application"]["cache_dir"] + new_file_path = os.path.join( + cache_dir, "download", self.youtube_id + ".mp4" + ) + shutil.move(video_path_is, new_file_path) + VideoDownloader().move_to_archive(self.video.json_data) + self.video.update_media_url() diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 290e0ce..7b951d8 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -151,6 +151,26 @@ class YoutubeSubtitle: """send subtitle to es for indexing""" _, _ = ElasticWrap("_bulk").post(data=query_str, ndjson=True) + def delete(self): + """delete subtitles from index and filesystem""" + youtube_id = self.video.youtube_id + # delete files + videos_base = self.video.config["application"]["videos"] + if not self.video.json_data.get("subtitles"): + return + + files = [i["media_url"] for i in self.video.json_data["subtitles"]] + for file_name in files: + file_path = os.path.join(videos_base, file_name) + try: + os.remove(file_path) + except FileNotFoundError: + print(f"{youtube_id}: {file_path} failed to delete") + # delete from index + path = "ta_subtitle/_delete_by_query?refresh=true" + data = {"query": {"term": {"youtube_id": {"value": youtube_id}}}} + _, _ = ElasticWrap(path).post(data=data) + class SubtitleParser: """parse subtitle str from youtube""" @@ -542,22 +562,21 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): """delete video file, meta data""" self.get_from_es() video_base = self.app_conf["videos"] - to_del = [self.json_data.get("media_url")] - - all_subtitles = self.json_data.get("subtitles") - if all_subtitles: - to_del = to_del + [i.get("media_url") for i in all_subtitles] - - for media_url in to_del: - file_path = os.path.join(video_base, media_url) - try: - os.remove(file_path) - except FileNotFoundError: - print(f"{self.youtube_id}: failed {media_url}, continue.") + media_url = self.json_data.get("media_url") + file_path = os.path.join(video_base, media_url) + try: + os.remove(file_path) + except FileNotFoundError: + print(f"{self.youtube_id}: failed {media_url}, continue.") self.del_in_es() self.delete_subtitles() + def delete_subtitles(self): + """delete indexed subtitles""" + print(f"{self.youtube_id}: delete subtitles") + YoutubeSubtitle(self).delete() + def _get_ryd_stats(self): """get optional stats from returnyoutubedislikeapi.com""" try: @@ -592,10 +611,10 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self.json_data["subtitles"] = subtitles handler.download_subtitles(relevant_subtitles=subtitles) - def delete_subtitles(self): - """delete indexed subtitles""" - path = "ta_subtitle/_delete_by_query?refresh=true" - data = {"query": {"term": {"youtube_id": {"value": self.youtube_id}}}} + def update_media_url(self): + """update only media_url in es for reindex channel rename""" + data = {"doc": {"media_url": self.json_data["media_url"]}} + path = f"{self.index_name}/_update/{self.youtube_id}" _, _ = ElasticWrap(path).post(data=data) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 
diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html
index 522e488..c529443 100644
--- a/tubearchivist/home/templates/home/settings.html
+++ b/tubearchivist/home/templates/home/settings.html
@@ -118,8 +118,13 @@
                 <div class="settings-group">
                     <h2 id="cookie">Cookie</h2>
                     <div class="settings-item">
                         <p>Import YouTube cookie: <span class="settings-current">{{ config.downloads.cookie_import }}</span></p>
-                        <i>Place your cookie file named <span class="settings-current">cookies.google.txt</span> in <span class="settings-current">/cache/import</span> before enabling.</i><br>
-                        {{ app_form.downloads_cookie_import }}
+                        <i>Place your cookie file named <span class="settings-current">cookies.google.txt</span> in <span class="settings-current">cache/import</span> before enabling.</i><br>
+                        {{ app_form.downloads_cookie_import }}<br>
+                        {% if config.downloads.cookie_import %}
+                            <div id="cookieMessage">
+                                <button id="cookieButton" onclick="handleCookieValidate()" type="button">Validate Cookie File</button>
+                            </div>
+                        {% endif %}
                     </div>
                 </div>
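The button posts to `/api/cookie/` and the script below expects a `cookie_validated` boolean in the reply. The view itself is not part of this diff; the following is a hypothetical Django REST framework sketch of its shape, with `CookieView` and `_validate_cookie` as illustrative names only.

```python
# Hypothetical sketch of the endpoint behind the Validate Cookie File button.
# The real view ships with Tube Archivist and is not shown in this diff.
from rest_framework.response import Response
from rest_framework.views import APIView


class CookieView(APIView):
    """POST /api/cookie/ -> {"cookie_validated": bool}"""

    def post(self, request):
        # a real implementation would load the stored cookie from Redis
        # and attempt an authenticated request against YouTube
        validated = self._validate_cookie()
        return Response({"cookie_validated": validated})

    @staticmethod
    def _validate_cookie():
        return True  # placeholder result
```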
diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js
index 69caac7..e20d31c 100644
--- a/tubearchivist/static/script.js
+++ b/tubearchivist/static/script.js
@@ -699,6 +699,24 @@ function postSponsorSegmentVote(videoId, uuid, vote) {
     apiRequest(apiEndpoint, "POST", data);
 }
 
+function handleCookieValidate() {
+    document.getElementById("cookieButton").remove();
+    var cookieMessageElement = document.getElementById("cookieMessage");
+    cookieMessageElement.innerHTML = `Processing.`;
+    var response = postCookieValidate();
+    if (response.cookie_validated == true) {
+        cookieMessageElement.innerHTML = `The cookie file is valid.`;
+    } else {
+        cookieMessageElement.innerHTML = `Warning, the cookie file is invalid.`;
+    }
+}
+
+// Check YouTube cookie settings
+function postCookieValidate() {
+    var apiEndpoint = "/api/cookie/";
+    return apiRequest(apiEndpoint, "POST");
+}
+
 // Makes api requests when passed an endpoint and method ("GET", "POST", "DELETE")
 function apiRequest(apiEndpoint, method, data) {
     const xhttp = new XMLHttpRequest();
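To exercise the endpoint outside the browser, a minimal `requests` sketch; the instance URL and token are placeholders, and token-header authentication is assumed. The response shape matches what `handleCookieValidate()` reads.

```python
# Sketch: call the cookie validation endpoint directly.
import requests

TA_URL = "http://localhost:8000"  # your Tube Archivist instance
TOKEN = "your-api-token"          # placeholder credential

resp = requests.post(
    f"{TA_URL}/api/cookie/",
    headers={"Authorization": f"Token {TOKEN}"},
    timeout=30,
)
if resp.json().get("cookie_validated"):
    print("The cookie file is valid.")
else:
    print("Warning, the cookie file is invalid.")
```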