From 7d2bdc58baca98fd92fecbf5f21cdbd384202f1c Mon Sep 17 00:00:00 2001
From: simon
Date: Wed, 11 Jan 2023 22:00:24 +0700
Subject: [PATCH 1/3] bump libraries

---
 tubearchivist/requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt
index f2e56a7..27586b9 100644
--- a/tubearchivist/requirements.txt
+++ b/tubearchivist/requirements.txt
@@ -5,9 +5,9 @@ django-auth-ldap==4.1.0
 django-cors-headers==3.13.0
 djangorestframework==3.14.0
 Pillow==9.4.0
-redis==4.4.0
+redis==4.4.2
 requests==2.28.1
 ryd-client==0.0.6
 uWSGI==2.0.21
 whitenoise==6.3.0
-yt_dlp==2023.1.2
+yt_dlp==2023.1.6

From 00d7c33af63c9e1f3fe650cf39ed2eec9963f6ae Mon Sep 17 00:00:00 2001
From: simon
Date: Wed, 11 Jan 2023 22:00:44 +0700
Subject: [PATCH 2/3] randomize version_check schedule

---
 tubearchivist/home/config.json      |  2 +-
 tubearchivist/home/src/ta/config.py | 36 +++++++++++++++++++----------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json
index aa44639..420e27b 100644
--- a/tubearchivist/home/config.json
+++ b/tubearchivist/home/config.json
@@ -52,6 +52,6 @@
         "thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
         "run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
         "run_backup_rotate": 5,
-        "version_check": {"minute": "0", "hour": "11", "day_of_week": "*"}
+        "version_check": "rand-d"
     }
 }
diff --git a/tubearchivist/home/src/ta/config.py b/tubearchivist/home/src/ta/config.py
index abd4475..3b9a77b 100644
--- a/tubearchivist/home/src/ta/config.py
+++ b/tubearchivist/home/src/ta/config.py
@@ -7,6 +7,7 @@ Functionality:
 import json
 import os
 import re
+from random import randint
 
 import requests
 from celery.schedules import crontab
@@ -117,6 +118,15 @@ class AppConfig:
         self.config["application"]["colors"] = colors
         return colors
 
+    @staticmethod
+    def _build_rand_daily():
+        """build random daily schedule per installation"""
+        return {
+            "minute": randint(0, 59),
+            "hour": randint(0, 23),
+            "day_of_week": "*",
+        }
+
     def load_new_defaults(self):
         """check config.json for missing defaults"""
         default_config = self.get_config_file()
@@ -140,6 +150,9 @@ class AppConfig:
             # missing nested values
             for sub_key, sub_value in value.items():
                 if sub_key not in redis_config[key].keys():
+                    if sub_value == "rand-d":
+                        sub_value = self._build_rand_daily()
+
                     redis_config[key].update({sub_key: sub_value})
                     needs_update = True
 
@@ -256,19 +269,18 @@ class ScheduleBuilder:
             if not item_conf:
                 continue
 
-            minute = item_conf["minute"]
-            hour = item_conf["hour"]
-            day_of_week = item_conf["day_of_week"]
-            schedule_name = f"schedule_{schedule_item}"
-            to_add = {
-                schedule_name: {
-                    "task": schedule_item,
-                    "schedule": crontab(
-                        minute=minute, hour=hour, day_of_week=day_of_week
-                    ),
+            schedule_dict.update(
+                {
+                    f"schedule_{schedule_item}": {
+                        "task": schedule_item,
+                        "schedule": crontab(
+                            minute=item_conf["minute"],
+                            hour=item_conf["hour"],
+                            day_of_week=item_conf["day_of_week"],
+                        ),
+                    }
                 }
-            }
-            schedule_dict.update(to_add)
+            )
 
         return schedule_dict
 

From 8db361cc8841a3d4977a9271663724317db2ef5a Mon Sep 17 00:00:00 2001
From: simon
Date: Wed, 11 Jan 2023 23:06:02 +0700
Subject: [PATCH 3/3] channel fullscan to fix vid_type in refresh task

---
 tubearchivist/home/src/index/channel.py | 11 ++++
 tubearchivist/home/src/index/reindex.py | 75 +++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py
index 894ce32..477d178 100644
--- a/tubearchivist/home/src/index/channel.py
+++ b/tubearchivist/home/src/index/channel.py
@@ -381,6 +381,17 @@ class YoutubeChannel(YouTubeItem):
 
         return all_youtube_ids
 
+    def get_channel_videos(self):
+        """get all videos from channel"""
+        data = {
+            "query": {
+                "term": {"channel.channel_id": {"value": self.youtube_id}}
+            },
+            "_source": ["youtube_id", "vid_type"],
+        }
+        all_videos = IndexPaginate("ta_video", data).get_results()
+        return all_videos
+
     def get_all_playlists(self):
         """get all playlists owned by this channel"""
         url = (
diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py
index b8058c6..f50223c 100644
--- a/tubearchivist/home/src/index/reindex.py
+++ b/tubearchivist/home/src/index/reindex.py
@@ -4,12 +4,14 @@ functionality:
 - index and update in es
 """
 
+import json
 import os
 import shutil
 from datetime import datetime
 from time import sleep
 
 from home.src.download.queue import PendingList
+from home.src.download.subscriptions import ChannelSubscription
 from home.src.download.thumbnails import ThumbManager
 from home.src.download.yt_dlp_base import CookieHandler
 from home.src.download.yt_dlp_handler import VideoDownloader
@@ -307,6 +309,8 @@ class Reindex(ReindexBase):
         channel.upload_to_es()
         channel.sync_to_videos()
 
+        ChannelFullScan(channel_id).scan()
+
     def _reindex_single_playlist(self, playlist_id):
         """refresh playlist data"""
         self._get_all_videos()
@@ -473,3 +477,74 @@ class ChannelUrlFixer:
         shutil.move(video_path_is, new_path, copy_function=shutil.copyfile)
         VideoDownloader().move_to_archive(self.video.json_data)
         self.video.update_media_url()
+
+
+class ChannelFullScan:
+    """
+    update from v0.3.0 to v0.3.1
+    full scan of channel to fix vid_type mismatch
+    """
+
+    def __init__(self, channel_id):
+        self.channel_id = channel_id
+        self.to_update = False
+
+    def scan(self):
+        """match local with remote"""
+        print(f"{self.channel_id}: start full scan")
+        all_local_videos = self._get_all_local()
+        all_remote_videos = self._get_all_remote()
+        self.to_update = []
+        for video in all_local_videos:
+            video_id = video["youtube_id"]
+            remote_match = [i for i in all_remote_videos if i[0] == video_id]
+            if not remote_match:
+                print(f"{video_id}: no remote match found")
+                continue
+
+            expected_type = remote_match[0][-1].value
+            if video["vid_type"] != expected_type:
+                self.to_update.append(
+                    {
+                        "video_id": video_id,
+                        "vid_type": expected_type,
+                    }
+                )
+
+        self.update()
+
+    def _get_all_remote(self):
+        """get all channel videos"""
+        sub = ChannelSubscription()
+        all_remote_videos = sub.get_last_youtube_videos(
+            self.channel_id, limit=False
+        )
+
+        return all_remote_videos
+
+    def _get_all_local(self):
+        """get all local indexed channel_videos"""
+        channel = YoutubeChannel(self.channel_id)
+        all_local_videos = channel.get_channel_videos()
+
+        return all_local_videos
+
+    def update(self):
+        """build bulk query for updates"""
+        if not self.to_update:
+            print(f"{self.channel_id}: nothing to update")
+            return
+
+        print(f"{self.channel_id}: fixing {len(self.to_update)} videos")
+        bulk_list = []
+        for video in self.to_update:
+            action = {
+                "update": {"_id": video.get("video_id"), "_index": "ta_video"}
+            }
+            source = {"doc": {"vid_type": video.get("vid_type")}}
+            bulk_list.append(json.dumps(action))
+            bulk_list.append(json.dumps(source))
+        # add last newline
+        bulk_list.append("\n")
+        data = "\n".join(bulk_list)
+        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
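
Notes on the series above (illustrative sketches, not part of the patches):

* When load_new_defaults() in patch 2 hits the "rand-d" placeholder from config.json, the value written to redis is a concrete schedule dict from _build_rand_daily(), for example {"minute": 37, "hour": 4, "day_of_week": "*"}; the minute and hour shown here are made up, they are randomized per installation.

* The body that ChannelFullScan.update() in patch 3 posts to the Elasticsearch _bulk endpoint is NDJSON, one update action line followed by one partial doc line per video, ending with a trailing newline (the final bulk_list.append("\n")), roughly:

    {"update": {"_id": "abc123xyz00", "_index": "ta_video"}}
    {"doc": {"vid_type": "shorts"}}
    {"update": {"_id": "def456uvw11", "_index": "ta_video"}}
    {"doc": {"vid_type": "videos"}}

  The video IDs and vid_type values above are placeholders for illustration only.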