From bc84696792c00fac58e187a1895a07dc7f0d3af5 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 11 Oct 2021 13:33:28 +0700 Subject: [PATCH] new thumbnails module and new cache layout --- tubearchivist/home/apps.py | 2 + tubearchivist/home/src/download.py | 12 +- tubearchivist/home/src/searching.py | 11 +- tubearchivist/home/src/thumbnails.py | 114 ++++++++++++++++++ tubearchivist/home/tasks.py | 4 +- .../home/templates/home/channel_id.html | 4 +- .../home/templates/home/downloads.html | 2 +- tubearchivist/home/templates/home/home.html | 4 +- tubearchivist/home/templates/home/video.html | 2 +- 9 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 tubearchivist/home/src/thumbnails.py diff --git a/tubearchivist/home/apps.py b/tubearchivist/home/apps.py index 741972d..a036900 100644 --- a/tubearchivist/home/apps.py +++ b/tubearchivist/home/apps.py @@ -6,6 +6,7 @@ from django.apps import AppConfig from home.src.config import AppConfig as ArchivistConfig from home.src.helper import RedisArchivist from home.src.index_management import index_check +from home.src.thumbnails import validate_thumbnails def sync_redis_state(): @@ -48,3 +49,4 @@ class HomeConfig(AppConfig): index_check() sync_redis_state() make_folders() + validate_thumbnails() diff --git a/tubearchivist/home/src/download.py b/tubearchivist/home/src/download.py index 7706b86..d109f18 100644 --- a/tubearchivist/home/src/download.py +++ b/tubearchivist/home/src/download.py @@ -70,6 +70,7 @@ class PendingList: all_downloaded = self.get_all_downloaded() # loop bulk_list = [] + all_videos_added = [] for video in missing_videos: if isinstance(video, str): youtube_id = video @@ -79,6 +80,7 @@ class PendingList: # skip already downloaded continue video = self.get_youtube_details(youtube_id) + thumb_url = video["vid_thumb_url"] # skip on download error if not video: continue @@ -91,6 +93,7 @@ class PendingList: action = {"create": {"_id": youtube_id, "_index": "ta_download"}} bulk_list.append(json.dumps(action)) bulk_list.append(json.dumps(video)) + all_videos_added.append((youtube_id, thumb_url)) # notify mess_dict = { "status": "pending", @@ -108,6 +111,8 @@ class PendingList: if not request.ok: print(request) + return all_videos_added + @staticmethod def get_youtube_details(youtube_id): """get details from youtubedl for single pending video""" @@ -165,12 +170,11 @@ class PendingList: all_hits = json_data["hits"]["hits"] if all_hits: for hit in all_hits: - youtube_id = hit["_source"]["youtube_id"] status = hit["_source"]["status"] if status == "pending": all_pending.append(hit["_source"]) elif status == "ignore": - all_ignore.append(youtube_id) + all_ignore.append(hit["_source"]) search_after = hit["sort"] # update search_after with last hit data data["search_after"] = search_after @@ -342,9 +346,9 @@ class ChannelSubscription: all_channels = self.get_channels() pending_handler = PendingList() all_pending, all_ignore = pending_handler.get_all_pending() - all_pending_ids = [i["youtube_id"] for i in all_pending] + all_ids = [i["youtube_id"] for i in all_ignore + all_pending] all_downloaded = pending_handler.get_all_downloaded() - to_ignore = all_pending_ids + all_ignore + all_downloaded + to_ignore = all_ids + all_downloaded missing_videos = [] counter = 1 for channel in all_channels: diff --git a/tubearchivist/home/src/searching.py b/tubearchivist/home/src/searching.py index b63f3f5..ce241c1 100644 --- a/tubearchivist/home/src/searching.py +++ b/tubearchivist/home/src/searching.py @@ -14,6 +14,7 @@ from datetime import datetime import requests from home.src.config import AppConfig from home.src.helper import ignore_filelist +from home.src.thumbnails import ThumbManager from PIL import Image @@ -63,8 +64,9 @@ class SearchHandler: all_channels.append(channel_dict) if self.cache: # validate cache - self.cache_dl_vids(all_videos) - self.cache_dl_chan(all_channels) + pass + # self.cache_dl_vids(all_videos) + # self.cache_dl_chan(all_channels) return return_value @@ -168,6 +170,11 @@ class SearchHandler: date_str = datetime.strftime(date_refresh, "%d %b, %Y") hit["source"]["vid_last_refresh"] = date_str + if "vid_thumb_url" in hit_keys: + youtube_id = hit["source"]["youtube_id"] + thumb_path = ThumbManager().vid_thumb_path(youtube_id) + hit["source"]["vid_thumb_url"] = thumb_path + if "channel_last_refresh" in hit_keys: refreshed = hit["source"]["channel_last_refresh"] date_refresh = datetime.fromtimestamp(refreshed) diff --git a/tubearchivist/home/src/thumbnails.py b/tubearchivist/home/src/thumbnails.py new file mode 100644 index 0000000..9aeee87 --- /dev/null +++ b/tubearchivist/home/src/thumbnails.py @@ -0,0 +1,114 @@ +""" +functionality: +- handle download and caching for thumbnails +""" + +import os + +import requests +from home.src.config import AppConfig +from home.src.download import PendingList +from home.src.helper import RedisArchivist, ignore_filelist +from PIL import Image + + +class ThumbManager: + """handle thumbnails related functions""" + + CONFIG = AppConfig().config + CACHE_DIR = CONFIG["application"]["cache_dir"] + VIDEO_DIR = os.path.join(CACHE_DIR, "videos") + + def get_all_thumbs(self): + """raise exception if cache not clean""" + all_thumb_folders = ignore_filelist(os.listdir(self.VIDEO_DIR)) + all_thumbs = [] + for folder in all_thumb_folders: + folder_path = os.path.join(self.VIDEO_DIR, folder) + if os.path.isfile(folder_path): + self.update_path(folder) + all_thumbs.append(folder_path) + continue + # raise exemption here in a future version + # raise FileExistsError("video cache dir has files inside") + + all_folder_thumbs = ignore_filelist(os.listdir(folder_path)) + all_thumbs.extend(all_folder_thumbs) + + return all_thumbs + + def update_path(self, file_name): + """reorganize thumbnails into folders as update path from v0.0.5""" + folder_name = file_name[0].lower() + folder_path = os.path.join(self.VIDEO_DIR, folder_name) + old_file = os.path.join(self.VIDEO_DIR, file_name) + new_file = os.path.join(folder_path, file_name) + os.makedirs(folder_path, exist_ok=True) + os.rename(old_file, new_file) + + def get_missing_thumbs(self): + """get a list of all missing thumbnails""" + all_thumbs = self.get_all_thumbs() + all_indexed = PendingList().get_all_indexed() + all_in_queue, all_ignored = PendingList().get_all_pending() + + missing_thumbs = [] + for video in all_indexed: + youtube_id = video["_source"]["youtube_id"] + if youtube_id + ".jpg" not in all_thumbs: + thumb_url = video["_source"]["vid_thumb_url"] + missing_thumbs.append((youtube_id, thumb_url)) + + for video in all_in_queue + all_ignored: + youtube_id = video["youtube_id"] + if youtube_id + ".jpg" not in all_thumbs: + thumb_url = video["vid_thumb_url"] + missing_thumbs.append((youtube_id, thumb_url)) + + return missing_thumbs + + def download_missing(self, missing_thumbs): + """download all missing thumbnails from list""" + print(f"downloading {len(missing_thumbs)} thumbnails") + vid_cache = os.path.join(self.CACHE_DIR, "videos") + # videos + for youtube_id, thumb_url in missing_thumbs: + folder_name = youtube_id[0].lower() + folder_path = os.path.join(vid_cache, folder_name) + thumb_path_part = self.vid_thumb_path(youtube_id) + thumb_path = os.path.join(self.CACHE_DIR, thumb_path_part) + + os.makedirs(folder_path, exist_ok=True) + img_raw = requests.get(thumb_url, stream=True).raw + img = Image.open(img_raw) + + width, height = img.size + if not width / height == 16 / 9: + new_height = width / 16 * 9 + offset = (height - new_height) / 2 + img = img.crop((0, offset, width, height - offset)) + + img.convert("RGB").save(thumb_path) + + mess_dict = { + "status": "pending", + "level": "info", + "title": "Adding to download queue.", + "message": "Downloading Thumbnails...", + } + RedisArchivist().set_message("progress:download", mess_dict) + + @staticmethod + def vid_thumb_path(youtube_id): + """build expected path for video thumbnail from youtube_id""" + folder_name = youtube_id[0].lower() + folder_path = os.path.join("videos", folder_name) + thumb_path = os.path.join(folder_path, youtube_id + ".jpg") + return thumb_path + + +def validate_thumbnails(): + """check if all thumbnails are there and organized correctly""" + handler = ThumbManager() + thumbs_to_download = handler.get_missing_thumbs() + handler.download_missing(thumbs_to_download) diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 7543c17..fa8562e 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -16,6 +16,7 @@ from home.src.reindex import ( reindex_old_documents, scan_filesystem, ) +from home.src.thumbnails import ThumbManager CONFIG = AppConfig().config REDIS_HOST = os.environ.get("REDIS_HOST") @@ -90,7 +91,8 @@ def extrac_dl(youtube_ids): """parse list passed and add to pending""" pending_handler = PendingList() missing_videos = pending_handler.parse_url_list(youtube_ids) - pending_handler.add_to_pending(missing_videos) + all_videos_added = pending_handler.add_to_pending(missing_videos) + ThumbManager().download_missing(all_videos_added) @shared_task diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html index 5fc417b..a1052e0 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -75,10 +75,10 @@ {% if videos %} {% for video in videos %}
- +