From 65d768bf02692843c0a6305acc9f6bb114f0e847 Mon Sep 17 00:00:00 2001 From: Merlin <4706504+MerlinScheurer@users.noreply.github.com> Date: Mon, 4 Sep 2023 12:24:48 +0200 Subject: [PATCH 01/25] Fix url to settings documentation (#535) --- tubearchivist/home/templates/home/settings_actions.html | 4 ++-- tubearchivist/home/templates/home/settings_application.html | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tubearchivist/home/templates/home/settings_actions.html b/tubearchivist/home/templates/home/settings_actions.html index 74405f7..be6ffba 100644 --- a/tubearchivist/home/templates/home/settings_actions.html +++ b/tubearchivist/home/templates/home/settings_actions.html @@ -12,7 +12,7 @@

Manual media files import.

-

Add files to the cache/import folder. Make sure to follow the instructions in the Github Wiki.

+

Add files to the cache/import folder. Make sure to follow the instructions in the Github Wiki.

@@ -58,7 +58,7 @@

Rescan filesystem

Danger Zone: This will delete the metadata of deleted videos from the filesystem.

-

Rescan your media folder looking for missing videos and clean up index. More infos on the Github Wiki.

+

Rescan your media folder looking for missing videos and clean up index. More infos on the Github Wiki.

diff --git a/tubearchivist/home/templates/home/settings_application.html b/tubearchivist/home/templates/home/settings_application.html index 12f7cdb..813ad02 100644 --- a/tubearchivist/home/templates/home/settings_application.html +++ b/tubearchivist/home/templates/home/settings_application.html @@ -141,7 +141,7 @@

Import YouTube cookie: {{ config.downloads.cookie_import }}

For automatic cookie import use Tube Archivist Companion browser extension.

- For manual cookie import, place your cookie file named cookies.google.txt in cache/import before enabling. Instructions in the Wiki.
+ For manual cookie import, place your cookie file named cookies.google.txt in cache/import before enabling. Instructions in the Wiki.
{{ app_form.downloads_cookie_import }}
{% if config.downloads.cookie_import %}
@@ -174,7 +174,7 @@

Snapshots

Current system snapshot: {{ config.application.enable_snapshot }}

- Automatically create daily deduplicated snapshots of the index, stored in Elasticsearch. Read first before activating: Wiki.
+ Automatically create daily deduplicated snapshots of the index, stored in Elasticsearch. Read first before activating: Wiki.
{{ app_form.application_enable_snapshot }}
From 317942b7e182da48d5c61ac531f202f013dece9b Mon Sep 17 00:00:00 2001 From: Merlin <4706504+MerlinScheurer@users.noreply.github.com> Date: Mon, 4 Sep 2023 12:51:06 +0200 Subject: [PATCH 02/25] Refac biggest channels to be 3 seperate tables with ordering and right align (#536) * Add right align to numbers on biggest channels in dashboard * Refac biggest channels to be 3 seperate tables with ordering * Fix aggs linting --- tubearchivist/api/src/aggs.py | 3 + tubearchivist/api/views.py | 4 +- .../home/templates/home/settings.html | 50 +++++++++--- tubearchivist/static/css/style.css | 14 ++-- tubearchivist/static/stats.js | 76 ++++++++++++++----- 5 files changed, 110 insertions(+), 37 deletions(-) diff --git a/tubearchivist/api/src/aggs.py b/tubearchivist/api/src/aggs.py index aa35c39..5c9049f 100644 --- a/tubearchivist/api/src/aggs.py +++ b/tubearchivist/api/src/aggs.py @@ -196,6 +196,9 @@ class DownloadHist(AggBase): class BiggestChannel(AggBase): """get channel aggregations""" + def __init__(self, order): + self.data["aggs"][self.name]["multi_terms"]["order"] = {order: "desc"} + name = "channel_stats" path = "ta_video/_search" data = { diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 24db6dd..5d3e59c 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -1025,9 +1025,9 @@ class StatBiggestChannel(ApiBaseView): def get(self, request): """handle get request""" - order = request.GET.get("order", False) + order = request.GET.get("order", "doc_count") if order and order not in self.order_choices: message = {"message": f"invalid order parameter {order}"} return Response(message, status=400) - return Response(BiggestChannel().process()) + return Response(BiggestChannel(order).process()) diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 9524e2a..d28d2a7 100644 --- a/tubearchivist/home/templates/home/settings.html +++ 
b/tubearchivist/home/templates/home/settings.html @@ -22,21 +22,47 @@

Loading...

+

Biggest Channels

-
- - - - - - - - - - -
NameVideosDurationMedia Size
+
+
+ + + + + + + + +
NameVideos
+
+ +
+ + + + + + + + +
NameDuration
+
+ +
+ + + + + + + + +
NameMedia Size
+
+ {% endblock settings_content %} diff --git a/tubearchivist/static/css/style.css b/tubearchivist/static/css/style.css index e0758a3..72a3b6d 100644 --- a/tubearchivist/static/css/style.css +++ b/tubearchivist/static/css/style.css @@ -1099,6 +1099,15 @@ video:-webkit-full-screen { min-width: 300px; } +.settings-item .agg-channel-table { + width: 100%; +} + +.settings-item .agg-channel-right-align { + white-space: nowrap; + text-align: right; +} + .danger-zone { background-color: var(--highlight-error); color: #fff; @@ -1316,11 +1325,6 @@ video:-webkit-full-screen { .playlist-nav-item img { width: 100%; } - .agg-channel-name { - min-width: 50px; - width: 100px; - max-width: 200px; - } .td, th, span, label { text-align: unset; } diff --git a/tubearchivist/static/stats.js b/tubearchivist/static/stats.js index 992615b..2373266 100644 --- a/tubearchivist/static/stats.js +++ b/tubearchivist/static/stats.js @@ -128,31 +128,71 @@ function buildDailyStat(dailyStat) { return tile; } -function biggestChannel() { - let apiEndpoint = '/api/stats/biggestchannels/'; - let responseData = apiRequest(apiEndpoint, 'GET'); - let tBody = document.getElementById('biggestChannelTable'); +function humanFileSize(size) { + let i = size === 0 ? 
0 : Math.floor(Math.log(size) / Math.log(1024)); + return (size / Math.pow(1024, i)).toFixed(1) * 1 + ' ' + ['B', 'kB', 'MB', 'GB', 'TB'][i]; +} + +function buildChannelRow(id, name, value) { + let tableRow = document.createElement('tr'); + + tableRow.innerHTML = ` + ${name} + ${value} + `; + + return tableRow; +} + +function addBiggestChannelByDocCount() { + let tBody = document.getElementById('biggestChannelTableVideos'); + + let apiEndpoint = '/api/stats/biggestchannels/?order=doc_count'; + const responseData = apiRequest(apiEndpoint, 'GET'); + for (let i = 0; i < responseData.length; i++) { - const channelData = responseData[i]; - let tableRow = buildChannelRow(channelData); + const { id, name, doc_count } = responseData[i]; + + let tableRow = buildChannelRow(id, name, doc_count); + tBody.appendChild(tableRow); } } -function buildChannelRow(channelData) { - let tableRow = document.createElement('tr'); - tableRow.innerHTML = ` - ${channelData.name} - ${channelData.doc_count} - ${channelData.duration_str} - ${humanFileSize(channelData.media_size)} - `; - return tableRow; +function addBiggestChannelByDuration() { + const tBody = document.getElementById('biggestChannelTableDuration'); + + let apiEndpoint = '/api/stats/biggestchannels/?order=duration'; + const responseData = apiRequest(apiEndpoint, 'GET'); + + for (let i = 0; i < responseData.length; i++) { + const { id, name, duration_str } = responseData[i]; + + let tableRow = buildChannelRow(id, name, duration_str); + + tBody.appendChild(tableRow); + } } -function humanFileSize(size) { - let i = size === 0 ? 
0 : Math.floor(Math.log(size) / Math.log(1024)); - return (size / Math.pow(1024, i)).toFixed(1) * 1 + ' ' + ['B', 'kB', 'MB', 'GB', 'TB'][i]; +function addBiggestChannelByMediaSize() { + let tBody = document.getElementById('biggestChannelTableMediaSize'); + + let apiEndpoint = '/api/stats/biggestchannels/?order=media_size'; + const responseData = apiRequest(apiEndpoint, 'GET'); + + for (let i = 0; i < responseData.length; i++) { + const { id, name, media_size } = responseData[i]; + + let tableRow = buildChannelRow(id, name, humanFileSize(media_size)); + + tBody.appendChild(tableRow); + } +} + +function biggestChannel() { + addBiggestChannelByDocCount(); + addBiggestChannelByDuration(); + addBiggestChannelByMediaSize(); } async function buildStats() { From dc41e5062d2e3d1ac5483a1bc37c489ddfeab4c2 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 4 Sep 2023 18:49:10 +0700 Subject: [PATCH 03/25] refactor duration class into separate helper functions --- tubearchivist/api/src/aggs.py | 10 ++-- tubearchivist/home/src/download/queue.py | 8 +-- tubearchivist/home/src/frontend/searching.py | 6 +-- tubearchivist/home/src/index/video.py | 13 ++--- tubearchivist/home/src/index/video_streams.py | 54 ------------------- tubearchivist/home/src/ta/helper.py | 42 +++++++++++++++ 6 files changed, 55 insertions(+), 78 deletions(-) diff --git a/tubearchivist/api/src/aggs.py b/tubearchivist/api/src/aggs.py index 5c9049f..1e2f54c 100644 --- a/tubearchivist/api/src/aggs.py +++ b/tubearchivist/api/src/aggs.py @@ -1,7 +1,7 @@ """aggregations""" from home.src.es.connect import ElasticWrap -from home.src.index.video_streams import DurationConverter +from home.src.ta.helper import get_duration_str class AggBase: @@ -119,7 +119,7 @@ class WatchProgress(AggBase): { "all": { "duration": all_duration, - "duration_str": DurationConverter().get_str(all_duration), + "duration_str": get_duration_str(all_duration), "items": aggregations["total_vids"].get("value"), } } @@ -135,7 +135,7 @@ class 
WatchProgress(AggBase): """parse bucket""" duration = int(bucket["watch_docs"]["duration"]["value"]) - duration_str = DurationConverter().get_str(duration) + duration_str = get_duration_str(duration) items = bucket["watch_docs"]["true_count"]["value"] if bucket["key_as_string"] == "false": key = "unwatched" @@ -234,9 +234,7 @@ class BiggestChannel(AggBase): "name": i["key"][0].title(), "doc_count": i["doc_count"]["value"], "duration": i["duration"]["value"], - "duration_str": DurationConverter().get_str( - i["duration"]["value"] - ), + "duration_str": get_duration_str(int(i["duration"]["value"])), "media_size": i["media_size"]["value"], } for i in buckets diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 97636bb..0f92bd4 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -16,9 +16,8 @@ from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.playlist import YoutubePlaylist from home.src.index.video_constants import VideoTypeEnum -from home.src.index.video_streams import DurationConverter from home.src.ta.config import AppConfig -from home.src.ta.helper import is_shorts +from home.src.ta.helper import get_duration_str, is_shorts class PendingIndex: @@ -335,9 +334,6 @@ class PendingList(PendingIndex): def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS): """parse response""" vid_id = vid.get("id") - duration_str = DurationConverter.get_str(vid["duration"]) - if duration_str == "NA": - print(f"skip extracting duration for: {vid_id}") published = datetime.strptime(vid["upload_date"], "%Y%m%d").strftime( "%Y-%m-%d" ) @@ -349,7 +345,7 @@ class PendingList(PendingIndex): "vid_thumb_url": vid["thumbnail"], "title": vid["title"], "channel_id": vid["channel_id"], - "duration": duration_str, + "duration": get_duration_str(vid["duration"]), "published": published, "timestamp": 
int(datetime.now().timestamp()), # Pulling enum value out so it is serializable diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py index 9977f36..b9f2624 100644 --- a/tubearchivist/home/src/frontend/searching.py +++ b/tubearchivist/home/src/frontend/searching.py @@ -11,8 +11,8 @@ from datetime import datetime from home.src.download.thumbnails import ThumbManager from home.src.es.connect import ElasticWrap -from home.src.index.video_streams import DurationConverter from home.src.ta.config import AppConfig +from home.src.ta.helper import get_duration_str class SearchHandler: @@ -45,9 +45,9 @@ class SearchHandler: if response.get("aggregations"): self.aggs = response["aggregations"] if "total_duration" in self.aggs: - duration_sec = self.aggs["total_duration"]["value"] + duration_sec = int(self.aggs["total_duration"]["value"]) self.aggs["total_duration"].update( - {"value_str": DurationConverter().get_str(duration_sec)} + {"value_str": get_duration_str(duration_sec)} ) return return_value diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 87360ae..626d257 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -16,11 +16,8 @@ from home.src.index import playlist as ta_playlist from home.src.index.generic import YouTubeItem from home.src.index.subtitle import YoutubeSubtitle from home.src.index.video_constants import VideoTypeEnum -from home.src.index.video_streams import ( - DurationConverter, - MediaStreamExtractor, -) -from home.src.ta.helper import randomizor +from home.src.index.video_streams import MediaStreamExtractor +from home.src.ta.helper import get_duration_sec, get_duration_str, randomizor from home.src.ta.ta_redis import RedisArchivist from ryd_client import ryd_client @@ -249,16 +246,14 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): def add_player(self, media_path=False): """add player information for new 
videos""" vid_path = media_path or self.build_dl_cache_path() + duration = get_duration_sec(vid_path) - duration_handler = DurationConverter() - duration = duration_handler.get_sec(vid_path) - duration_str = duration_handler.get_str(duration) self.json_data.update( { "player": { "watched": False, "duration": duration, - "duration_str": duration_str, + "duration_str": get_duration_str(duration), } } ) diff --git a/tubearchivist/home/src/index/video_streams.py b/tubearchivist/home/src/index/video_streams.py index 7f6f2f2..0d8c182 100644 --- a/tubearchivist/home/src/index/video_streams.py +++ b/tubearchivist/home/src/index/video_streams.py @@ -5,60 +5,6 @@ import subprocess from os import stat -class DurationConverter: - """ - using ffmpeg to get and parse duration from filepath - """ - - @staticmethod - def get_sec(file_path): - """read duration from file""" - duration = subprocess.run( - [ - "ffprobe", - "-v", - "error", - "-show_entries", - "format=duration", - "-of", - "default=noprint_wrappers=1:nokey=1", - file_path, - ], - capture_output=True, - check=True, - ) - duration_raw = duration.stdout.decode().strip() - if duration_raw == "N/A": - return 0 - - duration_sec = int(float(duration_raw)) - return duration_sec - - @staticmethod - def get_str(seconds): - """takes duration in sec and returns clean string""" - if not seconds: - # failed to extract - return "NA" - - days = int(seconds // (24 * 3600)) - hours = int((seconds % (24 * 3600)) // 3600) - minutes = int((seconds % 3600) // 60) - seconds = int(seconds % 60) - - duration_str = str() - if days: - duration_str = f"{days}d " - if hours: - duration_str = duration_str + str(hours).zfill(2) + ":" - if minutes: - duration_str = duration_str + str(minutes).zfill(2) + ":" - else: - duration_str = duration_str + "00:" - duration_str = duration_str + str(seconds).zfill(2) - return duration_str - - class MediaStreamExtractor: """extract stream metadata""" diff --git a/tubearchivist/home/src/ta/helper.py 
b/tubearchivist/home/src/ta/helper.py index 0028c11..db6e4b6 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -7,6 +7,7 @@ import json import os import random import string +import subprocess from datetime import datetime from urllib.parse import urlparse @@ -141,6 +142,47 @@ def is_shorts(youtube_id: str) -> bool: return response.status_code == 200 +def get_duration_sec(file_path: str) -> int: + """get duration of media file from file path""" + + duration = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", + file_path, + ], + capture_output=True, + check=True, + ) + duration_raw = duration.stdout.decode().strip() + if duration_raw == "N/A": + return 0 + + duration_sec = int(float(duration_raw)) + return duration_sec + + +def get_duration_str(seconds: int) -> str: + """Return a human-readable duration string from seconds.""" + if not seconds: + return "NA" + + units = [("y", 31536000), ("d", 86400), ("h", 3600), ("m", 60), ("s", 1)] + duration_parts = [] + + for unit_label, unit_seconds in units: + if seconds >= unit_seconds: + unit_count, seconds = divmod(seconds, unit_seconds) + duration_parts.append(f"{unit_count}{unit_label}") + + return " ".join(duration_parts) + + def ta_host_parser(ta_host: str) -> tuple[list[str], list[str]]: """parse ta_host env var for ALLOWED_HOSTS and CSRF_TRUSTED_ORIGINS""" allowed_hosts: list[str] = [ From 47c433e7c1554ed764429b6e31698acb3b0435c6 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 4 Sep 2023 20:22:00 +0700 Subject: [PATCH 04/25] refactor search form to use new SearchProcess --- tubearchivist/api/src/search_processor.py | 20 ++++++++++++++- tubearchivist/home/src/frontend/searching.py | 6 ++--- tubearchivist/static/script.js | 27 ++++++++------------ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py 
b/tubearchivist/api/src/search_processor.py index e9b4d24..b180486 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -39,7 +39,7 @@ class SearchProcess: def _process_result(self, result): """detect which type of data to process""" index = result["_index"] - processed = False + processed = {} if index == "ta_video": processed = self._process_video(result["_source"]) if index == "ta_channel": @@ -50,6 +50,10 @@ class SearchProcess: processed = self._process_download(result["_source"]) if index == "ta_comment": processed = self._process_comment(result["_source"]) + if index == "ta_subtitle": + processed = self._process_subtitle(result) + + processed.update({"_index": index}) return processed @@ -139,3 +143,17 @@ class SearchProcess: processed_comments[-1]["comment_replies"].append(comment) return processed_comments + + def _process_subtitle(self, result): + """take complete result dict to extract highlight""" + subtitle_dict = result["_source"] + highlight = result.get("highlight") + if highlight: + # replace lines with the highlighted markdown + subtitle_line = highlight.get("subtitle_line")[0] + subtitle_dict.update({"subtitle_line": subtitle_line}) + + thumb_path = ThumbManager(subtitle_dict["youtube_id"]).vid_thumb_path() + subtitle_dict.update({"vid_thumb_url": f"/cache/{thumb_path}"}) + + return subtitle_dict diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py index b9f2624..167b90b 100644 --- a/tubearchivist/home/src/frontend/searching.py +++ b/tubearchivist/home/src/frontend/searching.py @@ -9,6 +9,7 @@ Functionality: import urllib.parse from datetime import datetime +from api.src.search_processor import SearchProcess from home.src.download.thumbnails import ThumbManager from home.src.es.connect import ElasticWrap from home.src.ta.config import AppConfig @@ -114,8 +115,8 @@ class SearchForm: def multi_search(self, search_query): """searching through 
index""" path, query, query_type = SearchParser(search_query).run() - look_up = SearchHandler(path, config=self.CONFIG, data=query) - search_results = look_up.get_data() + response, _ = ElasticWrap(path).get(data=query) + search_results = SearchProcess(response).process() all_results = self.build_results(search_results) return {"results": all_results, "queryType": query_type} @@ -465,7 +466,6 @@ class QueryBuilder: query = { "size": 30, - "_source": {"excludes": "subtitle_line"}, "query": {"bool": {"must": must_list}}, "highlight": { "fields": { diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 990e76c..03984b2 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -938,7 +938,7 @@ function populateMultiSearchResults(allResults, queryType) { videoBox.parentElement.style.display = 'block'; if (allVideos.length > 0) { for (let index = 0; index < allVideos.length; index++) { - const video = allVideos[index].source; + const video = allVideos[index]; const videoDiv = createVideo(video, defaultVideo); videoBox.appendChild(videoDiv); } @@ -957,7 +957,7 @@ function populateMultiSearchResults(allResults, queryType) { channelBox.parentElement.style.display = 'block'; if (allChannels.length > 0) { for (let index = 0; index < allChannels.length; index++) { - const channel = allChannels[index].source; + const channel = allChannels[index]; const channelDiv = createChannel(channel, defaultChannel); channelBox.appendChild(channelDiv); } @@ -976,7 +976,7 @@ function populateMultiSearchResults(allResults, queryType) { playlistBox.parentElement.style.display = 'block'; if (allPlaylists.length > 0) { for (let index = 0; index < allPlaylists.length; index++) { - const playlist = allPlaylists[index].source; + const playlist = allPlaylists[index]; const playlistDiv = createPlaylist(playlist, defaultPlaylist); playlistBox.appendChild(playlistDiv); } @@ -995,7 +995,7 @@ function populateMultiSearchResults(allResults, queryType) 
{ if (allFullText.length > 0) { for (let i = 0; i < allFullText.length; i++) { const fullText = allFullText[i]; - if ('highlight' in fullText) { + if ('subtitle_line' in fullText) { const fullTextDiv = createFulltext(fullText); fullTextBox.appendChild(fullTextDiv); } @@ -1132,19 +1132,14 @@ function createPlaylist(playlist, viewStyle) { } function createFulltext(fullText) { - const videoId = fullText.source.youtube_id; - const videoTitle = fullText.source.title; - const thumbUrl = fullText.source.vid_thumb_url; - const channelId = fullText.source.subtitle_channel_id; - const channelName = fullText.source.subtitle_channel; - const subtitleLine = fullText.highlight.subtitle_line[0]; - const subtitle_start = fullText.source.subtitle_start.split('.')[0]; - const subtitle_end = fullText.source.subtitle_end.split('.')[0]; + const videoId = fullText.youtube_id; + const subtitle_start = fullText.subtitle_start.split('.')[0]; + const subtitle_end = fullText.subtitle_end.split('.')[0]; const markup = `
- video-thumb + video-thumb
play-icon @@ -1153,10 +1148,10 @@ function createFulltext(fullText) {

${subtitle_start} - ${subtitle_end}

-

${subtitleLine}

+

${fullText.subtitle_line}

`; From c6458c6ec144e4660a9ecaad9245842c2d6eeca0 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 4 Sep 2023 20:44:31 +0700 Subject: [PATCH 05/25] add score to full text search --- tubearchivist/api/src/search_processor.py | 7 ++++++- tubearchivist/static/script.js | 9 +++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py index b180486..63218e4 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -53,7 +53,12 @@ class SearchProcess: if index == "ta_subtitle": processed = self._process_subtitle(result) - processed.update({"_index": index}) + processed.update( + { + "_index": index, + "_score": round(result.get("_score", 0), 2), + } + ) return processed diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 03984b2..f7375b8 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -1147,12 +1147,13 @@ function createFulltext(fullText) {
+

${subtitle_start} - ${subtitle_end}

${fullText.subtitle_line}

- + Score: ${fullText._score}
`; const fullTextDiv = document.createElement('div'); From 879f5ab52fbf366fda3c29d1f5ddf6e4485abc83 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 4 Sep 2023 21:05:29 +0700 Subject: [PATCH 06/25] refactor match_progress to use SearchProcess --- tubearchivist/api/src/search_processor.py | 2 +- tubearchivist/home/templates/home/home.html | 24 ++++++++++----------- tubearchivist/home/views.py | 13 ++++++----- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py index 63218e4..d48c6e5 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -56,7 +56,7 @@ class SearchProcess: processed.update( { "_index": index, - "_score": round(result.get("_score", 0), 2), + "_score": round(result.get("_score") or 0, 2), } ) diff --git a/tubearchivist/home/templates/home/home.html b/tubearchivist/home/templates/home/home.html index 46721ee..ade51aa 100644 --- a/tubearchivist/home/templates/home/home.html +++ b/tubearchivist/home/templates/home/home.html @@ -9,14 +9,14 @@
{% for video in continue_vids %}
- +
- video-thumb - {% if video.source.player.progress %} -
+ video-thumb + {% if video.player.progress %} +
{% else %} -
+
{% endif %}
@@ -25,17 +25,17 @@
-
- {% if video.source.player.watched %} - seen-icon +
+ {% if video.player.watched %} + seen-icon {% else %} - unseen-icon + unseen-icon {% endif %} - {{ video.source.published }} | {{ video.source.player.duration_str }} + {{ video.published }} | {{ video.player.duration_str }}
diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 24c10fa..06129d4 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -216,21 +216,20 @@ class ArchivistResultsView(ArchivistViewConfig): "query": {"bool": {"should": ids}}, "sort": [{"published": {"order": "desc"}}], } - search = SearchHandler( - "ta_video/_search", self.default_conf, data=data - ) - videos = search.get_data() + response, _ = ElasticWrap("ta_video/_search").get(data) + videos = SearchProcess(response).process() + if not videos: return False for video in videos: - youtube_id = video["source"]["youtube_id"] + youtube_id = video["youtube_id"] matched = [i for i in results if i["youtube_id"] == youtube_id] played_sec = matched[0]["position"] - total = video["source"]["player"]["duration"] + total = video["player"]["duration"] if not total: total = matched[0].get("position") * 2 - video["source"]["player"]["progress"] = 100 * (played_sec / total) + video["player"]["progress"] = 100 * (played_sec / total) return videos From a7fc7902f0fd202b90136fd62e23d4dd693f9529 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 4 Sep 2023 21:43:09 +0700 Subject: [PATCH 07/25] refactor single video page to use SearchProcess --- tubearchivist/api/src/search_processor.py | 15 ++++++++------- tubearchivist/home/views.py | 5 +++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py index d48c6e5..7c9d28e 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -39,7 +39,7 @@ class SearchProcess: def _process_result(self, result): """detect which type of data to process""" index = result["_index"] - processed = {} + processed = False if index == "ta_video": processed = self._process_video(result["_source"]) if index == "ta_channel": @@ -53,12 +53,13 @@ class SearchProcess: if index == "ta_subtitle": processed = 
self._process_subtitle(result) - processed.update( - { - "_index": index, - "_score": round(result.get("_score") or 0, 2), - } - ) + if isinstance(processed, dict): + processed.update( + { + "_index": index, + "_score": round(result.get("_score") or 0, 2), + } + ) return processed diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 06129d4..40c3f52 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -892,8 +892,9 @@ class VideoView(MinView): def get(self, request, video_id): """get single video""" config_handler = AppConfig(request.user.id) - look_up = SearchHandler(f"ta_video/_doc/{video_id}", config=False) - video_data = look_up.get_data()[0]["source"] + response, _ = ElasticWrap(f"ta_video/_doc/{video_id}").get() + video_data = SearchProcess(response).process() + try: rating = video_data["stats"]["average_rating"] video_data["stats"]["average_rating"] = self.star_creator(rating) From 85b56300b3125cf351ee60af585b0db36c5ccd55 Mon Sep 17 00:00:00 2001 From: Clark <104835586+anonamouslyginger@users.noreply.github.com> Date: Thu, 21 Sep 2023 14:46:55 +0000 Subject: [PATCH 08/25] Move user configuration from Redis to ES (#533) * ES Client must bootstrap itself to be the source of config If this is not done a cyclic loop is created between the config loader and the ES client. This lays the ground work for ES being the source of all app config. * auto_download is not used anymore * Add UserConfig class that encapsulates user config storage This class will allow the rest of the code to 'not care' about how user properties are stored. This requires the addition of a ta_users index in ES. 
* Create migration task for user config transfer * Replace getters and setters for each property Strongly type the user configuration Migrate missed sponsorblock ID * Other DB settings will be another PR --- .../config/management/commands/ta_startup.py | 98 +++++++++++++ tubearchivist/home/config.json | 14 -- .../home/src/download/yt_dlp_handler.py | 2 +- tubearchivist/home/src/es/connect.py | 22 +-- tubearchivist/home/src/es/index_mapping.json | 13 +- tubearchivist/home/src/frontend/api_calls.py | 44 +++--- tubearchivist/home/src/frontend/searching.py | 10 +- tubearchivist/home/src/index/generic.py | 10 +- tubearchivist/home/src/index/video.py | 23 ++-- tubearchivist/home/src/ta/config.py | 46 +------ tubearchivist/home/src/ta/users.py | 104 ++++++++++++++ .../home/templates/home/settings_user.html | 4 +- tubearchivist/home/views.py | 129 ++++++------------ 13 files changed, 302 insertions(+), 217 deletions(-) create mode 100644 tubearchivist/home/src/ta/users.py diff --git a/tubearchivist/config/management/commands/ta_startup.py b/tubearchivist/config/management/commands/ta_startup.py index e45c46c..f715b8f 100644 --- a/tubearchivist/config/management/commands/ta_startup.py +++ b/tubearchivist/config/management/commands/ta_startup.py @@ -16,6 +16,7 @@ from home.src.ta.config import AppConfig, ReleaseVersion from home.src.ta.helper import clear_dl_cache from home.src.ta.ta_redis import RedisArchivist from home.src.ta.task_manager import TaskManager +from home.src.ta.users import UserConfig TOPIC = """ @@ -44,6 +45,7 @@ class Command(BaseCommand): self._mig_snapshot_check() self._mig_set_streams() self._mig_set_autostart() + self._mig_move_users_to_es() def _sync_redis_state(self): """make sure redis gets new config.json values""" @@ -219,3 +221,99 @@ class Command(BaseCommand): self.stdout.write(response) sleep(60) raise CommandError(message) + + def _mig_move_users_to_es(self): # noqa: C901 + """migration: update from 0.4.1 to 0.5.0 move user config to ES""" + 
self.stdout.write("[MIGRATION] move user configuration to ES") + redis = RedisArchivist() + + # 1: Find all users in Redis + users = {i.split(":")[0] for i in redis.list_keys("[0-9]*:")} + if not users: + self.stdout.write(" no users needed migrating to ES") + return + + # 2: Write all Redis user settings to ES + # 3: Remove user settings from Redis + try: + for user in users: + new_conf = UserConfig(user) + + colors_key = f"{user}:colors" + colors = redis.get_message(colors_key).get("status") + if colors: + new_conf.set_value("colors", colors) + redis.del_message(colors_key) + + sort_by_key = f"{user}:sort_by" + sort_by = redis.get_message(sort_by_key).get("status") + if sort_by: + new_conf.set_value("sort_by", sort_by) + redis.del_message(sort_by_key) + + page_size_key = f"{user}:page_size" + page_size = redis.get_message(page_size_key).get("status") + if page_size: + new_conf.set_value("page_size", page_size) + redis.del_message(page_size_key) + + sort_order_key = f"{user}:sort_order" + sort_order = redis.get_message(sort_order_key).get("status") + if sort_order: + new_conf.set_value("sort_order", sort_order) + redis.del_message(sort_order_key) + + grid_items_key = f"{user}:grid_items" + grid_items = redis.get_message(grid_items_key).get("status") + if grid_items: + new_conf.set_value("grid_items", grid_items) + redis.del_message(grid_items_key) + + hide_watch_key = f"{user}:hide_watched" + hide_watch = redis.get_message(hide_watch_key).get("status") + if hide_watch: + new_conf.set_value("hide_watched", hide_watch) + redis.del_message(hide_watch_key) + + ignore_only_key = f"{user}:show_ignored_only" + ignore_only = redis.get_message(ignore_only_key).get("status") + if ignore_only: + new_conf.set_value("show_ignored_only", ignore_only) + redis.del_message(ignore_only_key) + + subed_only_key = f"{user}:show_subed_only" + subed_only = redis.get_message(subed_only_key).get("status") + if subed_only: + new_conf.set_value("show_subed_only", subed_only) + 
redis.del_message(subed_only_key) + + sb_id_key = f"{user}:id_sb_id" + sb_id = redis.get_message(sb_id_key).get("status") + if sb_id: + new_conf.set_value("sb_id_id", sb_id) + redis.del_message(sb_id_key) + + for view in ["channel", "playlist", "home", "downloads"]: + view_key = f"{user}:view:{view}" + view_style = redis.get_message(view_key).get("status") + if view_style: + new_conf.set_value(f"view_style_{view}", view_style) + redis.del_message(view_key) + + self.stdout.write( + self.style.SUCCESS( + f" ✓ Settings for user '{user}' migrated to ES" + ) + ) + except Exception as e: + message = " 🗙 user migration to ES failed" + self.stdout.write(self.style.ERROR(message)) + self.stdout.write(self.style.ERROR(e)) + sleep(60) + raise CommandError(message) + else: + self.stdout.write( + self.style.SUCCESS( + " ✓ Settings for all users migrated to ES" + ) + ) diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index d45a2a1..26d3bf9 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -1,18 +1,5 @@ { - "archive": { - "sort_by": "published", - "sort_order": "desc", - "page_size": 12 - }, - "default_view": { - "home": "grid", - "channel": "list", - "downloads": "list", - "playlist": "grid", - "grid_items": 3 - }, "subscriptions": { - "auto_download": false, "channel_size": 50, "live_channel_size": 50, "shorts_channel_size": 50, @@ -41,7 +28,6 @@ "app_root": "/app", "cache_dir": "/cache", "videos": "/youtube", - "colors": "dark", "enable_cast": false, "enable_snapshot": true }, diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index f2bf800..9a865ab 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -417,7 +417,7 @@ class VideoDownloader: "lang": "painless", }, } - response, _ = ElasticWrap(path, config=self.config).post(data=data) + response, _ = ElasticWrap(path).post(data=data) 
updated = response.get("updated") if updated: print(f"[download] reset auto start on {updated} videos.") diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index 0b9d554..b526cf4 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -6,9 +6,9 @@ functionality: # pylint: disable=missing-timeout import json +import os import requests -from home.src.ta.config import AppConfig class ElasticWrap: @@ -16,21 +16,13 @@ class ElasticWrap: returns response json and status code tuple """ - def __init__(self, path, config=False): - self.url = False - self.auth = False - self.path = path - self.config = config - self._get_config() + ES_URL: str = str(os.environ.get("ES_URL")) + ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD")) + ES_USER: str = str(os.environ.get("ELASTIC_USER") or "elastic") - def _get_config(self): - """add config if not passed""" - if not self.config: - self.config = AppConfig().config - - es_url = self.config["application"]["es_url"] - self.auth = self.config["application"]["es_auth"] - self.url = f"{es_url}/{self.path}" + def __init__(self, path): + self.url = f"{self.ES_URL}/{path}" + self.auth = (self.ES_USER, self.ES_PASS) def get(self, data=False, timeout=10, print_error=True): """get data from es""" diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index 06bf13c..a25b300 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -1,5 +1,16 @@ { "index_config": [{ + "index_name": "config", + "expected_map": { + "config": { + "type": "object" + } + }, + "expected_set": { + "number_of_replicas": "0" + } + }, + { "index_name": "channel", "expected_map": { "channel_id": { @@ -601,4 +612,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tubearchivist/home/src/frontend/api_calls.py b/tubearchivist/home/src/frontend/api_calls.py index 60764ea..c5402ab 100644 --- 
a/tubearchivist/home/src/frontend/api_calls.py +++ b/tubearchivist/home/src/frontend/api_calls.py @@ -4,7 +4,7 @@ Functionality: - called via user input """ -from home.src.ta.ta_redis import RedisArchivist +from home.src.ta.users import UserConfig from home.tasks import run_restore_backup @@ -41,10 +41,8 @@ class PostData: def _change_view(self): """process view changes in home, channel, and downloads""" - origin, new_view = self.exec_val.split(":") - key = f"{self.current_user}:view:{origin}" - print(f"change view: {key} to {new_view}") - RedisArchivist().set_message(key, {"status": new_view}) + view, setting = self.exec_val.split(":") + UserConfig(self.current_user).set_value(f"view_style_{view}", setting) return {"success": True} def _change_grid(self): @@ -52,48 +50,38 @@ class PostData: grid_items = int(self.exec_val) grid_items = max(grid_items, 3) grid_items = min(grid_items, 7) - - key = f"{self.current_user}:grid_items" - print(f"change grid items: {grid_items}") - RedisArchivist().set_message(key, {"status": grid_items}) + UserConfig(self.current_user).set_value("grid_items", grid_items) return {"success": True} def _sort_order(self): """change the sort between published to downloaded""" - sort_order = {"status": self.exec_val} if self.exec_val in ["asc", "desc"]: - RedisArchivist().set_message( - f"{self.current_user}:sort_order", sort_order + UserConfig(self.current_user).set_value( + "sort_order", self.exec_val ) else: - RedisArchivist().set_message( - f"{self.current_user}:sort_by", sort_order - ) + UserConfig(self.current_user).set_value("sort_by", self.exec_val) return {"success": True} def _hide_watched(self): """toggle if to show watched vids or not""" - key = f"{self.current_user}:hide_watched" - message = {"status": bool(int(self.exec_val))} - print(f"toggle {key}: {message}") - RedisArchivist().set_message(key, message) + UserConfig(self.current_user).set_value( + "hide_watched", bool(int(self.exec_val)) + ) return {"success": True} def 
_show_subed_only(self): """show or hide subscribed channels only on channels page""" - key = f"{self.current_user}:show_subed_only" - message = {"status": bool(int(self.exec_val))} - print(f"toggle {key}: {message}") - RedisArchivist().set_message(key, message) + UserConfig(self.current_user).set_value( + "show_subed_only", bool(int(self.exec_val)) + ) return {"success": True} def _show_ignored_only(self): """switch view on /downloads/ to show ignored only""" - show_value = self.exec_val - key = f"{self.current_user}:show_ignored_only" - value = {"status": show_value} - print(f"Filter download view ignored only: {show_value}") - RedisArchivist().set_message(key, value) + UserConfig(self.current_user).set_value( + "show_ignored_only", bool(int(self.exec_val)) + ) return {"success": True} def _db_restore(self): diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py index b9f2624..a150106 100644 --- a/tubearchivist/home/src/frontend/searching.py +++ b/tubearchivist/home/src/frontend/searching.py @@ -11,23 +11,21 @@ from datetime import datetime from home.src.download.thumbnails import ThumbManager from home.src.es.connect import ElasticWrap -from home.src.ta.config import AppConfig from home.src.ta.helper import get_duration_str class SearchHandler: """search elastic search""" - def __init__(self, path, config, data=False): + def __init__(self, path, data=False): self.max_hits = None self.aggs = None self.path = path - self.config = config self.data = data def get_data(self): """get the data""" - response, _ = ElasticWrap(self.path, config=self.config).get(self.data) + response, _ = ElasticWrap(self.path).get(self.data) if "hits" in response.keys(): self.max_hits = response["hits"]["total"]["value"] @@ -109,12 +107,10 @@ class SearchHandler: class SearchForm: """build query from search form data""" - CONFIG = AppConfig().config - def multi_search(self, search_query): """searching through index""" path, query, 
query_type = SearchParser(search_query).run() - look_up = SearchHandler(path, config=self.CONFIG, data=query) + look_up = SearchHandler(path, data=query) search_results = look_up.get_data() all_results = self.build_results(search_results) diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py index 6e82e54..a5f624d 100644 --- a/tubearchivist/home/src/index/generic.py +++ b/tubearchivist/home/src/index/generic.py @@ -8,7 +8,7 @@ import math from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap from home.src.ta.config import AppConfig -from home.src.ta.ta_redis import RedisArchivist +from home.src.ta.users import UserConfig class YouTubeItem: @@ -100,13 +100,7 @@ class Pagination: def get_page_size(self): """get default or user modified page_size""" - key = f"{self.request.user.id}:page_size" - page_size = RedisArchivist().get_message(key)["status"] - if not page_size: - config = AppConfig().config - page_size = config["archive"]["page_size"] - - return page_size + return UserConfig(self.request.user.id).get_value("page_size") def first_guess(self): """build first guess before api call""" diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 626d257..606e32f 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -18,7 +18,7 @@ from home.src.index.subtitle import YoutubeSubtitle from home.src.index.video_constants import VideoTypeEnum from home.src.index.video_streams import MediaStreamExtractor from home.src.ta.helper import get_duration_sec, get_duration_str, randomizor -from home.src.ta.ta_redis import RedisArchivist +from home.src.ta.users import UserConfig from ryd_client import ryd_client @@ -32,17 +32,16 @@ class SponsorBlock: self.user_agent = f"{settings.TA_UPSTREAM} {settings.TA_VERSION}" self.last_refresh = int(datetime.now().timestamp()) - def get_sb_id(self): - """get sponsorblock userid or 
generate if needed""" + def get_sb_id(self) -> str: + """get sponsorblock for the userid or generate if needed""" if not self.user_id: - print("missing request user id") - raise ValueError + raise ValueError("missing request user id") - key = f"{self.user_id}:id_sponsorblock" - sb_id = RedisArchivist().get_message(key) - if not sb_id["status"]: - sb_id = {"status": randomizor(32)} - RedisArchivist().set_message(key, sb_id) + user = UserConfig(self.user_id) + sb_id = user.get_value("sponsorblock_id") + if not sb_id: + sb_id = randomizor(32) + user.set_value("sponsorblock_id", sb_id) return sb_id @@ -88,7 +87,7 @@ class SponsorBlock: def post_timestamps(self, youtube_id, start_time, end_time): """post timestamps to api""" - user_id = self.get_sb_id().get("status") + user_id = self.get_sb_id() data = { "videoID": youtube_id, "startTime": start_time, @@ -105,7 +104,7 @@ class SponsorBlock: def vote_on_segment(self, uuid, vote): """send vote on existing segment""" - user_id = self.get_sb_id().get("status") + user_id = self.get_sb_id() data = { "UUID": uuid, "userID": user_id, diff --git a/tubearchivist/home/src/ta/config.py b/tubearchivist/home/src/ta/config.py index 84fe84a..a32d083 100644 --- a/tubearchivist/home/src/ta/config.py +++ b/tubearchivist/home/src/ta/config.py @@ -17,12 +17,10 @@ from home.src.ta.ta_redis import RedisArchivist class AppConfig: - """handle user settings and application variables""" + """handle application variables""" - def __init__(self, user_id=False): - self.user_id = user_id + def __init__(self): self.config = self.get_config() - self.colors = self.get_colors() def get_config(self): """get config from default file or redis if changed""" @@ -30,12 +28,6 @@ class AppConfig: if not config: config = self.get_config_file() - if self.user_id: - key = f"{self.user_id}:page_size" - page_size = RedisArchivist().get_message(key)["status"] - if page_size: - config["archive"]["page_size"] = page_size - 
config["application"].update(self.get_config_env()) return config @@ -50,14 +42,12 @@ class AppConfig: @staticmethod def get_config_env(): - """read environment application variables""" - es_pass = os.environ.get("ELASTIC_PASSWORD") - es_user = os.environ.get("ELASTIC_USER", default="elastic") + """read environment application variables. + + Connection to ES is managed in ElasticWrap and the + connection to Redis is managed in RedisArchivist.""" application = { - "REDIS_HOST": os.environ.get("REDIS_HOST"), - "es_url": os.environ.get("ES_URL"), - "es_auth": (es_user, es_pass), "HOST_UID": int(os.environ.get("HOST_UID", False)), "HOST_GID": int(os.environ.get("HOST_GID", False)), "enable_cast": bool(os.environ.get("ENABLE_CAST")), @@ -103,30 +93,6 @@ class AppConfig: RedisArchivist().set_message("config", self.config, save=True) return updated - @staticmethod - def set_user_config(form_post, user_id): - """set values in redis for user settings""" - for key, value in form_post.items(): - if not value: - continue - - message = {"status": value} - redis_key = f"{user_id}:{key}" - RedisArchivist().set_message(redis_key, message, save=True) - - def get_colors(self): - """overwrite config if user has set custom values""" - colors = False - if self.user_id: - col_dict = RedisArchivist().get_message(f"{self.user_id}:colors") - colors = col_dict["status"] - - if not colors: - colors = self.config["application"]["colors"] - - self.config["application"]["colors"] = colors - return colors - @staticmethod def _build_rand_daily(): """build random daily schedule per installation""" diff --git a/tubearchivist/home/src/ta/users.py b/tubearchivist/home/src/ta/users.py new file mode 100644 index 0000000..c337381 --- /dev/null +++ b/tubearchivist/home/src/ta/users.py @@ -0,0 +1,104 @@ +""" +Functionality: +- read and write user config backed by ES +- encapsulate persistence of user properties +""" + +from typing import TypedDict + +from home.src.es.connect import ElasticWrap + + +class 
UserConfigType(TypedDict, total=False): + """describes the user configuration""" + + colors: str + page_size: int + sort_by: str + sort_order: str + view_style_home: str + view_style_channel: str + view_style_downloads: str + view_style_playlist: str + grid_items: int + hide_watched: bool + show_ignored_only: bool + show_subed_only: bool + sponsorblock_id: str + + +class UserConfig: + """Handle settings for an individual user + + Create getters and setters for usage in the application. + Although tedious it helps prevents everything caring about how properties + are persisted. Plus it allows us to save anytime any value is set. + """ + + _DEFAULT_USER_SETTINGS = UserConfigType( + colors="dark", + page_size=12, + sort_by="published", + sort_order="desc", + view_style_home="grid", + view_style_channel="list", + view_style_downloads="list", + view_style_playlist="grid", + grid_items=3, + hide_watched=False, + show_ignored_only=False, + show_subed_only=False, + sponsorblock_id=None, + ) + + def __init__(self, user_id: str): + self._user_id: str = user_id + self._config: UserConfigType = self._get_config() + + def get_value(self, key: str): + """Get the given key from the users configuration + + Throws a KeyError if the requested Key is not a permitted value""" + if key not in self._DEFAULT_USER_SETTINGS: + raise KeyError(f"Unable to read config for unknown key '{key}'") + + return self._config.get(key) or self._DEFAULT_USER_SETTINGS.get(key) + + def set_value(self, key: str, value: str | bool | int): + """Set or replace a configuration value for the user + + Throws a KeyError if the requested Key is not a permitted value""" + if not self._user_id: + raise ValueError("Unable to persist config for null user_id") + + if key not in self._DEFAULT_USER_SETTINGS: + raise KeyError(f"Unable to persist config for unknown key '{key}'") + + old = self.get_value(key) + self._config[key] = value + + # Upsert this property (creating a record if not exists) + es_payload = {"doc": 
{"config": {key: value}}, "doc_as_upsert": True} + es_document_path = f"ta_config/_update/user_{self._user_id}" + response, status = ElasticWrap(es_document_path).post(es_payload) + if status < 200 or status > 299: + raise ValueError(f"Failed storing user value {status}: {response}") + + print(f"User {self._user_id} value '{key}' change: {old} > {value}") + + def _get_config(self) -> UserConfigType: + """get config from ES or load from the application defaults""" + if not self._user_id: + # this is for a non logged-in user so use all the defaults + return {} + + # Does this user have configuration stored in ES + es_document_path = f"ta_config/_doc/user_{self._user_id}" + response, status = ElasticWrap(es_document_path).get(print_error=False) + if status == 200 and "_source" in response.keys(): + source = response.get("_source") + if "config" in source.keys(): + return source.get("config") + + # There is no config in ES + return {} diff --git a/tubearchivist/home/templates/home/settings_user.html b/tubearchivist/home/templates/home/settings_user.html index a12fc75..b9545e0 100644 --- a/tubearchivist/home/templates/home/settings_user.html +++ b/tubearchivist/home/templates/home/settings_user.html @@ -9,7 +9,7 @@

Color scheme

-

Current color scheme: {{ config.application.colors }}

+

Current color scheme: {{ colors }}

Select your preferred color scheme between dark and light mode.
{{ user_form.colors }}
@@ -17,7 +17,7 @@

Archive View

-

Current page size: {{ config.archive.page_size }}

+

Current page size: {{ page_size }}

Result of videos showing in archive page
{{ user_form.page_size }}
diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 24c10fa..b0aea5e 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -41,6 +41,7 @@ from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder from home.src.ta.helper import time_parser from home.src.ta.ta_redis import RedisArchivist +from home.src.ta.users import UserConfig from home.tasks import index_channel_playlists, subscribe_to from rest_framework.authtoken.models import Token @@ -52,93 +53,38 @@ class ArchivistViewConfig(View): super().__init__() self.view_origin = view_origin self.user_id = False - self.user_conf = False + self.user_conf: UserConfig = False self.default_conf = False self.context = False - def _get_sort_by(self): - """return sort_by config var""" - messag_key = f"{self.user_id}:sort_by" - sort_by = self.user_conf.get_message(messag_key)["status"] - if not sort_by: - sort_by = self.default_conf["archive"]["sort_by"] - - return sort_by - - def _get_sort_order(self): - """return sort_order config var""" - sort_order_key = f"{self.user_id}:sort_order" - sort_order = self.user_conf.get_message(sort_order_key)["status"] - if not sort_order: - sort_order = self.default_conf["archive"]["sort_order"] - - return sort_order - - def _get_view_style(self): - """return view_style config var""" - view_key = f"{self.user_id}:view:{self.view_origin}" - view_style = self.user_conf.get_message(view_key)["status"] - if not view_style: - view_style = self.default_conf["default_view"][self.view_origin] - - return view_style - - def _get_grid_items(self): - """return items per row to show in grid view""" - grid_key = f"{self.user_id}:grid_items" - grid_items = self.user_conf.get_message(grid_key)["status"] - if not grid_items: - grid_items = self.default_conf["default_view"]["grid_items"] - - return grid_items - def get_all_view_styles(self): - """get dict of all view stiles for search 
form""" - all_keys = ["channel", "playlist", "home"] + """get dict of all view styles for search form""" all_styles = {} - for view_origin in all_keys: - view_key = f"{self.user_id}:view:{view_origin}" - view_style = self.user_conf.get_message(view_key)["status"] - if not view_style: - view_style = self.default_conf["default_view"][view_origin] - all_styles[view_origin] = view_style + for view_origin in ["channel", "playlist", "home", "downloads"]: + all_styles[view_origin] = self.user_conf.get_value( + f"view_style_{view_origin}" + ) return all_styles - def _get_hide_watched(self): - hide_watched_key = f"{self.user_id}:hide_watched" - hide_watched = self.user_conf.get_message(hide_watched_key)["status"] - - return hide_watched - - def _get_show_ignore_only(self): - ignored_key = f"{self.user_id}:show_ignored_only" - show_ignored_only = self.user_conf.get_message(ignored_key)["status"] - - return show_ignored_only - - def _get_show_subed_only(self): - sub_only_key = f"{self.user_id}:show_subed_only" - show_subed_only = self.user_conf.get_message(sub_only_key)["status"] - - return show_subed_only - def config_builder(self, user_id): """build default context for every view""" self.user_id = user_id - self.user_conf = RedisArchivist() - self.default_conf = AppConfig(self.user_id).config + self.user_conf = UserConfig(self.user_id) + self.default_conf = AppConfig().config self.context = { - "colors": self.default_conf["application"]["colors"], + "colors": self.user_conf.get_value("colors"), "cast": self.default_conf["application"]["enable_cast"], - "sort_by": self._get_sort_by(), - "sort_order": self._get_sort_order(), - "view_style": self._get_view_style(), - "grid_items": self._get_grid_items(), - "hide_watched": self._get_hide_watched(), - "show_ignored_only": self._get_show_ignore_only(), - "show_subed_only": self._get_show_subed_only(), + "sort_by": self.user_conf.get_value("sort_by"), + "sort_order": self.user_conf.get_value("sort_order"), + "view_style": 
self.user_conf.get_value( + f"view_style_{self.view_origin}" + ), + "grid_items": self.user_conf.get_value("grid_items"), + "hide_watched": self.user_conf.get_value("hide_watched"), + "show_ignored_only": self.user_conf.get_value("show_ignored_only"), + "show_subed_only": self.user_conf.get_value("show_subed_only"), "version": settings.TA_VERSION, "ta_update": ReleaseVersion().get_update(), } @@ -212,13 +158,11 @@ class ArchivistResultsView(ArchivistViewConfig): """get all videos in progress""" ids = [{"match": {"youtube_id": i.get("youtube_id")}} for i in results] data = { - "size": self.default_conf["archive"]["page_size"], + "size": UserConfig(self.user_id).get_value("page_size"), "query": {"bool": {"should": ids}}, "sort": [{"published": {"order": "desc"}}], } - search = SearchHandler( - "ta_video/_search", self.default_conf, data=data - ) + search = SearchHandler("ta_video/_search", data=data) videos = search.get_data() if not videos: return False @@ -236,7 +180,7 @@ class ArchivistResultsView(ArchivistViewConfig): def single_lookup(self, es_path): """retrieve a single item from url""" - search = SearchHandler(es_path, config=self.default_conf) + search = SearchHandler(es_path) result = search.get_data()[0]["source"] return result @@ -251,9 +195,7 @@ class ArchivistResultsView(ArchivistViewConfig): def find_results(self): """add results and pagination to context""" - search = SearchHandler( - self.es_search, config=self.default_conf, data=self.data - ) + search = SearchHandler(self.es_search, data=self.data) self.context["results"] = search.get_data() self.pagination_handler.validate(search.max_hits) self.context["max_hits"] = search.max_hits @@ -268,7 +210,7 @@ class MinView(View): def get_min_context(request): """build minimal vars for context""" return { - "colors": AppConfig(request.user.id).colors, + "colors": UserConfig(request.user.id).get_value("colors"), "version": settings.TA_VERSION, "ta_update": ReleaseVersion().get_update(), } @@ -892,8 +834,8 @@ 
class VideoView(MinView): def get(self, request, video_id): """get single video""" - config_handler = AppConfig(request.user.id) - look_up = SearchHandler(f"ta_video/_doc/{video_id}", config=False) + config_handler = AppConfig() + look_up = SearchHandler(f"ta_video/_doc/{video_id}") video_data = look_up.get_data()[0]["source"] try: rating = video_data["stats"]["average_rating"] @@ -1005,7 +947,9 @@ class SettingsUserView(MinView): context.update( { "title": "User Settings", - "config": AppConfig(request.user.id).config, + "page_size": UserConfig(request.user.id).get_value( + "page_size" + ), "user_form": UserSettingsForm(), } ) @@ -1015,10 +959,17 @@ class SettingsUserView(MinView): def post(self, request): """handle form post to update settings""" user_form = UserSettingsForm(request.POST) + config_handler = UserConfig(request.user.id) if user_form.is_valid(): user_form_post = user_form.cleaned_data - if any(user_form_post.values()): - AppConfig().set_user_config(user_form_post, request.user.id) + if user_form_post.get("colors"): + config_handler.set_value( + "colors", user_form_post.get("colors") + ) + if user_form_post.get("page_size"): + config_handler.set_value( + "page_size", user_form_post.get("page_size") + ) sleep(1) return redirect("settings_user", permanent=True) @@ -1037,7 +988,7 @@ class SettingsApplicationView(MinView): context.update( { "title": "Application Settings", - "config": AppConfig(request.user.id).config, + "config": AppConfig().config, "api_token": self.get_token(request), "app_form": ApplicationSettingsForm(), "snapshots": ElasticSnapshot().get_snapshot_stats(), @@ -1126,7 +1077,7 @@ class SettingsSchedulingView(MinView): context.update( { "title": "Scheduling Settings", - "config": AppConfig(request.user.id).config, + "config": AppConfig().config, "scheduler_form": SchedulerSettingsForm(), } ) From a5b61bfaf6cc577f50dd514e1cd2b5531fe6bf6f Mon Sep 17 00:00:00 2001 From: Joseph Liu Date: Thu, 21 Sep 2023 08:40:42 -0700 Subject: [PATCH 
09/25] Add "Mark Unwatched" to channels and playlists (#547) --- tubearchivist/home/templates/home/channel_id.html | 5 ++++- tubearchivist/home/templates/home/playlist_id.html | 5 ++++- tubearchivist/static/script.js | 10 +++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html index dcc935c..421392c 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -47,7 +47,10 @@
{% if aggs %}

{{ aggs.total_items.value }} videos | {{ aggs.total_duration.value_str }} playback | Total size {{ aggs.total_size.value|filesizeformat }}

- +
+ + +
{% endif %}
diff --git a/tubearchivist/home/templates/home/playlist_id.html b/tubearchivist/home/templates/home/playlist_id.html index d5d20dd..525e99a 100644 --- a/tubearchivist/home/templates/home/playlist_id.html +++ b/tubearchivist/home/templates/home/playlist_id.html @@ -50,7 +50,10 @@
{% if max_hits %}

Total Videos archived: {{ max_hits }}/{{ playlist_info.playlist_entries|length }}

-

Watched:

+
+ + +
{% endif %} {% if reindex %}

Reindex scheduled

diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 990e76c..95fe165 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -64,7 +64,15 @@ function isWatchedButton(button) { let youtube_id = button.getAttribute('data-id'); let apiEndpoint = '/api/watched/'; let data = { id: youtube_id, is_watched: true }; - button.remove(); + apiRequest(apiEndpoint, 'POST', data); + setTimeout(function () { + location.reload(); + }, 1000); +} +function isUnwatchedButton(button) { + let youtube_id = button.getAttribute('data-id'); + let apiEndpoint = '/api/watched/'; + let data = { id: youtube_id, is_watched: false }; apiRequest(apiEndpoint, 'POST', data); setTimeout(function () { location.reload(); From 92975a5c95ae460984d7f02298a1bb7431eaa560 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 21 Sep 2023 23:16:06 +0700 Subject: [PATCH 10/25] disable ta_config indexing --- tubearchivist/home/src/es/index_mapping.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index a25b300..d6dda4b 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -3,7 +3,8 @@ "index_name": "config", "expected_map": { "config": { - "type": "object" + "type": "object", + "enabled": false } }, "expected_set": { From 38b3815a332794f2ba6392aadb3e119be5a99aaa Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 21 Sep 2023 23:17:25 +0700 Subject: [PATCH 11/25] catch disabled old settings --- .../config/management/commands/ta_startup.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tubearchivist/config/management/commands/ta_startup.py b/tubearchivist/config/management/commands/ta_startup.py index f715b8f..f71fe76 100644 --- a/tubearchivist/config/management/commands/ta_startup.py +++ b/tubearchivist/config/management/commands/ta_startup.py @@ -241,62 
+241,62 @@ class Command(BaseCommand): colors_key = f"{user}:colors" colors = redis.get_message(colors_key).get("status") - if colors: + if colors is not None: new_conf.set_value("colors", colors) redis.del_message(colors_key) sort_by_key = f"{user}:sort_by" sort_by = redis.get_message(sort_by_key).get("status") - if sort_by: + if sort_by is not None: new_conf.set_value("sort_by", sort_by) redis.del_message(sort_by_key) page_size_key = f"{user}:page_size" page_size = redis.get_message(page_size_key).get("status") - if page_size: + if page_size is not None: new_conf.set_value("page_size", page_size) redis.del_message(page_size_key) sort_order_key = f"{user}:sort_order" sort_order = redis.get_message(sort_order_key).get("status") - if sort_order: + if sort_order is not None: new_conf.set_value("sort_order", sort_order) redis.del_message(sort_order_key) grid_items_key = f"{user}:grid_items" grid_items = redis.get_message(grid_items_key).get("status") - if grid_items: + if grid_items is not None: new_conf.set_value("grid_items", grid_items) redis.del_message(grid_items_key) hide_watch_key = f"{user}:hide_watched" hide_watch = redis.get_message(hide_watch_key).get("status") - if hide_watch: + if hide_watch is not None: new_conf.set_value("hide_watched", hide_watch) redis.del_message(hide_watch_key) ignore_only_key = f"{user}:show_ignored_only" ignore_only = redis.get_message(ignore_only_key).get("status") - if ignore_only: + if ignore_only is not None: new_conf.set_value("show_ignored_only", ignore_only) redis.del_message(ignore_only_key) subed_only_key = f"{user}:show_subed_only" subed_only = redis.get_message(subed_only_key).get("status") - if subed_only: + if subed_only is not None: new_conf.set_value("show_subed_only", subed_only) redis.del_message(subed_only_key) sb_id_key = f"{user}:id_sb_id" sb_id = redis.get_message(sb_id_key).get("status") - if sb_id: + if sb_id is not None: new_conf.set_value("sb_id_id", sb_id) redis.del_message(sb_id_key) for view in 
["channel", "playlist", "home", "downloads"]: view_key = f"{user}:view:{view}" view_style = redis.get_message(view_key).get("status") - if view_style: + if view_style is not None: new_conf.set_value(f"view_style_{view}", view_style) redis.del_message(view_key) From b2bb7ea28e2d20a09e4c7daf94549213020ec7b8 Mon Sep 17 00:00:00 2001 From: Igor Rzegocki Date: Thu, 21 Sep 2023 22:23:44 +0200 Subject: [PATCH 12/25] Support wider spectrum of ElasticSearch configurations --- README.md | 2 + .../management/commands/ta_connection.py | 18 +++++++- tubearchivist/home/src/es/connect.py | 44 ++++++++++++++++--- tubearchivist/home/src/es/snapshot.py | 4 +- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 9bad431..954df49 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ Take a look at the example [docker-compose.yml](https://github.com/tubearchivist | TA_PORT | Overwrite Nginx port | Optional | | TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional | | ES_URL | URL That ElasticSearch runs on | Optional | +| ES_VERIFY_SSL | Verify ElasticSearch SSL certificate, everything other than `false` defaults to `true` | Optional | +| ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional | | HOST_GID | Allow TA to own the video files instead of container user | Optional | | HOST_UID | Allow TA to own the video files instead of container user | Optional | | ELASTIC_USER | Change the default ElasticSearch user | Optional | diff --git a/tubearchivist/config/management/commands/ta_connection.py b/tubearchivist/config/management/commands/ta_connection.py index 23cc14d..4e0f59a 100644 --- a/tubearchivist/config/management/commands/ta_connection.py +++ b/tubearchivist/config/management/commands/ta_connection.py @@ -3,6 +3,7 @@ Functionality: - check that all connections are working """ +from os import environ from time import sleep import requests @@ -132,7 +133,19 @@ class 
Command(BaseCommand): """check that path.repo var is set""" self.stdout.write("[5] check ES path.repo env var") response, _ = ElasticWrap("_nodes/_all/settings").get() + snaphost_roles = [ + "data", + "data_cold", + "data_content", + "data_frozen", + "data_hot", + "data_warm", + "master", + ] for node in response["nodes"].values(): + if not (set(node["roles"]) & set(snaphost_roles)): + continue + if node["settings"]["path"].get("repo"): self.stdout.write( self.style.SUCCESS(" ✓ path.repo env var is set") @@ -142,7 +155,10 @@ class Command(BaseCommand): message = ( " 🗙 path.repo env var not found. " + "set the following env var to the ES container:\n" - + " path.repo=/usr/share/elasticsearch/data/snapshot" + + " path.repo=" + + environ.get( + "ES_SNAPSHOT_DIR", "/usr/share/elasticsearch/data/snapshot" + ), ) self.stdout.write(self.style.ERROR(f"{message}")) sleep(60) diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index b526cf4..7d3b79a 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -9,6 +9,7 @@ import json import os import requests +import urllib3 class ElasticWrap: @@ -19,19 +20,33 @@ class ElasticWrap: ES_URL: str = str(os.environ.get("ES_URL")) ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD")) ES_USER: str = str(os.environ.get("ELASTIC_USER") or "elastic") + ES_VERIFY_SSL: str = str(os.environ.get("ES_VERIFY_SSL") or "true") def __init__(self, path): self.url = f"{self.ES_URL}/{path}" self.auth = (self.ES_USER, self.ES_PASS) + self.verify_ssl = self.ES_VERIFY_SSL != "false" + + if not self.verify_ssl: + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get(self, data=False, timeout=10, print_error=True): """get data from es""" if data: response = requests.get( - self.url, json=data, auth=self.auth, timeout=timeout + self.url, + json=data, + auth=self.auth, + timeout=timeout, + verify=self.verify_ssl, ) else: - response = requests.get(self.url, 
auth=self.auth, timeout=timeout) + response = requests.get( + self.url, + auth=self.auth, + timeout=timeout, + verify=self.verify_ssl, + ) if print_error and not response.ok: print(response.text) @@ -48,10 +63,19 @@ class ElasticWrap: if data: response = requests.post( - self.url, data=payload, headers=headers, auth=self.auth + self.url, + data=payload, + headers=headers, + auth=self.auth, + verify=self.verify_ssl, ) else: - response = requests.post(self.url, headers=headers, auth=self.auth) + response = requests.post( + self.url, + headers=headers, + auth=self.auth, + verify=self.verify_ssl, + ) if not response.ok: print(response.text) @@ -62,7 +86,9 @@ class ElasticWrap: """put data to es""" if refresh: self.url = f"{self.url}/?refresh=true" - response = requests.put(f"{self.url}", json=data, auth=self.auth) + response = requests.put( + f"{self.url}", json=data, auth=self.auth, verify=self.verify_ssl + ) if not response.ok: print(response.text) print(data) @@ -75,9 +101,13 @@ class ElasticWrap: if refresh: self.url = f"{self.url}/?refresh=true" if data: - response = requests.delete(self.url, json=data, auth=self.auth) + response = requests.delete( + self.url, json=data, auth=self.auth, verify=self.verify_ssl + ) else: - response = requests.delete(self.url, auth=self.auth) + response = requests.delete( + self.url, auth=self.auth, verify=self.verify_ssl + ) if not response.ok: print(response.text) diff --git a/tubearchivist/home/src/es/snapshot.py b/tubearchivist/home/src/es/snapshot.py index 6d6563c..15fc82c 100644 --- a/tubearchivist/home/src/es/snapshot.py +++ b/tubearchivist/home/src/es/snapshot.py @@ -19,7 +19,9 @@ class ElasticSnapshot: REPO_SETTINGS = { "compress": "true", "chunk_size": "1g", - "location": "/usr/share/elasticsearch/data/snapshot", + "location": environ.get( + "ES_SNAPSHOT_DIR", "/usr/share/elasticsearch/data/snapshot" + ), } POLICY = "ta_daily" From f423ddc53a1e0ac15b7c8212a1dd79d86a55dd64 Mon Sep 17 00:00:00 2001 From: Igor Rzegocki Date: 
Fri, 22 Sep 2023 10:01:47 +0200 Subject: [PATCH 13/25] add healthcheck endpoint (#553) --- tubearchivist/config/settings.py | 1 + tubearchivist/home/src/ta/health.py | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tubearchivist/home/src/ta/health.py diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index e58eeea..5629453 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -64,6 +64,7 @@ MIDDLEWARE = [ "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", + "home.src.ta.health.HealthCheckMiddleware", ] ROOT_URLCONF = "config.urls" diff --git a/tubearchivist/home/src/ta/health.py b/tubearchivist/home/src/ta/health.py new file mode 100644 index 0000000..001a021 --- /dev/null +++ b/tubearchivist/home/src/ta/health.py @@ -0,0 +1,11 @@ +from django.http import HttpResponse + + +class HealthCheckMiddleware: + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + if request.path == "/health": + return HttpResponse("ok") + return self.get_response(request) From 892e81c185020e24402ffdb1ec21b524e0e8f7ec Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 20:35:14 +0700 Subject: [PATCH 14/25] refactor ElasticWrap dynamic kwargs --- tubearchivist/home/src/es/connect.py | 132 ++++++++++++++++----------- 1 file changed, 79 insertions(+), 53 deletions(-) diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index 7d3b79a..afbdc61 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -7,6 +7,7 @@ functionality: import json import os +from typing import Any import requests import urllib3 @@ -20,75 +21,93 @@ class ElasticWrap: ES_URL: str = str(os.environ.get("ES_URL")) ES_PASS: str = str(os.environ.get("ELASTIC_PASSWORD")) ES_USER: str = 
str(os.environ.get("ELASTIC_USER") or "elastic") - ES_VERIFY_SSL: str = str(os.environ.get("ES_VERIFY_SSL") or "true") + ES_DISABLE_VERIFY_SSL: bool = bool(os.environ.get("ES_DISABLE_VERIFY_SSL")) - def __init__(self, path): - self.url = f"{self.ES_URL}/{path}" - self.auth = (self.ES_USER, self.ES_PASS) - self.verify_ssl = self.ES_VERIFY_SSL != "false" + def __init__(self, path: str): + self.url: str = f"{self.ES_URL}/{path}" + self.auth: tuple[str, str] = (self.ES_USER, self.ES_PASS) - if not self.verify_ssl: + if not self.ES_DISABLE_VERIFY_SSL: urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - def get(self, data=False, timeout=10, print_error=True): + def get( + self, + data: bool | dict = False, + timeout: int = 10, + print_error: bool = True, + ) -> tuple[dict, int]: """get data from es""" + + kwargs: dict[str, Any] = { + "auth": self.auth, + "timeout": timeout, + } + + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + if data: - response = requests.get( - self.url, - json=data, - auth=self.auth, - timeout=timeout, - verify=self.verify_ssl, - ) - else: - response = requests.get( - self.url, - auth=self.auth, - timeout=timeout, - verify=self.verify_ssl, - ) + kwargs["json"] = data + + response = requests.get(self.url, **kwargs) + if print_error and not response.ok: print(response.text) return response.json(), response.status_code - def post(self, data=False, ndjson=False): + def post( + self, data: bool | dict = False, ndjson: bool = False + ) -> tuple[dict, int]: """post data to es""" - if ndjson: - headers = {"Content-type": "application/x-ndjson"} - payload = data - else: - headers = {"Content-type": "application/json"} - payload = json.dumps(data) - if data: - response = requests.post( - self.url, - data=payload, - headers=headers, - auth=self.auth, - verify=self.verify_ssl, + kwargs: dict[str, Any] = {"auth": self.auth} + + if ndjson and data: + kwargs.update( + { + "headers": {"Content-type": "application/x-ndjson"}, + "data": 
data, + } ) - else: - response = requests.post( - self.url, - headers=headers, - auth=self.auth, - verify=self.verify_ssl, + elif data: + kwargs.update( + { + "headers": {"Content-type": "application/json"}, + "data": json.dumps(data), + } ) + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + + response = requests.post(self.url, **kwargs) + if not response.ok: print(response.text) return response.json(), response.status_code - def put(self, data, refresh=False): + def put( + self, + data: bool | dict = False, + refresh: bool = False, + ) -> tuple[dict, Any]: """put data to es""" + if refresh: self.url = f"{self.url}/?refresh=true" - response = requests.put( - f"{self.url}", json=data, auth=self.auth, verify=self.verify_ssl - ) + + kwargs: dict[str, Any] = { + "json": data, + "auth": self.auth, + } + + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + + response = requests.put(self.url, **kwargs) + if not response.ok: print(response.text) print(data) @@ -96,18 +115,25 @@ class ElasticWrap: return response.json(), response.status_code - def delete(self, data=False, refresh=False): + def delete( + self, + data: bool | dict = False, + refresh: bool = False, + ) -> tuple[dict, Any]: """delete document from es""" + if refresh: self.url = f"{self.url}/?refresh=true" + + kwargs: dict[str, Any] = {"auth": self.auth} + if data: - response = requests.delete( - self.url, json=data, auth=self.auth, verify=self.verify_ssl - ) - else: - response = requests.delete( - self.url, auth=self.auth, verify=self.verify_ssl - ) + kwargs["json"] = data + + if self.ES_DISABLE_VERIFY_SSL: + kwargs["verify"] = False + + response = requests.delete(self.url, **kwargs) if not response.ok: print(response.text) From dcbd8d2a55570ba6fc46474eed243160407009e0 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 20:42:26 +0700 Subject: [PATCH 15/25] update ES_DISABLE_VERIFY_SSL readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md 
b/README.md index 954df49..4756258 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Take a look at the example [docker-compose.yml](https://github.com/tubearchivist | TA_PORT | Overwrite Nginx port | Optional | | TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional | | ES_URL | URL That ElasticSearch runs on | Optional | -| ES_VERIFY_SSL | Verify ElasticSearch SSL certificate, everything other than `false` defaults to `true` | Optional | +| ES_DISABLE_VERIFY_SSL | Disable ElasticSearch SSL certificate verification | Optional | | ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional | | HOST_GID | Allow TA to own the video files instead of container user | Optional | | HOST_UID | Allow TA to own the video files instead of container user | Optional | From bae11fe1f1311c44402c0a478baf9329a6c9edaa Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 21:27:38 +0700 Subject: [PATCH 16/25] fix appconfig init --- tubearchivist/home/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 8521ebe..cca537d 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -835,7 +835,7 @@ class VideoView(MinView): def get(self, request, video_id): """get single video""" - config_handler = AppConfig(request.user.id) + config_handler = AppConfig() response, _ = ElasticWrap(f"ta_video/_doc/{video_id}").get() video_data = SearchProcess(response).process() From 7afeb41469e0aacb435465032b3f6dfca2ad833f Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 21:51:27 +0700 Subject: [PATCH 17/25] use SearchProcess for single_lookup --- tubearchivist/home/views.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index cca537d..50aebc7 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -12,7 +12,7 @@ 
from api.src.search_processor import SearchProcess from django.conf import settings from django.contrib.auth import login from django.contrib.auth.forms import AuthenticationForm -from django.http import JsonResponse +from django.http import Http404, JsonResponse from django.shortcuts import redirect, render from django.views import View from home.src.download.queue import PendingInteract @@ -181,8 +181,12 @@ class ArchivistResultsView(ArchivistViewConfig): def single_lookup(self, es_path): """retrieve a single item from url""" - search = SearchHandler(es_path) - result = search.get_data()[0]["source"] + response, status_code = ElasticWrap(es_path).get() + if not status_code == 200: + raise Http404 + + result = SearchProcess(response).process() + return result def initiate_vars(self, request): From 1cdb9e1ad54be786d66b50f10776f441d16a004f Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 Sep 2023 23:54:31 +0700 Subject: [PATCH 18/25] refactor find_results use ElasticWrap directly --- tubearchivist/home/src/frontend/searching.py | 94 ------------------- .../home/templates/home/channel.html | 24 ++--- .../home/templates/home/channel_id.html | 22 ++--- .../templates/home/channel_id_playlist.html | 16 ++-- .../home/templates/home/downloads.html | 34 +++---- tubearchivist/home/templates/home/home.html | 24 ++--- .../home/templates/home/playlist.html | 16 ++-- .../home/templates/home/playlist_id.html | 22 ++--- tubearchivist/home/views.py | 20 ++-- 9 files changed, 91 insertions(+), 181 deletions(-) diff --git a/tubearchivist/home/src/frontend/searching.py b/tubearchivist/home/src/frontend/searching.py index 068027f..5bcc01d 100644 --- a/tubearchivist/home/src/frontend/searching.py +++ b/tubearchivist/home/src/frontend/searching.py @@ -6,103 +6,9 @@ Functionality: - calculate pagination values """ -import urllib.parse -from datetime import datetime from api.src.search_processor import SearchProcess -from home.src.download.thumbnails import ThumbManager from 
home.src.es.connect import ElasticWrap -from home.src.ta.helper import get_duration_str - - -class SearchHandler: - """search elastic search""" - - def __init__(self, path, data=False): - self.max_hits = None - self.aggs = None - self.path = path - self.data = data - - def get_data(self): - """get the data""" - response, _ = ElasticWrap(self.path).get(self.data) - - if "hits" in response.keys(): - self.max_hits = response["hits"]["total"]["value"] - return_value = response["hits"]["hits"] - else: - # simulate list for single result to reuse rest of class - return_value = [response] - - if not return_value: - return False - - for idx, hit in enumerate(return_value): - return_value[idx] = self.hit_cleanup(hit) - - if response.get("aggregations"): - self.aggs = response["aggregations"] - if "total_duration" in self.aggs: - duration_sec = int(self.aggs["total_duration"]["value"]) - self.aggs["total_duration"].update( - {"value_str": get_duration_str(duration_sec)} - ) - - return return_value - - @staticmethod - def hit_cleanup(hit): - """clean up and parse data from a single hit""" - hit["source"] = hit.pop("_source") - hit_keys = hit["source"].keys() - if "media_url" in hit_keys: - parsed_url = urllib.parse.quote(hit["source"]["media_url"]) - hit["source"]["media_url"] = parsed_url - - if "published" in hit_keys: - published = hit["source"]["published"] - date_pub = datetime.strptime(published, "%Y-%m-%d") - date_str = datetime.strftime(date_pub, "%d %b, %Y") - hit["source"]["published"] = date_str - - if "vid_last_refresh" in hit_keys: - vid_last_refresh = hit["source"]["vid_last_refresh"] - date_refresh = datetime.fromtimestamp(vid_last_refresh) - date_str = datetime.strftime(date_refresh, "%d %b, %Y") - hit["source"]["vid_last_refresh"] = date_str - - if "playlist_last_refresh" in hit_keys: - playlist_last_refresh = hit["source"]["playlist_last_refresh"] - date_refresh = datetime.fromtimestamp(playlist_last_refresh) - date_str = datetime.strftime(date_refresh, "%d 
%b, %Y") - hit["source"]["playlist_last_refresh"] = date_str - - if "vid_thumb_url" in hit_keys: - youtube_id = hit["source"]["youtube_id"] - thumb_path = ThumbManager(youtube_id).vid_thumb_path() - hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}" - - if "channel_last_refresh" in hit_keys: - refreshed = hit["source"]["channel_last_refresh"] - date_refresh = datetime.fromtimestamp(refreshed) - date_str = datetime.strftime(date_refresh, "%d %b, %Y") - hit["source"]["channel_last_refresh"] = date_str - - if "channel" in hit_keys: - channel_keys = hit["source"]["channel"].keys() - if "channel_last_refresh" in channel_keys: - refreshed = hit["source"]["channel"]["channel_last_refresh"] - date_refresh = datetime.fromtimestamp(refreshed) - date_str = datetime.strftime(date_refresh, "%d %b, %Y") - hit["source"]["channel"]["channel_last_refresh"] = date_str - - if "subtitle_fragment_id" in hit_keys: - youtube_id = hit["source"]["youtube_id"] - thumb_path = ThumbManager(youtube_id).vid_thumb_path() - hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}" - - return hit class SearchForm: diff --git a/tubearchivist/home/templates/home/channel.html b/tubearchivist/home/templates/home/channel.html index 7a7debf..0091345 100644 --- a/tubearchivist/home/templates/home/channel.html +++ b/tubearchivist/home/templates/home/channel.html @@ -42,33 +42,33 @@ {% for channel in results %}
-

{{ channel.source.channel_name }}

- {% if channel.source.channel_subs >= 1000000 %} -

Subscribers: {{ channel.source.channel_subs|intword }}

+

{{ channel.channel_name }}

+ {% if channel.channel_subs >= 1000000 %} +

Subscribers: {{ channel.channel_subs|intword }}

{% else %} -

Subscribers: {{ channel.source.channel_subs|intcomma }}

+

Subscribers: {{ channel.channel_subs|intcomma }}

{% endif %}
-

Last refreshed: {{ channel.source.channel_last_refresh }}

- {% if channel.source.channel_subscribed %} - +

Last refreshed: {{ channel.channel_last_refresh }}

+ {% if channel.channel_subscribed %} + {% else %} - + {% endif %}
diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html index 421392c..838615b 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -106,14 +106,14 @@ {% if results %} {% for video in results %}
- +
- video-thumb - {% if video.source.player.progress %} -
+ video-thumb + {% if video.player.progress %} +
{% else %} -
+
{% endif %}
@@ -122,16 +122,16 @@
-
- {% if video.source.player.watched %} - seen-icon +
+ {% if video.player.watched %} + seen-icon {% else %} - unseen-icon + unseen-icon {% endif %} - {{ video.source.published }} | {{ video.source.player.duration_str }} + {{ video.published }} | {{ video.player.duration_str }}
diff --git a/tubearchivist/home/templates/home/channel_id_playlist.html b/tubearchivist/home/templates/home/channel_id_playlist.html index 86d635d..2a199d0 100644 --- a/tubearchivist/home/templates/home/channel_id_playlist.html +++ b/tubearchivist/home/templates/home/channel_id_playlist.html @@ -45,18 +45,18 @@ {% for playlist in results %}
-

{{ playlist.source.playlist_channel }}

-

{{ playlist.source.playlist_name }}

-

Last refreshed: {{ playlist.source.playlist_last_refresh }}

- {% if playlist.source.playlist_subscribed %} - +

{{ playlist.playlist_channel }}

+

{{ playlist.playlist_name }}

+

Last refreshed: {{ playlist.playlist_last_refresh }}

+ {% if playlist.playlist_subscribed %} + {% else %} - + {% endif %}
diff --git a/tubearchivist/home/templates/home/downloads.html b/tubearchivist/home/templates/home/downloads.html index 2084358..16e1f15 100644 --- a/tubearchivist/home/templates/home/downloads.html +++ b/tubearchivist/home/templates/home/downloads.html @@ -70,18 +70,18 @@
{% if results %} {% for video in results %} -
+
- video_thumb + video_thumb
{% if show_ignored_only %} ignored {% else %} queued {% endif %} - {{ video.source.vid_type }} - {% if video.source.auto_start %} + {{ video.vid_type }} + {% if video.auto_start %} auto {% endif %}
@@ -89,27 +89,27 @@
- {% if video.source.channel_indexed %} - {{ video.source.channel_name }} + {% if video.channel_indexed %} + {{ video.channel_name }} {% else %} - {{ video.source.channel_name }} + {{ video.channel_name }} {% endif %} -

{{ video.source.title }}

+

{{ video.title }}

-

Published: {{ video.source.published }} | Duration: {{ video.source.duration }} | {{ video.source.youtube_id }}

- {% if video.source.message %} -

{{ video.source.message }}

+

Published: {{ video.published }} | Duration: {{ video.duration }} | {{ video.youtube_id }}

+ {% if video.message %} +

{{ video.message }}

{% endif %}
{% if show_ignored_only %} - - + + {% else %} - - + + {% endif %} - {% if video.source.message %} - + {% if video.message %} + {% endif %}
diff --git a/tubearchivist/home/templates/home/home.html b/tubearchivist/home/templates/home/home.html index ade51aa..59a3478 100644 --- a/tubearchivist/home/templates/home/home.html +++ b/tubearchivist/home/templates/home/home.html @@ -95,14 +95,14 @@ {% if results %} {% for video in results %}
- +
- video-thumb - {% if video.source.player.progress %} -
+ video-thumb + {% if video.player.progress %} +
{% else %} -
+
{% endif %}
@@ -111,17 +111,17 @@
-
- {% if video.source.player.watched %} - seen-icon +
+ {% if video.player.watched %} + seen-icon {% else %} - unseen-icon + unseen-icon {% endif %} - {{ video.source.published }} | {{ video.source.player.duration_str }} + {{ video.published }} | {{ video.player.duration_str }}
diff --git a/tubearchivist/home/templates/home/playlist.html b/tubearchivist/home/templates/home/playlist.html index c82af74..c6e0203 100644 --- a/tubearchivist/home/templates/home/playlist.html +++ b/tubearchivist/home/templates/home/playlist.html @@ -40,18 +40,18 @@ {% for playlist in results %}
-

{{ playlist.source.playlist_channel }}

-

{{ playlist.source.playlist_name }}

-

Last refreshed: {{ playlist.source.playlist_last_refresh }}

- {% if playlist.source.playlist_subscribed %} - +

{{ playlist.playlist_channel }}

+

{{ playlist.playlist_name }}

+

Last refreshed: {{ playlist.playlist_last_refresh }}

+ {% if playlist.playlist_subscribed %} + {% else %} - + {% endif %}
diff --git a/tubearchivist/home/templates/home/playlist_id.html b/tubearchivist/home/templates/home/playlist_id.html index 525e99a..3faf449 100644 --- a/tubearchivist/home/templates/home/playlist_id.html +++ b/tubearchivist/home/templates/home/playlist_id.html @@ -110,14 +110,14 @@ {% if results %} {% for video in results %}
- +
- video-thumb - {% if video.source.player.progress %} -
+ video-thumb + {% if video.player.progress %} +
{% else %} -
+
{% endif %}
@@ -126,16 +126,16 @@
-
- {% if video.source.player.watched %} - seen-icon +
+ {% if video.player.watched %} + seen-icon {% else %} - unseen-icon + unseen-icon {% endif %} - {{ video.source.published }} | {{ video.source.player.duration_str }} + {{ video.published }} | {{ video.player.duration_str }}
diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 50aebc7..0b6c6a6 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -32,7 +32,6 @@ from home.src.frontend.forms import ( SubscribeToPlaylistForm, UserSettingsForm, ) -from home.src.frontend.searching import SearchHandler from home.src.index.channel import channel_overwrites from home.src.index.generic import Pagination from home.src.index.playlist import YoutubePlaylist @@ -200,12 +199,17 @@ class ArchivistResultsView(ArchivistViewConfig): def find_results(self): """add results and pagination to context""" - search = SearchHandler(self.es_search, data=self.data) - self.context["results"] = search.get_data() - self.pagination_handler.validate(search.max_hits) - self.context["max_hits"] = search.max_hits - self.context["pagination"] = self.pagination_handler.pagination - self.context["aggs"] = search.aggs + response, _ = ElasticWrap(self.es_search).get(self.data) + max_hits = response["hits"]["total"]["value"] + self.pagination_handler.validate(max_hits) + self.context.update( + { + "results": [i["_source"] for i in response["hits"]["hits"]], + "max_hits": max_hits, + "pagination": self.pagination_handler.pagination, + "aggs": response.get("aggregations"), + } + ) class MinView(View): @@ -499,7 +503,7 @@ class ChannelIdView(ChannelIdBaseView): self.channel_pages(channel_id) if self.context["results"]: - channel_info = self.context["results"][0]["source"]["channel"] + channel_info = self.context["results"][0]["channel"] channel_name = channel_info["channel_name"] else: # fall back channel lookup if no videos found From 6abec9401bfaf4ee4b773cb1e41aa943d5f9a59e Mon Sep 17 00:00:00 2001 From: Igor Rzegocki Date: Sat, 23 Sep 2023 15:00:46 +0200 Subject: [PATCH 19/25] Mute urlib3 ssl warnings when SSL verification is disabled --- tubearchivist/home/src/es/connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/es/connect.py 
b/tubearchivist/home/src/es/connect.py index afbdc61..43e2f6e 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -27,7 +27,7 @@ class ElasticWrap: self.url: str = f"{self.ES_URL}/{path}" self.auth: tuple[str, str] = (self.ES_USER, self.ES_PASS) - if not self.ES_DISABLE_VERIFY_SSL: + if self.ES_DISABLE_VERIFY_SSL: urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get( From 7801ed0d6037771cd17268ab89972410a663fe46 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 25 Sep 2023 10:14:14 +0700 Subject: [PATCH 20/25] fix find_results through SearchProcess --- tubearchivist/home/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 0b6c6a6..f5c771d 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -200,11 +200,12 @@ class ArchivistResultsView(ArchivistViewConfig): def find_results(self): """add results and pagination to context""" response, _ = ElasticWrap(self.es_search).get(self.data) + results = SearchProcess(response).process() max_hits = response["hits"]["total"]["value"] self.pagination_handler.validate(max_hits) self.context.update( { - "results": [i["_source"] for i in response["hits"]["hits"]], + "results": results, "max_hits": max_hits, "pagination": self.pagination_handler.pagination, "aggs": response.get("aggregations"), From 9d09d27fbadeba3f0c0b30a3571e588ab623f7b3 Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 25 Sep 2023 14:13:26 +0700 Subject: [PATCH 21/25] bump requirements --- tubearchivist/requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index 50f5f15..08e4e86 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,13 +1,13 @@ -apprise==1.4.5 -celery==5.3.1 -Django==4.2.4 +apprise==1.5.0 +celery==5.3.4 +Django==4.2.5 
django-auth-ldap==4.5.0 django-cors-headers==4.2.0 djangorestframework==3.14.0 -Pillow==10.0.0 +Pillow==10.0.1 redis==5.0.0 requests==2.31.0 ryd-client==0.0.6 uWSGI==2.0.22 whitenoise==6.5.0 -yt_dlp==2023.7.6 +yt_dlp==2023.9.24 From 77fef5de57b213cf57b6f09ed7a774bbbb0dc9cc Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 25 Sep 2023 14:53:12 +0700 Subject: [PATCH 22/25] fix standard duration str agg --- tubearchivist/api/src/search_processor.py | 14 +++++++++++++- tubearchivist/home/views.py | 3 ++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tubearchivist/api/src/search_processor.py b/tubearchivist/api/src/search_processor.py index 7c9d28e..232474d 100644 --- a/tubearchivist/api/src/search_processor.py +++ b/tubearchivist/api/src/search_processor.py @@ -8,7 +8,7 @@ import urllib.parse from home.src.download.thumbnails import ThumbManager from home.src.ta.config import AppConfig -from home.src.ta.helper import date_praser +from home.src.ta.helper import date_praser, get_duration_str class SearchProcess: @@ -163,3 +163,15 @@ class SearchProcess: subtitle_dict.update({"vid_thumb_url": f"/cache/{thumb_path}"}) return subtitle_dict + + +def process_aggs(response): + """convert aggs duration to str""" + + if response.get("aggregations"): + aggs = response["aggregations"] + if "total_duration" in aggs: + duration_sec = int(aggs["total_duration"]["value"]) + aggs["total_duration"].update( + {"value_str": get_duration_str(duration_sec)} + ) diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index f5c771d..c9227c4 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -8,7 +8,7 @@ import json import urllib.parse from time import sleep -from api.src.search_processor import SearchProcess +from api.src.search_processor import SearchProcess, process_aggs from django.conf import settings from django.contrib.auth import login from django.contrib.auth.forms import AuthenticationForm @@ -200,6 +200,7 @@ class 
ArchivistResultsView(ArchivistViewConfig): def find_results(self): """add results and pagination to context""" response, _ = ElasticWrap(self.es_search).get(self.data) + process_aggs(response) results = SearchProcess(response).process() max_hits = response["hits"]["total"]["value"] self.pagination_handler.validate(max_hits) From 515b7240471518bfab91749bcff9726c6ae88baf Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 25 Sep 2023 15:59:31 +0700 Subject: [PATCH 23/25] split json backup files, #406 --- tubearchivist/home/src/download/thumbnails.py | 6 ++-- tubearchivist/home/src/es/backup.py | 28 ++++++++++++------- tubearchivist/home/src/es/connect.py | 4 ++- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index 7041ec2..ca498c5 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -246,9 +246,10 @@ class ThumbManager(ThumbManagerBase): class ValidatorCallback: """handle callback validate thumbnails page by page""" - def __init__(self, source, index_name): + def __init__(self, source, index_name, counter=0): self.source = source self.index_name = index_name + self.counter = counter def run(self): """run the task for page""" @@ -384,9 +385,10 @@ class EmbedCallback: MEDIA_DIR = CONFIG["application"]["videos"] FORMAT = MP4Cover.FORMAT_JPEG - def __init__(self, source, index_name): + def __init__(self, source, index_name, counter=0): self.source = source self.index_name = index_name + self.counter = counter def run(self): """run embed""" diff --git a/tubearchivist/home/src/es/backup.py b/tubearchivist/home/src/es/backup.py index df1a481..3dc1cf5 100644 --- a/tubearchivist/home/src/es/backup.py +++ b/tubearchivist/home/src/es/backup.py @@ -18,6 +18,8 @@ from home.src.ta.helper import get_mapping, ignore_filelist class ElasticBackup: """dump index to nd-json files for later bulk import""" + INDEX_SPLIT = 
["comment"] + def __init__(self, reason=False, task=False): self.config = AppConfig().config self.cache_dir = self.config["application"]["cache_dir"] @@ -51,14 +53,18 @@ class ElasticBackup: def backup_index(self, index_name): """export all documents of a single index""" - paginate = IndexPaginate( - f"ta_{index_name}", - data={"query": {"match_all": {}}}, - keep_source=True, - callback=BackupCallback, - task=self.task, - total=self._get_total(index_name), - ) + paginate_kwargs = { + "data": {"query": {"match_all": {}}}, + "keep_source": True, + "callback": BackupCallback, + "task": self.task, + "total": self._get_total(index_name), + } + + if index_name in self.INDEX_SPLIT: + paginate_kwargs.update({"size": 200}) + + paginate = IndexPaginate(f"ta_{index_name}", **paginate_kwargs) _ = paginate.get_results() @staticmethod @@ -206,9 +212,10 @@ class ElasticBackup: class BackupCallback: """handle backup ndjson writer as callback for IndexPaginate""" - def __init__(self, source, index_name): + def __init__(self, source, index_name, counter=0): self.source = source self.index_name = index_name + self.counter = counter self.timestamp = datetime.now().strftime("%Y%m%d") def run(self): @@ -237,7 +244,8 @@ class BackupCallback: def _write_es_json(self, file_content): """write nd-json file for es _bulk API to disk""" cache_dir = AppConfig().config["application"]["cache_dir"] - file_name = f"es_{self.index_name.lstrip('ta_')}-{self.timestamp}.json" + index = self.index_name.lstrip("ta_") + file_name = f"es_{index}-{self.timestamp}-{self.counter}.json" file_path = os.path.join(cache_dir, "backup", file_name) with open(file_path, "a+", encoding="utf-8") as f: f.write(file_content) diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index 43e2f6e..a7c3ff5 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -204,7 +204,9 @@ class IndexPaginate: all_results.append(hit["_source"]) if 
self.kwargs.get("callback"): - self.kwargs.get("callback")(all_hits, self.index_name).run() + self.kwargs.get("callback")( + all_hits, self.index_name, counter=counter + ).run() if self.kwargs.get("task"): print(f"{self.index_name}: processing page {counter}") From 6a83756fb49fd0d23c9ecf19bf9f01260ba2232f Mon Sep 17 00:00:00 2001 From: Igor Rzegocki Date: Fri, 15 Sep 2023 20:32:15 +0200 Subject: [PATCH 24/25] support for auth forwarding proxy --- README.md | 3 +++ tubearchivist/config/settings.py | 14 +++++++++++++- tubearchivist/home/src/ta/auth.py | 10 ++++++++++ tubearchivist/home/urls.py | 22 +++++++++++++++++----- 4 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 tubearchivist/home/src/ta/auth.py diff --git a/README.md b/README.md index 4756258..49445a2 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,9 @@ Take a look at the example [docker-compose.yml](https://github.com/tubearchivist | TZ | Set your timezone for the scheduler | Required | | TA_PORT | Overwrite Nginx port | Optional | | TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional | +| TA_ENABLE_AUTH_PROXY | Enables support for forwarding auth in reverse proxies | [Read more](https://docs.tubearchivist.com/configuration/forward-auth/) | +| TA_AUTH_PROXY_USERNAME_HEADER | Header containing username to log in | Optional | +| TA_AUTH_PROXY_LOGOUT_URL | Logout URL for forwarded auth | Optional | | ES_URL | URL That ElasticSearch runs on | Optional | | ES_DISABLE_VERIFY_SSL | Disable ElasticSearch SSL certificate verification | Optional | | ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional | diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index 5629453..0e69cab 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -175,7 +175,6 @@ if bool(environ.get("TA_LDAP")): ldap.OPT_X_TLS_REQUIRE_CERT: ldap.OPT_X_TLS_NEVER, } - global AUTHENTICATION_BACKENDS
AUTHENTICATION_BACKENDS = ("django_auth_ldap.backend.LDAPBackend",) # Database @@ -211,6 +210,19 @@ AUTH_PASSWORD_VALIDATORS = [ AUTH_USER_MODEL = "home.Account" +# Forward-auth authentication +if bool(environ.get("TA_ENABLE_AUTH_PROXY")): + TA_AUTH_PROXY_USERNAME_HEADER = ( + environ.get("TA_AUTH_PROXY_USERNAME_HEADER") or "HTTP_REMOTE_USER" + ) + TA_AUTH_PROXY_LOGOUT_URL = environ.get("TA_AUTH_PROXY_LOGOUT_URL") + + MIDDLEWARE.append("home.src.ta.auth.HttpRemoteUserMiddleware") + + AUTHENTICATION_BACKENDS = ( + "django.contrib.auth.backends.RemoteUserBackend", + ) + # Internationalization # https://docs.djangoproject.com/en/3.2/topics/i18n/ diff --git a/tubearchivist/home/src/ta/auth.py b/tubearchivist/home/src/ta/auth.py new file mode 100644 index 0000000..0567db1 --- /dev/null +++ b/tubearchivist/home/src/ta/auth.py @@ -0,0 +1,10 @@ +from django.conf import settings +from django.contrib.auth.middleware import PersistentRemoteUserMiddleware + + +class HttpRemoteUserMiddleware(PersistentRemoteUserMiddleware): + """This class allows authentication via HTTP_REMOTE_USER which is set for + example by certain SSO applications. 
+ """ + + header = settings.TA_AUTH_PROXY_USERNAME_HEADER diff --git a/tubearchivist/home/urls.py b/tubearchivist/home/urls.py index 614c29b..2c2388c 100644 --- a/tubearchivist/home/urls.py +++ b/tubearchivist/home/urls.py @@ -3,18 +3,30 @@ from django.conf import settings from django.contrib.auth.decorators import login_required from django.contrib.auth.views import LogoutView +from django.shortcuts import redirect from django.urls import path from home import views -urlpatterns = [ - path("", login_required(views.HomeView.as_view()), name="home"), - path("login/", views.LoginView.as_view(), name="login"), - path( +if hasattr(settings, "TA_AUTH_PROXY_LOGOUT_URL"): + logout_path = path( + "logout/", + lambda request: redirect( + settings.TA_AUTH_PROXY_LOGOUT_URL, permanent=False + ), + name="logout", + ) +else: + logout_path = path( "logout/", LogoutView.as_view(), {"next_page": settings.LOGOUT_REDIRECT_URL}, name="logout", - ), + ) + +urlpatterns = [ + path("", login_required(views.HomeView.as_view()), name="home"), + path("login/", views.LoginView.as_view(), name="login"), + logout_path, path("about/", views.AboutView.as_view(), name="about"), path( "downloads/", From 88f5c58b8e7ec8cf46e998fbd0b3beabfebb7f5b Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 26 Sep 2023 09:23:32 +0700 Subject: [PATCH 25/25] add unstable tag --- tubearchivist/config/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index 0e69cab..0afef8c 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -269,4 +269,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [ # TA application settings TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist" -TA_VERSION = "v0.4.1" +TA_VERSION = "v0.4.2-unstable"