diff --git a/.github/ISSUE_TEMPLATE/INSTALLATION-HELP.yml b/.github/ISSUE_TEMPLATE/INSTALLATION-HELP.yml index 38e4fa2..2102c6a 100644 --- a/.github/ISSUE_TEMPLATE/INSTALLATION-HELP.yml +++ b/.github/ISSUE_TEMPLATE/INSTALLATION-HELP.yml @@ -38,6 +38,6 @@ body: attributes: label: Relevant log output description: Please copy and paste any relevant Docker logs. This will be automatically formatted into code, so no need for backticks. - render: shell + render: Shell validations: required: true diff --git a/tubearchivist/api/src/aggs.py b/tubearchivist/api/src/aggs.py index 8921d86..e2c65db 100644 --- a/tubearchivist/api/src/aggs.py +++ b/tubearchivist/api/src/aggs.py @@ -24,62 +24,159 @@ class AggBase: raise NotImplementedError -class Primary(AggBase): - """primary aggregation for total documents indexed""" +class Video(AggBase): + """get video stats""" - name = "primary" - path = "ta_video,ta_channel,ta_playlist,ta_subtitle,ta_download/_search" + name = "video_stats" + path = "ta_video/_search" data = { "size": 0, "aggs": { "video_type": { - "filter": {"exists": {"field": "active"}}, - "aggs": {"filtered": {"terms": {"field": "vid_type"}}}, + "terms": {"field": "vid_type"}, + "aggs": { + "media_size": {"sum": {"field": "media_size"}}, + "duration": {"sum": {"field": "player.duration"}}, + }, }, - "channel_total": {"value_count": {"field": "channel_active"}}, - "channel_sub": {"terms": {"field": "channel_subscribed"}}, - "playlist_total": {"value_count": {"field": "playlist_active"}}, - "playlist_sub": {"terms": {"field": "playlist_subscribed"}}, - "download": {"terms": {"field": "status"}}, + "video_active": { + "terms": {"field": "active"}, + "aggs": { + "media_size": {"sum": {"field": "media_size"}}, + "duration": {"sum": {"field": "player.duration"}}, + }, + }, + "video_media_size": {"sum": {"field": "media_size"}}, + "video_count": {"value_count": {"field": "youtube_id"}}, + "duration": {"sum": {"field": "player.duration"}}, }, } def process(self): - """make the call""" + """process aggregation""" aggregations = self.get() - videos = {"total": aggregations["video_type"].get("doc_count")} - videos.update( - { - i.get("key"): i.get("doc_count") - for i in aggregations["video_type"]["filtered"]["buckets"] - } - ) - channels = {"total": aggregations["channel_total"].get("value")} - channels.update( - { - "sub_" + i.get("key_as_string"): i.get("doc_count") - for i in aggregations["channel_sub"]["buckets"] - } - ) - playlists = {"total": aggregations["playlist_total"].get("value")} - playlists.update( - { - "sub_" + i.get("key_as_string"): i.get("doc_count") - for i in aggregations["playlist_sub"]["buckets"] - } - ) - downloads = { - i.get("key"): i.get("doc_count") - for i in aggregations["download"]["buckets"] + duration = int(aggregations["duration"]["value"]) + response = { + "doc_count": aggregations["video_count"]["value"], + "media_size": int(aggregations["video_media_size"]["value"]), + "duration": duration, + "duration_str": get_duration_str(duration), } + for bucket in aggregations["video_type"]["buckets"]: + duration = int(bucket["duration"].get("value")) + response.update( + { + f"type_{bucket['key']}": { + "doc_count": bucket.get("doc_count"), + "media_size": int(bucket["media_size"].get("value")), + "duration": duration, + "duration_str": get_duration_str(duration), + } + } + ) + + for bucket in aggregations["video_active"]["buckets"]: + duration = int(bucket["duration"].get("value")) + response.update( + { + f"active_{bucket['key_as_string']}": { + "doc_count": bucket.get("doc_count"), + "media_size": int(bucket["media_size"].get("value")), + "duration": duration, + "duration_str": get_duration_str(duration), + } + } + ) + + return response + + +class Channel(AggBase): + """get channel stats""" + + name = "channel_stats" + path = "ta_channel/_search" + data = { + "size": 0, + "aggs": { + "channel_count": {"value_count": {"field": "channel_id"}}, + "channel_active": {"terms": {"field": "channel_active"}}, + "channel_subscribed": {"terms": {"field": "channel_subscribed"}}, + }, + } + + def process(self): + """process aggregation""" + aggregations = self.get() response = { - "videos": videos, - "channels": channels, - "playlists": playlists, - "downloads": downloads, + "doc_count": aggregations["channel_count"].get("value"), } + for bucket in aggregations["channel_active"]["buckets"]: + key = f"active_{bucket['key_as_string']}" + response.update({key: bucket.get("doc_count")}) + for bucket in aggregations["channel_subscribed"]["buckets"]: + key = f"subscribed_{bucket['key_as_string']}" + response.update({key: bucket.get("doc_count")}) + + return response + + +class Playlist(AggBase): + """get playlist stats""" + + name = "playlist_stats" + path = "ta_playlist/_search" + data = { + "size": 0, + "aggs": { + "playlist_count": {"value_count": {"field": "playlist_id"}}, + "playlist_active": {"terms": {"field": "playlist_active"}}, + "playlist_subscribed": {"terms": {"field": "playlist_subscribed"}}, + }, + } + + def process(self): + """process aggregation""" + aggregations = self.get() + response = {"doc_count": aggregations["playlist_count"].get("value")} + for bucket in aggregations["playlist_active"]["buckets"]: + key = f"active_{bucket['key_as_string']}" + response.update({key: bucket.get("doc_count")}) + for bucket in aggregations["playlist_subscribed"]["buckets"]: + key = f"subscribed_{bucket['key_as_string']}" + response.update({key: bucket.get("doc_count")}) + + return response + + +class Download(AggBase): + """get downloads queue stats""" + + name = "download_queue_stats" + path = "ta_download/_search" + data = { + "size": 0, + "aggs": { + "status": {"terms": {"field": "status"}}, + "video_type": { + "filter": {"term": {"status": "pending"}}, + "aggs": {"type_pending": {"terms": {"field": "vid_type"}}}, + }, + }, + } + + def process(self): + """process aggregation""" + aggregations = self.get() + response = {} + for bucket in aggregations["status"]["buckets"]: + response.update({bucket["key"]: bucket.get("doc_count")}) + + for bucket in aggregations["video_type"]["type_pending"]["buckets"]: + key = f"pending_{bucket['key']}" + response.update({key: bucket.get("doc_count")}) return response diff --git a/tubearchivist/api/urls.py b/tubearchivist/api/urls.py index 8b075f2..b03547c 100644 --- a/tubearchivist/api/urls.py +++ b/tubearchivist/api/urls.py @@ -152,9 +152,24 @@ urlpatterns = [ name="api-notification", ), path( - "stats/primary/", - views.StatPrimaryView.as_view(), - name="api-stats-primary", + "stats/video/", + views.StatVideoView.as_view(), + name="api-stats-video", + ), + path( + "stats/channel/", + views.StatChannelView.as_view(), + name="api-stats-channel", + ), + path( + "stats/playlist/", + views.StatPlaylistView.as_view(), + name="api-stats-playlist", + ), + path( + "stats/download/", + views.StatDownloadView.as_view(), + name="api-stats-download", ), path( "stats/watch/", diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 0e64d10..e026dba 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -1,6 +1,14 @@ """all API views""" -from api.src.aggs import BiggestChannel, DownloadHist, Primary, WatchProgress +from api.src.aggs import ( + BiggestChannel, + Channel, + Download, + DownloadHist, + Playlist, + Video, + WatchProgress, +) from api.src.search_processor import SearchProcess from home.src.download.queue import PendingInteract from home.src.download.subscriptions import ( @@ -1141,16 +1149,52 @@ class NotificationView(ApiBaseView): return Response(RedisArchivist().list_items(query)) -class StatPrimaryView(ApiBaseView): - """resolves to /api/stats/primary/ - GET: return document count +class StatVideoView(ApiBaseView): + """resolves to /api/stats/video/ + GET: return video stats """ def get(self, request): """get stats""" # pylint: disable=unused-argument - return Response(Primary().process()) + return Response(Video().process()) + + +class StatChannelView(ApiBaseView): + """resolves to /api/stats/channel/ + GET: return channel stats + """ + + def get(self, request): + """get stats""" + # pylint: disable=unused-argument + + return Response(Channel().process()) + + +class StatPlaylistView(ApiBaseView): + """resolves to /api/stats/playlist/ + GET: return playlist stats + """ + + def get(self, request): + """get stats""" + # pylint: disable=unused-argument + + return Response(Playlist().process()) + + +class StatDownloadView(ApiBaseView): + """resolves to /api/stats/download/ + GET: return download stats + """ + + def get(self, request): + """get stats""" + # pylint: disable=unused-argument + + return Response(Download().process()) class StatWatchProgress(ApiBaseView): diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index d6dda4b..52ee884 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -74,7 +74,7 @@ "type": "boolean" }, "integrate_sponsorblock": { - "type" : "boolean" + "type": "boolean" } } } @@ -168,7 +168,7 @@ "type": "boolean" }, "integrate_sponsorblock": { - "type" : "boolean" + "type": "boolean" } } } @@ -236,19 +236,37 @@ "comment_count": { "type": "long" }, - "stats" : { - "properties" : { - "average_rating" : { - "type" : "float" + "stats": { + "properties": { + "average_rating": { + "type": "float" }, - "dislike_count" : { - "type" : "long" + "dislike_count": { + "type": "long" }, - "like_count" : { - "type" : "long" + "like_count": { + "type": "long" }, - "view_count" : { - "type" : "long" + "view_count": { + "type": "long" + } + } + }, + "player": { + "properties": { + "duration": { + "type": "long" + }, + "duration_str": { + "type": "keyword", + "index": false + }, + "watched": { + "type": "boolean" + }, + "watched_date": { + "type": "date", + "format": "epoch_second" } } }, @@ -314,28 +332,28 @@ "is_enabled": { "type": "boolean" }, - "segments" : { - "properties" : { - "UUID" : { + "segments": { + "properties": { + "UUID": { "type": "keyword" }, - "actionType" : { + "actionType": { "type": "keyword" }, - "category" : { + "category": { "type": "keyword" }, - "locked" : { - "type" : "short" + "locked": { + "type": "short" }, - "segment" : { - "type" : "float" + "segment": { + "type": "float" }, - "videoDuration" : { - "type" : "float" + "videoDuration": { + "type": "float" }, - "votes" : { - "type" : "long" + "votes": { + "type": "long" } } } @@ -516,7 +534,7 @@ "format": "epoch_second" }, "subtitle_index": { - "type" : "long" + "type": "long" }, "subtitle_lang": { "type": "keyword" @@ -525,7 +543,7 @@ "type": "keyword" }, "subtitle_line": { - "type" : "text", + "type": "text", "analyzer": "english" } }, @@ -560,14 +578,14 @@ "type": "keyword" }, "comment_text": { - "type" : "text" + "type": "text" }, "comment_timestamp": { "type": "date", "format": "epoch_second" }, "comment_time_text": { - "type" : "text" + "type": "text" }, "comment_likecount": { "type": "long" @@ -613,4 +631,4 @@ } } ] -} +} \ No newline at end of file diff --git a/tubearchivist/home/src/index/comments.py b/tubearchivist/home/src/index/comments.py index 5f25602..50e0f17 100644 --- a/tubearchivist/home/src/index/comments.py +++ b/tubearchivist/home/src/index/comments.py @@ -77,7 +77,7 @@ class Comments: def get_yt_comments(self): """get comments from youtube""" yt_obs = self.build_yt_obs() - info_json = YtWrap(yt_obs).extract(self.youtube_id) + info_json = YtWrap(yt_obs, config=self.config).extract(self.youtube_id) if not info_json: return False, False diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index e87b30a..53b62d1 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -105,11 +105,13 @@ class ReindexPopulate(ReindexBase): """get total hits from index""" index_name = reindex_config["index_name"] active_key = reindex_config["active_key"] - path = f"{index_name}/_search?filter_path=hits.total" - data = {"query": {"match": {active_key: True}}} - response, _ = ElasticWrap(path).post(data=data) - total_hits = response["hits"]["total"]["value"] - return total_hits + data = { + "query": {"term": {active_key: {"value": True}}}, + "_source": False, + } + total = IndexPaginate(index_name, data, keep_source=True).get_results() + + return len(total) def _get_daily_should(self, total_hits): """calc how many should reindex daily""" @@ -123,7 +125,7 @@ class ReindexPopulate(ReindexBase): """get outdated from index_name""" index_name = reindex_config["index_name"] refresh_key = reindex_config["refresh_key"] - now_lte = self.now - self.interval * 24 * 60 * 60 + now_lte = str(self.now - self.interval * 24 * 60 * 60) must_list = [ {"match": {reindex_config["active_key"]: True}}, {"range": {refresh_key: {"lte": now_lte}}}, diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index b414350..d06ce5e 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -188,11 +188,11 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): # build json_data basics self.json_data = { "title": self.youtube_meta["title"], - "description": self.youtube_meta["description"], - "category": self.youtube_meta["categories"], + "description": self.youtube_meta.get("description", ""), + "category": self.youtube_meta.get("categories", []), "vid_thumb_url": self.youtube_meta["thumbnail"], "vid_thumb_base64": base64_blur, - "tags": self.youtube_meta["tags"], + "tags": self.youtube_meta.get("tags", []), "published": published, "vid_last_refresh": last_refresh, "date_downloaded": last_refresh, @@ -210,20 +210,13 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): def _add_stats(self): """add stats dicst to json_data""" - # likes - like_count = self.youtube_meta.get("like_count", 0) - dislike_count = self.youtube_meta.get("dislike_count", 0) - average_rating = self.youtube_meta.get("average_rating", 0) - self.json_data.update( - { - "stats": { - "view_count": self.youtube_meta["view_count"], - "like_count": like_count, - "dislike_count": dislike_count, - "average_rating": average_rating, - } - } - ) + stats = { + "view_count": self.youtube_meta.get("view_count", 0), + "like_count": self.youtube_meta.get("like_count", 0), + "dislike_count": self.youtube_meta.get("dislike_count", 0), + "average_rating": self.youtube_meta.get("average_rating", 0), + } + self.json_data.update({"stats": stats}) def build_dl_cache_path(self): """find video path in dl cache""" diff --git a/tubearchivist/home/src/ta/users.py b/tubearchivist/home/src/ta/users.py index bb6a387..7181b3f 100644 --- a/tubearchivist/home/src/ta/users.py +++ b/tubearchivist/home/src/ta/users.py @@ -50,7 +50,14 @@ class UserConfig: VALID_STYLESHEETS = get_stylesheets() VALID_VIEW_STYLE = ["grid", "list"] VALID_SORT_ORDER = ["asc", "desc"] - VALID_SORT_BY = ["published", "downloaded", "views", "likes"] + VALID_SORT_BY = [ + "published", + "downloaded", + "views", + "likes", + "duration", + "filesize", + ] VALID_GRID_ITEMS = range(3, 8) def __init__(self, user_id: str): diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html index c51fbd2..6c8b653 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -82,6 +82,8 @@ + +