Style channel configure form, #build

Changed:
- layout of the channel configure form
- hide if no overwrites set
- validate min es version 7.17.x
- improved build to include debug tools

Commit fa60f2e5f8
@@ -25,7 +25,7 @@ This is my setup I have landed on, YMMV:
 ```bash
 ./deploy.sh test
 ```
-- The command above will also copy the file `tubarchivist/testing.sh` into the working folder of the container. Running this script will install additional debugging tools I regularly use in testing.
+- The command above will call the docker build command with `--build-arg INSTALL_DEBUG=1` to install additional useful debug tools.
 - The `test` argument takes another optional argument to build for a specific architecture valid options are: `amd64`, `arm64` and `multi`, default is `amd64`.
 - This `deploy.sh` file is not meant to be universally usable for every possible environment but could serve as an idea on how to automatically rebuild containers to test changes - customize to your liking.

Dockerfile (10 changed lines)
@@ -1,7 +1,8 @@
 # build the tube archivist image from default python slim image
 
-FROM python:3.10.2-slim-bullseye
+FROM python:3.10.3-slim-bullseye
 ARG TARGETPLATFORM
+ARG INSTALL_DEBUG
 
 ENV PYTHONUNBUFFERED 1
 
@@ -26,6 +27,13 @@ RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
     apt-get -y update && apt-get -y install --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* \
     ; fi
 
+# install debug tools for testing environment
+RUN if [ "$INSTALL_DEBUG" ] ; then \
+    apt-get -y update && apt-get -y install --no-install-recommends \
+    vim htop bmon net-tools iputils-ping procps \
+    && pip install --no-cache-dir ipython --src /usr/local/src \
+    ; fi
+
 # make folders
 RUN mkdir /cache
 RUN mkdir /youtube
deploy.sh (12 changed lines)
@@ -43,7 +43,10 @@ function sync_test {
     # pass argument to build for specific platform
 
     host="tubearchivist.local"
+    # make base folder
+    ssh "$host" "mkdir -p docker"
 
+    # copy project files to build image
     rsync -a --progress --delete-after \
         --exclude ".git" \
         --exclude ".gitignore" \
@@ -52,8 +55,8 @@ function sync_test {
         --exclude "db.sqlite3" \
         . -e ssh "$host":tubearchivist
 
-    # uncomment or copy your own docker-compose file
-    # rsync -r --progress --delete docker-compose.yml -e ssh "$host":docker
+    # copy default docker-compose file if not exist
+    rsync --progress --ignore-existing docker-compose.yml -e ssh "$host":docker
 
     if [[ $1 = "amd64" ]]; then
         platform="linux/amd64"
@@ -65,12 +68,9 @@ function sync_test {
         platform="linux/amd64"
     fi
 
-    ssh "$host" "docker buildx build --platform $platform -t bbilly1/tubearchivist:latest tubearchivist --load"
+    ssh "$host" "docker buildx build --build-arg INSTALL_DEBUG=1 --platform $platform -t bbilly1/tubearchivist:latest tubearchivist --load"
     ssh "$host" 'docker-compose -f docker/docker-compose.yml up -d'
 
-    ssh "$host" 'docker cp tubearchivist/tubearchivist/testing.sh tubearchivist:/app/testing.sh'
-    ssh "$host" 'docker exec tubearchivist chmod +x /app/testing.sh'
-
 }
 
@@ -1,7 +1,7 @@
 """all API views"""
 
-import requests
 from home.src.download.thumbnails import ThumbManager
+from home.src.es.connect import ElasticWrap
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import UrlListParser
 from home.src.ta.ta_redis import RedisArchivist
@@ -24,31 +24,21 @@ class ApiBaseView(APIView):
 
     def __init__(self):
         super().__init__()
-        self.response = {"data": False}
+        self.response = {"data": False, "config": AppConfig().config}
         self.status_code = False
         self.context = False
-        self.default_conf = AppConfig().config
-
-    def config_builder(self):
-        """build confic context"""
-        self.context = {
-            "es_url": self.default_conf["application"]["es_url"],
-            "es_auth": self.default_conf["application"]["es_auth"],
-        }
-        self.response["config"] = self.default_conf
 
     def get_document(self, document_id):
         """get single document from es"""
-        es_url = self.context["es_url"]
-        url = f"{es_url}{self.search_base}{document_id}"
-        print(url)
-        response = requests.get(url, auth=self.context["es_auth"])
+        path = f"{self.search_base}{document_id}"
+        print(path)
+        response, status_code = ElasticWrap(path).get()
         try:
-            self.response["data"] = response.json()["_source"]
+            self.response["data"] = response["_source"]
         except KeyError:
             print(f"item not found: {document_id}")
             self.response["data"] = False
-        self.status_code = response.status_code
+        self.status_code = status_code
 
     def process_keys(self):
         """process keys for frontend"""
@@ -59,7 +49,7 @@ class ApiBaseView(APIView):
         if "vid_thumb_url" in all_keys:
             youtube_id = self.response["data"]["youtube_id"]
             vid_thumb_url = ThumbManager().vid_thumb_path(youtube_id)
-            cache_dir = self.default_conf["application"]["cache_dir"]
+            cache_dir = self.response["config"]["application"]["cache_dir"]
             new_thumb = f"{cache_dir}/{vid_thumb_url}"
             self.response["data"]["vid_thumb_url"] = new_thumb
         if "subtitles" in all_keys:
@@ -75,13 +65,11 @@ class ApiBaseView(APIView):
 
     def get_document_list(self, data):
         """get a list of results"""
-        es_url = self.context["es_url"]
-        url = f"{es_url}{self.search_base}"
-        print(url)
-        response = requests.get(url, json=data, auth=self.context["es_auth"])
-        all_hits = response.json()["hits"]["hits"]
+        print(self.search_base)
+        response, status_code = ElasticWrap(self.search_base).get(data=data)
+        all_hits = response["hits"]["hits"]
         self.response["data"] = [i["_source"] for i in all_hits]
-        self.status_code = response.status_code
+        self.status_code = status_code
 
 
 class VideoApiView(ApiBaseView):
@@ -89,12 +77,11 @@ class VideoApiView(ApiBaseView):
     GET: returns metadata dict of video
     """
 
-    search_base = "/ta_video/_doc/"
+    search_base = "ta_video/_doc/"
 
     def get(self, request, video_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(video_id)
         self.process_keys()
         return Response(self.response, status=self.status_code)
@@ -143,12 +130,11 @@ class ChannelApiView(ApiBaseView):
     GET: returns metadata dict of channel
     """
 
-    search_base = "/ta_channel/_doc/"
+    search_base = "ta_channel/_doc/"
 
     def get(self, request, channel_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(channel_id)
         return Response(self.response, status=self.status_code)
 
@@ -159,13 +145,12 @@ class ChannelApiListView(ApiBaseView):
     POST: edit a list of channels
     """
 
-    search_base = "/ta_channel/_search/"
+    search_base = "ta_channel/_search/"
 
     def get(self, request):
         # pylint: disable=unused-argument
         """get request"""
         data = {"query": {"match_all": {}}}
-        self.config_builder()
         self.get_document_list(data)
         self.get_paginate()
 
@@ -194,12 +179,11 @@ class PlaylistApiView(ApiBaseView):
     GET: returns metadata dict of playlist
     """
 
-    search_base = "/ta_playlist/_doc/"
+    search_base = "ta_playlist/_doc/"
 
     def get(self, request, playlist_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(playlist_id)
         return Response(self.response, status=self.status_code)
 
@@ -209,12 +193,11 @@ class DownloadApiView(ApiBaseView):
     GET: returns metadata dict of an item in the download queue
     """
 
-    search_base = "/ta_download/_doc/"
+    search_base = "ta_download/_doc/"
 
    def get(self, request, video_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(video_id)
         return Response(self.response, status=self.status_code)
 
@@ -225,13 +208,12 @@ class DownloadApiListView(ApiBaseView):
     POST: add a list of videos to download queue
     """
 
-    search_base = "/ta_download/_search/"
+    search_base = "ta_download/_search/"
 
     def get(self, request):
         # pylint: disable=unused-argument
         """get request"""
         data = {"query": {"match_all": {}}}
-        self.config_builder()
         self.get_document_list(data)
         self.get_paginate()
         return Response(self.response)
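The API views above drop the hand-rolled `requests` calls and the `config_builder()` step in favour of the project's `ElasticWrap` helper. A minimal sketch of the call pattern as it appears in this commit: paths are index-relative, the ES URL and auth are resolved inside the wrapper, and every method returns a `(response, status_code)` tuple. Anything beyond these calls (constructor options, error handling) is not shown in this diff and is assumed here; the video ID is a made-up example.

```python
# Sketch only: mirrors the ElasticWrap usage visible in this commit.
from home.src.es.connect import ElasticWrap

video_id = "dQw4w9WgXcQ"  # hypothetical YouTube ID for illustration

# single document lookup, path relative to the ES host
response, status_code = ElasticWrap(f"ta_video/_doc/{video_id}").get()
if status_code == 200:
    video = response["_source"]

# search with a query body
data = {"query": {"match_all": {}}}
response, _ = ElasticWrap("ta_channel/_search").get(data=data)
channels = [i["_source"] for i in response["hits"]["hits"]]

# writes follow the same shape
_, _ = ElasticWrap(f"ta_download/_doc/{video_id}").delete()
```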
@@ -1,8 +1,10 @@
 """handle custom startup functions"""
 
 import os
+import sys
 
 from django.apps import AppConfig
+from home.src.es.connect import ElasticWrap
 from home.src.es.index_setup import index_check
 from home.src.ta.config import AppConfig as ArchivistConfig
 from home.src.ta.ta_redis import RedisArchivist
@@ -11,6 +13,9 @@ from home.src.ta.ta_redis import RedisArchivist
 class StartupCheck:
     """checks to run at application startup"""
 
+    MIN_MAJOR, MAX_MAJOR = 7, 7
+    MIN_MINOR = 17
+
     def __init__(self):
         self.config_handler = ArchivistConfig()
         self.redis_con = RedisArchivist()
@@ -19,6 +24,7 @@ class StartupCheck:
     def run(self):
         """run all startup checks"""
         print("run startup checks")
+        self.es_version_check()
         self.release_lock()
         index_check()
         self.sync_redis_state()
@@ -72,6 +78,33 @@ class StartupCheck:
         if response:
             print("deleted leftover key from redis: " + lock)
 
+    def is_invalid(self, version):
+        """return true if es version is invalid, false if ok"""
+        major, minor = [int(i) for i in version.split(".")[:2]]
+        if not self.MIN_MAJOR <= major <= self.MAX_MAJOR:
+            return True
+
+        if minor >= self.MIN_MINOR:
+            return False
+
+        return True
+
+    def es_version_check(self):
+        """check for minimal elasticsearch version"""
+        response, _ = ElasticWrap("/").get()
+        version = response["version"]["number"]
+        invalid = self.is_invalid(version)
+
+        if invalid:
+            print(
+                "minial required elasticsearch version: "
+                + f"{self.MIN_MAJOR}.{self.MIN_MINOR}, "
+                + "please update to recommended version."
+            )
+            sys.exit(1)
+
+        print("elasticsearch version check passed")
+
 
 class HomeConfig(AppConfig):
     """call startup funcs"""
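The new startup check maps directly to the "validate min es version 7.17.x" line in the commit message: only 7.x releases at or above 7.17 pass, everything else aborts startup. A small self-contained illustration of the same logic classifying a few made-up version strings under `MIN_MAJOR = MAX_MAJOR = 7` and `MIN_MINOR = 17`:

```python
# Illustration only; the version strings are invented examples.
def is_invalid(version, min_major=7, max_major=7, min_minor=17):
    """same logic as StartupCheck.is_invalid in the diff above"""
    major, minor = [int(i) for i in version.split(".")[:2]]
    if not min_major <= major <= max_major:
        return True
    if minor >= min_minor:
        return False
    return True

assert is_invalid("7.17.1") is False  # supported
assert is_invalid("7.16.3") is True   # minor too old
assert is_invalid("6.8.23") is True   # major below the allowed window
assert is_invalid("8.1.0") is True    # major above the allowed window
```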
@@ -11,11 +11,10 @@ import shutil
 from datetime import datetime
 from time import sleep
 
-import requests
 import yt_dlp
 from home.src.download.queue import PendingList
 from home.src.download.subscriptions import PlaylistSubscription
-from home.src.es.connect import IndexPaginate
+from home.src.es.connect import ElasticWrap, IndexPaginate
 from home.src.index.channel import YoutubeChannel
 from home.src.index.playlist import YoutubePlaylist
 from home.src.index.video import YoutubeVideo, index_new_video
@@ -162,7 +161,7 @@ class VideoDownloader:
         pending.get_channels()
         self.video_overwrites = pending.video_overwrites
 
-        queue = RedisQueue("dl_queue")
+        queue = RedisQueue()
 
         limit_queue = self.config["downloads"]["limit_count"]
         if limit_queue:
@@ -212,8 +211,7 @@ class VideoDownloader:
             RedisArchivist().set_message("message:download", mess_dict)
             return
 
-        queue = RedisQueue("dl_queue")
-        queue.add_list(to_add)
+        RedisQueue().add_list(to_add)
 
     @staticmethod
     def _progress_hook(response):
@@ -371,14 +369,11 @@ class VideoDownloader:
         if host_uid and host_gid:
             os.chown(new_file_path, host_uid, host_gid)
 
-    def _delete_from_pending(self, youtube_id):
+    @staticmethod
+    def _delete_from_pending(youtube_id):
         """delete downloaded video from pending index if its there"""
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        url = f"{es_url}/ta_download/_doc/{youtube_id}"
-        response = requests.delete(url, auth=es_auth)
-        if not response.ok and not response.status_code == 404:
-            print(response.text)
+        path = f"ta_download/_doc/{youtube_id}"
+        _, _ = ElasticWrap(path).delete()
 
     def _add_subscribed_channels(self):
         """add all channels subscribed to refresh"""
@@ -93,11 +93,12 @@ class IndexPaginate:
 
     DEFAULT_SIZE = 500
 
-    def __init__(self, index_name, data, size=False):
+    def __init__(self, index_name, data, size=False, keep_source=False):
         self.index_name = index_name
         self.data = data
         self.pit_id = False
         self.size = size
+        self.keep_source = keep_source
 
     def get_results(self):
         """get all results"""
@@ -132,7 +133,10 @@ class IndexPaginate:
         all_hits = response["hits"]["hits"]
         if all_hits:
             for hit in all_hits:
-                source = hit["_source"]
+                if self.keep_source:
+                    source = hit
+                else:
+                    source = hit["_source"]
                 search_after = hit["sort"]
                 all_results.append(source)
                 # update search_after with last hit data
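The new `keep_source` flag decides whether the paginator collects the bare `_source` documents or the complete hits, which the backup export below relies on to keep document IDs around. A hedged sketch of the difference, using an invented Elasticsearch hit:

```python
# Sketch only: what a single collected item looks like either way,
# using a made-up hit as returned in response["hits"]["hits"].
hit = {
    "_id": "abc123",
    "_source": {"title": "some video", "active": True},
    "sort": [42],
}

# keep_source=False (default): only the document body is collected
default_item = hit["_source"]  # {"title": "some video", "active": True}

# keep_source=True: the whole hit is kept, so "_id" stays available,
# which is what ElasticBackup.get_all_documents needs for its export
backup_item = hit  # includes "_id", "_source" and "sort"

assert "_id" in backup_item and "_id" not in default_item
```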
@@ -10,7 +10,7 @@ import os
 import zipfile
 from datetime import datetime
 
-import requests
+from home.src.es.connect import ElasticWrap, IndexPaginate
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import ignore_filelist
 
@@ -20,11 +20,6 @@ class ElasticIndex:
     handle mapping and settings on elastic search for a given index
     """
 
-    CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
-    HEADERS = {"Content-type": "application/json"}
-
     def __init__(self, index_name, expected_map, expected_set):
         self.index_name = index_name
         self.expected_map = expected_map
@@ -33,15 +28,9 @@ class ElasticIndex:
 
     def index_exists(self):
         """check if index already exists and return mapping if it does"""
-        index_name = self.index_name
-        url = f"{self.ES_URL}/ta_{index_name}"
-        response = requests.get(url, auth=self.ES_AUTH)
-        exists = response.ok
-
-        if exists:
-            details = response.json()[f"ta_{index_name}"]
-        else:
-            details = False
+        response, status_code = ElasticWrap(f"ta_{self.index_name}").get()
+        exists = status_code == 200
+        details = response.get(f"ta_{self.index_name}", False)
 
         return exists, details
 
@@ -110,63 +99,41 @@ class ElasticIndex:
 
     def rebuild_index(self):
         """rebuild with new mapping"""
-        # backup
         self.reindex("backup")
-        # delete original
         self.delete_index(backup=False)
-        # create new
         self.create_blank()
         self.reindex("restore")
-        # delete backup
         self.delete_index()
 
     def reindex(self, method):
         """create on elastic search"""
-        index_name = self.index_name
         if method == "backup":
-            source = f"ta_{index_name}"
-            destination = f"ta_{index_name}_backup"
+            source = f"ta_{self.index_name}"
+            destination = f"ta_{self.index_name}_backup"
         elif method == "restore":
-            source = f"ta_{index_name}_backup"
-            destination = f"ta_{index_name}"
+            source = f"ta_{self.index_name}_backup"
+            destination = f"ta_{self.index_name}"
 
-        query = {"source": {"index": source}, "dest": {"index": destination}}
-        data = json.dumps(query)
-        url = self.ES_URL + "/_reindex?refresh=true"
-        response = requests.post(
-            url=url, data=data, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not response.ok:
-            print(response.text)
+        data = {"source": {"index": source}, "dest": {"index": destination}}
+        _, _ = ElasticWrap("_reindex?refresh=true").post(data=data)
 
     def delete_index(self, backup=True):
         """delete index passed as argument"""
+        path = f"ta_{self.index_name}"
         if backup:
-            url = f"{self.ES_URL}/ta_{self.index_name}_backup"
-        else:
-            url = f"{self.ES_URL}/ta_{self.index_name}"
-        response = requests.delete(url, auth=self.ES_AUTH)
-        if not response.ok:
-            print(response.text)
+            path = path + "_backup"
+
+        _, _ = ElasticWrap(path).delete()
 
     def create_blank(self):
         """apply new mapping and settings for blank new index"""
-        expected_map = self.expected_map
-        expected_set = self.expected_set
-        # stich payload
-        payload = {}
-        if expected_set:
-            payload.update({"settings": expected_set})
-        if expected_map:
-            payload.update({"mappings": {"properties": expected_map}})
-        # create
-        url = f"{self.ES_URL}/ta_{self.index_name}"
-        data = json.dumps(payload)
-        response = requests.put(
-            url=url, data=data, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not response.ok:
-            print(response.text)
+        data = {}
+        if self.expected_set:
+            data.update({"settings": self.expected_set})
+        if self.expected_map:
+            data.update({"mappings": {"properties": self.expected_map}})
+
+        _, _ = ElasticWrap(f"ta_{self.index_name}").put(data)
 
 
 class ElasticBackup:
@@ -174,52 +141,21 @@ class ElasticBackup:
 
     def __init__(self, index_config, reason):
         self.config = AppConfig().config
+        self.cache_dir = self.config["application"]["cache_dir"]
         self.index_config = index_config
         self.reason = reason
         self.timestamp = datetime.now().strftime("%Y%m%d")
         self.backup_files = []
 
-    def get_all_documents(self, index_name):
+    @staticmethod
+    def get_all_documents(index_name):
         """export all documents of a single index"""
-        headers = {"Content-type": "application/json"}
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        # get PIT ID
-        url = f"{es_url}/ta_{index_name}/_pit?keep_alive=1m"
-        response = requests.post(url, auth=es_auth)
-        json_data = json.loads(response.text)
-        pit_id = json_data["id"]
-        # build query
         data = {
             "query": {"match_all": {}},
-            "size": 100,
-            "pit": {"id": pit_id, "keep_alive": "1m"},
-            "sort": [{"_id": {"order": "asc"}}],
+            "sort": [{"_doc": {"order": "desc"}}],
         }
-        query_str = json.dumps(data)
-        url = es_url + "/_search"
-        # loop until nothing left
-        all_results = []
-        while True:
-            response = requests.get(
-                url, data=query_str, headers=headers, auth=es_auth
-            )
-            json_data = json.loads(response.text)
-            all_hits = json_data["hits"]["hits"]
-            if all_hits:
-                for hit in all_hits:
-                    search_after = hit["sort"]
-                    all_results.append(hit)
-                # update search_after with last hit data
-                data["search_after"] = search_after
-                query_str = json.dumps(data)
-            else:
-                break
-        # clean up PIT
-        query_str = json.dumps({"id": pit_id})
-        requests.delete(
-            es_url + "/_pit", data=query_str, headers=headers, auth=es_auth
-        )
+        paginate = IndexPaginate(f"ta_{index_name}", data, keep_source=True)
+        all_results = paginate.get_results()
 
         return all_results
 
@@ -244,9 +180,8 @@ class ElasticBackup:
 
     def write_es_json(self, file_content, index_name):
         """write nd-json file for es _bulk API to disk"""
-        cache_dir = self.config["application"]["cache_dir"]
         file_name = f"es_{index_name}-{self.timestamp}.json"
-        file_path = os.path.join(cache_dir, "backup", file_name)
+        file_path = os.path.join(self.cache_dir, "backup", file_name)
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(file_content)
 
@@ -254,9 +189,8 @@ class ElasticBackup:
 
     def write_ta_json(self, all_results, index_name):
         """write generic json file to disk"""
-        cache_dir = self.config["application"]["cache_dir"]
         file_name = f"ta_{index_name}-{self.timestamp}.json"
-        file_path = os.path.join(cache_dir, "backup", file_name)
+        file_path = os.path.join(self.cache_dir, "backup", file_name)
         to_write = [i["_source"] for i in all_results]
         file_content = json.dumps(to_write)
         with open(file_path, "w", encoding="utf-8") as f:
@@ -266,9 +200,8 @@ class ElasticBackup:
 
     def zip_it(self):
         """pack it up into single zip file"""
-        cache_dir = self.config["application"]["cache_dir"]
         file_name = f"ta_backup-{self.timestamp}-{self.reason}.zip"
-        backup_folder = os.path.join(cache_dir, "backup")
+        backup_folder = os.path.join(self.cache_dir, "backup")
         backup_file = os.path.join(backup_folder, file_name)
 
         with zipfile.ZipFile(
@@ -283,29 +216,18 @@ class ElasticBackup:
 
     def post_bulk_restore(self, file_name):
         """send bulk to es"""
-        cache_dir = self.config["application"]["cache_dir"]
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        headers = {"Content-type": "application/x-ndjson"}
-        file_path = os.path.join(cache_dir, file_name)
+        file_path = os.path.join(self.cache_dir, file_name)
 
         with open(file_path, "r", encoding="utf-8") as f:
-            query_str = f.read()
+            data = f.read()
 
-        if not query_str.strip():
+        if not data.strip():
             return
 
-        url = es_url + "/_bulk"
-        request = requests.post(
-            url, data=query_str, headers=headers, auth=es_auth
-        )
-        if not request.ok:
-            print(request.text)
+        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
 
     def get_all_backup_files(self):
         """build all available backup files for view"""
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")
         backup_files = os.listdir(backup_dir)
         all_backup_files = ignore_filelist(backup_files)
         all_available_backups = [
@@ -336,8 +258,7 @@ class ElasticBackup:
 
     def unpack_zip_backup(self, filename):
         """extract backup zip and return filelist"""
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")
         file_path = os.path.join(backup_dir, filename)
 
         with zipfile.ZipFile(file_path, "r") as z:
@@ -348,9 +269,7 @@ class ElasticBackup:
 
     def restore_json_files(self, zip_content):
         """go through the unpacked files and restore"""
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")
 
         for json_f in zip_content:
 
@@ -364,14 +283,13 @@ class ElasticBackup:
         self.post_bulk_restore(file_name)
         os.remove(file_name)
 
-    def index_exists(self, index_name):
+    @staticmethod
+    def index_exists(index_name):
         """check if index already exists to skip"""
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        url = f"{es_url}/ta_{index_name}"
-        response = requests.get(url, auth=es_auth)
+        _, status_code = ElasticWrap(f"ta_{index_name}").get()
+        exists = status_code == 200
 
-        return response.ok
+        return exists
 
     def rotate_backup(self):
         """delete old backups if needed"""
@@ -386,8 +304,7 @@ class ElasticBackup:
             print("no backup files to rotate")
             return
 
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")
 
         all_to_delete = auto[rotate:]
         for to_delete in all_to_delete:
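`post_bulk_restore` now hands the nd-json payload straight to `ElasticWrap("_bulk").post(data=data, ndjson=True)`. For orientation, a sketch of what such an nd-json body looks like: alternating action and document lines, one JSON object per line, with a trailing newline as the `_bulk` API expects. The document values below are made up; the real files are written by `write_es_json` from exported index content.

```python
import json

# Invented documents for illustration only.
docs = [
    {"_index": "ta_video", "_id": "abc123", "_source": {"title": "clip one"}},
    {"_index": "ta_video", "_id": "def456", "_source": {"title": "clip two"}},
]

bulk_lines = []
for doc in docs:
    # action line, then the document body on the next line
    bulk_lines.append(json.dumps({"index": {"_index": doc["_index"], "_id": doc["_id"]}}))
    bulk_lines.append(json.dumps(doc["_source"]))
bulk_lines.append("")  # trailing newline required by the _bulk API

data = "\n".join(bulk_lines)
# _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)  # as in the diff above
```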
@@ -114,7 +114,7 @@ class PostData:
         print(f"ignore video {video_id}")
         PendingInteract(video_id=video_id, status="ignore").update_status()
         # also clear from redis queue
-        RedisQueue("dl_queue").clear_item(video_id)
+        RedisQueue().clear_item(video_id)
         return {"success": True}
 
     @staticmethod
@@ -132,7 +132,7 @@ class PostData:
         to_execute = self.exec_val
         if to_execute == "stop":
            print("stopping download queue")
-            RedisQueue("dl_queue").clear()
+            RedisQueue().clear()
         elif to_execute == "kill":
             task_id = RedisArchivist().get_message("dl_queue_id")
             if not isinstance(task_id, str):
@@ -3,22 +3,15 @@ functionality:
 - handle watched state for videos, channels and playlists
 """
 
-import json
 from datetime import datetime
 
-import requests
-from home.src.ta.config import AppConfig
+from home.src.es.connect import ElasticWrap
 from home.src.ta.helper import UrlListParser
 
 
 class WatchState:
     """handle watched checkbox for videos and channels"""
 
-    CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
-    HEADERS = {"Content-type": "application/json"}
-
     def __init__(self, youtube_id):
         self.youtube_id = youtube_id
         self.stamp = int(datetime.now().strftime("%s"))
@@ -33,7 +26,7 @@ class WatchState:
         elif url_type == "playlist":
             self.mark_playlist_watched()
 
-        print(f"marked {self.youtube_id} as watched")
+        print(f"{self.youtube_id}: marked as watched")
 
     def mark_as_unwatched(self):
         """revert watched state to false"""
@@ -41,7 +34,7 @@ class WatchState:
         if url_type == "video":
             self.mark_vid_watched(revert=True)
 
-        print(f"revert {self.youtube_id} as unwatched")
+        print(f"{self.youtube_id}: revert as unwatched")
 
     def dedect_type(self):
         """find youtube id type"""
@@ -52,77 +45,54 @@ class WatchState:
 
     def mark_vid_watched(self, revert=False):
         """change watched status of single video"""
-        url = self.ES_URL + "/ta_video/_update/" + self.youtube_id
+        path = f"ta_video/_update/{self.youtube_id}"
         data = {
             "doc": {"player": {"watched": True, "watched_date": self.stamp}}
         }
         if revert:
             data["doc"]["player"]["watched"] = False
 
-        payload = json.dumps(data)
-        request = requests.post(
-            url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+        response, status_code = ElasticWrap(path).post(data=data)
+        if status_code != 200:
+            print(response)
             raise ValueError("failed to mark video as watched")
 
     def mark_channel_watched(self):
         """change watched status of every video in channel"""
+        path = "ta_video/_update_by_query"
+        must_list = [
+            {"term": {"channel.channel_id": {"value": self.youtube_id}}},
+            {"term": {"player.watched": {"value": False}}},
+        ]
         data = {
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "term": {
-                                "channel.channel_id": {
-                                    "value": self.youtube_id
-                                }
-                            }
-                        },
-                        {"term": {"player.watched": {"value": False}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "script": {
                 "source": "ctx._source.player['watched'] = true",
                 "lang": "painless",
             },
         }
-        payload = json.dumps(data)
-        url = f"{self.ES_URL}/ta_video/_update_by_query"
-        request = requests.post(
-            url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+        response, status_code = ElasticWrap(path).post(data=data)
+        if status_code != 200:
+            print(response)
             raise ValueError("failed mark channel as watched")
 
     def mark_playlist_watched(self):
         """change watched state of all videos in playlist"""
+        path = "ta_video/_update_by_query"
+        must_list = [
+            {"term": {"playlist.keyword": {"value": self.youtube_id}}},
+            {"term": {"player.watched": {"value": False}}},
+        ]
         data = {
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "term": {
-                                "playlist.keyword": {"value": self.youtube_id}
-                            }
-                        },
-                        {"term": {"player.watched": {"value": False}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "script": {
                 "source": "ctx._source.player['watched'] = true",
                 "lang": "painless",
             },
         }
-        payload = json.dumps(data)
-        url = f"{self.ES_URL}/ta_video/_update_by_query"
-        request = requests.post(
-            url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+        response, status_code = ElasticWrap(path).post(data=data)
+        if status_code != 200:
+            print(response)
             raise ValueError("failed mark playlist as watched")
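The refactored `mark_channel_watched` builds the same request body as the verbose version it replaces, it just assembles the bool/must clause from `must_list` first. For reference, the assembled payload posted to `ta_video/_update_by_query` (the channel ID value is a placeholder):

```python
# Equivalent payload produced by both the old and the new code,
# shown with a placeholder channel ID.
youtube_id = "UC-placeholder"
data = {
    "query": {
        "bool": {
            "must": [
                {"term": {"channel.channel_id": {"value": youtube_id}}},
                {"term": {"player.watched": {"value": False}}},
            ]
        }
    },
    "script": {
        "source": "ctx._source.player['watched'] = true",
        "lang": "painless",
    },
}
```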
@@ -10,11 +10,10 @@ import os
 import re
 import shutil
 import subprocess
-from datetime import datetime
 
-import requests
 from home.src.download.queue import PendingList
 from home.src.download.yt_dlp_handler import VideoDownloader
+from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
 from home.src.index.video import index_new_video
 from home.src.ta.config import AppConfig
@@ -26,8 +25,6 @@ class FilesystemScanner:
     """handle scanning and fixing from filesystem"""
 
     CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
     VIDEOS = CONFIG["application"]["videos"]
 
     def __init__(self):
@@ -148,25 +145,16 @@ class FilesystemScanner:
             bulk_list.append(json.dumps(source))
         # add last newline
         bulk_list.append("\n")
-        query_str = "\n".join(bulk_list)
-        # make the call
-        headers = {"Content-type": "application/x-ndjson"}
-        url = self.ES_URL + "/_bulk"
-        request = requests.post(
-            url, data=query_str, headers=headers, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+        data = "\n".join(bulk_list)
+        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
 
     def delete_from_index(self):
         """find indexed but deleted mediafile"""
         for indexed in self.to_delete:
             youtube_id = indexed[0]
             print(f"deleting {youtube_id} from index")
-            url = self.ES_URL + "/ta_video/_doc/" + youtube_id
-            request = requests.delete(url, auth=self.ES_AUTH)
-            if not request.ok:
-                print(request.text)
+            path = f"ta_video/_doc/{youtube_id}"
+            _, _ = ElasticWrap(path).delete()
 
 
 class ManualImport:
@@ -319,10 +307,7 @@ def scan_filesystem():
 
 def reindex_old_documents():
     """daily refresh of old documents"""
-    # continue if needed
-    reindex_handler = Reindex()
-    reindex_handler.check_outdated()
-    reindex_handler.reindex()
-    # set timestamp
-    now = int(datetime.now().strftime("%s"))
-    RedisArchivist().set_message("last_reindex", now, expire=False)
+    handler = Reindex()
+    handler.check_outdated()
+    handler.reindex()
+    RedisArchivist().set_message("last_reindex", handler.now, expire=False)
@@ -4,85 +4,60 @@ functionality:
 - index and update in es
 """
 
-import json
 from datetime import datetime
 from math import ceil
 from time import sleep
 
-import requests
 from home.src.download.queue import PendingList
 from home.src.download.thumbnails import ThumbManager
+from home.src.es.connect import ElasticWrap
 from home.src.index.channel import YoutubeChannel
 from home.src.index.playlist import YoutubePlaylist
 from home.src.index.video import YoutubeVideo
 from home.src.ta.config import AppConfig
-from home.src.ta.helper import get_total_hits
 
 
 class Reindex:
     """check for outdated documents and refresh data from youtube"""
 
+    MATCH_FIELD = {
+        "ta_video": "active",
+        "ta_channel": "channel_active",
+        "ta_playlist": "playlist_active",
+    }
+    MULTIPLY = 1.2
+
     def __init__(self):
         # config
-        config = AppConfig().config
-        self.sleep_interval = config["downloads"]["sleep_interval"]
-        self.es_url = config["application"]["es_url"]
-        self.es_auth = config["application"]["es_auth"]
-        self.refresh_interval = config["scheduler"]["check_reindex_days"]
-        self.integrate_ryd = config["downloads"]["integrate_ryd"]
+        self.now = int(datetime.now().strftime("%s"))
+        self.config = AppConfig().config
+        self.interval = self.config["scheduler"]["check_reindex_days"]
         # scan
         self.all_youtube_ids = False
         self.all_channel_ids = False
         self.all_playlist_ids = False
 
-    def get_daily(self):
+    def _get_daily(self):
         """get daily refresh values"""
-        total_videos = get_total_hits(
-            "ta_video", self.es_url, self.es_auth, "active"
-        )
-        video_daily = ceil(total_videos / self.refresh_interval * 1.2)
-        total_channels = get_total_hits(
-            "ta_channel", self.es_url, self.es_auth, "channel_active"
-        )
-        channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
-        total_playlists = get_total_hits(
-            "ta_playlist", self.es_url, self.es_auth, "playlist_active"
-        )
-        playlist_daily = ceil(total_playlists / self.refresh_interval * 1.2)
+        total_videos = self._get_total_hits("ta_video")
+        video_daily = ceil(total_videos / self.interval * self.MULTIPLY)
+        total_channels = self._get_total_hits("ta_channel")
+        channel_daily = ceil(total_channels / self.interval * self.MULTIPLY)
+        total_playlists = self._get_total_hits("ta_playlist")
+        playlist_daily = ceil(total_playlists / self.interval * self.MULTIPLY)
         return (video_daily, channel_daily, playlist_daily)
 
-    def get_outdated_vids(self, size):
-        """get daily videos to refresh"""
-        headers = {"Content-type": "application/json"}
-        now = int(datetime.now().strftime("%s"))
-        now_lte = now - self.refresh_interval * 24 * 60 * 60
-        data = {
-            "size": size,
-            "query": {
-                "bool": {
-                    "must": [
-                        {"match": {"active": True}},
-                        {"range": {"vid_last_refresh": {"lte": now_lte}}},
-                    ]
-                }
-            },
-            "sort": [{"vid_last_refresh": {"order": "asc"}}],
-            "_source": False,
-        }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_video/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
-        return all_youtube_ids
+    def _get_total_hits(self, index):
+        """get total hits from index"""
+        match_field = self.MATCH_FIELD[index]
+        path = f"{index}/_search?filter_path=hits.total"
+        data = {"query": {"match": {match_field: True}}}
+        response, _ = ElasticWrap(path).post(data=data)
+        total_hits = response["hits"]["total"]["value"]
+        return total_hits
 
-    def get_unrated_vids(self):
-        """get all videos without rating if ryd integration is enabled"""
-        headers = {"Content-type": "application/json"}
+    def _get_unrated_vids(self):
+        """get max 200 videos without rating if ryd integration is enabled"""
         data = {
             "size": 200,
             "query": {
@@ -91,86 +66,78 @@ class Reindex:
                 }
             },
         }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_video/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        missing_rating = [i["_id"] for i in response_dict["hits"]["hits"]]
+        response, _ = ElasticWrap("ta_video/_search").get(data=data)
+
+        missing_rating = [i["_id"] for i in response["hits"]["hits"]]
         self.all_youtube_ids = self.all_youtube_ids + missing_rating
 
-    def get_outdated_channels(self, size):
-        """get daily channels to refresh"""
-        headers = {"Content-type": "application/json"}
-        now = int(datetime.now().strftime("%s"))
-        now_lte = now - self.refresh_interval * 24 * 60 * 60
+    def _get_outdated_vids(self, size):
+        """get daily videos to refresh"""
+        now_lte = self.now - self.interval * 24 * 60 * 60
+        must_list = [
+            {"match": {"active": True}},
+            {"range": {"vid_last_refresh": {"lte": now_lte}}},
+        ]
+        data = {
+            "size": size,
+            "query": {"bool": {"must": must_list}},
+            "sort": [{"vid_last_refresh": {"order": "asc"}}],
+            "_source": False,
+        }
+        response, _ = ElasticWrap("ta_video/_search").get(data=data)
+
+        all_youtube_ids = [i["_id"] for i in response["hits"]["hits"]]
+        return all_youtube_ids
+
+    def _get_outdated_channels(self, size):
+        """get daily channels to refresh"""
+        now_lte = self.now - self.interval * 24 * 60 * 60
+        must_list = [
+            {"match": {"channel_active": True}},
+            {"range": {"channel_last_refresh": {"lte": now_lte}}},
+        ]
         data = {
             "size": size,
-            "query": {
-                "bool": {
-                    "must": [
-                        {"match": {"channel_active": True}},
-                        {"range": {"channel_last_refresh": {"lte": now_lte}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "sort": [{"channel_last_refresh": {"order": "asc"}}],
             "_source": False,
         }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_channel/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
+        response, _ = ElasticWrap("ta_channel/_search").get(data=data)
+
+        all_channel_ids = [i["_id"] for i in response["hits"]["hits"]]
         return all_channel_ids
 
-    def get_outdated_playlists(self, size):
+    def _get_outdated_playlists(self, size):
         """get daily outdated playlists to refresh"""
-        headers = {"Content-type": "application/json"}
-        now = int(datetime.now().strftime("%s"))
-        now_lte = now - self.refresh_interval * 24 * 60 * 60
+        now_lte = self.now - self.interval * 24 * 60 * 60
+        must_list = [
+            {"match": {"playlist_active": True}},
+            {"range": {"playlist_last_refresh": {"lte": now_lte}}},
+        ]
         data = {
             "size": size,
-            "query": {
-                "bool": {
-                    "must": [
-                        {"match": {"playlist_active": True}},
-                        {"range": {"playlist_last_refresh": {"lte": now_lte}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "sort": [{"playlist_last_refresh": {"order": "asc"}}],
             "_source": False,
         }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_playlist/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        all_playlist_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
+        response, _ = ElasticWrap("ta_playlist/_search").get(data=data)
+
+        all_playlist_ids = [i["_id"] for i in response["hits"]["hits"]]
         return all_playlist_ids
 
     def check_outdated(self):
         """add missing vids and channels"""
-        video_daily, channel_daily, playlist_daily = self.get_daily()
-        self.all_youtube_ids = self.get_outdated_vids(video_daily)
-        self.all_channel_ids = self.get_outdated_channels(channel_daily)
-        self.all_playlist_ids = self.get_outdated_playlists(playlist_daily)
-        if self.integrate_ryd:
-            self.get_unrated_vids()
+        video_daily, channel_daily, playlist_daily = self._get_daily()
+        self.all_youtube_ids = self._get_outdated_vids(video_daily)
+        self.all_channel_ids = self._get_outdated_channels(channel_daily)
+        self.all_playlist_ids = self._get_outdated_playlists(playlist_daily)
+
+        integrate_ryd = self.config["downloads"]["integrate_ryd"]
+        if integrate_ryd:
+            self._get_unrated_vids()
 
     @staticmethod
-    def reindex_single_video(youtube_id):
+    def _reindex_single_video(youtube_id):
         """refresh data for single video"""
         video = YoutubeVideo(youtube_id)
 
@@ -188,6 +155,8 @@ class Reindex:
             return
 
         video.delete_subtitles()
+        video.check_subtitles()
+
         # add back
         video.json_data["player"] = player
         video.json_data["date_downloaded"] = date_downloaded
@@ -204,20 +173,21 @@ class Reindex:
         return
 
     @staticmethod
-    def reindex_single_channel(channel_id):
+    def _reindex_single_channel(channel_id):
         """refresh channel data and sync to videos"""
         channel = YoutubeChannel(channel_id)
         channel.get_from_es()
         subscribed = channel.json_data["channel_subscribed"]
-        overwrites = channel.json_data["channel_overwrites"]
+        overwrites = channel.json_data.get("channel_overwrites", False)
         channel.get_from_youtube()
         channel.json_data["channel_subscribed"] = subscribed
-        channel.json_data["channel_overwrites"] = overwrites
+        if overwrites:
+            channel.json_data["channel_overwrites"] = overwrites
         channel.upload_to_es()
         channel.sync_to_videos()
 
     @staticmethod
-    def reindex_single_playlist(playlist_id, all_indexed_ids):
+    def _reindex_single_playlist(playlist_id, all_indexed_ids):
         """refresh playlist data"""
         playlist = YoutubePlaylist(playlist_id)
         playlist.get_from_es()
@@ -234,18 +204,19 @@ class Reindex:
 
     def reindex(self):
         """reindex what's needed"""
+        sleep_interval = self.config["downloads"]["sleep_interval"]
         # videos
         print(f"reindexing {len(self.all_youtube_ids)} videos")
         for youtube_id in self.all_youtube_ids:
-            self.reindex_single_video(youtube_id)
-            if self.sleep_interval:
-                sleep(self.sleep_interval)
+            self._reindex_single_video(youtube_id)
+            if sleep_interval:
+                sleep(sleep_interval)
         # channels
         print(f"reindexing {len(self.all_channel_ids)} channels")
         for channel_id in self.all_channel_ids:
-            self.reindex_single_channel(channel_id)
-            if self.sleep_interval:
-                sleep(self.sleep_interval)
+            self._reindex_single_channel(channel_id)
+            if sleep_interval:
+                sleep(sleep_interval)
         # playlist
         print(f"reindexing {len(self.all_playlist_ids)} playlists")
         if self.all_playlist_ids:
@@ -253,6 +224,6 @@ class Reindex:
             handler.get_indexed()
             all_indexed_ids = [i["youtube_id"] for i in handler.all_videos]
|
||||||
for playlist_id in self.all_playlist_ids:
|
for playlist_id in self.all_playlist_ids:
|
||||||
self.reindex_single_playlist(playlist_id, all_indexed_ids)
|
self._reindex_single_playlist(playlist_id, all_indexed_ids)
|
||||||
if self.sleep_interval:
|
if sleep_interval:
|
||||||
sleep(self.sleep_interval)
|
sleep(sleep_interval)
|
||||||
|
@ -296,7 +296,6 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||||||
self._add_stats()
|
self._add_stats()
|
||||||
self.add_file_path()
|
self.add_file_path()
|
||||||
self.add_player()
|
self.add_player()
|
||||||
self._check_subtitles()
|
|
||||||
if self.config["downloads"]["integrate_ryd"]:
|
if self.config["downloads"]["integrate_ryd"]:
|
||||||
self._get_ryd_stats()
|
self._get_ryd_stats()
|
||||||
|
|
||||||
@ -369,7 +368,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||||||
channel_dir = os.path.join(self.app_conf["videos"], channel)
|
channel_dir = os.path.join(self.app_conf["videos"], channel)
|
||||||
all_files = os.listdir(channel_dir)
|
all_files = os.listdir(channel_dir)
|
||||||
for file in all_files:
|
for file in all_files:
|
||||||
if self.youtube_id in file:
|
if self.youtube_id in file and file.endswith(".mp4"):
|
||||||
vid_path = os.path.join(channel_dir, file)
|
vid_path = os.path.join(channel_dir, file)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
@ -441,7 +440,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _check_subtitles(self):
|
def check_subtitles(self):
|
||||||
"""optionally add subtitles"""
|
"""optionally add subtitles"""
|
||||||
handler = YoutubeSubtitle(self)
|
handler = YoutubeSubtitle(self)
|
||||||
subtitles = handler.get_subtitles()
|
subtitles = handler.get_subtitles()
|
||||||
@ -451,8 +450,9 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||||||
|
|
||||||
def delete_subtitles(self):
|
def delete_subtitles(self):
|
||||||
"""delete indexed subtitles"""
|
"""delete indexed subtitles"""
|
||||||
|
path = "ta_subtitle/_delete_by_query?refresh=true"
|
||||||
data = {"query": {"term": {"youtube_id": {"value": self.youtube_id}}}}
|
data = {"query": {"term": {"youtube_id": {"value": self.youtube_id}}}}
|
||||||
_, _ = ElasticWrap("ta_subtitle/_delete_by_query").post(data=data)
|
_, _ = ElasticWrap(path).post(data=data)
|
||||||
|
|
||||||
|
|
||||||
def index_new_video(youtube_id):
|
def index_new_video(youtube_id):
|
||||||
@ -462,5 +462,6 @@ def index_new_video(youtube_id):
|
|||||||
if not video.json_data:
|
if not video.json_data:
|
||||||
raise ValueError("failed to get metadata for " + youtube_id)
|
raise ValueError("failed to get metadata for " + youtube_id)
|
||||||
|
|
||||||
|
video.check_subtitles()
|
||||||
video.upload_to_es()
|
video.upload_to_es()
|
||||||
return video.json_data
|
return video.json_data
|
||||||
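The hunks above drop the hand-rolled `requests` calls (manual headers, `json.dumps`, auth and error printing) in favour of the project's `ElasticWrap` helper and pre-build the `must` clauses before the query dict. A minimal sketch of that pattern, assuming a wrapper whose `get(data=...)` returns the parsed body plus a status code; `SimpleESWrap`, the connection defaults and `get_outdated_ids` are illustrative stand-ins, not the project's actual helper:

```python
from datetime import datetime

import requests


class SimpleESWrap:
    """illustrative stand-in for an ES wrapper that owns url, auth and headers"""

    def __init__(self, path, es_url="http://localhost:9200", es_auth=("elastic", "secret")):
        # assumption: base url and credentials would normally come from app config
        self.url = f"{es_url}/{path}"
        self.es_auth = es_auth

    def get(self, data):
        """run a search and return the parsed body plus the status code"""
        response = requests.get(self.url, json=data, auth=self.es_auth, timeout=10)
        return response.json(), response.status_code


def get_outdated_ids(index_name, timestamp_field, interval_days=90, size=50):
    """collect ids of documents whose last refresh is older than the interval"""
    now_lte = int(datetime.now().timestamp()) - interval_days * 24 * 60 * 60
    must_list = [{"range": {timestamp_field: {"lte": now_lte}}}]
    data = {
        "size": size,
        "query": {"bool": {"must": must_list}},
        "sort": [{timestamp_field: {"order": "asc"}}],
        "_source": False,
    }
    response, _ = SimpleESWrap(f"{index_name}/_search").get(data=data)
    return [i["_id"] for i in response["hits"]["hits"]]


# example: get_outdated_ids("ta_channel", "channel_last_refresh")
```

Keeping the URL, auth and headers in one wrapper is what lets each caller shrink to a single `get()` plus a list comprehension over `hits`.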
@@ -3,31 +3,15 @@ Loose collection of helper functions
 - don't import AppConfig class here to avoid circular imports
 """

-import json
 import re
 import string
 import subprocess
 import unicodedata
 from urllib.parse import parse_qs, urlparse

-import requests
 import yt_dlp


-def get_total_hits(index, es_url, es_auth, match_field):
-    """get total hits from index"""
-    headers = {"Content-type": "application/json"}
-    data = {"query": {"match": {match_field: True}}}
-    payload = json.dumps(data)
-    url = f"{es_url}/{index}/_search?filter_path=hits.total"
-    request = requests.post(url, data=payload, headers=headers, auth=es_auth)
-    if not request.ok:
-        print(request.text)
-    total_json = json.loads(request.text)
-    total_hits = total_json["hits"]["total"]["value"]
-    return total_hits
-
-
 def clean_string(file_name):
     """clean string to only asci characters"""
     whitelist = "-_.() " + string.ascii_letters + string.digits
@@ -11,12 +11,20 @@ import redis
 from home.src.ta.helper import ignore_filelist


-class RedisArchivist:
-    """collection of methods to interact with redis"""
+class RedisBase:
+    """connection base for redis"""

     REDIS_HOST = os.environ.get("REDIS_HOST")
     REDIS_PORT = os.environ.get("REDIS_PORT") or 6379
     NAME_SPACE = "ta:"

+    def __init__(self):
+        self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)
+
+
+class RedisArchivist(RedisBase):
+    """collection of methods to interact with redis"""
+
     CHANNELS = [
         "download",
         "add",
@@ -27,14 +35,9 @@ class RedisArchivist:
         "setting",
     ]

-    def __init__(self):
-        self.redis_connection = redis.Redis(
-            host=self.REDIS_HOST, port=self.REDIS_PORT
-        )
-
     def set_message(self, key, message, expire=True):
         """write new message to redis"""
-        self.redis_connection.execute_command(
+        self.conn.execute_command(
             "JSON.SET", self.NAME_SPACE + key, ".", json.dumps(message)
         )

@@ -43,15 +46,11 @@ class RedisArchivist:
             secs = 20
         else:
             secs = expire
-        self.redis_connection.execute_command(
-            "EXPIRE", self.NAME_SPACE + key, secs
-        )
+        self.conn.execute_command("EXPIRE", self.NAME_SPACE + key, secs)

     def get_message(self, key):
         """get message dict from redis"""
-        reply = self.redis_connection.execute_command(
-            "JSON.GET", self.NAME_SPACE + key
-        )
+        reply = self.conn.execute_command("JSON.GET", self.NAME_SPACE + key)
         if reply:
             json_str = json.loads(reply)
         else:
@@ -61,7 +60,7 @@ class RedisArchivist:

     def list_items(self, query):
         """list all matches"""
-        reply = self.redis_connection.execute_command(
+        reply = self.conn.execute_command(
             "KEYS", self.NAME_SPACE + query + "*"
         )
         all_matches = [i.decode().lstrip(self.NAME_SPACE) for i in reply]
@@ -74,14 +73,12 @@ class RedisArchivist:

     def del_message(self, key):
         """delete key from redis"""
-        response = self.redis_connection.execute_command(
-            "DEL", self.NAME_SPACE + key
-        )
+        response = self.conn.execute_command("DEL", self.NAME_SPACE + key)
         return response

     def get_lock(self, lock_key):
         """handle lock for task management"""
-        redis_lock = self.redis_connection.lock(self.NAME_SPACE + lock_key)
+        redis_lock = self.conn.lock(self.NAME_SPACE + lock_key)
         return redis_lock

     def get_progress(self):
@@ -89,7 +86,7 @@ class RedisArchivist:
         all_messages = []
         for channel in self.CHANNELS:
             key = "message:" + channel
-            reply = self.redis_connection.execute_command(
+            reply = self.conn.execute_command(
                 "JSON.GET", self.NAME_SPACE + key
             )
             if reply:
@@ -120,19 +117,12 @@ class RedisArchivist:
         return mess_dict


-class RedisQueue:
+class RedisQueue(RedisBase):
     """dynamically interact with the download queue in redis"""

-    REDIS_HOST = os.environ.get("REDIS_HOST")
-    REDIS_PORT = os.environ.get("REDIS_PORT")
-    NAME_SPACE = "ta:"
-
-    if not REDIS_PORT:
-        REDIS_PORT = 6379
-
-    def __init__(self, key):
-        self.key = self.NAME_SPACE + key
-        self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)
+    def __init__(self):
+        super().__init__()
+        self.key = self.NAME_SPACE + "dl_queue"

     def get_all(self):
         """return all elements in list"""
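The Redis changes above pull the connection setup into a shared `RedisBase`, so `RedisArchivist` and `RedisQueue` both inherit a single `self.conn` instead of each building their own client. A minimal sketch of that inheritance pattern with the `redis` package; apart from `RedisBase`, the class and method names here are illustrative, and plain `SET`/`GET`/list commands stand in for the RedisJSON calls the project uses:

```python
import os

import redis


class RedisBase:
    """hold the shared connection details in one place"""

    REDIS_HOST = os.environ.get("REDIS_HOST", "localhost")
    REDIS_PORT = int(os.environ.get("REDIS_PORT") or 6379)
    NAME_SPACE = "ta:"

    def __init__(self):
        self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)


class MessageStore(RedisBase):
    """illustrative key/value consumer of the shared connection"""

    def set_message(self, key, value):
        self.conn.set(self.NAME_SPACE + key, value)

    def get_message(self, key):
        return self.conn.get(self.NAME_SPACE + key)


class DownloadQueue(RedisBase):
    """illustrative list consumer pinned to a single queue key"""

    def __init__(self):
        super().__init__()
        self.key = self.NAME_SPACE + "dl_queue"

    def add(self, item):
        self.conn.rpush(self.key, item)

    def get_all(self):
        return [i.decode() for i in self.conn.lrange(self.key, 0, -1)]
```

Because the queue key is now fixed in the subclass constructor, callers instantiate it without arguments, which is why the task code below changes `RedisQueue("dl_queue")` to `RedisQueue()`.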
@@ -8,8 +8,8 @@ Functionality:

 import os

-import home.apps as startup_apps
 from celery import Celery, shared_task
+from home.apps import StartupCheck
 from home.src.download.queue import PendingList
 from home.src.download.subscriptions import (
     ChannelSubscription,
@@ -98,7 +98,7 @@ def download_pending():
 @shared_task
 def download_single(youtube_id):
     """start download single video now"""
-    queue = RedisQueue("dl_queue")
+    queue = RedisQueue()
     queue.add_priority(youtube_id)
     print("Added to queue with priority: " + youtube_id)
     # start queue if needed
@@ -181,7 +181,7 @@ def kill_dl(task_id):
     app.control.revoke(task_id, terminate=True)

     _ = RedisArchivist().del_message("dl_queue_id")
-    RedisQueue("dl_queue").clear()
+    RedisQueue().clear()

     # clear cache
     cache_dir = os.path.join(CONFIG["application"]["cache_dir"], "download")
@@ -274,5 +274,5 @@ try:
     app.conf.beat_schedule = ScheduleBuilder().build_schedule()
 except KeyError:
     # update path from v0.0.8 to v0.0.9 to load new defaults
-    startup_apps.sync_redis_state()
+    StartupCheck().sync_redis_state()
     app.conf.beat_schedule = ScheduleBuilder().build_schedule()
@@ -53,36 +53,44 @@
 <p>Total Videos archived: {{ max_hits }}</p>
 <p>Watched: <button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button></p>
 {% endif %}
+<button {% if channel_info.channel_overwrites %} class="danger-button"{% endif %} onclick="showOverwrite()" title="Overwrite settings for channel {{ channel_info.channel_name }}">Configure</button>
 <a href="/playlist/?search={{ channel_info.channel_id }}" title="Show all playlists belonging to {{ channel_info.channel_name }}"><button>Show Playlists</button></a>
 </div>
 </div>
 </div>
-<div class="info-box">
+<div id="overwrite-form" class="info-box{% if not channel_info.channel_overwrites %} hidden-overwrite{% endif %}">
 <div class="info-box-item">
-<form action="/channel/{{ channel_info.channel_id }}/" method="POST">
+<form class="overwrite-form" action="/channel/{{ channel_info.channel_id }}/" method="POST">
 {% csrf_token %}
-<p>Download format: <span class="settings-current">
-{% if channel_info.channel_overwrites.download_format %}
-{{ channel_info.channel_overwrites.download_format }}
-{% else %}
-False
-{% endif %}</span></p>
-{{ channel_overwrite_form.download_format }}<br>
-<p>Auto delete videos after x days: <span class="settings-current">
-{% if channel_info.channel_overwrites.autodelete_days %}
-{{ channel_info.channel_overwrites.autodelete_days }}
-{% else %}
-False
-{% endif %}</span></p>
-{{ channel_overwrite_form.autodelete_days }}<br>
-<p>Index playlists: <span class="settings-current">
-{% if channel_info.channel_overwrites.index_playlists %}
-{{ channel_info.channel_overwrites.index_playlists }}
-{% else %}
-False
-{% endif %}</span></p>
-{{ channel_overwrite_form.index_playlists }}<br>
-<button type="submit">Save</button>
+<div class="overwrite-form-item">
+<p>Download format: <span class="settings-current">
+{% if channel_info.channel_overwrites.download_format %}
+{{ channel_info.channel_overwrites.download_format }}
+{% else %}
+False
+{% endif %}</span></p>
+{{ channel_overwrite_form.download_format }}<br>
+</div>
+<div class="overwrite-form-item">
+<p>Auto delete watched videos after x days: <span class="settings-current">
+{% if channel_info.channel_overwrites.autodelete_days %}
+{{ channel_info.channel_overwrites.autodelete_days }}
+{% else %}
+False
+{% endif %}</span></p>
+{{ channel_overwrite_form.autodelete_days }}<br>
+</div>
+<div class="overwrite-form-item">
+<p>Index playlists: <span class="settings-current">
+{% if channel_info.channel_overwrites.index_playlists %}
+{{ channel_info.channel_overwrites.index_playlists }}
+{% else %}
+False
+{% endif %}</span></p>
+{{ channel_overwrite_form.index_playlists }}<br>
+</div>
+<div class="overwrite-form-item"></div>
+<button type="submit">Save Channel Overwrites</button>
 </form>
 </div>
 </div>
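The reworked template renders three fields from a `channel_overwrite_form`, each wrapped in an `overwrite-form-item` block. A minimal sketch of what a matching Django form could look like; the field names are taken from the template, while the widgets, choices and placeholders are assumptions rather than the project's actual form definition:

```python
from django import forms


class ChannelOverwriteForm(forms.Form):
    """illustrative form matching the three fields used in the template"""

    download_format = forms.CharField(
        required=False,
        widget=forms.TextInput(attrs={"placeholder": "e.g. bestvideo+bestaudio/best"}),
    )
    autodelete_days = forms.IntegerField(
        required=False,
        min_value=0,
        widget=forms.NumberInput(attrs={"placeholder": "days to keep watched videos"}),
    )
    index_playlists = forms.ChoiceField(
        required=False,
        choices=[("", "-- change playlist index --"), ("0", "Disable"), ("1", "Enable")],
    )
```

A view handling the POST could then merge only the non-empty `cleaned_data` keys into the channel's overwrites, so untouched fields leave the stored settings alone.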
@@ -201,6 +201,9 @@ class ArchivistResultsView(ArchivistViewConfig):
             "ta_video/_search", self.default_conf, data=data
         )
         videos = search.get_data()
+        if not videos:
+            return False
+
         for video in videos:
             youtube_id = video["source"]["youtube_id"]
             matched = [i for i in results if i["youtube_id"] == youtube_id]
@@ -4,7 +4,7 @@ Django==4.0.3
 django-cors-headers==3.11.0
 djangorestframework==3.13.1
 Pillow==9.0.1
-redis==4.1.4
+redis==4.2.0
 requests==2.27.1
 ryd-client==0.0.3
 uWSGI==2.0.20
@@ -566,6 +566,28 @@ button:hover {
     margin-bottom: 10px;
 }

+.overwrite-form {
+    display: grid;
+    grid-template-columns: 1fr 1fr;
+    width: 100%;
+}
+
+.overwrite-form button {
+    width: 200px;
+}
+
+.overwrite-form-item {
+    margin-bottom: 1rem;
+}
+
+.overwrite-form-item input {
+    width: 90%;
+}
+
+.hidden-overwrite {
+    display: none;
+}
+
 /* login */
 .login-page {
     display: flex;
@@ -1062,7 +1084,10 @@ button:hover {
 .channel-list.grid,
 .video-item.list,
 .playlist-list.list,
-.playlist-list.grid {
+.playlist-list.grid,
+.info-box-2,
+.info-box-3,
+.overwrite-form {
     grid-template-columns: 1fr;
 }
 .playlist-item.list {
@@ -1101,10 +1126,6 @@ button:hover {
 .sort select {
     margin: unset;
 }
-.info-box-2,
-.info-box-3 {
-    grid-template-columns: 1fr;
-}
 .description-box {
     display: block;
 }
@@ -192,7 +192,6 @@ function addSingle(button) {
 function deleteQueue(button) {
     var to_delete = button.getAttribute('data-id');
     var payload = JSON.stringify({'deleteQueue': to_delete});
-    console.log(payload);
     sendPost(payload);
     setTimeout(function(){
         location.reload();
@@ -315,7 +314,6 @@ function deletePlaylist(button) {
             "playlist-action": playlist_action
         }
     });
-    console.log(payload);
     sendPost(payload);
     setTimeout(function(){
         window.location.replace("/playlist/");
@@ -418,7 +416,6 @@ function createVideoTag(videoData, videoProgress) {
     var videoSubtitles = videoData.data.subtitles; // Array of subtitles
     if (typeof(videoSubtitles) != 'undefined' && videoData.config.downloads.subtitle) {
         for (var i = 0; i < videoSubtitles.length; i++) {
-            console.log(videoSubtitles[i]);
             let label = videoSubtitles[i].name;
             if (videoSubtitles[i].source == "auto") {
                 label += " - auto";
@@ -886,6 +883,15 @@ function showForm() {
     animate('animate-icon', 'pulse-img');
 }

+function showOverwrite() {
+    var overwriteDiv = document.getElementById("overwrite-form");
+    if (overwriteDiv.classList.contains("hidden-overwrite")) {
+        overwriteDiv.classList.remove("hidden-overwrite");
+    } else {
+        overwriteDiv.classList.add("hidden-overwrite")
+    }
+}
+
 function animate(elementId, animationClass) {
     var toAnimate = document.getElementById(elementId);
     if (toAnimate.className !== animationClass) {
@@ -1,9 +0,0 @@
-#!/bin/bash
-# install debug and testing tools into slim container
-
-apt update && apt install -y vim htop bmon net-tools iputils-ping procps
-
-pip install ipython
-
-##
-exit 0