Style channel configure form, #build

Changed:
- layout of the channel configure form
- hide the form if no overwrites are set
- validate minimum Elasticsearch version 7.17.x
- improved build to include debug tools
simon 2022-03-25 15:44:01 +07:00
commit fa60f2e5f8
22 changed files with 358 additions and 489 deletions

View File

@@ -25,7 +25,7 @@ This is my setup I have landed on, YMMV:
 ```bash
 ./deploy.sh test
 ```
-- The command above will also copy the file `tubarchivist/testing.sh` into the working folder of the container. Running this script will install additional debugging tools I regularly use in testing.
+- The command above will call the docker build command with `--build-arg INSTALL_DEBUG=1` to install additional useful debug tools.
 - The `test` argument takes another optional argument to build for a specific architecture valid options are: `amd64`, `arm64` and `multi`, default is `amd64`.
 - This `deploy.sh` file is not meant to be universally usable for every possible environment but could serve as an idea on how to automatically rebuild containers to test changes - customize to your liking.

View File

@@ -1,7 +1,8 @@
 # build the tube archivist image from default python slim image
-FROM python:3.10.2-slim-bullseye
+FROM python:3.10.3-slim-bullseye
 ARG TARGETPLATFORM
+ARG INSTALL_DEBUG

 ENV PYTHONUNBUFFERED 1
@@ -26,6 +27,13 @@ RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
     apt-get -y update && apt-get -y install --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* \
     ; fi

+# install debug tools for testing environment
+RUN if [ "$INSTALL_DEBUG" ] ; then \
+    apt-get -y update && apt-get -y install --no-install-recommends \
+    vim htop bmon net-tools iputils-ping procps \
+    && pip install --no-cache-dir ipython --src /usr/local/src \
+    ; fi
+
 # make folders
 RUN mkdir /cache
 RUN mkdir /youtube

View File

@@ -43,7 +43,10 @@ function sync_test {
     # pass argument to build for specific platform
     host="tubearchivist.local"
+    # make base folder
+    ssh "$host" "mkdir -p docker"
+    # copy project files to build image
     rsync -a --progress --delete-after \
         --exclude ".git" \
         --exclude ".gitignore" \
@@ -52,8 +55,8 @@ function sync_test {
         --exclude "db.sqlite3" \
         . -e ssh "$host":tubearchivist
-    # uncomment or copy your own docker-compose file
-    # rsync -r --progress --delete docker-compose.yml -e ssh "$host":docker
+    # copy default docker-compose file if not exist
+    rsync --progress --ignore-existing docker-compose.yml -e ssh "$host":docker

     if [[ $1 = "amd64" ]]; then
         platform="linux/amd64"
@@ -65,12 +68,9 @@ function sync_test {
         platform="linux/amd64"
     fi

-    ssh "$host" "docker buildx build --platform $platform -t bbilly1/tubearchivist:latest tubearchivist --load"
+    ssh "$host" "docker buildx build --build-arg INSTALL_DEBUG=1 --platform $platform -t bbilly1/tubearchivist:latest tubearchivist --load"
     ssh "$host" 'docker-compose -f docker/docker-compose.yml up -d'
-    ssh "$host" 'docker cp tubearchivist/tubearchivist/testing.sh tubearchivist:/app/testing.sh'
-    ssh "$host" 'docker exec tubearchivist chmod +x /app/testing.sh'
 }

View File

@@ -1,7 +1,7 @@
 """all API views"""

-import requests
 from home.src.download.thumbnails import ThumbManager
+from home.src.es.connect import ElasticWrap
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import UrlListParser
 from home.src.ta.ta_redis import RedisArchivist
@@ -24,31 +24,21 @@ class ApiBaseView(APIView):

     def __init__(self):
         super().__init__()
-        self.response = {"data": False}
+        self.response = {"data": False, "config": AppConfig().config}
         self.status_code = False
         self.context = False
-        self.default_conf = AppConfig().config
-
-    def config_builder(self):
-        """build confic context"""
-        self.context = {
-            "es_url": self.default_conf["application"]["es_url"],
-            "es_auth": self.default_conf["application"]["es_auth"],
-        }
-        self.response["config"] = self.default_conf

     def get_document(self, document_id):
         """get single document from es"""
-        es_url = self.context["es_url"]
-        url = f"{es_url}{self.search_base}{document_id}"
-        print(url)
-        response = requests.get(url, auth=self.context["es_auth"])
+        path = f"{self.search_base}{document_id}"
+        print(path)
+        response, status_code = ElasticWrap(path).get()
         try:
-            self.response["data"] = response.json()["_source"]
+            self.response["data"] = response["_source"]
         except KeyError:
             print(f"item not found: {document_id}")
             self.response["data"] = False
-        self.status_code = response.status_code
+        self.status_code = status_code

     def process_keys(self):
         """process keys for frontend"""
@@ -59,7 +49,7 @@ class ApiBaseView(APIView):
         if "vid_thumb_url" in all_keys:
             youtube_id = self.response["data"]["youtube_id"]
             vid_thumb_url = ThumbManager().vid_thumb_path(youtube_id)
-            cache_dir = self.default_conf["application"]["cache_dir"]
+            cache_dir = self.response["config"]["application"]["cache_dir"]
             new_thumb = f"{cache_dir}/{vid_thumb_url}"
             self.response["data"]["vid_thumb_url"] = new_thumb
         if "subtitles" in all_keys:
@@ -75,13 +65,11 @@ class ApiBaseView(APIView):

     def get_document_list(self, data):
         """get a list of results"""
-        es_url = self.context["es_url"]
-        url = f"{es_url}{self.search_base}"
-        print(url)
-        response = requests.get(url, json=data, auth=self.context["es_auth"])
-        all_hits = response.json()["hits"]["hits"]
+        print(self.search_base)
+        response, status_code = ElasticWrap(self.search_base).get(data=data)
+        all_hits = response["hits"]["hits"]
         self.response["data"] = [i["_source"] for i in all_hits]
-        self.status_code = response.status_code
+        self.status_code = status_code


 class VideoApiView(ApiBaseView):
@@ -89,12 +77,11 @@ class VideoApiView(ApiBaseView):
     GET: returns metadata dict of video
     """

-    search_base = "/ta_video/_doc/"
+    search_base = "ta_video/_doc/"

     def get(self, request, video_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(video_id)
         self.process_keys()
         return Response(self.response, status=self.status_code)
@@ -143,12 +130,11 @@ class ChannelApiView(ApiBaseView):
     GET: returns metadata dict of channel
     """

-    search_base = "/ta_channel/_doc/"
+    search_base = "ta_channel/_doc/"

     def get(self, request, channel_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(channel_id)
         return Response(self.response, status=self.status_code)
@@ -159,13 +145,12 @@ class ChannelApiListView(ApiBaseView):
     POST: edit a list of channels
     """

-    search_base = "/ta_channel/_search/"
+    search_base = "ta_channel/_search/"

     def get(self, request):
         # pylint: disable=unused-argument
         """get request"""
         data = {"query": {"match_all": {}}}
-        self.config_builder()
         self.get_document_list(data)
         self.get_paginate()
@@ -194,12 +179,11 @@ class PlaylistApiView(ApiBaseView):
     GET: returns metadata dict of playlist
     """

-    search_base = "/ta_playlist/_doc/"
+    search_base = "ta_playlist/_doc/"

     def get(self, request, playlist_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(playlist_id)
         return Response(self.response, status=self.status_code)
@@ -209,12 +193,11 @@ class DownloadApiView(ApiBaseView):
     GET: returns metadata dict of an item in the download queue
     """

-    search_base = "/ta_download/_doc/"
+    search_base = "ta_download/_doc/"

     def get(self, request, video_id):
         # pylint: disable=unused-argument
         """get request"""
-        self.config_builder()
         self.get_document(video_id)
         return Response(self.response, status=self.status_code)
@@ -225,13 +208,12 @@ class DownloadApiListView(ApiBaseView):
     POST: add a list of videos to download queue
     """

-    search_base = "/ta_download/_search/"
+    search_base = "ta_download/_search/"

     def get(self, request):
         # pylint: disable=unused-argument
         """get request"""
         data = {"query": {"match_all": {}}}
-        self.config_builder()
         self.get_document_list(data)
         self.get_paginate()
         return Response(self.response)
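Note: `ElasticWrap` lives in `home/src/es/connect.py` and is not part of this diff. Based only on how the call sites in this commit use it, the interface it is assumed to expose looks roughly like the sketch below; URL and auth handling are simplified placeholders, the real class reads them from `AppConfig`.

```python
"""minimal sketch of the assumed ElasticWrap interface, not the project code"""
import os

import requests


class ElasticWrap:
    """thin wrapper that returns (parsed json, status_code) tuples"""

    def __init__(self, path):
        # placeholder config: the real class gets es_url/es_auth from AppConfig
        es_url = os.environ.get("ES_URL", "http://localhost:9200")
        self.url = f"{es_url}/{path}"
        self.auth = ("elastic", os.environ.get("ELASTIC_PASSWORD", ""))

    def get(self, data=False):
        """get request, optionally with a query body"""
        if data:
            response = requests.get(self.url, json=data, auth=self.auth)
        else:
            response = requests.get(self.url, auth=self.auth)
        return response.json(), response.status_code

    def post(self, data=False, ndjson=False):
        """post request, json body or nd-json bulk payload"""
        if ndjson:
            headers = {"Content-type": "application/x-ndjson"}
            response = requests.post(
                self.url, data=data, headers=headers, auth=self.auth
            )
        else:
            response = requests.post(self.url, json=data, auth=self.auth)
        return response.json(), response.status_code

    def put(self, data=False):
        """put request with a json body"""
        response = requests.put(self.url, json=data, auth=self.auth)
        return response.json(), response.status_code

    def delete(self, data=False):
        """delete request"""
        response = requests.delete(self.url, json=data, auth=self.auth)
        return response.json(), response.status_code
```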

View File

@@ -1,8 +1,10 @@
 """handle custom startup functions"""

 import os
+import sys

 from django.apps import AppConfig
+from home.src.es.connect import ElasticWrap
 from home.src.es.index_setup import index_check
 from home.src.ta.config import AppConfig as ArchivistConfig
 from home.src.ta.ta_redis import RedisArchivist
@@ -11,6 +13,9 @@ from home.src.ta.ta_redis import RedisArchivist
 class StartupCheck:
     """checks to run at application startup"""

+    MIN_MAJOR, MAX_MAJOR = 7, 7
+    MIN_MINOR = 17
+
     def __init__(self):
         self.config_handler = ArchivistConfig()
         self.redis_con = RedisArchivist()
@@ -19,6 +24,7 @@ class StartupCheck:
     def run(self):
         """run all startup checks"""
         print("run startup checks")
+        self.es_version_check()
         self.release_lock()
         index_check()
         self.sync_redis_state()
@@ -72,6 +78,33 @@ class StartupCheck:
         if response:
             print("deleted leftover key from redis: " + lock)

+    def is_invalid(self, version):
+        """return true if es version is invalid, false if ok"""
+        major, minor = [int(i) for i in version.split(".")[:2]]
+        if not self.MIN_MAJOR <= major <= self.MAX_MAJOR:
+            return True
+
+        if minor >= self.MIN_MINOR:
+            return False
+
+        return True
+
+    def es_version_check(self):
+        """check for minimal elasticsearch version"""
+        response, _ = ElasticWrap("/").get()
+        version = response["version"]["number"]
+        invalid = self.is_invalid(version)
+
+        if invalid:
+            print(
+                "minial required elasticsearch version: "
+                + f"{self.MIN_MAJOR}.{self.MIN_MINOR}, "
+                + "please update to recommended version."
+            )
+            sys.exit(1)
+
+        print("elasticsearch version check passed")
+

 class HomeConfig(AppConfig):
     """call startup funcs"""

View File

@@ -11,11 +11,10 @@ import shutil
 from datetime import datetime
 from time import sleep

-import requests
 import yt_dlp
 from home.src.download.queue import PendingList
 from home.src.download.subscriptions import PlaylistSubscription
-from home.src.es.connect import IndexPaginate
+from home.src.es.connect import ElasticWrap, IndexPaginate
 from home.src.index.channel import YoutubeChannel
 from home.src.index.playlist import YoutubePlaylist
 from home.src.index.video import YoutubeVideo, index_new_video
@@ -162,7 +161,7 @@ class VideoDownloader:
         pending.get_channels()
         self.video_overwrites = pending.video_overwrites

-        queue = RedisQueue("dl_queue")
+        queue = RedisQueue()

         limit_queue = self.config["downloads"]["limit_count"]
         if limit_queue:
@@ -212,8 +211,7 @@ class VideoDownloader:
             RedisArchivist().set_message("message:download", mess_dict)
             return

-        queue = RedisQueue("dl_queue")
-        queue.add_list(to_add)
+        RedisQueue().add_list(to_add)

     @staticmethod
     def _progress_hook(response):
@@ -371,14 +369,11 @@ class VideoDownloader:
         if host_uid and host_gid:
             os.chown(new_file_path, host_uid, host_gid)

-    def _delete_from_pending(self, youtube_id):
+    @staticmethod
+    def _delete_from_pending(youtube_id):
         """delete downloaded video from pending index if its there"""
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        url = f"{es_url}/ta_download/_doc/{youtube_id}"
-        response = requests.delete(url, auth=es_auth)
-        if not response.ok and not response.status_code == 404:
-            print(response.text)
+        path = f"ta_download/_doc/{youtube_id}"
+        _, _ = ElasticWrap(path).delete()

     def _add_subscribed_channels(self):
         """add all channels subscribed to refresh"""

View File

@@ -93,11 +93,12 @@ class IndexPaginate:

     DEFAULT_SIZE = 500

-    def __init__(self, index_name, data, size=False):
+    def __init__(self, index_name, data, size=False, keep_source=False):
         self.index_name = index_name
         self.data = data
         self.pit_id = False
         self.size = size
+        self.keep_source = keep_source

     def get_results(self):
         """get all results"""
@@ -132,7 +133,10 @@ class IndexPaginate:
         all_hits = response["hits"]["hits"]
         if all_hits:
             for hit in all_hits:
-                source = hit["_source"]
+                if self.keep_source:
+                    source = hit
+                else:
+                    source = hit["_source"]
                 search_after = hit["sort"]
                 all_results.append(source)
             # update search_after with last hit data
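The new `keep_source` flag lets callers receive the full hit (including `_id`) instead of only `_source`; the backup export in `ElasticBackup.get_all_documents` further down relies on it. A usage sketch, with the index name and query shortened for illustration:

```python
from home.src.es.connect import IndexPaginate

data = {"query": {"match_all": {}}, "sort": [{"_doc": {"order": "desc"}}]}

# default behaviour: a list of plain documents (_source only)
documents = IndexPaginate("ta_video", data).get_results()

# keep_source=True: full hits, useful when _id and _index are needed again,
# e.g. to rebuild _bulk actions for a backup
full_hits = IndexPaginate("ta_video", data, keep_source=True).get_results()
```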

View File

@@ -10,7 +10,7 @@ import os
 import zipfile
 from datetime import datetime

-import requests
+from home.src.es.connect import ElasticWrap, IndexPaginate
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import ignore_filelist
@@ -20,11 +20,6 @@ class ElasticIndex:
     handle mapping and settings on elastic search for a given index
     """

-    CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
-    HEADERS = {"Content-type": "application/json"}
-
     def __init__(self, index_name, expected_map, expected_set):
         self.index_name = index_name
         self.expected_map = expected_map
@@ -33,15 +28,9 @@ class ElasticIndex:

     def index_exists(self):
         """check if index already exists and return mapping if it does"""
-        index_name = self.index_name
-        url = f"{self.ES_URL}/ta_{index_name}"
-        response = requests.get(url, auth=self.ES_AUTH)
-        exists = response.ok
-        if exists:
-            details = response.json()[f"ta_{index_name}"]
-        else:
-            details = False
+        response, status_code = ElasticWrap(f"ta_{self.index_name}").get()
+        exists = status_code == 200
+        details = response.get(f"ta_{self.index_name}", False)

         return exists, details
@@ -110,63 +99,41 @@ class ElasticIndex:

     def rebuild_index(self):
         """rebuild with new mapping"""
-        # backup
         self.reindex("backup")
-        # delete original
         self.delete_index(backup=False)
-        # create new
         self.create_blank()
         self.reindex("restore")
-        # delete backup
         self.delete_index()

     def reindex(self, method):
         """create on elastic search"""
-        index_name = self.index_name
         if method == "backup":
-            source = f"ta_{index_name}"
-            destination = f"ta_{index_name}_backup"
+            source = f"ta_{self.index_name}"
+            destination = f"ta_{self.index_name}_backup"
         elif method == "restore":
-            source = f"ta_{index_name}_backup"
-            destination = f"ta_{index_name}"
+            source = f"ta_{self.index_name}_backup"
+            destination = f"ta_{self.index_name}"

-        query = {"source": {"index": source}, "dest": {"index": destination}}
-        data = json.dumps(query)
-        url = self.ES_URL + "/_reindex?refresh=true"
-        response = requests.post(
-            url=url, data=data, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not response.ok:
-            print(response.text)
+        data = {"source": {"index": source}, "dest": {"index": destination}}
+        _, _ = ElasticWrap("_reindex?refresh=true").post(data=data)

     def delete_index(self, backup=True):
         """delete index passed as argument"""
+        path = f"ta_{self.index_name}"
         if backup:
-            url = f"{self.ES_URL}/ta_{self.index_name}_backup"
-        else:
-            url = f"{self.ES_URL}/ta_{self.index_name}"
+            path = path + "_backup"

-        response = requests.delete(url, auth=self.ES_AUTH)
-        if not response.ok:
-            print(response.text)
+        _, _ = ElasticWrap(path).delete()

     def create_blank(self):
         """apply new mapping and settings for blank new index"""
-        expected_map = self.expected_map
-        expected_set = self.expected_set
-        # stich payload
-        payload = {}
-        if expected_set:
-            payload.update({"settings": expected_set})
-        if expected_map:
-            payload.update({"mappings": {"properties": expected_map}})
-        # create
-        url = f"{self.ES_URL}/ta_{self.index_name}"
-        data = json.dumps(payload)
-        response = requests.put(
-            url=url, data=data, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not response.ok:
-            print(response.text)
+        data = {}
+        if self.expected_set:
+            data.update({"settings": self.expected_set})
+        if self.expected_map:
+            data.update({"mappings": {"properties": self.expected_map}})
+
+        _, _ = ElasticWrap(f"ta_{self.index_name}").put(data)


 class ElasticBackup:
@@ -174,52 +141,21 @@ class ElasticBackup:

     def __init__(self, index_config, reason):
         self.config = AppConfig().config
+        self.cache_dir = self.config["application"]["cache_dir"]
         self.index_config = index_config
         self.reason = reason
         self.timestamp = datetime.now().strftime("%Y%m%d")
         self.backup_files = []

-    def get_all_documents(self, index_name):
+    @staticmethod
+    def get_all_documents(index_name):
         """export all documents of a single index"""
-        headers = {"Content-type": "application/json"}
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        # get PIT ID
-        url = f"{es_url}/ta_{index_name}/_pit?keep_alive=1m"
-        response = requests.post(url, auth=es_auth)
-        json_data = json.loads(response.text)
-        pit_id = json_data["id"]
-        # build query
         data = {
             "query": {"match_all": {}},
-            "size": 100,
-            "pit": {"id": pit_id, "keep_alive": "1m"},
-            "sort": [{"_id": {"order": "asc"}}],
+            "sort": [{"_doc": {"order": "desc"}}],
         }
-        query_str = json.dumps(data)
-        url = es_url + "/_search"
-        # loop until nothing left
-        all_results = []
-        while True:
-            response = requests.get(
-                url, data=query_str, headers=headers, auth=es_auth
-            )
-            json_data = json.loads(response.text)
-            all_hits = json_data["hits"]["hits"]
-            if all_hits:
-                for hit in all_hits:
-                    search_after = hit["sort"]
-                    all_results.append(hit)
-                # update search_after with last hit data
-                data["search_after"] = search_after
-                query_str = json.dumps(data)
-            else:
-                break
-        # clean up PIT
-        query_str = json.dumps({"id": pit_id})
-        requests.delete(
-            es_url + "/_pit", data=query_str, headers=headers, auth=es_auth
-        )
+        paginate = IndexPaginate(f"ta_{index_name}", data, keep_source=True)
+        all_results = paginate.get_results()

         return all_results
@@ -244,9 +180,8 @@ class ElasticBackup:

     def write_es_json(self, file_content, index_name):
         """write nd-json file for es _bulk API to disk"""
-        cache_dir = self.config["application"]["cache_dir"]
         file_name = f"es_{index_name}-{self.timestamp}.json"
-        file_path = os.path.join(cache_dir, "backup", file_name)
+        file_path = os.path.join(self.cache_dir, "backup", file_name)
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(file_content)
@@ -254,9 +189,8 @@ class ElasticBackup:

     def write_ta_json(self, all_results, index_name):
         """write generic json file to disk"""
-        cache_dir = self.config["application"]["cache_dir"]
         file_name = f"ta_{index_name}-{self.timestamp}.json"
-        file_path = os.path.join(cache_dir, "backup", file_name)
+        file_path = os.path.join(self.cache_dir, "backup", file_name)
         to_write = [i["_source"] for i in all_results]
         file_content = json.dumps(to_write)
         with open(file_path, "w", encoding="utf-8") as f:
@@ -266,9 +200,8 @@ class ElasticBackup:

     def zip_it(self):
         """pack it up into single zip file"""
-        cache_dir = self.config["application"]["cache_dir"]
         file_name = f"ta_backup-{self.timestamp}-{self.reason}.zip"
-        backup_folder = os.path.join(cache_dir, "backup")
+        backup_folder = os.path.join(self.cache_dir, "backup")
         backup_file = os.path.join(backup_folder, file_name)

         with zipfile.ZipFile(
@@ -283,29 +216,18 @@ class ElasticBackup:

     def post_bulk_restore(self, file_name):
         """send bulk to es"""
-        cache_dir = self.config["application"]["cache_dir"]
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        headers = {"Content-type": "application/x-ndjson"}
-        file_path = os.path.join(cache_dir, file_name)
+        file_path = os.path.join(self.cache_dir, file_name)

         with open(file_path, "r", encoding="utf-8") as f:
-            query_str = f.read()
+            data = f.read()

-        if not query_str.strip():
+        if not data.strip():
             return

-        url = es_url + "/_bulk"
-        request = requests.post(
-            url, data=query_str, headers=headers, auth=es_auth
-        )
-        if not request.ok:
-            print(request.text)
+        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)

     def get_all_backup_files(self):
         """build all available backup files for view"""
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")
         backup_files = os.listdir(backup_dir)
         all_backup_files = ignore_filelist(backup_files)
         all_available_backups = [
@@ -336,8 +258,7 @@ class ElasticBackup:

     def unpack_zip_backup(self, filename):
         """extract backup zip and return filelist"""
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")
         file_path = os.path.join(backup_dir, filename)

         with zipfile.ZipFile(file_path, "r") as z:
@@ -348,9 +269,7 @@ class ElasticBackup:

     def restore_json_files(self, zip_content):
         """go through the unpacked files and restore"""
-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")

         for json_f in zip_content:
@@ -364,14 +283,13 @@ class ElasticBackup:
             self.post_bulk_restore(file_name)
             os.remove(file_name)

-    def index_exists(self, index_name):
+    @staticmethod
+    def index_exists(index_name):
         """check if index already exists to skip"""
-        es_url = self.config["application"]["es_url"]
-        es_auth = self.config["application"]["es_auth"]
-        url = f"{es_url}/ta_{index_name}"
-        response = requests.get(url, auth=es_auth)
+        _, status_code = ElasticWrap(f"ta_{index_name}").get()
+        exists = status_code == 200

-        return response.ok
+        return exists

     def rotate_backup(self):
         """delete old backups if needed"""
@@ -386,8 +304,7 @@ class ElasticBackup:
             print("no backup files to rotate")
             return

-        cache_dir = self.config["application"]["cache_dir"]
-        backup_dir = os.path.join(cache_dir, "backup")
+        backup_dir = os.path.join(self.cache_dir, "backup")

         all_to_delete = auto[rotate:]
         for to_delete in all_to_delete:

View File

@@ -114,7 +114,7 @@ class PostData:
         print(f"ignore video {video_id}")
         PendingInteract(video_id=video_id, status="ignore").update_status()
         # also clear from redis queue
-        RedisQueue("dl_queue").clear_item(video_id)
+        RedisQueue().clear_item(video_id)

         return {"success": True}

     @staticmethod
@@ -132,7 +132,7 @@ class PostData:
         to_execute = self.exec_val
         if to_execute == "stop":
             print("stopping download queue")
-            RedisQueue("dl_queue").clear()
+            RedisQueue().clear()
         elif to_execute == "kill":
             task_id = RedisArchivist().get_message("dl_queue_id")
             if not isinstance(task_id, str):

View File

@@ -3,22 +3,15 @@ functionality:
 - handle watched state for videos, channels and playlists
 """

-import json
 from datetime import datetime

-import requests
-from home.src.ta.config import AppConfig
+from home.src.es.connect import ElasticWrap
 from home.src.ta.helper import UrlListParser


 class WatchState:
     """handle watched checkbox for videos and channels"""

-    CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
-    HEADERS = {"Content-type": "application/json"}
-
     def __init__(self, youtube_id):
         self.youtube_id = youtube_id
         self.stamp = int(datetime.now().strftime("%s"))
@@ -33,7 +26,7 @@ class WatchState:
         elif url_type == "playlist":
             self.mark_playlist_watched()

-        print(f"marked {self.youtube_id} as watched")
+        print(f"{self.youtube_id}: marked as watched")

     def mark_as_unwatched(self):
         """revert watched state to false"""
@@ -41,7 +34,7 @@ class WatchState:
         if url_type == "video":
             self.mark_vid_watched(revert=True)

-        print(f"revert {self.youtube_id} as unwatched")
+        print(f"{self.youtube_id}: revert as unwatched")

     def dedect_type(self):
         """find youtube id type"""
@@ -52,77 +45,54 @@ class WatchState:

     def mark_vid_watched(self, revert=False):
         """change watched status of single video"""
-        url = self.ES_URL + "/ta_video/_update/" + self.youtube_id
+        path = f"ta_video/_update/{self.youtube_id}"
         data = {
             "doc": {"player": {"watched": True, "watched_date": self.stamp}}
         }
         if revert:
             data["doc"]["player"]["watched"] = False

-        payload = json.dumps(data)
-        request = requests.post(
-            url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+        response, status_code = ElasticWrap(path).post(data=data)
+        if status_code != 200:
+            print(response)
             raise ValueError("failed to mark video as watched")

     def mark_channel_watched(self):
         """change watched status of every video in channel"""
+        path = "ta_video/_update_by_query"
+        must_list = [
+            {"term": {"channel.channel_id": {"value": self.youtube_id}}},
+            {"term": {"player.watched": {"value": False}}},
+        ]
         data = {
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "term": {
-                                "channel.channel_id": {
-                                    "value": self.youtube_id
-                                }
-                            }
-                        },
-                        {"term": {"player.watched": {"value": False}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "script": {
                 "source": "ctx._source.player['watched'] = true",
                 "lang": "painless",
             },
         }
-        payload = json.dumps(data)
-        url = f"{self.ES_URL}/ta_video/_update_by_query"
-        request = requests.post(
-            url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+
+        response, status_code = ElasticWrap(path).post(data=data)
+        if status_code != 200:
+            print(response)
             raise ValueError("failed mark channel as watched")

     def mark_playlist_watched(self):
         """change watched state of all videos in playlist"""
+        path = "ta_video/_update_by_query"
+        must_list = [
+            {"term": {"playlist.keyword": {"value": self.youtube_id}}},
+            {"term": {"player.watched": {"value": False}}},
+        ]
         data = {
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "term": {
-                                "playlist.keyword": {"value": self.youtube_id}
-                            }
-                        },
-                        {"term": {"player.watched": {"value": False}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "script": {
                 "source": "ctx._source.player['watched'] = true",
                 "lang": "painless",
             },
         }
-        payload = json.dumps(data)
-        url = f"{self.ES_URL}/ta_video/_update_by_query"
-        request = requests.post(
-            url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+
+        response, status_code = ElasticWrap(path).post(data=data)
+        if status_code != 200:
+            print(response)
             raise ValueError("failed mark playlist as watched")

View File

@@ -10,11 +10,10 @@ import os
 import re
 import shutil
 import subprocess
-from datetime import datetime

-import requests
 from home.src.download.queue import PendingList
 from home.src.download.yt_dlp_handler import VideoDownloader
+from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
 from home.src.index.video import index_new_video
 from home.src.ta.config import AppConfig
@@ -26,8 +25,6 @@ class FilesystemScanner:
     """handle scanning and fixing from filesystem"""

     CONFIG = AppConfig().config
-    ES_URL = CONFIG["application"]["es_url"]
-    ES_AUTH = CONFIG["application"]["es_auth"]
     VIDEOS = CONFIG["application"]["videos"]

     def __init__(self):
@@ -148,25 +145,16 @@ class FilesystemScanner:
             bulk_list.append(json.dumps(source))
         # add last newline
         bulk_list.append("\n")
-        query_str = "\n".join(bulk_list)
-        # make the call
-        headers = {"Content-type": "application/x-ndjson"}
-        url = self.ES_URL + "/_bulk"
-        request = requests.post(
-            url, data=query_str, headers=headers, auth=self.ES_AUTH
-        )
-        if not request.ok:
-            print(request.text)
+        data = "\n".join(bulk_list)
+        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)

     def delete_from_index(self):
         """find indexed but deleted mediafile"""
         for indexed in self.to_delete:
             youtube_id = indexed[0]
             print(f"deleting {youtube_id} from index")
-            url = self.ES_URL + "/ta_video/_doc/" + youtube_id
-            request = requests.delete(url, auth=self.ES_AUTH)
-            if not request.ok:
-                print(request.text)
+            path = f"ta_video/_doc/{youtube_id}"
+            _, _ = ElasticWrap(path).delete()


 class ManualImport:
@@ -319,10 +307,7 @@ def scan_filesystem():

 def reindex_old_documents():
     """daily refresh of old documents"""
-    # continue if needed
-    reindex_handler = Reindex()
-    reindex_handler.check_outdated()
-    reindex_handler.reindex()
-    # set timestamp
-    now = int(datetime.now().strftime("%s"))
-    RedisArchivist().set_message("last_reindex", now, expire=False)
+    handler = Reindex()
+    handler.check_outdated()
+    handler.reindex()
+    RedisArchivist().set_message("last_reindex", handler.now, expire=False)
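`ElasticWrap("_bulk").post(data=data, ndjson=True)` expects newline-delimited JSON: one action line followed by one document line per item, with a trailing newline. A hedged sketch of how such a payload is assembled; the index name and documents below are made up, the scanner above builds its `bulk_list` the same way before joining:

```python
import json

# made-up documents for illustration
documents = [
    {"youtube_id": "abc123", "media_url": "channel/abc123.mp4"},
    {"youtube_id": "def456", "media_url": "channel/def456.mp4"},
]

bulk_list = []
for source in documents:
    # action line tells ES which index and _id the next line belongs to
    action = {"index": {"_index": "ta_video", "_id": source["youtube_id"]}}
    bulk_list.append(json.dumps(action))
    bulk_list.append(json.dumps(source))

# the _bulk endpoint requires the body to end with a newline
bulk_list.append("\n")
data = "\n".join(bulk_list)
```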

View File

@@ -4,85 +4,60 @@ functionality:
 - index and update in es
 """

-import json
 from datetime import datetime
 from math import ceil
 from time import sleep

-import requests
 from home.src.download.queue import PendingList
 from home.src.download.thumbnails import ThumbManager
+from home.src.es.connect import ElasticWrap
 from home.src.index.channel import YoutubeChannel
 from home.src.index.playlist import YoutubePlaylist
 from home.src.index.video import YoutubeVideo
 from home.src.ta.config import AppConfig
-from home.src.ta.helper import get_total_hits


 class Reindex:
     """check for outdated documents and refresh data from youtube"""

+    MATCH_FIELD = {
+        "ta_video": "active",
+        "ta_channel": "channel_active",
+        "ta_playlist": "playlist_active",
+    }
+    MULTIPLY = 1.2
+
     def __init__(self):
         # config
-        config = AppConfig().config
-        self.sleep_interval = config["downloads"]["sleep_interval"]
-        self.es_url = config["application"]["es_url"]
-        self.es_auth = config["application"]["es_auth"]
-        self.refresh_interval = config["scheduler"]["check_reindex_days"]
-        self.integrate_ryd = config["downloads"]["integrate_ryd"]
+        self.now = int(datetime.now().strftime("%s"))
+        self.config = AppConfig().config
+        self.interval = self.config["scheduler"]["check_reindex_days"]
         # scan
         self.all_youtube_ids = False
         self.all_channel_ids = False
         self.all_playlist_ids = False

-    def get_daily(self):
+    def _get_daily(self):
         """get daily refresh values"""
-        total_videos = get_total_hits(
-            "ta_video", self.es_url, self.es_auth, "active"
-        )
-        video_daily = ceil(total_videos / self.refresh_interval * 1.2)
-        total_channels = get_total_hits(
-            "ta_channel", self.es_url, self.es_auth, "channel_active"
-        )
-        channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
-        total_playlists = get_total_hits(
-            "ta_playlist", self.es_url, self.es_auth, "playlist_active"
-        )
-        playlist_daily = ceil(total_playlists / self.refresh_interval * 1.2)
+        total_videos = self._get_total_hits("ta_video")
+        video_daily = ceil(total_videos / self.interval * self.MULTIPLY)
+        total_channels = self._get_total_hits("ta_channel")
+        channel_daily = ceil(total_channels / self.interval * self.MULTIPLY)
+        total_playlists = self._get_total_hits("ta_playlist")
+        playlist_daily = ceil(total_playlists / self.interval * self.MULTIPLY)
         return (video_daily, channel_daily, playlist_daily)

-    def get_outdated_vids(self, size):
-        """get daily videos to refresh"""
-        headers = {"Content-type": "application/json"}
-        now = int(datetime.now().strftime("%s"))
-        now_lte = now - self.refresh_interval * 24 * 60 * 60
-        data = {
-            "size": size,
-            "query": {
-                "bool": {
-                    "must": [
-                        {"match": {"active": True}},
-                        {"range": {"vid_last_refresh": {"lte": now_lte}}},
-                    ]
-                }
-            },
-            "sort": [{"vid_last_refresh": {"order": "asc"}}],
-            "_source": False,
-        }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_video/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
-        return all_youtube_ids
+    def _get_total_hits(self, index):
+        """get total hits from index"""
+        match_field = self.MATCH_FIELD[index]
+        path = f"{index}/_search?filter_path=hits.total"
+        data = {"query": {"match": {match_field: True}}}
+        response, _ = ElasticWrap(path).post(data=data)
+        total_hits = response["hits"]["total"]["value"]
+        return total_hits

-    def get_unrated_vids(self):
-        """get all videos without rating if ryd integration is enabled"""
-        headers = {"Content-type": "application/json"}
+    def _get_unrated_vids(self):
+        """get max 200 videos without rating if ryd integration is enabled"""
         data = {
             "size": 200,
             "query": {
@@ -91,86 +66,78 @@ class Reindex:
                 }
             },
         }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_video/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        missing_rating = [i["_id"] for i in response_dict["hits"]["hits"]]
+        response, _ = ElasticWrap("ta_video/_search").get(data=data)
+
+        missing_rating = [i["_id"] for i in response["hits"]["hits"]]
         self.all_youtube_ids = self.all_youtube_ids + missing_rating

-    def get_outdated_channels(self, size):
-        """get daily channels to refresh"""
-        headers = {"Content-type": "application/json"}
-        now = int(datetime.now().strftime("%s"))
-        now_lte = now - self.refresh_interval * 24 * 60 * 60
+    def _get_outdated_vids(self, size):
+        """get daily videos to refresh"""
+        now_lte = self.now - self.interval * 24 * 60 * 60
+        must_list = [
+            {"match": {"active": True}},
+            {"range": {"vid_last_refresh": {"lte": now_lte}}},
+        ]
         data = {
             "size": size,
-            "query": {
-                "bool": {
-                    "must": [
-                        {"match": {"channel_active": True}},
-                        {"range": {"channel_last_refresh": {"lte": now_lte}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
+            "sort": [{"vid_last_refresh": {"order": "asc"}}],
+            "_source": False,
+        }
+        response, _ = ElasticWrap("ta_video/_search").get(data=data)
+
+        all_youtube_ids = [i["_id"] for i in response["hits"]["hits"]]
+        return all_youtube_ids
+
+    def _get_outdated_channels(self, size):
+        """get daily channels to refresh"""
+        now_lte = self.now - self.interval * 24 * 60 * 60
+        must_list = [
+            {"match": {"channel_active": True}},
+            {"range": {"channel_last_refresh": {"lte": now_lte}}},
+        ]
+        data = {
+            "size": size,
+            "query": {"bool": {"must": must_list}},
             "sort": [{"channel_last_refresh": {"order": "asc"}}],
             "_source": False,
         }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_channel/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
+        response, _ = ElasticWrap("ta_channel/_search").get(data=data)
+
+        all_channel_ids = [i["_id"] for i in response["hits"]["hits"]]
         return all_channel_ids

-    def get_outdated_playlists(self, size):
+    def _get_outdated_playlists(self, size):
         """get daily outdated playlists to refresh"""
-        headers = {"Content-type": "application/json"}
-        now = int(datetime.now().strftime("%s"))
-        now_lte = now - self.refresh_interval * 24 * 60 * 60
+        now_lte = self.now - self.interval * 24 * 60 * 60
+        must_list = [
+            {"match": {"playlist_active": True}},
+            {"range": {"playlist_last_refresh": {"lte": now_lte}}},
+        ]
         data = {
             "size": size,
-            "query": {
-                "bool": {
-                    "must": [
-                        {"match": {"playlist_active": True}},
-                        {"range": {"playlist_last_refresh": {"lte": now_lte}}},
-                    ]
-                }
-            },
+            "query": {"bool": {"must": must_list}},
             "sort": [{"playlist_last_refresh": {"order": "asc"}}],
             "_source": False,
         }
-        query_str = json.dumps(data)
-        url = self.es_url + "/ta_playlist/_search"
-        response = requests.get(
-            url, data=query_str, headers=headers, auth=self.es_auth
-        )
-        if not response.ok:
-            print(response.text)
-        response_dict = json.loads(response.text)
-        all_playlist_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
+        response, _ = ElasticWrap("ta_playlist/_search").get(data=data)
+
+        all_playlist_ids = [i["_id"] for i in response["hits"]["hits"]]
         return all_playlist_ids

     def check_outdated(self):
         """add missing vids and channels"""
-        video_daily, channel_daily, playlist_daily = self.get_daily()
-        self.all_youtube_ids = self.get_outdated_vids(video_daily)
-        self.all_channel_ids = self.get_outdated_channels(channel_daily)
-        self.all_playlist_ids = self.get_outdated_playlists(playlist_daily)
-        if self.integrate_ryd:
-            self.get_unrated_vids()
+        video_daily, channel_daily, playlist_daily = self._get_daily()
+        self.all_youtube_ids = self._get_outdated_vids(video_daily)
+        self.all_channel_ids = self._get_outdated_channels(channel_daily)
+        self.all_playlist_ids = self._get_outdated_playlists(playlist_daily)
+
+        integrate_ryd = self.config["downloads"]["integrate_ryd"]
+        if integrate_ryd:
+            self._get_unrated_vids()

     @staticmethod
-    def reindex_single_video(youtube_id):
+    def _reindex_single_video(youtube_id):
         """refresh data for single video"""
         video = YoutubeVideo(youtube_id)
@@ -188,6 +155,8 @@ class Reindex:
             return

         video.delete_subtitles()
+        video.check_subtitles()
+
         # add back
         video.json_data["player"] = player
         video.json_data["date_downloaded"] = date_downloaded
@@ -204,20 +173,21 @@ class Reindex:
             return

     @staticmethod
-    def reindex_single_channel(channel_id):
+    def _reindex_single_channel(channel_id):
         """refresh channel data and sync to videos"""
         channel = YoutubeChannel(channel_id)
         channel.get_from_es()
         subscribed = channel.json_data["channel_subscribed"]
-        overwrites = channel.json_data["channel_overwrites"]
+        overwrites = channel.json_data.get("channel_overwrites", False)
         channel.get_from_youtube()
         channel.json_data["channel_subscribed"] = subscribed
-        channel.json_data["channel_overwrites"] = overwrites
+        if overwrites:
+            channel.json_data["channel_overwrites"] = overwrites
         channel.upload_to_es()
         channel.sync_to_videos()

     @staticmethod
-    def reindex_single_playlist(playlist_id, all_indexed_ids):
+    def _reindex_single_playlist(playlist_id, all_indexed_ids):
         """refresh playlist data"""
         playlist = YoutubePlaylist(playlist_id)
         playlist.get_from_es()
@@ -234,18 +204,19 @@ class Reindex:

     def reindex(self):
         """reindex what's needed"""
+        sleep_interval = self.config["downloads"]["sleep_interval"]
         # videos
         print(f"reindexing {len(self.all_youtube_ids)} videos")
         for youtube_id in self.all_youtube_ids:
-            self.reindex_single_video(youtube_id)
-            if self.sleep_interval:
-                sleep(self.sleep_interval)
+            self._reindex_single_video(youtube_id)
+            if sleep_interval:
+                sleep(sleep_interval)
         # channels
         print(f"reindexing {len(self.all_channel_ids)} channels")
         for channel_id in self.all_channel_ids:
-            self.reindex_single_channel(channel_id)
-            if self.sleep_interval:
-                sleep(self.sleep_interval)
+            self._reindex_single_channel(channel_id)
+            if sleep_interval:
+                sleep(sleep_interval)
         # playlist
         print(f"reindexing {len(self.all_playlist_ids)} playlists")
         if self.all_playlist_ids:
@@ -253,6 +224,6 @@ class Reindex:
             handler.get_indexed()
             all_indexed_ids = [i["youtube_id"] for i in handler.all_videos]
             for playlist_id in self.all_playlist_ids:
-                self.reindex_single_playlist(playlist_id, all_indexed_ids)
-                if self.sleep_interval:
-                    sleep(self.sleep_interval)
+                self._reindex_single_playlist(playlist_id, all_indexed_ids)
+                if sleep_interval:
+                    sleep(sleep_interval)
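The daily quota in `_get_daily` divides the number of active documents by the configured `check_reindex_days` interval and pads it with `MULTIPLY = 1.2`, so the refresh cycle drains slightly faster than documents age into it. A quick worked example with made-up numbers:

```python
from math import ceil

total_videos = 7000  # made-up library size
interval = 90        # scheduler setting check_reindex_days
MULTIPLY = 1.2

video_daily = ceil(total_videos / interval * MULTIPLY)
print(video_daily)  # 94 videos refreshed per day
```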

View File

@@ -296,7 +296,6 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         self._add_stats()
         self.add_file_path()
         self.add_player()
-        self._check_subtitles()
         if self.config["downloads"]["integrate_ryd"]:
             self._get_ryd_stats()

@@ -369,7 +368,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
         channel_dir = os.path.join(self.app_conf["videos"], channel)
         all_files = os.listdir(channel_dir)
         for file in all_files:
-            if self.youtube_id in file:
+            if self.youtube_id in file and file.endswith(".mp4"):
                 vid_path = os.path.join(channel_dir, file)
                 break
         else:
@@ -441,7 +440,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):

         return True

-    def _check_subtitles(self):
+    def check_subtitles(self):
         """optionally add subtitles"""
         handler = YoutubeSubtitle(self)
         subtitles = handler.get_subtitles()
@@ -451,8 +450,9 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):

     def delete_subtitles(self):
         """delete indexed subtitles"""
+        path = "ta_subtitle/_delete_by_query?refresh=true"
         data = {"query": {"term": {"youtube_id": {"value": self.youtube_id}}}}
-        _, _ = ElasticWrap("ta_subtitle/_delete_by_query").post(data=data)
+        _, _ = ElasticWrap(path).post(data=data)


 def index_new_video(youtube_id):
@@ -462,5 +462,6 @@ def index_new_video(youtube_id):
     if not video.json_data:
         raise ValueError("failed to get metadata for " + youtube_id)

+    video.check_subtitles()
     video.upload_to_es()
     return video.json_data

View File

@@ -3,31 +3,15 @@ Loose collection of helper functions
 - don't import AppConfig class here to avoid circular imports
 """

-import json
 import re
 import string
 import subprocess
 import unicodedata
 from urllib.parse import parse_qs, urlparse

-import requests
 import yt_dlp


-def get_total_hits(index, es_url, es_auth, match_field):
-    """get total hits from index"""
-    headers = {"Content-type": "application/json"}
-    data = {"query": {"match": {match_field: True}}}
-    payload = json.dumps(data)
-    url = f"{es_url}/{index}/_search?filter_path=hits.total"
-    request = requests.post(url, data=payload, headers=headers, auth=es_auth)
-    if not request.ok:
-        print(request.text)
-    total_json = json.loads(request.text)
-    total_hits = total_json["hits"]["total"]["value"]
-    return total_hits
-
-
 def clean_string(file_name):
     """clean string to only asci characters"""
     whitelist = "-_.() " + string.ascii_letters + string.digits

View File

@@ -11,12 +11,20 @@ import redis
 from home.src.ta.helper import ignore_filelist


-class RedisArchivist:
-    """collection of methods to interact with redis"""
+class RedisBase:
+    """connection base for redis"""

     REDIS_HOST = os.environ.get("REDIS_HOST")
     REDIS_PORT = os.environ.get("REDIS_PORT") or 6379
     NAME_SPACE = "ta:"
+
+    def __init__(self):
+        self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)
+
+
+class RedisArchivist(RedisBase):
+    """collection of methods to interact with redis"""
+
     CHANNELS = [
         "download",
         "add",
@@ -27,14 +35,9 @@ class RedisArchivist:
         "setting",
     ]

-    def __init__(self):
-        self.redis_connection = redis.Redis(
-            host=self.REDIS_HOST, port=self.REDIS_PORT
-        )
-
     def set_message(self, key, message, expire=True):
         """write new message to redis"""
-        self.redis_connection.execute_command(
+        self.conn.execute_command(
             "JSON.SET", self.NAME_SPACE + key, ".", json.dumps(message)
         )
@@ -43,15 +46,11 @@ class RedisArchivist:
             secs = 20
         else:
             secs = expire
-        self.redis_connection.execute_command(
-            "EXPIRE", self.NAME_SPACE + key, secs
-        )
+        self.conn.execute_command("EXPIRE", self.NAME_SPACE + key, secs)

     def get_message(self, key):
         """get message dict from redis"""
-        reply = self.redis_connection.execute_command(
-            "JSON.GET", self.NAME_SPACE + key
-        )
+        reply = self.conn.execute_command("JSON.GET", self.NAME_SPACE + key)
         if reply:
             json_str = json.loads(reply)
         else:
@@ -61,7 +60,7 @@ class RedisArchivist:

     def list_items(self, query):
         """list all matches"""
-        reply = self.redis_connection.execute_command(
+        reply = self.conn.execute_command(
             "KEYS", self.NAME_SPACE + query + "*"
         )
         all_matches = [i.decode().lstrip(self.NAME_SPACE) for i in reply]
@@ -74,14 +73,12 @@ class RedisArchivist:

     def del_message(self, key):
         """delete key from redis"""
-        response = self.redis_connection.execute_command(
-            "DEL", self.NAME_SPACE + key
-        )
+        response = self.conn.execute_command("DEL", self.NAME_SPACE + key)
         return response

     def get_lock(self, lock_key):
         """handle lock for task management"""
-        redis_lock = self.redis_connection.lock(self.NAME_SPACE + lock_key)
+        redis_lock = self.conn.lock(self.NAME_SPACE + lock_key)
         return redis_lock

     def get_progress(self):
@@ -89,7 +86,7 @@ class RedisArchivist:
         all_messages = []
         for channel in self.CHANNELS:
             key = "message:" + channel
-            reply = self.redis_connection.execute_command(
+            reply = self.conn.execute_command(
                 "JSON.GET", self.NAME_SPACE + key
             )
             if reply:
@@ -120,19 +117,12 @@ class RedisArchivist:
         return mess_dict


-class RedisQueue:
+class RedisQueue(RedisBase):
     """dynamically interact with the download queue in redis"""

-    REDIS_HOST = os.environ.get("REDIS_HOST")
-    REDIS_PORT = os.environ.get("REDIS_PORT")
-    NAME_SPACE = "ta:"
-
-    if not REDIS_PORT:
-        REDIS_PORT = 6379
-
-    def __init__(self, key):
-        self.key = self.NAME_SPACE + key
-        self.conn = redis.Redis(host=self.REDIS_HOST, port=self.REDIS_PORT)
+    def __init__(self):
+        super().__init__()
+        self.key = self.NAME_SPACE + "dl_queue"

     def get_all(self):
         """return all elements in list"""

View File

@ -8,8 +8,8 @@ Functionality:
import os import os
import home.apps as startup_apps
from celery import Celery, shared_task from celery import Celery, shared_task
from home.apps import StartupCheck
from home.src.download.queue import PendingList from home.src.download.queue import PendingList
from home.src.download.subscriptions import ( from home.src.download.subscriptions import (
ChannelSubscription, ChannelSubscription,
@ -98,7 +98,7 @@ def download_pending():
@shared_task @shared_task
def download_single(youtube_id): def download_single(youtube_id):
"""start download single video now""" """start download single video now"""
queue = RedisQueue("dl_queue") queue = RedisQueue()
queue.add_priority(youtube_id) queue.add_priority(youtube_id)
print("Added to queue with priority: " + youtube_id) print("Added to queue with priority: " + youtube_id)
# start queue if needed # start queue if needed
@ -181,7 +181,7 @@ def kill_dl(task_id):
app.control.revoke(task_id, terminate=True) app.control.revoke(task_id, terminate=True)
_ = RedisArchivist().del_message("dl_queue_id") _ = RedisArchivist().del_message("dl_queue_id")
RedisQueue("dl_queue").clear() RedisQueue().clear()
# clear cache # clear cache
cache_dir = os.path.join(CONFIG["application"]["cache_dir"], "download") cache_dir = os.path.join(CONFIG["application"]["cache_dir"], "download")
@ -274,5 +274,5 @@ try:
app.conf.beat_schedule = ScheduleBuilder().build_schedule() app.conf.beat_schedule = ScheduleBuilder().build_schedule()
except KeyError: except KeyError:
# update path from v0.0.8 to v0.0.9 to load new defaults # update path from v0.0.8 to v0.0.9 to load new defaults
startup_apps.sync_redis_state() StartupCheck().sync_redis_state()
app.conf.beat_schedule = ScheduleBuilder().build_schedule() app.conf.beat_schedule = ScheduleBuilder().build_schedule()
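With the queue key now fixed to `dl_queue` inside the class, the tasks above construct `RedisQueue()` without arguments. A short usage sketch of the simplified interface; the import path is an assumption and the video ID is a placeholder:

```python
from home.src.ta.ta_redis import RedisQueue  # assumed module path

queue = RedisQueue()               # key "ta:dl_queue" is set in __init__
queue.add_priority("placeholder")  # queue a video ID with priority, as in download_single
ids = queue.get_all()              # list everything currently queued
queue.clear()                      # wipe the queue, as done in kill_dl
```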
View File
@ -53,36 +53,44 @@
<p>Total Videos archived: {{ max_hits }}</p> <p>Total Videos archived: {{ max_hits }}</p>
<p>Watched: <button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button></p> <p>Watched: <button title="Mark all videos from {{ channel_info.channel_name }} as watched" type="button" id="watched-button" data-id="{{ channel_info.channel_id }}" onclick="isWatchedButton(this)">Mark as watched</button></p>
{% endif %} {% endif %}
<button {% if channel_info.channel_overwrites %} class="danger-button"{% endif %} onclick="showOverwrite()" title="Overwrite settings for channel {{ channel_info.channel_name }}">Configure</button>
<a href="/playlist/?search={{ channel_info.channel_id }}" title="Show all playlists belonging to {{ channel_info.channel_name }}"><button>Show Playlists</button></a> <a href="/playlist/?search={{ channel_info.channel_id }}" title="Show all playlists belonging to {{ channel_info.channel_name }}"><button>Show Playlists</button></a>
</div> </div>
</div> </div>
</div> </div>
<div class="info-box"> <div id="overwrite-form" class="info-box{% if not channel_info.channel_overwrites %} hidden-overwrite{% endif %}">
<div class="info-box-item"> <div class="info-box-item">
<form action="/channel/{{ channel_info.channel_id }}/" method="POST"> <form class="overwrite-form" action="/channel/{{ channel_info.channel_id }}/" method="POST">
{% csrf_token %} {% csrf_token %}
<p>Download format: <span class="settings-current"> <div class="overwrite-form-item">
{% if channel_info.channel_overwrites.download_format %} <p>Download format: <span class="settings-current">
{{ channel_info.channel_overwrites.download_format }} {% if channel_info.channel_overwrites.download_format %}
{% else %} {{ channel_info.channel_overwrites.download_format }}
False {% else %}
{% endif %}</span></p> False
{{ channel_overwrite_form.download_format }}<br> {% endif %}</span></p>
<p>Auto delete videos after x days: <span class="settings-current"> {{ channel_overwrite_form.download_format }}<br>
{% if channel_info.channel_overwrites.autodelete_days %} </div>
{{ channel_info.channel_overwrites.autodelete_days }} <div class="overwrite-form-item">
{% else %} <p>Auto delete watched videos after x days: <span class="settings-current">
False {% if channel_info.channel_overwrites.autodelete_days %}
{% endif %}</span></p> {{ channel_info.channel_overwrites.autodelete_days }}
{{ channel_overwrite_form.autodelete_days }}<br> {% else %}
<p>Index playlists: <span class="settings-current"> False
{% if channel_info.channel_overwrites.index_playlists %} {% endif %}</span></p>
{{ channel_info.channel_overwrites.index_playlists }} {{ channel_overwrite_form.autodelete_days }}<br>
{% else %} </div>
False <div class="overwrite-form-item">
{% endif %}</span></p> <p>Index playlists: <span class="settings-current">
{{ channel_overwrite_form.index_playlists }}<br> {% if channel_info.channel_overwrites.index_playlists %}
<button type="submit">Save</button> {{ channel_info.channel_overwrites.index_playlists }}
{% else %}
False
{% endif %}</span></p>
{{ channel_overwrite_form.index_playlists }}<br>
</div>
<div class="overwrite-form-item"></div>
<button type="submit">Save Channel Overwrites</button>
</form> </form>
</div> </div>
</div> </div>
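The form rendered here, `channel_overwrite_form`, is not part of this diff. For orientation, a hypothetical Django form with the three field names the template references might look roughly like this; field types and widgets are assumptions:

```python
from django import forms


class ChannelOverwriteForm(forms.Form):
    """hypothetical sketch - field names taken from the template above"""

    download_format = forms.CharField(required=False)
    autodelete_days = forms.IntegerField(required=False)
    index_playlists = forms.BooleanField(required=False)
```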
View File
@ -201,6 +201,9 @@ class ArchivistResultsView(ArchivistViewConfig):
"ta_video/_search", self.default_conf, data=data "ta_video/_search", self.default_conf, data=data
) )
videos = search.get_data() videos = search.get_data()
if not videos:
return False
for video in videos: for video in videos:
youtube_id = video["source"]["youtube_id"] youtube_id = video["source"]["youtube_id"]
matched = [i for i in results if i["youtube_id"] == youtube_id] matched = [i for i in results if i["youtube_id"] == youtube_id]
View File
@ -4,7 +4,7 @@ Django==4.0.3
django-cors-headers==3.11.0 django-cors-headers==3.11.0
djangorestframework==3.13.1 djangorestframework==3.13.1
Pillow==9.0.1 Pillow==9.0.1
redis==4.1.4 redis==4.2.0
requests==2.27.1 requests==2.27.1
ryd-client==0.0.3 ryd-client==0.0.3
uWSGI==2.0.20 uWSGI==2.0.20
View File
@ -566,6 +566,28 @@ button:hover {
margin-bottom: 10px; margin-bottom: 10px;
} }
.overwrite-form {
display: grid;
grid-template-columns: 1fr 1fr;
width: 100%;
}
.overwrite-form button {
width: 200px;
}
.overwrite-form-item {
margin-bottom: 1rem;
}
.overwrite-form-item input {
width: 90%;
}
.hidden-overwrite {
display: none;
}
/* login */ /* login */
.login-page { .login-page {
display: flex; display: flex;
@ -1062,7 +1084,10 @@ button:hover {
.channel-list.grid, .channel-list.grid,
.video-item.list, .video-item.list,
.playlist-list.list, .playlist-list.list,
.playlist-list.grid { .playlist-list.grid,
.info-box-2,
.info-box-3,
.overwrite-form {
grid-template-columns: 1fr; grid-template-columns: 1fr;
} }
.playlist-item.list { .playlist-item.list {
@ -1101,10 +1126,6 @@ button:hover {
.sort select { .sort select {
margin: unset; margin: unset;
} }
.info-box-2,
.info-box-3 {
grid-template-columns: 1fr;
}
.description-box { .description-box {
display: block; display: block;
} }
View File
@ -192,7 +192,6 @@ function addSingle(button) {
function deleteQueue(button) { function deleteQueue(button) {
var to_delete = button.getAttribute('data-id'); var to_delete = button.getAttribute('data-id');
var payload = JSON.stringify({'deleteQueue': to_delete}); var payload = JSON.stringify({'deleteQueue': to_delete});
console.log(payload);
sendPost(payload); sendPost(payload);
setTimeout(function(){ setTimeout(function(){
location.reload(); location.reload();
@ -315,7 +314,6 @@ function deletePlaylist(button) {
"playlist-action": playlist_action "playlist-action": playlist_action
} }
}); });
console.log(payload);
sendPost(payload); sendPost(payload);
setTimeout(function(){ setTimeout(function(){
window.location.replace("/playlist/"); window.location.replace("/playlist/");
@ -418,7 +416,6 @@ function createVideoTag(videoData, videoProgress) {
var videoSubtitles = videoData.data.subtitles; // Array of subtitles var videoSubtitles = videoData.data.subtitles; // Array of subtitles
if (typeof(videoSubtitles) != 'undefined' && videoData.config.downloads.subtitle) { if (typeof(videoSubtitles) != 'undefined' && videoData.config.downloads.subtitle) {
for (var i = 0; i < videoSubtitles.length; i++) { for (var i = 0; i < videoSubtitles.length; i++) {
console.log(videoSubtitles[i]);
let label = videoSubtitles[i].name; let label = videoSubtitles[i].name;
if (videoSubtitles[i].source == "auto") { if (videoSubtitles[i].source == "auto") {
label += " - auto"; label += " - auto";
@ -886,6 +883,15 @@ function showForm() {
animate('animate-icon', 'pulse-img'); animate('animate-icon', 'pulse-img');
} }
function showOverwrite() {
var overwriteDiv = document.getElementById("overwrite-form");
if (overwriteDiv.classList.contains("hidden-overwrite")) {
overwriteDiv.classList.remove("hidden-overwrite");
} else {
overwriteDiv.classList.add("hidden-overwrite")
}
}
function animate(elementId, animationClass) { function animate(elementId, animationClass) {
var toAnimate = document.getElementById(elementId); var toAnimate = document.getElementById(elementId);
if (toAnimate.className !== animationClass) { if (toAnimate.className !== animationClass) {
View File
@ -1,9 +0,0 @@
#!/bin/bash
# install debug and testing tools into slim container
apt update && apt install -y vim htop bmon net-tools iputils-ping procps
pip install ipython
##
exit 0