sync to master

This commit is contained in:
greg 2024-02-02 09:17:32 -05:00
parent 566c5abb62
commit 76e61aeb1a
19 changed files with 129 additions and 90 deletions

View File

@ -190,7 +190,7 @@ Implemented:
## User Scripts
This is a list of useful user scripts, generously created by folks like you to extend this project and its functionality. This is your time to shine: [read this](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#user-scripts), then open a PR to add your script here.
- Example 1
[danieljue/ta_dl_page_script](https://github.com/danieljue/ta_dl_page_script): Helper browser script to prioritize a channel's videos in the download queue.
## Donate
The best donation to **Tube Archivist** is your time; take a look at the [contribution page](CONTRIBUTING.md) to get started.

View File

@ -18,7 +18,7 @@ python manage.py ta_migpath
# start all tasks
nginx &
celery -A home.tasks worker --loglevel=INFO &
celery -A home.tasks worker --loglevel=INFO --max-tasks-per-child 10 &
celery -A home beat --loglevel=INFO \
-s "${BEAT_SCHEDULE_PATH:-${cachedir}/celerybeat-schedule}" &
uwsgi --ini uwsgi.ini

View File

@ -37,9 +37,10 @@ class Command(BaseCommand):
self.stdout.write(TOPIC)
self._sync_redis_state()
self._make_folders()
self._release_locks()
self._clear_redis_keys()
self._clear_tasks()
self._clear_dl_cache()
self._mig_clear_failed_versioncheck()
self._version_check()
self._mig_index_setup()
self._mig_snapshot_check()
@ -75,10 +76,10 @@ class Command(BaseCommand):
self.stdout.write(self.style.SUCCESS(" ✓ expected folders created"))
def _release_locks(self):
"""make sure there are no leftover locks set in redis"""
self.stdout.write("[3] clear leftover locks in redis")
all_locks = [
def _clear_redis_keys(self):
"""make sure there are no leftover locks or keys set in redis"""
self.stdout.write("[3] clear leftover keys in redis")
all_keys = [
"dl_queue_id",
"dl_queue",
"downloading",
@ -87,19 +88,22 @@ class Command(BaseCommand):
"rescan",
"run_backup",
"startup_check",
"reindex:ta_video",
"reindex:ta_channel",
"reindex:ta_playlist",
]
redis_con = RedisArchivist()
has_changed = False
for lock in all_locks:
if redis_con.del_message(lock):
for key in all_keys:
if redis_con.del_message(key):
self.stdout.write(
self.style.SUCCESS(f" ✓ cleared lock {lock}")
self.style.SUCCESS(f" ✓ cleared key {key}")
)
has_changed = True
if not has_changed:
self.stdout.write(self.style.SUCCESS(" no locks found"))
self.stdout.write(self.style.SUCCESS(" no keys found"))
def _clear_tasks(self):
"""clear tasks and messages"""
@ -147,6 +151,10 @@ class Command(BaseCommand):
self.stdout.write("[MIGRATION] setup snapshots")
ElasticSnapshot().setup()
def _mig_clear_failed_versioncheck(self):
    """
    migration hotfix for v0.4.5:
    hand off to ReleaseVersion.clear_fail to clear a faulty versioncheck
    """
    version_check = ReleaseVersion()
    version_check.clear_fail()
def _mig_move_users_to_es(self): # noqa: C901
"""migration: update from 0.4.1 to 0.4.2 move user config to ES"""
self.stdout.write("[MIGRATION] move user configuration to ES")

View File

@ -269,4 +269,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [
# TA application settings
TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist"
TA_VERSION = "v0.4.5-unstable"
TA_VERSION = "v0.4.6"

View File

@ -13,6 +13,7 @@ Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import include, path

View File

@ -1,4 +1,5 @@
"""custom models"""
from django.contrib.auth.models import (
AbstractBaseUser,
BaseUserManager,

View File

@ -50,7 +50,7 @@ class DownloadPostProcess:
return
print(f"auto delete older than {autodelete_days} days")
now_lte = self.now - autodelete_days * 24 * 60 * 60
now_lte = str(self.now - autodelete_days * 24 * 60 * 60)
data = {
"query": {"range": {"player.watched_date": {"lte": now_lte}}},
"sort": [{"player.watched_date": {"order": "asc"}}],
@ -63,7 +63,7 @@ class DownloadPostProcess:
if "autodelete_days" in value:
autodelete_days = value.get("autodelete_days")
print(f"{channel_id}: delete older than {autodelete_days}d")
now_lte = self.now - autodelete_days * 24 * 60 * 60
now_lte = str(self.now - autodelete_days * 24 * 60 * 60)
must_list = [
{"range": {"player.watched_date": {"lte": now_lte}}},
{"term": {"channel.channel_id": {"value": channel_id}}},

View File

@ -3,6 +3,7 @@ functionality:
- wrapper around requests to call elastic search
- reusable search_after to extract total index
"""
# pylint: disable=missing-timeout
import json

View File

@ -6,7 +6,6 @@ Functionality:
- calculate pagination values
"""
from api.src.search_processor import SearchProcess
from home.src.es.connect import ElasticWrap

View File

@ -31,8 +31,8 @@ class YoutubeChannel(YouTubeItem):
self.task = task
def build_yt_url(self):
"""build youtube url"""
return f"{self.yt_base}{self.youtube_id}/featured"
"""overwrite base to use channel about page"""
return f"{self.yt_base}{self.youtube_id}/about"
def build_json(self, upload=False, fallback=False):
"""get from es or from youtube"""
@ -199,6 +199,15 @@ class YoutubeChannel(YouTubeItem):
}
_, _ = ElasticWrap("ta_comment/_delete_by_query").post(data)
def delete_es_subtitles(self):
    """
    run a delete-by-query on ta_subtitle,
    matching subtitle_channel_id against the youtube_id of this channel
    """
    channel_term = {"subtitle_channel_id": {"value": self.youtube_id}}
    query = {"query": {"term": channel_term}}
    # both returned values are intentionally discarded
    _, _ = ElasticWrap("ta_subtitle/_delete_by_query").post(query)
def delete_playlists(self):
"""delete all indexed playlist from es"""
all_playlists = self.get_indexed_playlists()
@ -235,6 +244,7 @@ class YoutubeChannel(YouTubeItem):
print(f"{self.youtube_id}: delete indexed videos")
self.delete_es_videos()
self.delete_es_comments()
self.delete_es_subtitles()
self.del_in_es()
def index_channel_playlists(self):

View File

@ -126,7 +126,7 @@ class Comments:
"comment_author_id": comment["author_id"],
"comment_author_thumbnail": comment["author_thumbnail"],
"comment_author_is_uploader": comment.get(
"comment_author_is_uploader", False
"author_is_uploader", False
),
"comment_parent": comment["parent"],
}

View File

@ -243,7 +243,7 @@ class Reindex(ReindexBase):
return
for name, index_config in self.REINDEX_CONFIG.items():
if not RedisQueue(index_config["queue_name"]).has_item():
if not RedisQueue(index_config["queue_name"]).length():
continue
self.total = RedisQueue(index_config["queue_name"]).length()
@ -292,6 +292,9 @@ class Reindex(ReindexBase):
# read current state
video.get_from_es()
if not video.json_data:
return
es_meta = video.json_data.copy()
# get new
@ -329,6 +332,9 @@ class Reindex(ReindexBase):
# read current state
channel = YoutubeChannel(channel_id)
channel.get_from_es()
if not channel.json_data:
return
es_meta = channel.json_data.copy()
# get new
@ -357,6 +363,9 @@ class Reindex(ReindexBase):
self._get_all_videos()
playlist = YoutubePlaylist(playlist_id)
playlist.get_from_es()
if not playlist.json_data:
return
subscribed = playlist.json_data["playlist_subscribed"]
playlist.all_youtube_ids = self.all_indexed_ids
playlist.build_json(scrape=True)

View File

@ -177,6 +177,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
def _process_youtube_meta(self):
"""extract relevant fields from youtube"""
self._validate_id()
# extract
self.channel_id = self.youtube_meta["channel_id"]
upload_date = self.youtube_meta["upload_date"]
@ -202,6 +203,19 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"active": True,
}
def _validate_id(self):
    """
    compare the ID found in youtube_meta with the expected youtube_id,
    raises ValueError when the two differ
    """
    remote_id = self.youtube_meta["id"]
    if self.youtube_id == remote_id:
        return

    # a different ID coming back is reported as an unexpected redirect
    raise ValueError(
        f"[reindex][{self.youtube_id}] got an unexpected redirect "
        f"to {remote_id}, you are probably getting blocked by YT. "
        "See FAQ for more details."
    )
def _add_channel(self):
"""add channel dict to video json_data"""
channel = ta_channel.YoutubeChannel(self.channel_id)

View File

@ -269,14 +269,13 @@ class ReleaseVersion:
REMOTE_URL = "https://www.tubearchivist.com/api/release/latest/"
NEW_KEY = "versioncheck:new"
def __init__(self):
self.local_version = self._parse_version(settings.TA_VERSION)
self.is_unstable = settings.TA_VERSION.endswith("-unstable")
self.remote_version = False
self.is_breaking = False
self.response = False
def __init__(self) -> None:
self.local_version: str = settings.TA_VERSION
self.is_unstable: bool = settings.TA_VERSION.endswith("-unstable")
self.remote_version: str = ""
self.is_breaking: bool = False
def check(self):
def check(self) -> None:
"""check version"""
print(f"[{self.local_version}]: look for updates")
self.get_remote_version()
@ -290,50 +289,63 @@ class ReleaseVersion:
RedisArchivist().set_message(self.NEW_KEY, message)
print(f"[{self.local_version}]: found new version {new_version}")
def get_local_version(self):
def get_local_version(self) -> str:
"""read version from local"""
return self.local_version
def get_remote_version(self):
def get_remote_version(self) -> None:
"""read version from remote"""
sleep(randint(0, 60))
self.response = requests.get(self.REMOTE_URL, timeout=20).json()
remote_version_str = self.response["release_version"]
self.remote_version = self._parse_version(remote_version_str)
self.is_breaking = self.response["breaking_changes"]
response = requests.get(self.REMOTE_URL, timeout=20).json()
self.remote_version = response["release_version"]
self.is_breaking = response["breaking_changes"]
def _has_update(self):
def _has_update(self) -> str | bool:
"""check if there is an update"""
if self.remote_version > self.local_version:
remote_parsed = self._parse_version(self.remote_version)
local_parsed = self._parse_version(self.local_version)
if remote_parsed > local_parsed:
return self.remote_version
if self.is_unstable and self.local_version == self.remote_version:
if self.is_unstable and local_parsed == remote_parsed:
return self.remote_version
return False
@staticmethod
def _parse_version(version):
def _parse_version(version) -> tuple[int, ...]:
"""return version parts"""
clean = version.rstrip("-unstable").lstrip("v")
return tuple((int(i) for i in clean.split(".")))
def is_updated(self):
def is_updated(self) -> str | bool:
"""check if update happened in the mean time"""
message = self.get_update()
if not message:
return False
if self.local_version >= self._parse_version(message.get("version")):
local_parsed = self._parse_version(self.local_version)
message_parsed = self._parse_version(message.get("version"))
if local_parsed >= message_parsed:
RedisArchivist().del_message(self.NEW_KEY)
return settings.TA_VERSION
return False
def get_update(self):
def get_update(self) -> dict:
"""return new version dict if available"""
message = RedisArchivist().get_message(self.NEW_KEY)
if not message.get("status"):
return False
return {}
return message
def clear_fail(self) -> None:
    """
    delete the NEW_KEY message when its stored version is a list,
    cleanup for the previous error in v0.4.5
    """
    stored = self.get_update()
    # nothing stored, or version is not a list: leave the key untouched
    if stored and isinstance(stored.get("version"), list):
        RedisArchivist().del_message(self.NEW_KEY)

View File

@ -20,6 +20,7 @@ class RedisBase:
self.conn = redis.Redis(
host=EnvironmentSettings.REDIS_HOST,
port=EnvironmentSettings.REDIS_PORT,
decode_responses=True,
)
@ -82,7 +83,7 @@ class RedisArchivist(RedisBase):
if not reply:
return []
return [i.decode().lstrip(self.NAME_SPACE) for i in reply]
return [i.lstrip(self.NAME_SPACE) for i in reply]
def list_items(self, query: str) -> list:
"""list all matches"""
@ -99,65 +100,49 @@ class RedisArchivist(RedisBase):
class RedisQueue(RedisBase):
"""dynamically interact with queues in redis"""
"""
dynamically interact with queues in redis using sorted set
- low score number is first in queue
- add new items with high score number
"""
def __init__(self, queue_name: str):
super().__init__()
self.key = f"{self.NAME_SPACE}{queue_name}"
def get_all(self):
def get_all(self) -> list[str]:
"""return all elements in list"""
result = self.conn.execute_command("LRANGE", self.key, 0, -1)
all_elements = [i.decode() for i in result]
return all_elements
result = self.conn.zrange(self.key, 0, -1)
return result
def length(self) -> int:
"""return total elements in list"""
return self.conn.execute_command("LLEN", self.key)
return self.conn.zcard(self.key)
def in_queue(self, element) -> str | bool:
"""check if element is in list"""
result = self.conn.execute_command("LPOS", self.key, element)
result = self.conn.zrank(self.key, element)
if result is not None:
return "in_queue"
return False
def add_list(self, to_add):
def add_list(self, to_add: list) -> None:
"""add list to queue"""
self.conn.execute_command("RPUSH", self.key, *to_add)
def add_priority(self, to_add: str) -> None:
"""add single video to front of queue"""
item: str = json.dumps(to_add)
self.clear_item(item)
self.conn.execute_command("LPUSH", self.key, item)
mapping = {i: "+inf" for i in to_add}
self.conn.zadd(self.key, mapping)
def get_next(self) -> str | bool:
"""return next element in the queue, False if none"""
result = self.conn.execute_command("LPOP", self.key)
"""return next element in the queue, if available"""
result = self.conn.zpopmin(self.key)
if not result:
return False
next_element = result.decode()
return next_element
return result[0][0]
def clear(self) -> None:
"""delete list from redis"""
self.conn.execute_command("DEL", self.key)
def clear_item(self, to_clear: str) -> None:
"""remove single item from list if it's there"""
self.conn.execute_command("LREM", self.key, 0, to_clear)
def trim(self, size: int) -> None:
"""trim the queue based on settings amount"""
self.conn.execute_command("LTRIM", self.key, 0, size)
def has_item(self) -> bool:
"""check if queue as at least one pending item"""
result = self.conn.execute_command("LRANGE", self.key, 0, 0)
return bool(result)
self.conn.delete(self.key)
class TaskRedis(RedisBase):
@ -170,7 +155,7 @@ class TaskRedis(RedisBase):
def get_all(self) -> list:
"""return all tasks"""
all_keys = self.conn.execute_command("KEYS", f"{self.BASE}*")
return [i.decode().replace(self.BASE, "") for i in all_keys]
return [i.replace(self.BASE, "") for i in all_keys]
def get_single(self, task_id: str) -> dict:
"""return content of single task"""
@ -178,7 +163,7 @@ class TaskRedis(RedisBase):
if not result:
return {}
return json.loads(result.decode())
return json.loads(result)
def set_key(
self, task_id: str, message: dict, expire: bool | int = False

View File

@ -33,11 +33,9 @@
<body>
<div class="main-content">
<div class="boxed-content">
<div class="top-banner">
<a href="{% url 'home' %}">
<img alt="tube-archivist-banner">
</a>
</div>
<a href="{% url 'home' %}">
<div class="top-banner"></div>
</a>
<div class="top-nav">
<div class="nav-items">
<a href="{% url 'home' %}">

View File

@ -3,6 +3,7 @@ Functionality:
- all views for home app
- holds base classes to inherit from
"""
import enum
import urllib.parse
from time import sleep

View File

@ -4,8 +4,8 @@ Django==4.2.7
django-auth-ldap==4.6.0
django-cors-headers==4.3.1
djangorestframework==3.14.0
Pillow==10.1.0
redis==5.0.1
Pillow==10.2.0
redis==5.0.0
requests==2.31.0
ryd-client==0.0.6
uWSGI==2.0.23

View File

@ -159,13 +159,13 @@ button:hover {
}
.top-banner {
text-align: center;
}
.top-banner img {
width: 100%;
max-width: 700px;
content: var(--banner);
background-image: var(--banner);
background-repeat: no-repeat;
background-size: contain;
height: 10vh;
min-height: 80px;
max-height: 120px;
background-position: center center;
}
.footer {