Mirror of https://github.com/tubearchivist/tubearchivist-frontend.git (synced 2024-11-22 11:50:14 +00:00)

commit 2433e0e7d8 (parent 69e6e490f4)

    linting everything in black
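Note: the diff below shows what black's default style does to this code base: single quotes become double quotes, multi-line collections gain trailing commas, and long calls or literals are split to one element per line. The exact command used is not recorded in the commit, so the snippet below is only an illustrative sketch of the same transformation using black's programmatic entry point (the example source string is taken from the diff itself).

    # Illustrative sketch only -- not part of this commit.
    # Requires: pip install black
    import black

    source = "os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')\n"
    # format_str applies the same default rules black uses on the command line
    formatted = black.format_str(source, mode=black.FileMode())
    print(formatted)
    # -> os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")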
@@ -11,6 +11,6 @@ import os

 from django.core.asgi import get_asgi_application

-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")

 application = get_asgi_application()

@@ -21,67 +21,67 @@ BASE_DIR = Path(__file__).resolve().parent.parent
 # See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/

 # SECURITY WARNING: keep the secret key used in production secret!
-SECRET_KEY = 'Fvid^aUL6LohRZz*kZFvq85B&JW&kB9o*#jdzWsdWE8*XkCLR8'
+SECRET_KEY = "Fvid^aUL6LohRZz*kZFvq85B&JW&kB9o*#jdzWsdWE8*XkCLR8"

 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = bool(environ.get('DJANGO_DEBUG'))
+DEBUG = bool(environ.get("DJANGO_DEBUG"))

-ALLOWED_HOSTS = ['*']
+ALLOWED_HOSTS = ["*"]


 # Application definition

 INSTALLED_APPS = [
-'home.apps.HomeConfig',
+"home.apps.HomeConfig",
-'django.contrib.admin',
+"django.contrib.admin",
-'django.contrib.auth',
+"django.contrib.auth",
-'django.contrib.contenttypes',
+"django.contrib.contenttypes",
-'django.contrib.sessions',
+"django.contrib.sessions",
-'django.contrib.messages',
+"django.contrib.messages",
-'whitenoise.runserver_nostatic',
+"whitenoise.runserver_nostatic",
-'django.contrib.staticfiles',
+"django.contrib.staticfiles",
-'django.contrib.humanize'
+"django.contrib.humanize",
 ]

 MIDDLEWARE = [
-'django.middleware.security.SecurityMiddleware',
+"django.middleware.security.SecurityMiddleware",
-'django.contrib.sessions.middleware.SessionMiddleware',
+"django.contrib.sessions.middleware.SessionMiddleware",
-'whitenoise.middleware.WhiteNoiseMiddleware',
+"whitenoise.middleware.WhiteNoiseMiddleware",
-'django.middleware.common.CommonMiddleware',
+"django.middleware.common.CommonMiddleware",
-'django.middleware.csrf.CsrfViewMiddleware',
+"django.middleware.csrf.CsrfViewMiddleware",
-'django.contrib.auth.middleware.AuthenticationMiddleware',
+"django.contrib.auth.middleware.AuthenticationMiddleware",
-'django.contrib.messages.middleware.MessageMiddleware',
+"django.contrib.messages.middleware.MessageMiddleware",
-'django.middleware.clickjacking.XFrameOptionsMiddleware',
+"django.middleware.clickjacking.XFrameOptionsMiddleware",
 ]

-ROOT_URLCONF = 'config.urls'
+ROOT_URLCONF = "config.urls"

 TEMPLATES = [
 {
-'BACKEND': 'django.template.backends.django.DjangoTemplates',
+"BACKEND": "django.template.backends.django.DjangoTemplates",
-'DIRS': [],
+"DIRS": [],
-'APP_DIRS': True,
+"APP_DIRS": True,
-'OPTIONS': {
+"OPTIONS": {
-'context_processors': [
+"context_processors": [
-'django.template.context_processors.debug',
+"django.template.context_processors.debug",
-'django.template.context_processors.request',
+"django.template.context_processors.request",
-'django.contrib.auth.context_processors.auth',
+"django.contrib.auth.context_processors.auth",
-'django.contrib.messages.context_processors.messages',
+"django.contrib.messages.context_processors.messages",
 ],
 },
 },
 ]

-WSGI_APPLICATION = 'config.wsgi.application'
+WSGI_APPLICATION = "config.wsgi.application"


 # Database
 # https://docs.djangoproject.com/en/3.2/ref/settings/#databases

 DATABASES = {
-'default': {
+"default": {
-'ENGINE': 'django.db.backends.sqlite3',
+"ENGINE": "django.db.backends.sqlite3",
-'NAME': BASE_DIR / 'db.sqlite3',
+"NAME": BASE_DIR / "db.sqlite3",
 }
 }

@@ -91,16 +91,16 @@ DATABASES = {

 AUTH_PASSWORD_VALIDATORS = [
 {
-'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa: E501
+"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa: E501
 },
 {
-'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa: E501
+"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", # noqa: E501
 },
 {
-'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa: E501
+"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", # noqa: E501
 },
 {
-'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa: E501
+"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", # noqa: E501
 },
 ]

@@ -108,9 +108,9 @@ AUTH_PASSWORD_VALIDATORS = [
 # Internationalization
 # https://docs.djangoproject.com/en/3.2/topics/i18n/

-LANGUAGE_CODE = 'en-us'
+LANGUAGE_CODE = "en-us"

-TIME_ZONE = 'UTC'
+TIME_ZONE = "UTC"

 USE_I18N = True

@@ -122,7 +122,7 @@ USE_TZ = True
 # Static files (CSS, JavaScript, Images)
 # https://docs.djangoproject.com/en/3.2/howto/static-files/

-STATIC_URL = '/static/'
+STATIC_URL = "/static/"

 # STATICFILES_DIRS = [
 # str(BASE_DIR.joinpath('static')),
@@ -130,15 +130,15 @@ STATIC_URL = '/static/'
 # ]

 # STATIC_URL = '/static/'
-STATICFILES_DIRS = (str(BASE_DIR.joinpath('static')),)
+STATICFILES_DIRS = (str(BASE_DIR.joinpath("static")),)
 # MEDIA_ROOT = str(BASE_DIR.joinpath('media'))
 # MEDIA_URL = '/media/'

-STATIC_ROOT = str(BASE_DIR.joinpath('staticfiles'))
+STATIC_ROOT = str(BASE_DIR.joinpath("staticfiles"))

-STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
+STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"

 # Default primary key field type
 # https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field

-DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
+DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

@@ -17,6 +17,6 @@ from django.contrib import admin
 from django.urls import include, path

 urlpatterns = [
-path('', include('home.urls')),
+path("", include("home.urls")),
-path('admin/', admin.site.urls),
+path("admin/", admin.site.urls),
 ]

@@ -11,6 +11,6 @@ import os

 from django.core.wsgi import get_wsgi_application

-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")

 application = get_wsgi_application()

@@ -10,24 +10,24 @@ from .tasks import app as celery_app


 def sync_redis_state():
-""" make sure redis gets the config.json values """
+"""make sure redis gets the config.json values"""
-print('sync redis')
+print("sync redis")
 config_handler = AppConfig()
 config_handler.load_new_defaults()
 config = config_handler.config
-sort_order = config['archive']['sort']
+sort_order = config["archive"]["sort"]
-set_message('sort_order', sort_order, expire=False)
+set_message("sort_order", sort_order, expire=False)
-hide_watched = bool(int(config['archive']['hide_watched']))
+hide_watched = bool(int(config["archive"]["hide_watched"]))
-set_message('hide_watched', hide_watched, expire=False)
+set_message("hide_watched", hide_watched, expire=False)
-show_subed_only = bool(int(config['archive']['show_subed_only']))
+show_subed_only = bool(int(config["archive"]["show_subed_only"]))
-set_message('show_subed_only', show_subed_only, expire=False)
+set_message("show_subed_only", show_subed_only, expire=False)


 def make_folders():
-""" make needed cache folders here so docker doesn't mess it up """
+"""make needed cache folders here so docker doesn't mess it up"""
-folders = ['download', 'channels', 'videos', 'import', 'backup']
+folders = ["download", "channels", "videos", "import", "backup"]
 config = AppConfig().config
-cache_dir = config['application']['cache_dir']
+cache_dir = config["application"]["cache_dir"]
 for folder in folders:
 folder_path = os.path.join(cache_dir, folder)
 try:
@@ -36,7 +36,7 @@ def make_folders():
 continue


-__all__ = ('celery_app',)
+__all__ = ("celery_app",)
 make_folders()
 sync_redis_state()
 index_check()

@@ -2,5 +2,5 @@ from django.apps import AppConfig


 class HomeConfig(AppConfig):
-default_auto_field = 'django.db.models.BigAutoField'
+default_auto_field = "django.db.models.BigAutoField"
-name = 'home'
+name = "home"

@@ -12,71 +12,71 @@ from home.src.helper import get_message, set_message


 class AppConfig:
-""" handle user settings and application variables """
+"""handle user settings and application variables"""

 def __init__(self):
 self.config = self.get_config()

 def get_config(self):
-""" get config from default file or redis if changed """
+"""get config from default file or redis if changed"""
 config = self.get_config_redis()
 if not config:
 config = self.get_config_file()

-config['application'].update(self.get_config_env())
+config["application"].update(self.get_config_env())
 return config

 def get_config_file(self):
-""" read the defaults from config.json """
+"""read the defaults from config.json"""
-with open('home/config.json', 'r', encoding="utf-8") as f:
+with open("home/config.json", "r", encoding="utf-8") as f:
 config_str = f.read()
 config_file = json.loads(config_str)

-config_file['application'].update(self.get_config_env())
+config_file["application"].update(self.get_config_env())

 return config_file

 @staticmethod
 def get_config_env():
-""" read environment application variables """
+"""read environment application variables"""
 application = {
-'REDIS_HOST': os.environ.get('REDIS_HOST'),
+"REDIS_HOST": os.environ.get("REDIS_HOST"),
-'es_url': os.environ.get('ES_URL'),
+"es_url": os.environ.get("ES_URL"),
-'HOST_UID': int(os.environ.get('HOST_UID')),
+"HOST_UID": int(os.environ.get("HOST_UID")),
-'HOST_GID': int(os.environ.get('HOST_GID'))
+"HOST_GID": int(os.environ.get("HOST_GID")),
 }

 return application

 @staticmethod
 def get_config_redis():
-""" read config json set from redis to overwrite defaults """
+"""read config json set from redis to overwrite defaults"""
-config = get_message('config')
+config = get_message("config")
 if not list(config.values())[0]:
 return False

 return config

 def update_config(self, form_post):
-""" update config values from settings form """
+"""update config values from settings form"""
 config = self.config
 for key, value in form_post.items():
 to_write = value[0]
 if len(to_write):
-if to_write == '0':
+if to_write == "0":
 to_write = False
-elif to_write == '1':
+elif to_write == "1":
 to_write = True
 elif to_write.isdigit():
 to_write = int(to_write)

-config_dict, config_value = key.split('.')
+config_dict, config_value = key.split(".")
 config[config_dict][config_value] = to_write

-set_message('config', config, expire=False)
+set_message("config", config, expire=False)

 def load_new_defaults(self):
-""" check config.json for missing defaults """
+"""check config.json for missing defaults"""
 default_config = self.get_config_file()
 redis_config = self.get_config_redis()

@@ -100,4 +100,4 @@ class AppConfig:
 needs_update = True

 if needs_update:
-set_message('config', redis_config, expire=False)
+set_message("config", redis_config, expire=False)

@@ -19,15 +19,15 @@ from home.src.index import YoutubeChannel, index_new_video


 class PendingList:
-""" manage the pending videos list """
+"""manage the pending videos list"""

 CONFIG = AppConfig().config
-ES_URL = CONFIG['application']['es_url']
+ES_URL = CONFIG["application"]["es_url"]
-VIDEOS = CONFIG['application']['videos']
+VIDEOS = CONFIG["application"]["videos"]

 @staticmethod
 def parse_url_list(youtube_ids):
-""" extract youtube ids from list """
+"""extract youtube ids from list"""
 missing_videos = []
 for entry in youtube_ids:
 # notify
@@ -35,31 +35,31 @@ class PendingList:
 "status": "pending",
 "level": "info",
 "title": "Adding to download queue.",
-"message": 'Extracting lists'
+"message": "Extracting lists",
 }
-set_message('progress:download', mess_dict)
+set_message("progress:download", mess_dict)
 # extract
-url = entry['url']
+url = entry["url"]
-url_type = entry['type']
+url_type = entry["type"]
-if url_type == 'video':
+if url_type == "video":
 missing_videos.append(url)
-elif url_type == 'channel':
+elif url_type == "channel":
 youtube_ids = ChannelSubscription().get_last_youtube_videos(
 url, limit=False
 )
 missing_videos = missing_videos + youtube_ids
-elif url_type == 'playlist':
+elif url_type == "playlist":
 youtube_ids = playlist_extractor(url)
 missing_videos = missing_videos + youtube_ids

 return missing_videos

 def add_to_pending(self, missing_videos):
-""" build the bulk json data from pending """
+"""build the bulk json data from pending"""
 # check if channel is indexed
 channel_handler = ChannelSubscription()
 all_indexed = channel_handler.get_channels(subscribed_only=False)
-all_channel_ids = [i['channel_id'] for i in all_indexed]
+all_channel_ids = [i["channel_id"] for i in all_indexed]
 # check if already there
 all_downloaded = self.get_all_downloaded()
 # loop
@@ -77,11 +77,11 @@ class PendingList:
 if not video:
 continue

-if video['channel_id'] in all_channel_ids:
+if video["channel_id"] in all_channel_ids:
-video['channel_indexed'] = True
+video["channel_indexed"] = True
 else:
-video['channel_indexed'] = False
+video["channel_indexed"] = False
-video['status'] = "pending"
+video["status"] = "pending"
 action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
 bulk_list.append(json.dumps(action))
 bulk_list.append(json.dumps(video))
@@ -90,128 +90,130 @@ class PendingList:
 "status": "pending",
 "level": "info",
 "title": "Adding to download queue.",
-"message": 'Processing IDs...'
+"message": "Processing IDs...",
 }
-set_message('progress:download', mess_dict)
+set_message("progress:download", mess_dict)
 # add last newline
-bulk_list.append('\n')
+bulk_list.append("\n")
-query_str = '\n'.join(bulk_list)
+query_str = "\n".join(bulk_list)
-headers = {'Content-type': 'application/x-ndjson'}
+headers = {"Content-type": "application/x-ndjson"}
-url = self.ES_URL + '/_bulk'
+url = self.ES_URL + "/_bulk"
 request = requests.post(url, data=query_str, headers=headers)
 if not request.ok:
 print(request)

 @staticmethod
 def get_youtube_details(youtube_id):
-""" get details from youtubedl for single pending video """
+"""get details from youtubedl for single pending video"""
 obs = {
-'default_search': 'ytsearch',
+"default_search": "ytsearch",
-'quiet': True,
+"quiet": True,
-'skip_download': True,
+"skip_download": True,
 }
 try:
 vid = youtube_dl.YoutubeDL(obs).extract_info(youtube_id)
 except youtube_dl.utils.DownloadError:
-print('failed to extract info for: ' + youtube_id)
+print("failed to extract info for: " + youtube_id)
 return False
 # parse response
-seconds = vid['duration']
+seconds = vid["duration"]
 duration_str = DurationConverter.get_str(seconds)
-upload_date = vid['upload_date']
+upload_date = vid["upload_date"]
 upload_dt = datetime.strptime(upload_date, "%Y%m%d")
 published = upload_dt.strftime("%Y-%m-%d")
 # build dict
 youtube_details = {
 "youtube_id": youtube_id,
-"channel_name": vid['channel'],
+"channel_name": vid["channel"],
-"vid_thumb_url": vid['thumbnail'],
+"vid_thumb_url": vid["thumbnail"],
-"title": vid['title'],
+"title": vid["title"],
-"channel_id": vid['channel_id'],
+"channel_id": vid["channel_id"],
 "duration": duration_str,
 "published": published,
-"timestamp": int(datetime.now().strftime("%s"))
+"timestamp": int(datetime.now().strftime("%s")),
 }
 return youtube_details

 def get_all_pending(self):
-""" get a list of all pending videos in ta_download """
+"""get a list of all pending videos in ta_download"""
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
 # get PIT ID
-url = self.ES_URL + '/ta_download/_pit?keep_alive=1m'
+url = self.ES_URL + "/ta_download/_pit?keep_alive=1m"
 response = requests.post(url)
 json_data = json.loads(response.text)
-pit_id = json_data['id']
+pit_id = json_data["id"]
 # query
 data = {
-"size": 50, "query": {"match_all": {}},
+"size": 50,
+"query": {"match_all": {}},
 "pit": {"id": pit_id, "keep_alive": "1m"},
-"sort": [{"timestamp": {"order": "desc"}}]
+"sort": [{"timestamp": {"order": "desc"}}],
 }
 query_str = json.dumps(data)
-url = self.ES_URL + '/_search'
+url = self.ES_URL + "/_search"
 all_pending = []
 all_ignore = []
 while True:
 response = requests.get(url, data=query_str, headers=headers)
 json_data = json.loads(response.text)
-all_hits = json_data['hits']['hits']
+all_hits = json_data["hits"]["hits"]
 if all_hits:
 for hit in all_hits:
-youtube_id = hit['_source']['youtube_id']
+youtube_id = hit["_source"]["youtube_id"]
-status = hit['_source']['status']
+status = hit["_source"]["status"]
-if status == 'pending':
+if status == "pending":
-all_pending.append(hit['_source'])
+all_pending.append(hit["_source"])
-elif status == 'ignore':
+elif status == "ignore":
 all_ignore.append(youtube_id)
-search_after = hit['sort']
+search_after = hit["sort"]
 # update search_after with last hit data
-data['search_after'] = search_after
+data["search_after"] = search_after
 query_str = json.dumps(data)
 else:
 break
 # clean up PIT
 query_str = json.dumps({"id": pit_id})
-requests.delete(self.ES_URL + '/_pit', data=query_str, headers=headers)
+requests.delete(self.ES_URL + "/_pit", data=query_str, headers=headers)
 return all_pending, all_ignore

 def get_all_indexed(self):
-""" get a list of all videos indexed """
+"""get a list of all videos indexed"""
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
 # get PIT ID
-url = self.ES_URL + '/ta_video/_pit?keep_alive=1m'
+url = self.ES_URL + "/ta_video/_pit?keep_alive=1m"
 response = requests.post(url)
 json_data = json.loads(response.text)
-pit_id = json_data['id']
+pit_id = json_data["id"]
 # query
 data = {
-"size": 500, "query": {"match_all": {}},
+"size": 500,
+"query": {"match_all": {}},
 "pit": {"id": pit_id, "keep_alive": "1m"},
-"sort": [{"published": {"order": "desc"}}]
+"sort": [{"published": {"order": "desc"}}],
 }
 query_str = json.dumps(data)
-url = self.ES_URL + '/_search'
+url = self.ES_URL + "/_search"
 all_indexed = []
 while True:
 response = requests.get(url, data=query_str, headers=headers)
 json_data = json.loads(response.text)
-all_hits = json_data['hits']['hits']
+all_hits = json_data["hits"]["hits"]
 if all_hits:
 for hit in all_hits:
 all_indexed.append(hit)
-search_after = hit['sort']
+search_after = hit["sort"]
 # update search_after with last hit data
-data['search_after'] = search_after
+data["search_after"] = search_after
 query_str = json.dumps(data)
 else:
 break
 # clean up PIT
 query_str = json.dumps({"id": pit_id})
-requests.delete(self.ES_URL + '/_pit', data=query_str, headers=headers)
+requests.delete(self.ES_URL + "/_pit", data=query_str, headers=headers)
 return all_indexed

 def get_all_downloaded(self):
-""" get a list of all videos in archive """
+"""get a list of all videos in archive"""
 all_channel_folders = os.listdir(self.VIDEOS)
 all_downloaded = []
 for channel_folder in all_channel_folders:
@@ -223,125 +225,131 @@ class PendingList:
 return all_downloaded

 def delete_from_pending(self, youtube_id):
-""" delete the youtube_id from ta_download """
+"""delete the youtube_id from ta_download"""
-url = f'{self.ES_URL}/ta_download/_doc/{youtube_id}'
+url = f"{self.ES_URL}/ta_download/_doc/{youtube_id}"
 response = requests.delete(url)
 if not response.ok:
 print(response.text)

 def ignore_from_pending(self, ignore_list):
-""" build the bulk query string """
+"""build the bulk query string"""

 stamp = int(datetime.now().strftime("%s"))
 bulk_list = []

 for youtube_id in ignore_list:
 action = {"update": {"_id": youtube_id, "_index": "ta_download"}}
-source = {"doc": {"status": 'ignore', "timestamp": stamp}}
+source = {"doc": {"status": "ignore", "timestamp": stamp}}
 bulk_list.append(json.dumps(action))
 bulk_list.append(json.dumps(source))

 # add last newline
-bulk_list.append('\n')
+bulk_list.append("\n")
-query_str = '\n'.join(bulk_list)
+query_str = "\n".join(bulk_list)

-headers = {'Content-type': 'application/x-ndjson'}
+headers = {"Content-type": "application/x-ndjson"}
-url = self.ES_URL + '/_bulk'
+url = self.ES_URL + "/_bulk"
 request = requests.post(url, data=query_str, headers=headers)
 mess_dict = {
 "status": "ignore",
 "level": "info",
 "title": "Added to ignore list",
-"message": ''
+"message": "",
 }
-set_message('progress:download', mess_dict)
+set_message("progress:download", mess_dict)
 if not request.ok:
 print(request)


 class ChannelSubscription:
-""" manage the list of channels subscribed """
+"""manage the list of channels subscribed"""

 def __init__(self):
 config = AppConfig().config
-self.es_url = config['application']['es_url']
+self.es_url = config["application"]["es_url"]
-self.channel_size = config['subscriptions']['channel_size']
+self.channel_size = config["subscriptions"]["channel_size"]

 def get_channels(self, subscribed_only=True):
-""" get a list of all channels subscribed to """
+"""get a list of all channels subscribed to"""
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
 # get PIT ID
-url = self.es_url + '/ta_channel/_pit?keep_alive=1m'
+url = self.es_url + "/ta_channel/_pit?keep_alive=1m"
 response = requests.post(url)
 json_data = json.loads(response.text)
-pit_id = json_data['id']
+pit_id = json_data["id"]
 # query
 if subscribed_only:
 data = {
 "query": {"term": {"channel_subscribed": {"value": True}}},
-"size": 50, "pit": {"id": pit_id, "keep_alive": "1m"},
+"size": 50,
-"sort": [{"channel_name.keyword": {"order": "asc"}}]
+"pit": {"id": pit_id, "keep_alive": "1m"},
+"sort": [{"channel_name.keyword": {"order": "asc"}}],
 }
 else:
 data = {
 "query": {"match_all": {}},
-"size": 50, "pit": {"id": pit_id, "keep_alive": "1m"},
+"size": 50,
-"sort": [{"channel_name.keyword": {"order": "asc"}}]
+"pit": {"id": pit_id, "keep_alive": "1m"},
+"sort": [{"channel_name.keyword": {"order": "asc"}}],
 }
 query_str = json.dumps(data)
-url = self.es_url + '/_search'
+url = self.es_url + "/_search"
 all_channels = []
 while True:
 response = requests.get(url, data=query_str, headers=headers)
 json_data = json.loads(response.text)
-all_hits = json_data['hits']['hits']
+all_hits = json_data["hits"]["hits"]
 if all_hits:
 for hit in all_hits:
-source = hit['_source']
+source = hit["_source"]
-search_after = hit['sort']
+search_after = hit["sort"]
 all_channels.append(source)
 # update search_after with last hit data
-data['search_after'] = search_after
+data["search_after"] = search_after
 query_str = json.dumps(data)
 else:
 break
 # clean up PIT
 query_str = json.dumps({"id": pit_id})
-requests.delete(self.es_url + '/_pit', data=query_str, headers=headers)
+requests.delete(self.es_url + "/_pit", data=query_str, headers=headers)
 return all_channels

 def get_last_youtube_videos(self, channel_id, limit=True):
-""" get a list of last videos from channel """
+"""get a list of last videos from channel"""
-url = f'https://www.youtube.com/channel/{channel_id}/videos'
+url = f"https://www.youtube.com/channel/{channel_id}/videos"
 obs = {
-'default_search': 'ytsearch', 'quiet': True,
+"default_search": "ytsearch",
-'skip_download': True, 'extract_flat': True
+"quiet": True,
+"skip_download": True,
+"extract_flat": True,
 }
 if limit:
-obs['playlistend'] = self.channel_size
+obs["playlistend"] = self.channel_size
 chan = youtube_dl.YoutubeDL(obs).extract_info(url, download=False)
-last_videos = [(i['id'], i['title']) for i in chan['entries']]
+last_videos = [(i["id"], i["title"]) for i in chan["entries"]]
 return last_videos

 def find_missing(self):
-""" add missing videos from subscribed channels to pending """
+"""add missing videos from subscribed channels to pending"""
 all_channels = self.get_channels()
 pending_handler = PendingList()
 all_pending, all_ignore = pending_handler.get_all_pending()
-all_pending_ids = [i['youtube_id'] for i in all_pending]
+all_pending_ids = [i["youtube_id"] for i in all_pending]
 all_downloaded = pending_handler.get_all_downloaded()
 to_ignore = all_pending_ids + all_ignore + all_downloaded
 missing_videos = []
 counter = 1
 for channel in all_channels:
-channel_id = channel['channel_id']
+channel_id = channel["channel_id"]
 last_videos = self.get_last_youtube_videos(channel_id)
-set_message('progress:download', {
+set_message(
-"status": "rescan",
+"progress:download",
-"level": "info",
+{
-"title": "Rescanning: Looking for new videos.",
+"status": "rescan",
-"message": f'Progress: {counter}/{len(all_channels)}'
+"level": "info",
-}
+"title": "Rescanning: Looking for new videos.",
+"message": f"Progress: {counter}/{len(all_channels)}",
+},
 )
 for video in last_videos:
 youtube_id = video[0]
@@ -352,22 +360,22 @@ class ChannelSubscription:
 return missing_videos

 def change_subscribe(self, channel_id, channel_subscribed):
-""" subscribe or unsubscribe from channel and update """
+"""subscribe or unsubscribe from channel and update"""
 if not isinstance(channel_subscribed, bool):
-print('invalid status, should be bool')
+print("invalid status, should be bool")
 return
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
 channel_handler = YoutubeChannel(channel_id)
 channel_dict = channel_handler.channel_dict
-channel_dict['channel_subscribed'] = channel_subscribed
+channel_dict["channel_subscribed"] = channel_subscribed
 if channel_subscribed:
 # handle subscribe
-url = self.es_url + '/ta_channel/_doc/' + channel_id
+url = self.es_url + "/ta_channel/_doc/" + channel_id
 payload = json.dumps(channel_dict)
 print(channel_dict)
 else:
-url = self.es_url + '/ta_channel/_update/' + channel_id
+url = self.es_url + "/ta_channel/_update/" + channel_id
-payload = json.dumps({'doc': channel_dict})
+payload = json.dumps({"doc": channel_dict})
 # update channel
 request = requests.post(url, data=payload, headers=headers)
 if not request.ok:
@@ -377,27 +385,30 @@ class ChannelSubscription:


 def playlist_extractor(playlist_id):
-""" return youtube_ids from a playlist_id """
+"""return youtube_ids from a playlist_id"""
-url = 'https://www.youtube.com/playlist?list=' + playlist_id
+url = "https://www.youtube.com/playlist?list=" + playlist_id
 obs = {
-'default_search': 'ytsearch', 'quiet': True, 'ignoreerrors': True,
+"default_search": "ytsearch",
-'skip_download': True, 'extract_flat': True
+"quiet": True,
+"ignoreerrors": True,
+"skip_download": True,
+"extract_flat": True,
 }
 playlist = youtube_dl.YoutubeDL(obs).extract_info(url, download=False)
-playlist_vids = [(i['id'], i['title']) for i in playlist['entries']]
+playlist_vids = [(i["id"], i["title"]) for i in playlist["entries"]]
 return playlist_vids


 class VideoDownloader:
-""" handle the video download functionality """
+"""handle the video download functionality"""

 def __init__(self, youtube_id_list):
 self.youtube_id_list = youtube_id_list
 self.config = AppConfig().config

 def download_list(self):
-""" download the list of youtube_ids """
+"""download the list of youtube_ids"""
-limit_count = self.config['downloads']['limit_count']
+limit_count = self.config["downloads"]["limit_count"]
 if limit_count:
 self.youtube_id_list = self.youtube_id_list[:limit_count]

@@ -405,112 +416,118 @@ class VideoDownloader:
 try:
 self.dl_single_vid(youtube_id)
 except youtube_dl.utils.DownloadError:
-print('failed to download ' + youtube_id)
+print("failed to download " + youtube_id)
 continue
 vid_dict = index_new_video(youtube_id)
 self.move_to_archive(vid_dict)
 self.delete_from_pending(youtube_id)
-if self.config['downloads']['sleep_interval']:
+if self.config["downloads"]["sleep_interval"]:
-sleep(self.config['downloads']['sleep_interval'])
+sleep(self.config["downloads"]["sleep_interval"])

 @staticmethod
 def progress_hook(response):
-""" process the progress_hooks from youtube_dl """
+"""process the progress_hooks from youtube_dl"""
 # title
-filename = response['filename'][12:].replace('_', ' ')
+filename = response["filename"][12:].replace("_", " ")
 title = "Downloading: " + os.path.split(filename)[-1]
 # message
 try:
-percent = response['_percent_str']
+percent = response["_percent_str"]
-size = response['_total_bytes_str']
+size = response["_total_bytes_str"]
-speed = response['_speed_str']
+speed = response["_speed_str"]
-eta = response['_eta_str']
+eta = response["_eta_str"]
-message = f'{percent} of {size} at {speed} - time left: {eta}'
+message = f"{percent} of {size} at {speed} - time left: {eta}"
 except KeyError:
-message = ''
+message = ""
 mess_dict = {
 "status": "downloading",
 "level": "info",
 "title": title,
-"message": message
+"message": message,
 }
-set_message('progress:download', mess_dict)
+set_message("progress:download", mess_dict)

 def dl_single_vid(self, youtube_id):
-""" download single video """
+"""download single video"""
 obs = {
-'default_search': 'ytsearch',
+"default_search": "ytsearch",
-'merge_output_format': 'mp4', 'restrictfilenames': True,
+"merge_output_format": "mp4",
-'outtmpl': (self.config['application']['cache_dir'] +
+"restrictfilenames": True,
-'/download/' +
+"outtmpl": (
-self.config['application']['file_template']),
+self.config["application"]["cache_dir"]
-'progress_hooks': [self.progress_hook],
++ "/download/"
-'quiet': True, 'continuedl': True, 'retries': 3
++ self.config["application"]["file_template"]
+),
+"progress_hooks": [self.progress_hook],
+"quiet": True,
+"continuedl": True,
+"retries": 3,
 }
-if self.config['downloads']['format']:
+if self.config["downloads"]["format"]:
-obs['format'] = self.config['downloads']['format']
+obs["format"] = self.config["downloads"]["format"]
-if self.config['downloads']['limit_speed']:
+if self.config["downloads"]["limit_speed"]:
-obs['ratelimit'] = self.config['downloads']['limit_speed'] * 1024
+obs["ratelimit"] = self.config["downloads"]["limit_speed"] * 1024
 external = False
 if external:
-obs['external_downloader'] = 'aria2c'
+obs["external_downloader"] = "aria2c"


 postprocessors = []

-if self.config['downloads']['add_metadata']:
+if self.config["downloads"]["add_metadata"]:
-postprocessors.append({
+postprocessors.append(
-'key': 'FFmpegMetadata',
+{
-'add_chapters': True,
+"key": "FFmpegMetadata",
-'add_metadata': True,
+"add_chapters": True,
-})
+"add_metadata": True,
+}
+)

-obs['postprocessors'] = postprocessors
+obs["postprocessors"] = postprocessors

 # check if already in cache to continue from there
-cache_dir = self.config['application']['cache_dir']
+cache_dir = self.config["application"]["cache_dir"]
-all_cached = os.listdir(cache_dir + '/download/')
+all_cached = os.listdir(cache_dir + "/download/")
 for file_name in all_cached:
 if youtube_id in file_name:
-obs['outtmpl'] = cache_dir + '/download/' + file_name
+obs["outtmpl"] = cache_dir + "/download/" + file_name
 with youtube_dl.YoutubeDL(obs) as ydl:
 try:
 ydl.download([youtube_id])
 except youtube_dl.utils.DownloadError:
-print('retry failed download: ' + youtube_id)
+print("retry failed download: " + youtube_id)
 sleep(10)
 ydl.download([youtube_id])

 def move_to_archive(self, vid_dict):
-""" move downloaded video from cache to archive """
+"""move downloaded video from cache to archive"""
-videos = self.config['application']['videos']
+videos = self.config["application"]["videos"]
-channel_name = vid_dict['channel']['channel_name']
+channel_name = vid_dict["channel"]["channel_name"]
 channel_name_clean = clean_string(channel_name)
-media_url = vid_dict['media_url']
+media_url = vid_dict["media_url"]
-youtube_id = vid_dict['youtube_id']
+youtube_id = vid_dict["youtube_id"]
 # make archive folder
-videos = self.config['application']['videos']
+videos = self.config["application"]["videos"]
 new_folder = os.path.join(videos, channel_name_clean)
 os.makedirs(new_folder, exist_ok=True)
 # find real filename
-cache_dir = self.config['application']['cache_dir']
+cache_dir = self.config["application"]["cache_dir"]
-for file_str in os.listdir(cache_dir + '/download'):
+for file_str in os.listdir(cache_dir + "/download"):
 if youtube_id in file_str:
 old_file = file_str
-old_file_path = os.path.join(cache_dir, 'download', old_file)
+old_file_path = os.path.join(cache_dir, "download", old_file)
 new_file_path = os.path.join(videos, media_url)
 # move and fix permission
 shutil.move(old_file_path, new_file_path)
 os.chown(
 new_file_path,
-self.config['application']['HOST_UID'],
+self.config["application"]["HOST_UID"],
-self.config['application']['HOST_GID']
+self.config["application"]["HOST_GID"],
 )

 def delete_from_pending(self, youtube_id):
-""" delete downloaded video from pending index if its there """
+"""delete downloaded video from pending index if its there"""
-es_url = self.config['application']['es_url']
+es_url = self.config["application"]["es_url"]
-url = f'{es_url}/ta_download/_doc/{youtube_id}'
+url = f"{es_url}/ta_download/_doc/{youtube_id}"
 response = requests.delete(url)
 if not response.ok and not response.status_code == 404:
 print(response.text)

@@ -13,53 +13,53 @@ import unicodedata
 import redis
 import requests

-REDIS_HOST = os.environ.get('REDIS_HOST')
+REDIS_HOST = os.environ.get("REDIS_HOST")


 def get_total_hits(index, es_url, match_field):
-""" get total hits from index """
+"""get total hits from index"""
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
 data = {"query": {"match": {match_field: True}}}
 payload = json.dumps(data)
-url = f'{es_url}/{index}/_search?filter_path=hits.total'
+url = f"{es_url}/{index}/_search?filter_path=hits.total"
 request = requests.post(url, data=payload, headers=headers)
 if not request.ok:
 print(request.text)
 total_json = json.loads(request.text)
-total_hits = total_json['hits']['total']['value']
+total_hits = total_json["hits"]["total"]["value"]
 return total_hits


 def clean_string(file_name):
-""" clean string to only asci characters """
+"""clean string to only asci characters"""
 whitelist = "-_.() " + string.ascii_letters + string.digits
-normalized = unicodedata.normalize('NFKD', file_name)
+normalized = unicodedata.normalize("NFKD", file_name)
-ascii_only = normalized.encode('ASCII', 'ignore').decode().strip()
+ascii_only = normalized.encode("ASCII", "ignore").decode().strip()
-white_listed = ''.join(c for c in ascii_only if c in whitelist)
+white_listed = "".join(c for c in ascii_only if c in whitelist)
-cleaned = re.sub(r'[ ]{2,}', ' ', white_listed)
+cleaned = re.sub(r"[ ]{2,}", " ", white_listed)
 return cleaned


 def process_url_list(url_str):
-""" parse url_list to find valid youtube video or channel ids """
+"""parse url_list to find valid youtube video or channel ids"""
-to_replace = ['watch?v=', 'playlist?list=']
+to_replace = ["watch?v=", "playlist?list="]
-url_list = re.split('\n+', url_str[0])
+url_list = re.split("\n+", url_str[0])
 youtube_ids = []
 for url in url_list:
-url_clean = url.strip().strip('/').split('/')[-1]
+url_clean = url.strip().strip("/").split("/")[-1]
 for i in to_replace:
-url_clean = url_clean.replace(i, '')
+url_clean = url_clean.replace(i, "")
-url_no_param = url_clean.split('&')[0]
+url_no_param = url_clean.split("&")[0]
 str_len = len(url_no_param)
 if str_len == 11:
-link_type = 'video'
+link_type = "video"
 elif str_len == 24:
-link_type = 'channel'
+link_type = "channel"
 elif str_len == 34:
-link_type = 'playlist'
+link_type = "playlist"
 else:
 # unable to parse
-raise ValueError('not a valid url: ' + url)
+raise ValueError("not a valid url: " + url)

 youtube_ids.append({"url": url_no_param, "type": link_type})

@@ -67,19 +67,17 @@ def process_url_list(url_str):


 def set_message(key, message, expire=True):
-""" write new message to redis """
+"""write new message to redis"""
 redis_connection = redis.Redis(host=REDIS_HOST)
-redis_connection.execute_command(
+redis_connection.execute_command("JSON.SET", key, ".", json.dumps(message))
-'JSON.SET', key, '.', json.dumps(message)
-)
 if expire:
-redis_connection.execute_command('EXPIRE', key, 20)
+redis_connection.execute_command("EXPIRE", key, 20)


 def get_message(key):
-""" get any message from JSON key """
+"""get any message from JSON key"""
 redis_connection = redis.Redis(host=REDIS_HOST)
-reply = redis_connection.execute_command('JSON.GET', key)
+reply = redis_connection.execute_command("JSON.GET", key)
 if reply:
 json_str = json.loads(reply)
 else:
@@ -88,9 +86,9 @@ def get_message(key):


 def get_dl_message(cache_dir):
-""" get latest message if available """
+"""get latest message if available"""
 redis_connection = redis.Redis(host=REDIS_HOST)
-reply = redis_connection.execute_command('JSON.GET', 'progress:download')
+reply = redis_connection.execute_command("JSON.GET", "progress:download")
 if reply:
 json_str = json.loads(reply)
 elif json_str := monitor_cache_dir(cache_dir):
@@ -101,7 +99,7 @@ def get_dl_message(cache_dir):


 def get_lock(lock_key):
-""" handle lock for task management """
+"""handle lock for task management"""
 redis_lock = redis.Redis(host=REDIS_HOST).lock(lock_key)
 return redis_lock

@@ -110,15 +108,15 @@ def monitor_cache_dir(cache_dir):
 """
 look at download cache dir directly as alternative progress info
 """
-dl_cache = os.path.join(cache_dir, 'download')
+dl_cache = os.path.join(cache_dir, "download")
 cache_file = os.listdir(dl_cache)
 if cache_file:
-filename = cache_file[0][12:].replace('_', ' ').split('.')[0]
+filename = cache_file[0][12:].replace("_", " ").split(".")[0]
 mess_dict = {
 "status": "downloading",
 "level": "info",
 "title": "Downloading: " + filename,
-"message": ""
+"message": "",
 }
 else:
 return False
@@ -133,27 +131,37 @@ class DurationConverter:

 @staticmethod
 def get_sec(file_path):
-""" read duration from file """
+"""read duration from file"""
-duration = subprocess.run([
+duration = subprocess.run(
-"ffprobe", "-v", "error", "-show_entries", "format=duration",
+[
-"-of", "default=noprint_wrappers=1:nokey=1", file_path
+"ffprobe",
-], capture_output=True, check=True)
+"-v",
+"error",
+"-show_entries",
+"format=duration",
+"-of",
+"default=noprint_wrappers=1:nokey=1",
+file_path,
+],
+capture_output=True,
+check=True,
+)
 duration_sec = int(float(duration.stdout.decode().strip()))
 return duration_sec

 @staticmethod
 def get_str(duration_sec):
-""" takes duration in sec and returns clean string """
+"""takes duration in sec and returns clean string"""
 hours = duration_sec // 3600
 minutes = (duration_sec - (hours * 3600)) // 60
 secs = duration_sec - (hours * 3600) - (minutes * 60)

 duration_str = str()
 if hours:
-duration_str = str(hours).zfill(2) + ':'
+duration_str = str(hours).zfill(2) + ":"
 if minutes:
-duration_str = duration_str + str(minutes).zfill(2) + ':'
+duration_str = duration_str + str(minutes).zfill(2) + ":"
 else:
-duration_str = duration_str + '00:'
+duration_str = duration_str + "00:"
 duration_str = duration_str + str(secs).zfill(2)
 return duration_str

@@ -19,11 +19,11 @@ from home.src.helper import DurationConverter, clean_string


 class YoutubeChannel:
-""" represents a single youtube channel """
+"""represents a single youtube channel"""

 CONFIG = AppConfig().config
-ES_URL = CONFIG['application']['es_url']
+ES_URL = CONFIG["application"]["es_url"]
-CACHE_DIR = CONFIG['application']['cache_dir']
+CACHE_DIR = CONFIG["application"]["cache_dir"]

 def __init__(self, channel_id):
 self.channel_id = channel_id
@@ -32,193 +32,187 @@ class YoutubeChannel:
 self.channel_dict = self.build_channel_dict()

 def build_channel_dict(self, scrape=False):
-""" combine the dicts build from extracted json payload """
+"""combine the dicts build from extracted json payload"""
 if scrape:
 channel_dict = False
 else:
 channel_dict = self.get_es_channel()
 if not channel_dict:
-print('scrape data from youtube')
+print("scrape data from youtube")
 self.scrape_channel()
 channel_dict = self.parse_channel_main()
 channel_dict.update(self.parse_channel_meta())
-self.source = 'scraped'
+self.source = "scraped"
 return channel_dict

 def get_es_channel(self):
-""" get from elastic search first if possible """
+"""get from elastic search first if possible"""
 channel_id = self.channel_id
|
||||||
url = f'{self.ES_URL}/ta_channel/_doc/{channel_id}'
|
url = f"{self.ES_URL}/ta_channel/_doc/{channel_id}"
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
if response.ok:
|
if response.ok:
|
||||||
channel_source = response.json()['_source']
|
channel_source = response.json()["_source"]
|
||||||
self.source = 'elastic'
|
self.source = "elastic"
|
||||||
return channel_source
|
return channel_source
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def scrape_channel(self):
|
def scrape_channel(self):
|
||||||
""" scrape channel page for additional infos """
|
"""scrape channel page for additional infos"""
|
||||||
channel_id = self.channel_id
|
channel_id = self.channel_id
|
||||||
url = f'https://www.youtube.com/channel/{channel_id}/about?hl=en'
|
url = f"https://www.youtube.com/channel/{channel_id}/about?hl=en"
|
||||||
cookies = {
|
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
|
||||||
'CONSENT': 'YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx'
|
|
||||||
}
|
|
||||||
response = requests.get(url, cookies=cookies)
|
response = requests.get(url, cookies=cookies)
|
||||||
if response.ok:
|
if response.ok:
|
||||||
channel_page = response.text
|
channel_page = response.text
|
||||||
else:
|
else:
|
||||||
print(f'failed to extract channel info for: {channel_id}')
|
print(f"failed to extract channel info for: {channel_id}")
|
||||||
raise ConnectionError
|
raise ConnectionError
|
||||||
soup = BeautifulSoup(channel_page, 'html.parser')
|
soup = BeautifulSoup(channel_page, "html.parser")
|
||||||
# load script into json
|
# load script into json
|
||||||
all_scripts = soup.find('body').find_all('script')
|
all_scripts = soup.find("body").find_all("script")
|
||||||
for script in all_scripts:
|
for script in all_scripts:
|
||||||
if 'var ytInitialData = ' in str(script):
|
if "var ytInitialData = " in str(script):
|
||||||
script_content = str(script)
|
script_content = str(script)
|
||||||
break
|
break
|
||||||
# extract payload
|
# extract payload
|
||||||
script_content = script_content.split('var ytInitialData = ')[1]
|
script_content = script_content.split("var ytInitialData = ")[1]
|
||||||
json_raw = script_content.rstrip(';</script>')
|
json_raw = script_content.rstrip(";</script>")
|
||||||
json_data = json.loads(json_raw)
|
json_data = json.loads(json_raw)
|
||||||
# add to self
|
# add to self
|
||||||
self.json_data = json_data
|
self.json_data = json_data
|
||||||
|
|
||||||
def parse_channel_main(self):
|
def parse_channel_main(self):
|
||||||
""" extract maintab values from scraped channel json data """
|
"""extract maintab values from scraped channel json data"""
|
||||||
main_tab = self.json_data['header']['c4TabbedHeaderRenderer']
|
main_tab = self.json_data["header"]["c4TabbedHeaderRenderer"]
|
||||||
channel_name = main_tab['title']
|
channel_name = main_tab["title"]
|
||||||
last_refresh = int(datetime.now().strftime("%s"))
|
last_refresh = int(datetime.now().strftime("%s"))
|
||||||
# channel_subs
|
# channel_subs
|
||||||
try:
|
try:
|
||||||
sub_text_simple = main_tab['subscriberCountText']['simpleText']
|
sub_text_simple = main_tab["subscriberCountText"]["simpleText"]
|
||||||
sub_text = sub_text_simple.split(' ')[0]
|
sub_text = sub_text_simple.split(" ")[0]
|
||||||
if sub_text[-1] == 'K':
|
if sub_text[-1] == "K":
|
||||||
channel_subs = int(float(sub_text.replace('K', ''))*1000)
|
channel_subs = int(float(sub_text.replace("K", "")) * 1000)
|
||||||
elif sub_text[-1] == 'M':
|
elif sub_text[-1] == "M":
|
||||||
channel_subs = int(float(sub_text.replace('M', ''))*1000000)
|
channel_subs = int(float(sub_text.replace("M", "")) * 1000000)
|
||||||
elif int(sub_text) >= 0:
|
elif int(sub_text) >= 0:
|
||||||
channel_subs = int(sub_text)
|
channel_subs = int(sub_text)
|
||||||
else:
|
else:
|
||||||
message = f'{sub_text} not dealt with'
|
message = f"{sub_text} not dealt with"
|
||||||
print(message)
|
print(message)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
channel_subs = 0
|
channel_subs = 0
|
||||||
# banner
|
# banner
|
||||||
try:
|
try:
|
||||||
all_banners = main_tab['banner']['thumbnails']
|
all_banners = main_tab["banner"]["thumbnails"]
|
||||||
banner = sorted(all_banners, key=lambda k: k['width'])[-1]['url']
|
banner = sorted(all_banners, key=lambda k: k["width"])[-1]["url"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
banner = False
|
banner = False
|
||||||
# build and return dict
|
# build and return dict
|
||||||
main_channel_dict = {
|
main_channel_dict = {
|
||||||
'channel_active': True,
|
"channel_active": True,
|
||||||
'channel_last_refresh': last_refresh,
|
"channel_last_refresh": last_refresh,
|
||||||
'channel_subs': channel_subs,
|
"channel_subs": channel_subs,
|
||||||
'channel_banner_url': banner,
|
"channel_banner_url": banner,
|
||||||
'channel_name': channel_name,
|
"channel_name": channel_name,
|
||||||
'channel_id': self.channel_id
|
"channel_id": self.channel_id,
|
||||||
}
|
}
|
||||||
return main_channel_dict
|
return main_channel_dict
|
||||||
|
|
||||||
def parse_channel_meta(self):
|
def parse_channel_meta(self):
|
||||||
""" extract meta tab values from channel payload """
|
"""extract meta tab values from channel payload"""
|
||||||
# meta tab
|
# meta tab
|
||||||
json_data = self.json_data
|
json_data = self.json_data
|
||||||
meta_tab = json_data['metadata']['channelMetadataRenderer']
|
meta_tab = json_data["metadata"]["channelMetadataRenderer"]
|
||||||
description = meta_tab['description']
|
description = meta_tab["description"]
|
||||||
all_thumbs = meta_tab['avatar']['thumbnails']
|
all_thumbs = meta_tab["avatar"]["thumbnails"]
|
||||||
thumb_url = sorted(all_thumbs, key=lambda k: k['width'])[-1]['url']
|
thumb_url = sorted(all_thumbs, key=lambda k: k["width"])[-1]["url"]
|
||||||
# stats tab
|
# stats tab
|
||||||
renderer = 'twoColumnBrowseResultsRenderer'
|
renderer = "twoColumnBrowseResultsRenderer"
|
||||||
all_tabs = json_data['contents'][renderer]['tabs']
|
all_tabs = json_data["contents"][renderer]["tabs"]
|
||||||
for tab in all_tabs:
|
for tab in all_tabs:
|
||||||
if 'tabRenderer' in tab.keys():
|
if "tabRenderer" in tab.keys():
|
||||||
if tab['tabRenderer']['title'] == 'About':
|
if tab["tabRenderer"]["title"] == "About":
|
||||||
about_tab = (tab['tabRenderer']['content']
|
about_tab = tab["tabRenderer"]["content"][
|
||||||
['sectionListRenderer']['contents'][0]
|
"sectionListRenderer"
|
||||||
['itemSectionRenderer']['contents'][0]
|
]["contents"][0]["itemSectionRenderer"]["contents"][0][
|
||||||
['channelAboutFullMetadataRenderer'])
|
"channelAboutFullMetadataRenderer"
|
||||||
|
]
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
channel_views_text = about_tab['viewCountText']['simpleText']
|
channel_views_text = about_tab["viewCountText"]["simpleText"]
|
||||||
channel_views = int(re.sub(r"\D", "", channel_views_text))
|
channel_views = int(re.sub(r"\D", "", channel_views_text))
|
||||||
except KeyError:
|
except KeyError:
|
||||||
channel_views = 0
|
channel_views = 0
|
||||||
|
|
||||||
meta_channel_dict = {
|
meta_channel_dict = {
|
||||||
'channel_description': description,
|
"channel_description": description,
|
||||||
'channel_thumb_url': thumb_url,
|
"channel_thumb_url": thumb_url,
|
||||||
'channel_views': channel_views
|
"channel_views": channel_views,
|
||||||
}
|
}
|
||||||
|
|
||||||
return meta_channel_dict
|
return meta_channel_dict
|
||||||
|
|
||||||
def upload_to_es(self):
|
def upload_to_es(self):
|
||||||
""" upload channel data to elastic search """
|
"""upload channel data to elastic search"""
|
||||||
url = f'{self.ES_URL}/ta_channel/_doc/{self.channel_id}'
|
url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}"
|
||||||
response = requests.put(url, json=self.channel_dict)
|
response = requests.put(url, json=self.channel_dict)
|
||||||
print(f'added {self.channel_id} to es')
|
print(f"added {self.channel_id} to es")
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(response.text)
|
print(response.text)
|
||||||
|
|
||||||
def clear_cache(self):
|
def clear_cache(self):
|
||||||
""" delete banner and thumb from cache if available """
|
"""delete banner and thumb from cache if available"""
|
||||||
channel_cache = os.path.join(self.CACHE_DIR, 'channels')
|
channel_cache = os.path.join(self.CACHE_DIR, "channels")
|
||||||
thumb = os.path.join(channel_cache, self.channel_id + '_thumb.jpg')
|
thumb = os.path.join(channel_cache, self.channel_id + "_thumb.jpg")
|
||||||
banner = os.path.join(channel_cache, self.channel_id + '_banner.jpg')
|
banner = os.path.join(channel_cache, self.channel_id + "_banner.jpg")
|
||||||
if os.path.exists(thumb):
|
if os.path.exists(thumb):
|
||||||
os.remove(thumb)
|
os.remove(thumb)
|
||||||
if os.path.exists(banner):
|
if os.path.exists(banner):
|
||||||
os.remove(banner)
|
os.remove(banner)
|
||||||
|
|
||||||
def sync_to_videos(self):
|
def sync_to_videos(self):
|
||||||
""" sync new channel_dict to all videos of channel """
|
"""sync new channel_dict to all videos of channel"""
|
||||||
headers = {'Content-type': 'application/json'}
|
headers = {"Content-type": "application/json"}
|
||||||
channel_id = self.channel_id
|
channel_id = self.channel_id
|
||||||
# add ingest pipeline
|
# add ingest pipeline
|
||||||
processors = []
|
processors = []
|
||||||
for field, value in self.channel_dict.items():
|
for field, value in self.channel_dict.items():
|
||||||
line = {"set": {"field": "channel." + field, "value": value}}
|
line = {"set": {"field": "channel." + field, "value": value}}
|
||||||
processors.append(line)
|
processors.append(line)
|
||||||
data = {
|
data = {"description": channel_id, "processors": processors}
|
||||||
"description": channel_id,
|
|
||||||
"processors": processors
|
|
||||||
}
|
|
||||||
payload = json.dumps(data)
|
payload = json.dumps(data)
|
||||||
url = self.ES_URL + '/_ingest/pipeline/' + channel_id
|
url = self.ES_URL + "/_ingest/pipeline/" + channel_id
|
||||||
request = requests.put(url, data=payload, headers=headers)
|
request = requests.put(url, data=payload, headers=headers)
|
||||||
if not request.ok:
|
if not request.ok:
|
||||||
print(request.text)
|
print(request.text)
|
||||||
# apply pipeline
|
# apply pipeline
|
||||||
data = {
|
data = {"query": {"match": {"channel.channel_id": channel_id}}}
|
||||||
"query": {"match": {"channel.channel_id": channel_id}}
|
|
||||||
}
|
|
||||||
payload = json.dumps(data)
|
payload = json.dumps(data)
|
||||||
url = self.ES_URL + '/ta_video/_update_by_query?pipeline=' + channel_id
|
url = self.ES_URL + "/ta_video/_update_by_query?pipeline=" + channel_id
|
||||||
request = requests.post(url, data=payload, headers=headers)
|
request = requests.post(url, data=payload, headers=headers)
|
||||||
if not request.ok:
|
if not request.ok:
|
||||||
print(request.text)
|
print(request.text)
|
||||||
|
|
||||||
def get_total_hits(self):
|
def get_total_hits(self):
|
||||||
""" get total channels indexed """
|
"""get total channels indexed"""
|
||||||
headers = {'Content-type': 'application/json'}
|
headers = {"Content-type": "application/json"}
|
||||||
data = {"query": {"match_all": {}}}
|
data = {"query": {"match_all": {}}}
|
||||||
payload = json.dumps(data)
|
payload = json.dumps(data)
|
||||||
url = f'{self.ES_URL}/ta_channel/_search?filter_path=hits.total'
|
url = f"{self.ES_URL}/ta_channel/_search?filter_path=hits.total"
|
||||||
request = requests.post(url, data=payload, headers=headers)
|
request = requests.post(url, data=payload, headers=headers)
|
||||||
if not request.ok:
|
if not request.ok:
|
||||||
print(request.text)
|
print(request.text)
|
||||||
total_hits = json.loads(request.text)['hits']['total']['value']
|
total_hits = json.loads(request.text)["hits"]["total"]["value"]
|
||||||
return total_hits
|
return total_hits
|
||||||
|
|
||||||
|
|
||||||
class YoutubeVideo:
|
class YoutubeVideo:
|
||||||
""" represents a single youtube video """
|
"""represents a single youtube video"""
|
||||||
|
|
||||||
CONFIG = AppConfig().config
|
CONFIG = AppConfig().config
|
||||||
ES_URL = CONFIG['application']['es_url']
|
ES_URL = CONFIG["application"]["es_url"]
|
||||||
CACHE_DIR = CONFIG['application']['cache_dir']
|
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||||
VIDEOS = CONFIG['application']['videos']
|
VIDEOS = CONFIG["application"]["videos"]
|
||||||
|
|
||||||
def __init__(self, youtube_id):
|
def __init__(self, youtube_id):
|
||||||
self.youtube_id = youtube_id
|
self.youtube_id = youtube_id
|
||||||
@ -226,8 +220,8 @@ class YoutubeVideo:
|
|||||||
self.vid_dict = self.get_wrapper()
|
self.vid_dict = self.get_wrapper()
|
||||||
|
|
||||||
def get_wrapper(self):
|
def get_wrapper(self):
|
||||||
""" wrapper to loop around youtube_dl to retry on failure """
|
"""wrapper to loop around youtube_dl to retry on failure"""
|
||||||
print(f'get video data for {self.youtube_id}')
|
print(f"get video data for {self.youtube_id}")
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
try:
|
try:
|
||||||
vid_dict = self.get_youtubedl_vid_data()
|
vid_dict = self.get_youtubedl_vid_data()
|
||||||
@ -241,63 +235,63 @@ class YoutubeVideo:
|
|||||||
return vid_dict
|
return vid_dict
|
||||||
|
|
||||||
def get_youtubedl_vid_data(self):
|
def get_youtubedl_vid_data(self):
|
||||||
""" parse youtubedl extract info """
|
"""parse youtubedl extract info"""
|
||||||
youtube_id = self.youtube_id
|
youtube_id = self.youtube_id
|
||||||
obs = {
|
obs = {
|
||||||
'quiet': True,
|
"quiet": True,
|
||||||
'default_search': 'ytsearch',
|
"default_search": "ytsearch",
|
||||||
'skip_download': True
|
"skip_download": True,
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
vid = youtube_dl.YoutubeDL(obs).extract_info(youtube_id)
|
vid = youtube_dl.YoutubeDL(obs).extract_info(youtube_id)
|
||||||
except (
|
except (
|
||||||
youtube_dl.utils.ExtractorError,
|
youtube_dl.utils.ExtractorError,
|
||||||
youtube_dl.utils.DownloadError
|
youtube_dl.utils.DownloadError,
|
||||||
):
|
):
|
||||||
print('failed to get info for ' + youtube_id)
|
print("failed to get info for " + youtube_id)
|
||||||
return False
|
return False
|
||||||
# extract
|
# extract
|
||||||
self.channel_id = vid['channel_id']
|
self.channel_id = vid["channel_id"]
|
||||||
upload_date = vid['upload_date']
|
upload_date = vid["upload_date"]
|
||||||
upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
|
upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
|
||||||
published = upload_date_time.strftime("%Y-%m-%d")
|
published = upload_date_time.strftime("%Y-%m-%d")
|
||||||
last_refresh = int(datetime.now().strftime("%s"))
|
last_refresh = int(datetime.now().strftime("%s"))
|
||||||
# likes
|
# likes
|
||||||
try:
|
try:
|
||||||
like_count = vid['like_count']
|
like_count = vid["like_count"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
like_count = 0
|
like_count = 0
|
||||||
try:
|
try:
|
||||||
dislike_count = vid['dislike_count']
|
dislike_count = vid["dislike_count"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
dislike_count = 0
|
dislike_count = 0
|
||||||
# build dicts
|
# build dicts
|
||||||
stats = {
|
stats = {
|
||||||
"view_count": vid['view_count'],
|
"view_count": vid["view_count"],
|
||||||
"like_count": like_count,
|
"like_count": like_count,
|
||||||
"dislike_count": dislike_count,
|
"dislike_count": dislike_count,
|
||||||
"average_rating": vid['average_rating']
|
"average_rating": vid["average_rating"],
|
||||||
}
|
}
|
||||||
vid_basic = {
|
vid_basic = {
|
||||||
"title": vid['title'],
|
"title": vid["title"],
|
||||||
"description": vid['description'],
|
"description": vid["description"],
|
||||||
"category": vid['categories'],
|
"category": vid["categories"],
|
||||||
"vid_thumb_url": vid['thumbnail'],
|
"vid_thumb_url": vid["thumbnail"],
|
||||||
"tags": vid['tags'],
|
"tags": vid["tags"],
|
||||||
"published": published,
|
"published": published,
|
||||||
"stats": stats,
|
"stats": stats,
|
||||||
"vid_last_refresh": last_refresh,
|
"vid_last_refresh": last_refresh,
|
||||||
"date_downloaded": last_refresh,
|
"date_downloaded": last_refresh,
|
||||||
"youtube_id": youtube_id,
|
"youtube_id": youtube_id,
|
||||||
"active": True,
|
"active": True,
|
||||||
"channel": False
|
"channel": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
return vid_basic
|
return vid_basic
|
||||||
|
|
||||||
def add_player(self, missing_vid):
|
def add_player(self, missing_vid):
|
||||||
""" add player information for new videos """
|
"""add player information for new videos"""
|
||||||
cache_path = self.CACHE_DIR + '/download/'
|
cache_path = self.CACHE_DIR + "/download/"
|
||||||
videos = self.VIDEOS
|
videos = self.VIDEOS
|
||||||
|
|
||||||
if missing_vid:
|
if missing_vid:
|
||||||
@ -318,24 +312,24 @@ class YoutubeVideo:
|
|||||||
player = {
|
player = {
|
||||||
"watched": False,
|
"watched": False,
|
||||||
"duration": duration,
|
"duration": duration,
|
||||||
"duration_str": duration_str
|
"duration_str": duration_str,
|
||||||
}
|
}
|
||||||
self.vid_dict['player'] = player
|
self.vid_dict["player"] = player
|
||||||
|
|
||||||
def build_file_path(self, channel_name):
|
def build_file_path(self, channel_name):
|
||||||
""" build media_url from where file will be located """
|
"""build media_url from where file will be located"""
|
||||||
clean_channel_name = clean_string(channel_name)
|
clean_channel_name = clean_string(channel_name)
|
||||||
timestamp = self.vid_dict['published'].replace('-', '')
|
timestamp = self.vid_dict["published"].replace("-", "")
|
||||||
youtube_id = self.vid_dict['youtube_id']
|
youtube_id = self.vid_dict["youtube_id"]
|
||||||
title = self.vid_dict['title']
|
title = self.vid_dict["title"]
|
||||||
clean_title = clean_string(title)
|
clean_title = clean_string(title)
|
||||||
filename = f'{timestamp}_{youtube_id}_{clean_title}.mp4'
|
filename = f"{timestamp}_{youtube_id}_{clean_title}.mp4"
|
||||||
media_url = os.path.join(clean_channel_name, filename)
|
media_url = os.path.join(clean_channel_name, filename)
|
||||||
self.vid_dict['media_url'] = media_url
|
self.vid_dict["media_url"] = media_url
|
||||||
|
|
||||||
def get_es_data(self):
|
def get_es_data(self):
|
||||||
""" get current data from elastic search """
|
"""get current data from elastic search"""
|
||||||
url = self.ES_URL + '/ta_video/_doc/' + self.youtube_id
|
url = self.ES_URL + "/ta_video/_doc/" + self.youtube_id
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(response.text)
|
print(response.text)
|
||||||
@ -343,48 +337,48 @@ class YoutubeVideo:
|
|||||||
return es_vid_dict
|
return es_vid_dict
|
||||||
|
|
||||||
def upload_to_es(self):
|
def upload_to_es(self):
|
||||||
""" upload channel data to elastic search """
|
"""upload channel data to elastic search"""
|
||||||
url = f'{self.ES_URL}/ta_video/_doc/{self.youtube_id}'
|
url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}"
|
||||||
response = requests.put(url, json=self.vid_dict)
|
response = requests.put(url, json=self.vid_dict)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(response.text)
|
print(response.text)
|
||||||
|
|
||||||
def delete_cache(self):
|
def delete_cache(self):
|
||||||
""" delete thumbnail from cache if exist """
|
"""delete thumbnail from cache if exist"""
|
||||||
video_cache = os.path.join(self.CACHE_DIR, 'videos')
|
video_cache = os.path.join(self.CACHE_DIR, "videos")
|
||||||
thumb = os.path.join(video_cache, self.youtube_id + '.jpg')
|
thumb = os.path.join(video_cache, self.youtube_id + ".jpg")
|
||||||
if os.path.exists(thumb):
|
if os.path.exists(thumb):
|
||||||
os.remove(thumb)
|
os.remove(thumb)
|
||||||
|
|
||||||
def deactivate(self):
|
def deactivate(self):
|
||||||
""" deactivate document on extractor error """
|
"""deactivate document on extractor error"""
|
||||||
youtube_id = self.youtube_id
|
youtube_id = self.youtube_id
|
||||||
headers = {'Content-type': 'application/json'}
|
headers = {"Content-type": "application/json"}
|
||||||
url = f'{self.ES_URL}/ta_video/_update/{youtube_id}'
|
url = f"{self.ES_URL}/ta_video/_update/{youtube_id}"
|
||||||
data = {"script": "ctx._source.active = false"}
|
data = {"script": "ctx._source.active = false"}
|
||||||
json_str = json.dumps(data)
|
json_str = json.dumps(data)
|
||||||
response = requests.post(url, data=json_str, headers=headers)
|
response = requests.post(url, data=json_str, headers=headers)
|
||||||
print(f'deactivated {youtube_id}')
|
print(f"deactivated {youtube_id}")
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(response.text)
|
print(response.text)
|
||||||
|
|
||||||
|
|
||||||
def index_new_video(youtube_id, missing_vid=False):
|
def index_new_video(youtube_id, missing_vid=False):
|
||||||
""" combine video and channel classes for new video index """
|
"""combine video and channel classes for new video index"""
|
||||||
vid_handler = YoutubeVideo(youtube_id)
|
vid_handler = YoutubeVideo(youtube_id)
|
||||||
if not vid_handler.vid_dict:
|
if not vid_handler.vid_dict:
|
||||||
raise ValueError('failed to get metadata for ' + youtube_id)
|
raise ValueError("failed to get metadata for " + youtube_id)
|
||||||
|
|
||||||
channel_handler = YoutubeChannel(vid_handler.channel_id)
|
channel_handler = YoutubeChannel(vid_handler.channel_id)
|
||||||
# add filepath to vid_dict
|
# add filepath to vid_dict
|
||||||
channel_name = channel_handler.channel_dict['channel_name']
|
channel_name = channel_handler.channel_dict["channel_name"]
|
||||||
vid_handler.build_file_path(channel_name)
|
vid_handler.build_file_path(channel_name)
|
||||||
# add channel and player to video
|
# add channel and player to video
|
||||||
vid_handler.add_player(missing_vid)
|
vid_handler.add_player(missing_vid)
|
||||||
vid_handler.vid_dict['channel'] = channel_handler.channel_dict
|
vid_handler.vid_dict["channel"] = channel_handler.channel_dict
|
||||||
# add new channel to es
|
# add new channel to es
|
||||||
if channel_handler.source == 'scraped':
|
if channel_handler.source == "scraped":
|
||||||
channel_handler.channel_dict['channel_subscribed'] = False
|
channel_handler.channel_dict["channel_subscribed"] = False
|
||||||
channel_handler.upload_to_es()
|
channel_handler.upload_to_es()
|
||||||
# upload video to es
|
# upload video to es
|
||||||
vid_handler.upload_to_es()
|
vid_handler.upload_to_es()
|
||||||
|
@ -17,8 +17,8 @@ from home.src.config import AppConfig
|
|||||||
# expected mapping and settings
|
# expected mapping and settings
|
||||||
INDEX_CONFIG = [
|
INDEX_CONFIG = [
|
||||||
{
|
{
|
||||||
'index_name': 'channel',
|
"index_name": "channel",
|
||||||
'expected_map': {
|
"expected_map": {
|
||||||
"channel_id": {
|
"channel_id": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
},
|
},
|
||||||
@ -28,53 +28,34 @@ INDEX_CONFIG = [
|
|||||||
"keyword": {
|
"keyword": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"ignore_above": 256,
|
"ignore_above": 256,
|
||||||
"normalizer": "to_lower"
|
"normalizer": "to_lower",
|
||||||
},
|
},
|
||||||
"search_as_you_type": {
|
"search_as_you_type": {
|
||||||
"type": "search_as_you_type",
|
"type": "search_as_you_type",
|
||||||
"doc_values": False,
|
"doc_values": False,
|
||||||
"max_shingle_size": 3
|
"max_shingle_size": 3,
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
"channel_banner_url": {
|
"channel_banner_url": {"type": "keyword", "index": False},
|
||||||
"type": "keyword",
|
"channel_thumb_url": {"type": "keyword", "index": False},
|
||||||
"index": False
|
"channel_description": {"type": "text"},
|
||||||
},
|
"channel_last_refresh": {"type": "date", "format": "epoch_second"},
|
||||||
"channel_thumb_url": {
|
|
||||||
"type": "keyword",
|
|
||||||
"index": False
|
|
||||||
},
|
|
||||||
"channel_description": {
|
|
||||||
"type": "text"
|
|
||||||
},
|
|
||||||
"channel_last_refresh": {
|
|
||||||
"type": "date",
|
|
||||||
"format": "epoch_second"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
'expected_set': {
|
"expected_set": {
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"normalizer": {
|
"normalizer": {
|
||||||
"to_lower": {
|
"to_lower": {"type": "custom", "filter": ["lowercase"]}
|
||||||
"type": "custom",
|
|
||||||
"filter": ["lowercase"]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"number_of_replicas": "0"
|
"number_of_replicas": "0",
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'index_name': 'video',
|
"index_name": "video",
|
||||||
'expected_map': {
|
"expected_map": {
|
||||||
"vid_thumb_url": {
|
"vid_thumb_url": {"type": "text", "index": False},
|
||||||
"type": "text",
|
"date_downloaded": {"type": "date"},
|
||||||
"index": False
|
|
||||||
},
|
|
||||||
"date_downloaded": {
|
|
||||||
"type": "date"
|
|
||||||
},
|
|
||||||
"channel": {
|
"channel": {
|
||||||
"properties": {
|
"properties": {
|
||||||
"channel_id": {
|
"channel_id": {
|
||||||
@ -86,127 +67,92 @@ INDEX_CONFIG = [
|
|||||||
"keyword": {
|
"keyword": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"ignore_above": 256,
|
"ignore_above": 256,
|
||||||
"normalizer": "to_lower"
|
"normalizer": "to_lower",
|
||||||
},
|
},
|
||||||
"search_as_you_type": {
|
"search_as_you_type": {
|
||||||
"type": "search_as_you_type",
|
"type": "search_as_you_type",
|
||||||
"doc_values": False,
|
"doc_values": False,
|
||||||
"max_shingle_size": 3
|
"max_shingle_size": 3,
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
},
|
|
||||||
"channel_banner_url": {
|
|
||||||
"type": "keyword",
|
|
||||||
"index": False
|
|
||||||
},
|
|
||||||
"channel_thumb_url": {
|
|
||||||
"type": "keyword",
|
|
||||||
"index": False
|
|
||||||
},
|
|
||||||
"channel_description": {
|
|
||||||
"type": "text"
|
|
||||||
},
|
},
|
||||||
|
"channel_banner_url": {"type": "keyword", "index": False},
|
||||||
|
"channel_thumb_url": {"type": "keyword", "index": False},
|
||||||
|
"channel_description": {"type": "text"},
|
||||||
"channel_last_refresh": {
|
"channel_last_refresh": {
|
||||||
"type": "date",
|
"type": "date",
|
||||||
"format": "epoch_second"
|
"format": "epoch_second",
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"description": {
|
"description": {"type": "text"},
|
||||||
"type": "text"
|
"media_url": {"type": "keyword", "index": False},
|
||||||
},
|
|
||||||
"media_url": {
|
|
||||||
"type": "keyword",
|
|
||||||
"index": False
|
|
||||||
},
|
|
||||||
"title": {
|
"title": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"fields": {
|
"fields": {
|
||||||
"keyword": {
|
"keyword": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"ignore_above": 256,
|
"ignore_above": 256,
|
||||||
"normalizer": "to_lower"
|
"normalizer": "to_lower",
|
||||||
},
|
},
|
||||||
"search_as_you_type": {
|
"search_as_you_type": {
|
||||||
"type": "search_as_you_type",
|
"type": "search_as_you_type",
|
||||||
"doc_values": False,
|
"doc_values": False,
|
||||||
"max_shingle_size": 3
|
"max_shingle_size": 3,
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
},
|
|
||||||
"vid_last_refresh": {
|
|
||||||
"type": "date"
|
|
||||||
},
|
|
||||||
"youtube_id": {
|
|
||||||
"type": "keyword"
|
|
||||||
},
|
|
||||||
"published": {
|
|
||||||
"type": "date"
|
|
||||||
},
|
},
|
||||||
|
"vid_last_refresh": {"type": "date"},
|
||||||
|
"youtube_id": {"type": "keyword"},
|
||||||
|
"published": {"type": "date"},
|
||||||
},
|
},
|
||||||
'expected_set': {
|
"expected_set": {
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"normalizer": {
|
"normalizer": {
|
||||||
"to_lower": {
|
"to_lower": {"type": "custom", "filter": ["lowercase"]}
|
||||||
"type": "custom",
|
|
||||||
"filter": ["lowercase"]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"number_of_replicas": "0"
|
"number_of_replicas": "0",
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'index_name': 'download',
|
"index_name": "download",
|
||||||
'expected_map': {
|
"expected_map": {
|
||||||
"timestamp": {
|
"timestamp": {"type": "date"},
|
||||||
"type": "date"
|
"channel_id": {"type": "keyword"},
|
||||||
},
|
|
||||||
"channel_id": {
|
|
||||||
"type": "keyword"
|
|
||||||
},
|
|
||||||
"channel_name": {
|
"channel_name": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"fields": {
|
"fields": {
|
||||||
"keyword": {
|
"keyword": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"ignore_above": 256,
|
"ignore_above": 256,
|
||||||
"normalizer": "to_lower"
|
"normalizer": "to_lower",
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
},
|
|
||||||
"status": {
|
|
||||||
"type": "keyword"
|
|
||||||
},
|
},
|
||||||
|
"status": {"type": "keyword"},
|
||||||
"title": {
|
"title": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"fields": {
|
"fields": {
|
||||||
"keyword": {
|
"keyword": {
|
||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"ignore_above": 256,
|
"ignore_above": 256,
|
||||||
"normalizer": "to_lower"
|
"normalizer": "to_lower",
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
"vid_thumb_url": {
|
"vid_thumb_url": {"type": "keyword"},
|
||||||
"type": "keyword"
|
"youtube_id": {"type": "keyword"},
|
||||||
},
|
|
||||||
"youtube_id": {
|
|
||||||
"type": "keyword"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
'expected_set': {
|
"expected_set": {
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"normalizer": {
|
"normalizer": {
|
||||||
"to_lower": {
|
"to_lower": {"type": "custom", "filter": ["lowercase"]}
|
||||||
"type": "custom",
|
|
||||||
"filter": ["lowercase"]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"number_of_replicas": "0"
|
"number_of_replicas": "0",
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -216,8 +162,8 @@ class ElasticIndex:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
CONFIG = AppConfig().config
|
CONFIG = AppConfig().config
|
||||||
ES_URL = CONFIG['application']['es_url']
|
ES_URL = CONFIG["application"]["es_url"]
|
||||||
HEADERS = {'Content-type': 'application/json'}
|
HEADERS = {"Content-type": "application/json"}
|
||||||
|
|
||||||
def __init__(self, index_name, expected_map, expected_set):
|
def __init__(self, index_name, expected_map, expected_set):
|
||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
@ -226,14 +172,14 @@ class ElasticIndex:
|
|||||||
self.exists, self.details = self.index_exists()
|
self.exists, self.details = self.index_exists()
|
||||||
|
|
||||||
def index_exists(self):
|
def index_exists(self):
|
||||||
""" check if index already exists and return mapping if it does """
|
"""check if index already exists and return mapping if it does"""
|
||||||
index_name = self.index_name
|
index_name = self.index_name
|
||||||
url = f'{self.ES_URL}/ta_{index_name}'
|
url = f"{self.ES_URL}/ta_{index_name}"
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
exists = response.ok
|
exists = response.ok
|
||||||
|
|
||||||
if exists:
|
if exists:
|
||||||
details = response.json()[f'ta_{index_name}']
|
details = response.json()[f"ta_{index_name}"]
|
||||||
else:
|
else:
|
||||||
details = False
|
details = False
|
||||||
|
|
||||||
@ -258,19 +204,19 @@ class ElasticIndex:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def validate_mappings(self):
|
def validate_mappings(self):
|
||||||
""" check if all mappings are as expected """
|
"""check if all mappings are as expected"""
|
||||||
|
|
||||||
expected_map = self.expected_map
|
expected_map = self.expected_map
|
||||||
now_map = self.details['mappings']['properties']
|
now_map = self.details["mappings"]["properties"]
|
||||||
|
|
||||||
for key, value in expected_map.items():
|
for key, value in expected_map.items():
|
||||||
# nested
|
# nested
|
||||||
if list(value.keys()) == ['properties']:
|
if list(value.keys()) == ["properties"]:
|
||||||
for key_n, value_n in value['properties'].items():
|
for key_n, value_n in value["properties"].items():
|
||||||
if key_n not in now_map[key]['properties'].keys():
|
if key_n not in now_map[key]["properties"].keys():
|
||||||
print(key_n, value_n)
|
print(key_n, value_n)
|
||||||
return True
|
return True
|
||||||
if not value_n == now_map[key]['properties'][key_n]:
|
if not value_n == now_map[key]["properties"][key_n]:
|
||||||
print(key_n, value_n)
|
print(key_n, value_n)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -287,9 +233,9 @@ class ElasticIndex:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def validate_settings(self):
|
def validate_settings(self):
|
||||||
""" check if all settings are as expected """
|
"""check if all settings are as expected"""
|
||||||
|
|
||||||
now_set = self.details['settings']['index']
|
now_set = self.details["settings"]["index"]
|
||||||
|
|
||||||
for key, value in self.expected_set.items():
|
for key, value in self.expected_set.items():
|
||||||
if key not in now_set.keys():
|
if key not in now_set.keys():
|
||||||
@ -303,53 +249,46 @@ class ElasticIndex:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def rebuild_index(self):
|
def rebuild_index(self):
|
||||||
""" rebuild with new mapping """
|
"""rebuild with new mapping"""
|
||||||
# backup
|
# backup
|
||||||
self.reindex('backup')
|
self.reindex("backup")
|
||||||
# delete original
|
# delete original
|
||||||
self.delete_index(backup=False)
|
self.delete_index(backup=False)
|
||||||
# create new
|
# create new
|
||||||
self.create_blank()
|
self.create_blank()
|
||||||
self.reindex('restore')
|
self.reindex("restore")
|
||||||
# delete backup
|
# delete backup
|
||||||
self.delete_index()
|
self.delete_index()
|
||||||
|
|
||||||
def reindex(self, method):
|
def reindex(self, method):
|
||||||
""" create on elastic search """
|
"""create on elastic search"""
|
||||||
index_name = self.index_name
|
index_name = self.index_name
|
||||||
if method == 'backup':
|
if method == "backup":
|
||||||
source = f'ta_{index_name}'
|
source = f"ta_{index_name}"
|
||||||
destination = f'ta_{index_name}_backup'
|
destination = f"ta_{index_name}_backup"
|
||||||
elif method == 'restore':
|
elif method == "restore":
|
||||||
source = f'ta_{index_name}_backup'
|
source = f"ta_{index_name}_backup"
|
||||||
destination = f'ta_{index_name}'
|
destination = f"ta_{index_name}"
|
||||||
|
|
||||||
query = {
|
query = {"source": {"index": source}, "dest": {"index": destination}}
|
||||||
"source": {
|
|
||||||
"index": source
|
|
||||||
},
|
|
||||||
"dest": {
|
|
||||||
"index": destination
|
|
||||||
}
|
|
||||||
}
|
|
||||||
data = json.dumps(query)
|
data = json.dumps(query)
|
||||||
url = self.ES_URL + '/_reindex?refresh=true'
|
url = self.ES_URL + "/_reindex?refresh=true"
|
||||||
response = requests.post(url=url, data=data, headers=self.HEADERS)
|
response = requests.post(url=url, data=data, headers=self.HEADERS)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(response.text)
|
print(response.text)
|
||||||
|
|
||||||
def delete_index(self, backup=True):
|
def delete_index(self, backup=True):
|
||||||
""" delete index passed as argument """
|
"""delete index passed as argument"""
|
||||||
if backup:
|
if backup:
|
||||||
url = f'{self.ES_URL}/ta_{self.index_name}_backup'
|
url = f"{self.ES_URL}/ta_{self.index_name}_backup"
|
||||||
else:
|
else:
|
||||||
url = f'{self.ES_URL}/ta_{self.index_name}'
|
url = f"{self.ES_URL}/ta_{self.index_name}"
|
||||||
response = requests.delete(url)
|
response = requests.delete(url)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(response.text)
|
print(response.text)
|
||||||
|
|
||||||
def create_blank(self):
|
def create_blank(self):
|
||||||
""" apply new mapping and settings for blank new index """
|
"""apply new mapping and settings for blank new index"""
|
||||||
expected_map = self.expected_map
|
expected_map = self.expected_map
|
||||||
expected_set = self.expected_set
|
expected_set = self.expected_set
|
||||||
# stitch payload
|
# stitch payload
|
||||||
@ -359,7 +298,7 @@ class ElasticIndex:
|
|||||||
if expected_map:
|
if expected_map:
|
||||||
payload.update({"mappings": {"properties": expected_map}})
|
payload.update({"mappings": {"properties": expected_map}})
|
||||||
# create
|
# create
|
||||||
url = f'{self.ES_URL}/ta_{self.index_name}'
|
url = f"{self.ES_URL}/ta_{self.index_name}"
|
||||||
data = json.dumps(payload)
|
data = json.dumps(payload)
|
||||||
response = requests.put(url=url, data=data, headers=self.HEADERS)
|
response = requests.put(url=url, data=data, headers=self.HEADERS)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
@ -367,103 +306,104 @@ class ElasticIndex:
|
|||||||
|
|
||||||
|
|
||||||
class ElasticBackup:
|
class ElasticBackup:
|
||||||
""" dump index to nd-json files for later bulk import """
|
"""dump index to nd-json files for later bulk import"""
|
||||||
|
|
||||||
def __init__(self, index_config):
|
def __init__(self, index_config):
|
||||||
self.config = AppConfig().config
|
self.config = AppConfig().config
|
||||||
self.index_config = index_config
|
self.index_config = index_config
|
||||||
self.timestamp = datetime.now().strftime('%Y%m%d')
|
self.timestamp = datetime.now().strftime("%Y%m%d")
|
||||||
self.backup_files = []
|
self.backup_files = []
|
||||||
|
|
||||||
def get_all_documents(self, index_name):
|
def get_all_documents(self, index_name):
|
||||||
""" export all documents of a single index """
|
"""export all documents of a single index"""
|
||||||
headers = {'Content-type': 'application/json'}
|
headers = {"Content-type": "application/json"}
|
||||||
es_url = self.config['application']['es_url']
|
es_url = self.config["application"]["es_url"]
|
||||||
# get PIT ID
|
# get PIT ID
|
||||||
url = f'{es_url}/ta_{index_name}/_pit?keep_alive=1m'
|
url = f"{es_url}/ta_{index_name}/_pit?keep_alive=1m"
|
||||||
response = requests.post(url)
|
response = requests.post(url)
|
||||||
json_data = json.loads(response.text)
|
json_data = json.loads(response.text)
|
||||||
pit_id = json_data['id']
|
pit_id = json_data["id"]
|
||||||
# build query
|
# build query
|
||||||
data = {
|
data = {
|
||||||
"query": {"match_all": {}},
|
"query": {"match_all": {}},
|
||||||
"size": 100, "pit": {"id": pit_id, "keep_alive": "1m"},
|
"size": 100,
|
||||||
"sort": [{"_id": {"order": "asc"}}]
|
"pit": {"id": pit_id, "keep_alive": "1m"},
|
||||||
|
"sort": [{"_id": {"order": "asc"}}],
|
||||||
}
|
}
|
||||||
query_str = json.dumps(data)
|
query_str = json.dumps(data)
|
||||||
url = es_url + '/_search'
|
url = es_url + "/_search"
|
||||||
# loop until nothing left
|
# loop until nothing left
|
||||||
all_results = []
|
all_results = []
|
||||||
while True:
|
while True:
|
||||||
response = requests.get(url, data=query_str, headers=headers)
|
response = requests.get(url, data=query_str, headers=headers)
|
||||||
json_data = json.loads(response.text)
|
json_data = json.loads(response.text)
|
||||||
all_hits = json_data['hits']['hits']
|
all_hits = json_data["hits"]["hits"]
|
||||||
if all_hits:
|
if all_hits:
|
||||||
for hit in all_hits:
|
for hit in all_hits:
|
||||||
search_after = hit['sort']
|
search_after = hit["sort"]
|
||||||
all_results.append(hit)
|
all_results.append(hit)
|
||||||
# update search_after with last hit data
|
# update search_after with last hit data
|
||||||
data['search_after'] = search_after
|
data["search_after"] = search_after
|
||||||
query_str = json.dumps(data)
|
query_str = json.dumps(data)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
# clean up PIT
|
# clean up PIT
|
||||||
query_str = json.dumps({"id": pit_id})
|
query_str = json.dumps({"id": pit_id})
|
||||||
requests.delete(es_url + '/_pit', data=query_str, headers=headers)
|
requests.delete(es_url + "/_pit", data=query_str, headers=headers)
|
||||||
|
|
||||||
return all_results
|
return all_results
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def build_bulk(all_results):
|
def build_bulk(all_results):
|
||||||
""" build bulk query data from all_results """
|
"""build bulk query data from all_results"""
|
||||||
bulk_list = []
|
bulk_list = []
|
||||||
|
|
||||||
for document in all_results:
|
for document in all_results:
|
||||||
document_id = document['_id']
|
document_id = document["_id"]
|
||||||
es_index = document['_index']
|
es_index = document["_index"]
|
||||||
action = {"index": {"_index": es_index, "_id": document_id}}
|
action = {"index": {"_index": es_index, "_id": document_id}}
|
||||||
source = document['_source']
|
source = document["_source"]
|
||||||
bulk_list.append(json.dumps(action))
|
bulk_list.append(json.dumps(action))
|
||||||
bulk_list.append(json.dumps(source))
|
bulk_list.append(json.dumps(source))
|
||||||
|
|
||||||
# add last newline
|
# add last newline
|
||||||
bulk_list.append('\n')
|
bulk_list.append("\n")
|
||||||
file_content = '\n'.join(bulk_list)
|
file_content = "\n".join(bulk_list)
|
||||||
|
|
||||||
return file_content
|
return file_content
|
||||||
|
|
||||||
def write_es_json(self, file_content, index_name):
|
def write_es_json(self, file_content, index_name):
|
||||||
""" write nd-json file for es _bulk API to disk """
|
"""write nd-json file for es _bulk API to disk"""
|
||||||
cache_dir = self.config['application']['cache_dir']
|
cache_dir = self.config["application"]["cache_dir"]
|
||||||
file_name = f'es_{index_name}-{self.timestamp}.json'
|
file_name = f"es_{index_name}-{self.timestamp}.json"
|
||||||
file_path = os.path.join(cache_dir, 'backup', file_name)
|
file_path = os.path.join(cache_dir, "backup", file_name)
|
||||||
with open(file_path, 'w', encoding='utf-8') as f:
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
f.write(file_content)
|
f.write(file_content)
|
||||||
|
|
||||||
self.backup_files.append(file_path)
|
self.backup_files.append(file_path)
|
||||||
|
|
||||||
def write_ta_json(self, all_results, index_name):
|
def write_ta_json(self, all_results, index_name):
|
||||||
""" write generic json file to disk """
|
"""write generic json file to disk"""
|
||||||
cache_dir = self.config['application']['cache_dir']
|
cache_dir = self.config["application"]["cache_dir"]
|
||||||
file_name = f'ta_{index_name}-{self.timestamp}.json'
|
file_name = f"ta_{index_name}-{self.timestamp}.json"
|
||||||
file_path = os.path.join(cache_dir, 'backup', file_name)
|
file_path = os.path.join(cache_dir, "backup", file_name)
|
||||||
to_write = [i['_source'] for i in all_results]
|
to_write = [i["_source"] for i in all_results]
|
||||||
file_content = json.dumps(to_write)
|
file_content = json.dumps(to_write)
|
||||||
with open(file_path, 'w', encoding='utf-8') as f:
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
f.write(file_content)
|
f.write(file_content)
|
||||||
|
|
||||||
self.backup_files.append(file_path)
|
self.backup_files.append(file_path)
|
||||||
|
|
||||||
def zip_it(self):
|
def zip_it(self):
|
||||||
""" pack it up into single zip file """
|
"""pack it up into single zip file"""
|
||||||
cache_dir = self.config['application']['cache_dir']
|
cache_dir = self.config["application"]["cache_dir"]
|
||||||
file_name = f'ta_backup-{self.timestamp}.zip'
|
file_name = f"ta_backup-{self.timestamp}.zip"
|
||||||
backup_folder = os.path.join(cache_dir, 'backup')
|
backup_folder = os.path.join(cache_dir, "backup")
|
||||||
backup_file = os.path.join(backup_folder, file_name)
|
backup_file = os.path.join(backup_folder, file_name)
|
||||||
|
|
||||||
with zipfile.ZipFile(
|
with zipfile.ZipFile(
|
||||||
backup_file, 'w', compression=zipfile.ZIP_DEFLATED
|
backup_file, "w", compression=zipfile.ZIP_DEFLATED
|
||||||
) as zip_f:
|
) as zip_f:
|
||||||
for backup_file in self.backup_files:
|
for backup_file in self.backup_files:
|
||||||
zip_f.write(backup_file, os.path.basename(backup_file))
|
zip_f.write(backup_file, os.path.basename(backup_file))
|
||||||
|
|
||||||
@ -472,66 +412,67 @@ class ElasticBackup:
|
|||||||
os.remove(backup_file)
|
os.remove(backup_file)
|
||||||
|
|
||||||
def post_bulk_restore(self, file_name):
|
def post_bulk_restore(self, file_name):
|
||||||
""" send bulk to es """
|
"""send bulk to es"""
|
||||||
cache_dir = self.config['application']['cache_dir']
|
cache_dir = self.config["application"]["cache_dir"]
|
||||||
es_url = self.config['application']['es_url']
|
es_url = self.config["application"]["es_url"]
|
||||||
headers = {'Content-type': 'application/x-ndjson'}
|
headers = {"Content-type": "application/x-ndjson"}
|
||||||
file_path = os.path.join(cache_dir, file_name)
|
file_path = os.path.join(cache_dir, file_name)
|
||||||
|
|
||||||
with open(file_path, 'r', encoding='utf-8') as f:
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
query_str = f.read()
|
query_str = f.read()
|
||||||
|
|
||||||
if not query_str.strip():
|
if not query_str.strip():
|
||||||
return
|
return
|
||||||
|
|
||||||
url = es_url + '/_bulk'
|
url = es_url + "/_bulk"
|
||||||
request = requests.post(url, data=query_str, headers=headers)
|
request = requests.post(url, data=query_str, headers=headers)
|
||||||
if not request.ok:
|
if not request.ok:
|
||||||
print(request.text)
|
print(request.text)
|
||||||
|
|
||||||
def unpack_zip_backup(self):
|
def unpack_zip_backup(self):
|
||||||
""" extract backup zip and return filelist """
|
"""extract backup zip and return filelist"""
|
||||||
cache_dir = self.config['application']['cache_dir']
|
cache_dir = self.config["application"]["cache_dir"]
|
||||||
backup_dir = os.path.join(cache_dir, 'backup')
|
backup_dir = os.path.join(cache_dir, "backup")
|
||||||
all_available_backups = [
|
all_available_backups = [
|
||||||
i for i in os.listdir(backup_dir) if
|
i
|
||||||
i.startswith('ta_') and i.endswith('.zip')
|
for i in os.listdir(backup_dir)
|
||||||
|
if i.startswith("ta_") and i.endswith(".zip")
|
||||||
]
|
]
|
||||||
all_available_backups.sort()
|
all_available_backups.sort()
|
||||||
newest_backup = all_available_backups[-1]
|
newest_backup = all_available_backups[-1]
|
||||||
file_path = os.path.join(backup_dir, newest_backup)
|
file_path = os.path.join(backup_dir, newest_backup)
|
||||||
|
|
||||||
with zipfile.ZipFile(file_path, 'r') as z:
|
with zipfile.ZipFile(file_path, "r") as z:
|
||||||
zip_content = z.namelist()
|
zip_content = z.namelist()
|
||||||
z.extractall(backup_dir)
|
z.extractall(backup_dir)
|
||||||
|
|
||||||
return zip_content
|
return zip_content
|
||||||
|
|
||||||
def restore_json_files(self, zip_content):
|
def restore_json_files(self, zip_content):
|
||||||
""" go through the unpacked files and restore """
|
"""go through the unpacked files and restore"""
|
||||||
|
|
||||||
cache_dir = self.config['application']['cache_dir']
|
cache_dir = self.config["application"]["cache_dir"]
|
||||||
backup_dir = os.path.join(cache_dir, 'backup')
|
backup_dir = os.path.join(cache_dir, "backup")
|
||||||
|
|
||||||
for json_f in zip_content:
|
for json_f in zip_content:
|
||||||
|
|
||||||
file_name = os.path.join(backup_dir, json_f)
|
file_name = os.path.join(backup_dir, json_f)
|
||||||
|
|
||||||
if not json_f.startswith('es_') or not json_f.endswith('.json'):
|
if not json_f.startswith("es_") or not json_f.endswith(".json"):
|
||||||
os.remove(file_name)
|
os.remove(file_name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print('restoring: ' + json_f)
|
print("restoring: " + json_f)
|
||||||
self.post_bulk_restore(file_name)
|
self.post_bulk_restore(file_name)
|
||||||
os.remove(file_name)
|
os.remove(file_name)
|
||||||
|
|
||||||
|
|
||||||
def backup_all_indexes():
|
def backup_all_indexes():
|
||||||
""" backup all es indexes to disk """
|
"""backup all es indexes to disk"""
|
||||||
backup_handler = ElasticBackup(INDEX_CONFIG)
|
backup_handler = ElasticBackup(INDEX_CONFIG)
|
||||||
|
|
||||||
for index in backup_handler.index_config:
|
for index in backup_handler.index_config:
|
||||||
index_name = index['index_name']
|
index_name = index["index_name"]
|
||||||
all_results = backup_handler.get_all_documents(index_name)
|
all_results = backup_handler.get_all_documents(index_name)
|
||||||
file_content = backup_handler.build_bulk(all_results)
|
file_content = backup_handler.build_bulk(all_results)
|
||||||
backup_handler.write_es_json(file_content, index_name)
|
backup_handler.write_es_json(file_content, index_name)
|
||||||
@ -541,7 +482,7 @@ def backup_all_indexes():
|
|||||||
|
|
||||||
|
|
||||||
def restore_from_backup():
|
def restore_from_backup():
|
||||||
""" restore indexes from backup file """
|
"""restore indexes from backup file"""
|
||||||
# delete
|
# delete
|
||||||
index_check(force_restore=True)
|
index_check(force_restore=True)
|
||||||
# recreate
|
# recreate
|
||||||
@ -551,14 +492,14 @@ def restore_from_backup():
|
|||||||
|
|
||||||
|
|
||||||
def index_check(force_restore=False):
|
def index_check(force_restore=False):
|
||||||
""" check if all indexes are created and have correct mapping """
|
"""check if all indexes are created and have correct mapping"""
|
||||||
|
|
||||||
backed_up = False
|
backed_up = False
|
||||||
|
|
||||||
for index in INDEX_CONFIG:
|
for index in INDEX_CONFIG:
|
||||||
index_name = index['index_name']
|
index_name = index["index_name"]
|
||||||
expected_map = index['expected_map']
|
expected_map = index["expected_map"]
|
||||||
expected_set = index['expected_set']
|
expected_set = index["expected_set"]
|
||||||
handler = ElasticIndex(index_name, expected_map, expected_set)
|
handler = ElasticIndex(index_name, expected_map, expected_set)
|
||||||
# force restore
|
# force restore
|
||||||
if force_restore:
|
if force_restore:
|
||||||
@ -568,7 +509,7 @@ def index_check(force_restore=False):
|
|||||||
|
|
||||||
# create new
|
# create new
|
||||||
if not handler.exists:
|
if not handler.exists:
|
||||||
print(f'create new blank index with name ta_{index_name}...')
|
print(f"create new blank index with name ta_{index_name}...")
|
||||||
handler.create_blank()
|
handler.create_blank()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -577,13 +518,13 @@ def index_check(force_restore=False):
|
|||||||
if rebuild:
|
if rebuild:
|
||||||
# make backup before rebuild
|
# make backup before rebuild
|
||||||
if not backed_up:
|
if not backed_up:
|
||||||
print('running backup first')
|
print("running backup first")
|
||||||
backup_all_indexes()
|
backup_all_indexes()
|
||||||
backed_up = True
|
backed_up = True
|
||||||
|
|
||||||
print(f'applying new mappings to index ta_{index_name}...')
|
print(f"applying new mappings to index ta_{index_name}...")
|
||||||
handler.rebuild_index()
|
handler.rebuild_index()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# else all good
|
# else all good
|
||||||
print(f'ta_{index_name} index is created and up to date...')
|
print(f"ta_{index_name} index is created and up to date...")
|
||||||
|
@ -17,19 +17,23 @@ from time import sleep
import requests
from home.src.config import AppConfig
from home.src.download import ChannelSubscription, PendingList, VideoDownloader
-from home.src.helper import (clean_string, get_message, get_total_hits,
-set_message)
+from home.src.helper import (
+clean_string,
+get_message,
+get_total_hits,
+set_message,
+)
from home.src.index import YoutubeChannel, YoutubeVideo, index_new_video


class Reindex:
-""" check for outdated documents and refresh data from youtube """
+"""check for outdated documents and refresh data from youtube"""

def __init__(self):
# config
config = AppConfig().config
-self.sleep_interval = config['downloads']['sleep_interval']
+self.sleep_interval = config["downloads"]["sleep_interval"]
-self.es_url = config['application']['es_url']
+self.es_url = config["application"]["es_url"]
self.refresh_interval = 90
# scan
self.video_daily, self.channel_daily = self.get_daily()
@ -37,20 +41,18 @@ class Reindex:
self.all_channel_ids = False

def get_daily(self):
-""" get daily refresh values """
+"""get daily refresh values"""
-total_videos = get_total_hits(
-'ta_video', self.es_url, 'active'
-)
+total_videos = get_total_hits("ta_video", self.es_url, "active")
video_daily = ceil(total_videos / self.refresh_interval * 1.2)
total_channels = get_total_hits(
-'ta_channel', self.es_url, 'channel_active'
+"ta_channel", self.es_url, "channel_active"
)
channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
return (video_daily, channel_daily)

def get_outdated_vids(self):
-""" get daily videos to refresh """
+"""get daily videos to refresh"""
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_3m = now - 3 * 30 * 24 * 60 * 60
size = self.video_daily
@ -60,24 +62,25 @@ class Reindex:
"bool": {
"must": [
{"match": {"active": True}},
-{"range": {"vid_last_refresh": {"lte": now_3m}}}
+{"range": {"vid_last_refresh": {"lte": now_3m}}},
]
}
},
-"sort": [{"vid_last_refresh": {"order": "asc"}}], "_source": False
+"sort": [{"vid_last_refresh": {"order": "asc"}}],
+"_source": False,
}
query_str = json.dumps(data)
-url = self.es_url + '/ta_video/_search'
+url = self.es_url + "/ta_video/_search"
response = requests.get(url, data=query_str, headers=headers)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
-all_youtube_ids = [i['_id'] for i in response_dict['hits']['hits']]
+all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_youtube_ids

def get_outdated_channels(self):
-""" get daily channels to refresh """
+"""get daily channels to refresh"""
-headers = {'Content-type': 'application/json'}
+headers = {"Content-type": "application/json"}
now = int(datetime.now().strftime("%s"))
now_3m = now - 3 * 30 * 24 * 60 * 60
size = self.channel_daily
@ -87,52 +90,50 @@ class Reindex:
"bool": {
"must": [
{"match": {"channel_active": True}},
-{"range": {"channel_last_refresh": {"lte": now_3m}}}
+{"range": {"channel_last_refresh": {"lte": now_3m}}},
]
}
},
"sort": [{"channel_last_refresh": {"order": "asc"}}],
-"_source": False
+"_source": False,
}
query_str = json.dumps(data)
-url = self.es_url + '/ta_channel/_search'
+url = self.es_url + "/ta_channel/_search"
response = requests.get(url, data=query_str, headers=headers)
if not response.ok:
print(response.text)
response_dict = json.loads(response.text)
-all_channel_ids = [i['_id'] for i in response_dict['hits']['hits']]
+all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
return all_channel_ids

def check_outdated(self):
-""" add missing vids and channels """
+"""add missing vids and channels"""
self.all_youtube_ids = self.get_outdated_vids()
self.all_channel_ids = self.get_outdated_channels()

def rescrape_all_channels(self):
-""" sync new data from channel to all matching videos """
+"""sync new data from channel to all matching videos"""
sleep_interval = self.sleep_interval
channel_sub_handler = ChannelSubscription()
-all_channels = channel_sub_handler.get_channels(
-subscribed_only=False
-)
-all_channel_ids = [i['channel_id'] for i in all_channels]
+all_channels = channel_sub_handler.get_channels(subscribed_only=False)
+all_channel_ids = [i["channel_id"] for i in all_channels]

counter = 1
for channel_id in all_channel_ids:
-message = f'Progress: {counter}/{len(all_channels)}'
+message = f"Progress: {counter}/{len(all_channels)}"
mess_dict = {
"status": "scraping",
"level": "info",
"title": "Scraping all youtube channels",
-"message": message
+"message": message,
}
-set_message('progress:download', mess_dict)
+set_message("progress:download", mess_dict)
channel_index = YoutubeChannel(channel_id)
-subscribed = channel_index.channel_dict['channel_subscribed']
+subscribed = channel_index.channel_dict["channel_subscribed"]
channel_index.channel_dict = channel_index.build_channel_dict(
scrape=True
)
-channel_index.channel_dict['channel_subscribed'] = subscribed
+channel_index.channel_dict["channel_subscribed"] = subscribed
channel_index.upload_to_es()
channel_index.sync_to_videos()
counter = counter + 1
@ -141,7 +142,7 @@ class Reindex:

@staticmethod
def reindex_single_video(youtube_id):
-""" refresh data for single video """
+"""refresh data for single video"""
vid_handler = YoutubeVideo(youtube_id)
if not vid_handler.vid_dict:
# stop if deactivated
@ -149,42 +150,42 @@ class Reindex:
return

es_vid_dict = vid_handler.get_es_data()
-player = es_vid_dict['_source']['player']
+player = es_vid_dict["_source"]["player"]
-date_downloaded = es_vid_dict['_source']['date_downloaded']
+date_downloaded = es_vid_dict["_source"]["date_downloaded"]
-channel_dict = es_vid_dict['_source']['channel']
+channel_dict = es_vid_dict["_source"]["channel"]
-channel_name = channel_dict['channel_name']
+channel_name = channel_dict["channel_name"]
vid_handler.build_file_path(channel_name)
# add to vid_dict
-vid_handler.vid_dict['player'] = player
+vid_handler.vid_dict["player"] = player
-vid_handler.vid_dict['date_downloaded'] = date_downloaded
+vid_handler.vid_dict["date_downloaded"] = date_downloaded
-vid_handler.vid_dict['channel'] = channel_dict
+vid_handler.vid_dict["channel"] = channel_dict
# update
vid_handler.upload_to_es()
vid_handler.delete_cache()

@staticmethod
def reindex_single_channel(channel_id):
-""" refresh channel data and sync to videos """
+"""refresh channel data and sync to videos"""
channel_handler = YoutubeChannel(channel_id)
-subscribed = channel_handler.channel_dict['channel_subscribed']
+subscribed = channel_handler.channel_dict["channel_subscribed"]
channel_handler.channel_dict = channel_handler.build_channel_dict(
scrape=True
)
-channel_handler.channel_dict['channel_subscribed'] = subscribed
+channel_handler.channel_dict["channel_subscribed"] = subscribed
channel_handler.upload_to_es()
channel_handler.sync_to_videos()
channel_handler.clear_cache()

def reindex(self):
-""" reindex what's needed """
+"""reindex what's needed"""
# videos
-print(f'reindexing {len(self.all_youtube_ids)} videos')
+print(f"reindexing {len(self.all_youtube_ids)} videos")
for youtube_id in self.all_youtube_ids:
self.reindex_single_video(youtube_id)
if self.sleep_interval:
sleep(self.sleep_interval)
# channels
-print(f'reindexing {len(self.all_channel_ids)} channels')
+print(f"reindexing {len(self.all_channel_ids)} channels")
for channel_id in self.all_channel_ids:
self.reindex_single_channel(channel_id)
if self.sleep_interval:
@ -192,11 +193,11 @@ class Reindex:


class FilesystemScanner:
-""" handle scanning and fixing from filesystem """
+"""handle scanning and fixing from filesystem"""

CONFIG = AppConfig().config
-ES_URL = CONFIG['application']['es_url']
+ES_URL = CONFIG["application"]["es_url"]
-VIDEOS = CONFIG['application']['videos']
+VIDEOS = CONFIG["application"]["videos"]

def __init__(self):
self.all_downloaded = self.get_all_downloaded()
@ -207,7 +208,7 @@ class FilesystemScanner:
self.to_delete = None

def get_all_downloaded(self):
-""" get a list of all video files downloaded """
+"""get a list of all video files downloaded"""
all_channels = os.listdir(self.VIDEOS)
all_channels.sort()
all_downloaded = []
@ -221,26 +222,26 @@ class FilesystemScanner:

@staticmethod
def get_all_indexed():
-""" get a list of all indexed videos """
+"""get a list of all indexed videos"""
index_handler = PendingList()
all_indexed_raw = index_handler.get_all_indexed()
all_indexed = []
for video in all_indexed_raw:
-youtube_id = video['_id']
+youtube_id = video["_id"]
-media_url = video['_source']['media_url']
+media_url = video["_source"]["media_url"]
-published = video['_source']['published']
+published = video["_source"]["published"]
-title = video['_source']['title']
+title = video["_source"]["title"]
all_indexed.append((youtube_id, media_url, published, title))
return all_indexed

def list_comarison(self):
-""" compare the lists to figure out what to do """
+"""compare the lists to figure out what to do"""
self.find_unindexed()
self.find_missing()
self.find_bad_media_url()

def find_unindexed(self):
-""" find video files without a matching document indexed """
+"""find video files without a matching document indexed"""
all_indexed_ids = [i[0] for i in self.all_indexed]
to_index = []
for downloaded in self.all_downloaded:
@ -250,7 +251,7 @@ class FilesystemScanner:
self.to_index = to_index

def find_missing(self):
-""" find indexed videos without matching media file """
+"""find indexed videos without matching media file"""
all_downloaded_ids = [i[2] for i in self.all_downloaded]
to_delete = []
for video in self.all_indexed:
@ -261,7 +262,7 @@ class FilesystemScanner:
self.to_delete = to_delete

def find_bad_media_url(self):
-""" rename media files not matching the indexed title """
+"""rename media files not matching the indexed title"""
to_fix = []
to_rename = []
for downloaded in self.all_downloaded:
@ -272,8 +273,8 @@ class FilesystemScanner:
if indexed_id == downloaded_id:
# found it
title_c = clean_string(title)
-pub = published.replace('-', '')
+pub = published.replace("-", "")
-expected_filename = f'{pub}_{indexed_id}_{title_c}.mp4'
+expected_filename = f"{pub}_{indexed_id}_{title_c}.mp4"
new_url = os.path.join(channel, expected_filename)
if expected_filename != filename:
# file to rename
@ -290,7 +291,7 @@ class FilesystemScanner:
self.to_rename = to_rename

def rename_files(self):
-""" rename media files as identified by find_bad_media_url """
+"""rename media files as identified by find_bad_media_url"""
for bad_filename in self.to_rename:
channel, filename, expected_filename = bad_filename
old_path = os.path.join(self.VIDEOS, channel, filename)
@ -298,71 +299,72 @@ class FilesystemScanner:
os.rename(old_path, new_path)

def send_mismatch_bulk(self):
-""" build bulk update """
+"""build bulk update"""
bulk_list = []
for video_mismatch in self.mismatch:
youtube_id, media_url = video_mismatch
-action = {"update": {"_id": youtube_id, "_index": 'ta_video'}}
+action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
source = {"doc": {"media_url": media_url}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
-bulk_list.append('\n')
+bulk_list.append("\n")
-query_str = '\n'.join(bulk_list)
+query_str = "\n".join(bulk_list)
# make the call
-headers = {'Content-type': 'application/x-ndjson'}
+headers = {"Content-type": "application/x-ndjson"}
-url = self.ES_URL + '/_bulk'
+url = self.ES_URL + "/_bulk"
request = requests.post(url, data=query_str, headers=headers)
if not request.ok:
print(request.text)

def delete_from_index(self):
-""" find indexed but deleted mediafile """
+"""find indexed but deleted mediafile"""
for indexed in self.to_delete:
youtube_id, _ = indexed
-url = self.ES_URL + '/ta_video/_doc/' + youtube_id
+url = self.ES_URL + "/ta_video/_doc/" + youtube_id
request = requests.delete(url)
if not request.ok:
print(request.text)


class ManualImport:
-""" import and indexing existing video files """
+"""import and indexing existing video files"""

CONFIG = AppConfig().config
-CACHE_DIR = CONFIG['application']['cache_dir']
+CACHE_DIR = CONFIG["application"]["cache_dir"]
-IMPORT_DIR = os.path.join(CACHE_DIR, 'import')
+IMPORT_DIR = os.path.join(CACHE_DIR, "import")

def __init__(self):
self.identified = self.import_folder_parser()

def import_folder_parser(self):
-""" detect files in import folder """
+"""detect files in import folder"""

to_import = os.listdir(self.IMPORT_DIR)
to_import.sort()
-video_files = [i for i in to_import if not i.endswith('.json')]
+video_files = [i for i in to_import if not i.endswith(".json")]

identified = []

for file_path in video_files:

-file_dict = {'video_file': file_path}
+file_dict = {"video_file": file_path}
file_name, _ = os.path.splitext(file_path)

matching_json = [
-i for i in to_import if i.startswith(file_name)
-and i.endswith('.json')
+i
+for i in to_import
+if i.startswith(file_name) and i.endswith(".json")
]
if matching_json:
json_file = matching_json[0]
youtube_id = self.extract_id_from_json(json_file)
-file_dict.update({'json_file': json_file})
+file_dict.update({"json_file": json_file})
else:
youtube_id = self.extract_id_from_filename(file_name)
-file_dict.update({'json_file': False})
+file_dict.update({"json_file": False})

-file_dict.update({'youtube_id': youtube_id})
+file_dict.update({"youtube_id": youtube_id})
identified.append(file_dict)

return identified
@ -373,33 +375,33 @@ class ManualImport:
look at the file name for the youtube id
expects filename ending in [<youtube_id>].<ext>
"""
-id_search = re.search(r'\[([a-zA-Z0-9_-]{11})\]$', file_name)
+id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
if id_search:
youtube_id = id_search.group(1)
return youtube_id

-print('failed to extract youtube id for: ' + file_name)
+print("failed to extract youtube id for: " + file_name)
raise Exception

def extract_id_from_json(self, json_file):
-""" open json file and extract id """
+"""open json file and extract id"""
-json_path = os.path.join(self.CACHE_DIR, 'import', json_file)
+json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-with open(json_path, 'r', encoding='utf-8') as f:
+with open(json_path, "r", encoding="utf-8") as f:
json_content = f.read()

-youtube_id = json.loads(json_content)['id']
+youtube_id = json.loads(json_content)["id"]

return youtube_id

def process_import(self):
-""" go through identified media files """
+"""go through identified media files"""

for media_file in self.identified:
-json_file = media_file['json_file']
+json_file = media_file["json_file"]
-video_file = media_file['video_file']
+video_file = media_file["video_file"]
-youtube_id = media_file['youtube_id']
+youtube_id = media_file["youtube_id"]

-video_path = os.path.join(self.CACHE_DIR, 'import', video_file)
+video_path = os.path.join(self.CACHE_DIR, "import", video_file)

self.move_to_cache(video_path, youtube_id)

@ -411,35 +413,43 @@ class ManualImport:
if os.path.exists(video_path):
os.remove(video_path)
if json_file:
-json_path = os.path.join(self.CACHE_DIR, 'import', json_file)
+json_path = os.path.join(self.CACHE_DIR, "import", json_file)
os.remove(json_path)

def move_to_cache(self, video_path, youtube_id):
-""" move identified video file to cache, convert to mp4 """
+"""move identified video file to cache, convert to mp4"""
file_name = os.path.split(video_path)[-1]
video_file, ext = os.path.splitext(file_name)

# make sure youtube_id is in filename
if youtube_id not in video_file:
-video_file = f'{video_file}_{youtube_id}'
+video_file = f"{video_file}_{youtube_id}"

# move, convert if needed
-if ext == '.mp4':
+if ext == ".mp4":
new_file = video_file + ext
-dest_path = os.path.join(self.CACHE_DIR, 'download', new_file)
+dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
shutil.move(video_path, dest_path)
else:
-print(f'processing with ffmpeg: {video_file}')
+print(f"processing with ffmpeg: {video_file}")
-new_file = video_file + '.mp4'
+new_file = video_file + ".mp4"
-dest_path = os.path.join(self.CACHE_DIR, 'download', new_file)
+dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
subprocess.run(
-["ffmpeg", "-i", video_path, dest_path,
-"-loglevel", "warning", "-stats"], check=True
+[
+"ffmpeg",
+"-i",
+video_path,
+dest_path,
+"-loglevel",
+"warning",
+"-stats",
+],
+check=True,
)


def scan_filesystem():
-""" grouped function to delete and update index """
+"""grouped function to delete and update index"""
filesystem_handler = FilesystemScanner()
filesystem_handler.list_comarison()
if filesystem_handler.to_rename:
@ -455,10 +465,10 @@ def scan_filesystem():


def reindex_old_documents():
-""" daily refresh of old documents """
+"""daily refresh of old documents"""
# check needed last run
now = int(datetime.now().strftime("%s"))
-last_reindex = get_message('last_reindex')
+last_reindex = get_message("last_reindex")
if isinstance(last_reindex, int) and now - last_reindex < 60 * 60 * 24:
return
# continue if needed
@ -466,4 +476,4 @@ def reindex_old_documents():
reindex_handler.check_outdated()
reindex_handler.reindex()
# set timestamp
-set_message('last_reindex', now, expire=False)
+set_message("last_reindex", now, expire=False)
@ -17,10 +17,10 @@ from PIL import Image


class SearchHandler:
-""" search elastic search """
+"""search elastic search"""

CONFIG = AppConfig().config
-CACHE_DIR = CONFIG['application']['cache_dir']
+CACHE_DIR = CONFIG["application"]["cache_dir"]

def __init__(self, url, data, cache=True):
self.max_hits = None
@ -29,15 +29,15 @@ class SearchHandler:
self.cache = cache

def get_data(self):
-""" get the data """
+"""get the data"""
if self.data:
response = requests.get(self.url, json=self.data).json()
else:
response = requests.get(self.url).json()

-if 'hits' in response.keys():
+if "hits" in response.keys():
-self.max_hits = response['hits']['total']['value']
+self.max_hits = response["hits"]["total"]["value"]
-return_value = response['hits']['hits']
+return_value = response["hits"]["hits"]
else:
# simulate list for single result to reuse rest of class
return_value = [response]
@ -50,13 +50,13 @@ class SearchHandler:
all_channels = []
for idx, hit in enumerate(return_value):
return_value[idx] = self.hit_cleanup(hit)
-if hit['_index'] == 'ta_video':
+if hit["_index"] == "ta_video":
video_dict, channel_dict = self.vid_cache_link(hit)
if video_dict not in all_videos:
all_videos.append(video_dict)
if channel_dict not in all_channels:
all_channels.append(channel_dict)
-elif hit['_index'] == 'ta_channel':
+elif hit["_index"] == "ta_channel":
channel_dict = self.channel_cache_link(hit)
if channel_dict not in all_channels:
all_channels.append(channel_dict)
@ -69,52 +69,49 @@ class SearchHandler:

@staticmethod
def vid_cache_link(hit):
-""" download thumbnails into cache """
+"""download thumbnails into cache"""
-vid_thumb = hit['source']['vid_thumb_url']
+vid_thumb = hit["source"]["vid_thumb_url"]
-youtube_id = hit['source']['youtube_id']
+youtube_id = hit["source"]["youtube_id"]
-channel_id_hit = hit['source']['channel']['channel_id']
+channel_id_hit = hit["source"]["channel"]["channel_id"]
-chan_thumb = hit['source']['channel']['channel_thumb_url']
+chan_thumb = hit["source"]["channel"]["channel_thumb_url"]
try:
-chan_banner = hit['source']['channel']['channel_banner_url']
+chan_banner = hit["source"]["channel"]["channel_banner_url"]
except KeyError:
chan_banner = False
-video_dict = {
-'youtube_id': youtube_id,
-'vid_thumb': vid_thumb
-}
+video_dict = {"youtube_id": youtube_id, "vid_thumb": vid_thumb}
channel_dict = {
-'channel_id': channel_id_hit,
+"channel_id": channel_id_hit,
-'chan_thumb': chan_thumb,
+"chan_thumb": chan_thumb,
-'chan_banner': chan_banner
+"chan_banner": chan_banner,
}
return video_dict, channel_dict

@staticmethod
def channel_cache_link(hit):
-""" build channel thumb links """
+"""build channel thumb links"""
-channel_id_hit = hit['source']['channel_id']
+channel_id_hit = hit["source"]["channel_id"]
-chan_thumb = hit['source']['channel_thumb_url']
+chan_thumb = hit["source"]["channel_thumb_url"]
try:
-chan_banner = hit['source']['channel_banner_url']
+chan_banner = hit["source"]["channel_banner_url"]
except KeyError:
chan_banner = False
channel_dict = {
-'channel_id': channel_id_hit,
+"channel_id": channel_id_hit,
-'chan_thumb': chan_thumb,
+"chan_thumb": chan_thumb,
-'chan_banner': chan_banner
+"chan_banner": chan_banner,
}
return channel_dict

def cache_dl_vids(self, all_videos):
-""" video thumbs links for cache """
+"""video thumbs links for cache"""
-vid_cache = os.path.join(self.CACHE_DIR, 'videos')
+vid_cache = os.path.join(self.CACHE_DIR, "videos")
all_vid_cached = os.listdir(vid_cache)
# videos
for video_dict in all_videos:
-youtube_id = video_dict['youtube_id']
+youtube_id = video_dict["youtube_id"]
-if not youtube_id + '.jpg' in all_vid_cached:
+if not youtube_id + ".jpg" in all_vid_cached:
-cache_path = os.path.join(vid_cache, youtube_id + '.jpg')
+cache_path = os.path.join(vid_cache, youtube_id + ".jpg")
-thumb_url = video_dict['vid_thumb']
+thumb_url = video_dict["vid_thumb"]
img_raw = requests.get(thumb_url, stream=True).raw
img = Image.open(img_raw)
width, height = img.size
@ -125,62 +122,62 @@ class SearchHandler:
img.convert("RGB").save(cache_path)

def cache_dl_chan(self, all_channels):
-""" download channel thumbs """
+"""download channel thumbs"""
-chan_cache = os.path.join(self.CACHE_DIR, 'channels')
+chan_cache = os.path.join(self.CACHE_DIR, "channels")
all_chan_cached = os.listdir(chan_cache)
for channel_dict in all_channels:
-channel_id_cache = channel_dict['channel_id']
+channel_id_cache = channel_dict["channel_id"]
-channel_banner_url = channel_dict['chan_banner']
+channel_banner_url = channel_dict["chan_banner"]
-channel_banner = channel_id_cache + '_banner.jpg'
+channel_banner = channel_id_cache + "_banner.jpg"
-channel_thumb_url = channel_dict['chan_thumb']
+channel_thumb_url = channel_dict["chan_thumb"]
-channel_thumb = channel_id_cache + '_thumb.jpg'
+channel_thumb = channel_id_cache + "_thumb.jpg"
# thumb
if channel_thumb_url and channel_thumb not in all_chan_cached:
cache_path = os.path.join(chan_cache, channel_thumb)
img_raw = requests.get(channel_thumb_url, stream=True).content
-with open(cache_path, 'wb') as f:
+with open(cache_path, "wb") as f:
f.write(img_raw)
# banner
if channel_banner_url and channel_banner not in all_chan_cached:
cache_path = os.path.join(chan_cache, channel_banner)
img_raw = requests.get(channel_banner_url, stream=True).content
-with open(cache_path, 'wb') as f:
+with open(cache_path, "wb") as f:
f.write(img_raw)

@staticmethod
def hit_cleanup(hit):
-""" clean up and parse data from a single hit """
+"""clean up and parse data from a single hit"""
-hit['source'] = hit.pop('_source')
+hit["source"] = hit.pop("_source")
-hit_keys = hit['source'].keys()
+hit_keys = hit["source"].keys()
-if 'media_url' in hit_keys:
+if "media_url" in hit_keys:
-parsed_url = urllib.parse.quote(hit['source']['media_url'])
+parsed_url = urllib.parse.quote(hit["source"]["media_url"])
-hit['source']['media_url'] = parsed_url
+hit["source"]["media_url"] = parsed_url

-if 'published' in hit_keys:
+if "published" in hit_keys:
-published = hit['source']['published']
+published = hit["source"]["published"]
date_pub = datetime.strptime(published, "%Y-%m-%d")
date_str = datetime.strftime(date_pub, "%d %b, %Y")
-hit['source']['published'] = date_str
+hit["source"]["published"] = date_str

-if 'vid_last_refresh' in hit_keys:
+if "vid_last_refresh" in hit_keys:
-vid_last_refresh = hit['source']['vid_last_refresh']
+vid_last_refresh = hit["source"]["vid_last_refresh"]
date_refresh = datetime.fromtimestamp(vid_last_refresh)
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
-hit['source']['vid_last_refresh'] = date_str
+hit["source"]["vid_last_refresh"] = date_str

-if 'channel_last_refresh' in hit_keys:
+if "channel_last_refresh" in hit_keys:
-refreshed = hit['source']['channel_last_refresh']
+refreshed = hit["source"]["channel_last_refresh"]
date_refresh = datetime.fromtimestamp(refreshed)
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
-hit['source']['channel_last_refresh'] = date_str
+hit["source"]["channel_last_refresh"] = date_str

-if 'channel' in hit_keys:
+if "channel" in hit_keys:
-channel_keys = hit['source']['channel'].keys()
+channel_keys = hit["source"]["channel"].keys()
-if 'channel_last_refresh' in channel_keys:
+if "channel_last_refresh" in channel_keys:
-refreshed = hit['source']['channel']['channel_last_refresh']
+refreshed = hit["source"]["channel"]["channel_last_refresh"]
date_refresh = datetime.fromtimestamp(refreshed)
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
-hit['source']['channel']['channel_last_refresh'] = date_str
+hit["source"]["channel"]["channel_last_refresh"] = date_str

return hit

@ -192,13 +189,13 @@ class Pagination:

def __init__(self, page_get, search_get=False):
config = AppConfig().config
-self.page_size = config['archive']['page_size']
+self.page_size = config["archive"]["page_size"]
self.page_get = page_get
self.search_get = search_get
self.pagination = self.first_guess()

def first_guess(self):
-""" build first guess before api call """
+"""build first guess before api call"""
page_get = self.page_get
if page_get in [0, 1]:
page_from = 0
@ -213,22 +210,22 @@ class Pagination:
"page_size": self.page_size,
"page_from": page_from,
"prev_pages": prev_pages,
-"current_page": page_get
+"current_page": page_get,
}
if self.search_get:
pagination.update({"search_get": self.search_get})
return pagination

def validate(self, total_hits):
-""" validate pagination with total_hits after making api call """
+"""validate pagination with total_hits after making api call"""
page_get = self.page_get
max_pages = math.ceil(total_hits / self.page_size)
if page_get < max_pages and max_pages > 1:
-self.pagination['last_page'] = max_pages
+self.pagination["last_page"] = max_pages
else:
-self.pagination['last_page'] = False
+self.pagination["last_page"] = False
next_pages = [
i for i in range(page_get + 1, page_get + 6) if 1 < i < max_pages
]

-self.pagination['next_pages'] = next_pages
+self.pagination["next_pages"] = next_pages
@ -14,17 +14,17 @@ from home.src.index_management import backup_all_indexes, restore_from_backup
from home.src.reindex import ManualImport, reindex_old_documents

CONFIG = AppConfig().config
-REDIS_HOST = CONFIG['application']['REDIS_HOST']
+REDIS_HOST = CONFIG["application"]["REDIS_HOST"]

-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'home.settings')
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "home.settings")
-app = Celery('tasks', broker='redis://' + REDIS_HOST)
+app = Celery("tasks", broker="redis://" + REDIS_HOST)
-app.config_from_object('django.conf:settings', namespace='CELERY')
+app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks()


@shared_task
def update_subscribed():
-""" look for missing videos and add to pending """
+"""look for missing videos and add to pending"""
channel_handler = ChannelSubscription()
missing_videos = channel_handler.find_missing()
if missing_videos:
@ -36,10 +36,10 @@ def update_subscribed():

@shared_task
def download_pending():
-""" download latest pending videos """
+"""download latest pending videos"""
pending_handler = PendingList()
pending_vids = pending_handler.get_all_pending()[0]
-to_download = [i['youtube_id'] for i in pending_vids]
+to_download = [i["youtube_id"] for i in pending_vids]
to_download.reverse()
if to_download:
download_handler = VideoDownloader(to_download)
@ -48,14 +48,14 @@ def download_pending():

@shared_task
def download_single(youtube_id):
-""" start download single video now """
+"""start download single video now"""
download_handler = VideoDownloader([youtube_id])
download_handler.download_list()


@shared_task
def extrac_dl(youtube_ids):
-""" parse list passed and add to pending """
+"""parse list passed and add to pending"""
pending_handler = PendingList()
missing_videos = pending_handler.parse_url_list(youtube_ids)
pending_handler.add_to_pending(missing_videos)
@ -63,17 +63,17 @@ def extrac_dl(youtube_ids):

@shared_task
def check_reindex():
-""" run the reindex main command """
+"""run the reindex main command"""
reindex_old_documents()


@shared_task
def run_manual_import():
-""" called from settings page, to go through import folder """
+"""called from settings page, to go through import folder"""

-print('starting media file import')
+print("starting media file import")
have_lock = False
-my_lock = get_lock('manual_import')
+my_lock = get_lock("manual_import")

try:
have_lock = my_lock.acquire(blocking=False)
@ -91,13 +91,13 @@ def run_manual_import():

@shared_task
def run_backup():
-""" called from settings page, dump backup to zip file """
+"""called from settings page, dump backup to zip file"""
backup_all_indexes()
-print('backup finished')
+print("backup finished")


@shared_task
def run_restore_backup():
-""" called from settings page, dump backup to zip file """
+"""called from settings page, dump backup to zip file"""
restore_from_backup()
-print('index restore finished')
+print("index restore finished")
@ -96,7 +96,7 @@
</div>
<div class="footer">
<div class="boxed-content">
-<span>© 2021 The Tube Archivist | <a href="https://github.com/bbilly1/tubearchivist" target="_blank">Github</a> | <a href="https://hub.docker.com/r/bbilly1/tubearchivist" target="_blank">Docker Hub</a></span>
+<span>© 2021 The Tube Archivist v0.0.3 | <a href="https://github.com/bbilly1/tubearchivist" target="_blank">Github</a> | <a href="https://hub.docker.com/r/bbilly1/tubearchivist" target="_blank">Docker Hub</a></span>
</div>
</div>
</body>
@ -1,22 +1,30 @@
""" all home app urls """

from django.urls import path
-from home.views import (AboutView, ChannelIdView, ChannelView, DownloadView,
-HomeView, SettingsView, VideoView)
+from home.views import (
+AboutView,
+ChannelIdView,
+ChannelView,
+DownloadView,
+HomeView,
+SettingsView,
+VideoView,
+)

from . import views

urlpatterns = [
-path('', HomeView.as_view(), name='home'),
+path("", HomeView.as_view(), name="home"),
-path('about/', AboutView.as_view(), name='about'),
+path("about/", AboutView.as_view(), name="about"),
-path('downloads/', DownloadView.as_view(), name='downloads'),
+path("downloads/", DownloadView.as_view(), name="downloads"),
-path('settings/', SettingsView.as_view(), name='settings'),
+path("settings/", SettingsView.as_view(), name="settings"),
-path('process/', views.process, name='process'),
+path("process/", views.process, name="process"),
-path('downloads/progress', views.progress, name='progress'),
+path("downloads/progress", views.progress, name="progress"),
-path('channel/', ChannelView.as_view(), name='channel'),
+path("channel/", ChannelView.as_view(), name="channel"),
path(
-'channel/<slug:channel_id_detail>/',
-ChannelIdView.as_view(), name='channel_id'
+"channel/<slug:channel_id_detail>/",
+ChannelIdView.as_view(),
+name="channel_id",
),
-path('video/<slug:video_id>/', VideoView.as_view(), name='video')
+path("video/<slug:video_id>/", VideoView.as_view(), name="video"),
]
@ -16,36 +16,46 @@ from django.utils.http import urlencode
from django.views import View
from home.src.config import AppConfig
from home.src.download import ChannelSubscription, PendingList
-from home.src.helper import (get_dl_message, get_message, process_url_list,
-set_message)
+from home.src.helper import (
+get_dl_message,
+get_message,
+process_url_list,
+set_message,
+)
from home.src.searching import Pagination, SearchHandler
-from home.tasks import (download_pending, download_single, extrac_dl,
-run_backup, run_manual_import, run_restore_backup,
-update_subscribed)
+from home.tasks import (
+download_pending,
+download_single,
+extrac_dl,
+run_backup,
+run_manual_import,
+run_restore_backup,
+update_subscribed,
+)


class HomeView(View):
-""" resolves to /
+"""resolves to /
handle home page and video search post functionality
"""

CONFIG = AppConfig().config
-ES_URL = CONFIG['application']['es_url']
+ES_URL = CONFIG["application"]["es_url"]

def get(self, request):
-""" return home search results """
+"""return home search results"""
colors, sort_order, hide_watched = self.read_config()
# handle search
-search_get = request.GET.get('search', False)
+search_get = request.GET.get("search", False)
if search_get:
search_encoded = urllib.parse.quote(search_get)
else:
search_encoded = False
# define page size
-page_get = int(request.GET.get('page', 0))
+page_get = int(request.GET.get("page", 0))
pagination_handler = Pagination(page_get, search_encoded)

-url = self.ES_URL + '/ta_video/_search'
+url = self.ES_URL + "/ta_video/_search"

data = self.build_data(
pagination_handler, sort_order, search_get, hide_watched
@ -56,95 +66,94 @@ class HomeView(View):
max_hits = search.max_hits
pagination_handler.validate(max_hits)
context = {
-'videos': videos_hits,
+"videos": videos_hits,
-'pagination': pagination_handler.pagination,
+"pagination": pagination_handler.pagination,
-'sortorder': sort_order,
+"sortorder": sort_order,
-'hide_watched': hide_watched,
+"hide_watched": hide_watched,
-'colors': colors
+"colors": colors,
}
-return render(request, 'home/home.html', context)
+return render(request, "home/home.html", context)

@staticmethod
def build_data(pagination_handler, sort_order, search_get, hide_watched):
-""" build the data dict for the search query """
+"""build the data dict for the search query"""
-page_size = pagination_handler.pagination['page_size']
+page_size = pagination_handler.pagination["page_size"]
-page_from = pagination_handler.pagination['page_from']
+page_from = pagination_handler.pagination["page_from"]
data = {
-"size": page_size, "from": page_from, "query": {"match_all": {}},
+"size": page_size,
+"from": page_from,
+"query": {"match_all": {}},
"sort": [
{"published": {"order": "desc"}},
-{"date_downloaded": {"order": "desc"}}
+{"date_downloaded": {"order": "desc"}},
-]
+],
}
# define sort
-if sort_order == 'downloaded':
+if sort_order == "downloaded":
-del data['sort'][0]
+del data["sort"][0]
if search_get:
-del data['sort']
+del data["sort"]
if hide_watched:
-data['query'] = {"term": {"player.watched": {"value": False}}}
+data["query"] = {"term": {"player.watched": {"value": False}}}
if search_get:
query = {
"multi_match": {
"query": search_get,
"fields": ["title", "channel.channel_name", "tags"],
"type": "cross_fields",
-"operator": "and"
+"operator": "and",
}
}
-data['query'] = query
+data["query"] = query

return data

@staticmethod
def read_config():
-""" read needed values from redis """
+"""read needed values from redis"""
config_handler = AppConfig().config
-colors = config_handler['application']['colors']
+colors = config_handler["application"]["colors"]
-sort_order = get_message('sort_order')
+sort_order = get_message("sort_order")
-hide_watched = get_message('hide_watched')
+hide_watched = get_message("hide_watched")
return colors, sort_order, hide_watched

@staticmethod
def post(request):
-""" handle post from search form """
+"""handle post from search form"""
post_data = dict(request.POST)
-search_query = post_data['videoSearch'][0]
+search_query = post_data["videoSearch"][0]
-search_url = '/?' + urlencode({'search': search_query})
+search_url = "/?" + urlencode({"search": search_query})
return redirect(search_url, permanent=True)


class AboutView(View):
-""" resolves to /about/
+"""resolves to /about/
show helpful how to information
"""

@staticmethod
def get(request):
-""" handle http get """
+"""handle http get"""
config = AppConfig().config
-colors = config['application']['colors']
+colors = config["application"]["colors"]
-context = {
-'title': 'About',
-'colors': colors
-}
-return render(request, 'home/about.html', context)
+context = {"title": "About", "colors": colors}
+return render(request, "home/about.html", context)


class DownloadView(View):
-""" resolves to /download/
+"""resolves to /download/
takes POST for downloading youtube links
"""

def get(self, request):
-""" handle get requests """
+"""handle get requests"""
config = AppConfig().config
-colors = config['application']['colors']
+colors = config["application"]["colors"]

-page_get = int(request.GET.get('page', 0))
+page_get = int(request.GET.get("page", 0))
pagination_handler = Pagination(page_get)

-url = config['application']['es_url'] + '/ta_download/_search'
+url = config["application"]["es_url"] + "/ta_download/_search"
data = self.build_data(pagination_handler)
search = SearchHandler(url, data, cache=False)

@ -152,7 +161,7 @@ class DownloadView(View):
|
|||||||
max_hits = search.max_hits
|
max_hits = search.max_hits
|
||||||
|
|
||||||
if videos_hits:
|
if videos_hits:
|
||||||
all_pending = [i['source'] for i in videos_hits]
|
all_pending = [i["source"] for i in videos_hits]
|
||||||
pagination_handler.validate(max_hits)
|
pagination_handler.validate(max_hits)
|
||||||
pagination = pagination_handler.pagination
|
pagination = pagination_handler.pagination
|
||||||
else:
|
else:
|
||||||
@ -160,33 +169,34 @@ class DownloadView(View):
|
|||||||
pagination = False
|
pagination = False
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
'pending': all_pending,
|
"pending": all_pending,
|
||||||
'max_hits': max_hits,
|
"max_hits": max_hits,
|
||||||
'pagination': pagination,
|
"pagination": pagination,
|
||||||
'title': 'Downloads',
|
"title": "Downloads",
|
||||||
'colors': colors
|
"colors": colors,
|
||||||
}
|
}
|
||||||
return render(request, 'home/downloads.html', context)
|
return render(request, "home/downloads.html", context)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def build_data(pagination_handler):
|
def build_data(pagination_handler):
|
||||||
""" build data dict for search """
|
"""build data dict for search"""
|
||||||
page_size = pagination_handler.pagination['page_size']
|
page_size = pagination_handler.pagination["page_size"]
|
||||||
page_from = pagination_handler.pagination['page_from']
|
page_from = pagination_handler.pagination["page_from"]
|
||||||
data = {
|
data = {
|
||||||
"size": page_size, "from": page_from,
|
"size": page_size,
|
||||||
|
"from": page_from,
|
||||||
"query": {"term": {"status": {"value": "pending"}}},
|
"query": {"term": {"status": {"value": "pending"}}},
|
||||||
"sort": [{"timestamp": {"order": "desc"}}]
|
"sort": [{"timestamp": {"order": "desc"}}],
|
||||||
}
|
}
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def post(request):
|
def post(request):
|
||||||
""" handle post requests """
|
"""handle post requests"""
|
||||||
download_post = dict(request.POST)
|
download_post = dict(request.POST)
|
||||||
if 'vid-url' in download_post.keys():
|
if "vid-url" in download_post.keys():
|
||||||
url_str = download_post['vid-url']
|
url_str = download_post["vid-url"]
|
||||||
print('adding to queue')
|
print("adding to queue")
|
||||||
youtube_ids = process_url_list(url_str)
|
youtube_ids = process_url_list(url_str)
|
||||||
if not youtube_ids:
|
if not youtube_ids:
|
||||||
# failed to process
|
# failed to process
|
||||||
@ -194,52 +204,52 @@ class DownloadView(View):
|
|||||||
mess_dict = {
|
mess_dict = {
|
||||||
"status": "downloading",
|
"status": "downloading",
|
||||||
"level": "error",
|
"level": "error",
|
||||||
"title": 'Failed to extract links.',
|
"title": "Failed to extract links.",
|
||||||
"message": ''
|
"message": "",
|
||||||
}
|
}
|
||||||
set_message('progress:download', mess_dict)
|
set_message("progress:download", mess_dict)
|
||||||
return redirect('downloads')
|
return redirect("downloads")
|
||||||
|
|
||||||
print(youtube_ids)
|
print(youtube_ids)
|
||||||
extrac_dl.delay(youtube_ids)
|
extrac_dl.delay(youtube_ids)
|
||||||
|
|
||||||
sleep(2)
|
sleep(2)
|
||||||
return redirect('downloads', permanent=True)
|
return redirect("downloads", permanent=True)
|
||||||
|
|
||||||
|
|
||||||
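For reference, build_data above assembles a plain Elasticsearch search body for pending downloads. A minimal sketch of the request SearchHandler ends up sending, with assumed values: es_url points at a local Elasticsearch and the pagination values (size 12, from 0) stand in for whatever Pagination computes; none of this is part of the commit itself.

# illustrative sketch only, not part of this commit
import json

import requests

es_url = "http://localhost:9200"  # assumption: local Elasticsearch from config["application"]["es_url"]

data = {
    "size": 12,  # assumed page_size from Pagination
    "from": 0,   # assumed page_from from Pagination
    "query": {"term": {"status": {"value": "pending"}}},
    "sort": [{"timestamp": {"order": "desc"}}],
}

# Elasticsearch accepts a JSON body on GET for _search
response = requests.get(f"{es_url}/ta_download/_search", json=data)
print(json.dumps(response.json()["hits"]["total"], indent=2))
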
class ChannelIdView(View):
    """resolves to /channel/<channel-id>/
    display single channel page from channel_id
    """

    def get(self, request, channel_id_detail):
        """get method"""
        es_url, colors = self.read_config()
        context = self.get_channel_videos(request, channel_id_detail, es_url)
        context.update({"colors": colors})
        return render(request, "home/channel_id.html", context)

    @staticmethod
    def read_config():
        """read config file"""
        config = AppConfig().config
        es_url = config["application"]["es_url"]
        colors = config["application"]["colors"]
        return es_url, colors

    def get_channel_videos(self, request, channel_id_detail, es_url):
        """get channel from video index"""
        page_get = int(request.GET.get("page", 0))
        pagination_handler = Pagination(page_get)
        # get data
        url = es_url + "/ta_video/_search"
        data = self.build_data(pagination_handler, channel_id_detail)
        search = SearchHandler(url, data)
        videos_hits = search.get_data()
        max_hits = search.max_hits
        if max_hits:
            channel_info = videos_hits[0]["source"]["channel"]
            channel_name = channel_info["channel_name"]
            pagination_handler.validate(max_hits)
            pagination = pagination_handler.pagination
        else:
@ -251,218 +261,223 @@ class ChannelIdView(View):
            pagination = False

        context = {
            "channel_info": channel_info,
            "videos": videos_hits,
            "max_hits": max_hits,
            "pagination": pagination,
            "title": "Channel: " + channel_name,
        }

        return context

    @staticmethod
    def build_data(pagination_handler, channel_id_detail):
        """build data dict for search"""
        page_size = pagination_handler.pagination["page_size"]
        page_from = pagination_handler.pagination["page_from"]
        data = {
            "size": page_size,
            "from": page_from,
            "query": {
                "term": {"channel.channel_id": {"value": channel_id_detail}}
            },
            "sort": [
                {"published": {"order": "desc"}},
                {"date_downloaded": {"order": "desc"}},
            ],
        }
        return data

    @staticmethod
    def get_channel_info(channel_id_detail, es_url):
        """get channel info from channel index if no videos"""
        url = f"{es_url}/ta_channel/_doc/{channel_id_detail}"
        data = False
        search = SearchHandler(url, data)
        channel_data = search.get_data()
        channel_info = channel_data[0]["source"]
        channel_name = channel_info["channel_name"]
        return channel_info, channel_name


class ChannelView(View):
    """resolves to /channel/
    handle functionality for channel overview page, subscribe to channel,
    search as you type for channel name
    """

    def get(self, request):
        """handle http get requests"""
        es_url, colors = self.read_config()
        page_get = int(request.GET.get("page", 0))
        pagination_handler = Pagination(page_get)
        page_size = pagination_handler.pagination["page_size"]
        page_from = pagination_handler.pagination["page_from"]
        # get
        url = es_url + "/ta_channel/_search"
        data = {
            "size": page_size,
            "from": page_from,
            "query": {"match_all": {}},
            "sort": [{"channel_name.keyword": {"order": "asc"}}],
        }
        show_subed_only = get_message("show_subed_only")
        if show_subed_only:
            data["query"] = {"term": {"channel_subscribed": {"value": True}}}
        search = SearchHandler(url, data)
        channel_hits = search.get_data()
        max_hits = search.max_hits
        pagination_handler.validate(search.max_hits)
        context = {
            "channels": channel_hits,
            "max_hits": max_hits,
            "pagination": pagination_handler.pagination,
            "show_subed_only": show_subed_only,
            "title": "Channels",
            "colors": colors,
        }
        return render(request, "home/channel.html", context)

    @staticmethod
    def read_config():
        """read config file"""
        config = AppConfig().config
        es_url = config["application"]["es_url"]
        colors = config["application"]["colors"]
        return es_url, colors

    def post(self, request):
        """handle http post requests"""
        subscriptions_post = dict(request.POST)
        print(subscriptions_post)
        subscriptions_post = dict(request.POST)
        if "subscribe" in subscriptions_post.keys():
            sub_str = subscriptions_post["subscribe"]
            try:
                youtube_ids = process_url_list(sub_str)
                self.subscribe_to(youtube_ids)
            except ValueError:
                print("parsing subscribe ids failed!")
                print(sub_str)

        sleep(1)
        return redirect("channel", permanent=True)

    @staticmethod
    def subscribe_to(youtube_ids):
        """process the subscribe ids"""
        for youtube_id in youtube_ids:
            if youtube_id["type"] == "video":
                to_sub = youtube_id["url"]
                vid_details = PendingList().get_youtube_details(to_sub)
                channel_id_sub = vid_details["channel_id"]
            elif youtube_id["type"] == "channel":
                channel_id_sub = youtube_id["url"]
            else:
                raise ValueError("failed to subscribe to: " + youtube_id)

            ChannelSubscription().change_subscribe(
                channel_id_sub, channel_subscribed=True
            )
            print("subscribed to: " + channel_id_sub)


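The subscribed-only toggle in ChannelView.get above simply swaps the match_all query for a term filter. A small sketch of the two request bodies, with assumed pagination values; not part of the commit:

# illustrative sketch only, not part of this commit
base_query = {
    "size": 12,  # assumed page_size from Pagination
    "from": 0,   # assumed page_from from Pagination
    "query": {"match_all": {}},
    "sort": [{"channel_name.keyword": {"order": "asc"}}],
}

# when the "show_subed_only" flag is set via set_message/get_message,
# only the query part changes:
subed_only_query = dict(base_query)
subed_only_query["query"] = {"term": {"channel_subscribed": {"value": True}}}
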
class VideoView(View):
    """resolves to /video/<video-id>/
    display details about a single video
    """

    def get(self, request, video_id):
        """get single video"""
        es_url, colors = self.read_config()
        url = f"{es_url}/ta_video/_doc/{video_id}"
        data = None
        look_up = SearchHandler(url, data)
        video_hit = look_up.get_data()
        video_data = video_hit[0]["source"]
        video_title = video_data["title"]
        context = {"video": video_data, "title": video_title, "colors": colors}
        return render(request, "home/video.html", context)

    @staticmethod
    def read_config():
        """read config file"""
        config = AppConfig().config
        es_url = config["application"]["es_url"]
        colors = config["application"]["colors"]
        return es_url, colors


class SettingsView(View):
    """resolves to /settings/
    handle the settings page, display current settings,
    take post request from the form to update settings
    """

    @staticmethod
    def get(request):
        """read and display current settings"""
        config = AppConfig().config
        colors = config["application"]["colors"]

        context = {"title": "Settings", "config": config, "colors": colors}

        return render(request, "home/settings.html", context)

    @staticmethod
    def post(request):
        """handle form post to update settings"""
        form_post = dict(request.POST)
        del form_post["csrfmiddlewaretoken"]
        print(form_post)
        config_handler = AppConfig()
        config_handler.update_config(form_post)

        return redirect("settings", permanent=True)


def progress(request):
    # pylint: disable=unused-argument
    """endpoint for download progress ajax calls"""
    config = AppConfig().config
    cache_dir = config["application"]["cache_dir"]
    json_data = get_dl_message(cache_dir)
    return JsonResponse(json_data)


def process(request):
    """handle all the buttons calls via POST ajax"""
    if request.method == "POST":
        post_dict = json.loads(request.body.decode())
        post_handler = PostData(post_dict)
        if post_handler.to_do:
            task_result = post_handler.run_task()
            return JsonResponse(task_result)

    return JsonResponse({"success": False})


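process() above expects a JSON body whose keys come from PostData.VALID_KEYS, defined below; the URL the frontend posts to is whatever urls.py maps this view to and is not shown here. A hedged sketch of how such a payload flows through PostData, using an example key and an example video id:

# illustrative sketch only, not part of this commit;
# "dlnow" is one of the VALID_KEYS below, the video id is a placeholder
post_dict = {"dlnow": "dQw4w9WgXcQ"}

handler = PostData(post_dict)
print(handler.to_do)              # [{'task': 'dlnow', 'status': 'dQw4w9WgXcQ'}]
task_result = handler.run_task()  # queues download_single via Celery
print(task_result)                # {'success': True}
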
class PostData:
    """generic post handler from process route"""

    CONFIG = AppConfig().config
    ES_URL = CONFIG["application"]["es_url"]

    VALID_KEYS = [
        "watched",
        "rescan_pending",
        "ignore",
        "dl_pending",
        "unsubscribe",
        "sort_order",
        "hide_watched",
        "show_subed_only",
        "channel-search",
        "video-search",
        "dlnow",
        "manual-import",
        "db-backup",
        "db-restore",
    ]

    def __init__(self, post_dict):
@ -470,81 +485,81 @@ class PostData:
        self.to_do = self.validate()

    def validate(self):
        """validate the post_dict"""
        to_do = []
        for key, value in self.post_dict.items():
            if key in self.VALID_KEYS:
                task_item = {"task": key, "status": value}
                print(task_item)
                to_do.append(task_item)
            else:
                print(key + " not a valid key")

        return to_do

    def run_task(self):
        """run through the tasks to do"""
        for item in self.to_do:
            task = item["task"]
            if task == "watched":
                youtube_id = item["status"]
                self.parse_watched(youtube_id)
            elif task == "rescan_pending":
                print("rescan subscribed channels")
                update_subscribed.delay()
            elif task == "ignore":
                print("ignore video")
                handler = PendingList()
                ignore_list = item["status"]
                handler.ignore_from_pending([ignore_list])
            elif task == "dl_pending":
                print("download pending")
                download_pending.delay()
            elif task == "unsubscribe":
                channel_id_unsub = item["status"]
                print("unsubscribe from " + channel_id_unsub)
                ChannelSubscription().change_subscribe(
                    channel_id_unsub, channel_subscribed=False
                )
            elif task == "sort_order":
                sort_order = item["status"]
                set_message("sort_order", sort_order, expire=False)
            elif task == "hide_watched":
                hide_watched = bool(int(item["status"]))
                print(item["status"])
                set_message("hide_watched", hide_watched, expire=False)
            elif task == "show_subed_only":
                show_subed_only = bool(int(item["status"]))
                print(show_subed_only)
                set_message("show_subed_only", show_subed_only, expire=False)
            elif task == "channel-search":
                search_query = item["status"]
                print("searching for: " + search_query)
                search_results = self.search_channels(search_query)
                return search_results
            elif task == "video-search":
                search_query = item["status"]
                print("searching for: " + search_query)
                search_results = self.search_videos(search_query)
                return search_results
            elif task == "dlnow":
                youtube_id = item["status"]
                print("downloading: " + youtube_id)
                download_single.delay(youtube_id=youtube_id)
            elif task == "manual-import":
                print("starting manual import")
                run_manual_import.delay()
            elif task == "db-backup":
                print("backing up database")
                run_backup.delay()
            elif task == "db-restore":
                print("restoring index from backup zip")
                run_restore_backup.delay()
        return {"success": True}

    def search_channels(self, search_query):
        """fancy searching channels as you type"""
        url = self.ES_URL + "/ta_channel/_search"
        data = {
            "size": 10,
            "query": {
@ -554,18 +569,18 @@ class PostData:
                    "fields": [
                        "channel_name.search_as_you_type",
                        "channel_name._2gram",
                        "channel_name._3gram",
                    ],
                }
            },
        }
        look_up = SearchHandler(url, data, cache=False)
        search_results = look_up.get_data()
        return {"results": search_results}

    def search_videos(self, search_query):
        """fancy searching videos as you type"""
        url = self.ES_URL + "/ta_video/_search"
        data = {
            "size": 10,
            "query": {
@ -575,51 +590,51 @@ class PostData:
                    "fields": [
                        "title.search_as_you_type",
                        "title._2gram",
                        "title._3gram",
                    ],
                }
            },
        }
        look_up = SearchHandler(url, data, cache=False)
        search_results = look_up.get_data()
        return {"results": search_results}

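The collapsed hunks above hide the top of both query bodies; only the "fields" lists are visible in this diff. A hedged reconstruction of a search-as-you-type request of this shape, where the multi_match / bool_prefix wrapper is an assumption based on the visible field names, not something shown in the commit:

# illustrative sketch only, not part of this commit;
# the multi_match/bool_prefix wrapper is assumed, only the fields list is visible above
search_query = "linux"  # example input

data = {
    "size": 10,
    "query": {
        "multi_match": {
            "query": search_query,
            "type": "bool_prefix",
            "fields": [
                "channel_name.search_as_you_type",
                "channel_name._2gram",
                "channel_name._3gram",
            ],
        }
    },
}
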
    def parse_watched(self, youtube_id):
        """marked as watched based on id type"""
        es_url = self.ES_URL
        id_type = process_url_list([youtube_id])[0]["type"]
        stamp = int(datetime.now().strftime("%s"))
        if id_type == "video":
            stamp = int(datetime.now().strftime("%s"))
            url = self.ES_URL + "/ta_video/_update/" + youtube_id
            source = {
                "doc": {"player": {"watched": True, "watched_date": stamp}}
            }
            request = requests.post(url, json=source)
            if not request.ok:
                print(request.text)
        elif id_type == "channel":
            headers = {"Content-type": "application/json"}
            data = {
                "description": youtube_id,
                "processors": [
                    {"set": {"field": "player.watched", "value": True}},
                    {"set": {"field": "player.watched_date", "value": stamp}},
                ],
            }
            payload = json.dumps(data)
            url = es_url + "/_ingest/pipeline/" + youtube_id
            request = requests.put(url, data=payload, headers=headers)
            if not request.ok:
                print(request.text)
            # apply pipeline
            must_list = [
                {"term": {"channel.channel_id": {"value": youtube_id}}},
                {"term": {"player.watched": {"value": False}}},
            ]
            data = {"query": {"bool": {"must": must_list}}}
            payload = json.dumps(data)
            url = f"{es_url}/ta_video/_update_by_query?pipeline={youtube_id}"
            request = requests.post(url, data=payload, headers=headers)
            if not request.ok:
                print(request.text)
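For the channel branch above, the same two-step flow can be exercised against Elasticsearch directly: register an ingest pipeline that stamps the watched fields, then run _update_by_query through it. A minimal standalone sketch; es_url and the channel id are assumptions, not part of the commit:

# illustrative sketch only, not part of this commit
import json
from datetime import datetime

import requests

es_url = "http://localhost:9200"           # assumed local Elasticsearch
channel_id = "UC0000000000000000000000"    # placeholder channel id
stamp = int(datetime.now().strftime("%s"))
headers = {"Content-type": "application/json"}

# 1) register an ingest pipeline that sets the watched fields
pipeline = {
    "description": channel_id,
    "processors": [
        {"set": {"field": "player.watched", "value": True}},
        {"set": {"field": "player.watched_date", "value": stamp}},
    ],
}
requests.put(
    f"{es_url}/_ingest/pipeline/{channel_id}",
    data=json.dumps(pipeline),
    headers=headers,
)

# 2) update every still-unwatched video of that channel through the pipeline
query = {
    "query": {
        "bool": {
            "must": [
                {"term": {"channel.channel_id": {"value": channel_id}}},
                {"term": {"player.watched": {"value": False}}},
            ]
        }
    }
}
requests.post(
    f"{es_url}/ta_video/_update_by_query?pipeline={channel_id}",
    data=json.dumps(query),
    headers=headers,
)
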

@ -7,7 +7,7 @@ import sys
def main():
    # pylint: disable=import-outside-toplevel
    """Run administrative tasks."""
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
@ -19,5 +19,5 @@ def main():
    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()

@ -8,10 +8,10 @@ import requests


class Requirements:
    """handle requirements.txt"""

    FILE_PATH = "tubearchivist/requirements.txt"
    LOCK = "/tmp/tubearchivist-requirements.lock"

    def __init__(self):
        self.exists = self.checked_today()
@ -19,24 +19,24 @@ class Requirements:
        self.all_updates = False

    def checked_today(self):
        """skip requirements check when lock file exists"""
        exists = pathlib.Path(self.LOCK).exists()
        return exists

    def look_for_updates(self):
        """look through requirements and check for updates"""
        self.all_requirements = self.get_dependencies()
        self.all_updates = self.check_packages()

    def get_dependencies(self):
        """read out requirements.txt"""

        all_requirements = []
        with open(self.FILE_PATH, "r", encoding="utf-8") as f:
            dependencies = f.readlines()

        for dependency in dependencies:
            package, version = dependency.split("==")
            all_requirements.append((package, version.strip()))

        all_requirements.sort(key=lambda x: x[0].lower())
@ -44,33 +44,32 @@ class Requirements:
        return all_requirements

    def check_packages(self):
        """compare installed with remote version"""

        total = len(self.all_requirements)
        print(f"checking versions for {total} packages...")

        all_updates = {}

        for dependency in self.all_requirements:
            package, version_installed = dependency
            url = f"https://pypi.org/pypi/{package}/json"
            response = requests.get(url).json()
            version_remote = response["info"]["version"]
            homepage = response["info"]["home_page"]
            if version_remote != version_installed:
                to_update = {
                    package: {"from": version_installed, "to": version_remote}
                }
                all_updates.update(to_update)
                message = (
                    f"update {package} {version_installed}"
                    + f"==> {version_remote}\n {homepage}"
                )
                print(message)

        if not all_updates:
            print("no updates found")

        # remember that
        pathlib.Path(self.LOCK).touch()
@ -78,7 +77,7 @@ class Requirements:
        return all_updates

    def apply_updates(self):
        """update requirements.txt file with new versions"""

        to_write = []

@ -86,31 +85,31 @@ class Requirements:
            package, old_version = requirement

            if package in self.all_updates.keys():
                package_version = self.all_updates[package]["to"]
            else:
                package_version = old_version

            to_write.append(f"{package}=={package_version}\n")

        with open(self.FILE_PATH, "w", encoding="utf-8") as f:
            f.writelines(to_write)

        print("requirements.txt updates")


def main():
    """main to check for updates"""
    handler = Requirements()
    if handler.exists:
        return

    handler.look_for_updates()
    if handler.all_updates:
        input_response = input("\nupdate requirements.txt? [y/n] ")
        if input_response == "y":
            handler.apply_updates()
        else:
            print("cancel update...")
            sys.exit(1)
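A hedged usage sketch of the checker above, driving the Requirements helper directly instead of through main(); the example package name and versions are illustrative, actual output depends on requirements.txt and PyPI:

# illustrative sketch only, not part of this commit
handler = Requirements()

if not handler.exists:             # no lock file from an earlier run
    handler.look_for_updates()     # reads requirements.txt, queries PyPI
    if handler.all_updates:
        print(handler.all_updates)  # e.g. {"Django": {"from": "3.2.7", "to": "3.2.9"}}
        handler.apply_updates()     # rewrites requirements.txt in place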