diff --git a/Dockerfile b/Dockerfile index 0ea95d4..07b5cc5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,7 +34,7 @@ VOLUME /youtube # start WORKDIR /app -EXPOSE 80 +EXPOSE 8000 RUN chmod +x ./run.sh diff --git a/README.md b/README.md index 1cb13fc..b3d10cd 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,8 @@ Detect the YouTube ID from filename, this accepts the default yt-dlp naming conv ## Potential pitfalls -**Elastic Search** in Docker requires the kernel setting of the host machine `vm.max_map_count` to be set to least 262144. +### vm.max_map_count +**Elastic Search** in Docker requires the kernel setting of the host machine `vm.max_map_count` to be set to at least 262144. To temporary set the value run: ``` @@ -94,6 +95,13 @@ To apply the change permanently depends on your host operating system: - On Arch based systems create a file */etc/sysctl.d/max_map_count.conf* with the content `vm.max_map_count = 262144`. - On any other platform look up in the documentation on how to pass kernel parameters. +### Permissions for elasticsearch +If you see a message similar to `AccessDeniedException[/usr/share/elasticsearch/data/nodes]` when initially starting elasticsearch, that means the container is not allowed to write files to the volume. +That's most likely the case when you run `docker-compose` as an unprivileged user. To fix that issue, shut down the container and on your host machine run: +``` +chown 1000:0 /path/to/mount/point +``` +This will match the permissions with the **UID** and **GID** of elasticsearch within the container and should fix the issue. ## Roadmap This should be considered as a **minimal viable product**, there is an extensive list of future functions and improvements planned. 
diff --git a/run.sh b/run.sh index 14a3663..e1eb4b7 100644 --- a/run.sh +++ b/run.sh @@ -14,7 +14,7 @@ until curl "$ES_URL" -fs; do done python manage.py migrate -python manage.py collectstatic +python manage.py collectstatic --noinput -c nginx & celery -A home.tasks worker --loglevel=INFO & uwsgi --ini uwsgi.ini diff --git a/tubearchivist/home/__init__.py b/tubearchivist/home/__init__.py index 8eb7ad5..d1e3e4e 100644 --- a/tubearchivist/home/__init__.py +++ b/tubearchivist/home/__init__.py @@ -22,8 +22,8 @@ def sync_redis_state(): def make_folders(): - """ make needed folders here to avoid letting docker messing it up """ - folders = ['download', 'channels', 'videos', 'import'] + """ make needed cache folders here so docker doesn't mess it up """ + folders = ['download', 'channels', 'videos', 'import', 'backup'] config = AppConfig().config cache_dir = config['application']['cache_dir'] for folder in folders: diff --git a/tubearchivist/home/src/index_management.py b/tubearchivist/home/src/index_management.py index 819ecce..533e379 100644 --- a/tubearchivist/home/src/index_management.py +++ b/tubearchivist/home/src/index_management.py @@ -8,6 +8,7 @@ Functionality: import json import os +import zipfile from datetime import datetime @@ -375,6 +376,7 @@ class ElasticBackup: self.config = AppConfig().config self.index_config = index_config self.timestamp = datetime.now().strftime('%Y%m%d') + self.backup_files = [] def get_all_documents(self, index_name): """ export all documents of a single index """ @@ -389,7 +391,7 @@ class ElasticBackup: data = { "query": {"match_all": {}}, "size": 100, "pit": {"id": pit_id, "keep_alive": "1m"}, - "sort": [ {"_id": {"order": "asc"}} ] + "sort": [{"_id": {"order": "asc"}}] } query_str = json.dumps(data) url = es_url + '/_search' @@ -422,7 +424,7 @@ class ElasticBackup: for document in all_results: document_id = document['_id'] es_index = document['_index'] - action = { "index" : { "_index": es_index, "_id": document_id } } + action = 
{"index": {"_index": es_index, "_id": document_id}} source = document['_source'] bulk_list.append(json.dumps(action)) bulk_list.append(json.dumps(source)) @@ -433,14 +435,44 @@ class ElasticBackup: return file_content - def write_json_file(self, file_content, index_name): - """ write json file to disk """ + def write_es_json(self, file_content, index_name): + """ write nd json file for es _bulk API to disk """ cache_dir = self.config['application']['cache_dir'] - file_name = f'ta_{index_name}-{self.timestamp}.json' - file_path = os.path.join(cache_dir, file_name) + file_name = f'es_{index_name}-{self.timestamp}.json' + file_path = os.path.join(cache_dir, 'backup', file_name) with open(file_path, 'w', encoding='utf-8') as f: f.write(file_content) + self.backup_files.append(file_path) + + def write_ta_json(self, all_results, index_name): + """ write generic json file to disk """ + cache_dir = self.config['application']['cache_dir'] + file_name = f'ta_{index_name}-{self.timestamp}.json' + file_path = os.path.join(cache_dir, 'backup', file_name) + to_write = [i['_source'] for i in all_results] + file_content = json.dumps(to_write) + with open(file_path, 'w', encoding='utf-8') as f: + f.write(file_content) + + self.backup_files.append(file_path) + + def zip_it(self): + """ pack it up into single zip file """ + cache_dir = self.config['application']['cache_dir'] + file_name = f'ta_backup-{self.timestamp}.zip' + backup_file = os.path.join(cache_dir, 'backup', file_name) + + with zipfile.ZipFile( + backup_file, 'w', compression=zipfile.ZIP_DEFLATED + ) as zip_f: + for backup_file in self.backup_files: + zip_f.write(backup_file) + + # cleanup + for backup_file in self.backup_files: + os.remove(backup_file) + def post_bulk_restore(self, file_name): """ send bulk to es """ cache_dir = self.config['application']['cache_dir'] @@ -475,7 +507,10 @@ def backup_all_indexes(): index_name = index['index_name'] all_results = backup_handler.get_all_documents(index_name) file_content = 
backup_handler.build_bulk(all_results) - backup_handler.write_json_file(file_content, index_name) + backup_handler.write_es_json(file_content, index_name) + backup_handler.write_ta_json(all_results, index_name) + + backup_handler.zip_it() def restore_from_backup(): diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index b24751c..ed6a6f3 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -15,6 +15,7 @@ from home.src.download import ( ) from home.src.config import AppConfig from home.src.reindex import reindex_old_documents, ManualImport +from home.src.index_management import backup_all_indexes from home.src.helper import get_lock @@ -54,8 +55,7 @@ def download_pending(): @shared_task def download_single(youtube_id): """ start download single video now """ - to_download = [youtube_id] - download_handler = VideoDownloader(to_download) + download_handler = VideoDownloader([youtube_id]) download_handler.download_list() @@ -93,3 +93,9 @@ def run_manual_import(): finally: if have_lock: my_lock.release() + +@shared_task +def run_backup(): + """ called from settings page, dump backup to zip file """ + backup_all_indexes() + print('backup finished') diff --git a/tubearchivist/home/templates/home/downloads.html b/tubearchivist/home/templates/home/downloads.html index db6621e..cd96bfb 100644 --- a/tubearchivist/home/templates/home/downloads.html +++ b/tubearchivist/home/templates/home/downloads.html @@ -29,7 +29,7 @@

Download queue

{% if pending %} -

Total pending downloads: {{ pending|length }}

+

Total pending downloads: {{ max_hits }}

{% for video in pending %}
@@ -44,7 +44,7 @@ {% endif %}

Published: {{ video.published }} | Duration: {{ video.duration }} | {{ video.youtube_id }}

- +
{% endfor %} diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 2fddf3d..79c11d1 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -114,17 +114,24 @@

Manual media files import.

-

Add files to the cache/import folder. Make sure to follow the instructions on Github.

+

Add files to the cache/import folder. Make sure to follow the instructions on Github.

-

Rescan filesystem.

+

Backup database

+

Export your database to a zip file stored at cache/backup.

+
+ +
+
+
+

Restore from backup.

Coming soon
-

Backup database.

+

Rescan filesystem.

Coming soon
{% endblock content %} \ No newline at end of file diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 5768c83..e00ddbf 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -31,7 +31,8 @@ from home.tasks import ( download_pending, extrac_dl, download_single, - run_manual_import + run_manual_import, + run_backup ) @@ -147,20 +148,50 @@ class DownloadView(View): takes POST for downloading youtube links """ - @staticmethod - def get(request): + def get(self, request): """ handle get requests """ config = AppConfig().config colors = config['application']['colors'] - pending_handler = PendingList() - all_pending, _ = pending_handler.get_all_pending() + + page_get = int(request.GET.get('page', 0)) + pagination_handler = Pagination(page_get) + + url = config['application']['es_url'] + '/ta_download/_search' + data = self.build_data(pagination_handler) + search = SearchHandler(url, data, cache=False) + + videos_hits = search.get_data() + max_hits = search.max_hits + + if videos_hits: + all_pending = [i['source'] for i in videos_hits] + pagination_handler.validate(max_hits) + pagination = pagination_handler.pagination + else: + all_pending = False + pagination = False + context = { 'pending': all_pending, + 'max_hits': max_hits, + 'pagination': pagination, 'title': 'Downloads', 'colors': colors } return render(request, 'home/downloads.html', context) + @staticmethod + def build_data(pagination_handler): + """ build data dict for search """ + page_size = pagination_handler.pagination['page_size'] + page_from = pagination_handler.pagination['page_from'] + data = { + "size": page_size, "from": page_from, + "query": {"term": {"status": {"value": "pending"}}}, + "sort": [{"timestamp": {"order": "desc"}}] + } + return data + @staticmethod def post(request): """ handle post requests """ @@ -442,7 +473,8 @@ class PostData: VALID_KEYS = [ "watched", "rescan_pending", "ignore", "dl_pending", "unsubscribe", "sort_order", 
"hide_watched", "show_subed_only", - "channel-search", "video-search", "dlnow", "manual-import" + "channel-search", "video-search", "dlnow", "manual-import", + "db-backup" ] def __init__(self, post_dict): @@ -510,10 +542,13 @@ class PostData: elif task == 'dlnow': youtube_id = item['status'] print('downloading: ' + youtube_id) - download_single(youtube_id) + download_single.delay(youtube_id=youtube_id) elif task == 'manual-import': print('starting manual import') run_manual_import.delay() + elif task == 'db-backup': + print('backing up database') + run_backup.delay() return {'success': True} def search_channels(self, search_query): diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 05a9816..94b5172 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -72,8 +72,8 @@ function toIgnore(button) { function downloadNow(button) { var youtube_id = button.getAttribute('data-id'); var payload = JSON.stringify({'dlnow': youtube_id}); - animate('download-icon', 'bounce-img'); sendPost(payload); + document.getElementById(youtube_id).remove(); setTimeout(function(){ handleInterval(); }, 500); @@ -91,6 +91,17 @@ function manualImport() { toReplace.appendChild(message); } +function dbBackup() { + var payload = JSON.stringify({'db-backup': true}); + sendPost(payload) + // clear button + var message = document.createElement('p'); + message.innerText = 'backing up archive'; + var toReplace = document.getElementById('db-backup'); + toReplace.innerHTML = ''; + toReplace.appendChild(message); +} + // player function createPlayer(button) { var mediaUrl = button.getAttribute('data-src');