Compare commits
No commits in common. "master" and "v0.2.3" have entirely different histories.
@ -17,5 +17,8 @@ venv/
|
||||
# Unneeded graphics
|
||||
assets/*
|
||||
|
||||
# Unneeded docs
|
||||
docs/*
|
||||
|
||||
# for local testing only
|
||||
testing.sh
|
17
.eslintrc.js
@ -1,17 +0,0 @@
|
||||
'use strict';
|
||||
module.exports = {
|
||||
extends: ['eslint:recommended', 'eslint-config-prettier'],
|
||||
parserOptions: {
|
||||
ecmaVersion: 2020,
|
||||
},
|
||||
env: {
|
||||
browser: true,
|
||||
},
|
||||
rules: {
|
||||
strict: ['error', 'global'],
|
||||
'no-unused-vars': ['error', { vars: 'local' }],
|
||||
eqeqeq: ['error', 'always', { null: 'ignore' }],
|
||||
curly: ['error', 'multi-line'],
|
||||
'no-var': 'error',
|
||||
},
|
||||
};
|
6
.github/ISSUE_TEMPLATE/BUG-REPORT.yml
vendored
@ -6,17 +6,15 @@ body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to help improve this project! Please read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide carefully before continuing.
|
||||
Thanks for taking the time to help improve this project!
|
||||
|
||||
- type: checkboxes
|
||||
id: latest
|
||||
attributes:
|
||||
label: "I've read the documentation"
|
||||
label: Latest and Greatest
|
||||
options:
|
||||
- label: I'm running the latest version of Tube Archivist and have read the [release notes](https://github.com/tubearchivist/tubearchivist/releases/latest).
|
||||
required: true
|
||||
- label: I have read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide, particularly the [bug report](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#bug-report) section.
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
|
35
.github/ISSUE_TEMPLATE/FEATURE-REQUEST.yml
vendored
@ -1,12 +1,37 @@
|
||||
name: Feature Request
|
||||
description: This Project currently doesn't take any new feature requests.
|
||||
description: Create a new feature request
|
||||
title: "[Feature Request]: "
|
||||
|
||||
body:
|
||||
- type: checkboxes
|
||||
id: block
|
||||
- type: markdown
|
||||
attributes:
|
||||
label: "This project doesn't accept any new feature requests for the forseeable future. There is no shortage of ideas and the next development steps are clear for years to come."
|
||||
value: |
|
||||
Thanks for taking the time to help improve this project!
|
||||
|
||||
- type: checkboxes
|
||||
id: already
|
||||
attributes:
|
||||
label: Already implemented?
|
||||
options:
|
||||
- label: I understand that this issue will be closed without comment.
|
||||
- label: I have read through the [wiki](https://github.com/tubearchivist/tubearchivist/wiki).
|
||||
required: true
|
||||
- label: I understand the [scope](https://github.com/tubearchivist/tubearchivist/wiki/FAQ) of this project and am aware of the [known limitations](https://github.com/tubearchivist/tubearchivist#known-limitations) and my idea is not already on the [roadmap](https://github.com/tubearchivist/tubearchivist#roadmap).
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Your Feature Request
|
||||
value: "## Is your feature request related to a problem? Please describe.\n\n## Describe the solution you'd like\n\n## Additional context"
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: checkboxes
|
||||
id: help
|
||||
attributes:
|
||||
label: Your help is needed!
|
||||
description: This project is ambitious as it is, please contribute.
|
||||
options:
|
||||
- label: Yes I can help with this feature request!
|
||||
required: false
|
||||
|
23
.github/ISSUE_TEMPLATE/FRONTEND-MIGRATION.yml
vendored
@ -1,23 +0,0 @@
|
||||
name: Frontend Migration
|
||||
description: Tracking our new React based frontend
|
||||
title: "[Frontend Migration]: "
|
||||
labels: ["react migration"]
|
||||
|
||||
body:
|
||||
- type: dropdown
|
||||
id: domain
|
||||
attributes:
|
||||
label: Domain
|
||||
options:
|
||||
- Frontend
|
||||
- Backend
|
||||
- Combined
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Description
|
||||
placeholder: Organizing our React frontend migration
|
||||
validations:
|
||||
required: true
|
6
.github/ISSUE_TEMPLATE/INSTALLATION-HELP.yml
vendored
@ -13,7 +13,9 @@ body:
|
||||
attributes:
|
||||
label: Installation instructions
|
||||
options:
|
||||
- label: I have read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide, particularly the [installation help](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#installation-help) section.
|
||||
- label: I have read and understand the [installation instructions](https://github.com/tubearchivist/tubearchivist#installing-and-updating).
|
||||
required: true
|
||||
- label: My issue is not described in the [potential pitfalls](https://github.com/tubearchivist/tubearchivist#potential-pitfalls) section.
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
@ -38,6 +40,6 @@ body:
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant Docker logs. This will be automatically formatted into code, so no need for backticks.
|
||||
render: Shell
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
||||
|
3
.github/pull_request_template.md
vendored
@ -1,3 +0,0 @@
|
||||
Thank you for taking the time to improve this project. Please take a look at the [How to make a Pull Request](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-make-a-pull-request) section to help get your contribution merged.
|
||||
|
||||
You can delete this text before submitting.
|
22
.github/workflows/lint_js.yml
vendored
@ -1,22 +0,0 @@
|
||||
name: lint_js
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '**/*.js'
|
||||
pull_request:
|
||||
paths:
|
||||
- '**/*.js'
|
||||
|
||||
jobs:
|
||||
check:
|
||||
name: lint_js
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22'
|
||||
- run: npm ci
|
||||
- run: npm run lint
|
||||
- run: npm run format -- --check
|
54
.github/workflows/lint_python.yml
vendored
@ -1,42 +1,22 @@
|
||||
name: lint_python
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '**/*.py'
|
||||
pull_request:
|
||||
paths:
|
||||
- '**/*.py'
|
||||
|
||||
on: [pull_request, push]
|
||||
jobs:
|
||||
lint_python:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Cache pip
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Install python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r tubearchivist/requirements-dev.txt
|
||||
|
||||
- name: Run Linter
|
||||
run: ./deploy.sh validate
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/setup-python@v2
|
||||
- run: pip install --upgrade pip wheel
|
||||
- run: pip install bandit black codespell flake8 flake8-bugbear
|
||||
flake8-comprehensions isort
|
||||
- run: black --check --diff --line-length 79 .
|
||||
- run: codespell
|
||||
- run: flake8 . --count --max-complexity=10 --max-line-length=79
|
||||
--show-source --statistics
|
||||
- run: isort --check-only --line-length 79 --profile black .
|
||||
# - run: pip install -r tubearchivist/requirements.txt
|
||||
# - run: mkdir --parents --verbose .mypy_cache
|
||||
# - run: mypy --ignore-missing-imports --install-types --non-interactive .
|
||||
# - run: python3 tubearchivist/manage.py test || true
|
||||
# - run: shopt -s globstar && pyupgrade --py36-plus **/*.py || true
|
||||
# - run: safety check
|
||||
|
43
.github/workflows/unit_tests.yml
vendored
@ -1,43 +0,0 @@
|
||||
name: python_unit_tests
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '**/*.py'
|
||||
pull_request:
|
||||
paths:
|
||||
- '**/*.py'
|
||||
|
||||
jobs:
|
||||
unit-tests:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc libldap2-dev libsasl2-dev libssl-dev
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Cache pip
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r tubearchivist/requirements-dev.txt
|
||||
|
||||
- name: Run unit tests
|
||||
run: pytest tubearchivist
|
4
.gitignore
vendored
@ -1,12 +1,8 @@
|
||||
# python testing cache
|
||||
__pycache__
|
||||
.venv
|
||||
|
||||
# django testing db
|
||||
db.sqlite3
|
||||
|
||||
# vscode custom conf
|
||||
.vscode
|
||||
|
||||
# JavaScript stuff
|
||||
node_modules
|
||||
|
@ -1,48 +0,0 @@
|
||||
# The Inner Workings of Tube Archivist
|
||||
This is a high level overview of the architecture of Tube Archivist, intended for interested contributors to find your way around quickly.
|
||||
|
||||
```
|
||||
Tube Archivist
|
||||
______________________|_____________________
|
||||
| | |
|
||||
------------------- --------------- -------------------
|
||||
| | | | | |
|
||||
| DjangoProject | | RedisJson | | ElasticSearch |
|
||||
| | | | | |
|
||||
------------------- --------------- -------------------
|
||||
```
|
||||
|
||||
## DjangoProject
|
||||
This is the main Python application. Django serves its data container internally with **Uwsgi** on port 8080, the interface is served with **Nginx** on the public port 8000.
|
||||
|
||||
Users created static files like media files and artwork as well as application artwork like logos and fonts are served directly from Nginx, while the rest of the application uses uwsgi_pass to proxy the requests to uwsgi.
|
||||
|
||||
Config files are located in the `docker_assets` folder. The script `run.sh` is the container `CMD` command and entry point, validating env vars, connection to ElasticSearch (ES) and will start the application.
|
||||
|
||||
Compared to other Django projects, this application doesn't make use of the database models, due to a lack of integration with ES. This project has its own abstractions and integrations, treating ES as a REST API.
|
||||
|
||||
Long running application tasks are handed off to **Celery** - using **Redis** as a broker - to run asynchronously from the main threads.
|
||||
- All tasks are defined in the `home.tasks.py` module.
|
||||
|
||||
There are three Django apps:
|
||||
- **config**: The root app, routing the main endpoints and the main `settings.py` file
|
||||
- **api**: The API app with its views and functionality
|
||||
- **home**: Most of the application logic, templates and views, will probably get split up further in the future.
|
||||
|
||||
The *home* app is split up into packages in the `src` directory:
|
||||
- **download**: All download related classes, interact with yt-dlp, download artwork, handle the download queue and post processing tasks.
|
||||
- **es**: All index setup and validation classes, handles mapping validations and makes mapping changes, wrapper functions to simplify interactions with Elasticsearch, backup and restore.
|
||||
- **frontend**: All direct interactions with the frontend, like Django forms, searching, watched state changes, and legacy api_calls in the process of moving to the api app.
|
||||
- **index**: Contains all functionality for scraping and indexing videos, channels, playlists, comments, subtitles, etc...
|
||||
- **ta**: Loose collection of functions and classes, handle application config and contains redis wrapper classes.
|
||||
|
||||
## RedisJson
|
||||
Holds the main application config json object that gets dynamically edited from the frontend, serves as a message broker for **Celery**. Redis serves as a temporary and thread safe link between Django and the frontend, storing progress messages and temporary queues for processing. Used to store locking keys for threads and execution details for tasks.
|
||||
|
||||
- Wrapper classes to interact with Redis are located in the `home.src.ta.ta_redis.py` module.
|
||||
|
||||
## ElasticSearch (ES)
|
||||
Is used to store and index all metadata, functions as an application database and makes it all searchable. The mapping defines which fields are indexed as searchable text fields and which fields are used for match filtering.
|
||||
|
||||
- The index setup and validation is handled in the `home.src.es.index_setup.py` module.
|
||||
- Wrapper classes for making requests to ES are located in the `home.src.es.connect.py` module.
|
207
CONTRIBUTING.md
@ -1,166 +1,27 @@
|
||||
# Contributing to Tube Archivist
|
||||
## Contributing to Tube Archivist
|
||||
|
||||
Welcome, and thanks for showing interest in improving Tube Archivist!
|
||||
If you haven't already, the best place to start is the README. This will give you an overview on what the project is all about.
|
||||
|
||||
## Table of Content
|
||||
- [Next Steps](#next-steps)
|
||||
- [Beta Testing](#beta-testing)
|
||||
- [How to open an issue](#how-to-open-an-issue)
|
||||
- [Bug Report](#bug-report)
|
||||
- [Feature Request](#feature-request)
|
||||
- [Installation Help](#installation-help)
|
||||
- [How to make a Pull Request](#how-to-make-a-pull-request)
|
||||
- [Contributions beyond the scope](#contributions-beyond-the-scope)
|
||||
- [User Scripts](#user-scripts)
|
||||
- [Improve to the Documentation](#improve-to-the-documentation)
|
||||
- [Development Environment](#development-environment)
|
||||
---
|
||||
## Report a bug
|
||||
|
||||
## Next Steps
|
||||
Going forward, this project will focus on developing a new modern frontend.
|
||||
If you notice something is not working as expected, check to see if it has been previously reported in the [open issues](https://github.com/tubearchivist/tubearchivist/issues).
|
||||
If it has not yet been disclosed, go ahead and create an issue.
|
||||
If the issue doesn't move forward due to a lack of response, I assume it's solved and will close it after some time to keep the list fresh.
|
||||
|
||||
- For the time being, don't open any new PRs that are not towards the new frontend.
|
||||
- New features requests likely won't get accepted during this process.
|
||||
- Depending on the severity, bug reports may or may not get fixed during this time.
|
||||
- When in doubt, reach out.
|
||||
## Wiki
|
||||
|
||||
Join us on [Discord](https://tubearchivist.com/discord) if you want to help with that process.
|
||||
|
||||
## Beta Testing
|
||||
Be the first to help test new features and improvements and provide feedback! There are regular `:unstable` builds for easy access. That's for the tinkerers and the breave. Ideally use a testing environment first, before a release be the first to install it on your main system.
|
||||
|
||||
There is always something that can get missed during development. Look at the commit messages tagged with `#build`, these are the unstable builds and give a quick overview what has changed.
|
||||
|
||||
- Test the features mentioned, play around, try to break it.
|
||||
- Test the update path by installing the `:latest` release first, the upgrade to `:unstable` to check for any errors.
|
||||
- Test the unstable build on a fresh install.
|
||||
|
||||
Then provide feedback, if there is a problem but also if there is no problem. Reach out on [Discord](https://tubearchivist.com/discord) in the `#beta-testing` channel with your findings.
|
||||
|
||||
This will help with a smooth update for the regular release. Plus you get to test things out early!
|
||||
|
||||
## How to open an issue
|
||||
Please read this carefully before opening any [issue](https://github.com/tubearchivist/tubearchivist/issues) on GitHub. Make sure you read [Next Steps](#next-steps) above.
|
||||
|
||||
**Do**:
|
||||
- Do provide details and context, this matters a lot and makes it easier for people to help.
|
||||
- Do familiarize yourself with the project first, some questions answer themselves when using the project for some time. Familiarize yourself with the [Readme](https://github.com/tubearchivist/tubearchivist) and the [documentation](https://docs.tubearchivist.com/), this covers a lot of the common questions, particularly the [FAQ](https://docs.tubearchivist.com/faq/).
|
||||
- Do respond to questions within a day or two so issues can progress. If the issue doesn't move forward due to a lack of response, we'll assume it's solved and we'll close it after some time to keep the list fresh.
|
||||
|
||||
**Don't**:
|
||||
- Don't open *duplicates*, that includes open and closed issues.
|
||||
- Don't open an issue for something that's already on the [roadmap](https://github.com/tubearchivist/tubearchivist#roadmap), this needs your help to implement it, not another issue.
|
||||
- Don't open an issue for something that's a [known limitation](https://github.com/tubearchivist/tubearchivist#known-limitations). These are *known* by definition and don't need another reminder. Some limitations may be solved in the future, maybe by you?
|
||||
- Don't overwrite the *issue template*, they are there for a reason. Overwriting that shows that you don't really care about this project. It shows that you have a misunderstanding how open source collaboration works and just want to push your ideas through. Overwriting the template may result in a ban.
|
||||
|
||||
### Bug Report
|
||||
Bug reports are highly welcome! This project has improved a lot due to your help by providing feedback when something doesn't work as expected. The developers can't possibly cover all edge cases in an ever changing environment like YouTube and yt-dlp.
|
||||
|
||||
Please keep in mind:
|
||||
- Docker logs are the easiest way to understand what's happening when something goes wrong, *always* provide the logs upfront.
|
||||
- Set the environment variable `DJANGO_DEBUG=True` to Tube Archivist and reproduce the bug for a better log output. Don't forget to remove that variable again after.
|
||||
- A bug that can't be reproduced, is difficult or sometimes even impossible to fix. Provide very clear steps *how to reproduce*.
|
||||
|
||||
### Feature Request
|
||||
This project doesn't take any new feature requests. This project doesn't lack ideas, see the currently open tasks and roadmap. New feature requests aren't helpful at this point in time. Thank you for your understanding.
|
||||
|
||||
### Installation Help
|
||||
GitHub is most likely not the best place to ask for installation help. That's inherently individual and one on one.
|
||||
1. First step is always, help yourself. Start at the [Readme](https://github.com/tubearchivist/tubearchivist) or the additional platform specific installation pages in the [docs](https://docs.tubearchivist.com/).
|
||||
2. If that doesn't answer your question, open a `#support` thread on [Discord](https://www.tubearchivist.com/discord).
|
||||
3. Only if that is not an option, open an issue here.
|
||||
|
||||
IMPORTANT: When receiving help, contribute back to the community by improving the installation instructions with your newly gained knowledge.
|
||||
|
||||
---
|
||||
|
||||
## How to make a Pull Request
|
||||
|
||||
Make sure you read [Next Steps](#next-steps) above.
|
||||
|
||||
Thank you for contributing and helping improve this project. Focus for the foreseeable future is on improving and building on existing functionality, *not* on adding and expanding the application.
|
||||
|
||||
This is a quick checklist to help streamline the process:
|
||||
|
||||
- For **code changes**, make your PR against the [testing branch](https://github.com/tubearchivist/tubearchivist/tree/testing). That's where all active development happens. This simplifies the later merging into *master*, minimizes any conflicts and usually allows for easy and convenient *fast-forward* merging.
|
||||
- For **documentation changes**, make your PR directly against the *master* branch.
|
||||
- Show off your progress, even if not yet complete, by creating a [draft](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests#draft-pull-requests) PR first and switch it as *ready* when you are ready.
|
||||
- Make sure all your code is linted and formatted correctly, see below. The automatic GH action unfortunately needs to be triggered manually by a maintainer for first time contributors, but will trigger automatically for existing contributors.
|
||||
|
||||
### Making changes to the JavaScript
|
||||
|
||||
The JavaScript does not require any build step; you just edit the files directly. However, there is config for eslint and prettier (a linter and formatter respectively); their use is recommended but not required. To use them, install `node`, run `npm i` from the root directory of this repository to install dependencies, then run `npm run lint` and `npm run format` to run eslint and prettier respectively.
|
||||
|
||||
### Code formatting and linting
|
||||
|
||||
To keep things clean and consistent for everybody, there is a github action setup to lint and check the changes. You can test your code locally first if you want. For example if you made changes in the **video** module, run
|
||||
|
||||
```shell
|
||||
./deploy.sh validate tubearchivist/home/src/index/video.py
|
||||
```
|
||||
|
||||
to validate your changes. If you omit the path, all the project files will get checked. This is subject to change as the codebase improves.
|
||||
|
||||
---
|
||||
|
||||
## Contributions beyond the scope
|
||||
|
||||
As you have read the [FAQ](https://docs.tubearchivist.com/faq/) and the [known limitations](https://github.com/tubearchivist/tubearchivist#known-limitations) and have gotten an idea what this project tries to do, there will be some obvious shortcomings that stand out, that have been explicitly excluded from the scope of this project, at least for the time being.
|
||||
|
||||
Extending the scope of this project will only be feasible with more [regular contributors](https://github.com/tubearchivist/tubearchivist/graphs/contributors) that are willing to help improve this project in the long run. Contributors that have an overall improvement of the project in mind and not just about implementing this *one* thing.
|
||||
|
||||
Small minor additions, or making a PR for a documented feature request or bug, even if that was and will be your only contribution to this project, are always welcome and is *not* what this is about.
|
||||
|
||||
Beyond that, general rules to consider:
|
||||
|
||||
- Maintainability is key: It's not just about implementing something and being done with it, it's about maintaining it, fixing bugs as they occur, improving on it and supporting it in the long run.
|
||||
- Others can do it better: Some problems have been solved by very talented developers. These things don't need to be reinvented again here in this project.
|
||||
- Develop for the 80%: New features and additions *should* be beneficial for 80% of the users. If you are trying to solve your own problem that only applies to you, maybe that would be better to do in your own fork or if possible by a standalone implementation using the API.
|
||||
- If all of that sounds too strict for you, as stated above, start becoming a regular contributor to this project.
|
||||
|
||||
---
|
||||
|
||||
## User Scripts
|
||||
Some of you might have created useful scripts or API integrations around this project. Sharing is caring! Please add a link to your script to the Readme [here](https://github.com/tubearchivist/tubearchivist#user-scripts).
|
||||
- Your repo should have a `LICENSE` file with one of the common open source licenses. People are expected to fork, adapt and build upon your great work.
|
||||
- Your script should not modify the *official* files of Tube Archivist. E.g. your symlink script should build links *outside* of your `/youtube` folder. Or your fancy script that creates a beautiful artwork gallery should do that *outside* of the `/cache` folder. Modifying the *official* files and folders of TA are probably not supported.
|
||||
- On the top of the repo you should have a mention and a link back to the Tube Archivist repo. Clearly state to **not** to open any issues on the main TA repo regarding your script.
|
||||
- Example template:
|
||||
- `[<user>/<repo>](https://linktoyourrepo.com)`: A short one line description.
|
||||
|
||||
---
|
||||
|
||||
## Improve to the Documentation
|
||||
|
||||
The documentation available at [docs.tubearchivist.com](https://docs.tubearchivist.com/) and is build from a separate repo [tubearchivist/docs](https://github.com/tubearchivist/docs). The Readme has additional instructions on how to make changes.
|
||||
|
||||
---
|
||||
The wiki is where all user functions are documented in detail. These pages are mirrored into the **docs** folder of the repo. This allows for pull requests and all other features like regular code. Make any changes there, and I'll sync them with the wiki tab.
|
||||
|
||||
## Development Environment
|
||||
|
||||
I have learned the hard way, that working on a dockerized application outside of docker is very error prone and in general not a good idea. So if you want to test your changes, it's best to run them in a docker testing environment. You might be able to run the application directly, but this document assumes you're using docker.
|
||||
I have learned the hard way, that working on a dockerized application outside of docker is very error prone and in general not a good idea. So if you want to test your changes, it's best to run them in a docker testing environment.
|
||||
|
||||
### Instructions
|
||||
|
||||
Set up docker on your development machine.
|
||||
|
||||
Clone this repository.
|
||||
|
||||
Functional changes should be made against the unstable `testing` branch, so check that branch out, then make a new branch for your work.
|
||||
|
||||
Edit the `docker-compose.yml` file and replace the [`image: bbilly1/tubearchivist` line](https://github.com/tubearchivist/tubearchivist/blob/4af12aee15620e330adf3624c984c3acf6d0ac8b/docker-compose.yml#L7) with `build: .`. Also make any other changes to the environment variables and so on necessary to run the application, just like you're launching the application as normal.
|
||||
|
||||
Run `docker compose up --build`. This will bring up the application. Kill it with `ctrl-c` or by running `docker compose down` from a new terminal window in the same directory.
|
||||
|
||||
Make your changes locally and re-run `docker compose up --build`. The `Dockerfile` is structured in a way that the actual application code is in the last layer so rebuilding the image with only code changes utilizes the build cache for everything else and will just take a few seconds.
|
||||
|
||||
### Develop environment inside a VM
|
||||
|
||||
You may find it nice to run everything inside of a VM, though this is not necessary. There's a `deploy.sh` script which has some helpers for this use case. YMMV, this is what one of the developers does:
|
||||
|
||||
- Clone the repo, work on it with your favorite code editor in your local filesystem. *testing* branch is where all the changes are happening, might be unstable and is WIP.
|
||||
- Then I have a VM running standard Ubuntu Server LTS with docker installed. The VM keeps my projects separate and offers convenient snapshot functionality. The VM also offers ways to simulate low end environments by limiting CPU cores and memory. You can use this [Ansible Docker Ubuntu](https://github.com/bbilly1/ansible-playbooks) playbook to get started quickly. But you could also just run docker on your host system.
|
||||
- I have my local DNS resolve `tubearchivist.local` to the IP of the VM for convenience. To deploy the latest changes and rebuild the application to the testing VM run:
|
||||
This is my setup I have landed on, YMMV:
|
||||
- Clone the repo, work on it with your favorite code editor in your local filesystem. *testing* branch is the where all the changes are happening, might be unstable and is WIP.
|
||||
- Then I have a VM running standard Ubuntu Server LTS with docker installed. The VM keeps my projects separate and offers convenient snapshot functionality. The VM also offers ways to simulate lowend environments by limiting CPU cores and memory. You can use this [Ansible Docker Ubuntu](https://github.com/bbilly1/ansible-playbooks) playbook to get started quickly. But you could also just run docker on your host system.
|
||||
- The `Dockerfile` is structured in a way that the actual application code is in the last layer so rebuilding the image with only code changes utilizes the build cache for everything else and will just take a few seconds.
|
||||
- Take a look at the `deploy.sh` file. I have my local DNS resolve `tubearchivist.local` to the IP of the VM for convenience. To deploy the latest changes and rebuild the application to the testing VM run:
|
||||
```bash
|
||||
./deploy.sh test
|
||||
```
|
||||
@ -168,7 +29,7 @@ You may find it nice to run everything inside of a VM, though this is not necess
|
||||
- The `test` argument takes another optional argument to build for a specific architecture valid options are: `amd64`, `arm64` and `multi`, default is `amd64`.
|
||||
- This `deploy.sh` script is not meant to be universally usable for every possible environment but could serve as an idea on how to automatically rebuild containers to test changes - customize to your liking.
|
||||
|
||||
### Working with Elasticsearch
|
||||
## Working with Elasticsearch
|
||||
Additionally to the required services as listed in the example docker-compose file, the **Dev Tools** of [Kibana](https://www.elastic.co/guide/en/kibana/current/docker.html) are invaluable for running and testing Elasticsearch queries.
|
||||
|
||||
**Quick start**
|
||||
@ -179,15 +40,41 @@ bin/elasticsearch-service-tokens create elastic/kibana kibana
|
||||
|
||||
Example docker compose, use same version as for Elasticsearch:
|
||||
```yml
|
||||
services:
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:0.0.0
|
||||
container_name: kibana
|
||||
environment:
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:0.0.0
|
||||
container_name: kibana
|
||||
environment:
|
||||
- "ELASTICSEARCH_HOSTS=http://archivist-es:9200"
|
||||
- "ELASTICSEARCH_SERVICEACCOUNTTOKEN=<your-token-here>"
|
||||
ports:
|
||||
ports:
|
||||
- "5601:5601"
|
||||
```
|
||||
|
||||
If you want to run queries on the Elasticsearch container directly from your host with for example `curl` or something like *postman*, you might want to **publish** the port 9200 instead of just **exposing** it.
|
||||
|
||||
## Implementing a new feature
|
||||
|
||||
Do you see anything on the roadmap that you would like to take a closer look at but you are not sure, what's the best way to tackle that? Or anything not on there yet you'd like to implement but are not sure how? Reach out on Discord and we'll look into it together.
|
||||
|
||||
## Making changes
|
||||
|
||||
To fix a bug or implement a feature, fork the repository and make all changes to the testing branch. When ready, create a pull request.
|
||||
|
||||
## Releases
|
||||
|
||||
There are three different docker tags:
|
||||
- **latest**: As the name implies is the latest multiarch release for regular usage.
|
||||
- **unstable**: Intermediate amd64 builds for quick testing and improved collaboration. Don't mix with a *latest* installation, for your testing environment only. This is untested and WIP and will have breaking changes between commits that might require a reset to resolve.
|
||||
- **semantic versioning**: There will be a handful named version tags that will also have a matching release and tag on github.
|
||||
|
||||
If you want to see what's in your container, checkout the matching release tag. A merge to **master** usually means a *latest* or *unstable* release. If you want to preview changes in your testing environment, pull the *unstable* tag or clone the repository and build the docker container with the Dockerfile from the **testing** branch.
|
||||
|
||||
## Code formatting and linting
|
||||
|
||||
To keep things clean and consistent for everybody, there is a github action setup to lint and check the changes. You can test your code locally first if you want. For example if you made changes in the **video** module, run
|
||||
|
||||
```shell
|
||||
./deploy.sh validate tubearchivist/home/src/index/video.py
|
||||
```
|
||||
|
||||
to validate your changes. If you omit the path, all the project files will get checked. This is subject to change as the codebase improves.
|
||||
|
43
Dockerfile
@ -1,28 +1,22 @@
|
||||
# multi stage to build tube archivist
|
||||
# build python wheel, download and extract ffmpeg, copy into final image
|
||||
# first stage to build python wheel, copy into final image
|
||||
|
||||
|
||||
# First stage to build python wheel
|
||||
FROM python:3.11.8-slim-bookworm AS builder
|
||||
FROM python:3.10.8-slim-bullseye AS builder
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential gcc libldap2-dev libsasl2-dev libssl-dev git
|
||||
RUN apt-get update
|
||||
RUN apt-get install -y --no-install-recommends build-essential gcc libldap2-dev libsasl2-dev libssl-dev
|
||||
|
||||
# install requirements
|
||||
COPY ./tubearchivist/requirements.txt /requirements.txt
|
||||
RUN pip install --user -r requirements.txt
|
||||
|
||||
# build ffmpeg
|
||||
FROM python:3.11.8-slim-bookworm as ffmpeg-builder
|
||||
# build final image
|
||||
FROM python:3.10.8-slim-bullseye as tubearchivist
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
COPY docker_assets/ffmpeg_download.py ffmpeg_download.py
|
||||
RUN python ffmpeg_download.py $TARGETPLATFORM
|
||||
|
||||
# build final image
|
||||
FROM python:3.11.8-slim-bookworm as tubearchivist
|
||||
|
||||
ARG INSTALL_DEBUG
|
||||
|
||||
ENV PYTHONUNBUFFERED 1
|
||||
@ -31,21 +25,32 @@ ENV PYTHONUNBUFFERED 1
|
||||
COPY --from=builder /root/.local /root/.local
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
|
||||
# copy ffmpeg
|
||||
COPY --from=ffmpeg-builder ./ffmpeg/ffmpeg /usr/bin/ffmpeg
|
||||
COPY --from=ffmpeg-builder ./ffprobe/ffprobe /usr/bin/ffprobe
|
||||
|
||||
# install distro packages needed
|
||||
RUN apt-get clean && apt-get -y update && apt-get -y install --no-install-recommends \
|
||||
nginx \
|
||||
atomicparsley \
|
||||
curl && rm -rf /var/lib/apt/lists/*
|
||||
curl \
|
||||
xz-utils && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# get newest patched ffmpeg and ffprobe builds for amd64 fall back to repo ffmpeg for arm64
|
||||
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
|
||||
curl -s https://api.github.com/repos/yt-dlp/FFmpeg-Builds/releases/latest \
|
||||
| grep browser_download_url \
|
||||
| grep ".*master.*linux64.*tar.xz" \
|
||||
| cut -d '"' -f 4 \
|
||||
| xargs curl -L --output ffmpeg.tar.xz && \
|
||||
tar -xf ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/bin/ "ffmpeg" && \
|
||||
tar -xf ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/bin/ "ffprobe" && \
|
||||
rm ffmpeg.tar.xz \
|
||||
; elif [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
|
||||
apt-get -y update && apt-get -y install --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
|
||||
# install debug tools for testing environment
|
||||
RUN if [ "$INSTALL_DEBUG" ] ; then \
|
||||
apt-get -y update && apt-get -y install --no-install-recommends \
|
||||
vim htop bmon net-tools iputils-ping procps \
|
||||
&& pip install --user ipython pytest pytest-django \
|
||||
&& pip install --user ipython \
|
||||
; fi
|
||||
|
||||
# make folders
|
||||
|
272
README.md
@ -1,173 +1,195 @@
|
||||
![Tube Archivist](assets/tube-archivist-front.jpg?raw=true "Tube Archivist Banner")
|
||||
[*more screenshots and video*](SHOWCASE.MD)
|
||||
![Tube Archivist](assets/tube-archivist-banner.jpg?raw=true "Tube Archivist Banner")
|
||||
|
||||
<h1 align="center">Your self hosted YouTube media server</h1>
|
||||
<div align="center">
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-docker.png" alt="tubearchivist-docker" title="Tube Archivist Docker Pulls" height="50" width="190"/></a>
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-github-star.png" alt="tubearchivist-github-star" title="Tube Archivist GitHub Stars" height="50" width="190"/></a>
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-github-forks.png" alt="tubearchivist-github-forks" title="Tube Archivist GitHub Forks" height="50" width="190"/></a>
|
||||
<a href="https://www.tubearchivist.com/discord" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-discord.png" alt="tubearchivist-discord" title="TA Discord Server Members" height="50" width="190"/></a>
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-docker.png" alt="tubearchivist-docker" title="Tube Archivist Docker Pulls" height="50" width="200"/></a>
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-github-star.png" alt="tubearchivist-github-star" title="Tube Archivist GitHub Stars" height="50" width="200"/></a>
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-github-forks.png" alt="tubearchivist-github-forks" title="Tube Archivist GitHub Forks" height="50" width="200"/></a>
|
||||
</div>
|
||||
|
||||
## Table of contents:
|
||||
* [Docs](https://docs.tubearchivist.com/) with [FAQ](https://docs.tubearchivist.com/faq/), and API documentation
|
||||
* [Wiki](https://github.com/tubearchivist/tubearchivist/wiki) with [FAQ](https://github.com/tubearchivist/tubearchivist/wiki/FAQ)
|
||||
* [Core functionality](#core-functionality)
|
||||
* [Resources](#resources)
|
||||
* [Installing](#installing)
|
||||
* [Screenshots](#screenshots)
|
||||
* [Problem Tube Archivist tries to solve](#problem-tube-archivist-tries-to-solve)
|
||||
* [Connect](#connect)
|
||||
* [Extended Universe](#extended-universe)
|
||||
* [Installing and updating](#installing-and-updating)
|
||||
* [Getting Started](#getting-started)
|
||||
* [Known limitations](#known-limitations)
|
||||
* [Port Collisions](#port-collisions)
|
||||
* [Common Errors](#common-errors)
|
||||
* [Potential pitfalls](#potential-pitfalls)
|
||||
* [Roadmap](#roadmap)
|
||||
* [Known limitations](#known-limitations)
|
||||
* [Donate](#donate)
|
||||
|
||||
------------------------
|
||||
|
||||
## Core functionality
|
||||
Once your YouTube video collection grows, it becomes hard to search and find a specific video. That's where Tube Archivist comes in: By indexing your video collection with metadata from YouTube, you can organize, search and enjoy your archived YouTube videos without hassle offline through a convenient web interface. This includes:
|
||||
* Subscribe to your favorite YouTube channels
|
||||
* Download Videos using **yt-dlp**
|
||||
* Index and make videos searchable
|
||||
* Play videos
|
||||
* Keep track of viewed and unviewed videos
|
||||
|
||||
## Resources
|
||||
- [Discord](https://www.tubearchivist.com/discord): Connect with us on our Discord server.
|
||||
## Tube Archivist on YouTube
|
||||
[![ibracorp-youtube-video-thumb](assets/tube-archivist-ibracorp-O8H8Z01c0Ys.jpg)](https://www.youtube.com/watch?v=O8H8Z01c0Ys)
|
||||
|
||||
## Screenshots
|
||||
![home screenshot](assets/tube-archivist-screenshot-home.png?raw=true "Tube Archivist Home")
|
||||
*Home Page*
|
||||
|
||||
![channels screenshot](assets/tube-archivist-screenshot-channels.png?raw=true "Tube Archivist Channels")
|
||||
*All Channels*
|
||||
|
||||
![single channel screenshot](assets/tube-archivist-screenshot-single-channel.png?raw=true "Tube Archivist Single Channel")
|
||||
*Single Channel*
|
||||
|
||||
![video page screenshot](assets/tube-archivist-screenshot-video.png?raw=true "Tube Archivist Video Page")
|
||||
*Video Page*
|
||||
|
||||
![video page screenshot](assets/tube-archivist-screenshot-download.png?raw=true "Tube Archivist Video Page")
|
||||
*Downloads Page*
|
||||
|
||||
## Problem Tube Archivist tries to solve
|
||||
Once your YouTube video collection grows, it becomes hard to search and find a specific video. That's where Tube Archivist comes in: By indexing your video collection with metadata from YouTube, you can organize, search and enjoy your archived YouTube videos without hassle offline through a convenient web interface.
|
||||
|
||||
## Connect
|
||||
- [Discord](https://discord.gg/AFwz8nE7BK): Connect with us on our Discord server.
|
||||
- [r/TubeArchivist](https://www.reddit.com/r/TubeArchivist/): Join our Subreddit.
|
||||
|
||||
## Extended Universe
|
||||
- [Browser Extension](https://github.com/tubearchivist/browser-extension) Tube Archivist Companion, for [Firefox](https://addons.mozilla.org/addon/tubearchivist-companion/) and [Chrome](https://chrome.google.com/webstore/detail/tubearchivist-companion/jjnkmicfnfojkkgobdfeieblocadmcie)
|
||||
- [Jellyfin Plugin](https://github.com/tubearchivist/tubearchivist-jf-plugin): Add your videos to Jellyfin
|
||||
- [Plex Plugin](https://github.com/tubearchivist/tubearchivist-plex): Add your videos to Plex
|
||||
- [Tube Archivist Metrics](https://github.com/tubearchivist/tubearchivist-metrics) to create statistics in Prometheus/OpenMetrics format.
|
||||
|
||||
## Installing
|
||||
For minimal system requirements, the Tube Archivist stack needs around 2GB of available memory for a small testing setup and around 4GB of available memory for a mid to large sized installation. A dual core CPU with 4 threads is the minimum, a quad core or better is recommended.
|
||||
This project requires docker. Ensure it is installed and running on your system.
|
||||
## Installing and updating
|
||||
Take a look at the example `docker-compose.yml` file provided. Use the *latest* or the named semantic version tag. The *unstable* tag is for intermediate testing and, as the name implies, is **unstable** and should not be used on your main installation but in a [testing environment](CONTRIBUTING.md).
|
||||
|
||||
The documentation has additional user provided instructions for [Unraid](https://docs.tubearchivist.com/installation/unraid/), [Synology](https://docs.tubearchivist.com/installation/synology/) and [Podman](https://docs.tubearchivist.com/installation/podman/).
|
||||
For minimal system requirements, the Tube Archivist stack needs around 2GB of available memory for a small testing setup and around 4GB of available memory for a mid to large sized installation.
|
||||
|
||||
The instructions here should get you up and running quickly, for Docker beginners and full explanation about each environment variable, see the [docs](https://docs.tubearchivist.com/installation/docker-compose/).
|
||||
Tube Archivist depends on three main components split up into separate docker containers:
|
||||
|
||||
Take a look at the example [docker-compose.yml](https://github.com/tubearchivist/tubearchivist/blob/master/docker-compose.yml) and configure the required environment variables.
|
||||
### Tube Archivist
|
||||
The main Python application that displays and serves your video collection, built with Django.
|
||||
- Serves the interface on port `8000`
|
||||
- Needs a volume for the video archive at **/youtube**
|
||||
- And another volume to save application data at **/cache**.
|
||||
- The environment variables `ES_URL` and `REDIS_HOST` are needed to tell Tube Archivist where Elasticsearch and Redis respectively are located.
|
||||
- The environment variables `HOST_UID` and `HOST_GID` allows Tube Archivist to `chown` the video files to the main host system user instead of the container user. Those two variables are optional, not setting them will disable that functionality. That might be needed if the underlying filesystem doesn't support `chown` like *NFS*.
|
||||
- Set the environment variable `TA_HOST` to match with the system running Tube Archivist. This can be a domain like *example.com*, a subdomain like *ta.example.com* or an IP address like *192.168.1.20*, add without the protocol and without the port. You can add multiple hostnames separated with a space. Any wrong configurations here will result in a `Bad Request (400)` response.
|
||||
- Change the environment variables `TA_USERNAME` and `TA_PASSWORD` to create the initial credentials.
|
||||
- `ELASTIC_PASSWORD` is for the password for Elasticsearch. The environment variable `ELASTIC_USER` is optional, should you want to change the username from the default *elastic*.
|
||||
- For the scheduler to know what time it is, set your timezone with the `TZ` environment variable, defaults to *UTC*.
|
||||
|
||||
**TubeArchivist**:
|
||||
| Environment Var | Value | |
|
||||
| ----------- | ----------- | ----------- |
|
||||
| TA_HOST | Server IP or hostname | Required |
|
||||
| TA_USERNAME | Initial username when logging into TA | Required |
|
||||
| TA_PASSWORD | Initial password when logging into TA | Required |
|
||||
| ELASTIC_PASSWORD | Password for ElasticSearch | Required |
|
||||
| REDIS_HOST | Hostname for Redis | Required |
|
||||
| TZ | Set your timezone for the scheduler | Required |
|
||||
| TA_PORT | Overwrite Nginx port | Optional |
|
||||
| TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional |
|
||||
| TA_ENABLE_AUTH_PROXY | Enables support for forwarding auth in reverse proxies | [Read more](https://docs.tubearchivist.com/configuration/forward-auth/) |
|
||||
| TA_AUTH_PROXY_USERNAME_HEADER | Header containing username to log in | Optional |
|
||||
| TA_AUTH_PROXY_LOGOUT_URL | Logout URL for forwarded auth | Optional |
|
||||
| ES_URL | URL That ElasticSearch runs on | Optional |
|
||||
| ES_DISABLE_VERIFY_SSL | Disable ElasticSearch SSL certificate verification | Optional |
|
||||
| ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional |
|
||||
| HOST_GID | Allow TA to own the video files instead of container user | Optional |
|
||||
| HOST_UID | Allow TA to own the video files instead of container user | Optional |
|
||||
| ELASTIC_USER | Change the default ElasticSearch user | Optional |
|
||||
| REDIS_PORT | Port that Redis runs on | Optional |
|
||||
| TA_LDAP | Configure TA to use LDAP Authentication | [Read more](https://docs.tubearchivist.com/configuration/ldap/) |
|
||||
| ENABLE_CAST | Enable casting support | [Read more](https://docs.tubearchivist.com/configuration/cast/) |
|
||||
| DJANGO_DEBUG | Return additional error messages, for debug only | |
|
||||
|
||||
**ElasticSearch**
|
||||
| Environment Var | Value | State |
|
||||
| ----------- | ----------- | ----------- |
|
||||
| ELASTIC_PASSWORD | Matching password `ELASTIC_PASSWORD` from TubeArchivist | Required |
|
||||
| http.port | Change the port ElasticSearch runs on | Optional |
|
||||
|
||||
|
||||
## Update
|
||||
Always use the *latest* (the default) or a named semantic version tag for the docker images. The *unstable* tags are only for your testing environment, there might not be an update path for these testing builds.
|
||||
|
||||
You will see the current version number of **Tube Archivist** in the footer of the interface. There is a daily version check task querying tubearchivist.com, notifying you of any new releases in the footer. To update, you need to update the docker images, the method for which will depend on your platform. For example, if you're using `docker-compose`, run `docker-compose pull` and then restart with `docker-compose up -d`. After updating, check the footer to verify you are running the expected version.
|
||||
|
||||
- This project is tested for updates between one or two releases maximum. Further updates back may or may not be supported and you might have to reset your index and configurations to update. Ideally apply new updates at least once per month.
|
||||
- There can be breaking changes between updates, particularly as the application grows, new environment variables or settings might be required for you to set in your docker-compose file. *Always* check the **release notes**: Any breaking changes will be marked there.
|
||||
- All testing and development is done with the Elasticsearch version number as mentioned in the provided *docker-compose.yml* file. This will be updated when a new release of Elasticsearch is available. Running an older version of Elasticsearch is most likely not going to result in any issues, but it's still recommended to run the same version as mentioned. Use `bbilly1/tubearchivist-es` to automatically get the recommended version.
|
||||
|
||||
## Getting Started
|
||||
1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example:
|
||||
```
|
||||
bestvideo[vcodec*=avc1]+bestaudio[acodec*=mp4a]/mp4
|
||||
```
|
||||
2. Subscribe to some of your favorite YouTube channels on the **channels** page.
|
||||
3. On the **downloads** page, click on *Rescan subscriptions* to add videos from the subscribed channels to your Download queue or click on *Add to download queue* to manually add Video IDs, links, channels or playlists.
|
||||
4. Click on *Start download* and let **Tube Archivist** do its thing.
|
||||
5. Enjoy your archived collection!
|
||||
|
||||
|
||||
### Port Collisions
|
||||
### Port collisions
|
||||
If you have a collision on port `8000`, best solution is to use dockers *HOST_PORT* and *CONTAINER_PORT* distinction: To for example change the interface to port 9000 use `9000:8000` in your docker-compose file.
|
||||
|
||||
For more information on port collisions, check the docs.
|
||||
Should that not be an option, the Tube Archivist container takes these two additional environment variables:
|
||||
- **TA_PORT**: To actually change the port where nginx listens, make sure to also change the ports value in your docker-compose file.
|
||||
- **TA_UWSGI_PORT**: To change the default uwsgi port 8080 used for container internal networking between uwsgi serving the django application and nginx.
|
||||
|
||||
## Common Errors
|
||||
Here is a list of common errors and their solutions.
|
||||
Changing either of these two environment variables will change the files *nginx.conf* and *uwsgi.ini* at startup using `sed` in your container.
|
||||
|
||||
### `vm.max_map_count`
|
||||
### LDAP Authentication
|
||||
You can configure LDAP with the following environment variables:
|
||||
|
||||
- `TA_LDAP` (ex: `true`) Set to anything besides empty string to use LDAP authentication **instead** of local user authentication.
|
||||
- `TA_LDAP_SERVER_URI` (ex: `ldap://ldap-server:389`) Set to the uri of your LDAP server.
|
||||
- `TA_LDAP_DISABLE_CERT_CHECK` (ex: `true`) Set to anything besides empty string to disable certificate checking when connecting over LDAPS.
|
||||
- `TA_LDAP_BIND_DN` (ex: `uid=search-user,ou=users,dc=your-server`) DN of the user that is able to perform searches on your LDAP account.
|
||||
- `TA_LDAP_BIND_PASSWORD` (ex: `yoursecretpassword`) Password for the search user.
|
||||
- `TA_LDAP_USER_BASE` (ex: `ou=users,dc=your-server`) Search base for user filter.
|
||||
- `TA_LDAP_USER_FILTER` (ex: `(objectClass=user)`) Filter for valid users. Login usernames are automatically matched using `uid` and does not need to be specified in this filter.
|
||||
|
||||
When LDAP authentication is enabled, django passwords (e.g. the password defined in TA_PASSWORD), will not allow you to login, only the LDAP server is used.
|
||||
|
||||
### Elasticsearch
|
||||
**Note**: Tube Archivist depends on Elasticsearch 8.
|
||||
|
||||
Use `bbilly1/tubearchivist-es` to automatically get the recommended version, or use the official image with the version tag in the docker-compose file.
|
||||
|
||||
Stores video meta data and makes everything searchable. Also keeps track of the download queue.
|
||||
- Needs to be accessible over the default port `9200`
|
||||
- Needs a volume at **/usr/share/elasticsearch/data** to store data
|
||||
|
||||
Follow the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) for additional installation details.
|
||||
|
||||
### Redis JSON
|
||||
Functions as a cache and temporary link between the application and the file system. Used to store and display messages and configuration variables.
|
||||
- Needs to be accessible over the default port `6379`
|
||||
- Needs a volume at **/data** to make your configuration changes permanent.
|
||||
|
||||
### Redis on a custom port
|
||||
For some architectures it might be required to run Redis JSON on a nonstandard port. To for example change the Redis port to **6380**, set the following values:
|
||||
- Set the environment variable `REDIS_PORT=6380` to the *tubearchivist* service.
|
||||
- For the *archivist-redis* service, change the ports to `6380:6380`
|
||||
- Additionally set the following value to the *archivist-redis* service: `command: --port 6380 --loadmodule /usr/lib/redis/modules/rejson.so`
|
||||
|
||||
### Updating Tube Archivist
|
||||
You will see the current version number of **Tube Archivist** in the footer of the interface so you can compare it with the latest release to make sure you are running the *latest and greatest*.
|
||||
* There can be breaking changes between updates, particularly as the application grows, new environment variables or settings might be required for you to set in your docker-compose file. *Always* check the **release notes**: Any breaking changes will be marked there.
|
||||
* All testing and development is done with the Elasticsearch version number as mentioned in the provided *docker-compose.yml* file. This will be updated when a new release of Elasticsearch is available. Running an older version of Elasticsearch is most likely not going to result in any issues, but it's still recommended to run the same version as mentioned. Use `bbilly1/tubearchivist-es` to automatically get the recommended version.
|
||||
|
||||
### Alternative installation instructions:
|
||||
- **arm64**: The Tube Archivist container is multi arch, so is Elasticsearch. RedisJSON doesn't offer arm builds, you can use `bbilly1/rejson`, an unofficial rebuild for arm64.
|
||||
- **Helm Chart**: There is a Helm Chart available at https://github.com/insuusvenerati/helm-charts. Mostly self-explanatory but feel free to ask questions in the discord / subreddit.
|
||||
- **Wiki**: There are additional helpful installation instructions in the [wiki](https://github.com/tubearchivist/tubearchivist/wiki/Installation) for Unraid, Truenas and Synology.
|
||||
|
||||
|
||||
## Potential pitfalls
|
||||
### vm.max_map_count
|
||||
**Elastic Search** in Docker requires the kernel setting of the host machine `vm.max_map_count` to be set to at least 262144.
|
||||
|
||||
To temporarily set the value, run:
|
||||
```
|
||||
sudo sysctl -w vm.max_map_count=262144
|
||||
```
|
||||
|
||||
To apply the change permanently depends on your host operating system:
|
||||
|
||||
- For example on Ubuntu Server add `vm.max_map_count = 262144` to the file `/etc/sysctl.conf`.
|
||||
- On Arch based systems create a file `/etc/sysctl.d/max_map_count.conf` with the content `vm.max_map_count = 262144`.
|
||||
- On any other platform look up in the documentation on how to pass kernel parameters.
|
||||
|
||||
- For example on Ubuntu Server add `vm.max_map_count = 262144` to the file */etc/sysctl.conf*.
|
||||
- On Arch based systems create a file */etc/sysctl.d/max_map_count.conf* with the content `vm.max_map_count = 262144`.
|
||||
- On any other platform look up in the documentation on how to pass kernel parameters.
|
||||
|
||||
### Permissions for elasticsearch
|
||||
If you see a message similar to `Unable to access 'path.repo' (/usr/share/elasticsearch/data/snapshot)` or `failed to obtain node locks, tried [/usr/share/elasticsearch/data]` and `maybe these locations are not writable` when initially starting elasticsearch, that probably means the container is not allowed to write files to the volume.
|
||||
If you see a message similar to `failed to obtain node locks, tried [/usr/share/elasticsearch/data]` and `maybe these locations are not writable` when initially starting elasticsearch, that probably means the container is not allowed to write files to the volume.
|
||||
To fix that issue, shutdown the container and on your host machine run:
|
||||
```
|
||||
chown 1000:0 -R /path/to/mount/point
|
||||
```
|
||||
This will match the permissions with the **UID** and **GID** of elasticsearch process within the container and should fix the issue.
|
||||
|
||||
|
||||
### Disk usage
|
||||
The Elasticsearch index will become ***read only*** if the disk usage of the container goes above 95% until the usage drops below 90% again, you will see error messages like `disk usage exceeded flood-stage watermark`.
|
||||
The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again, you will see error messages like `disk usage exceeded flood-stage watermark`, [link](https://github.com/tubearchivist/tubearchivist#disk-usage).
|
||||
|
||||
Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download.
|
||||
|
||||
## `error setting rlimit`
|
||||
If you are seeing errors like `failed to create shim: OCI runtime create failed` and `error during container init: error setting rlimits`, this means docker can't set these limits, usually because they are set at another place or are incompatible because of other reasons. Solution is to remove the `ulimits` key from the ES container in your docker compose and start again.
|
||||
|
||||
This can happen if you have nested virtualizations, e.g. LXC running Docker in Proxmox.
|
||||
|
||||
## Known limitations
|
||||
- Video files created by Tube Archivist need to be playable in your browser of choice. Not every codec is compatible with every browser and might require some testing with format selection.
|
||||
- Every limitation of **yt-dlp** will also be present in Tube Archivist. If **yt-dlp** can't download or extract a video for any reason, Tube Archivist won't be able to either.
|
||||
- There is no flexibility in naming of the media files.
|
||||
## Getting Started
|
||||
1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example:
|
||||
```
|
||||
bestvideo[VCODEC=avc1]+bestaudio[ACODEC=mp4a]/mp4
|
||||
```
|
||||
2. Subscribe to some of your favorite YouTube channels on the **channels** page.
|
||||
3. On the **downloads** page, click on *Rescan subscriptions* to add videos from the subscribed channels to your Download queue or click on *Add to download queue* to manually add Video IDs, links, channels or playlists.
|
||||
4. Click on *Start download* and let **Tube Archivist** do its thing.
|
||||
5. Enjoy your archived collection!
|
||||
|
||||
## Roadmap
|
||||
We have come far, nonetheless we are not short of ideas on how to improve and extend this project. Issues waiting for you to tackle, in no particular order:
|
||||
|
||||
- [ ] Audio download
|
||||
- [ ] User roles
|
||||
- [ ] Podcast mode to serve channel as mp3
|
||||
- [ ] Random and repeat controls ([#108](https://github.com/tubearchivist/tubearchivist/issues/108), [#220](https://github.com/tubearchivist/tubearchivist/issues/220))
|
||||
- [ ] Implement [PyFilesystem](https://github.com/PyFilesystem/pyfilesystem2) for flexible video storage
|
||||
- [ ] Implement [Apprise](https://github.com/caronc/apprise) for notifications ([#97](https://github.com/tubearchivist/tubearchivist/issues/97))
|
||||
- [ ] User created playlists, random and repeat controls ([#108](https://github.com/tubearchivist/tubearchivist/issues/108), [#220](https://github.com/tubearchivist/tubearchivist/issues/220))
|
||||
- [ ] Auto play or play next link ([#226](https://github.com/tubearchivist/tubearchivist/issues/226))
|
||||
- [ ] Show similar videos on video page
|
||||
- [ ] Multi language support
|
||||
- [ ] Show total video downloaded vs total videos available in channel
|
||||
- [ ] Download or Ignore videos by keyword ([#163](https://github.com/tubearchivist/tubearchivist/issues/163))
|
||||
- [ ] Add statistics of index
|
||||
- [ ] Download speed schedule ([#198](https://github.com/tubearchivist/tubearchivist/issues/198))
|
||||
- [ ] Auto ignore videos by keyword ([#163](https://github.com/tubearchivist/tubearchivist/issues/163))
|
||||
- [ ] Custom searchable notes to videos, channels, playlists ([#144](https://github.com/tubearchivist/tubearchivist/issues/144))
|
||||
- [ ] Search comments
|
||||
- [ ] Search download queue
|
||||
- [ ] Per user videos/channel/playlists
|
||||
- [ ] Download video comments
|
||||
|
||||
Implemented:
|
||||
- [X] Configure shorts, streams and video sizes per channel [2024-07-15]
|
||||
- [X] User created playlists [2024-04-10]
|
||||
- [X] User roles, aka read only user [2023-11-10]
|
||||
- [X] Add statistics of index [2023-09-03]
|
||||
- [X] Implement [Apprise](https://github.com/caronc/apprise) for notifications [2023-08-05]
|
||||
- [X] Download video comments [2022-11-30]
|
||||
- [X] Show similar videos on video page [2022-11-30]
|
||||
- [X] Implement complete offline media file import from json file [2022-08-20]
|
||||
- [X] Filter and query in search form, search by url query [2022-07-23]
|
||||
- [X] Make items in grid row configurable to use more of the screen [2022-06-04]
|
||||
@ -189,18 +211,11 @@ Implemented:
|
||||
- [X] Backup and restore [2021-09-22]
|
||||
- [X] Scan your file system to index already downloaded videos [2021-09-14]
|
||||
|
||||
## User Scripts
|
||||
This is a list of useful user scripts, generously created from folks like you to extend this project and its functionality. Make sure to check the respective repository links for detailed license information.
|
||||
## Known limitations
|
||||
- Video files created by Tube Archivist need to be playable in your browser of choice. Not every codec is compatible with every browser and might require some testing with format selection.
|
||||
- Every limitation of **yt-dlp** will also be present in Tube Archivist. If **yt-dlp** can't download or extract a video for any reason, Tube Archivist won't be able to either.
|
||||
- There is currently no flexibility in naming of the media files.
|
||||
|
||||
This is your time to shine, [read this](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#user-scripts) then open a PR to add your script here.
|
||||
|
||||
- [danieljue/ta_dl_page_script](https://github.com/danieljue/ta_dl_page_script): Helper browser script to prioritize a channels' videos in download queue.
|
||||
- [dot-mike/ta-scripts](https://github.com/dot-mike/ta-scripts): A collection of personal scripts for managing TubeArchivist.
|
||||
- [DarkFighterLuke/ta_base_url_nginx](https://gist.github.com/DarkFighterLuke/4561b6bfbf83720493dc59171c58ac36): Set base URL with Nginx when you can't use subdomains.
|
||||
- [lamusmaser/ta_migration_helper](https://github.com/lamusmaser/ta_migration_helper): Advanced helper script for migration issues to TubeArchivist v0.4.4 or later.
|
||||
- [lamusmaser/create_info_json](https://gist.github.com/lamusmaser/837fb58f73ea0cad784a33497932e0dd): Script to generate `.info.json` files using `ffmpeg` collecting information from downloaded videos.
|
||||
- [lamusmaser/ta_fix_for_video_redirection](https://github.com/lamusmaser/ta_fix_for_video_redirection): Script to fix videos that were incorrectly indexed by YouTube's "Video is Unavailable" response.
|
||||
- [RoninTech/ta-helper](https://github.com/RoninTech/ta-helper): Helper script to provide a symlink association to reference TubeArchivist videos with their original titles.
|
||||
|
||||
## Donate
|
||||
The best donation to **Tube Archivist** is your time, take a look at the [contribution page](CONTRIBUTING.md) to get started.
|
||||
@ -210,20 +225,6 @@ Second best way to support the development is to provide for caffeinated beverag
|
||||
* [Paypal Subscription](https://www.paypal.com/webapps/billing/plans/subscribe?plan_id=P-03770005GR991451KMFGVPMQ) for a monthly coffee
|
||||
* [ko-fi.com](https://ko-fi.com/bbilly1) for an alternative platform
|
||||
|
||||
## Notable mentions
|
||||
This is a selection of places where this project has been featured on reddit, in the news, blogs or any other online media, newest on top.
|
||||
* **ycombinator**: Tube Archivist on Hackernews front page, [2023-07-16][[link](https://news.ycombinator.com/item?id=36744395)]
|
||||
* **linux-community.de**: Tube Archivist bringt Ordnung in die Youtube-Sammlung, [German][2023-05-01][[link](https://www.linux-community.de/ausgaben/linuxuser/2023/05/tube-archivist-bringt-ordnung-in-die-youtube-sammlung/)]
|
||||
* **noted.lol**: Dev Debrief, An Interview With the Developer of Tube Archivist, [2023-03-30] [[link](https://noted.lol/dev-debrief-tube-archivist/)]
|
||||
* **console.substack.com**: Interview With Simon of Tube Archivist, [2023-01-29] [[link](https://console.substack.com/p/console-142#%C2%A7interview-with-simon-of-tube-archivist)]
|
||||
* **reddit.com**: Tube Archivist v0.3.0 - Now Archiving Comments, [2022-12-02] [[link](https://www.reddit.com/r/selfhosted/comments/zaonzp/tube_archivist_v030_now_archiving_comments/)]
|
||||
* **reddit.com**: Tube Archivist v0.2 - Now with Full Text Search, [2022-07-24] [[link](https://www.reddit.com/r/selfhosted/comments/w6jfa1/tube_archivist_v02_now_with_full_text_search/)]
|
||||
* **noted.lol**: How I Control What Media My Kids Watch Using Tube Archivist, [2022-03-27] [[link](https://noted.lol/how-i-control-what-media-my-kids-watch-using-tube-archivist/)]
|
||||
* **thehomelab.wiki**: Tube Archivist - A Youtube-DL Alternative on Steroids, [2022-01-27] [[link](https://thehomelab.wiki/books/news/page/tube-archivist-a-youtube-dl-alternative-on-steroids)]
|
||||
* **reddit.com**: Celebrating TubeArchivist v0.1, [2022-01-09] [[link](https://www.reddit.com/r/selfhosted/comments/rzh084/celebrating_tubearchivist_v01/)]
|
||||
* **linuxunplugged.com**: Pick: tubearchivist — Your self-hosted YouTube media server, [2021-09-11] [[link](https://linuxunplugged.com/425)] and [2021-10-05] [[link](https://linuxunplugged.com/426)]
|
||||
* **reddit.com**: Introducing Tube Archivist, your self hosted Youtube media server, [2021-09-12] [[link](https://www.reddit.com/r/selfhosted/comments/pmj07b/introducing_tube_archivist_your_self_hosted/)]
|
||||
|
||||
|
||||
## Sponsor
|
||||
Big thank you to [Digitalocean](https://www.digitalocean.com/) for generously donating credit for the tubearchivist.com VPS and buildserver.
|
||||
@ -232,4 +233,3 @@ Big thank you to [Digitalocean](https://www.digitalocean.com/) for generously do
|
||||
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/PoweredByDO/DO_Powered_by_Badge_blue.svg" width="201px">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
|
25
SHOWCASE.MD
@ -1,25 +0,0 @@
|
||||
## Tube Archivist on YouTube
|
||||
[![ibracorp-youtube-video-thumb](assets/tube-archivist-ibracorp-O8H8Z01c0Ys.jpg)](https://www.youtube.com/watch?v=O8H8Z01c0Ys)
|
||||
Video featuring Tube Archivist generously created by [IBRACORP](https://www.youtube.com/@IBRACORP).
|
||||
|
||||
## Screenshots
|
||||
![login screenshot](assets/tube-archivist-login.png?raw=true "Tube Archivist Login")
|
||||
*Login Page*: Secure way to access your media collection.
|
||||
|
||||
![home screenshot](assets/tube-archivist-home.png?raw=true "Tube Archivist Home")
|
||||
*Home Page*: Your recent videos, continue watching incomplete videos.
|
||||
|
||||
![channels screenshot](assets/tube-archivist-channels.png?raw=true "Tube Archivist Channels")
|
||||
*All Channels*: A list of all your indexed channels, filtered by subscribed only.
|
||||
|
||||
![single channel screenshot](assets/tube-archivist-single-channel.png?raw=true "Tube Archivist Single Channel")
|
||||
*Single Channel*: Single channel page with additional metadata and sub pages.
|
||||
|
||||
![video page screenshot](assets/tube-archivist-video.png?raw=true "Tube Archivist Video Page")
|
||||
*Video Page*: Stream your video directly from the interface.
|
||||
|
||||
![video page screenshot](assets/tube-archivist-download.png?raw=true "Tube Archivist Video Page")
|
||||
*Downloads Page*: Add, control, and monitor your download queue.
|
||||
|
||||
![search page screenshot](assets/tube-archivist-search.png?raw=true "Tube Archivist Search Page")
|
||||
*Search Page*. Use expressions to quickly search through your collection.
|
BIN
assets/tube-archivist-banner.jpg
Normal file
After Width: | Height: | Size: 49 KiB |
Before Width: | Height: | Size: 516 KiB |
Before Width: | Height: | Size: 541 KiB |
Before Width: | Height: | Size: 1.6 MiB |
Before Width: | Height: | Size: 578 KiB |
Before Width: | Height: | Size: 106 KiB |
BIN
assets/tube-archivist-screenshot-channels.png
Normal file
After Width: | Height: | Size: 131 KiB |
BIN
assets/tube-archivist-screenshot-download.png
Normal file
After Width: | Height: | Size: 79 KiB |
BIN
assets/tube-archivist-screenshot-home.png
Normal file
After Width: | Height: | Size: 174 KiB |
BIN
assets/tube-archivist-screenshot-single-channel.png
Normal file
After Width: | Height: | Size: 166 KiB |
BIN
assets/tube-archivist-screenshot-video.png
Normal file
After Width: | Height: | Size: 238 KiB |
Before Width: | Height: | Size: 96 KiB |
Before Width: | Height: | Size: 716 KiB |
Before Width: | Height: | Size: 684 KiB |
19
deploy.sh
@ -25,9 +25,7 @@ function sync_blackhole {
|
||||
--exclude ".gitignore" \
|
||||
--exclude "**/cache" \
|
||||
--exclude "**/__pycache__/" \
|
||||
--exclude ".venv" \
|
||||
--exclude "db.sqlite3" \
|
||||
--exclude ".mypy_cache" \
|
||||
. -e ssh "$host":tubearchivist
|
||||
|
||||
ssh "$host" 'docker build -t bbilly1/tubearchivist --build-arg TARGETPLATFORM="linux/amd64" tubearchivist'
|
||||
@ -50,10 +48,7 @@ function sync_test {
|
||||
--exclude ".gitignore" \
|
||||
--exclude "**/cache" \
|
||||
--exclude "**/__pycache__/" \
|
||||
--exclude "**/.pytest_cache/" \
|
||||
--exclude ".venv" \
|
||||
--exclude "db.sqlite3" \
|
||||
--exclude ".mypy_cache" \
|
||||
. -e ssh "$host":tubearchivist
|
||||
|
||||
# copy default docker-compose file if not exist
|
||||
@ -87,17 +82,15 @@ function validate {
|
||||
|
||||
echo "run validate on $check_path"
|
||||
|
||||
# note: this logic is duplicated in the `./github/workflows/lint_python.yml` config
|
||||
# if you update this file, you should update that as well
|
||||
echo "running black"
|
||||
black --force-exclude "migrations/*" --diff --color --check -l 79 "$check_path"
|
||||
black --diff --color --check -l 79 "$check_path"
|
||||
echo "running codespell"
|
||||
codespell --skip="./.git,./.venv,./package.json,./package-lock.json,./node_modules,./.mypy_cache" "$check_path"
|
||||
codespell --skip="./.git" "$check_path"
|
||||
echo "running flake8"
|
||||
flake8 "$check_path" --exclude "migrations,.venv" --count --max-complexity=10 \
|
||||
--max-line-length=79 --show-source --statistics
|
||||
flake8 "$check_path" --count --max-complexity=10 --max-line-length=79 \
|
||||
--show-source --statistics
|
||||
echo "running isort"
|
||||
isort --skip "migrations" --skip ".venv" --check-only --diff --profile black -l 79 "$check_path"
|
||||
isort --check-only --diff --profile black -l 79 "$check_path"
|
||||
printf " \n> all validations passed\n"
|
||||
|
||||
}
|
||||
@ -218,6 +211,8 @@ if [[ $1 == "blackhole" ]]; then
|
||||
elif [[ $1 == "test" ]]; then
|
||||
sync_test "$2"
|
||||
elif [[ $1 == "validate" ]]; then
|
||||
# check package versions in requirements.txt for updates
|
||||
python version_check.py
|
||||
validate "$2"
|
||||
elif [[ $1 == "docker" ]]; then
|
||||
sync_docker
|
||||
|
@ -1,4 +1,4 @@
|
||||
version: '3.5'
|
||||
version: '3.3'
|
||||
|
||||
services:
|
||||
tubearchivist:
|
||||
@ -20,17 +20,11 @@ services:
|
||||
- TA_PASSWORD=verysecret # your initial TA credentials
|
||||
- ELASTIC_PASSWORD=verysecret # set password for Elasticsearch
|
||||
- TZ=America/New_York # set your time zone
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
interval: 2m
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
depends_on:
|
||||
- archivist-es
|
||||
- archivist-redis
|
||||
archivist-redis:
|
||||
image: redis/redis-stack-server
|
||||
image: redislabs/rejson # for arm64 use bbilly1/rejson
|
||||
container_name: archivist-redis
|
||||
restart: unless-stopped
|
||||
expose:
|
||||
@ -40,15 +34,14 @@ services:
|
||||
depends_on:
|
||||
- archivist-es
|
||||
archivist-es:
|
||||
image: bbilly1/tubearchivist-es # only for amd64, or use official es 8.14.3
|
||||
image: bbilly1/tubearchivist-es # only for amd64, or use official es 8.4.3
|
||||
container_name: archivist-es
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- "ELASTIC_PASSWORD=verysecret" # matching Elasticsearch password
|
||||
- "ES_JAVA_OPTS=-Xms1g -Xmx1g"
|
||||
- "xpack.security.enabled=true"
|
||||
- "ELASTIC_PASSWORD=verysecret" # matching Elasticsearch password
|
||||
- "discovery.type=single-node"
|
||||
- "path.repo=/usr/share/elasticsearch/data/snapshot"
|
||||
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
|
@ -1,71 +0,0 @@
|
||||
"""
|
||||
ffmpeg link builder
|
||||
copied as into build step in Dockerfile
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import urllib.request
|
||||
from enum import Enum
|
||||
|
||||
API_URL = "https://api.github.com/repos/yt-dlp/FFmpeg-Builds/releases/latest"
|
||||
BINARIES = ["ffmpeg", "ffprobe"]
|
||||
|
||||
|
||||
class PlatformFilter(Enum):
|
||||
"""options"""
|
||||
|
||||
ARM64 = "linuxarm64"
|
||||
AMD64 = "linux64"
|
||||
|
||||
|
||||
def get_assets():
|
||||
"""get all available assets from latest build"""
|
||||
with urllib.request.urlopen(API_URL) as f:
|
||||
all_links = json.loads(f.read().decode("utf-8"))
|
||||
|
||||
return all_links
|
||||
|
||||
|
||||
def pick_url(all_links, platform):
|
||||
"""pick url for platform"""
|
||||
filter_by = PlatformFilter[platform.split("/")[1].upper()].value
|
||||
options = [i for i in all_links["assets"] if filter_by in i["name"]]
|
||||
if not options:
|
||||
raise ValueError(f"no valid asset found for filter {filter_by}")
|
||||
|
||||
url_pick = options[0]["browser_download_url"]
|
||||
|
||||
return url_pick
|
||||
|
||||
|
||||
def download_extract(url):
|
||||
"""download and extract binaries"""
|
||||
print("download file")
|
||||
filename, _ = urllib.request.urlretrieve(url)
|
||||
print("extract file")
|
||||
with tarfile.open(filename, "r:xz") as tar:
|
||||
for member in tar.getmembers():
|
||||
member.name = os.path.basename(member.name)
|
||||
if member.name in BINARIES:
|
||||
print(f"extract {member.name}")
|
||||
tar.extract(member, member.name)
|
||||
|
||||
|
||||
def main():
|
||||
"""entry point"""
|
||||
args = sys.argv
|
||||
if len(args) == 1:
|
||||
platform = "linux/amd64"
|
||||
else:
|
||||
platform = args[1]
|
||||
|
||||
all_links = get_assets()
|
||||
url = pick_url(all_links, platform)
|
||||
download_extract(url)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -1,23 +1,68 @@
|
||||
#!/bin/bash
|
||||
# startup script inside the container for tubearchivist
|
||||
|
||||
set -e
|
||||
|
||||
# django setup
|
||||
python manage.py migrate
|
||||
|
||||
if [[ -z "$DJANGO_DEBUG" ]]; then
|
||||
python manage.py collectstatic --noinput -c
|
||||
if [[ -z "$ELASTIC_USER" ]]; then
|
||||
export ELASTIC_USER=elastic
|
||||
fi
|
||||
|
||||
# ta setup
|
||||
python manage.py ta_envcheck
|
||||
python manage.py ta_connection
|
||||
python manage.py ta_startup
|
||||
cachedir=/cache
|
||||
[[ -d $cachedir ]] || cachedir=.
|
||||
lockfile=${cachedir}/initsu.lock
|
||||
|
||||
# start all tasks
|
||||
required="Missing required environment variable"
|
||||
[[ -f $lockfile ]] || : "${TA_USERNAME:?$required}"
|
||||
: "${TA_PASSWORD:?$required}"
|
||||
: "${ELASTIC_PASSWORD:?$required}"
|
||||
: "${TA_HOST:?$required}"
|
||||
|
||||
# ugly nginx and uwsgi port overwrite with env vars
|
||||
if [[ -n "$TA_PORT" ]]; then
|
||||
sed -i "s/8000/$TA_PORT/g" /etc/nginx/sites-available/default
|
||||
fi
|
||||
|
||||
if [[ -n "$TA_UWSGI_PORT" ]]; then
|
||||
sed -i "s/8080/$TA_UWSGI_PORT/g" /etc/nginx/sites-available/default
|
||||
sed -i "s/8080/$TA_UWSGI_PORT/g" /app/uwsgi.ini
|
||||
fi
|
||||
|
||||
# wait for elasticsearch
|
||||
counter=0
|
||||
until curl -u "$ELASTIC_USER":"$ELASTIC_PASSWORD" "$ES_URL" -fs; do
|
||||
echo "waiting for elastic search to start"
|
||||
counter=$((counter+1))
|
||||
if [[ $counter -eq 12 ]]; then
|
||||
# fail after 2 min
|
||||
echo "failed to connect to elastic search, exiting..."
|
||||
curl -v -u "$ELASTIC_USER":"$ELASTIC_PASSWORD" "$ES_URL"?pretty
|
||||
exit 1
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
|
||||
# start python application
|
||||
python manage.py makemigrations
|
||||
python manage.py migrate
|
||||
|
||||
if [[ -f $lockfile ]]; then
|
||||
echo -e "\e[33;1m[WARNING]\e[0m This is not the first run! Skipping" \
|
||||
"superuser creation.\nTo force it, remove $lockfile"
|
||||
else
|
||||
export DJANGO_SUPERUSER_PASSWORD=$TA_PASSWORD
|
||||
output="$(python manage.py createsuperuser --noinput --name "$TA_USERNAME" 2>&1)"
|
||||
|
||||
case "$output" in
|
||||
*"Superuser created successfully"*)
|
||||
echo "$output" && touch $lockfile ;;
|
||||
*"That name is already taken."*)
|
||||
echo "Superuser already exists. Creation will be skipped on next start."
|
||||
touch $lockfile ;;
|
||||
*) echo "$output" && exit 1
|
||||
esac
|
||||
fi
|
||||
|
||||
python manage.py collectstatic --noinput -c
|
||||
nginx &
|
||||
celery -A home.celery worker --loglevel=INFO --max-tasks-per-child 10 &
|
||||
celery -A home.tasks worker --loglevel=INFO &
|
||||
celery -A home beat --loglevel=INFO \
|
||||
--scheduler django_celery_beat.schedulers:DatabaseScheduler &
|
||||
-s "${BEAT_SCHEDULE_PATH:-${cachedir}/celerybeat-schedule}" &
|
||||
uwsgi --ini uwsgi.ini
|
||||
|
36
docs/Channels.md
Normal file
@ -0,0 +1,36 @@
|
||||
# Channels Overview and Channel Detail Page
|
||||
|
||||
The channels are organized on two different levels, similar as the [playlists](Playlists):
|
||||
|
||||
## Channels Overview
|
||||
Accessible at `/channel/` of your Tube Archivist, the **Overview Page** shows a list of all channels you have indexed.
|
||||
- You can filter that list to show or hide subscribed channels with the toggle. Clicking on the channel banner or the channel name will direct you to the *Channel Detail Page*.
|
||||
- If you are subscribed to a channel a *Unsubscribe* button will show, if you aren't subscribed, a *Subscribe* button will show instead.
|
||||
|
||||
The **Subscribe to Channels** button <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to subscribe to a channel. You have a few options:
|
||||
- Enter the YouTube channel ID, a 25 character alphanumeric string. For example *UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Enter the URL to the channel page on YouTube. For example *https://www.youtube.com/channel/UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Enter the channel name for example: *https://www.youtube.com/c/TomScottGo*.
|
||||
- Enter the video URL for any video and let Tube Archivist extract the channel ID for you. For example *https://www.youtube.com/watch?v=2tdiKTSdE9Y*
|
||||
- Add one per line.
|
||||
|
||||
You can search your indexed channels by clicking on the search icon <img src="assets/icon-search.png?raw=true" alt="search icon" width="20px" style="margin:0 5px;">. This will open a dedicated page.
|
||||
|
||||
## Channel Detail
|
||||
Each channel will get a dedicated channel detail page accessible at `/channel/<channel-id>/` of your Tube Archivist. This page shows all the videos you have downloaded from this channel.
|
||||
|
||||
- If you are subscribed to the channel, an *Unsubscribe* button will show, else the *Subscribe* button will show.
|
||||
- The **Mark as Watched** button will mark all videos of this channel as watched.
|
||||
|
||||
Additionally there is a *Channel Playlist* page, accessible at `/channel/<channel-id>/playlist/` to show all indexed playlists from this channel.
|
||||
|
||||
On the *Channel About* page, accessible at `/channel/<channel-id>/about/`, you can see additional metadata.
|
||||
- The button **Delete Channel** will delete the channel plus all videos of this channel, both media files and metadata additionally this will also delete playlists metadata belonging to that channel.
|
||||
|
||||
The channel customize form gives options to change settings on a per channel basis. Any configurations here will overwrite your configurations from the [settings](Settings) page.
|
||||
- **Download Format**: Overwrite the download quality for videos from this channel.
|
||||
- **Auto Delete**: Automatically delete watched videos from this channel after selected days.
|
||||
- **Index Playlists**: Automatically add all Playlists with at least a video downloaded to your index. Only do this for channels where you care about playlists as this will slow down indexing new videos for having to check which playlist this belongs to.
|
||||
- **SponsorBlock**: Using [SponsorBlock](https://sponsor.ajay.app/) to get and skip sponsored content. Customize per channel: You can *disable* or *enable* SponsorBlock for certain channels only to overwrite the behavior set on the [Settings](settings) page. Selecting *unset* will remove the overwrite and your setting will fall back to the default on the settings page.
|
||||
|
||||
If you have any videos pending in the download queue, a *Downloads* link will show, bringing you directly to the [downloads](Downloads) page, filtering the list by the selected channel.
|
43
docs/Downloads.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Downloads Page
|
||||
Accessible at `/downloads/` of your Tube Archivist, this page handles all the download functionality.
|
||||
|
||||
|
||||
## Rescan Subscriptions
|
||||
The **Rescan Subscriptions** icon <img src="assets/icon-rescan.png?raw=true" alt="rescan icon" width="20px" style="margin:0 5px;"> will start a background task to look for new videos from the channels and playlists you are subscribed to. You can define the channel and playlist page size on the [settings page](Settings#subscriptions). With the default page size, expect this process to take around 2-3 seconds for each channel or playlist you are subscribed to. A status message will show the progress.
|
||||
|
||||
Then for every video found, **Tube Archivist** will skip the video if it has already been downloaded or if you added it to the *ignored* list before. All the other videos will get added to the download queue. Expect this to take around 2 seconds for each video as **Tube Archivist** needs to grab some additional metadata. New videos will get added at the bottom of the download queue.
|
||||
|
||||
## Download Queue
|
||||
The **Start Download** icon <img src="assets/icon-download.png?raw=true" alt="download icon" width="20px" style="margin:0 5px;"> will start the download process starting from the top of the queue. Take a look at the relevant settings on the [Settings Page](Settings#downloads). Once the process started, a progress message will show with additional details and controls:
|
||||
- The stop icon <img src="assets/icon-stop.png?raw=true" alt="stop icon" width="20px" style="margin:0 5px;"> will gracefully stop the download process, once the current video has been finished successfully.
|
||||
- The cancel icon <img src="assets/icon-close-red.png?raw=true" alt="close icon" width="20px" style="margin:0 5px;"> is equivalent to killing the process and will stop the download immediately. Any leftover files will get deleted, the canceled video will still be available in the download queue.
|
||||
|
||||
After downloading, Tube Archivist tries to add new videos to already indexed playlists.
|
||||
|
||||
## Add to Download Queue
|
||||
The **Add to Download Queue** icon <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to manually add videos to the download queue. You have a few options:
|
||||
- Add a link to a YouTube video. For example *https://www.youtube.com/watch?v=2tdiKTSdE9Y*.
|
||||
- Add a YouTube video ID. For example *2tdiKTSdE9Y*.
|
||||
- Add a link to a YouTube video by providing the shortened URL, for example *https://youtu.be/2tdiKTSdE9Y*.
|
||||
- Add a Channel ID or Channel URL to add every available video to the download queue. This will ignore the channel page size as described before and is meant for an initial download of the whole channel. You can still ignore selected videos before starting the download.
|
||||
- Add a channel name like for example *https://www.youtube.com/c/TomScottGo*.
|
||||
- Add a playlist ID or URL to add every available video in the list to the download queue, for example *https://www.youtube.com/playlist?list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha* or *PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*.
|
||||
- Note: When adding a playlist to the queue, this playlist will automatically get [indexed](Playlists#playlist-detail).
|
||||
- Note: When you add a link to a video in a playlist, Tube Archivist assumes you want to download only the specific video and not the whole playlist, for example *https://www.youtube.com/watch?v=CINVwWHlzTY&list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha* will only add one video *CINVwWHlzTY* to the queue.
|
||||
- Add one link per line.
|
||||
|
||||
## The Download Queue
|
||||
Below the three buttons you find the download queue. New items will get added at the bottom of the queue, the next video to download once you click on **Start Download** will be the first in the list.
|
||||
|
||||
You can filter the download queue with the **filter** dropdown box, the filter will show once you have more than one channel in the download queue. Select the channel to filter by name, the number in parentheses indicates how many videos you have pending from this channel. Reset the filter by selecting *all* from the dropdown. This will generate links for the top 30 channels with pending videos.
|
||||
|
||||
Every video in the download queue has two buttons:
|
||||
- **Ignore**: This will remove that video from the download queue and this video will not get added again, even when you **Rescan Subscriptions**.
|
||||
- **Download now**: This will give priority to this video. If the download process is already running, the prioritized video will get downloaded as soon as the current video is finished. If there is no download process running, this will start downloading this single video and stop after that.
|
||||
|
||||
You can flip the view by activating **Show Only Ignored Videos**. This will show all videos you have previously *ignored*.
|
||||
Every video in the ignored list has two buttons:
|
||||
- **Forget**: This will delete the item form the ignored list.
|
||||
- **Add to Queue**: This will add the ignored video back to the download queue.
|
||||
|
||||
You can delete your download queue from the [Settings](Settings#actions) page.
|
31
docs/FAQ.md
Normal file
@ -0,0 +1,31 @@
|
||||
# Frequently Asked Questions
|
||||
|
||||
## 1. Scope of this project
|
||||
Tube Archivist is *Your self hosted YouTube media server*, which also defines the primary scope of what this project tries to do:
|
||||
- **Self hosted**: This assumes you have full control over the underlying operating system and hardware and can configure things to work properly with Docker, it's volumes and networks as well as whatever disk storage and filesystem you choose to use.
|
||||
- **YouTube**: Downloading, indexing and playing videos from YouTube, there are currently no plans to expand this to any additional platforms.
|
||||
- **Media server**: This project tries to be a stand alone media server in it's own web interface.
|
||||
|
||||
Additionally to that, progress is also happening on:
|
||||
- **API**: Endpoints for additional integrations.
|
||||
- **Browser Extension**: To integrate between youtube.com and Tube Archivist.
|
||||
|
||||
Defining the scope is important for the success of any project:
|
||||
- A scope too broad will result in development effort spreading too thin and will run into danger that his project tries to do too many things and none of them well.
|
||||
- A too narrow scope will make this project uninteresting and will exclude audiences that could also benefit from this project.
|
||||
- Not defining a scope will easily lead to misunderstandings and false hopes of where this project tries to go.
|
||||
|
||||
Of course this is subject to change: The scope can be expanded as this project continues to grow and more people contribute.
|
||||
|
||||
## 2. Emby/Plex/Jellyfin/Kodi integrations
|
||||
Although there are similarities between these excellent projects and Tube Archivist, they have a very different use case. Trying to fit the metadata relations and database structure of a YouTube archival project into these media servers that specialize in Movies and TV shows is always going to be limiting.
|
||||
|
||||
Part of the scope is to be its own media server, so that's where the focus and effort of this project is. That being said, the nature of self hosted and open source software gives you all the possible freedom to use your media as you wish.
|
||||
|
||||
## 3. To Docker or not to Docker
|
||||
This project is a classical docker application: There are multiple moving parts that need to be able to interact with each other and need to be compatible with multiple architectures and operating systems. Additionally Docker also drastically reduces development complexity which is highly appreciated.
|
||||
|
||||
So Docker is the only supported installation method. If you don't have any experience with Docker, consider investing the time to learn this very useful technology.
|
||||
|
||||
## 4. Finetuning Elasticsearch
|
||||
A minimal configuration of Elasticsearch (ES) is provided in the example docker-compose.yml file. ES is highly configurable and very interesting to learn more about. Refer to the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html) if you want to get into it.
|
32
docs/Home.md
Normal file
@ -0,0 +1,32 @@
|
||||
# Tube Archivist Wiki
|
||||
Welcome to the official Tube Archivist Wiki. This is an up-to-date documentation of user functionality.
|
||||
|
||||
Table of contents:
|
||||
* [FAQ](FAQ): Frequently asked questions what this project is and tries to do
|
||||
* [Channels](Channels): Browse your channels, handle channel subscriptions
|
||||
* [Playlists](Playlists): Browse your indexed playlists, handle playlist subscriptions
|
||||
* [Downloads](Downloads): Scanning subscriptions, handle download queue
|
||||
* [Settings](Settings): All the configuration options
|
||||
* [Video](Video): All details of a single video and playlist navigation.
|
||||
* [Users](Users): User management admin interface
|
||||
* [Search](Search): Search your archive
|
||||
* [Installation](Installation): Detailed installation instructions for various platforms.
|
||||
|
||||
## Getting Started
|
||||
1. [Subscribe](Channels#channels-overview) to some of your favourite YouTube channels.
|
||||
2. [Scan](Downloads#rescan-subscriptions) subscriptions to add the latest videos to the download queue.
|
||||
3. [Add](Downloads#add-to-download-queue) additional videos, channels or playlist - ignore the ones you don't want to download.
|
||||
4. [Download](Downloads#download-queue) and let **Tube Archivist** do it's thing.
|
||||
5. Sit back and enjoy your archived and indexed collection!
|
||||
|
||||
## General Navigation
|
||||
* Clicking on the channel name or the channel icon brings you to the dedicated channel page to show videos from that channel.
|
||||
* Clicking on a video title brings you to the dedicated video page and shows additional details.
|
||||
* Clicking on a video thumbnail opens the video player and starts streaming the selected video.
|
||||
* Clicking on the search icon <img src="assets/icon-search.png?raw=true" alt="gridview icon" width="20px" style="margin:0 5px;"> will open a dedicated search page to search over your complete index.
|
||||
* The pagination - if available - builds links for up to 10'000 results, use the search, sort or filter functionality to find what you are looking for.
|
||||
|
||||
|
||||
An empty checkbox icon <img src="assets/icon-unseen.png?raw=true" alt="unseen icon" width="20px" style="margin:0 5px;"> will show for videos you haven't marked as watched. Click on it and the icon will change to a filled checkbox <img src="assets/icon-seen.png?raw=true" alt="seen icon" width="20px" style="margin:0 5px;"> indicating it as watched - click again to revert.
|
||||
|
||||
When available the <img src="assets/icon-gridview.png?raw=true" alt="gridview icon" width="20px" style="margin:0 5px;"> gridview icon will display the list in a grid. A grid row holds 3 items by default, use the <img src="assets/icon-add.png?raw=true" alt="listview icon" width="20px" style="margin:0 5px;"> icon to add more or the <img src="assets/icon-substract.png?raw=true" alt="listview icon" width="20px" style="margin:0 5px;"> icon to remove items per row, depending on your screen size. The <img src="assets/icon-listview.png?raw=true" alt="listview icon" width="20px" style="margin:0 5px;"> listview icon will arrange the items in a list. The sort icon <img src="assets/icon-sort.png?raw=true" alt="listview icon" width="20px" style="margin:0 5px;"> will open additional sort options.
|
255
docs/Installation.md
Normal file
@ -0,0 +1,255 @@
|
||||
# Detailed Installation Instructions for Various Platforms
|
||||
|
||||
## Table of Contents
|
||||
- [Unraid](#unraid)
|
||||
- [Truenas Scale](#truenas-scale)
|
||||
- [Synology](#synology)
|
||||
|
||||
These are beginners guides installation instructions for additional platforms generously provided by users of these platforms. When in doubt, verify the details with the [Readme](https://github.com/tubearchivist/tubearchivist#installing-and-updating). If you see any issues here while using these instructions, please contribute.
|
||||
|
||||
## Unraid
|
||||
|
||||
Tube Archivist, and all of its dependencies, are located in the [community applications](https://forums.unraid.net/topic/38582-plug-in-community-applications/) store. The three containers you will need are as follows:
|
||||
|
||||
- **TubeArchivist-RedisJSON**: This container acts as a cache and temporary link between the application and the file system. Used to store and display messages and configuration variables.
|
||||
- **TubeArchivist-ES**: ElasticSearch stores video meta data and makes everything searchable. Also keeps track of the download queue.
|
||||
- **TubeArchivist**: Once your YouTube video collection grows, it becomes hard to search and find a specific video. That's where Tube Archivist comes in: By indexing your video collection with metadata from YouTube, you can organize, search and enjoy your archived YouTube videos without hassle offline through a convenient web interface.
|
||||
|
||||
### Step 1: Install `TubeArchivist-RedisJSON`
|
||||
|
||||
![enter image description here](https://i.imgur.com/ycAqFRU.png)
|
||||
This is the easiest container of the three to set up; just make sure that you do not have any port conflicts, and that your `/data` is mounted to the correct path. The other containers will map to the same directory.
|
||||
|
||||
If you need to install `TubeArchivist-RedisJSON` on a different port, you'll have to follow [these steps](https://github.com/tubearchivist/tubearchivist#redis-on-a-custom-port) later on when installing the `TubeArchivist` container.
|
||||
|
||||
|
||||
### Step 2: Install `TubeArchivist-ES`
|
||||
![enter image description here](https://i.imgur.com/o6tsTdt.png)
|
||||
ElasticSearch is also pretty easy to set up. Again, make sure you have no port conflicts, make sure that you mapped `/usr/share/elasticsearch/data` to the same directory as `RedisJSON`, and make sure to change the default password to something more secure.
|
||||
|
||||
There are three additional settings in the "show more settings" area, but leave those as they are.
|
||||
|
||||
|
||||
### Step 3: Install `TubeArchivist`
|
||||
|
||||
![enter image description here](https://i.imgur.com/dwSCfgO.png)
|
||||
It's finally time to set up TubeArchivist!
|
||||
|
||||
- `Port:` Again, make sure that you have no port conflicts on 8000.
|
||||
|
||||
- `Youtube Media Path:` is where you'll download all of your videos to.
|
||||
Make sure that this is an empty directory to not cause confusion when
|
||||
starting the application. If you have existing videos that you'd like
|
||||
to import into Tube Archivist, please checkout the [settings
|
||||
wiki.](https://github.com/tubearchivist/tubearchivist/wiki/Settings#manual-media-files-import)
|
||||
|
||||
|
||||
- `Appdata:` This should be the same base path as the other two containers.
|
||||
|
||||
- `TA Username:` This will be your username for TubeArchivist.
|
||||
|
||||
- `TA Password:` This will be your password for TubeArchivist.
|
||||
|
||||
- `Redis:` This will be JUST the IP address of your Redis container.
|
||||
|
||||
- `ElasticSearch Password:` This is the password you defined in the `TubeArchivist-ES` container.
|
||||
- `ElasticSearch:` This seems to cause some confusion, but it's a pretty simple step: just replace the IP and port to match your `TubeArchivist-ES` container.
|
||||
|
||||
(example: if your IP is 192.168.1.15, the value should be http://192.168.1.15:9200)
|
||||
|
||||
- `Time Zone:` This is an important step for your scheduler, to find your timezone, use a site like [TimeZoneConverter](http://www.timezoneconverter.com/cgi-bin/findzone.tzc)
|
||||
|
||||
### From there, you should be able to start up your containers and you're good to go!
|
||||
If you're still having trouble, join us on [discord](https://discord.gg/AFwz8nE7BK) and come to the #unraid channel.
|
||||
|
||||
<br />
|
||||
<br />
|
||||
|
||||
## Truenas Scale
|
||||
|
||||
Truenas Scale can be a bit confusing, with its k3s kubernetes implementation.
|
||||
|
||||
However, there is a step by step guide available for its users here:
|
||||
|
||||
https://heavysetup.info/applications/tube-archivist/dataset/
|
||||
|
||||
- Ensure you are navigating the columns under `Tube Archivist` on the left hand side of the screen
|
||||
|
||||
<br />
|
||||
<br />
|
||||
|
||||
## Synology
|
||||
|
||||
There are several different methods to install TubeArchivist on Synology platforms. This will focus on the available `docker` package and `docker-compose` implementations.
|
||||
|
||||
### Prepare Directories/Folders
|
||||
Before we setup TubeArchivist, we need to setup the directories/folders. You are assumed to be logged into the Synology NAS.
|
||||
#### 1. Docker Base Folder
|
||||
1. Open the `File Station` utility.
|
||||
2. Click on the **Create🔽** button and choose *Create New Shared Folder*.
|
||||
3. **Name** the folder "Docker".
|
||||
4. Add a **Description**.
|
||||
5. Select the **Volume Location**.
|
||||
> Note: By default, this will be where all data is stored. Change the folders as best meets your requirements.
|
||||
6. Select the appropriate options from the remaining checkbox configurations.
|
||||
![Synology - Create Docker Folder](assets/Synology_0.2.0_Docker-Folder-Create.png)
|
||||
7. Click the **Next** button.
|
||||
8. If you are going to **Encrypt** your folder, check the appropriate box and provide the Encryption Key and its confirmation.
|
||||
9. Click the **Next** button.
|
||||
10. On the **Advanced Settings** page, you can select the *Enable data checksum for advanced data integrity* setting. This may cause a performance impact, but will allow for potential file self-healing. **This cannot be changed later.**
|
||||
> Note: This is not recommended, as we will be hosting databases within this folder.
|
||||
11. If you are enabling a quota for how large the folder can get, you can select the *Enabled shared folder quota* setting and choose the maximum size this folder can grow. This can be changed later.
|
||||
12. Click the **Next** button.
|
||||
13. Confirm the settings, then click the **Apply** button. This will create the folder.
|
||||
#### 2. TubeArchivist Base Folder
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Click on the `Create🔽` button and choose *create Folder*.
|
||||
4. **Name** the folder "TubeArchivist".
|
||||
#### 3. Redis Data
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "redis".
|
||||
#### 4. Elastic Search Data
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "es".
|
||||
#### 5. TubeArchivist Cache
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "cache".
|
||||
#### 6. TubeArchivist Output
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "media".
|
||||
#### 7. Confirm Folder Structure
|
||||
Once all of the folders have been created, it should have a folder structure within Docker\TubeArchivist that includes "cache", "es", "media", and "redis" folders.
|
||||
![Synology - Docker Folder Structure](assets/Synology_0.2.0_Docker-Folder-Structure.png)
|
||||
|
||||
#### 8. Change Permissions - CLI Required
|
||||
> If you do not have SSH access enabled for CLI, [enable it](https://kb.synology.com/en-sg/DSM/tutorial/How_to_login_to_DSM_with_root_permission_via_SSH_Telnet) before continuing.
|
||||
1. Open the SSH connection to the Synology. Login as your primary `Admin` user, or the user that was enabled for SSH access.
|
||||
2. Elevate your access to `root`. Steps are provided [here](https://kb.synology.com/en-sg/DSM/tutorial/How_to_login_to_DSM_with_root_permission_via_SSH_Telnet).
|
||||
3. Change directories to the **Volume** where the "Docker" folder resides.
|
||||
</br>Example: `cd /volume1`
|
||||
4. Change directories to the "Docker" folder.
|
||||
</br>Example: `cd Docker`
|
||||
5. Change directories to the "TubeArchivist" folder.
|
||||
</br>Example: `cd TubeArchivist`
|
||||
6. Change the owner of the "redis" folder. *If correct, this does not have an output.*
|
||||
</br>Example: `chown 999:100 redis`
|
||||
7. Change the owner of the "es" folder. *If correct, this does not have an output.*
|
||||
</br>Example: `chown 1000:1000 es`
|
||||
8. Confirm that the folders have the correct permissions.
|
||||
</br>Example: `ls -hl`
|
||||
![Synology - Docker Folder Permissions Command](assets/Synology_0.2.0_Docker-Folder-Permissions-Commands.png)
|
||||
9. Logout from root.
|
||||
</br>Example: `logout`
|
||||
10. Disconnect from the SSH connection.
|
||||
</br>Example: `exit`
|
||||
### Docker Setup
|
||||
1. Install the `Docker` Synology Package.
|
||||
1. Log in to your Synology NAS.
|
||||
2. Open the `Package Center` utility.
|
||||
3. Search for `Docker`.
|
||||
4. Click `Install`.
|
||||
|
||||
![Synology - Install Docker Utility](assets/Synology_0.2.0_Docker-Install.png)
|
||||
|
||||
2. After `Docker` is installed, open the `Docker` utility.
|
||||
3. Go to the `Registry` tab.
|
||||
4. Search for the following `images` and download them. Follow the recommended versions for each of the images.
|
||||
- `redislabs/rejson`
|
||||
![Synology - Redis Image Search](assets/Synology_0.2.0_Docker-Redis-Search.png)
|
||||
- `bbilly1/tubearchivist-es`
|
||||
![Synology - ElasticSearch Image Search](assets/Synology_0.2.0_Docker-ES-Search.png)
|
||||
- `bbilly1/tubearchivist`
|
||||
![Synology - TubeArchivist Image Search](assets/Synology_0.2.0_Docker-TA-Search.png)
|
||||
|
||||
|
||||
|
||||
5. Go to the `Image` tab. From here, create a container based on each image with the associated configurations below.
|
||||
- ElasticSearch
|
||||
1. Select the associated image.
|
||||
2. Click the **Launch** button in the top.
|
||||
3. Edit the **Container Name** to be "tubearchivist-es".
|
||||
4. Click on the **Advanced Settings** button.
|
||||
5. In the **Advanced Settings** tab, check the box for `Enable auto-restart`.
|
||||
6. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/es" folder, then type in `/usr/share/elasticsearch/data` for the mount path.
|
||||
7. In the **Network** tab, leave the default `bridge` Network (unless you have a specific Network design that you know how to implement).
|
||||
8. In the **Port Settings** tab, replace the "Auto" entry under **Local Port** with the port that will be used to connect to ElasticSearch (default is 9200).
|
||||
9. In the **Port Settings** tab, select the entry line for port 9300 and **➖ delete** the line. It is not needed for this container.
|
||||
10. The **Links** tab does not require configuration for this container.
|
||||
11. In the **Environment** tab, add in the following ElasticSearch specific environment variables that may apply.
|
||||
- "discovery.type=single-node"
|
||||
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
- "UID=1000"
|
||||
- "GID=0"
|
||||
- "xpack.security.enabled=true"
|
||||
- "ELASTIC_PASSWORD=verysecret"
|
||||
> Do not use the default password as it is very insecure.
|
||||
![Synology - ElasticSearch Environment Configurations](assets/Synology_0.2.0_Docker-ES-Env-Conf.png)
|
||||
12. Click on the **Apply** button.
|
||||
13. Back on the **Create Container** screen, click the **Next** button.
|
||||
14. Review the settings to confirm, then click the **Apply** button.
|
||||
- Redis
|
||||
1. Select the associated image.
|
||||
2. Click the **Launch** button in the top.
|
||||
3. Edit the **Container Name** to be "tubearchivist-redis".
|
||||
4. Click on the **Advanced Settings** button.
|
||||
5. In the **Advanced Settings** tab, check the box for `Enable auto-restart`.
|
||||
6. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/redis" folder, then type in `/data` for the mount path.
|
||||
7. In the **Network** tab, leave the default `bridge` Network (unless you have a specific Network design that you know how to implement).
|
||||
8. In the **Port Settings** tab, replace the "Auto" entry under **Local Port** with the port that will be used to connect to Redis (default is 6379).
|
||||
9. In the **Links** tab, select the "tubearchivist-es" container from the **Container Name** dropdown and provide it the same alias, "tubearchivist-es".
|
||||
10. In the **Environment** tab, add in any Redis specific environment variables that may apply (none by default).
|
||||
11. Click on the **Apply** button.
|
||||
12. Back on the **Create Container** screen, click the **Next** button.
|
||||
13. Review the settings to confirm, then click the **Apply** button.
|
||||
|
||||
- TubeArchivist
|
||||
1. Select the associated image.
|
||||
2. Click the **Launch** button in the top.
|
||||
3. Edit the **Container Name** to be "tubearchivist".
|
||||
4. Click on the **Advanced Settings** button.
|
||||
5. In the **Advanced Settings** tab, check the box for `Enable auto-restart`.
|
||||
6. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/cache" folder, then type in `/cache` for the mount path.
|
||||
7. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/media" folder, then type in `/youtube` for the mount path.
|
||||
8. In the **Network** tab, leave the default `bridge` Network (unless you have a specific Network design that you know how to implement).
|
||||
9. In the **Port Settings** tab, replace the "Auto" entry under **Local Port** with the port that will be used to connect to TubeArchivist (default is 8000).
|
||||
10. In the **Links** tab, select the "tubearchivist-es" container from the **Container Name** dropdown and provide it the same alias, "tubearchivist-es".
|
||||
11. In the **Links** tab, select the "tubearchivist-redis" container from the **Container Name** dropdown and provide it the same alias, "tubearchivist-redis".
|
||||
12. In the **Environment** tab, add in the following TubeArchivist specific environment variables that may apply. **Change the variables as appropriate to your use case. Follow the [README section](https://github.com/tubearchivist/tubearchivist#tube-archivist) for details on what to set each variable.**
|
||||
- "TA_HOST=synology.local"
|
||||
- "ES_URL=http://tubearchivist-es:9200"
|
||||
- "REDIS_HOST=tubearchivist-redis"
|
||||
- "HOST_UID=1000"
|
||||
- "HOST_GID=1000"
|
||||
- "TA_USERNAME=tubearchivist"
|
||||
- "TA_PASSWORD=verysecret"
|
||||
- "ELASTIC_PASSWORD=verysecret"
|
||||
- "TZ=America/New_York"
|
||||
> Do not use the default password as it is very insecure.
|
||||
> Ensure that ELASTIC_PASSWORD matches the password used on the tubearchivist-es container.
|
||||
![Synology - TubeArchivist Environment Configurations](assets/Synology_0.2.0_Docker-TA-Env-Conf.png)
|
||||
13. Click on the **Apply** button.
|
||||
14. Back on the **Create Container** screen, click the **Next** button.
|
||||
15. Review the settings to confirm, then click the **Apply** button.
|
||||
6. After the containers have been configured and started, you can go to the **Container** tab and monitor the containers.
|
||||
7. To review the logs to ensure that the system has started successfully, select the "tubearchivist" container and click on the **Details** button. In the new window, go to the **Log** tab. Monitor the logs until either an error occurs or the message `celery@tubearchivist ready.` is in the logs. This may take a few minutes, especially for a first time setup.
|
||||
> Note: Synology Docker presents the logs in a pagination format. If you are not seeing the logs update, check if there are additional pages.
|
||||
8. After it has started, go to the location in the `TA_HOST`. This should give you the standard TubeArchivist login screen.
|
||||
<!--
|
||||
### Docker-Compose Setup -->
|
||||
<!-- This section is a Work In Progress -->
|
||||
|
||||
### From there, you should be able to start up your containers and you're good to go!
|
||||
If you're still having trouble, join us on [discord](https://discord.gg/AFwz8nE7BK) and come to the #synology channel.
|
23
docs/Playlists.md
Normal file
@ -0,0 +1,23 @@
|
||||
# Playlist Overview and Playlist Detail Page
|
||||
The playlists are organized in two different levels, similar to the [channels](Channels):
|
||||
|
||||
## Playlist Overview
|
||||
Accessible at `/playlist/` of your Tube Archivist, this **Overview Page** shows a list of all playlists you have indexed over all your channels.
|
||||
- You can filter that list to show only subscribed to playlists with the toggle.
|
||||
|
||||
You can index playlists of a channel from the channel detail page as described [here](Channels#channel-detail).
|
||||
|
||||
The **Subscribe to Playlist** button <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to subscribe to playlists. You have a few options:
|
||||
- Enter the YouTube playlist id, for example: *PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*
|
||||
- Enter the dedicated YouTube playlist URL, for example: *https://www.youtube.com/playlist?list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*
|
||||
- Add one per line.
|
||||
- NOTE: It doesn't make sense to subscribe to a playlist if you are already subscribed to the corresponding channel, as this will slow down the **Rescan Subscriptions** [task](Downloads#rescan-subscriptions).
|
||||
|
||||
You can search your indexed playlists by clicking on the search icon <img src="assets/icon-search.png?raw=true" alt="search icon" width="20px" style="margin:0 5px;">. This will open a dedicated page.
|
||||
|
||||
## Playlist Detail
|
||||
Each playlist will get a dedicated playlist detail page accessible at `/playlist/<playlist-id>/` of your Tube Archivist. This page shows all the videos you have downloaded from this playlist.
|
||||
|
||||
- If you are subscribed to the playlist, an Unsubscribe button will show, else the Subscribe button will show.
|
||||
- The Mark as Watched button will mark all videos of this playlist as watched.
|
||||
- The **Delete Playlist** button will give you the option to delete just the *metadata* which won't delete any media files or *delete all* which will delete metadata plus all videos belonging to this playlist.
|
54
docs/Search.md
Normal file
@ -0,0 +1,54 @@
|
||||
# Search Page
|
||||
Accessible at `/search/` of your **Tube Archivist**, search your archive for Videos, Channels and Playlists - or even full text search throughout your indexed subtitles.
|
||||
|
||||
- All your queries are case insensitive and are normalized to lowercase.
|
||||
- All your queries are analyzed for the English language, this means *singular*, *plural* and word variations like *-ing*, *-ed*, *-able* etc. are treated as synonyms.
|
||||
- Fuzzy search is activated for all your searches. This can catch typos in your queries or in the matching documents with one to two letters difference, depending on the query length.
|
||||
- All text searches are ranked, meaning the better a match the higher ranked the result. Unless otherwise stated, queries with multiple words are processed with the `and` operator, meaning all words need to match so each word will narrow down the result.
|
||||
- This will return 30 results per query, pagination is not implemented yet.
|
||||
|
||||
Just start typing to start a *simple* search or start your query with a primary keyword to search for a specific type and narrow down the result with secondary keywords. Secondary keywords can be in any order. Use *yes* or *no* for boolean values.
|
||||
|
||||
## Simple
|
||||
Start your query without a keyword to make a simple query. This will search in *video titles*, *channel names* and *playlist titles* and will return matching videos, channels and playlists. Keyword searches will return more results in a particular category due to the fact that more fields are searched for matches.
|
||||
|
||||
## Video
|
||||
Start your query with the primary keyword `video:` to search for videos only. This will search through the *video titles*, *tags* and *category* fields. Narrow your search down with secondary keywords:
|
||||
- `channel:` search for videos matching the channel name.
|
||||
- `active:` is a boolean value, to search for videos that are still active on YouTube or that are no longer active.
|
||||
|
||||
**Example**:
|
||||
- `video:learn python channel:corey shafer active:yes`: This will return all videos with the term *Learn Python* from the channel *Corey Shafer* that are still *Active* on YouTube.
|
||||
- `video: channel:tom scott active:no`: Note the omitted term after the primary key, this will show all videos from the channel *Tom Scott* that are no longer active on YouTube.
|
||||
|
||||
## Channel
|
||||
Start with the `channel:` primary keyword to search for channels matching your query. This will search through the *channel name* and *channel description* fields. Narrow your search down with secondary keywords:
|
||||
- `subscribed:` is a boolean value, search for channels that you are subscribed to or not.
|
||||
- `active:` is a boolean value, to search for channels that are still active on YouTube or that are no longer active.
|
||||
|
||||
**Example**:
|
||||
- `channel:linux subscribed:yes`: Search for channels with the term *Linux* that you are subscribed to.
|
||||
- `channel: active:no`: Note the omitted term after the primary key, this will return all channels that are no longer active on YouTube.
|
||||
|
||||
## Playlist
|
||||
Start your query with the primary keyword `playlist:` to search for playlists only. This will search through the *playlist title* and *playlist description* fields. Narrow down your search with these secondary keywords:
|
||||
- `subscribed:` is a boolean value, search for playlists that you are subscribed to or not.
|
||||
- `active:` is a boolean value, to search for playlists that are still active on YouTube or that are no longer active.
|
||||
|
||||
**Example**:
|
||||
- `playlist:backend engineering subscribed:yes`: Search for playlists about *Backend Engineering* that you are subscribed to.
|
||||
- `playlist: active:yes subscribed:yes`: Note the omitted primary search term, this will return all playlists active on YouTube that you are subscribed to.
|
||||
- `playlist:html css active:yes`: Search for playlists containing *HTML CSS* that are still active on YouTube.
|
||||
|
||||
## Full
|
||||
Start a full text search by beginning your query with the primary keyword `full:`. This will search through your indexed Subtitles showing segments with possible matches. This will only show any results if you have activated *subtitle download and index* on the settings page. The operator for full text searches is `or` meaning when searching for multiple words not all words need to match, but additional words will change the ranking of the result, the more words match and the better they match, the higher ranked the result. The matching words will get highlighted in the text preview.
|
||||
|
||||
Clicking the play button on the thumbnail will open the inplace player at the timestamp from where the segment starts. Same when clicking the video title, this will open the video page and put the player at the segment timestamp. This will overwrite any previous playback position.
|
||||
|
||||
Narrow down your search with these secondary keywords:
|
||||
- `lang`: Search for matches only within a language. Use the same two letter ISO country code as you have set on the settings page.
|
||||
- `source`: Can either be *auto* to search through auto generated subtitles only or *user* to search through user uploaded subtitles only.
|
||||
|
||||
**Example**:
|
||||
- `full:contribute to open source lang:en` search for subtitle segments matching with the words *Contribute to Open Source* in the language *en*.
|
||||
- `full:flight simulator cockpit source:user` to search for the words *Flight Simulator Cockpit* from *user* uploaded subtitle segments.
|
174
docs/Settings.md
Normal file
@ -0,0 +1,174 @@
|
||||
# Settings Page
|
||||
Accessible at `/settings/` of your **Tube Archivist**, this page holds all the configurations and additional functionality related to the database.
|
||||
|
||||
Click on **Update Settings** at the bottom of the form to apply your configurations.
|
||||
|
||||
## Color scheme
|
||||
Switch between the easy on the eyes dark theme and the burning bright theme.
|
||||
|
||||
## Archive View
|
||||
- **Page Size**: Defines how many results get displayed on a given page. Same value goes for all archive views.
|
||||
|
||||
## Subscriptions
|
||||
Settings related to the channel management.
|
||||
- **Channel Page Size**: Defines how many pages will get analyzed by **Tube Archivist** each time you click on *Rescan Subscriptions*. The default page size used by yt-dlp is **50**, that's also the recommended value to set here. Any value higher will slow down the rescan process, for example if you set the value to 51, that means yt-dlp will have to go through 2 pages of results instead of 1 and by that doubling the time that process takes.
|
||||
|
||||
## Downloads
|
||||
Settings related to the download process.
|
||||
- **Download Limit**: Stop the download process after downloading the set quantity of videos.
|
||||
- **Download Speed Limit**: Set your download speed limit in KB/s. This will pass the option `--limit-rate` to yt-dlp.
|
||||
- **Throttled Rate Limit**: Restart download if the download speed drops below this value in KB/s. This will pass the option `--throttled-rate` to yt-dlp. Using this option might have a negative effect if you have an unstable or slow internet connection.
|
||||
- **Sleep Interval**: Time in seconds to sleep between requests to YouTube. It's a good idea to set this to **3** seconds. Might be necessary to avoid throttling.
|
||||
- **Auto Delete Watched Videos**: Automatically delete videos marked as watched after selected days. If activated, checks your videos after download task is finished.
|
||||
|
||||
## Download Format
|
||||
Additional settings passed to yt-dlp.
|
||||
- **Format**: This controls which streams get downloaded and is equivalent to passing `--format` to yt-dlp. Use one of the recommended one or look at the documentation of [yt-dlp](https://github.com/yt-dlp/yt-dlp#format-selection). Please note: The option `--merge-output-format mp4` is automatically passed to yt-dlp to guarantee browser compatibility. Similar to that, `--check-formats` is passed as well to check that the selected formats are actually downloadable.
|
||||
- **Embed Metadata**: This saves the available tags directly into the media file by passing `--embed-metadata` to yt-dlp.
|
||||
- **Embed Thumbnail**: This will save the thumbnail into the media file by passing `--embed-thumbnail` to yt-dlp.
|
||||
|
||||
## Subtitles
|
||||
- **Download Setting**: Select the subtitle language you like to download. Add a comma separated list for multiple languages.
|
||||
- **Source Settings**: User created subtitles are provided from the uploader and are usually the video script. Auto generated is from YouTube, quality varies, particularly for auto translated tracks.
|
||||
- **Index Settings**: Enabling subtitle indexing will add the lines to Elasticsearch and will make subtitles searchable. This will increase the index size and is not recommended on low-end hardware.
|
||||
|
||||
## Cookie
|
||||
Importing your YouTube Cookie into Tube Archivist allows yt-dlp to bypass age restrictions, gives access to private videos and your *watch later* or *liked videos*.
|
||||
|
||||
### Security concerns
|
||||
Cookies are used to store your session and contain your access token to your google account, this information can be used to take over your account. Treat that data with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will automatically append it to yt-dlp for every request.
|
||||
|
||||
### Auto import
|
||||
Easiest way to import your cookie is to use the **Tube Archivist Companion** [browser extension](https://github.com/tubearchivist/browser-extension) for Firefox and Chrome.
|
||||
|
||||
### Alternative Manual Export your cookie
|
||||
- Install **Cookies.txt** addon for [chrome](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid) or [firefox](https://addons.mozilla.org/firefox/addon/cookies-txt).
|
||||
- Visit YouTube and login with whichever YouTube account you wish to use to generate the cookies.
|
||||
- Click on the extension icon in the toolbar - it will drop down showing the active cookies for YT.
|
||||
- Click Export to export the cookies, filename is by default *cookies.google.txt*.
|
||||
|
||||
### Alternative Manual Import your cookie
|
||||
Place the file *cookies.google.txt* into the *cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will get imported and stored internally.
|
||||
|
||||
Once imported, a **Validate Cookie File** button will show, where you can confirm if your cookie is working or not.
|
||||
|
||||
### Use your cookie
|
||||
Once imported, additionally to the advantages above, your [Watch Later](https://www.youtube.com/playlist?list=WL) and [Liked Videos](https://www.youtube.com/playlist?list=LL) become a regular playlist you can download and subscribe to as any other [playlist](Playlists).
|
||||
|
||||
### Limitation
|
||||
There is only one cookie per Tube Archivist instance, this will be shared between all users.
|
||||
|
||||
## Integrations
|
||||
All third party integrations of TubeArchivist will **always** be *opt in*.
|
||||
- **API**: Your access token for the Tube Archivist API.
|
||||
- **returnyoutubedislike.com**: This will get return dislikes and average ratings for each video by integrating with the API from [returnyoutubedislike.com](https://www.returnyoutubedislike.com/).
|
||||
- **SponsorBlock**: Using [SponsorBlock](https://sponsor.ajay.app/) to get and skip sponsored content. If a video doesn't have timestamps, or has unlocked timestamps, use the browser addon to contribute to this excellent project. Can also be activated and deactivated as a per [channel overwrite](Settings#channel-customize).
|
||||
- **Cast**: Enabling the cast integration in the settings page will load an additional JS library from **Google**.
|
||||
* Requirements
|
||||
- HTTPS
|
||||
* To use the cast integration HTTPS needs to be enabled, which can be done using a reverse proxy. This is a requirement by Google as communication to the cast device is required to be encrypted, but the content itself is not.
|
||||
- Supported Browser
|
||||
* A supported browser is required for this integration such as Google Chrome. Other browsers, especially Chromium-based browsers, may support casting by enabling it in the settings.
|
||||
- Subtitles
|
||||
* Subtitles are supported however they do not work out of the box and require additional configuration. Due to requirements by Google, to use subtitles you need additional headers which will need to be configured in your reverse proxy. See this [page](https://developers.google.com/cast/docs/web_sender/advanced#cors_requirements) for the specific requirements.
|
||||
> You need the following headers: Content-Type, Accept-Encoding, and Range. Note that the last two headers, Accept-Encoding and Range, are additional headers that you may not have needed previously.
|
||||
> Wildcards "*" cannot be used for the Access-Control-Allow-Origin header. If the page has protected media content, it must use a domain instead of a wildcard.
|
||||
|
||||
|
||||
# Scheduler Setup
|
||||
Schedule settings expect a cron like format, where the first value is minute, second is hour and third is day of the week. Day 0 is Sunday, day 1 is Monday etc.
|
||||
|
||||
Examples:
|
||||
- **0 15 \***: Run task every day at 15:00 in the afternoon.
|
||||
- **30 8 \*/2**: Run task every second day of the week (Sun, Tue, Thu, Sat) at 08:30 in the morning.
|
||||
- **0 \*/3,8-17 \***: Execute every hour divisible by 3, and every hour during office hours (8 in the morning - 5 in the afternoon).
|
||||
- **0 8,16 \***: Execute every day at 8 in the morning and at 4 in the afternoon.
|
||||
- **auto**: Sensible default.
|
||||
- **0**: (zero), deactivate that task.
|
||||
|
||||
NOTE:
|
||||
- Changes in the scheduler settings require a container restart to take effect.
|
||||
- Cron format as *number*/*number* are none standard cron and are not supported by the scheduler, for example **0 0/12 \*** is invalid, use **0 \*/12 \*** instead.
|
||||
- Avoid an unnecessary frequent schedule to not get blocked by YouTube. For that reason * or wildcards for minutes are not supported.
|
||||
|
||||
## Rescan Subscriptions
|
||||
That's the equivalent task as run from the downloads page, looking through your channels and playlists and adding missing videos to the download queue.
|
||||
|
||||
## Start download
|
||||
Start downloading all videos currently in the download queue.
|
||||
|
||||
## Refresh Metadata
|
||||
Rescan videos, channels and playlists on youtube and update metadata periodically. This will also refresh your subtitles based on your current settings. If an item is no longer available on YouTube, this will deactivate it and exclude it from future refreshes. This task is meant to be run once per day, set your schedule accordingly.
|
||||
|
||||
The field **Refresh older than x days** takes a number where TubeArchivist will consider an item as *outdated*. This value is used to calculate how many items need to be refreshed today based on the total indexed. This will spread out the requests to YouTube. Sensible value here is **90** days.
|
||||
|
||||
## Thumbnail check
|
||||
This will check if all expected thumbnails are there and will delete any artwork without matching video.
|
||||
|
||||
## Index backup
|
||||
Create a zip file of the metadata and select **Max auto backups to keep** to automatically delete old backups created from this task.
|
||||
|
||||
|
||||
# Actions
|
||||
Additional database functionality.
|
||||
|
||||
## Delete download queue
|
||||
The button **Delete all queued** will delete all pending videos from the download queue. The button **Delete all ignored** will delete all videos you have previously ignored.
|
||||
|
||||
## Manual Media Files Import
|
||||
Add the files you'd like to import to the */cache/import* folder. Only add files, don't add subdirectories. All files you are adding, need to have the same *base name* as the media file. Then start the process from the settings page *Manual Media Files Import*.
|
||||
|
||||
Valid media extensions are *.mp4*, *.mkv* or *.webm*. If you have other file extensions or incompatible codecs, convert them first to mp4. **Tube Archivist** can identify the videos with one of the following methods.
|
||||
|
||||
### Method 1:
|
||||
Add a matching *.info.json* file with the media file. Both files need to have the same base name, for example:
|
||||
- For the media file: `<base-name>.mp4`
|
||||
- For the JSON file: `<base-name>.info.json`
|
||||
|
||||
The import process then looks for the 'id' key within the JSON file to identify the video.
|
||||
|
||||
### Method 2:
|
||||
Detect the YouTube ID from filename, this accepts the default yt-dlp naming convention for file names like:
|
||||
- `<base-name>[<youtube-id>].mp4`
|
||||
- The YouTube ID in square brackets at the end of the filename is the crucial part.
|
||||
|
||||
### Offline import:
|
||||
**NOTE**: This is untested. Please provide feedback.
|
||||
|
||||
If the video you are trying to import is not available on YouTube any more, **Tube Archivist** can import the required metadata:
|
||||
- The file `<base-name>.info.json` is required to extract the required information.
|
||||
- Add the thumbnail as `<base-name>.<ext>`, where valid file extensions are *.jpg*, *.png* or *.webp*. If there is no thumbnail file, **Tube Archivist** will try to extract it from the media file or will fallback to a default thumbnail.
|
||||
- Add subtitles as `<base-name>.<lang>.vtt` where *lang* is the two letter ISO country code. This will archive all subtitle files you add to the import folder, independent from your configurations. Subtitles can be archived and used in the player, but they can't be indexed or made searchable due to the fact, that they have a very different structure than the subtitles as **Tube Archivist** needs them.
|
||||
- For videos, where the whole channel is not available any more, you can add the `<channel-id>.info.json` file as generated by *youtube-dl/yt-dlp* to get the full metadata. Alternatively **Tube Archivist** will extract as much info as possible from the video info.json file.
|
||||
|
||||
### Some notes:
|
||||
- This will **consume** the files you put into the import folder: Files will get converted to mp4 if needed (this might take a long time...) and moved to the archive, *.json* files will get deleted upon completion to avoid having duplicates on the next run.
|
||||
- For best file transcoding quality, convert your media files with desired settings first before importing.
|
||||
- Maybe start with a subset of your files to import to make sure everything goes well...
|
||||
- Follow the logs to monitor progress and errors: `docker-compose logs -f tubearchivist`.
|
||||
|
||||
## Embed thumbnails into media file
|
||||
This will write or overwrite all thumbnails in the media file using the downloaded thumbnail. This is only necessary if you didn't download the files with the option *Embed Thumbnail* enabled or want to make sure all media files get the newest thumbnail. Follow the docker-compose logs to monitor progress.
|
||||
|
||||
## Backup Database
|
||||
This will backup your metadata into a zip file. The file will get stored at *cache/backup* and will contain the necessary files to restore the Elasticsearch index formatted **nd-json** files.
|
||||
|
||||
BE AWARE: This will **not** backup any media files, just the metadata from the Elasticsearch.
|
||||
|
||||
## Restore From Backup
|
||||
The restore functionality will expect the same zip file in *cache/backup* as created from the **Backup database** function. This will recreate the index from the snapshot. There will be a list of all available backups to choose from. The *source* tag can have these different values:
|
||||
- **manual**: For backups manually created from here on the settings page.
|
||||
- **auto**: For backups automatically created via a scheduled task.
|
||||
- **update**: For backups created after a Tube Archivist update due to changes in the index.
|
||||
- **False**: Undefined.
|
||||
|
||||
BE AWARE: This will **replace** your current index with the one from the backup file. This won't restore any media files.
|
||||
|
||||
## Rescan Filesystem
|
||||
This function will go through all your media files and looks at the whole index to try to find any issues:
|
||||
- Should the filename not match with the indexed media url, this will rename the video files correctly and update the index with the new link.
|
||||
- When you delete media files from the filesystem outside of the Tube Archivist interface, this will delete leftover metadata from the index.
|
||||
- When you have media files that are not indexed yet, this will grab the metadata from YouTube like it was a newly downloaded video. This can be useful when restoring from an older backup file with missing metadata but already downloaded mediafiles. NOTE: This only works if the media files are named in the same convention as Tube Archivist does, particularly the YouTube ID needs to be at the same index in the filename, alternatively see above for *Manual Media Files Import*.
|
||||
- This will also check all of your thumbnails and download any that are missing.
|
||||
|
||||
BE AWARE: There is no undo.
|
20
docs/Users.md
Normal file
@ -0,0 +1,20 @@
|
||||
# User Management
|
||||
|
||||
For now, **Tube Archivist** is a single user application. You can create multiple users with different names and passwords, they will share the same videos and permissions but some interface configurations are on a per user basis. *More is on the roadmap*.
|
||||
|
||||
## Superuser
|
||||
The first user gets created with the environment variables **TA_USERNAME** and **TA_PASSWORD** from your docker-compose file. That first user will automatically have *superuser* privileges.
|
||||
|
||||
## Admin Interface
|
||||
When logged in from your *superuser* account, you are able to access the admin interface from the settings page or at `/admin/`. This interface holds all functionality for user management.
|
||||
|
||||
## Create additional users
|
||||
From the admin interface when you click on *Accounts* you will get a list of all users. From there you can create additional users by clicking on *Add Account*, provide a name and confirm password and click on *Save* to create the user.
|
||||
|
||||
## Changing users
|
||||
You can delete or change permissions and password of a user by clicking on the username from the *Accounts* list page and follow the interface from there. Changing the password of the *superuser* here will overwrite the password originally set with the environment variables.
|
||||
|
||||
## Reset
|
||||
Delete all user configurations by deleting the file `cache/db.sqlite3` and restart the container. This will create the superuser again from the environment variables.
|
||||
|
||||
NOTE: Future improvements here will most likely require such a reset.
|
11
docs/Video.md
Normal file
@ -0,0 +1,11 @@
|
||||
# Video Page
|
||||
|
||||
Every video downloaded gets a dedicated page accessible at `/video/<video-id>` of your Tube Archivist.
|
||||
|
||||
Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail).
|
||||
|
||||
The button **Delete Video** will delete that video including the media file.
|
||||
|
||||
When available, a playlist navigation will show at the bottom. Clicking on the playlist name will bring you to the dedicated [Playlist Detail](Playlists#playlist-detail) page showing all videos downloaded from that playlist. The number in square brackets indicates the position of the current video in that playlist.
|
||||
|
||||
Clicking on the next or previous video name or thumbnail will bring you to that dedicated video page.
|
BIN
docs/assets/Synology_0.2.0_Docker-ES-Env-Conf.png
Normal file
After Width: | Height: | Size: 29 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-ES-Search.png
Normal file
After Width: | Height: | Size: 26 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-Folder-Create.png
Normal file
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 18 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-Folder-Structure.png
Normal file
After Width: | Height: | Size: 27 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-Install.png
Normal file
After Width: | Height: | Size: 133 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-Redis-Search.png
Normal file
After Width: | Height: | Size: 26 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-TA-Env-Conf.png
Normal file
After Width: | Height: | Size: 29 KiB |
BIN
docs/assets/Synology_0.2.0_Docker-TA-Search.png
Normal file
After Width: | Height: | Size: 32 KiB |
BIN
docs/assets/TubeArchivist-ES.png
Normal file
After Width: | Height: | Size: 58 KiB |
BIN
docs/assets/TubeArchivist-RedisJSON.png
Normal file
After Width: | Height: | Size: 48 KiB |
BIN
docs/assets/TubeArchivist.png
Normal file
After Width: | Height: | Size: 69 KiB |
BIN
docs/assets/icon-add.png
Normal file
After Width: | Height: | Size: 2.5 KiB |
BIN
docs/assets/icon-close-blue.png
Normal file
After Width: | Height: | Size: 4.3 KiB |
BIN
docs/assets/icon-close-red.png
Normal file
After Width: | Height: | Size: 4.3 KiB |
BIN
docs/assets/icon-download.png
Normal file
After Width: | Height: | Size: 2.7 KiB |
BIN
docs/assets/icon-gridview.png
Normal file
After Width: | Height: | Size: 3.1 KiB |
BIN
docs/assets/icon-listview.png
Normal file
After Width: | Height: | Size: 2.8 KiB |
BIN
docs/assets/icon-rescan.png
Normal file
After Width: | Height: | Size: 4.4 KiB |
BIN
docs/assets/icon-search.png
Normal file
After Width: | Height: | Size: 5.1 KiB |
BIN
docs/assets/icon-seen.png
Normal file
After Width: | Height: | Size: 3.3 KiB |
BIN
docs/assets/icon-sort.png
Normal file
After Width: | Height: | Size: 2.0 KiB |
BIN
docs/assets/icon-stop.png
Normal file
After Width: | Height: | Size: 2.3 KiB |
BIN
docs/assets/icon-substract.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
docs/assets/icon-unseen.png
Normal file
After Width: | Height: | Size: 2.2 KiB |
1137
package-lock.json
generated
17
package.json
@ -1,17 +0,0 @@
|
||||
{
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"lint": "eslint 'tubearchivist/static/**/*.js'",
|
||||
"format": "prettier --write 'tubearchivist/static/**/*.js'"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.26.0",
|
||||
"prettier": "^2.7.1",
|
||||
"eslint-config-prettier": "^8.5.0"
|
||||
},
|
||||
"prettier": {
|
||||
"singleQuote": true,
|
||||
"arrowParens": "avoid",
|
||||
"printWidth": 100
|
||||
}
|
||||
}
|
@ -1,3 +1,313 @@
|
||||
# TubeArchivist API
|
||||
Documentation of available API endpoints.
|
||||
|
||||
All API documentation has moved to [docs.tubearchivist.com](https://docs.tubearchivist.com/).
|
||||
Note:
|
||||
- This is very early stages and will change!
|
||||
- Check the commit history to see if a documented feature is already in your release
|
||||
|
||||
## Table of contents
|
||||
- [Authentication](#authentication)
|
||||
- [Pagination](#pagination)
|
||||
|
||||
**Video**
|
||||
- [Video List](#video-list-view)
|
||||
- [Video Single](#video-item-view)
|
||||
- [Video Single Progress](#video-progress-view)
|
||||
- [Video Single Sponsorblock](#sponsor-block-view) WIP
|
||||
|
||||
**Channel**
|
||||
- [Channel List](#channel-list-view)
|
||||
- [Channel Single](#channel-item-view)
|
||||
- [Channel Video List](#channel-videos-view)
|
||||
|
||||
**Playlist**
|
||||
- [Playlist List](#playlist-list-view)
|
||||
- [Playlist Single](#playlist-item-view)
|
||||
- [Playlist Videos List](#playlist-videos-view)
|
||||
|
||||
**Download queue**
|
||||
- [Download Queue List](#download-queue-list-view)
|
||||
- [Download Queue Single](#download-queue-item-view)
|
||||
|
||||
**Additional**
|
||||
- [Login](#login-view)
|
||||
- [Task](#task-view) WIP
|
||||
- [Cookie](#cookie-view)
|
||||
- [Search](#search-view)
|
||||
- [Ping](#ping-view)
|
||||
|
||||
## Authentication
|
||||
API token will get automatically created, accessible on the settings page. Token needs to be passed as an authorization header with every request. Additionally session based authentication is enabled too: When you are logged into your TubeArchivist instance, you'll have access to the api in the browser for testing.
|
||||
|
||||
Curl example:
|
||||
```shell
|
||||
curl -v /api/video/<video-id>/ \
|
||||
-H "Authorization: Token xxxxxxxxxx"
|
||||
```
|
||||
|
||||
Python requests example:
|
||||
```python
|
||||
import requests
|
||||
|
||||
url = "/api/video/<video-id>/"
|
||||
headers = {"Authorization": "Token xxxxxxxxxx"}
|
||||
response = requests.get(url, headers=headers)
|
||||
```
|
||||
|
||||
## Pagination
|
||||
The list views return a paginate object with the following keys:
|
||||
- page_size: *int* current page size set in config
|
||||
- page_from: *int* first result idx
|
||||
- prev_pages: *array of ints* of previous pages, if available
|
||||
- current_page: *int* current page from query
|
||||
- max_hits: *bool* if max of 10k results is reached
|
||||
- params: *str* additional url encoded query parameters
|
||||
- last_page: *int* of last page link
|
||||
- next_pages: *array of ints* of next pages
|
||||
- total_hits: *int* total results
|
||||
|
||||
Pass page number as a query parameter: `page=2`. Defaults to *0*, `page=1` is redundant and falls back to *0*. If a page query doesn't return any results, you'll get `HTTP 404 Not Found`.
|
||||
|
||||
## Video List View
|
||||
/api/video/
|
||||
|
||||
## Video Item View
|
||||
/api/video/\<video_id>/
|
||||
|
||||
## Video Progress View
|
||||
/api/video/\<video_id>/progress
|
||||
|
||||
Progress is stored for each user.
|
||||
|
||||
### Get last player position of a video
|
||||
GET /api/video/\<video_id>/progress
|
||||
```json
|
||||
{
|
||||
"youtube_id": "<video_id>",
|
||||
"user_id": 1,
|
||||
"position": 100
|
||||
}
|
||||
```
|
||||
|
||||
### Post player position of video
|
||||
POST /api/video/\<video_id>/progress
|
||||
```json
|
||||
{
|
||||
"position": 100
|
||||
}
|
||||
```
|
||||
|
||||
### Delete player position of video
|
||||
DELETE /api/video/\<video_id>/progress
|
||||
|
||||
|
||||
## Sponsor Block View
|
||||
/api/video/\<video_id>/sponsor/
|
||||
|
||||
Integrate with sponsorblock
|
||||
|
||||
### Get list of segments
|
||||
GET /api/video/\<video_id>/sponsor/
|
||||
|
||||
|
||||
### Vote on existing segment
|
||||
**This only simulates the request**
|
||||
POST /api/video/\<video_id>/sponsor/
|
||||
```json
|
||||
{
|
||||
"vote": {
|
||||
"uuid": "<uuid>",
|
||||
"yourVote": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
yourVote needs to be *int*: 0 for downvote, 1 for upvote, 20 to undo vote
|
||||
|
||||
### Create new segment
|
||||
**This only simulates the request**
|
||||
POST /api/video/\<video_id>/sponsor/
|
||||
```json
|
||||
{
|
||||
"segment": {
|
||||
"startTime": 5,
|
||||
"endTime": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
Timestamps either *int* or *float*, end time can't be before start time.
|
||||
|
||||
|
||||
## Channel List View
|
||||
/api/channel/
|
||||
|
||||
### Subscribe to a list of channels
|
||||
POST /api/channel/
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{"channel_id": "UC9-y-6csu5WGm29I7JiwpnA", "channel_subscribed": true}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Channel Item View
|
||||
/api/channel/\<channel_id>/
|
||||
|
||||
## Channel Videos View
|
||||
/api/channel/\<channel_id>/video/
|
||||
|
||||
## Playlist List View
|
||||
/api/playlist/
|
||||
|
||||
## Playlist Item View
|
||||
/api/playlist/\<playlist_id>/
|
||||
|
||||
## Playlist Videos View
|
||||
/api/playlist/\<playlist_id>/video/
|
||||
|
||||
## Download Queue List View
|
||||
GET /api/download/
|
||||
|
||||
Parameter:
|
||||
- filter: pending, ignore
|
||||
- channel: channel-id
|
||||
|
||||
### Add list of videos to download queue
|
||||
POST /api/download/
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{"youtube_id": "NYj3DnI81AQ", "status": "pending"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Delete download queue items by filter
|
||||
DELETE /api/download/?filter=ignore
|
||||
DELETE /api/download/?filter=pending
|
||||
|
||||
## Download Queue Item View
|
||||
GET /api/download/\<video_id>/
|
||||
POST /api/download/\<video_id>/
|
||||
|
||||
Ignore video in download queue:
|
||||
```json
|
||||
{
|
||||
"status": "ignore"
|
||||
}
|
||||
```
|
||||
|
||||
Add to queue previously ignored video:
|
||||
```json
|
||||
{
|
||||
"status": "pending"
|
||||
}
|
||||
```
|
||||
|
||||
DELETE /api/download/\<video_id>/
|
||||
Forget or delete from download queue
|
||||
|
||||
## Login View
|
||||
Return token and user ID for username and password:
|
||||
POST /api/login
|
||||
```json
|
||||
{
|
||||
"username": "tubearchivist",
|
||||
"password": "verysecret"
|
||||
}
|
||||
```
|
||||
|
||||
after successful login returns
|
||||
```json
|
||||
{
|
||||
"token": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Task View
|
||||
GET /api/task/
|
||||
POST /api/task/
|
||||
|
||||
Check if there is an ongoing task:
|
||||
GET /api/task/
|
||||
|
||||
Returns:
|
||||
```json
|
||||
{
|
||||
"rescan": false,
|
||||
"downloading": false
|
||||
}
|
||||
```
|
||||
|
||||
Start a background task
|
||||
POST /api/task/
|
||||
```json
|
||||
{
|
||||
"run": "task_name"
|
||||
}
|
||||
```
|
||||
|
||||
List of valid task names:
|
||||
- **download_pending**: Start the download queue
|
||||
- **rescan_pending**: Rescan your subscriptions
|
||||
|
||||
|
||||
## Cookie View
|
||||
Check your youtube cookie settings, *status* turns to `true` if cookie has been validated.
|
||||
GET /api/cookie/
|
||||
```json
|
||||
{
|
||||
"cookie_enabled": true,
|
||||
"status": true,
|
||||
"validated": <timestamp>,
|
||||
"validated_str": "timestamp"
|
||||
}
|
||||
```
|
||||
|
||||
POST /api/cookie/
|
||||
Send empty post request to validate cookie.
|
||||
```json
|
||||
{
|
||||
"cookie_validated": true
|
||||
}
|
||||
```
|
||||
|
||||
PUT /api/cookie/
|
||||
Send put request containing the cookie as a string:
|
||||
```json
|
||||
{
|
||||
"cookie": "your-cookie-as-string"
|
||||
}
|
||||
```
|
||||
Imports and validates cookie, returns on success:
|
||||
```json
|
||||
{
|
||||
"cookie_import": "done",
|
||||
"cookie_validated": true
|
||||
}
|
||||
```
|
||||
Or returns status code 400 on failure:
|
||||
```json
|
||||
{
|
||||
"cookie_import": "fail",
|
||||
"cookie_validated": false
|
||||
}
|
||||
```
|
||||
|
||||
## Search View
|
||||
GET /api/search/?query=\<query>
|
||||
|
||||
Returns search results from your query.
|
||||
|
||||
## Ping View
|
||||
Validate your connection with the API
|
||||
GET /api/ping
|
||||
|
||||
When valid returns message with user id:
|
||||
```json
|
||||
{
|
||||
"response": "pong",
|
||||
"user": 1
|
||||
}
|
||||
```
|
||||
|
@ -1,351 +0,0 @@
|
||||
"""aggregations"""
|
||||
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.helper import get_duration_str
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class AggBase:
    """Shared scaffolding for Elasticsearch aggregation queries.

    Subclasses set ``path`` (the ES search endpoint), ``data`` (the query
    body with the ``aggs`` definition) and ``name`` (label used in log
    output), then implement :meth:`process` to shape the raw result.
    """

    # ES endpoint to query, e.g. "ta_video/_search"
    path: str = ""
    # query body sent to ES; "size": 0 keeps hits out of the response
    data: dict = {}
    # short label used in the timing log line
    name: str = ""

    def get(self):
        """Run the aggregation query and return the raw aggregations dict."""
        response, _ = ElasticWrap(self.path).get(self.data)
        took = response.get("took")
        print(f"[agg][{self.name}] took {took} ms to process")
        return response.get("aggregations")

    def process(self):
        """Implement in subclasses: shape raw aggregations into a response."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class Video(AggBase):
    """Aggregate video statistics.

    Returns overall doc count, media size and duration, plus the same
    stats broken down per video type (``type_<key>``) and per active
    state (``active_<true|false>``).
    """

    name = "video_stats"
    path = "ta_video/_search"
    data = {
        "size": 0,
        "aggs": {
            "video_type": {
                "terms": {"field": "vid_type"},
                "aggs": {
                    "media_size": {"sum": {"field": "media_size"}},
                    "duration": {"sum": {"field": "player.duration"}},
                },
            },
            "video_active": {
                "terms": {"field": "active"},
                "aggs": {
                    "media_size": {"sum": {"field": "media_size"}},
                    "duration": {"sum": {"field": "player.duration"}},
                },
            },
            "video_media_size": {"sum": {"field": "media_size"}},
            "video_count": {"value_count": {"field": "youtube_id"}},
            "duration": {"sum": {"field": "player.duration"}},
        },
    }

    @staticmethod
    def _bucket_stats(bucket):
        """Extract doc count, media size and duration stats from one
        terms bucket with media_size/duration sub-aggregations."""
        duration = int(bucket["duration"].get("value"))
        return {
            "doc_count": bucket.get("doc_count"),
            "media_size": int(bucket["media_size"].get("value")),
            "duration": duration,
            "duration_str": get_duration_str(duration),
        }

    def process(self):
        """Process aggregation into the response dict.

        Builds overall totals first, then merges in the per-type and
        per-active-state breakdowns via :meth:`_bucket_stats`.
        """
        aggregations = self.get()

        duration = int(aggregations["duration"]["value"])
        response = {
            "doc_count": aggregations["video_count"]["value"],
            "media_size": int(aggregations["video_media_size"]["value"]),
            "duration": duration,
            "duration_str": get_duration_str(duration),
        }

        # per video type: terms key is the raw vid_type string
        for bucket in aggregations["video_type"]["buckets"]:
            response[f"type_{bucket['key']}"] = self._bucket_stats(bucket)

        # active is a boolean terms agg: use the string form of the key
        for bucket in aggregations["video_active"]["buckets"]:
            key = f"active_{bucket['key_as_string']}"
            response[key] = self._bucket_stats(bucket)

        return response
|
||||
|
||||
|
||||
class Channel(AggBase):
    """Aggregate channel stats: total count, active and subscribed splits."""

    name = "channel_stats"
    path = "ta_channel/_search"
    data = {
        "size": 0,
        "aggs": {
            "channel_count": {"value_count": {"field": "channel_id"}},
            "channel_active": {"terms": {"field": "channel_active"}},
            "channel_subscribed": {"terms": {"field": "channel_subscribed"}},
        },
    }

    def process(self):
        """Build the channel stats response from the aggregation result."""
        aggregations = self.get()
        response = {"doc_count": aggregations["channel_count"].get("value")}

        # both boolean term aggregations flatten into prefixed counters
        term_aggs = [
            ("channel_active", "active"),
            ("channel_subscribed", "subscribed"),
        ]
        for agg_name, prefix in term_aggs:
            for bucket in aggregations[agg_name]["buckets"]:
                bucket_key = f"{prefix}_{bucket['key_as_string']}"
                response[bucket_key] = bucket.get("doc_count")

        return response
|
||||
|
||||
|
||||
class Playlist(AggBase):
    """Aggregate playlist stats: total count, active and subscribed splits."""

    name = "playlist_stats"
    path = "ta_playlist/_search"
    data = {
        "size": 0,
        "aggs": {
            "playlist_count": {"value_count": {"field": "playlist_id"}},
            "playlist_active": {"terms": {"field": "playlist_active"}},
            "playlist_subscribed": {"terms": {"field": "playlist_subscribed"}},
        },
    }

    def process(self):
        """Build the playlist stats response from the aggregation result."""
        aggregations = self.get()
        response = {"doc_count": aggregations["playlist_count"].get("value")}

        # both boolean term aggregations flatten into prefixed counters
        term_aggs = [
            ("playlist_active", "active"),
            ("playlist_subscribed", "subscribed"),
        ]
        for agg_name, prefix in term_aggs:
            for bucket in aggregations[agg_name]["buckets"]:
                bucket_key = f"{prefix}_{bucket['key_as_string']}"
                response[bucket_key] = bucket.get("doc_count")

        return response
|
||||
|
||||
|
||||
class Download(AggBase):
    """Aggregate download queue stats: counts per status and per pending type."""

    name = "download_queue_stats"
    path = "ta_download/_search"
    data = {
        "size": 0,
        "aggs": {
            "status": {"terms": {"field": "status"}},
            "video_type": {
                "filter": {"term": {"status": "pending"}},
                "aggs": {"type_pending": {"terms": {"field": "vid_type"}}},
            },
        },
    }

    def process(self):
        """Build the queue stats response from the aggregation result."""
        aggregations = self.get()

        # one counter per queue status
        response = {
            bucket["key"]: bucket.get("doc_count")
            for bucket in aggregations["status"]["buckets"]
        }

        # pending items additionally split per video type
        pending = aggregations["video_type"]["type_pending"]["buckets"]
        for bucket in pending:
            response[f"pending_{bucket['key']}"] = bucket.get("doc_count")

        return response
|
||||
|
||||
|
||||
class WatchProgress(AggBase):
    """Aggregate watched vs. unwatched progress over all videos."""

    name = "watch_progress"
    path = "ta_video/_search"
    data = {
        "size": 0,
        "aggs": {
            name: {
                "terms": {"field": "player.watched"},
                "aggs": {
                    "watch_docs": {
                        "filter": {"terms": {"player.watched": [True, False]}},
                        "aggs": {
                            "true_count": {"value_count": {"field": "_index"}},
                            "duration": {"sum": {"field": "player.duration"}},
                        },
                    },
                },
            },
            "total_duration": {"sum": {"field": "player.duration"}},
            "total_vids": {"value_count": {"field": "_index"}},
        },
    }

    def process(self):
        """Build the watch progress response from the aggregation result."""
        aggregations = self.get()

        total_duration = int(aggregations["total_duration"].get("value"))
        response = {
            "total": {
                "duration": total_duration,
                "duration_str": get_duration_str(total_duration),
                "items": aggregations["total_vids"].get("value"),
            }
        }

        # one bucket per watched state (true/false)
        for bucket in aggregations[self.name]["buckets"]:
            response.update(self._build_bucket(bucket, total_duration))

        return response

    @staticmethod
    def _build_bucket(bucket, all_duration):
        """Parse a single watched/unwatched bucket into a response entry."""
        watch_duration = int(bucket["watch_docs"]["duration"]["value"])
        item_count = bucket["watch_docs"]["true_count"]["value"]
        state = "unwatched" if bucket["key_as_string"] == "false" else "watched"

        return {
            state: {
                "duration": watch_duration,
                "duration_str": get_duration_str(watch_duration),
                # guard against division by zero on an empty index
                "progress": watch_duration / all_duration if all_duration else 0,
                "items": item_count,
            }
        }
|
||||
|
||||
|
||||
class DownloadHist(AggBase):
    """Aggregate a per-day download histogram over the last week."""

    name = "videos_last_week"
    path = "ta_video/_search"
    data = {
        "size": 0,
        "aggs": {
            name: {
                "date_histogram": {
                    "field": "date_downloaded",
                    "calendar_interval": "day",
                    "format": "yyyy-MM-dd",
                    "order": {"_key": "desc"},
                    "time_zone": EnvironmentSettings.TZ,
                },
                "aggs": {
                    "total_videos": {"value_count": {"field": "youtube_id"}},
                    "media_size": {"sum": {"field": "media_size"}},
                },
            }
        },
        "query": {
            "range": {
                "date_downloaded": {
                    "gte": "now-7d/d",
                    "time_zone": EnvironmentSettings.TZ,
                }
            }
        },
    }

    def process(self):
        """Flatten the date histogram buckets into a list of day entries."""
        aggregations = self.get()

        response = []
        for bucket in aggregations[self.name]["buckets"]:
            response.append(
                {
                    "date": bucket.get("key_as_string"),
                    "count": bucket.get("doc_count"),
                    "media_size": bucket["media_size"].get("value"),
                }
            )

        return response
|
||||
|
||||
|
||||
class BiggestChannel(AggBase):
|
||||
"""get channel aggregations"""
|
||||
|
||||
def __init__(self, order):
|
||||
self.data["aggs"][self.name]["multi_terms"]["order"] = {order: "desc"}
|
||||
|
||||
name = "channel_stats"
|
||||
path = "ta_video/_search"
|
||||
data = {
|
||||
"size": 0,
|
||||
"aggs": {
|
||||
name: {
|
||||
"multi_terms": {
|
||||
"terms": [
|
||||
{"field": "channel.channel_name.keyword"},
|
||||
{"field": "channel.channel_id"},
|
||||
],
|
||||
"order": {"doc_count": "desc"},
|
||||
},
|
||||
"aggs": {
|
||||
"doc_count": {"value_count": {"field": "_index"}},
|
||||
"duration": {"sum": {"field": "player.duration"}},
|
||||
"media_size": {"sum": {"field": "media_size"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
order_choices = ["doc_count", "duration", "media_size"]
|
||||
|
||||
def process(self):
|
||||
"""process aggregation, order_by validated in the view"""
|
||||
|
||||
aggregations = self.get()
|
||||
buckets = aggregations[self.name]["buckets"]
|
||||
|
||||
response = [
|
||||
{
|
||||
"id": i["key"][1],
|
||||
"name": i["key"][0].title(),
|
||||
"doc_count": i["doc_count"]["value"],
|
||||
"duration": i["duration"]["value"],
|
||||
"duration_str": get_duration_str(int(i["duration"]["value"])),
|
||||
"media_size": i["media_size"]["value"],
|
||||
}
|
||||
for i in buckets
|
||||
]
|
||||
|
||||
return response
|
@ -7,14 +7,15 @@ Functionality:
|
||||
import urllib.parse
|
||||
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.ta.helper import date_parser, get_duration_str
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import date_praser
|
||||
|
||||
|
||||
class SearchProcess:
|
||||
"""process search results"""
|
||||
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
|
||||
def __init__(self, response):
|
||||
self.response = response
|
||||
@ -47,18 +48,6 @@ class SearchProcess:
|
||||
processed = self._process_playlist(result["_source"])
|
||||
if index == "ta_download":
|
||||
processed = self._process_download(result["_source"])
|
||||
if index == "ta_comment":
|
||||
processed = self._process_comment(result["_source"])
|
||||
if index == "ta_subtitle":
|
||||
processed = self._process_subtitle(result)
|
||||
|
||||
if isinstance(processed, dict):
|
||||
processed.update(
|
||||
{
|
||||
"_index": index,
|
||||
"_score": round(result.get("_score") or 0, 2),
|
||||
}
|
||||
)
|
||||
|
||||
return processed
|
||||
|
||||
@ -67,13 +56,13 @@ class SearchProcess:
|
||||
"""run on single channel"""
|
||||
channel_id = channel_dict["channel_id"]
|
||||
art_base = f"/cache/channels/{channel_id}"
|
||||
date_str = date_parser(channel_dict["channel_last_refresh"])
|
||||
date_str = date_praser(channel_dict["channel_last_refresh"])
|
||||
channel_dict.update(
|
||||
{
|
||||
"channel_last_refresh": date_str,
|
||||
"channel_banner_url": f"{art_base}_banner.jpg",
|
||||
"channel_thumb_url": f"{art_base}_thumb.jpg",
|
||||
"channel_tvart_url": f"{art_base}_tvart.jpg",
|
||||
"channel_tvart_url": False,
|
||||
}
|
||||
)
|
||||
|
||||
@ -83,8 +72,8 @@ class SearchProcess:
|
||||
"""run on single video dict"""
|
||||
video_id = video_dict["youtube_id"]
|
||||
media_url = urllib.parse.quote(video_dict["media_url"])
|
||||
vid_last_refresh = date_parser(video_dict["vid_last_refresh"])
|
||||
published = date_parser(video_dict["published"])
|
||||
vid_last_refresh = date_praser(video_dict["vid_last_refresh"])
|
||||
published = date_praser(video_dict["published"])
|
||||
vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
|
||||
channel = self._process_channel(video_dict["channel"])
|
||||
|
||||
@ -109,7 +98,7 @@ class SearchProcess:
|
||||
def _process_playlist(playlist_dict):
|
||||
"""run on single playlist dict"""
|
||||
playlist_id = playlist_dict["playlist_id"]
|
||||
playlist_last_refresh = date_parser(
|
||||
playlist_last_refresh = date_praser(
|
||||
playlist_dict["playlist_last_refresh"]
|
||||
)
|
||||
playlist_dict.update(
|
||||
@ -125,7 +114,7 @@ class SearchProcess:
|
||||
"""run on single download item"""
|
||||
video_id = download_dict["youtube_id"]
|
||||
vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
|
||||
published = date_parser(download_dict["published"])
|
||||
published = date_praser(download_dict["published"])
|
||||
|
||||
download_dict.update(
|
||||
{
|
||||
@ -134,43 +123,3 @@ class SearchProcess:
|
||||
}
|
||||
)
|
||||
return dict(sorted(download_dict.items()))
|
||||
|
||||
def _process_comment(self, comment_dict):
|
||||
"""run on all comments, create reply thread"""
|
||||
all_comments = comment_dict["comment_comments"]
|
||||
processed_comments = []
|
||||
|
||||
for comment in all_comments:
|
||||
if comment["comment_parent"] == "root":
|
||||
comment.update({"comment_replies": []})
|
||||
processed_comments.append(comment)
|
||||
else:
|
||||
processed_comments[-1]["comment_replies"].append(comment)
|
||||
|
||||
return processed_comments
|
||||
|
||||
def _process_subtitle(self, result):
    """Take a complete search result dict and merge in the highlight.

    Replaces the stored subtitle line with the highlighted markdown when
    the search produced a highlight, and adds the video thumbnail URL.
    """
    subtitle_dict = result["_source"]
    highlight = result.get("highlight")
    if highlight:
        # bug fix: a highlight block without subtitle_line hits previously
        # raised TypeError on the unchecked [0] index
        subtitle_lines = highlight.get("subtitle_line")
        if subtitle_lines:
            # replace line with the highlighted markdown
            subtitle_dict.update({"subtitle_line": subtitle_lines[0]})

    thumb_path = ThumbManager(subtitle_dict["youtube_id"]).vid_thumb_path()
    subtitle_dict.update({"vid_thumb_url": f"/cache/{thumb_path}"})

    return subtitle_dict
|
||||
|
||||
|
||||
def process_aggs(response):
    """Add a human readable duration string to the aggregation result."""
    aggs = response.get("aggregations")
    if not aggs:
        return

    if "total_duration" in aggs:
        duration_sec = int(aggs["total_duration"]["value"])
        aggs["total_duration"]["value_str"] = get_duration_str(duration_sec)
||||
|
54
tubearchivist/api/src/task_processor.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Functionality:
|
||||
- process tasks from API
|
||||
- validate
|
||||
- handover to celery
|
||||
"""
|
||||
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.tasks import download_pending, update_subscribed
|
||||
|
||||
|
||||
class TaskHandler:
    """Map and run background tasks requested through the API."""

    def __init__(self, data):
        # data: request payload, expects a "run" key with the task name
        self.data = data

    def run_task(self):
        """Look up the requested task, start it and return its response.

        Raises ValueError for an unknown task name.
        """
        task_name = self.data["run"]
        try:
            to_run = self.exec_map(task_name)
        except KeyError as err:
            print(f"invalid task name {task_name}")
            # bug fix: previously raised a bare ValueError without any
            # message, hiding the offending task name from API consumers
            raise ValueError(f"invalid task name: {task_name}") from err

        response = to_run()
        response.update({"task": task_name})
        return response

    def exec_map(self, task_name):
        """Map a task name to its implementation.

        Raises KeyError for unknown names.
        """
        exec_map = {
            "download_pending": self._download_pending,
            "rescan_pending": self._rescan_pending,
        }

        return exec_map[task_name]

    @staticmethod
    def _rescan_pending():
        """Look for new items in subscribed channels."""
        print("rescan subscribed channels")
        update_subscribed.delay()
        return {"success": True}

    @staticmethod
    def _download_pending():
        """Start the download queue and remember the celery task id."""
        print("download pending")
        running = download_pending.delay()
        print(f"set task id: {running.id}")
        RedisArchivist().set_message("dl_queue_id", running.id)
        return {"success": True}
|
@ -1,199 +1,102 @@
|
||||
"""all api urls"""
|
||||
|
||||
from api import views
|
||||
from api.views import (
|
||||
ChannelApiListView,
|
||||
ChannelApiVideoView,
|
||||
ChannelApiView,
|
||||
CookieView,
|
||||
DownloadApiListView,
|
||||
DownloadApiView,
|
||||
LoginApiView,
|
||||
PingView,
|
||||
PlaylistApiListView,
|
||||
PlaylistApiVideoView,
|
||||
PlaylistApiView,
|
||||
SearchView,
|
||||
TaskApiView,
|
||||
VideoApiListView,
|
||||
VideoApiView,
|
||||
VideoProgressView,
|
||||
VideoSponsorView,
|
||||
)
|
||||
from django.urls import path
|
||||
|
||||
urlpatterns = [
|
||||
path("ping/", views.PingView.as_view(), name="ping"),
|
||||
path("login/", views.LoginApiView.as_view(), name="api-login"),
|
||||
path("ping/", PingView.as_view(), name="ping"),
|
||||
path("login/", LoginApiView.as_view(), name="api-login"),
|
||||
path(
|
||||
"video/",
|
||||
views.VideoApiListView.as_view(),
|
||||
VideoApiListView.as_view(),
|
||||
name="api-video-list",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/",
|
||||
views.VideoApiView.as_view(),
|
||||
VideoApiView.as_view(),
|
||||
name="api-video",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/progress/",
|
||||
views.VideoProgressView.as_view(),
|
||||
VideoProgressView.as_view(),
|
||||
name="api-video-progress",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/comment/",
|
||||
views.VideoCommentView.as_view(),
|
||||
name="api-video-comment",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/similar/",
|
||||
views.VideoSimilarView.as_view(),
|
||||
name="api-video-similar",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/sponsor/",
|
||||
views.VideoSponsorView.as_view(),
|
||||
VideoSponsorView.as_view(),
|
||||
name="api-video-sponsor",
|
||||
),
|
||||
path(
|
||||
"channel/",
|
||||
views.ChannelApiListView.as_view(),
|
||||
ChannelApiListView.as_view(),
|
||||
name="api-channel-list",
|
||||
),
|
||||
path(
|
||||
"channel/search/",
|
||||
views.ChannelApiSearchView.as_view(),
|
||||
name="api-channel-search",
|
||||
),
|
||||
path(
|
||||
"channel/<slug:channel_id>/",
|
||||
views.ChannelApiView.as_view(),
|
||||
ChannelApiView.as_view(),
|
||||
name="api-channel",
|
||||
),
|
||||
path(
|
||||
"channel/<slug:channel_id>/video/",
|
||||
views.ChannelApiVideoView.as_view(),
|
||||
ChannelApiVideoView.as_view(),
|
||||
name="api-channel-video",
|
||||
),
|
||||
path(
|
||||
"playlist/",
|
||||
views.PlaylistApiListView.as_view(),
|
||||
PlaylistApiListView.as_view(),
|
||||
name="api-playlist-list",
|
||||
),
|
||||
path(
|
||||
"playlist/<slug:playlist_id>/",
|
||||
views.PlaylistApiView.as_view(),
|
||||
PlaylistApiView.as_view(),
|
||||
name="api-playlist",
|
||||
),
|
||||
path(
|
||||
"playlist/<slug:playlist_id>/video/",
|
||||
views.PlaylistApiVideoView.as_view(),
|
||||
PlaylistApiVideoView.as_view(),
|
||||
name="api-playlist-video",
|
||||
),
|
||||
path(
|
||||
"download/",
|
||||
views.DownloadApiListView.as_view(),
|
||||
DownloadApiListView.as_view(),
|
||||
name="api-download-list",
|
||||
),
|
||||
path(
|
||||
"download/<slug:video_id>/",
|
||||
views.DownloadApiView.as_view(),
|
||||
DownloadApiView.as_view(),
|
||||
name="api-download",
|
||||
),
|
||||
path(
|
||||
"refresh/",
|
||||
views.RefreshView.as_view(),
|
||||
name="api-refresh",
|
||||
),
|
||||
path(
|
||||
"snapshot/",
|
||||
views.SnapshotApiListView.as_view(),
|
||||
name="api-snapshot-list",
|
||||
),
|
||||
path(
|
||||
"snapshot/<slug:snapshot_id>/",
|
||||
views.SnapshotApiView.as_view(),
|
||||
name="api-snapshot",
|
||||
),
|
||||
path(
|
||||
"backup/",
|
||||
views.BackupApiListView.as_view(),
|
||||
name="api-backup-list",
|
||||
),
|
||||
path(
|
||||
"backup/<str:filename>/",
|
||||
views.BackupApiView.as_view(),
|
||||
name="api-backup",
|
||||
),
|
||||
path(
|
||||
"task-name/",
|
||||
views.TaskListView.as_view(),
|
||||
name="api-task-list",
|
||||
),
|
||||
path(
|
||||
"task-name/<slug:task_name>/",
|
||||
views.TaskNameListView.as_view(),
|
||||
name="api-task-name-list",
|
||||
),
|
||||
path(
|
||||
"task-id/<slug:task_id>/",
|
||||
views.TaskIDView.as_view(),
|
||||
name="api-task-id",
|
||||
),
|
||||
path(
|
||||
"schedule/",
|
||||
views.ScheduleView.as_view(),
|
||||
name="api-schedule",
|
||||
),
|
||||
path(
|
||||
"schedule/notification/",
|
||||
views.ScheduleNotification.as_view(),
|
||||
name="api-schedule-notification",
|
||||
),
|
||||
path(
|
||||
"config/user/",
|
||||
views.UserConfigView.as_view(),
|
||||
name="api-config-user",
|
||||
"task/",
|
||||
TaskApiView.as_view(),
|
||||
name="api-task",
|
||||
),
|
||||
path(
|
||||
"cookie/",
|
||||
views.CookieView.as_view(),
|
||||
CookieView.as_view(),
|
||||
name="api-cookie",
|
||||
),
|
||||
path(
|
||||
"watched/",
|
||||
views.WatchedView.as_view(),
|
||||
name="api-watched",
|
||||
),
|
||||
path(
|
||||
"search/",
|
||||
views.SearchView.as_view(),
|
||||
SearchView.as_view(),
|
||||
name="api-search",
|
||||
),
|
||||
path(
|
||||
"token/",
|
||||
views.TokenView.as_view(),
|
||||
name="api-token",
|
||||
),
|
||||
path(
|
||||
"notification/",
|
||||
views.NotificationView.as_view(),
|
||||
name="api-notification",
|
||||
),
|
||||
path(
|
||||
"stats/video/",
|
||||
views.StatVideoView.as_view(),
|
||||
name="api-stats-video",
|
||||
),
|
||||
path(
|
||||
"stats/channel/",
|
||||
views.StatChannelView.as_view(),
|
||||
name="api-stats-channel",
|
||||
),
|
||||
path(
|
||||
"stats/playlist/",
|
||||
views.StatPlaylistView.as_view(),
|
||||
name="api-stats-playlist",
|
||||
),
|
||||
path(
|
||||
"stats/download/",
|
||||
views.StatDownloadView.as_view(),
|
||||
name="api-stats-download",
|
||||
),
|
||||
path(
|
||||
"stats/watch/",
|
||||
views.StatWatchProgress.as_view(),
|
||||
name="api-stats-watch",
|
||||
),
|
||||
path(
|
||||
"stats/downloadhist/",
|
||||
views.StatDownloadHist.as_view(),
|
||||
name="api-stats-downloadhist",
|
||||
),
|
||||
path(
|
||||
"stats/biggestchannels/",
|
||||
views.StatBiggestChannel.as_view(),
|
||||
name="api-stats-biggestchannels",
|
||||
),
|
||||
]
|
||||
|
@ -1,163 +0,0 @@
|
||||
"""
|
||||
Functionality:
|
||||
- check that all connections are working
|
||||
"""
|
||||
|
||||
from time import sleep
|
||||
|
||||
import requests
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
TOPIC = """
|
||||
|
||||
#######################
|
||||
# Connection check #
|
||||
#######################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Startup check: verify Redis and Elasticsearch are reachable and sane.

    Checks run in order; any failure sleeps, then raises CommandError to
    abort startup.
    """

    # total seconds to wait for Elasticsearch, polled in 5s steps
    TIMEOUT = 120
    # supported Elasticsearch major version range (inclusive)
    MIN_MAJOR, MAX_MAJOR = 8, 8
    MIN_MINOR = 0

    # pylint: disable=no-member
    help = "Check connections"

    def handle(self, *args, **options):
        """run all connection checks in order"""
        self.stdout.write(TOPIC)
        self._redis_connection_check()
        self._redis_config_set()
        self._es_connection_check()
        self._es_version_check()
        self._es_path_check()

    def _redis_connection_check(self):
        """check if redis connection is established"""
        self.stdout.write("[1] connect to Redis")
        redis_conn = RedisArchivist().conn
        # retry up to five times, two seconds apart
        for _ in range(5):
            try:
                pong = redis_conn.execute_command("PING")
                if pong:
                    self.stdout.write(
                        self.style.SUCCESS(" ✓ Redis connection verified")
                    )
                    return

            except Exception:  # pylint: disable=broad-except
                self.stdout.write(" ... retry Redis connection")
                sleep(2)

        message = " 🗙 Redis connection failed"
        self.stdout.write(self.style.ERROR(f"{message}"))
        # surface the underlying client error in the log
        RedisArchivist().exec("PING")
        # NOTE(review): sleep presumably slows down container restart
        # loops before aborting — confirm intent
        sleep(60)
        raise CommandError(message)

    def _redis_config_set(self):
        """set config for redis if not set already"""
        self.stdout.write("[2] set Redis config")
        redis_conn = RedisArchivist().conn
        timeout_is = int(redis_conn.config_get("timeout").get("timeout"))
        # 0 means no connection timeout configured yet
        if not timeout_is:
            redis_conn.config_set("timeout", 3600)

        self.stdout.write(self.style.SUCCESS(" ✓ Redis config set"))

    def _es_connection_check(self):
        """wait for elasticsearch connection"""
        self.stdout.write("[3] connect to Elastic Search")
        total = self.TIMEOUT // 5
        for i in range(total):
            self.stdout.write(f" ... waiting for ES [{i}/{total}]")
            try:
                _, status_code = ElasticWrap("/").get(
                    timeout=1, print_error=False
                )
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
            ):
                sleep(5)
                continue

            if status_code and status_code == 200:
                # reachable: also wait for the cluster to reach yellow
                path = "_cluster/health?wait_for_status=yellow&timeout=60s"
                _, _ = ElasticWrap(path).get(timeout=60)
                self.stdout.write(
                    self.style.SUCCESS(" ✓ ES connection established")
                )
                return

        # final request only to capture error details for the log
        response, status_code = ElasticWrap("/").get(
            timeout=1, print_error=False
        )

        message = " 🗙 ES connection failed"
        self.stdout.write(self.style.ERROR(f"{message}"))
        self.stdout.write(f" error message: {response}")
        self.stdout.write(f" status code: {status_code}")
        sleep(60)
        raise CommandError(message)

    def _es_version_check(self):
        """check for minimal elasticsearch version"""
        self.stdout.write("[4] Elastic Search version check")
        response, _ = ElasticWrap("/").get()
        version = response["version"]["number"]
        major = int(version.split(".")[0])

        if self.MIN_MAJOR <= major <= self.MAX_MAJOR:
            self.stdout.write(
                self.style.SUCCESS(" ✓ ES version check passed")
            )
            return

        message = (
            " 🗙 ES version check failed. "
            + f"Expected {self.MIN_MAJOR}.{self.MIN_MINOR} but got {version}"
        )
        self.stdout.write(self.style.ERROR(f"{message}"))
        sleep(60)
        raise CommandError(message)

    def _es_path_check(self):
        """check that path.repo var is set"""
        self.stdout.write("[5] check ES path.repo env var")
        response, _ = ElasticWrap("_nodes/_all/settings").get()
        # only nodes carrying one of these roles need path.repo
        snaphost_roles = [
            "data",
            "data_cold",
            "data_content",
            "data_frozen",
            "data_hot",
            "data_warm",
            "master",
        ]
        for node in response["nodes"].values():
            if not (set(node["roles"]) & set(snaphost_roles)):
                continue

            if node["settings"]["path"].get("repo"):
                self.stdout.write(
                    self.style.SUCCESS(" ✓ path.repo env var is set")
                )
                return

        message = (
            " 🗙 path.repo env var not found. "
            + "set the following env var to the ES container:\n"
            + " path.repo="
            + EnvironmentSettings.ES_SNAPSHOT_DIR
        )
        self.stdout.write(self.style.ERROR(message))
        sleep(60)
        raise CommandError(message)
|
@ -1,193 +0,0 @@
|
||||
"""
|
||||
Functionality:
|
||||
- Check environment at startup
|
||||
- Process config file overwrites from env var
|
||||
- Stop startup on error
|
||||
- python management.py ta_envcheck
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from home.models import Account
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
LOGO = """
|
||||
|
||||
.... .....
|
||||
...'',;:cc,. .;::;;,'...
|
||||
..,;:cccllclc, .:ccllllcc;,..
|
||||
..,:cllcc:;,'.',. ....'',;ccllc:,..
|
||||
..;cllc:,'.. ...,:cccc:'.
|
||||
.;cccc;.. ..,:ccc:'.
|
||||
.ckkkOkxollllllllllllc. .,:::;. .,cclc;
|
||||
.:0MMMMMMMMMMMMMMMMMMMX: .cNMMMWx. .;clc:
|
||||
.;lOXK0000KNMMMMX00000KO; ;KMMMMMNl. .;ccl:,.
|
||||
.;:c:'.....kMMMNo........ 'OMMMWMMMK: '::;;'.
|
||||
....... .xMMMNl .dWMMXdOMMMO' ........
|
||||
.:cc:;. .xMMMNc .lNMMNo.:XMMWx. .:cl:.
|
||||
.:llc,. .:xxxd, ;KMMMk. .oWMMNl. .:llc'
|
||||
.cll:. .;:;;:::,. 'OMMMK:';''kWMMK: .;llc,
|
||||
.cll:. .,;;;;;;,. .,xWMMNl.:l:.;KMMMO' .;llc'
|
||||
.:llc. .cOOOk; .lKNMMWx..:l:..lNMMWx. .:llc'
|
||||
.;lcc,. .xMMMNc :KMMMM0, .:lc. .xWMMNl.'ccl:.
|
||||
.cllc. .xMMMNc 'OMMMMXc...:lc...,0MMMKl:lcc,.
|
||||
.,ccl:. .xMMMNc .xWMMMWo.,;;:lc;;;.cXMMMXdcc;.
|
||||
.,clc:. .xMMMNc .lNMMMWk. .':clc:,. .dWMMW0o;.
|
||||
.,clcc,. .ckkkx; .okkkOx, .';,. 'kKKK0l.
|
||||
.':lcc:'..... . .. ..,;cllc,.
|
||||
.,cclc,.... ....;clc;..
|
||||
..,:,..,c:'.. ...';:,..,:,.
|
||||
....:lcccc:;,'''.....'',;;:clllc,....
|
||||
.'',;:cllllllccccclllllcc:,'..
|
||||
...'',,;;;;;;;;;,''...
|
||||
.....
|
||||
|
||||
"""
|
||||
|
||||
TOPIC = """
|
||||
#######################
|
||||
# Environment Setup #
|
||||
#######################
|
||||
|
||||
"""
|
||||
|
||||
EXPECTED_ENV_VARS = [
|
||||
"TA_USERNAME",
|
||||
"TA_PASSWORD",
|
||||
"ELASTIC_PASSWORD",
|
||||
"ES_URL",
|
||||
"TA_HOST",
|
||||
]
|
||||
INST = "https://github.com/tubearchivist/tubearchivist#installing-and-updating"
|
||||
NGINX = "/etc/nginx/sites-available/default"
|
||||
UWSGI = "/app/uwsgi.ini"
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command: validate the environment before startup.

    Checks required env vars, applies env var overwrites to the nginx and
    uwsgi configs, and creates the initial superuser if needed.
    """

    # pylint: disable=no-member
    help = "Check environment before startup"

    def handle(self, *args, **options):
        """run all checks in order"""
        self.stdout.write(LOGO)
        self.stdout.write(TOPIC)
        self._expected_vars()
        self._elastic_user_overwrite()
        self._ta_port_overwrite()
        self._ta_uwsgi_overwrite()
        self._enable_cast_overwrite()
        self._create_superuser()

    def _expected_vars(self):
        """check if expected env vars are set, abort startup otherwise"""
        self.stdout.write("[1] checking expected env vars")
        env = os.environ
        for var in EXPECTED_ENV_VARS:
            if not env.get(var):
                message = f" 🗙 expected env var {var} not set\n {INST}"
                self.stdout.write(self.style.ERROR(message))
                raise CommandError(message)

        message = " ✓ all expected env vars are set"
        self.stdout.write(self.style.SUCCESS(message))

    def _elastic_user_overwrite(self):
        """check for ELASTIC_USER overwrite"""
        self.stdout.write("[2] check ES user overwrite")
        env = EnvironmentSettings.ES_USER
        self.stdout.write(self.style.SUCCESS(f" ✓ ES user is set to {env}"))

    def _ta_port_overwrite(self):
        """set TA_PORT overwrite for nginx"""
        self.stdout.write("[3] check TA_PORT overwrite")
        overwrite = EnvironmentSettings.TA_PORT
        if not overwrite:
            self.stdout.write(self.style.SUCCESS(" TA_PORT is not set"))
            return

        regex = re.compile(r"listen [0-9]{1,5}")
        to_overwrite = f"listen {overwrite}"
        changed = file_overwrite(NGINX, regex, to_overwrite)
        if changed:
            message = f" ✓ TA_PORT changed to {overwrite}"
        else:
            message = f" ✓ TA_PORT already set to {overwrite}"

        self.stdout.write(self.style.SUCCESS(message))

    def _ta_uwsgi_overwrite(self):
        """set TA_UWSGI_PORT overwrite in nginx and uwsgi configs"""
        self.stdout.write("[4] check TA_UWSGI_PORT overwrite")
        overwrite = EnvironmentSettings.TA_UWSGI_PORT
        if not overwrite:
            message = " TA_UWSGI_PORT is not set"
            self.stdout.write(self.style.SUCCESS(message))
            return

        # nginx
        regex = re.compile(r"uwsgi_pass localhost:[0-9]{1,5}")
        to_overwrite = f"uwsgi_pass localhost:{overwrite}"
        changed_nginx = file_overwrite(NGINX, regex, to_overwrite)

        # uwsgi
        regex = re.compile(r"socket = :[0-9]{1,5}")
        to_overwrite = f"socket = :{overwrite}"
        changed_uwsgi = file_overwrite(UWSGI, regex, to_overwrite)

        # bug fix: the nginx result was previously discarded by rebinding
        # `changed`, so a change applied only to nginx was reported as
        # "already set"
        if changed_nginx or changed_uwsgi:
            message = f" ✓ TA_UWSGI_PORT changed to {overwrite}"
        else:
            message = f" ✓ TA_UWSGI_PORT already set to {overwrite}"

        self.stdout.write(self.style.SUCCESS(message))

    def _enable_cast_overwrite(self):
        """cast workaround, remove auth for static files in nginx"""
        self.stdout.write("[5] check ENABLE_CAST overwrite")
        overwrite = EnvironmentSettings.ENABLE_CAST
        if not overwrite:
            self.stdout.write(self.style.SUCCESS(" ENABLE_CAST is not set"))
            return

        regex = re.compile(r"[^\S\r\n]*auth_request /api/ping/;\n")
        changed = file_overwrite(NGINX, regex, "")
        if changed:
            message = " ✓ process nginx to enable Cast"
        else:
            message = " ✓ Cast is already enabled in nginx"

        self.stdout.write(self.style.SUCCESS(message))

    def _create_superuser(self):
        """create superuser if not exist"""
        self.stdout.write("[6] create superuser")
        # .exists() avoids materializing the queryset just for the check
        if Account.objects.filter(is_superuser=True).exists():
            message = " superuser already created"
            self.stdout.write(self.style.SUCCESS(message))
            return

        name = EnvironmentSettings.TA_USERNAME
        password = EnvironmentSettings.TA_PASSWORD
        Account.objects.create_superuser(name, password)
        message = f" ✓ new superuser with name {name} created"
        self.stdout.write(self.style.SUCCESS(message))
|
||||
|
||||
|
||||
def file_overwrite(file_path, regex, overwrite):
    """Apply a regex substitution to a file in place.

    Returns True when the file content changed, False when the pattern
    produced no change and the file was left untouched.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        original = handle.read()

    updated = re.sub(regex, overwrite, original)
    if updated == original:
        # nothing to do, avoid a needless write
        return False

    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write(updated)

    return True
|
@ -1,44 +0,0 @@
|
||||
"""
|
||||
channel fix for update from v0.4.7 to v0.4.8
|
||||
reindex channels with 0 subscriber count
|
||||
python manage.py ta_fix_channels
|
||||
"""
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from home.src.es.connect import IndexPaginate
|
||||
from home.tasks import check_reindex
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""fix comment link"""
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""run command"""
|
||||
self.stdout.write("reindex failed channels")
|
||||
channels = self._get_channels()
|
||||
if not channels:
|
||||
self.stdout.write("did not find any failed channels")
|
||||
return
|
||||
|
||||
self.stdout.write(f"add {len(channels)} channels(s) to queue")
|
||||
to_reindex = {"channel": [i["channel_id"] for i in channels]}
|
||||
check_reindex.delay(data=to_reindex)
|
||||
self.stdout.write(self.style.SUCCESS(" ✓ task queued\n"))
|
||||
|
||||
def _get_channels(self):
|
||||
"""get failed channels"""
|
||||
self.stdout.write("search for failed channels")
|
||||
es_query = {
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{"term": {"channel_subs": {"value": 0}}},
|
||||
{"term": {"channel_active": {"value": True}}},
|
||||
]
|
||||
},
|
||||
},
|
||||
"_source": ["channel_id"],
|
||||
}
|
||||
channels = IndexPaginate("ta_channel", es_query).get_results()
|
||||
|
||||
return channels
|
@ -1,76 +0,0 @@
|
||||
"""
|
||||
comment link fix for update from v0.4.7 to v0.4.8
|
||||
scan your videos and comments to fix comment_count field
|
||||
python manage.py ta_fix_comment_link
|
||||
"""
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""fix comment link"""
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""run command"""
|
||||
self.stdout.write("run comment link fix")
|
||||
expected_count = self._get_comment_indexed()
|
||||
all_videos = self._get_videos()
|
||||
|
||||
self.stdout.write(f"checking {len(all_videos)} video(s)")
|
||||
videos_updated = []
|
||||
for video in all_videos:
|
||||
video_id = video["youtube_id"]
|
||||
comment_count = expected_count.get(video_id)
|
||||
if not comment_count:
|
||||
continue
|
||||
|
||||
data = {"doc": {"comment_count": comment_count}}
|
||||
path = f"ta_video/_update/{video_id}"
|
||||
response, status_code = ElasticWrap(path).post(data=data)
|
||||
|
||||
if status_code != 200:
|
||||
message = (
|
||||
"failed to add comment count to video"
|
||||
+ f"response code: {status_code}"
|
||||
+ response
|
||||
)
|
||||
raise CommandError(message)
|
||||
|
||||
videos_updated.append(video_id)
|
||||
|
||||
self.stdout.write(f"fixed {len(videos_updated)} video(s)")
|
||||
self.stdout.write(self.style.SUCCESS(" ✓ task completed\n"))
|
||||
|
||||
def _get_comment_indexed(self):
|
||||
"""get comment count by index"""
|
||||
self.stdout.write("get comments")
|
||||
src = "params['_source']['comment_comments'].length"
|
||||
data = {
|
||||
"script_fields": {
|
||||
"comments_length": {
|
||||
"script": {"source": src, "lang": "painless"}
|
||||
}
|
||||
}
|
||||
}
|
||||
all_comments = IndexPaginate(
|
||||
"ta_comment", data=data, keep_source=True
|
||||
).get_results()
|
||||
|
||||
expected_count = {
|
||||
i["_id"]: i["fields"]["comments_length"][0] for i in all_comments
|
||||
}
|
||||
|
||||
return expected_count
|
||||
|
||||
def _get_videos(self):
|
||||
"""get videos without comment_count"""
|
||||
self.stdout.write("get videos")
|
||||
data = {
|
||||
"query": {
|
||||
"bool": {"must_not": [{"exists": {"field": "comment_count"}}]}
|
||||
}
|
||||
}
|
||||
all_videos = IndexPaginate("ta_video", data).get_results()
|
||||
|
||||
return all_videos
|
@ -1,185 +0,0 @@
|
||||
"""
|
||||
filepath migration from v0.3.6 to v0.3.7
|
||||
not getting called at startup any more, to run manually if needed:
|
||||
python manage.py ta_migpath
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
TOPIC = """
|
||||
|
||||
########################
|
||||
# Filesystem Migration #
|
||||
########################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""command framework"""
|
||||
|
||||
# pylint: disable=no-member
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""run commands"""
|
||||
self.stdout.write(TOPIC)
|
||||
|
||||
handler = FolderMigration()
|
||||
to_migrate = handler.get_to_migrate()
|
||||
if not to_migrate:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" no channel migration needed\n")
|
||||
)
|
||||
return
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" migrating channels"))
|
||||
total_channels = handler.create_folders(to_migrate)
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" created {total_channels} channels")
|
||||
)
|
||||
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" migrating {len(to_migrate)} videos")
|
||||
)
|
||||
handler.migrate_videos(to_migrate)
|
||||
self.stdout.write(self.style.SUCCESS(" update videos in index"))
|
||||
handler.send_bulk()
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" cleanup old folders"))
|
||||
handler.delete_old()
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" ✓ migration completed\n"))
|
||||
|
||||
|
||||
class FolderMigration:
|
||||
"""migrate video archive folder"""
|
||||
|
||||
def __init__(self):
|
||||
self.videos = EnvironmentSettings.MEDIA_DIR
|
||||
self.bulk_list = []
|
||||
|
||||
def get_to_migrate(self):
|
||||
"""get videos to migrate"""
|
||||
script = (
|
||||
"doc['media_url'].value == "
|
||||
+ "doc['channel.channel_id'].value + '/'"
|
||||
+ " + doc['youtube_id'].value + '.mp4'"
|
||||
)
|
||||
data = {
|
||||
"query": {"bool": {"must_not": [{"script": {"script": script}}]}},
|
||||
"_source": [
|
||||
"youtube_id",
|
||||
"media_url",
|
||||
"channel.channel_id",
|
||||
"subtitles",
|
||||
],
|
||||
}
|
||||
response = IndexPaginate("ta_video", data).get_results()
|
||||
|
||||
return response
|
||||
|
||||
def create_folders(self, to_migrate):
|
||||
"""create required channel folders"""
|
||||
host_uid = EnvironmentSettings.HOST_UID
|
||||
host_gid = EnvironmentSettings.HOST_GID
|
||||
all_channel_ids = {i["channel"]["channel_id"] for i in to_migrate}
|
||||
|
||||
for channel_id in all_channel_ids:
|
||||
new_folder = os.path.join(self.videos, channel_id)
|
||||
os.makedirs(new_folder, exist_ok=True)
|
||||
if host_uid and host_gid:
|
||||
os.chown(new_folder, host_uid, host_gid)
|
||||
|
||||
return len(all_channel_ids)
|
||||
|
||||
def migrate_videos(self, to_migrate):
|
||||
"""migrate all videos of channel"""
|
||||
total = len(to_migrate)
|
||||
for idx, video in enumerate(to_migrate):
|
||||
new_media_url = self._move_video_file(video)
|
||||
if not new_media_url:
|
||||
continue
|
||||
|
||||
all_subtitles = self._move_subtitles(video)
|
||||
action = {
|
||||
"update": {"_id": video["youtube_id"], "_index": "ta_video"}
|
||||
}
|
||||
source = {"doc": {"media_url": new_media_url}}
|
||||
if all_subtitles:
|
||||
source["doc"].update({"subtitles": all_subtitles})
|
||||
|
||||
self.bulk_list.append(json.dumps(action))
|
||||
self.bulk_list.append(json.dumps(source))
|
||||
if idx % 1000 == 0:
|
||||
print(f"processing migration [{idx}/{total}]")
|
||||
self.send_bulk()
|
||||
|
||||
def _move_video_file(self, video):
|
||||
"""move video file to new location"""
|
||||
old_path = os.path.join(self.videos, video["media_url"])
|
||||
if not os.path.exists(old_path):
|
||||
print(f"did not find expected video at {old_path}")
|
||||
return False
|
||||
|
||||
new_media_url = os.path.join(
|
||||
video["channel"]["channel_id"], video["youtube_id"] + ".mp4"
|
||||
)
|
||||
new_path = os.path.join(self.videos, new_media_url)
|
||||
os.rename(old_path, new_path)
|
||||
|
||||
return new_media_url
|
||||
|
||||
def _move_subtitles(self, video):
|
||||
"""move subtitle files to new location"""
|
||||
all_subtitles = video.get("subtitles")
|
||||
if not all_subtitles:
|
||||
return False
|
||||
|
||||
for subtitle in all_subtitles:
|
||||
old_path = os.path.join(self.videos, subtitle["media_url"])
|
||||
if not os.path.exists(old_path):
|
||||
print(f"did not find expected subtitle at {old_path}")
|
||||
continue
|
||||
|
||||
new_media_url = os.path.join(
|
||||
video["channel"]["channel_id"],
|
||||
f"{video.get('youtube_id')}.{subtitle.get('lang')}.vtt",
|
||||
)
|
||||
new_path = os.path.join(self.videos, new_media_url)
|
||||
os.rename(old_path, new_path)
|
||||
subtitle["media_url"] = new_media_url
|
||||
|
||||
return all_subtitles
|
||||
|
||||
def send_bulk(self):
|
||||
"""send bulk request to update index with new urls"""
|
||||
if not self.bulk_list:
|
||||
print("nothing to update")
|
||||
return
|
||||
|
||||
self.bulk_list.append("\n")
|
||||
path = "_bulk?refresh=true"
|
||||
data = "\n".join(self.bulk_list)
|
||||
response, status = ElasticWrap(path).post(data=data, ndjson=True)
|
||||
if not status == 200:
|
||||
print(response)
|
||||
|
||||
self.bulk_list = []
|
||||
|
||||
def delete_old(self):
|
||||
"""delete old empty folders"""
|
||||
all_folders = ignore_filelist(os.listdir(self.videos))
|
||||
for folder in all_folders:
|
||||
folder_path = os.path.join(self.videos, folder)
|
||||
if not os.path.isdir(folder_path):
|
||||
continue
|
||||
|
||||
if not ignore_filelist(os.listdir(folder_path)):
|
||||
shutil.rmtree(folder_path)
|
@ -1,417 +0,0 @@
|
||||
"""
|
||||
Functionality:
|
||||
- Application startup
|
||||
- Apply migrations
|
||||
"""
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from random import randint
|
||||
from time import sleep
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.utils import dateformat
|
||||
from django_celery_beat.models import CrontabSchedule, PeriodicTasks
|
||||
from home.models import CustomPeriodicTask
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.index_setup import ElasitIndexWrap
|
||||
from home.src.es.snapshot import ElasticSnapshot
|
||||
from home.src.ta.config import AppConfig, ReleaseVersion
|
||||
from home.src.ta.config_schedule import ScheduleBuilder
|
||||
from home.src.ta.helper import clear_dl_cache
|
||||
from home.src.ta.notify import Notifications
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.task_config import TASK_CONFIG
|
||||
from home.src.ta.task_manager import TaskManager
|
||||
from home.tasks import version_check
|
||||
|
||||
TOPIC = """
|
||||
|
||||
#######################
|
||||
# Application Start #
|
||||
#######################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""command framework"""
|
||||
|
||||
# pylint: disable=no-member
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""run all commands"""
|
||||
self.stdout.write(TOPIC)
|
||||
self._sync_redis_state()
|
||||
self._make_folders()
|
||||
self._clear_redis_keys()
|
||||
self._clear_tasks()
|
||||
self._clear_dl_cache()
|
||||
self._version_check()
|
||||
self._mig_index_setup()
|
||||
self._mig_snapshot_check()
|
||||
self._mig_schedule_store()
|
||||
self._mig_custom_playlist()
|
||||
self._mig_add_missing_timestamp()
|
||||
self._create_default_schedules()
|
||||
self._update_schedule_tz()
|
||||
|
||||
def _sync_redis_state(self):
|
||||
"""make sure redis gets new config.json values"""
|
||||
self.stdout.write("[1] set new config.json values")
|
||||
needs_update = AppConfig().load_new_defaults()
|
||||
if needs_update:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" ✓ new config values set")
|
||||
)
|
||||
else:
|
||||
self.stdout.write(self.style.SUCCESS(" no new config values"))
|
||||
|
||||
def _make_folders(self):
|
||||
"""make expected cache folders"""
|
||||
self.stdout.write("[2] create expected cache folders")
|
||||
folders = [
|
||||
"backup",
|
||||
"channels",
|
||||
"download",
|
||||
"import",
|
||||
"playlists",
|
||||
"videos",
|
||||
]
|
||||
cache_dir = EnvironmentSettings.CACHE_DIR
|
||||
for folder in folders:
|
||||
folder_path = os.path.join(cache_dir, folder)
|
||||
os.makedirs(folder_path, exist_ok=True)
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" ✓ expected folders created"))
|
||||
|
||||
def _clear_redis_keys(self):
|
||||
"""make sure there are no leftover locks or keys set in redis"""
|
||||
self.stdout.write("[3] clear leftover keys in redis")
|
||||
all_keys = [
|
||||
"dl_queue_id",
|
||||
"dl_queue",
|
||||
"downloading",
|
||||
"manual_import",
|
||||
"reindex",
|
||||
"rescan",
|
||||
"run_backup",
|
||||
"startup_check",
|
||||
"reindex:ta_video",
|
||||
"reindex:ta_channel",
|
||||
"reindex:ta_playlist",
|
||||
]
|
||||
|
||||
redis_con = RedisArchivist()
|
||||
has_changed = False
|
||||
for key in all_keys:
|
||||
if redis_con.del_message(key):
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ cleared key {key}")
|
||||
)
|
||||
has_changed = True
|
||||
|
||||
if not has_changed:
|
||||
self.stdout.write(self.style.SUCCESS(" no keys found"))
|
||||
|
||||
def _clear_tasks(self):
|
||||
"""clear tasks and messages"""
|
||||
self.stdout.write("[4] clear task leftovers")
|
||||
TaskManager().fail_pending()
|
||||
redis_con = RedisArchivist()
|
||||
to_delete = redis_con.list_keys("message:")
|
||||
if to_delete:
|
||||
for key in to_delete:
|
||||
redis_con.del_message(key)
|
||||
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ cleared {len(to_delete)} messages")
|
||||
)
|
||||
|
||||
def _clear_dl_cache(self):
|
||||
"""clear leftover files from dl cache"""
|
||||
self.stdout.write("[5] clear leftover files from dl cache")
|
||||
leftover_files = clear_dl_cache(EnvironmentSettings.CACHE_DIR)
|
||||
if leftover_files:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ cleared {leftover_files} files")
|
||||
)
|
||||
else:
|
||||
self.stdout.write(self.style.SUCCESS(" no files found"))
|
||||
|
||||
def _version_check(self):
|
||||
"""remove new release key if updated now"""
|
||||
self.stdout.write("[6] check for first run after update")
|
||||
new_version = ReleaseVersion().is_updated()
|
||||
if new_version:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ update to {new_version} completed")
|
||||
)
|
||||
else:
|
||||
self.stdout.write(self.style.SUCCESS(" no new update found"))
|
||||
|
||||
version_task = CustomPeriodicTask.objects.filter(name="version_check")
|
||||
if not version_task.exists():
|
||||
return
|
||||
|
||||
if not version_task.first().last_run_at:
|
||||
self.style.SUCCESS(" ✓ send initial version check task")
|
||||
version_check.delay()
|
||||
|
||||
def _mig_index_setup(self):
|
||||
"""migration: validate index mappings"""
|
||||
self.stdout.write("[MIGRATION] validate index mappings")
|
||||
ElasitIndexWrap().setup()
|
||||
|
||||
def _mig_snapshot_check(self):
|
||||
"""migration setup snapshots"""
|
||||
self.stdout.write("[MIGRATION] setup snapshots")
|
||||
ElasticSnapshot().setup()
|
||||
|
||||
def _mig_schedule_store(self):
|
||||
"""
|
||||
update from 0.4.7 to 0.4.8
|
||||
migrate schedule task store to CustomCronSchedule
|
||||
"""
|
||||
self.stdout.write("[MIGRATION] migrate schedule store")
|
||||
config = AppConfig().config
|
||||
current_schedules = config.get("scheduler")
|
||||
if not current_schedules:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" no schedules to migrate")
|
||||
)
|
||||
return
|
||||
|
||||
self._mig_update_subscribed(current_schedules)
|
||||
self._mig_download_pending(current_schedules)
|
||||
self._mig_check_reindex(current_schedules)
|
||||
self._mig_thumbnail_check(current_schedules)
|
||||
self._mig_run_backup(current_schedules)
|
||||
self._mig_version_check()
|
||||
|
||||
del config["scheduler"]
|
||||
RedisArchivist().set_message("config", config, save=True)
|
||||
|
||||
def _mig_update_subscribed(self, current_schedules):
|
||||
"""create update_subscribed schedule"""
|
||||
task_name = "update_subscribed"
|
||||
update_subscribed_schedule = current_schedules.get(task_name)
|
||||
if update_subscribed_schedule:
|
||||
self._create_task(task_name, update_subscribed_schedule)
|
||||
|
||||
self._create_notifications(task_name, current_schedules)
|
||||
|
||||
def _mig_download_pending(self, current_schedules):
|
||||
"""create download_pending schedule"""
|
||||
task_name = "download_pending"
|
||||
download_pending_schedule = current_schedules.get(task_name)
|
||||
if download_pending_schedule:
|
||||
self._create_task(task_name, download_pending_schedule)
|
||||
|
||||
self._create_notifications(task_name, current_schedules)
|
||||
|
||||
def _mig_check_reindex(self, current_schedules):
|
||||
"""create check_reindex schedule"""
|
||||
task_name = "check_reindex"
|
||||
check_reindex_schedule = current_schedules.get(task_name)
|
||||
if check_reindex_schedule:
|
||||
task_config = {}
|
||||
days = current_schedules.get("check_reindex_days")
|
||||
if days:
|
||||
task_config.update({"days": days})
|
||||
|
||||
self._create_task(
|
||||
task_name,
|
||||
check_reindex_schedule,
|
||||
task_config=task_config,
|
||||
)
|
||||
|
||||
self._create_notifications(task_name, current_schedules)
|
||||
|
||||
def _mig_thumbnail_check(self, current_schedules):
|
||||
"""create thumbnail_check schedule"""
|
||||
thumbnail_check_schedule = current_schedules.get("thumbnail_check")
|
||||
if thumbnail_check_schedule:
|
||||
self._create_task("thumbnail_check", thumbnail_check_schedule)
|
||||
|
||||
def _mig_run_backup(self, current_schedules):
|
||||
"""create run_backup schedule"""
|
||||
run_backup_schedule = current_schedules.get("run_backup")
|
||||
if run_backup_schedule:
|
||||
task_config = False
|
||||
rotate = current_schedules.get("run_backup_rotate")
|
||||
if rotate:
|
||||
task_config = {"rotate": rotate}
|
||||
|
||||
self._create_task(
|
||||
"run_backup", run_backup_schedule, task_config=task_config
|
||||
)
|
||||
|
||||
def _mig_version_check(self):
|
||||
"""create version_check schedule"""
|
||||
version_check_schedule = {
|
||||
"minute": randint(0, 59),
|
||||
"hour": randint(0, 23),
|
||||
"day_of_week": "*",
|
||||
}
|
||||
self._create_task("version_check", version_check_schedule)
|
||||
|
||||
def _create_task(self, task_name, schedule, task_config=False):
|
||||
"""create task"""
|
||||
description = TASK_CONFIG[task_name].get("title")
|
||||
schedule, _ = CrontabSchedule.objects.get_or_create(**schedule)
|
||||
schedule.timezone = settings.TIME_ZONE
|
||||
schedule.save()
|
||||
|
||||
task, _ = CustomPeriodicTask.objects.get_or_create(
|
||||
crontab=schedule,
|
||||
name=task_name,
|
||||
description=description,
|
||||
task=task_name,
|
||||
)
|
||||
if task_config:
|
||||
task.task_config = task_config
|
||||
task.save()
|
||||
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ new task created: '{task}'")
|
||||
)
|
||||
|
||||
def _create_notifications(self, task_name, current_schedules):
|
||||
"""migrate notifications of task"""
|
||||
notifications = current_schedules.get(f"{task_name}_notify")
|
||||
if not notifications:
|
||||
return
|
||||
|
||||
urls = [i.strip() for i in notifications.split()]
|
||||
if not urls:
|
||||
return
|
||||
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ migrate notifications: '{urls}'")
|
||||
)
|
||||
handler = Notifications(task_name)
|
||||
for url in urls:
|
||||
handler.add_url(url)
|
||||
|
||||
def _mig_custom_playlist(self):
|
||||
"""add playlist_type for migration from v0.4.6 to v0.4.7"""
|
||||
self.stdout.write("[MIGRATION] custom playlist")
|
||||
data = {
|
||||
"query": {
|
||||
"bool": {"must_not": [{"exists": {"field": "playlist_type"}}]}
|
||||
},
|
||||
"script": {"source": "ctx._source['playlist_type'] = 'regular'"},
|
||||
}
|
||||
path = "ta_playlist/_update_by_query"
|
||||
response, status_code = ElasticWrap(path).post(data=data)
|
||||
if status_code == 200:
|
||||
updated = response.get("updated", 0)
|
||||
if updated:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
f" ✓ {updated} playlist_type updated in ta_playlist"
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
" no playlist_type needed updating in ta_playlist"
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
message = " 🗙 ta_playlist playlist_type update failed"
|
||||
self.stdout.write(self.style.ERROR(message))
|
||||
self.stdout.write(response)
|
||||
sleep(60)
|
||||
raise CommandError(message)
|
||||
|
||||
def _mig_add_missing_timestamp(self) -> None:
|
||||
"""
|
||||
add missing timestamp for versioncheck
|
||||
migrate from v0.4.8 to v0.4.9
|
||||
"""
|
||||
version_tasks = CustomPeriodicTask.objects.filter(name="version_check")
|
||||
if not version_tasks.exists():
|
||||
return
|
||||
|
||||
version_task = version_tasks.first()
|
||||
if not version_task.last_run_at:
|
||||
self.style.SUCCESS(" ✓ send initial version check task")
|
||||
version_check.delay()
|
||||
version_task.last_run_at = dateformat.make_aware(datetime.now())
|
||||
version_task.save()
|
||||
|
||||
def _create_default_schedules(self) -> None:
|
||||
"""
|
||||
create default schedules for new installations
|
||||
needs to be called after _mig_schedule_store
|
||||
"""
|
||||
self.stdout.write("[7] create initial schedules")
|
||||
init_has_run = CustomPeriodicTask.objects.filter(
|
||||
name="version_check"
|
||||
).exists()
|
||||
|
||||
if init_has_run:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
" schedule init already done, skipping..."
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
builder = ScheduleBuilder()
|
||||
check_reindex = builder.get_set_task(
|
||||
"check_reindex", schedule=builder.SCHEDULES["check_reindex"]
|
||||
)
|
||||
check_reindex.task_config.update({"days": 90})
|
||||
check_reindex.last_run_at = dateformat.make_aware(datetime.now())
|
||||
check_reindex.save()
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
f" ✓ created new default schedule: {check_reindex}"
|
||||
)
|
||||
)
|
||||
|
||||
thumbnail_check = builder.get_set_task(
|
||||
"thumbnail_check", schedule=builder.SCHEDULES["thumbnail_check"]
|
||||
)
|
||||
thumbnail_check.last_run_at = dateformat.make_aware(datetime.now())
|
||||
thumbnail_check.save()
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
f" ✓ created new default schedule: {thumbnail_check}"
|
||||
)
|
||||
)
|
||||
daily_random = f"{randint(0, 59)} {randint(0, 23)} *"
|
||||
version_check_task = builder.get_set_task(
|
||||
"version_check", schedule=daily_random
|
||||
)
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(
|
||||
f" ✓ created new default schedule: {version_check_task}"
|
||||
)
|
||||
)
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" ✓ all default schedules created")
|
||||
)
|
||||
|
||||
def _update_schedule_tz(self) -> None:
|
||||
"""update timezone for Schedule instances"""
|
||||
tz = EnvironmentSettings.TZ
|
||||
to_update = CrontabSchedule.objects.exclude(timezone=tz)
|
||||
|
||||
if not to_update.exists():
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" all schedules have correct TZ")
|
||||
)
|
||||
return
|
||||
|
||||
updated = to_update.update(timezone=tz)
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" ✓ updated {updated} schedules to {tz}.")
|
||||
)
|
||||
PeriodicTasks.update_changed()
|
@ -17,8 +17,7 @@ from pathlib import Path
|
||||
import ldap
|
||||
from corsheaders.defaults import default_headers
|
||||
from django_auth_ldap.config import LDAPSearch
|
||||
from home.src.ta.helper import ta_host_parser
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.config import AppConfig
|
||||
|
||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
@ -27,20 +26,25 @@ BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
# Quick-start development settings - unsuitable for production
|
||||
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/
|
||||
|
||||
PW_HASH = hashlib.sha256(EnvironmentSettings.TA_PASSWORD.encode())
|
||||
PW_HASH = hashlib.sha256(environ.get("TA_PASSWORD").encode())
|
||||
SECRET_KEY = PW_HASH.hexdigest()
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = bool(environ.get("DJANGO_DEBUG"))
|
||||
|
||||
ALLOWED_HOSTS, CSRF_TRUSTED_ORIGINS = ta_host_parser(
|
||||
environ.get("TA_HOST", "localhost")
|
||||
)
|
||||
ALLOWED_HOSTS = [i.strip() for i in environ.get("TA_HOST").split()]
|
||||
|
||||
CSRF_TRUSTED_ORIGINS = []
|
||||
for host in ALLOWED_HOSTS:
|
||||
if host.startswith("http://") or host.startswith("https://"):
|
||||
CSRF_TRUSTED_ORIGINS.append(host)
|
||||
else:
|
||||
CSRF_TRUSTED_ORIGINS.append(f"http://{host}")
|
||||
|
||||
|
||||
# Application definition
|
||||
|
||||
INSTALLED_APPS = [
|
||||
"django_celery_beat",
|
||||
"home.apps.HomeConfig",
|
||||
"django.contrib.admin",
|
||||
"django.contrib.auth",
|
||||
@ -54,7 +58,6 @@ INSTALLED_APPS = [
|
||||
"rest_framework",
|
||||
"rest_framework.authtoken",
|
||||
"api",
|
||||
"config",
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
@ -67,7 +70,6 @@ MIDDLEWARE = [
|
||||
"django.contrib.auth.middleware.AuthenticationMiddleware",
|
||||
"django.contrib.messages.middleware.MessageMiddleware",
|
||||
"django.middleware.clickjacking.XFrameOptionsMiddleware",
|
||||
"home.src.ta.health.HealthCheckMiddleware",
|
||||
]
|
||||
|
||||
ROOT_URLCONF = "config.urls"
|
||||
@ -101,75 +103,20 @@ if bool(environ.get("TA_LDAP")):
|
||||
global AUTH_LDAP_BIND_PASSWORD
|
||||
AUTH_LDAP_BIND_PASSWORD = environ.get("TA_LDAP_BIND_PASSWORD")
|
||||
|
||||
"""
|
||||
Since these are new environment variables, taking the opporunity to use
|
||||
more accurate env names.
|
||||
Given Names are *_technically_* different from Personal names, as people
|
||||
who change their names have different given names and personal names,
|
||||
and they go by personal names. Additionally, "LastName" is actually
|
||||
incorrect for many cultures, such as Korea, where the
|
||||
family name comes first, and the personal name comes last.
|
||||
|
||||
But we all know people are going to try to guess at these, so still want
|
||||
to include names that people will guess, hence using first/last as well.
|
||||
"""
|
||||
|
||||
# Attribute mapping options
|
||||
|
||||
global AUTH_LDAP_USER_ATTR_MAP_USERNAME
|
||||
AUTH_LDAP_USER_ATTR_MAP_USERNAME = (
|
||||
environ.get("TA_LDAP_USER_ATTR_MAP_USERNAME")
|
||||
or environ.get("TA_LDAP_USER_ATTR_MAP_UID")
|
||||
or "uid"
|
||||
)
|
||||
|
||||
global AUTH_LDAP_USER_ATTR_MAP_PERSONALNAME
|
||||
AUTH_LDAP_USER_ATTR_MAP_PERSONALNAME = (
|
||||
environ.get("TA_LDAP_USER_ATTR_MAP_PERSONALNAME")
|
||||
or environ.get("TA_LDAP_USER_ATTR_MAP_FIRSTNAME")
|
||||
or environ.get("TA_LDAP_USER_ATTR_MAP_GIVENNAME")
|
||||
or "givenName"
|
||||
)
|
||||
|
||||
global AUTH_LDAP_USER_ATTR_MAP_SURNAME
|
||||
AUTH_LDAP_USER_ATTR_MAP_SURNAME = (
|
||||
environ.get("TA_LDAP_USER_ATTR_MAP_SURNAME")
|
||||
or environ.get("TA_LDAP_USER_ATTR_MAP_LASTNAME")
|
||||
or environ.get("TA_LDAP_USER_ATTR_MAP_FAMILYNAME")
|
||||
or "sn"
|
||||
)
|
||||
|
||||
global AUTH_LDAP_USER_ATTR_MAP_EMAIL
|
||||
AUTH_LDAP_USER_ATTR_MAP_EMAIL = (
|
||||
environ.get("TA_LDAP_USER_ATTR_MAP_EMAIL")
|
||||
or environ.get("TA_LDAP_USER_ATTR_MAP_MAIL")
|
||||
or "mail"
|
||||
)
|
||||
|
||||
global AUTH_LDAP_USER_BASE
|
||||
AUTH_LDAP_USER_BASE = environ.get("TA_LDAP_USER_BASE")
|
||||
|
||||
global AUTH_LDAP_USER_FILTER
|
||||
AUTH_LDAP_USER_FILTER = environ.get("TA_LDAP_USER_FILTER")
|
||||
|
||||
global AUTH_LDAP_USER_SEARCH
|
||||
# pylint: disable=no-member
|
||||
AUTH_LDAP_USER_SEARCH = LDAPSearch(
|
||||
AUTH_LDAP_USER_BASE,
|
||||
environ.get("TA_LDAP_USER_BASE"),
|
||||
ldap.SCOPE_SUBTREE,
|
||||
"(&("
|
||||
+ AUTH_LDAP_USER_ATTR_MAP_USERNAME
|
||||
+ "=%(user)s)"
|
||||
+ AUTH_LDAP_USER_FILTER
|
||||
+ ")",
|
||||
"(&(uid=%(user)s)" + environ.get("TA_LDAP_USER_FILTER") + ")",
|
||||
)
|
||||
|
||||
global AUTH_LDAP_USER_ATTR_MAP
|
||||
AUTH_LDAP_USER_ATTR_MAP = {
|
||||
"username": AUTH_LDAP_USER_ATTR_MAP_USERNAME,
|
||||
"first_name": AUTH_LDAP_USER_ATTR_MAP_PERSONALNAME,
|
||||
"last_name": AUTH_LDAP_USER_ATTR_MAP_SURNAME,
|
||||
"email": AUTH_LDAP_USER_ATTR_MAP_EMAIL,
|
||||
"username": "uid",
|
||||
"first_name": "givenName",
|
||||
"last_name": "sn",
|
||||
"email": "mail",
|
||||
}
|
||||
|
||||
if bool(environ.get("TA_LDAP_DISABLE_CERT_CHECK")):
|
||||
@ -178,12 +125,13 @@ if bool(environ.get("TA_LDAP")):
|
||||
ldap.OPT_X_TLS_REQUIRE_CERT: ldap.OPT_X_TLS_NEVER,
|
||||
}
|
||||
|
||||
global AUTHENTICATION_BACKENDS
|
||||
AUTHENTICATION_BACKENDS = ("django_auth_ldap.backend.LDAPBackend",)
|
||||
|
||||
# Database
|
||||
# https://docs.djangoproject.com/en/3.2/ref/settings/#databases
|
||||
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
CACHE_DIR = AppConfig().config["application"]["cache_dir"]
|
||||
DB_PATH = path.join(CACHE_DIR, "db.sqlite3")
|
||||
DATABASES = {
|
||||
"default": {
|
||||
@ -213,25 +161,12 @@ AUTH_PASSWORD_VALIDATORS = [
|
||||
|
||||
AUTH_USER_MODEL = "home.Account"
|
||||
|
||||
# Forward-auth authentication
|
||||
if bool(environ.get("TA_ENABLE_AUTH_PROXY")):
|
||||
TA_AUTH_PROXY_USERNAME_HEADER = (
|
||||
environ.get("TA_AUTH_PROXY_USERNAME_HEADER") or "HTTP_REMOTE_USER"
|
||||
)
|
||||
TA_AUTH_PROXY_LOGOUT_URL = environ.get("TA_AUTH_PROXY_LOGOUT_URL")
|
||||
|
||||
MIDDLEWARE.append("home.src.ta.auth.HttpRemoteUserMiddleware")
|
||||
|
||||
AUTHENTICATION_BACKENDS = (
|
||||
"django.contrib.auth.backends.RemoteUserBackend",
|
||||
)
|
||||
|
||||
|
||||
# Internationalization
|
||||
# https://docs.djangoproject.com/en/3.2/topics/i18n/
|
||||
|
||||
LANGUAGE_CODE = "en-us"
|
||||
TIME_ZONE = EnvironmentSettings.TZ
|
||||
TIME_ZONE = environ.get("TZ") or "UTC"
|
||||
USE_I18N = True
|
||||
USE_L10N = True
|
||||
USE_TZ = True
|
||||
@ -243,11 +178,7 @@ USE_TZ = True
|
||||
STATIC_URL = "/static/"
|
||||
STATICFILES_DIRS = (str(BASE_DIR.joinpath("static")),)
|
||||
STATIC_ROOT = str(BASE_DIR.joinpath("staticfiles"))
|
||||
STORAGES = {
|
||||
"staticfiles": {
|
||||
"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage",
|
||||
},
|
||||
}
|
||||
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
|
||||
|
||||
# Default primary key field type
|
||||
# https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field
|
||||
@ -276,4 +207,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [
|
||||
|
||||
# TA application settings
|
||||
TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist"
|
||||
TA_VERSION = "v0.4.11-unstable"
|
||||
TA_VERSION = "v0.2.3"
|
||||
|
@ -13,7 +13,6 @@ Including another URLconf
|
||||
1. Import the include() function: from django.urls import include, path
|
||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
||||
"""
|
||||
|
||||
from django.contrib import admin
|
||||
from django.urls import include, path
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
"""start celery app"""
|
||||
""" handle celery startup """
|
||||
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from home.celery import app as celery_app
|
||||
from .tasks import app as celery_app
|
||||
|
||||
__all__ = ("celery_app",)
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.admin import UserAdmin as BaseUserAdmin
|
||||
from django_celery_beat import models as BeatModels
|
||||
|
||||
from .models import Account
|
||||
|
||||
@ -35,12 +34,3 @@ class HomeAdmin(BaseUserAdmin):
|
||||
|
||||
|
||||
admin.site.register(Account, HomeAdmin)
|
||||
admin.site.unregister(
|
||||
[
|
||||
BeatModels.ClockedSchedule,
|
||||
BeatModels.CrontabSchedule,
|
||||
BeatModels.IntervalSchedule,
|
||||
BeatModels.PeriodicTask,
|
||||
BeatModels.SolarSchedule,
|
||||
]
|
||||
)
|
||||
|
@ -1,6 +1,114 @@
|
||||
"""handle custom startup functions"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from django.apps import AppConfig
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.index_setup import index_check
|
||||
from home.src.ta.config import AppConfig as ArchivistConfig
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class StartupCheck:
|
||||
"""checks to run at application startup"""
|
||||
|
||||
MIN_MAJOR, MAX_MAJOR = 8, 8
|
||||
MIN_MINOR = 0
|
||||
|
||||
def __init__(self):
|
||||
self.config_handler = ArchivistConfig()
|
||||
self.redis_con = RedisArchivist()
|
||||
self.has_run = self.get_has_run()
|
||||
|
||||
def run(self):
|
||||
"""run all startup checks"""
|
||||
print("run startup checks")
|
||||
self.es_version_check()
|
||||
self.release_lock()
|
||||
index_check()
|
||||
self.sync_redis_state()
|
||||
self.set_redis_conf()
|
||||
self.make_folders()
|
||||
self.set_has_run()
|
||||
|
||||
def get_has_run(self):
|
||||
"""validate if check has already executed"""
|
||||
return self.redis_con.get_message("startup_check")
|
||||
|
||||
def set_has_run(self):
|
||||
"""startup checks run"""
|
||||
message = {"status": True}
|
||||
self.redis_con.set_message("startup_check", message, expire=120)
|
||||
|
||||
def sync_redis_state(self):
|
||||
"""make sure redis gets new config.json values"""
|
||||
print("sync redis")
|
||||
self.config_handler.load_new_defaults()
|
||||
|
||||
def set_redis_conf(self):
|
||||
"""set conf values for redis"""
|
||||
self.redis_con.conn.config_set("timeout", 3600)
|
||||
|
||||
def make_folders(self):
|
||||
"""make needed cache folders here so docker doesn't mess it up"""
|
||||
folders = [
|
||||
"download",
|
||||
"channels",
|
||||
"videos",
|
||||
"playlists",
|
||||
"import",
|
||||
"backup",
|
||||
]
|
||||
cache_dir = self.config_handler.config["application"]["cache_dir"]
|
||||
for folder in folders:
|
||||
folder_path = os.path.join(cache_dir, folder)
|
||||
try:
|
||||
os.makedirs(folder_path)
|
||||
except FileExistsError:
|
||||
continue
|
||||
|
||||
def release_lock(self):
|
||||
"""make sure there are no leftover locks set in redis"""
|
||||
all_locks = [
|
||||
"startup_check",
|
||||
"manual_import",
|
||||
"downloading",
|
||||
"dl_queue",
|
||||
"dl_queue_id",
|
||||
"rescan",
|
||||
"run_backup",
|
||||
]
|
||||
for lock in all_locks:
|
||||
response = self.redis_con.del_message(lock)
|
||||
if response:
|
||||
print("deleted leftover key from redis: " + lock)
|
||||
|
||||
def is_invalid(self, version):
|
||||
"""return true if es version is invalid, false if ok"""
|
||||
major, minor = [int(i) for i in version.split(".")[:2]]
|
||||
if not self.MIN_MAJOR <= major <= self.MAX_MAJOR:
|
||||
return True
|
||||
|
||||
if minor >= self.MIN_MINOR:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def es_version_check(self):
|
||||
"""check for minimal elasticsearch version"""
|
||||
response, _ = ElasticWrap("/").get()
|
||||
version = response["version"]["number"]
|
||||
invalid = self.is_invalid(version)
|
||||
|
||||
if invalid:
|
||||
print(
|
||||
"required elasticsearch version: "
|
||||
+ f"{self.MIN_MAJOR}.{self.MIN_MINOR}"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
print("elasticsearch version check passed")
|
||||
|
||||
|
||||
class HomeConfig(AppConfig):
|
||||
@ -8,3 +116,11 @@ class HomeConfig(AppConfig):
|
||||
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "home"
|
||||
|
||||
def ready(self):
|
||||
startup = StartupCheck()
|
||||
if startup.has_run["status"]:
|
||||
print("startup checks run in other thread")
|
||||
return
|
||||
|
||||
startup.run()
|
||||
|
@ -1,22 +0,0 @@
|
||||
"""initiate celery"""
|
||||
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
REDIS_HOST = EnvironmentSettings.REDIS_HOST
|
||||
REDIS_PORT = EnvironmentSettings.REDIS_PORT
|
||||
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
|
||||
app = Celery(
|
||||
"tasks",
|
||||
broker=f"redis://{REDIS_HOST}:{REDIS_PORT}",
|
||||
backend=f"redis://{REDIS_HOST}:{REDIS_PORT}",
|
||||
result_extended=True,
|
||||
)
|
||||
app.config_from_object(
|
||||
"django.conf:settings", namespace=EnvironmentSettings.REDIS_NAME_SPACE
|
||||
)
|
||||
app.autodiscover_tasks()
|
||||
app.conf.timezone = EnvironmentSettings.TZ
|
@ -1,30 +1,51 @@
|
||||
{
|
||||
"archive": {
|
||||
"sort_by": "published",
|
||||
"sort_order": "desc",
|
||||
"page_size": 12
|
||||
},
|
||||
"default_view": {
|
||||
"home": "grid",
|
||||
"channel": "list",
|
||||
"downloads": "list",
|
||||
"playlist": "grid",
|
||||
"grid_items": 3
|
||||
},
|
||||
"subscriptions": {
|
||||
"channel_size": 50,
|
||||
"live_channel_size": 50,
|
||||
"shorts_channel_size": 50,
|
||||
"auto_start": false
|
||||
"auto_search": false,
|
||||
"auto_download": false,
|
||||
"channel_size": 50
|
||||
},
|
||||
"downloads": {
|
||||
"limit_count": false,
|
||||
"limit_speed": false,
|
||||
"sleep_interval": 3,
|
||||
"autodelete_days": false,
|
||||
"format": false,
|
||||
"format_sort": false,
|
||||
"add_metadata": false,
|
||||
"add_thumbnail": false,
|
||||
"subtitle": false,
|
||||
"subtitle_source": false,
|
||||
"subtitle_index": false,
|
||||
"comment_max": false,
|
||||
"comment_sort": "top",
|
||||
"cookie_import": false,
|
||||
"throttledratelimit": false,
|
||||
"extractor_lang": false,
|
||||
"integrate_ryd": false,
|
||||
"integrate_sponsorblock": false
|
||||
},
|
||||
"application": {
|
||||
"enable_snapshot": true
|
||||
"app_root": "/app",
|
||||
"cache_dir": "/cache",
|
||||
"videos": "/youtube",
|
||||
"colors": "dark",
|
||||
"enable_cast": false
|
||||
},
|
||||
"scheduler": {
|
||||
"update_subscribed": false,
|
||||
"download_pending": false,
|
||||
"check_reindex": {"minute": "0", "hour": "12", "day_of_week": "*"},
|
||||
"check_reindex_days": 90,
|
||||
"thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
|
||||
"run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
|
||||
"run_backup_rotate": 5
|
||||
}
|
||||
}
|
||||
|
@ -1,35 +0,0 @@
|
||||
# Generated by Django 4.1.5 on 2023-02-02 06:49
|
||||
|
||||
from django.db import migrations, models
|
||||
import home.models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
('auth', '0012_alter_user_first_name_max_length'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='Account',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('password', models.CharField(max_length=128, verbose_name='password')),
|
||||
('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')),
|
||||
('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')),
|
||||
('name', models.CharField(max_length=150, unique=True)),
|
||||
('is_staff', models.BooleanField(default=False)),
|
||||
('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')),
|
||||
('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
},
|
||||
managers=[
|
||||
('objects', home.models.AccountManager()),
|
||||
],
|
||||
),
|
||||
]
|
@ -1,23 +0,0 @@
|
||||
# Generated by Django 4.2.7 on 2023-12-05 13:47
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('django_celery_beat', '0018_improve_crontab_helptext'),
|
||||
('home', '0001_initial'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='CustomPeriodicTask',
|
||||
fields=[
|
||||
('periodictask_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='django_celery_beat.periodictask')),
|
||||
('task_config', models.JSONField(default=dict)),
|
||||
],
|
||||
bases=('django_celery_beat.periodictask',),
|
||||
),
|
||||
]
|
@ -1,12 +1,10 @@
|
||||
"""custom models"""
|
||||
|
||||
from django.contrib.auth.models import (
|
||||
AbstractBaseUser,
|
||||
BaseUserManager,
|
||||
PermissionsMixin,
|
||||
)
|
||||
from django.db import models
|
||||
from django_celery_beat.models import PeriodicTask
|
||||
|
||||
|
||||
class AccountManager(BaseUserManager):
|
||||
@ -53,9 +51,3 @@ class Account(AbstractBaseUser, PermissionsMixin):
|
||||
|
||||
USERNAME_FIELD = "name"
|
||||
REQUIRED_FIELDS = ["password"]
|
||||
|
||||
|
||||
class CustomPeriodicTask(PeriodicTask):
|
||||
"""add custom metadata to to task"""
|
||||
|
||||
task_config = models.JSONField(default=dict)
|
||||
|
@ -7,14 +7,17 @@ Functionality:
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from home.src.download.subscriptions import ChannelSubscription
|
||||
from home.src.download.subscriptions import (
|
||||
ChannelSubscription,
|
||||
PlaylistSubscription,
|
||||
)
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import get_duration_str, is_shorts
|
||||
from home.src.ta.helper import DurationConverter
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class PendingIndex:
|
||||
@ -92,13 +95,13 @@ class PendingIndex:
|
||||
class PendingInteract:
|
||||
"""interact with items in download queue"""
|
||||
|
||||
def __init__(self, youtube_id=False, status=False):
|
||||
self.youtube_id = youtube_id
|
||||
def __init__(self, video_id=False, status=False):
|
||||
self.video_id = video_id
|
||||
self.status = status
|
||||
|
||||
def delete_item(self):
|
||||
"""delete single item from pending"""
|
||||
path = f"ta_download/_doc/{self.youtube_id}"
|
||||
path = f"ta_download/_doc/{self.video_id}"
|
||||
_, _ = ElasticWrap(path).delete(refresh=True)
|
||||
|
||||
def delete_by_status(self):
|
||||
@ -108,63 +111,29 @@ class PendingInteract:
|
||||
_, _ = ElasticWrap(path).post(data=data)
|
||||
|
||||
def update_status(self):
|
||||
"""update status of pending item"""
|
||||
if self.status == "priority":
|
||||
data = {
|
||||
"doc": {
|
||||
"status": "pending",
|
||||
"auto_start": True,
|
||||
"message": None,
|
||||
}
|
||||
}
|
||||
else:
|
||||
data = {"doc": {"status": self.status}}
|
||||
|
||||
path = f"ta_download/_update/{self.youtube_id}/?refresh=true"
|
||||
"""update status field of pending item"""
|
||||
data = {"doc": {"status": self.status}}
|
||||
path = f"ta_download/_update/{self.video_id}"
|
||||
_, _ = ElasticWrap(path).post(data=data)
|
||||
|
||||
def get_item(self):
|
||||
"""return pending item dict"""
|
||||
path = f"ta_download/_doc/{self.youtube_id}"
|
||||
response, status_code = ElasticWrap(path).get()
|
||||
return response["_source"], status_code
|
||||
|
||||
def get_channel(self):
|
||||
"""
|
||||
get channel metadata from queue to not depend on channel to be indexed
|
||||
"""
|
||||
data = {
|
||||
"size": 1,
|
||||
"query": {"term": {"channel_id": {"value": self.youtube_id}}},
|
||||
}
|
||||
response, _ = ElasticWrap("ta_download/_search").get(data=data)
|
||||
hits = response["hits"]["hits"]
|
||||
if not hits:
|
||||
channel_name = "NA"
|
||||
else:
|
||||
channel_name = hits[0]["_source"].get("channel_name", "NA")
|
||||
|
||||
return {
|
||||
"channel_id": self.youtube_id,
|
||||
"channel_name": channel_name,
|
||||
}
|
||||
|
||||
|
||||
class PendingList(PendingIndex):
|
||||
"""manage the pending videos list"""
|
||||
|
||||
yt_obs = {
|
||||
"default_search": "ytsearch",
|
||||
"quiet": True,
|
||||
"check_formats": "selected",
|
||||
"noplaylist": True,
|
||||
"writethumbnail": True,
|
||||
"simulate": True,
|
||||
"check_formats": None,
|
||||
"socket_timeout": 3,
|
||||
}
|
||||
|
||||
def __init__(self, youtube_ids=False, task=False):
|
||||
def __init__(self, youtube_ids=False):
|
||||
super().__init__()
|
||||
self.config = AppConfig().config
|
||||
self.youtube_ids = youtube_ids
|
||||
self.task = task
|
||||
self.to_skip = False
|
||||
self.missing_videos = False
|
||||
|
||||
@ -173,135 +142,100 @@ class PendingList(PendingIndex):
|
||||
self.missing_videos = []
|
||||
self.get_download()
|
||||
self.get_indexed()
|
||||
total = len(self.youtube_ids)
|
||||
for idx, entry in enumerate(self.youtube_ids):
|
||||
for entry in self.youtube_ids:
|
||||
# notify
|
||||
mess_dict = {
|
||||
"status": "message:add",
|
||||
"level": "info",
|
||||
"title": "Adding to download queue.",
|
||||
"message": "Extracting lists",
|
||||
}
|
||||
RedisArchivist().set_message("message:add", mess_dict, expire=True)
|
||||
self._process_entry(entry)
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
self.task.send_progress(
|
||||
message_lines=[f"Extracting items {idx + 1}/{total}"],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
|
||||
def _process_entry(self, entry):
|
||||
"""process single entry from url list"""
|
||||
vid_type = self._get_vid_type(entry)
|
||||
if entry["type"] == "video":
|
||||
self._add_video(entry["url"], vid_type)
|
||||
self._add_video(entry["url"])
|
||||
elif entry["type"] == "channel":
|
||||
self._parse_channel(entry["url"], vid_type)
|
||||
self._parse_channel(entry["url"])
|
||||
elif entry["type"] == "playlist":
|
||||
self._parse_playlist(entry["url"])
|
||||
PlaylistSubscription().process_url_str([entry], subscribed=False)
|
||||
else:
|
||||
raise ValueError(f"invalid url_type: {entry}")
|
||||
|
||||
@staticmethod
|
||||
def _get_vid_type(entry):
|
||||
"""add vid type enum if available"""
|
||||
vid_type_str = entry.get("vid_type")
|
||||
if not vid_type_str:
|
||||
return VideoTypeEnum.UNKNOWN
|
||||
|
||||
return VideoTypeEnum(vid_type_str)
|
||||
|
||||
def _add_video(self, url, vid_type):
|
||||
def _add_video(self, url):
|
||||
"""add video to list"""
|
||||
if url not in self.missing_videos and url not in self.to_skip:
|
||||
self.missing_videos.append((url, vid_type))
|
||||
self.missing_videos.append(url)
|
||||
else:
|
||||
print(f"{url}: skipped adding already indexed video to download.")
|
||||
|
||||
def _parse_channel(self, url, vid_type):
|
||||
def _parse_channel(self, url):
|
||||
"""add all videos of channel to list"""
|
||||
video_results = ChannelSubscription().get_last_youtube_videos(
|
||||
url, limit=False, query_filter=vid_type
|
||||
url, limit=False
|
||||
)
|
||||
for video_id, _, vid_type in video_results:
|
||||
self._add_video(video_id, vid_type)
|
||||
youtube_ids = [i[0] for i in video_results]
|
||||
for video_id in youtube_ids:
|
||||
self._add_video(video_id)
|
||||
|
||||
def _parse_playlist(self, url):
|
||||
"""add all videos of playlist to list"""
|
||||
playlist = YoutubePlaylist(url)
|
||||
is_active = playlist.update_playlist()
|
||||
if not is_active:
|
||||
message = f"{playlist.youtube_id}: failed to extract metadata"
|
||||
print(message)
|
||||
raise ValueError(message)
|
||||
playlist.build_json()
|
||||
video_results = playlist.json_data.get("playlist_entries")
|
||||
youtube_ids = [i["youtube_id"] for i in video_results]
|
||||
for video_id in youtube_ids:
|
||||
self._add_video(video_id)
|
||||
|
||||
entries = playlist.json_data["playlist_entries"]
|
||||
to_add = [i["youtube_id"] for i in entries if not i["downloaded"]]
|
||||
if not to_add:
|
||||
return
|
||||
|
||||
for video_id in to_add:
|
||||
# match vid_type later
|
||||
self._add_video(video_id, VideoTypeEnum.UNKNOWN)
|
||||
|
||||
def add_to_pending(self, status="pending", auto_start=False):
|
||||
def add_to_pending(self, status="pending"):
|
||||
"""add missing videos to pending list"""
|
||||
self.get_channels()
|
||||
bulk_list = []
|
||||
|
||||
total = len(self.missing_videos)
|
||||
videos_added = []
|
||||
for idx, (youtube_id, vid_type) in enumerate(self.missing_videos):
|
||||
if self.task and self.task.is_stopped():
|
||||
break
|
||||
|
||||
print(f"{youtube_id}: [{idx + 1}/{total}]: add to queue")
|
||||
self._notify_add(idx, total)
|
||||
video_details = self.get_youtube_details(youtube_id, vid_type)
|
||||
for idx, youtube_id in enumerate(self.missing_videos):
|
||||
print(f"{youtube_id}: add to download queue")
|
||||
video_details = self.get_youtube_details(youtube_id)
|
||||
if not video_details:
|
||||
continue
|
||||
|
||||
video_details.update(
|
||||
{
|
||||
"status": status,
|
||||
"auto_start": auto_start,
|
||||
}
|
||||
)
|
||||
|
||||
video_details["status"] = status
|
||||
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
|
||||
bulk_list.append(json.dumps(action))
|
||||
bulk_list.append(json.dumps(video_details))
|
||||
|
||||
url = video_details["vid_thumb_url"]
|
||||
ThumbManager(youtube_id).download_video_thumb(url)
|
||||
videos_added.append(youtube_id)
|
||||
|
||||
if len(bulk_list) >= 20:
|
||||
self._ingest_bulk(bulk_list)
|
||||
bulk_list = []
|
||||
self._notify_add(idx)
|
||||
|
||||
self._ingest_bulk(bulk_list)
|
||||
if bulk_list:
|
||||
# add last newline
|
||||
bulk_list.append("\n")
|
||||
query_str = "\n".join(bulk_list)
|
||||
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
|
||||
|
||||
return videos_added
|
||||
|
||||
def _ingest_bulk(self, bulk_list):
|
||||
"""add items to queue in bulk"""
|
||||
if not bulk_list:
|
||||
return
|
||||
|
||||
# add last newline
|
||||
bulk_list.append("\n")
|
||||
query_str = "\n".join(bulk_list)
|
||||
_, _ = ElasticWrap("_bulk?refresh=true").post(query_str, ndjson=True)
|
||||
|
||||
def _notify_add(self, idx, total):
|
||||
def _notify_add(self, idx):
|
||||
"""send notification for adding videos to download queue"""
|
||||
if not self.task:
|
||||
return
|
||||
progress = f"{idx + 1}/{len(self.missing_videos)}"
|
||||
mess_dict = {
|
||||
"status": "message:add",
|
||||
"level": "info",
|
||||
"title": "Adding new videos to download queue.",
|
||||
"message": "Progress: " + progress,
|
||||
}
|
||||
if idx + 1 == len(self.missing_videos):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
|
||||
self.task.send_progress(
|
||||
message_lines=[
|
||||
"Adding new videos to download queue.",
|
||||
f"Extracting items {idx + 1}/{total}",
|
||||
],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
RedisArchivist().set_message("message:add", mess_dict, expire=expire)
|
||||
if idx + 1 % 25 == 0:
|
||||
print("adding to queue progress: " + progress)
|
||||
|
||||
def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEOS):
|
||||
def get_youtube_details(self, youtube_id):
|
||||
"""get details from youtubedl for single pending video"""
|
||||
vid = YtWrap(self.yt_obs, self.config).extract(youtube_id)
|
||||
if not vid:
|
||||
@ -313,39 +247,16 @@ class PendingList(PendingIndex):
|
||||
return False
|
||||
# stop if video is streaming live now
|
||||
if vid["live_status"] in ["is_upcoming", "is_live"]:
|
||||
print(f"{youtube_id}: skip is_upcoming or is_live")
|
||||
return False
|
||||
|
||||
if vid["live_status"] == "was_live":
|
||||
vid_type = VideoTypeEnum.STREAMS
|
||||
else:
|
||||
if self._check_shorts(vid):
|
||||
vid_type = VideoTypeEnum.SHORTS
|
||||
else:
|
||||
vid_type = VideoTypeEnum.VIDEOS
|
||||
return self._parse_youtube_details(vid)
|
||||
|
||||
if not vid.get("channel"):
|
||||
print(f"{youtube_id}: skip video not part of channel")
|
||||
return False
|
||||
|
||||
return self._parse_youtube_details(vid, vid_type)
|
||||
|
||||
@staticmethod
|
||||
def _check_shorts(vid):
|
||||
"""check if vid is shorts video"""
|
||||
if vid["width"] > vid["height"]:
|
||||
return False
|
||||
|
||||
duration = vid.get("duration")
|
||||
if duration and isinstance(duration, int):
|
||||
if duration > 60:
|
||||
return False
|
||||
|
||||
return is_shorts(vid["id"])
|
||||
|
||||
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS):
|
||||
def _parse_youtube_details(self, vid):
|
||||
"""parse response"""
|
||||
vid_id = vid.get("id")
|
||||
duration_str = DurationConverter.get_str(vid["duration"])
|
||||
if duration_str == "NA":
|
||||
print(f"skip extracting duration for: {vid_id}")
|
||||
published = datetime.strptime(vid["upload_date"], "%Y%m%d").strftime(
|
||||
"%Y-%m-%d"
|
||||
)
|
||||
@ -357,11 +268,9 @@ class PendingList(PendingIndex):
|
||||
"vid_thumb_url": vid["thumbnail"],
|
||||
"title": vid["title"],
|
||||
"channel_id": vid["channel_id"],
|
||||
"duration": get_duration_str(vid["duration"]),
|
||||
"duration": duration_str,
|
||||
"published": published,
|
||||
"timestamp": int(datetime.now().timestamp()),
|
||||
# Pulling enum value out so it is serializable
|
||||
"vid_type": vid_type.value,
|
||||
"timestamp": int(datetime.now().strftime("%s")),
|
||||
}
|
||||
if self.all_channels:
|
||||
youtube_details.update(
|
||||
|
@ -4,24 +4,21 @@ Functionality:
|
||||
- handle playlist subscriptions
|
||||
"""
|
||||
|
||||
from home.src.download import queue # partial import
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.es.connect import IndexPaginate
|
||||
from home.src.index.channel import YoutubeChannel
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.index.video import YoutubeVideo
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import is_missing
|
||||
from home.src.ta.urlparser import Parser
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class ChannelSubscription:
|
||||
"""manage the list of channels subscribed"""
|
||||
|
||||
def __init__(self, task=False):
|
||||
def __init__(self):
|
||||
self.config = AppConfig().config
|
||||
self.task = task
|
||||
|
||||
@staticmethod
|
||||
def get_channels(subscribed_only=True):
|
||||
@ -38,75 +35,55 @@ class ChannelSubscription:
|
||||
|
||||
return all_channels
|
||||
|
||||
def get_last_youtube_videos(
|
||||
self,
|
||||
channel_id,
|
||||
limit=True,
|
||||
query_filter=None,
|
||||
channel_overwrites=None,
|
||||
):
|
||||
def get_last_youtube_videos(self, channel_id, limit=True):
|
||||
"""get a list of last videos from channel"""
|
||||
query_handler = VideoQueryBuilder(self.config, channel_overwrites)
|
||||
queries = query_handler.build_queries(query_filter)
|
||||
last_videos = []
|
||||
obs = {
|
||||
"skip_download": True,
|
||||
"extract_flat": True,
|
||||
}
|
||||
if limit:
|
||||
obs["playlistend"] = self.config["subscriptions"]["channel_size"]
|
||||
|
||||
for vid_type_enum, limit_amount in queries:
|
||||
obs = {
|
||||
"skip_download": True,
|
||||
"extract_flat": True,
|
||||
}
|
||||
vid_type = vid_type_enum.value
|
||||
|
||||
if limit:
|
||||
obs["playlistend"] = limit_amount
|
||||
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/{vid_type}"
|
||||
channel_query = YtWrap(obs, self.config).extract(url)
|
||||
if not channel_query:
|
||||
continue
|
||||
|
||||
last_videos.extend(
|
||||
[
|
||||
(i["id"], i["title"], vid_type)
|
||||
for i in channel_query["entries"]
|
||||
]
|
||||
)
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/videos"
|
||||
channel = YtWrap(obs, self.config).extract(url)
|
||||
if not channel:
|
||||
return False
|
||||
|
||||
last_videos = [(i["id"], i["title"]) for i in channel["entries"]]
|
||||
return last_videos
|
||||
|
||||
def find_missing(self):
|
||||
"""add missing videos from subscribed channels to pending"""
|
||||
all_channels = self.get_channels()
|
||||
if not all_channels:
|
||||
return False
|
||||
pending = queue.PendingList()
|
||||
pending.get_download()
|
||||
pending.get_indexed()
|
||||
|
||||
missing_videos = []
|
||||
|
||||
total = len(all_channels)
|
||||
for idx, channel in enumerate(all_channels):
|
||||
channel_id = channel["channel_id"]
|
||||
print(f"{channel_id}: find missing videos.")
|
||||
last_videos = self.get_last_youtube_videos(
|
||||
channel_id,
|
||||
channel_overwrites=channel.get("channel_overwrites"),
|
||||
)
|
||||
last_videos = self.get_last_youtube_videos(channel_id)
|
||||
|
||||
if last_videos:
|
||||
ids_to_add = is_missing([i[0] for i in last_videos])
|
||||
for video_id, _, vid_type in last_videos:
|
||||
if video_id in ids_to_add:
|
||||
missing_videos.append((video_id, vid_type))
|
||||
for video in last_videos:
|
||||
if video[0] not in pending.to_skip:
|
||||
missing_videos.append(video[0])
|
||||
# notify
|
||||
message = {
|
||||
"status": "message:rescan",
|
||||
"level": "info",
|
||||
"title": "Scanning channels: Looking for new videos.",
|
||||
"message": f"Progress: {idx + 1}/{len(all_channels)}",
|
||||
}
|
||||
if idx + 1 == len(all_channels):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
if self.task.is_stopped():
|
||||
self.task.send_progress(["Received Stop signal."])
|
||||
break
|
||||
|
||||
self.task.send_progress(
|
||||
message_lines=[f"Scanning Channel {idx + 1}/{total}"],
|
||||
progress=(idx + 1) / total,
|
||||
RedisArchivist().set_message(
|
||||
"message:rescan", message=message, expire=expire
|
||||
)
|
||||
|
||||
return missing_videos
|
||||
@ -121,98 +98,11 @@ class ChannelSubscription:
|
||||
channel.sync_to_videos()
|
||||
|
||||
|
||||
class VideoQueryBuilder:
|
||||
"""Build queries for yt-dlp."""
|
||||
|
||||
def __init__(self, config: dict, channel_overwrites: dict | None = None):
|
||||
self.config = config
|
||||
self.channel_overwrites = channel_overwrites or {}
|
||||
|
||||
def build_queries(
|
||||
self, video_type: VideoTypeEnum | None, limit: bool = True
|
||||
) -> list[tuple[VideoTypeEnum, int | None]]:
|
||||
"""Build queries for all or specific video type."""
|
||||
query_methods = {
|
||||
VideoTypeEnum.VIDEOS: self.videos_query,
|
||||
VideoTypeEnum.STREAMS: self.streams_query,
|
||||
VideoTypeEnum.SHORTS: self.shorts_query,
|
||||
}
|
||||
|
||||
if video_type:
|
||||
# build query for specific type
|
||||
query_method = query_methods.get(video_type)
|
||||
if query_method:
|
||||
query = query_method(limit)
|
||||
if query[1] != 0:
|
||||
return [query]
|
||||
return []
|
||||
|
||||
# Build and return queries for all video types
|
||||
queries = []
|
||||
for build_query in query_methods.values():
|
||||
query = build_query(limit)
|
||||
if query[1] != 0:
|
||||
queries.append(query)
|
||||
|
||||
return queries
|
||||
|
||||
def videos_query(self, limit: bool) -> tuple[VideoTypeEnum, int | None]:
|
||||
"""Build query for videos."""
|
||||
return self._build_generic_query(
|
||||
video_type=VideoTypeEnum.VIDEOS,
|
||||
overwrite_key="subscriptions_channel_size",
|
||||
config_key="channel_size",
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
def streams_query(self, limit: bool) -> tuple[VideoTypeEnum, int | None]:
|
||||
"""Build query for streams."""
|
||||
return self._build_generic_query(
|
||||
video_type=VideoTypeEnum.STREAMS,
|
||||
overwrite_key="subscriptions_live_channel_size",
|
||||
config_key="live_channel_size",
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
def shorts_query(self, limit: bool) -> tuple[VideoTypeEnum, int | None]:
|
||||
"""Build query for shorts."""
|
||||
return self._build_generic_query(
|
||||
video_type=VideoTypeEnum.SHORTS,
|
||||
overwrite_key="subscriptions_shorts_channel_size",
|
||||
config_key="shorts_channel_size",
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
def _build_generic_query(
|
||||
self,
|
||||
video_type: VideoTypeEnum,
|
||||
overwrite_key: str,
|
||||
config_key: str,
|
||||
limit: bool,
|
||||
) -> tuple[VideoTypeEnum, int | None]:
|
||||
"""Generic query for video page scraping."""
|
||||
if not limit:
|
||||
return (video_type, None)
|
||||
|
||||
if (
|
||||
overwrite_key in self.channel_overwrites
|
||||
and self.channel_overwrites[overwrite_key] is not None
|
||||
):
|
||||
overwrite = self.channel_overwrites[overwrite_key]
|
||||
return (video_type, overwrite)
|
||||
|
||||
if overwrite := self.config["subscriptions"].get(config_key):
|
||||
return (video_type, overwrite)
|
||||
|
||||
return (video_type, 0)
|
||||
|
||||
|
||||
class PlaylistSubscription:
|
||||
"""manage the playlist download functionality"""
|
||||
|
||||
def __init__(self, task=False):
|
||||
def __init__(self):
|
||||
self.config = AppConfig().config
|
||||
self.task = task
|
||||
|
||||
@staticmethod
|
||||
def get_playlists(subscribed_only=True):
|
||||
@ -234,6 +124,13 @@ class PlaylistSubscription:
|
||||
|
||||
def process_url_str(self, new_playlists, subscribed=True):
|
||||
"""process playlist subscribe form url_str"""
|
||||
data = {
|
||||
"query": {"match_all": {}},
|
||||
"sort": [{"published": {"order": "desc"}}],
|
||||
}
|
||||
all_indexed = IndexPaginate("ta_video", data).get_results()
|
||||
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
|
||||
|
||||
for idx, playlist in enumerate(new_playlists):
|
||||
playlist_id = playlist["url"]
|
||||
if not playlist["type"] == "playlist":
|
||||
@ -241,12 +138,8 @@ class PlaylistSubscription:
|
||||
continue
|
||||
|
||||
playlist_h = YoutubePlaylist(playlist_id)
|
||||
playlist_h.all_youtube_ids = all_youtube_ids
|
||||
playlist_h.build_json()
|
||||
if not playlist_h.json_data:
|
||||
message = f"{playlist_h.youtube_id}: failed to extract data"
|
||||
print(message)
|
||||
raise ValueError(message)
|
||||
|
||||
playlist_h.json_data["playlist_subscribed"] = subscribed
|
||||
playlist_h.upload_to_es()
|
||||
playlist_h.add_vids_to_playlist()
|
||||
@ -256,13 +149,16 @@ class PlaylistSubscription:
|
||||
thumb = ThumbManager(playlist_id, item_type="playlist")
|
||||
thumb.download_playlist_thumb(url)
|
||||
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
message_lines=[
|
||||
f"Processing {idx + 1} of {len(new_playlists)}"
|
||||
],
|
||||
progress=(idx + 1) / len(new_playlists),
|
||||
)
|
||||
# notify
|
||||
message = {
|
||||
"status": "message:subplaylist",
|
||||
"level": "info",
|
||||
"title": "Subscribing to Playlists",
|
||||
"message": f"Processing {idx + 1} of {len(new_playlists)}",
|
||||
}
|
||||
RedisArchivist().set_message(
|
||||
"message:subplaylist", message=message, expire=True
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def channel_validate(channel_id):
|
||||
@ -278,159 +174,48 @@ class PlaylistSubscription:
|
||||
playlist.json_data["playlist_subscribed"] = subscribe_status
|
||||
playlist.upload_to_es()
|
||||
|
||||
@staticmethod
|
||||
def get_to_ignore():
|
||||
"""get all youtube_ids already downloaded or ignored"""
|
||||
pending = queue.PendingList()
|
||||
pending.get_download()
|
||||
pending.get_indexed()
|
||||
|
||||
return pending.to_skip
|
||||
|
||||
def find_missing(self):
|
||||
"""find videos in subscribed playlists not downloaded yet"""
|
||||
all_playlists = [i["playlist_id"] for i in self.get_playlists()]
|
||||
if not all_playlists:
|
||||
return False
|
||||
to_ignore = self.get_to_ignore()
|
||||
|
||||
missing_videos = []
|
||||
total = len(all_playlists)
|
||||
for idx, playlist_id in enumerate(all_playlists):
|
||||
size_limit = self.config["subscriptions"]["channel_size"]
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
is_active = playlist.update_playlist()
|
||||
if not is_active:
|
||||
playlist.update_playlist()
|
||||
if not playlist:
|
||||
playlist.deactivate()
|
||||
continue
|
||||
|
||||
playlist_entries = playlist.json_data["playlist_entries"]
|
||||
size_limit = self.config["subscriptions"]["channel_size"]
|
||||
if size_limit:
|
||||
del playlist_entries[size_limit:]
|
||||
|
||||
to_check = [
|
||||
i["youtube_id"]
|
||||
for i in playlist_entries
|
||||
if i["downloaded"] is False
|
||||
]
|
||||
needs_downloading = is_missing(to_check)
|
||||
missing_videos.extend(needs_downloading)
|
||||
all_missing = [i for i in playlist_entries if not i["downloaded"]]
|
||||
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
if self.task.is_stopped():
|
||||
self.task.send_progress(["Received Stop signal."])
|
||||
break
|
||||
|
||||
self.task.send_progress(
|
||||
message_lines=[f"Scanning Playlists {idx + 1}/{total}"],
|
||||
progress=(idx + 1) / total,
|
||||
message = {
|
||||
"status": "message:rescan",
|
||||
"level": "info",
|
||||
"title": "Scanning playlists: Looking for new videos.",
|
||||
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
|
||||
}
|
||||
RedisArchivist().set_message(
|
||||
"message:rescan", message=message, expire=True
|
||||
)
|
||||
|
||||
for video in all_missing:
|
||||
youtube_id = video["youtube_id"]
|
||||
if youtube_id not in to_ignore:
|
||||
missing_videos.append(youtube_id)
|
||||
|
||||
return missing_videos
|
||||
|
||||
|
||||
class SubscriptionScanner:
|
||||
"""add missing videos to queue"""
|
||||
|
||||
def __init__(self, task=False):
|
||||
self.task = task
|
||||
self.missing_videos = False
|
||||
self.auto_start = AppConfig().config["subscriptions"].get("auto_start")
|
||||
|
||||
def scan(self):
|
||||
"""scan channels and playlists"""
|
||||
if self.task:
|
||||
self.task.send_progress(["Rescanning channels and playlists."])
|
||||
|
||||
self.missing_videos = []
|
||||
self.scan_channels()
|
||||
if self.task and not self.task.is_stopped():
|
||||
self.scan_playlists()
|
||||
|
||||
return self.missing_videos
|
||||
|
||||
def scan_channels(self):
|
||||
"""get missing from channels"""
|
||||
channel_handler = ChannelSubscription(task=self.task)
|
||||
missing = channel_handler.find_missing()
|
||||
if not missing:
|
||||
return
|
||||
|
||||
for vid_id, vid_type in missing:
|
||||
self.missing_videos.append(
|
||||
{"type": "video", "vid_type": vid_type, "url": vid_id}
|
||||
)
|
||||
|
||||
def scan_playlists(self):
|
||||
"""get missing from playlists"""
|
||||
playlist_handler = PlaylistSubscription(task=self.task)
|
||||
missing = playlist_handler.find_missing()
|
||||
if not missing:
|
||||
return
|
||||
|
||||
for i in missing:
|
||||
self.missing_videos.append(
|
||||
{
|
||||
"type": "video",
|
||||
"vid_type": VideoTypeEnum.VIDEOS.value,
|
||||
"url": i,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class SubscriptionHandler:
|
||||
"""subscribe to channels and playlists from url_str"""
|
||||
|
||||
def __init__(self, url_str, task=False):
|
||||
self.url_str = url_str
|
||||
self.task = task
|
||||
self.to_subscribe = False
|
||||
|
||||
def subscribe(self, expected_type=False):
|
||||
"""subscribe to url_str items"""
|
||||
if self.task:
|
||||
self.task.send_progress(["Processing form content."])
|
||||
self.to_subscribe = Parser(self.url_str).parse()
|
||||
|
||||
total = len(self.to_subscribe)
|
||||
for idx, item in enumerate(self.to_subscribe):
|
||||
if self.task:
|
||||
self._notify(idx, item, total)
|
||||
|
||||
self.subscribe_type(item, expected_type=expected_type)
|
||||
|
||||
def subscribe_type(self, item, expected_type):
|
||||
"""process single item"""
|
||||
if item["type"] == "playlist":
|
||||
if expected_type and expected_type != "playlist":
|
||||
raise TypeError(
|
||||
f"expected {expected_type} url but got {item.get('type')}"
|
||||
)
|
||||
|
||||
PlaylistSubscription().process_url_str([item])
|
||||
return
|
||||
|
||||
if item["type"] == "video":
|
||||
# extract channel id from video
|
||||
video = YoutubeVideo(item["url"])
|
||||
video.get_from_youtube()
|
||||
video.process_youtube_meta()
|
||||
channel_id = video.channel_id
|
||||
elif item["type"] == "channel":
|
||||
channel_id = item["url"]
|
||||
else:
|
||||
raise ValueError("failed to subscribe to: " + item["url"])
|
||||
|
||||
if expected_type and expected_type != "channel":
|
||||
raise TypeError(
|
||||
f"expected {expected_type} url but got {item.get('type')}"
|
||||
)
|
||||
|
||||
self._subscribe(channel_id)
|
||||
|
||||
def _subscribe(self, channel_id):
|
||||
"""subscribe to channel"""
|
||||
ChannelSubscription().change_subscribe(
|
||||
channel_id, channel_subscribed=True
|
||||
)
|
||||
|
||||
def _notify(self, idx, item, total):
|
||||
"""send notification message to redis"""
|
||||
subscribe_type = item["type"].title()
|
||||
message_lines = [
|
||||
f"Subscribe to {subscribe_type}",
|
||||
f"Progress: {idx + 1}/{total}",
|
||||
]
|
||||
self.task.send_progress(message_lines, progress=(idx + 1) / total)
|
||||
|
@ -10,9 +10,9 @@ from io import BytesIO
|
||||
from time import sleep
|
||||
|
||||
import requests
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.ta.helper import is_missing
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.download import queue # partial import
|
||||
from home.src.es.connect import IndexPaginate
|
||||
from home.src.ta.config import AppConfig
|
||||
from mutagen.mp4 import MP4, MP4Cover
|
||||
from PIL import Image, ImageFile, ImageFilter, UnidentifiedImageError
|
||||
|
||||
@ -22,7 +22,8 @@ ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
class ThumbManagerBase:
|
||||
"""base class for thumbnail management"""
|
||||
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
|
||||
CHANNEL_DIR = os.path.join(CACHE_DIR, "channels")
|
||||
PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists")
|
||||
@ -42,41 +43,33 @@ class ThumbManagerBase:
|
||||
response = requests.get(url, stream=True, timeout=5)
|
||||
if response.ok:
|
||||
try:
|
||||
img = Image.open(response.raw)
|
||||
if isinstance(img, Image.Image):
|
||||
return img
|
||||
return self.get_fallback()
|
||||
|
||||
except (UnidentifiedImageError, OSError):
|
||||
return Image.open(response.raw)
|
||||
except UnidentifiedImageError:
|
||||
print(f"failed to open thumbnail: {url}")
|
||||
return self.get_fallback()
|
||||
|
||||
if response.status_code == 404:
|
||||
return self.get_fallback()
|
||||
|
||||
except (
|
||||
requests.exceptions.RequestException,
|
||||
requests.exceptions.ReadTimeout,
|
||||
):
|
||||
except requests.exceptions.RequestException:
|
||||
print(f"{self.item_id}: retry thumbnail download {url}")
|
||||
sleep((i + 1) ** i)
|
||||
|
||||
return self.get_fallback()
|
||||
return False
|
||||
|
||||
def get_fallback(self):
|
||||
"""get fallback thumbnail if not available"""
|
||||
print(f"{self.item_id}: failed to extract thumbnail, use fallback")
|
||||
if self.fallback:
|
||||
img_raw = Image.open(self.fallback)
|
||||
return img_raw
|
||||
|
||||
app_root = EnvironmentSettings.APP_DIR
|
||||
app_root = self.CONFIG["application"]["app_root"]
|
||||
default_map = {
|
||||
"video": os.path.join(
|
||||
app_root, "static/img/default-video-thumb.jpg"
|
||||
),
|
||||
"playlist": os.path.join(
|
||||
app_root, "static/img/default-playlist-thumb.jpg"
|
||||
app_root, "static/img/default-video-thumb.jpg"
|
||||
),
|
||||
"icon": os.path.join(
|
||||
app_root, "static/img/default-channel-icon.jpg"
|
||||
@ -84,9 +77,6 @@ class ThumbManagerBase:
|
||||
"banner": os.path.join(
|
||||
app_root, "static/img/default-channel-banner.jpg"
|
||||
),
|
||||
"tvart": os.path.join(
|
||||
app_root, "static/img/default-channel-art.jpg"
|
||||
),
|
||||
}
|
||||
|
||||
img_raw = Image.open(default_map[self.item_type])
|
||||
@ -155,10 +145,9 @@ class ThumbManager(ThumbManagerBase):
|
||||
|
||||
def download_channel_art(self, urls, skip_existing=False):
|
||||
"""pass tuple of channel thumbnails"""
|
||||
channel_thumb, channel_banner, channel_tv = urls
|
||||
channel_thumb, channel_banner = urls
|
||||
self._download_channel_thumb(channel_thumb, skip_existing)
|
||||
self._download_channel_banner(channel_banner, skip_existing)
|
||||
self._download_channel_tv(channel_tv, skip_existing)
|
||||
|
||||
def _download_channel_thumb(self, channel_thumb, skip_existing):
|
||||
"""download channel thumbnail"""
|
||||
@ -187,34 +176,13 @@ class ThumbManager(ThumbManagerBase):
|
||||
img_raw = self.download_raw(channel_banner)
|
||||
img_raw.convert("RGB").save(banner_path)
|
||||
|
||||
def _download_channel_tv(self, channel_tv, skip_existing):
|
||||
"""download channel tv art"""
|
||||
art_path = os.path.join(self.CHANNEL_DIR, self.item_id + "_tvart.jpg")
|
||||
self.item_type = "tvart"
|
||||
if skip_existing and os.path.exists(art_path):
|
||||
return
|
||||
|
||||
img_raw = self.download_raw(channel_tv)
|
||||
img_raw.convert("RGB").save(art_path)
|
||||
|
||||
def download_playlist_thumb(self, url, skip_existing=False):
|
||||
"""pass thumbnail url"""
|
||||
thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg")
|
||||
if skip_existing and os.path.exists(thumb_path):
|
||||
return
|
||||
|
||||
img_raw = (
|
||||
self.download_raw(url)
|
||||
if not isinstance(url, str) or url.startswith("http")
|
||||
else Image.open(os.path.join(self.CACHE_DIR, url))
|
||||
)
|
||||
width, height = img_raw.size
|
||||
|
||||
if not width / height == 16 / 9:
|
||||
new_height = width / 16 * 9
|
||||
offset = (height - new_height) / 2
|
||||
img_raw = img_raw.crop((0, offset, width, height - offset))
|
||||
img_raw = img_raw.resize((336, 189))
|
||||
img_raw = self.download_raw(url)
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
|
||||
def delete_video_thumb(self):
|
||||
@ -257,10 +225,9 @@ class ThumbManager(ThumbManagerBase):
|
||||
class ValidatorCallback:
|
||||
"""handle callback validate thumbnails page by page"""
|
||||
|
||||
def __init__(self, source, index_name, counter=0):
|
||||
def __init__(self, source, index_name):
|
||||
self.source = source
|
||||
self.index_name = index_name
|
||||
self.counter = counter
|
||||
|
||||
def run(self):
|
||||
"""run the task for page"""
|
||||
@ -285,7 +252,6 @@ class ValidatorCallback:
|
||||
urls = (
|
||||
channel["_source"]["channel_thumb_url"],
|
||||
channel["_source"]["channel_banner_url"],
|
||||
channel["_source"].get("channel_tvart_url", False),
|
||||
)
|
||||
handler = ThumbManager(channel["_source"]["channel_id"])
|
||||
handler.download_channel_art(urls, skip_existing=True)
|
||||
@ -301,205 +267,102 @@ class ValidatorCallback:
|
||||
class ThumbValidator:
|
||||
"""validate thumbnails"""
|
||||
|
||||
INDEX = [
|
||||
{
|
||||
"data": {
|
||||
"query": {"term": {"active": {"value": True}}},
|
||||
"_source": ["vid_thumb_url", "youtube_id"],
|
||||
},
|
||||
"name": "ta_video",
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"query": {"term": {"channel_active": {"value": True}}},
|
||||
"_source": {
|
||||
"excludes": ["channel_description", "channel_overwrites"]
|
||||
},
|
||||
},
|
||||
"name": "ta_channel",
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"query": {"term": {"playlist_active": {"value": True}}},
|
||||
"_source": ["playlist_id", "playlist_thumbnail"],
|
||||
},
|
||||
"name": "ta_playlist",
|
||||
},
|
||||
]
|
||||
def download_missing(self):
|
||||
"""download all missing artwork"""
|
||||
self.download_missing_videos()
|
||||
self.download_missing_channels()
|
||||
self.download_missing_playlists()
|
||||
|
||||
def __init__(self, task=False):
|
||||
self.task = task
|
||||
|
||||
def validate(self):
|
||||
"""validate all indexes"""
|
||||
for index in self.INDEX:
|
||||
total = self._get_total(index["name"])
|
||||
if not total:
|
||||
continue
|
||||
|
||||
paginate = IndexPaginate(
|
||||
index_name=index["name"],
|
||||
data=index["data"],
|
||||
size=1000,
|
||||
callback=ValidatorCallback,
|
||||
task=self.task,
|
||||
total=total,
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
def clean_up(self):
|
||||
"""clean up all thumbs"""
|
||||
self._clean_up_vids()
|
||||
self._clean_up_channels()
|
||||
self._clean_up_playlists()
|
||||
|
||||
def _clean_up_vids(self):
|
||||
"""clean unneeded vid thumbs"""
|
||||
video_dir = os.path.join(EnvironmentSettings.CACHE_DIR, "videos")
|
||||
video_folders = os.listdir(video_dir)
|
||||
for video_folder in video_folders:
|
||||
folder_path = os.path.join(video_dir, video_folder)
|
||||
thumbs_is = {i.split(".")[0] for i in os.listdir(folder_path)}
|
||||
thumbs_should = self._get_vid_thumbs_should(video_folder)
|
||||
to_delete = thumbs_is - thumbs_should
|
||||
for thumb in to_delete:
|
||||
delete_path = os.path.join(folder_path, f"{thumb}.jpg")
|
||||
os.remove(delete_path)
|
||||
|
||||
if to_delete:
|
||||
message = (
|
||||
f"[thumbs][video][{video_folder}] "
|
||||
+ f"delete {len(to_delete)} unused thumbnails"
|
||||
)
|
||||
print(message)
|
||||
if self.task:
|
||||
self.task.send_progress([message])
|
||||
|
||||
@staticmethod
|
||||
def _get_vid_thumbs_should(video_folder: str) -> set[str]:
|
||||
"""get indexed"""
|
||||
should_list = [
|
||||
{"prefix": {"youtube_id": {"value": video_folder.lower()}}},
|
||||
{"prefix": {"youtube_id": {"value": video_folder.upper()}}},
|
||||
]
|
||||
def download_missing_videos(self):
|
||||
"""get all missing video thumbnails"""
|
||||
data = {
|
||||
"query": {"bool": {"should": should_list}},
|
||||
"_source": ["youtube_id"],
|
||||
}
|
||||
result = IndexPaginate("ta_video,ta_download", data).get_results()
|
||||
thumbs_should = {i["youtube_id"] for i in result}
|
||||
|
||||
return thumbs_should
|
||||
|
||||
def _clean_up_channels(self):
|
||||
"""clean unneeded channel thumbs"""
|
||||
channel_dir = os.path.join(EnvironmentSettings.CACHE_DIR, "channels")
|
||||
channel_art = os.listdir(channel_dir)
|
||||
thumbs_is = {"_".join(i.split("_")[:-1]) for i in channel_art}
|
||||
to_delete = is_missing(list(thumbs_is), "ta_channel", "channel_id")
|
||||
for channel_thumb in channel_art:
|
||||
if channel_thumb[:24] in to_delete:
|
||||
delete_path = os.path.join(channel_dir, channel_thumb)
|
||||
os.remove(delete_path)
|
||||
|
||||
if to_delete:
|
||||
message = (
|
||||
"[thumbs][channel] "
|
||||
+ f"delete {len(to_delete)} unused channel art"
|
||||
)
|
||||
print(message)
|
||||
if self.task:
|
||||
self.task.send_progress([message])
|
||||
|
||||
def _clean_up_playlists(self):
|
||||
"""clean up unneeded playlist thumbs"""
|
||||
playlist_dir = os.path.join(EnvironmentSettings.CACHE_DIR, "playlists")
|
||||
playlist_art = os.listdir(playlist_dir)
|
||||
thumbs_is = {i.split(".")[0] for i in playlist_art}
|
||||
to_delete = is_missing(list(thumbs_is), "ta_playlist", "playlist_id")
|
||||
for playlist_id in to_delete:
|
||||
delete_path = os.path.join(playlist_dir, f"{playlist_id}.jpg")
|
||||
os.remove(delete_path)
|
||||
|
||||
if to_delete:
|
||||
message = (
|
||||
"[thumbs][playlist] "
|
||||
+ f"delete {len(to_delete)} unused playlist art"
|
||||
)
|
||||
print(message)
|
||||
if self.task:
|
||||
self.task.send_progress([message])
|
||||
|
||||
@staticmethod
|
||||
def _get_total(index_name):
|
||||
"""get total documents in index"""
|
||||
path = f"{index_name}/_count"
|
||||
response, _ = ElasticWrap(path).get()
|
||||
|
||||
return response.get("count")
|
||||
|
||||
|
||||
class ThumbFilesystem:
|
||||
"""sync thumbnail files to media files"""
|
||||
|
||||
INDEX_NAME = "ta_video"
|
||||
|
||||
def __init__(self, task=False):
|
||||
self.task = task
|
||||
|
||||
def embed(self):
|
||||
"""entry point"""
|
||||
data = {
|
||||
"query": {"match_all": {}},
|
||||
"_source": ["media_url", "youtube_id"],
|
||||
"query": {"term": {"active": {"value": True}}},
|
||||
"sort": [{"youtube_id": {"order": "asc"}}],
|
||||
"_source": ["vid_thumb_url", "youtube_id"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
index_name=self.INDEX_NAME,
|
||||
data=data,
|
||||
size=200,
|
||||
callback=EmbedCallback,
|
||||
task=self.task,
|
||||
total=self._get_total(),
|
||||
"ta_video", data, size=5000, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
def _get_total(self):
|
||||
"""get total documents in index"""
|
||||
path = f"{self.INDEX_NAME}/_count"
|
||||
response, _ = ElasticWrap(path).get()
|
||||
def download_missing_channels(self):
|
||||
"""get all missing channel thumbnails"""
|
||||
data = {
|
||||
"query": {"term": {"channel_active": {"value": True}}},
|
||||
"sort": [{"channel_id": {"order": "asc"}}],
|
||||
"_source": {
|
||||
"excludes": ["channel_description", "channel_overwrites"]
|
||||
},
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_channel", data, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
return response.get("count")
|
||||
def download_missing_playlists(self):
|
||||
"""get all missing playlist artwork"""
|
||||
data = {
|
||||
"query": {"term": {"playlist_active": {"value": True}}},
|
||||
"sort": [{"playlist_id": {"order": "asc"}}],
|
||||
"_source": ["playlist_id", "playlist_thumbnail"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_playlist", data, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
|
||||
class EmbedCallback:
|
||||
"""callback class to embed thumbnails"""
|
||||
class ThumbFilesystem:
|
||||
"""filesystem tasks for thumbnails"""
|
||||
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
MEDIA_DIR = EnvironmentSettings.MEDIA_DIR
|
||||
FORMAT = MP4Cover.FORMAT_JPEG
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
MEDIA_DIR = CONFIG["application"]["videos"]
|
||||
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
|
||||
|
||||
def __init__(self, source, index_name, counter=0):
|
||||
self.source = source
|
||||
self.index_name = index_name
|
||||
self.counter = counter
|
||||
def sync(self):
|
||||
"""embed thumbnails to mediafiles"""
|
||||
video_list = self.get_thumb_list()
|
||||
self._embed_thumbs(video_list)
|
||||
|
||||
def run(self):
|
||||
"""run embed"""
|
||||
for video in self.source:
|
||||
video_id = video["_source"]["youtube_id"]
|
||||
media_url = os.path.join(
|
||||
self.MEDIA_DIR, video["_source"]["media_url"]
|
||||
)
|
||||
def get_thumb_list(self):
|
||||
"""get list of mediafiles and matching thumbnails"""
|
||||
pending = queue.PendingList()
|
||||
pending.get_download()
|
||||
pending.get_indexed()
|
||||
|
||||
video_list = []
|
||||
for video in pending.all_videos:
|
||||
video_id = video["youtube_id"]
|
||||
media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
|
||||
thumb_path = os.path.join(
|
||||
self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path()
|
||||
)
|
||||
if os.path.exists(thumb_path):
|
||||
self.embed(media_url, thumb_path)
|
||||
video_list.append(
|
||||
{
|
||||
"media_url": media_url,
|
||||
"thumb_path": thumb_path,
|
||||
}
|
||||
)
|
||||
|
||||
def embed(self, media_url, thumb_path):
|
||||
"""embed thumb in single media file"""
|
||||
video = MP4(media_url)
|
||||
with open(thumb_path, "rb") as f:
|
||||
video["covr"] = [MP4Cover(f.read(), imageformat=self.FORMAT)]
|
||||
return video_list
|
||||
|
||||
video.save()
|
||||
@staticmethod
|
||||
def _embed_thumbs(video_list):
|
||||
"""rewrite the thumbnail into media file"""
|
||||
|
||||
counter = 1
|
||||
for video in video_list:
|
||||
# loop through all videos
|
||||
media_url = video["media_url"]
|
||||
thumb_path = video["thumb_path"]
|
||||
|
||||
mutagen_vid = MP4(media_url)
|
||||
with open(thumb_path, "rb") as f:
|
||||
mutagen_vid["covr"] = [
|
||||
MP4Cover(f.read(), imageformat=MP4Cover.FORMAT_JPEG)
|
||||
]
|
||||
mutagen_vid.save()
|
||||
if counter % 50 == 0:
|
||||
print(f"thumbnail write progress {counter}/{len(video_list)}")
|
||||
counter = counter + 1
|
||||
|
@ -10,7 +10,6 @@ from http import cookiejar
|
||||
from io import StringIO
|
||||
|
||||
import yt_dlp
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
@ -21,9 +20,8 @@ class YtWrap:
|
||||
"default_search": "ytsearch",
|
||||
"quiet": True,
|
||||
"check_formats": "selected",
|
||||
"socket_timeout": 10,
|
||||
"socket_timeout": 3,
|
||||
"extractor_retries": 3,
|
||||
"retries": 10,
|
||||
}
|
||||
|
||||
def __init__(self, obs_request, config=False):
|
||||
@ -49,33 +47,21 @@ class YtWrap:
|
||||
with yt_dlp.YoutubeDL(self.obs) as ydl:
|
||||
try:
|
||||
ydl.download([url])
|
||||
except yt_dlp.utils.DownloadError as err:
|
||||
print(f"{url}: failed to download with message {err}")
|
||||
if "Temporary failure in name resolution" in str(err):
|
||||
raise ConnectionError("lost the internet, abort!") from err
|
||||
except yt_dlp.utils.DownloadError:
|
||||
print(f"{url}: failed to download.")
|
||||
return False
|
||||
|
||||
return False, str(err)
|
||||
|
||||
return True, True
|
||||
return True
|
||||
|
||||
def extract(self, url):
|
||||
"""make extract request"""
|
||||
try:
|
||||
response = yt_dlp.YoutubeDL(self.obs).extract_info(url)
|
||||
except cookiejar.LoadError as err:
|
||||
print(f"cookie file is invalid: {err}")
|
||||
except cookiejar.LoadError:
|
||||
print("cookie file is invalid")
|
||||
return False
|
||||
except yt_dlp.utils.ExtractorError as err:
|
||||
print(f"{url}: failed to extract with message: {err}, continue...")
|
||||
return False
|
||||
except yt_dlp.utils.DownloadError as err:
|
||||
if "This channel does not have a" in str(err):
|
||||
return False
|
||||
|
||||
print(f"{url}: failed to get info from youtube with message {err}")
|
||||
if "Temporary failure in name resolution" in str(err):
|
||||
raise ConnectionError("lost the internet, abort!") from err
|
||||
|
||||
except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError):
|
||||
print(f"{url}: failed to get info from youtube")
|
||||
return False
|
||||
|
||||
return response
|
||||
@ -87,7 +73,6 @@ class CookieHandler:
|
||||
def __init__(self, config):
|
||||
self.cookie_io = False
|
||||
self.config = config
|
||||
self.cache_dir = EnvironmentSettings.CACHE_DIR
|
||||
|
||||
def get(self):
|
||||
"""get cookie io stream"""
|
||||
@ -97,9 +82,8 @@ class CookieHandler:
|
||||
|
||||
def import_cookie(self):
|
||||
"""import cookie from file"""
|
||||
import_path = os.path.join(
|
||||
self.cache_dir, "import", "cookies.google.txt"
|
||||
)
|
||||
cache_path = self.config["application"]["cache_dir"]
|
||||
import_path = os.path.join(cache_path, "import", "cookies.google.txt")
|
||||
|
||||
try:
|
||||
with open(import_path, encoding="utf-8") as cookie_file:
|
||||
@ -114,10 +98,10 @@ class CookieHandler:
|
||||
print("cookie: import successful")
|
||||
|
||||
def set_cookie(self, cookie):
|
||||
"""set cookie str and activate in config"""
|
||||
RedisArchivist().set_message("cookie", cookie, save=True)
|
||||
"""set cookie str and activate in cofig"""
|
||||
RedisArchivist().set_message("cookie", cookie)
|
||||
path = ".downloads.cookie_import"
|
||||
RedisArchivist().set_message("config", True, path=path, save=True)
|
||||
RedisArchivist().set_message("config", True, path=path)
|
||||
self.config["downloads"]["cookie_import"] = True
|
||||
print("cookie: activated and stored in Redis")
|
||||
|
||||
@ -167,7 +151,7 @@ class CookieHandler:
|
||||
now = datetime.now()
|
||||
message = {
|
||||
"status": response,
|
||||
"validated": int(now.timestamp()),
|
||||
"validated": int(now.strftime("%s")),
|
||||
"validated_str": now.strftime("%Y-%m-%d %H:%M"),
|
||||
}
|
||||
RedisArchivist().set_message("cookie:valid", message)
|
||||
|
@ -12,132 +12,266 @@ from datetime import datetime
|
||||
|
||||
from home.src.download.queue import PendingList
|
||||
from home.src.download.subscriptions import PlaylistSubscription
|
||||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.download.yt_dlp_base import CookieHandler, YtWrap
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.channel import YoutubeChannel
|
||||
from home.src.index.comments import CommentList
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.index.video import YoutubeVideo, index_new_video
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import get_channel_overwrites, ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisQueue
|
||||
from home.src.ta.helper import clean_string, ignore_filelist
|
||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||
|
||||
|
||||
class DownloaderBase:
|
||||
"""base class for shared config"""
|
||||
class DownloadPostProcess:
|
||||
"""handle task to run after download queue finishes"""
|
||||
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
MEDIA_DIR = EnvironmentSettings.MEDIA_DIR
|
||||
CHANNEL_QUEUE = "download:channel"
|
||||
PLAYLIST_QUEUE = "download:playlist:full"
|
||||
PLAYLIST_QUICK = "download:playlist:quick"
|
||||
VIDEO_QUEUE = "download:video"
|
||||
def __init__(self, download):
|
||||
self.download = download
|
||||
self.now = int(datetime.now().strftime("%s"))
|
||||
self.pending = False
|
||||
|
||||
def __init__(self, task):
|
||||
self.task = task
|
||||
self.config = AppConfig().config
|
||||
self.channel_overwrites = get_channel_overwrites()
|
||||
self.now = int(datetime.now().timestamp())
|
||||
def run(self):
|
||||
"""run all functions"""
|
||||
self.pending = PendingList()
|
||||
self.pending.get_download()
|
||||
self.pending.get_channels()
|
||||
self.pending.get_indexed()
|
||||
self.auto_delete_all()
|
||||
self.auto_delete_overwrites()
|
||||
self.validate_playlists()
|
||||
|
||||
|
||||
class VideoDownloader(DownloaderBase):
|
||||
"""handle the video download functionality"""
|
||||
|
||||
def __init__(self, task=False):
|
||||
super().__init__(task)
|
||||
self.obs = False
|
||||
self._build_obs()
|
||||
|
||||
def run_queue(self, auto_only=False) -> tuple[int, int]:
|
||||
"""setup download queue in redis loop until no more items"""
|
||||
downloaded = 0
|
||||
failed = 0
|
||||
while True:
|
||||
video_data = self._get_next(auto_only)
|
||||
if self.task.is_stopped() or not video_data:
|
||||
self._reset_auto()
|
||||
break
|
||||
|
||||
youtube_id = video_data["youtube_id"]
|
||||
channel_id = video_data["channel_id"]
|
||||
print(f"{youtube_id}: Downloading video")
|
||||
self._notify(video_data, "Validate download format")
|
||||
|
||||
success = self._dl_single_vid(youtube_id, channel_id)
|
||||
if not success:
|
||||
failed += 1
|
||||
continue
|
||||
|
||||
self._notify(video_data, "Add video metadata to index", progress=1)
|
||||
video_type = VideoTypeEnum(video_data["vid_type"])
|
||||
vid_dict = index_new_video(youtube_id, video_type=video_type)
|
||||
RedisQueue(self.CHANNEL_QUEUE).add(channel_id)
|
||||
RedisQueue(self.VIDEO_QUEUE).add(youtube_id)
|
||||
|
||||
self._notify(video_data, "Move downloaded file to archive")
|
||||
self.move_to_archive(vid_dict)
|
||||
self._delete_from_pending(youtube_id)
|
||||
downloaded += 1
|
||||
|
||||
# post processing
|
||||
DownloadPostProcess(self.task).run()
|
||||
|
||||
return downloaded, failed
|
||||
|
||||
def _notify(self, video_data, message, progress=False):
|
||||
"""send progress notification to task"""
|
||||
if not self.task:
|
||||
def auto_delete_all(self):
|
||||
"""handle auto delete"""
|
||||
autodelete_days = self.download.config["downloads"]["autodelete_days"]
|
||||
if not autodelete_days:
|
||||
return
|
||||
|
||||
typ = VideoTypeEnum(video_data["vid_type"]).value.rstrip("s").title()
|
||||
title = video_data.get("title")
|
||||
self.task.send_progress(
|
||||
[f"Processing {typ}: {title}", message], progress=progress
|
||||
)
|
||||
|
||||
def _get_next(self, auto_only):
|
||||
"""get next item in queue"""
|
||||
must_list = [{"term": {"status": {"value": "pending"}}}]
|
||||
must_not_list = [{"exists": {"field": "message"}}]
|
||||
if auto_only:
|
||||
must_list.append({"term": {"auto_start": {"value": True}}})
|
||||
|
||||
print(f"auto delete older than {autodelete_days} days")
|
||||
now_lte = self.now - autodelete_days * 24 * 60 * 60
|
||||
data = {
|
||||
"size": 1,
|
||||
"query": {"bool": {"must": must_list, "must_not": must_not_list}},
|
||||
"sort": [
|
||||
{"auto_start": {"order": "desc"}},
|
||||
{"timestamp": {"order": "asc"}},
|
||||
],
|
||||
"query": {"range": {"player.watched_date": {"lte": now_lte}}},
|
||||
"sort": [{"player.watched_date": {"order": "asc"}}],
|
||||
}
|
||||
path = "ta_download/_search"
|
||||
response, _ = ElasticWrap(path).get(data=data)
|
||||
if not response["hits"]["hits"]:
|
||||
return False
|
||||
self._auto_delete_watched(data)
|
||||
|
||||
return response["hits"]["hits"][0]["_source"]
|
||||
def auto_delete_overwrites(self):
|
||||
"""handle per channel auto delete from overwrites"""
|
||||
for channel_id, value in self.pending.channel_overwrites.items():
|
||||
if "autodelete_days" in value:
|
||||
autodelete_days = value.get("autodelete_days")
|
||||
print(f"{channel_id}: delete older than {autodelete_days}d")
|
||||
now_lte = self.now - autodelete_days * 24 * 60 * 60
|
||||
must_list = [
|
||||
{"range": {"player.watched_date": {"lte": now_lte}}},
|
||||
{"term": {"channel.channel_id": {"value": channel_id}}},
|
||||
]
|
||||
data = {
|
||||
"query": {"bool": {"must": must_list}},
|
||||
"sort": [{"player.watched_date": {"order": "desc"}}],
|
||||
}
|
||||
self._auto_delete_watched(data)
|
||||
|
||||
@staticmethod
|
||||
def _auto_delete_watched(data):
|
||||
"""delete watched videos after x days"""
|
||||
to_delete = IndexPaginate("ta_video", data).get_results()
|
||||
if not to_delete:
|
||||
return
|
||||
|
||||
for video in to_delete:
|
||||
youtube_id = video["youtube_id"]
|
||||
print(f"{youtube_id}: auto delete video")
|
||||
YoutubeVideo(youtube_id).delete_media_file()
|
||||
|
||||
print("add deleted to ignore list")
|
||||
vids = [{"type": "video", "url": i["youtube_id"]} for i in to_delete]
|
||||
pending = PendingList(youtube_ids=vids)
|
||||
pending.parse_url_list()
|
||||
pending.add_to_pending(status="ignore")
|
||||
|
||||
def validate_playlists(self):
|
||||
"""look for playlist needing to update"""
|
||||
for id_c, channel_id in enumerate(self.download.channels):
|
||||
channel = YoutubeChannel(channel_id)
|
||||
overwrites = self.pending.channel_overwrites.get(channel_id, False)
|
||||
if overwrites and overwrites.get("index_playlists"):
|
||||
# validate from remote
|
||||
channel.index_channel_playlists()
|
||||
continue
|
||||
|
||||
# validate from local
|
||||
playlists = channel.get_indexed_playlists()
|
||||
all_channel_playlist = [i["playlist_id"] for i in playlists]
|
||||
self._validate_channel_playlist(all_channel_playlist, id_c)
|
||||
|
||||
def _validate_channel_playlist(self, all_channel_playlist, id_c):
|
||||
"""scan channel for playlist needing update"""
|
||||
all_youtube_ids = [i["youtube_id"] for i in self.pending.all_videos]
|
||||
for id_p, playlist_id in enumerate(all_channel_playlist):
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
playlist.all_youtube_ids = all_youtube_ids
|
||||
playlist.build_json(scrape=True)
|
||||
if not playlist.json_data:
|
||||
playlist.deactivate()
|
||||
|
||||
playlist.add_vids_to_playlist()
|
||||
playlist.upload_to_es()
|
||||
self._notify_playlist_progress(all_channel_playlist, id_c, id_p)
|
||||
|
||||
def _notify_playlist_progress(self, all_channel_playlist, id_c, id_p):
|
||||
"""notify to UI"""
|
||||
title = (
|
||||
"Processing playlists for channels: "
|
||||
+ f"{id_c + 1}/{len(self.download.channels)}"
|
||||
)
|
||||
message = f"Progress: {id_p + 1}/{len(all_channel_playlist)}"
|
||||
key = "message:download"
|
||||
mess_dict = {
|
||||
"status": key,
|
||||
"level": "info",
|
||||
"title": title,
|
||||
"message": message,
|
||||
}
|
||||
if id_p + 1 == len(all_channel_playlist):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
|
||||
RedisArchivist().set_message(key, mess_dict, expire=expire)
|
||||
|
||||
|
||||
class VideoDownloader:
|
||||
"""
|
||||
handle the video download functionality
|
||||
if not initiated with list, take from queue
|
||||
"""
|
||||
|
||||
MSG = "message:download"
|
||||
|
||||
def __init__(self, youtube_id_list=False):
|
||||
self.obs = False
|
||||
self.video_overwrites = False
|
||||
self.youtube_id_list = youtube_id_list
|
||||
self.config = AppConfig().config
|
||||
self._build_obs()
|
||||
self.channels = set()
|
||||
|
||||
def run_queue(self):
|
||||
"""setup download queue in redis loop until no more items"""
|
||||
self._setup_queue()
|
||||
|
||||
queue = RedisQueue()
|
||||
|
||||
limit_queue = self.config["downloads"]["limit_count"]
|
||||
if limit_queue:
|
||||
queue.trim(limit_queue - 1)
|
||||
|
||||
while True:
|
||||
youtube_id = queue.get_next()
|
||||
if not youtube_id:
|
||||
break
|
||||
|
||||
success = self._dl_single_vid(youtube_id)
|
||||
if not success:
|
||||
continue
|
||||
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Indexing....",
|
||||
"message": "Add video metadata to index.",
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=60)
|
||||
|
||||
vid_dict = index_new_video(
|
||||
youtube_id, video_overwrites=self.video_overwrites
|
||||
)
|
||||
self.channels.add(vid_dict["channel"]["channel_id"])
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Moving....",
|
||||
"message": "Moving downloaded file to storage folder",
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict)
|
||||
|
||||
if queue.has_item():
|
||||
message = "Continue with next video."
|
||||
else:
|
||||
message = "Download queue is finished."
|
||||
|
||||
self.move_to_archive(vid_dict)
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Completed",
|
||||
"message": message,
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=10)
|
||||
self._delete_from_pending(youtube_id)
|
||||
|
||||
# post processing
|
||||
self._add_subscribed_channels()
|
||||
DownloadPostProcess(self).run()
|
||||
|
||||
def _setup_queue(self):
|
||||
"""setup required and validate"""
|
||||
if self.config["downloads"]["cookie_import"]:
|
||||
valid = CookieHandler(self.config).validate()
|
||||
if not valid:
|
||||
return
|
||||
|
||||
pending = PendingList()
|
||||
pending.get_download()
|
||||
pending.get_channels()
|
||||
self.video_overwrites = pending.video_overwrites
|
||||
|
||||
def add_pending(self):
|
||||
"""add pending videos to download queue"""
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Looking for videos to download",
|
||||
"message": "Scanning your download queue.",
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=True)
|
||||
pending = PendingList()
|
||||
pending.get_download()
|
||||
to_add = [i["youtube_id"] for i in pending.all_pending]
|
||||
if not to_add:
|
||||
# there is nothing pending
|
||||
print("download queue is empty")
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "error",
|
||||
"title": "Download queue is empty",
|
||||
"message": "Add some videos to the queue first.",
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=True)
|
||||
return
|
||||
|
||||
RedisQueue().add_list(to_add)
|
||||
|
||||
def _progress_hook(self, response):
|
||||
"""process the progress_hooks from yt_dlp"""
|
||||
progress = False
|
||||
try:
|
||||
size = response.get("_total_bytes_str")
|
||||
if size.strip() == "N/A":
|
||||
size = response.get("_total_bytes_estimate_str", "N/A")
|
||||
title = "Downloading: " + response["info_dict"]["title"]
|
||||
|
||||
try:
|
||||
percent = response["_percent_str"]
|
||||
progress = float(percent.strip("%")) / 100
|
||||
size = response["_total_bytes_str"]
|
||||
speed = response["_speed_str"]
|
||||
eta = response["_eta_str"]
|
||||
message = f"{percent} of {size} at {speed} - time left: {eta}"
|
||||
except KeyError:
|
||||
message = "processing"
|
||||
|
||||
if self.task:
|
||||
title = response["info_dict"]["title"]
|
||||
self.task.send_progress([title, message], progress=progress)
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": title,
|
||||
"message": message,
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=True)
|
||||
|
||||
def _build_obs(self):
|
||||
"""collection to build all obs passed to yt-dlp"""
|
||||
@ -148,24 +282,27 @@ class VideoDownloader(DownloaderBase):
|
||||
def _build_obs_basic(self):
|
||||
"""initial obs"""
|
||||
self.obs = {
|
||||
"default_search": "ytsearch",
|
||||
"merge_output_format": "mp4",
|
||||
"outtmpl": (self.CACHE_DIR + "/download/%(id)s.mp4"),
|
||||
"outtmpl": (
|
||||
self.config["application"]["cache_dir"]
|
||||
+ "/download/%(id)s.mp4"
|
||||
),
|
||||
"progress_hooks": [self._progress_hook],
|
||||
"noprogress": True,
|
||||
"quiet": True,
|
||||
"continuedl": True,
|
||||
"retries": 3,
|
||||
"writethumbnail": False,
|
||||
"noplaylist": True,
|
||||
"color": "no_color",
|
||||
"check_formats": "selected",
|
||||
"socket_timeout": 3,
|
||||
}
|
||||
|
||||
def _build_obs_user(self):
|
||||
"""build user customized options"""
|
||||
if self.config["downloads"]["format"]:
|
||||
self.obs["format"] = self.config["downloads"]["format"]
|
||||
if self.config["downloads"]["format_sort"]:
|
||||
format_sort = self.config["downloads"]["format_sort"]
|
||||
format_sort_list = [i.strip() for i in format_sort.split(",")]
|
||||
self.obs["format_sort"] = format_sort_list
|
||||
if self.config["downloads"]["limit_speed"]:
|
||||
self.obs["ratelimit"] = (
|
||||
self.config["downloads"]["limit_speed"] * 1024
|
||||
@ -210,21 +347,30 @@ class VideoDownloader(DownloaderBase):
|
||||
|
||||
self.obs["postprocessors"] = postprocessors
|
||||
|
||||
def _set_overwrites(self, obs: dict, channel_id: str) -> None:
|
||||
"""add overwrites to obs"""
|
||||
overwrites = self.channel_overwrites.get(channel_id)
|
||||
if overwrites and overwrites.get("download_format"):
|
||||
obs["format"] = overwrites.get("download_format")
|
||||
def get_format_overwrites(self, youtube_id):
|
||||
"""get overwrites from single video"""
|
||||
overwrites = self.video_overwrites.get(youtube_id, False)
|
||||
if overwrites:
|
||||
return overwrites.get("download_format", False)
|
||||
|
||||
def _dl_single_vid(self, youtube_id: str, channel_id: str) -> bool:
|
||||
return False
|
||||
|
||||
def _dl_single_vid(self, youtube_id):
|
||||
"""download single video"""
|
||||
obs = self.obs.copy()
|
||||
self._set_overwrites(obs, channel_id)
|
||||
dl_cache = os.path.join(self.CACHE_DIR, "download")
|
||||
format_overwrite = self.get_format_overwrites(youtube_id)
|
||||
if format_overwrite:
|
||||
obs["format"] = format_overwrite
|
||||
|
||||
success, message = YtWrap(obs, self.config).download(youtube_id)
|
||||
if not success:
|
||||
self._handle_error(youtube_id, message)
|
||||
dl_cache = self.config["application"]["cache_dir"] + "/download/"
|
||||
|
||||
# check if already in cache to continue from there
|
||||
all_cached = ignore_filelist(os.listdir(dl_cache))
|
||||
for file_name in all_cached:
|
||||
if youtube_id in file_name:
|
||||
obs["outtmpl"] = os.path.join(dl_cache, file_name)
|
||||
|
||||
success = YtWrap(obs, self.config).download(youtube_id)
|
||||
|
||||
if self.obs["writethumbnail"]:
|
||||
# webp files don't get cleaned up automatically
|
||||
@ -236,28 +382,29 @@ class VideoDownloader(DownloaderBase):
|
||||
|
||||
return success
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(youtube_id, message):
|
||||
"""store error message"""
|
||||
data = {"doc": {"message": message}}
|
||||
_, _ = ElasticWrap(f"ta_download/_update/{youtube_id}").post(data=data)
|
||||
|
||||
def move_to_archive(self, vid_dict):
|
||||
"""move downloaded video from cache to archive"""
|
||||
host_uid = EnvironmentSettings.HOST_UID
|
||||
host_gid = EnvironmentSettings.HOST_GID
|
||||
# make folder
|
||||
folder = os.path.join(
|
||||
self.MEDIA_DIR, vid_dict["channel"]["channel_id"]
|
||||
)
|
||||
if not os.path.exists(folder):
|
||||
os.makedirs(folder)
|
||||
videos = self.config["application"]["videos"]
|
||||
host_uid = self.config["application"]["HOST_UID"]
|
||||
host_gid = self.config["application"]["HOST_GID"]
|
||||
channel_name = clean_string(vid_dict["channel"]["channel_name"])
|
||||
if len(channel_name) <= 3:
|
||||
# fall back to channel id
|
||||
channel_name = vid_dict["channel"]["channel_id"]
|
||||
# make archive folder with correct permissions
|
||||
new_folder = os.path.join(videos, channel_name)
|
||||
if not os.path.exists(new_folder):
|
||||
os.makedirs(new_folder)
|
||||
if host_uid and host_gid:
|
||||
os.chown(folder, host_uid, host_gid)
|
||||
# move media file
|
||||
media_file = vid_dict["youtube_id"] + ".mp4"
|
||||
old_path = os.path.join(self.CACHE_DIR, "download", media_file)
|
||||
new_path = os.path.join(self.MEDIA_DIR, vid_dict["media_url"])
|
||||
os.chown(new_folder, host_uid, host_gid)
|
||||
# find real filename
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
all_cached = ignore_filelist(os.listdir(cache_dir + "/download/"))
|
||||
for file_str in all_cached:
|
||||
if vid_dict["youtube_id"] in file_str:
|
||||
old_file = file_str
|
||||
old_path = os.path.join(cache_dir, "download", old_file)
|
||||
new_path = os.path.join(videos, vid_dict["media_url"])
|
||||
# move media file and fix permission
|
||||
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
|
||||
if host_uid and host_gid:
|
||||
@ -266,186 +413,17 @@ class VideoDownloader(DownloaderBase):
|
||||
@staticmethod
|
||||
def _delete_from_pending(youtube_id):
|
||||
"""delete downloaded video from pending index if its there"""
|
||||
path = f"ta_download/_doc/{youtube_id}?refresh=true"
|
||||
path = f"ta_download/_doc/{youtube_id}"
|
||||
_, _ = ElasticWrap(path).delete()
|
||||
|
||||
def _reset_auto(self):
|
||||
"""reset autostart to defaults after queue stop"""
|
||||
path = "ta_download/_update_by_query"
|
||||
data = {
|
||||
"query": {"term": {"auto_start": {"value": True}}},
|
||||
"script": {
|
||||
"source": "ctx._source.auto_start = false",
|
||||
"lang": "painless",
|
||||
},
|
||||
}
|
||||
response, _ = ElasticWrap(path).post(data=data)
|
||||
updated = response.get("updated")
|
||||
if updated:
|
||||
print(f"[download] reset auto start on {updated} videos.")
|
||||
|
||||
|
||||
class DownloadPostProcess(DownloaderBase):
|
||||
"""handle task to run after download queue finishes"""
|
||||
|
||||
def run(self):
|
||||
"""run all functions"""
|
||||
self.auto_delete_all()
|
||||
self.auto_delete_overwrites()
|
||||
self.refresh_playlist()
|
||||
self.match_videos()
|
||||
self.get_comments()
|
||||
|
||||
def auto_delete_all(self):
|
||||
"""handle auto delete"""
|
||||
autodelete_days = self.config["downloads"]["autodelete_days"]
|
||||
if not autodelete_days:
|
||||
def _add_subscribed_channels(self):
|
||||
"""add all channels subscribed to refresh"""
|
||||
all_subscribed = PlaylistSubscription().get_playlists()
|
||||
if not all_subscribed:
|
||||
return
|
||||
|
||||
print(f"auto delete older than {autodelete_days} days")
|
||||
now_lte = str(self.now - autodelete_days * 24 * 60 * 60)
|
||||
data = {
|
||||
"query": {"range": {"player.watched_date": {"lte": now_lte}}},
|
||||
"sort": [{"player.watched_date": {"order": "asc"}}],
|
||||
}
|
||||
self._auto_delete_watched(data)
|
||||
channel_ids = [i["playlist_channel_id"] for i in all_subscribed]
|
||||
for channel_id in channel_ids:
|
||||
self.channels.add(channel_id)
|
||||
|
||||
def auto_delete_overwrites(self):
|
||||
"""handle per channel auto delete from overwrites"""
|
||||
for channel_id, value in self.channel_overwrites.items():
|
||||
if "autodelete_days" in value:
|
||||
autodelete_days = value.get("autodelete_days")
|
||||
print(f"{channel_id}: delete older than {autodelete_days}d")
|
||||
now_lte = str(self.now - autodelete_days * 24 * 60 * 60)
|
||||
must_list = [
|
||||
{"range": {"player.watched_date": {"lte": now_lte}}},
|
||||
{"term": {"channel.channel_id": {"value": channel_id}}},
|
||||
]
|
||||
data = {
|
||||
"query": {"bool": {"must": must_list}},
|
||||
"sort": [{"player.watched_date": {"order": "desc"}}],
|
||||
}
|
||||
self._auto_delete_watched(data)
|
||||
|
||||
@staticmethod
|
||||
def _auto_delete_watched(data):
|
||||
"""delete watched videos after x days"""
|
||||
to_delete = IndexPaginate("ta_video", data).get_results()
|
||||
if not to_delete:
|
||||
return
|
||||
|
||||
for video in to_delete:
|
||||
youtube_id = video["youtube_id"]
|
||||
print(f"{youtube_id}: auto delete video")
|
||||
YoutubeVideo(youtube_id).delete_media_file()
|
||||
|
||||
print("add deleted to ignore list")
|
||||
vids = [{"type": "video", "url": i["youtube_id"]} for i in to_delete]
|
||||
pending = PendingList(youtube_ids=vids)
|
||||
pending.parse_url_list()
|
||||
_ = pending.add_to_pending(status="ignore")
|
||||
|
||||
def refresh_playlist(self) -> None:
|
||||
"""match videos with playlists"""
|
||||
self.add_playlists_to_refresh()
|
||||
|
||||
queue = RedisQueue(self.PLAYLIST_QUEUE)
|
||||
while True:
|
||||
total = queue.max_score()
|
||||
playlist_id, idx = queue.get_next()
|
||||
if not playlist_id or not idx or not total:
|
||||
break
|
||||
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
playlist.update_playlist(skip_on_empty=True)
|
||||
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
channel_name = playlist.json_data["playlist_channel"]
|
||||
playlist_title = playlist.json_data["playlist_name"]
|
||||
message = [
|
||||
f"Post Processing Playlists for: {channel_name}",
|
||||
f"{playlist_title} [{idx}/{total}]",
|
||||
]
|
||||
progress = idx / total
|
||||
self.task.send_progress(message, progress=progress)
|
||||
|
||||
def add_playlists_to_refresh(self) -> None:
|
||||
"""add playlists to refresh"""
|
||||
if self.task:
|
||||
message = ["Post Processing Playlists", "Scanning for Playlists"]
|
||||
self.task.send_progress(message)
|
||||
|
||||
self._add_playlist_sub()
|
||||
self._add_channel_playlists()
|
||||
self._add_video_playlists()
|
||||
|
||||
def _add_playlist_sub(self):
|
||||
"""add subscribed playlists to refresh"""
|
||||
subs = PlaylistSubscription().get_playlists()
|
||||
to_add = [i["playlist_id"] for i in subs]
|
||||
RedisQueue(self.PLAYLIST_QUEUE).add_list(to_add)
|
||||
|
||||
def _add_channel_playlists(self):
|
||||
"""add playlists from channels to refresh"""
|
||||
queue = RedisQueue(self.CHANNEL_QUEUE)
|
||||
while True:
|
||||
channel_id, _ = queue.get_next()
|
||||
if not channel_id:
|
||||
break
|
||||
|
||||
channel = YoutubeChannel(channel_id)
|
||||
channel.get_from_es()
|
||||
overwrites = channel.get_overwrites()
|
||||
if "index_playlists" in overwrites:
|
||||
channel.get_all_playlists()
|
||||
to_add = [i[0] for i in channel.all_playlists]
|
||||
RedisQueue(self.PLAYLIST_QUEUE).add_list(to_add)
|
||||
|
||||
def _add_video_playlists(self):
|
||||
"""add other playlists for quick sync"""
|
||||
all_playlists = RedisQueue(self.PLAYLIST_QUEUE).get_all()
|
||||
must_not = [{"terms": {"playlist_id": all_playlists}}]
|
||||
video_ids = RedisQueue(self.VIDEO_QUEUE).get_all()
|
||||
must = [{"terms": {"playlist_entries.youtube_id": video_ids}}]
|
||||
data = {
|
||||
"query": {"bool": {"must_not": must_not, "must": must}},
|
||||
"_source": ["playlist_id"],
|
||||
}
|
||||
playlists = IndexPaginate("ta_playlist", data).get_results()
|
||||
to_add = [i["playlist_id"] for i in playlists]
|
||||
RedisQueue(self.PLAYLIST_QUICK).add_list(to_add)
|
||||
|
||||
def match_videos(self) -> None:
|
||||
"""scan rest of indexed playlists to match videos"""
|
||||
queue = RedisQueue(self.PLAYLIST_QUICK)
|
||||
while True:
|
||||
total = queue.max_score()
|
||||
playlist_id, idx = queue.get_next()
|
||||
if not playlist_id or not idx or not total:
|
||||
break
|
||||
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
playlist.get_from_es()
|
||||
playlist.add_vids_to_playlist()
|
||||
playlist.remove_vids_from_playlist()
|
||||
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
message = [
|
||||
"Post Processing Playlists.",
|
||||
f"Validate Playlists: - {idx}/{total}",
|
||||
]
|
||||
progress = idx / total
|
||||
self.task.send_progress(message, progress=progress)
|
||||
|
||||
def get_comments(self):
|
||||
"""get comments from youtube"""
|
||||
video_queue = RedisQueue(self.VIDEO_QUEUE)
|
||||
comment_list = CommentList(task=self.task)
|
||||
comment_list.add(video_ids=video_queue.get_all())
|
||||
|
||||
video_queue.clear()
|
||||
comment_list.index()
|
||||
return
|
||||
|