Compare commits
521 Commits
Author | SHA1 | Date |
---|---|---|
Simon | e0f1828d9c | |
Simon | f5a2e624d8 | |
Simon | dc08c83da5 | |
Simon | 33ecd73137 | |
Simon | cb6476fa8c | |
Simon | ec64a88d1e | |
Simon | 0c487e6339 | |
Simon | f7ad1000c7 | |
Simon | aecd189d04 | |
Simon | b735a770e3 | |
Simon | 5c84a2cbf8 | |
Simon | a4d062fa52 | |
Simon | 9c34bb01d9 | |
Simon | 8c38a2eb69 | |
Simon | 25edff28e7 | |
lamusmaser | 731f4b6111 | |
Simon | e512329599 | |
Simon | e26b039899 | |
Simon | 8bf7f71351 | |
Simon | a72be27982 | |
Simon | b2c1b417e5 | |
Simon | a348b4a810 | |
Simon | bb8db53f7d | |
Simon | 2711537a4d | |
dot-mike | 45f455070d | |
Simon | 6dcef70b8e | |
Simon | c993a5de5c | |
Greg | 090d88c336 | |
Nick | 0e967d721f | |
Simon | c32dbf8bc8 | |
dot-mike | df08a6d591 | |
DarkFighterLuke | 9339b9227e | |
Simon | 8778546577 | |
Simon | 0ff27ebfb9 | |
Simon | 0d863ef557 | |
Simon | 56ca49d0e2 | |
Simon | 27b6efcab7 | |
Simon | 18ba808664 | |
Simon | 65738ef52c | |
Simon | 4049a2a3c1 | |
PhuriousGeorge | 49659322a1 | |
Simon | 4078eb307f | |
Daniel Jue | 7f056b38f4 | |
Simon | 86fe31d258 | |
Simon | 5b26433599 | |
Simon | 4d2fc5423e | |
Simon | 94295cdbd4 | |
Simon | b84bf78974 | |
Simon | 14e23a4371 | |
Simon | fe8f4faa10 | |
Simon | ddc0b7a481 | |
Simon | 7eec3ece49 | |
Simon | 789c35e2b5 | |
Simon | 8870782a6e | |
Simon | e75ffb603c | |
Simon | feabc87c9f | |
Simon | 6f1a45ffb1 | |
Simon | 098db97cba | |
Simon | 597da56975 | |
Simon | 325bdf5cba | |
Simon | db2f249979 | |
Simon | b61b8635b8 | |
Simon | 5aafc21bda | |
lamusmaser | 099c70a13b | |
Simon | 43708ee2a3 | |
Simon | cfb15c1a78 | |
Simon | e9a95d7ada | |
Simon | a21a111221 | |
Simon | 18e504faf2 | |
Simon | 9ffe2098a5 | |
Simon | 1315e836a4 | |
Simon | 2e4289e75c | |
Simon | 96e73a3a53 | |
Simon | a369be0f4a | |
Simon | d5676e5173 | |
Simon | 44c4cf93e2 | |
Simon | 02ac590caa | |
Simon | a466c02304 | |
Simon | e74c26fe36 | |
Simon | b1267cba83 | |
Simon | 91bb0ed9c0 | |
Simon | 4a145ee7cb | |
Simon | 463019ce5a | |
Simon | 9a9d35cac4 | |
Simon | f41ecd24c5 | |
crocs | eced8200c1 | |
Simon | 669bc6a620 | |
lamusmaser | 37df9b65c7 | |
lamusmaser | 6721d01fa6 | |
crocs | 2b49af9620 | |
Derek Slenk | 2f62898a10 | |
spechter | 832259ce48 | |
Simon | b8ccce250a | |
Simon | aa04ecff4f | |
Simon | dcf97d3d24 | |
crocs | 879ad52b32 | |
Simon | 0bedc3ee93 | |
Simon | 1657c55cbe | |
Simon | 8b1324139d | |
Simon | 04124e3dad | |
Simon | 9c26357f76 | |
extome | 7133d6b441 | |
Simon | 6bc0111d0a | |
Simon | 1188e66f37 | |
Simon | ef6d3e868d | |
Simon | d677f9579e | |
Simon | 0b920e87ae | |
Simon | 4d5aa4ad2f | |
Simon | 4b63c2f536 | |
Simon | 31ad9424f5 | |
Simon | 45f4ccfd93 | |
Simon | 285e2042ae | |
Simon | e4b7f8ce38 | |
Simon | 6892cbbc19 | |
Simon | 58ea256b44 | |
Merlin | aa475c58aa | |
Simon | 8247314d01 | |
Simon | 2826ca4a43 | |
Simon | 64ffc18da7 | |
Simon | 21fde5e068 | |
Simon | ea9ed6c238 | |
Simon | 8eaed07cff | |
Clark | 4d111aff82 | |
Simon | 7236bea29a | |
Simon | 5165c3e34a | |
Simon | 572b23169c | |
Steve Ovens | e1fce06f97 | |
Simon | 446d5b7949 | |
Simon | 17c0310220 | |
Omar Laham | 1b0be84972 | |
Simon | 2df68fa83c | |
Simon | 4184736bee | |
Simon | 81a5f15600 | |
Simon | 4a4a274259 | |
Simon | 0776cea7bc | |
Simon | fb853e6c73 | |
Simon | 57d8b060d2 | |
Simon | 6d1810946b | |
Simon | 88f230c3f4 | |
Simon | e9eddf06fb | |
Simon | 8af7a3caf4 | |
Simon | ad7f1f05b0 | |
Simon | e1fe8d1e29 | |
Simon | f8f01ac27f | |
Simon | 8e79cba7d5 | |
Simon | 87e457401d | |
Simon | bb271e276c | |
Simon | 9967015eba | |
Simon | 3b7e4c9266 | |
Xavier Chevalier | 1dd3fb9341 | |
Simon | 120f9e468d | |
Simon | 88f5c58b8e | |
Simon | 6bd06f61cf | |
Igor Rzegocki | 6a83756fb4 | |
Simon | 515b724047 | |
Simon | 77fef5de57 | |
Simon | 9d09d27fba | |
Simon | 0e767e2f84 | |
Simon | 7801ed0d60 | |
Igor Rzegocki | 6abec9401b | |
Simon | 1cdb9e1ad5 | |
Simon | 7afeb41469 | |
Simon | bae11fe1f1 | |
Simon | 0cacaee213 | |
Simon | dcbd8d2a55 | |
Simon | 892e81c185 | |
Igor Rzegocki | f423ddc53a | |
Igor Rzegocki | b2bb7ea28e | |
Simon | 38b3815a33 | |
Simon | 92975a5c95 | |
Joseph Liu | a5b61bfaf6 | |
Clark | 85b56300b3 | |
Kevin Gibbons | 8fa9e23c6e | |
Simon | a7fc7902f0 | |
Simon | 879f5ab52f | |
Simon | c6458c6ec1 | |
Simon | 47c433e7c1 | |
Simon | dc41e5062d | |
Merlin | 317942b7e1 | |
Merlin | 65d768bf02 | |
Merlin | 0767bbfecf | |
Simon | 78d6699c68 | |
Simon | a807d53ff8 | |
Simon | fa45cf08ba | |
Simon | c3da3e23af | |
Simon | 5cf5b4edb7 | |
Simon | 0c9c88fc0b | |
Simon | 725bba0963 | |
Simon | 76981635dc | |
Simon | b56316932f | |
Simon | 8dba2e240e | |
Simon | 4016e81f9a | |
Simon | 5ee37eb0cb | |
Simon | 4650963cc7 | |
Simon | 5acc1ea718 | |
Simon | 505f5b73c5 | |
Simon | d491b0b347 | |
Simon | 52d6c59f3f | |
Simon | 4afb605736 | |
Clark | fcc1c2a648 | |
Simon | 4ded8988c3 | |
Simon | 988c2b8af7 | |
Simon | 58ef8f753f | |
Simon | 3e9f1a392a | |
Simon | 2563722f16 | |
Simon | fb089dd3de | |
dmynerd78 | 983612f460 | |
Simon | d42bd612d0 | |
Simon | 41f6a03751 | |
Simon | f1e25c9a20 | |
Simon | 15794ebfc8 | |
Simon | 68928f5cb1 | |
Simon | a514dda1ff | |
Simon | 2bccb698e6 | |
Simon | 076452c612 | |
Simon | b005b7bcfe | |
Simon | a2eb42ebb9 | |
Simon | 33ff586af4 | |
Simon | 3803537739 | |
Simon | 6151da821f | |
Simon | 8f7f5e0e32 | |
Simon | fa140a3047 | |
Simon | 419b6f02a5 | |
Simon | 58818bb91c | |
Simon | b6ae225342 | |
Simon | 8411889db7 | |
Simon | 313bbe8b49 | |
Simon | 691c643745 | |
Simon | 9e8e929bcc | |
Simon | 2238565a94 | |
Simon | 39e9704436 | |
Simon | fa43753614 | |
Simon | 02be39b6ed | |
Simon | 375e1a2100 | |
Simon | e893dc3b24 | |
Simon | c1ea77434e | |
crocs | 0e1e544fee | |
Simon | a13cd2f7ba | |
Simon | befdc64772 | |
Simon | 06f3055913 | |
Simon | ca2c5b8dfc | |
Simon | c395a949cc | |
Simon | 4473e9c5b2 | |
Simon | 75a63c4828 | |
Simon | aea403a874 | |
Simon | ab8fed14bb | |
Simon | 6f915a5733 | |
Simon | f970ec867e | |
Simon | ef0d490890 | |
lamusmaser | 865089d46d | |
Simon | cd25eadd1c | |
Simon | d500fa5eeb | |
Simon | 4c681d813d | |
Simon | ddfab4a341 | |
Simon | 434aa97a86 | |
Simon | efde4b1142 | |
Simon | 6022bdd3cd | |
Simon | 99baf64b11 | |
Simon | 61b04ba5cf | |
Simon | 2a60360f4a | |
Simon | 8a7cb8bc6f | |
lamusmaser | 1be80b24c2 | |
Simon | 061c653bce | |
Simon | 72a98b0473 | |
Simon | 88e199ef9c | |
Simon | 879497d25a | |
Simon | 3f1d8cf75d | |
Simon | 32721cf7ed | |
Simon | 103409770d | |
Simon | 094ccf4186 | |
Simon | 247808563a | |
simon | 5927ced485 | |
simon | 6fb788b259 | |
simon | 5e92d06f21 | |
simon | 7082718c14 | |
simon | 7e2cd6acd3 | |
simon | 904d0de6aa | |
simon | 868247e6d4 | |
simon | c4e2332b83 | |
simon | 139d20560f | |
simon | 66a14cf389 | |
simon | 9b30c7df6e | |
simon | 5334d79d0d | |
simon | 64984bc1b3 | |
simon | 8ef59f5bff | |
simon | 9d6ab6b7b3 | |
simon | d62b0d3f8d | |
simon | 918a04c502 | |
simon | 60f1809ed8 | |
simon | f848e73251 | |
simon | c65fbb0b60 | |
simon | 95f114d817 | |
simon | 05eac1a8ca | |
simon | ea42f0f1e3 | |
simon | 625dc357cc | |
simon | e94e11c456 | |
simon | a9b5713629 | |
simon | dbaa13bfb0 | |
simon | 5d0d050149 | |
simon | c327e94726 | |
simon | 774780d520 | |
simon | 5e1167743f | |
simon | 4376b826c4 | |
simon | 0fef751ab5 | |
simon | 206921baf0 | |
simon | 0d2d3353a9 | |
simon | b47687535a | |
simon | e092a29b13 | |
simon | 170839362e | |
simon | b95a659396 | |
simon | 2b66786728 | |
simon | b7bfeaf215 | |
simon | cf37800c2b | |
simon | 5cc642098d | |
simon | 7c01ad88b2 | |
simon | e866bb3be5 | |
simon | 63021bd313 | |
simon | cbcb7484a7 | |
Dominik Sander | 1c0b407f3f | |
simon | 280c773441 | |
simon | efca460e9d | |
simon | 8f3b832069 | |
simon | 9b3d1fa1fd | |
Matthew Glinski | 9a38aff03d | |
simon | 06bbe2e400 | |
simon | 77900f89e3 | |
simon | bc39561606 | |
simon | 76535c6304 | |
simon | 790ba3d20e | |
simon | 89779ec13b | |
simon | 1b6b219e02 | |
simon | 5cd845e55d | |
simon | 3a091ac287 | |
simon | e385331f6c | |
simon | 4067b6c182 | |
simon | 3063236634 | |
simon | a17f05ef21 | |
simon | a4d42573ef | |
simon | bf7a429dac | |
simon | 67b3e51454 | |
simon | 5723ff1d20 | |
simon | 82d873a70f | |
simon | 7e4f564f45 | |
simon | 3c8f7aeecf | |
simon | 3f7b38713f | |
simon | 0b33edcb4c | |
simon | 4130a8c5c9 | |
simon | 06c7968070 | |
simon | a98a30cc85 | |
simon | a924d648d6 | |
simon | 0750547236 | |
simon | 1aa4401e6c | |
simon | 5e841bf7f8 | |
simon | 5d524e5110 | |
simon | 1e96c3211f | |
simon | d2649c29c3 | |
simon | ff89432d86 | |
simon | fb77ab44ee | |
simon | e43840da31 | |
simon | 3fa5ed725e | |
simon | 841f5ab67f | |
simon | 4038a837cf | |
simon | 5b2d7db102 | |
simon | c8dd391fd3 | |
simon | bde2cd4586 | |
simon | 780115adb6 | |
simon | 8eec866b31 | |
simon | 239657bc3c | |
simon | db2bdb8eff | |
simon | 87d68a3e5c | |
simon | 09c62fb095 | |
simon | b16c00f480 | |
simon | 9461783f5a | |
simon | 6420bfbe87 | |
simon | 5f8cc6c4fb | |
simon | f1bc3c9f37 | |
simon | 0b300e7e12 | |
simon | 1b067dd95e | |
simon | 20a39895f9 | |
micah686 | 0daba99f08 | |
Kevin Gibbons | efdffb365a | |
simon | ee4dbf99b3 | |
simon | e9d7523a1f | |
simon | 981a4cb8b3 | |
simon | 2a0a04ac65 | |
simon | db0ade203a | |
simon | f0becb750a | |
simon | 8fc9afbad9 | |
simon | 2b8012b5d4 | |
simon | 81701df1c7 | |
simon | df9a52a8f2 | |
simon | d082d3deba | |
simon | 19c853f07b | |
simon | c63249b778 | |
simon | 8cf41a4043 | |
simon | 0e726af2de | |
simon | f304c2eb02 | |
simon | 2bc92c9095 | |
simon | 33b9a012d4 | |
simon | 860213c427 | |
simon | 272972429b | |
simon | 3e2b2f2be1 | |
simon | db0e362b7d | |
simon | 5ffc2046d4 | |
simon | 3bd6075a9b | |
simon | 11a61a06a6 | |
simon | 3f96b37b51 | |
simon | 4f89a0696b | |
simon | 16862df71e | |
simon | 3f4d0c0c76 | |
simon | 73c82a91c0 | |
simon | f194259ab3 | |
simon | 3b9d083f5e | |
simon | 9062f61508 | |
PhuriousGeorge | 75441cdf56 | |
simon | b2fa92a8c3 | |
simon | d533c7acfe | |
simon | e6da63ff09 | |
simon | 461c2c4600 | |
simon | 0ab41f74dc | |
simon | 1ab7127ed3 | |
simon | 6b2bd06393 | |
simon | 04a559e471 | |
simon | 6b7354b14f | |
simon | 2c719ae1ae | |
simon | 488711ee8f | |
simon | 78f04a2ffc | |
simon | 58530563ce | |
simon | 259008df50 | |
simon | 20f8a5a501 | |
Gavin Palmer | 9afd81b372 | |
simon | 2d2431364d | |
simon | 16440a4170 | |
simon | 1f08ea9eea | |
simon | 2850988bfe | |
simon | 2d6c0bd02b | |
simon | 9280159531 | |
simon | 8d13b947e2 | |
simon | 22ef33c6ca | |
simon | ee568534f6 | |
simon | c204c6ca7f | |
simon | 87cfd9e154 | |
simon | 72e5d7ccf8 | |
simon | a459661fa2 | |
simon | a56de2818d | |
simon | 9ea41bd2a9 | |
simon | 14b48e676c | |
simon | 3397418cab | |
p0358 | 7e9fed3d26 | |
simon | 7dc67d6b6f | |
simon | 4fb5744cb3 | |
simon | 5617d1c7ae | |
simon | 666074ed49 | |
simon | 4c0de78fb4 | |
simon | dcb0cf6a6d | |
simon | e7fd9c0aec | |
simon | ea002ad45d | |
simon | 6328e316f4 | |
simon | b49e87c468 | |
simon | c4b175b2db | |
simon | b52aa4daba | |
simon | 8d76961e77 | |
simon | 4cf7eab950 | |
simon | 5f2febf09c | |
Clark | e276700b6d | |
simon | 4a64748da8 | |
simon | 5d8dc76e7a | |
simon | 4dfc9cef52 | |
simon | 571c2ce3a4 | |
simon | 229555d97d | |
simon | 73da59ffbf | |
simon | c7a05a9ba3 | |
simon | 9e1316b543 | |
simon | 72924fab66 | |
simon | 72f091b85e | |
simon | fdf5b2d802 | |
simon | a34c9479f2 | |
simon | 162c05628c | |
simon | 1125c0c4bf | |
simon | 0e5421a558 | |
simon | 5ec0636807 | |
simon | 6641db3e7e | |
simon | 77c26134d1 | |
simon | ef685ecb42 | |
simon | df1018e70c | |
simon | 0658424b94 | |
p0358 | d1b5644c63 | |
lamusmaser | 4936f2fdf2 | |
simon | 0fcd6c086b | |
simon | 34c5b2613c | |
simon | 6b246e7808 | |
simon | eda7528f7d | |
simon | fa46b30e61 | |
Krzysztof Olszewski | 2acaeb34db | |
simon | f8b0977a7f | |
simon | 4bab1a9815 | |
simon | ce7adfee3a | |
simon | a350d735be | |
simon | 3f86786ee8 | |
simon | d6c124aae8 | |
simon | 2d942fe9b5 | |
simon | f18e2ce599 | |
simon | 08350181f3 | |
simon | 114c7c2811 | |
simon | 83057d9e54 | |
simon | d73a131616 | |
simon | 1b5a810520 | |
simon | 2f685859cf | |
simon | cacf6e43b8 | |
simon | 991ab6a2bd | |
simon | 188eb9fc41 | |
simon | 4db3775eba | |
simon | 57841ea1c0 | |
simon | 3602ec4560 | |
simon | afa865edb1 | |
simon | bf5a93dae0 | |
simon | 4d65d45a59 | |
simon | c75ef72e74 | |
simon | 5d0689abba | |
simon | c0e36133a1 | |
simon | 89ac53083f |
|
@ -17,8 +17,5 @@ venv/
|
|||
# Unneeded graphics
|
||||
assets/*
|
||||
|
||||
# Unneeded docs
|
||||
docs/*
|
||||
|
||||
# for local testing only
|
||||
testing.sh
|
|
@ -6,7 +6,7 @@ body:
|
|||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to help improve this project!
|
||||
Thanks for taking the time to help improve this project! Please read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide carefully before continuing.
|
||||
|
||||
- type: checkboxes
|
||||
id: latest
|
||||
|
@ -15,7 +15,7 @@ body:
|
|||
options:
|
||||
- label: I'm running the latest version of Tube Archivist and have read the [release notes](https://github.com/tubearchivist/tubearchivist/releases/latest).
|
||||
required: true
|
||||
- label: I have read through the [wiki](https://github.com/tubearchivist/tubearchivist/wiki) and the [readme](https://github.com/tubearchivist/tubearchivist#installing-and-updating), particularly the [common errors](https://github.com/tubearchivist/tubearchivist#common-errors) section.
|
||||
- label: I have read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide, particularly the [bug report](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#bug-report) section.
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
|
|
|
@ -1,21 +1,19 @@
|
|||
name: Feature Request
|
||||
description: Create a new feature request
|
||||
description: I have an idea for a great addition to this project
|
||||
title: "[Feature Request]: "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to help improve this project!
|
||||
Thanks for taking the time to help improve this project! This project is *very* selective with accepting new feature requests. Please read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide carefully before continuing.
|
||||
|
||||
- type: checkboxes
|
||||
id: already
|
||||
attributes:
|
||||
label: Already implemented?
|
||||
label: "I've read the documentation"
|
||||
options:
|
||||
- label: I have read through the [wiki](https://github.com/tubearchivist/tubearchivist/wiki).
|
||||
required: true
|
||||
- label: I understand the [scope](https://github.com/tubearchivist/tubearchivist/wiki/FAQ) of this project and am aware of the [known limitations](https://github.com/tubearchivist/tubearchivist#known-limitations) and my idea is not already on the [roadmap](https://github.com/tubearchivist/tubearchivist#roadmap).
|
||||
- label: I have read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide, particularly the [feature request](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#feature-request) section.
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
|
@ -33,5 +31,4 @@ body:
|
|||
label: Your help is needed!
|
||||
description: This project is ambitious as it is, please contribute.
|
||||
options:
|
||||
- label: Yes I can help with this feature request!
|
||||
required: false
|
||||
- label: Yes I will work on this in the next few days or weeks.
|
||||
|
|
|
@ -13,9 +13,7 @@ body:
|
|||
attributes:
|
||||
label: Installation instructions
|
||||
options:
|
||||
- label: I have read and understand the [installation instructions](https://github.com/tubearchivist/tubearchivist#installing-and-updating).
|
||||
required: true
|
||||
- label: My issue is not described in the [common errors](https://github.com/tubearchivist/tubearchivist#common-errors) section.
|
||||
- label: I have read the [how to open an issue](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-open-an-issue) guide, particularly the [installation help](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#installation-help) section.
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
|
@ -40,6 +38,6 @@ body:
|
|||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant Docker logs. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
||||
render: Shell
|
||||
validations:
|
||||
required: true
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Thank you for taking the time to improve this project. Please take a look at the [How to make a Pull Request](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#how-to-make-a-pull-request) section to help get your contribution merged.
|
||||
|
||||
You can delete this text before submitting.
|
|
@ -8,20 +8,7 @@ jobs:
|
|||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
# note: this logic is duplicated in the `validate` function in ./deploy.sh
|
||||
# if you update this file, you should update that as well
|
||||
- run: pip install --upgrade pip wheel
|
||||
- run: pip install bandit black codespell flake8 flake8-bugbear
|
||||
flake8-comprehensions isort requests
|
||||
- run: ./deploy.sh validate
|
||||
# - run: black --check --diff --line-length 79 .
|
||||
# - run: codespell --skip="./.git,./package.json,./package-lock.json,./node_modules"
|
||||
# - run: flake8 . --count --max-complexity=10 --max-line-length=79
|
||||
# --show-source --statistics
|
||||
# - run: isort --check-only --line-length 79 --profile black .
|
||||
# - run: pip install -r tubearchivist/requirements.txt
|
||||
# - run: mkdir --parents --verbose .mypy_cache
|
||||
# - run: mypy --ignore-missing-imports --install-types --non-interactive .
|
||||
# - run: python3 tubearchivist/manage.py test || true
|
||||
# - run: shopt -s globstar && pyupgrade --py36-plus **/*.py || true
|
||||
# - run: safety check
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# python testing cache
|
||||
__pycache__
|
||||
.venv
|
||||
|
||||
# django testing db
|
||||
db.sqlite3
|
||||
|
|
177
CONTRIBUTING.md
|
@ -1,17 +1,128 @@
|
|||
## Contributing to Tube Archivist
|
||||
|
||||
Welcome, and thanks for showing interest in improving Tube Archivist!
|
||||
If you haven't already, the best place to start is the README. This will give you an overview on what the project is all about.
|
||||
|
||||
## Report a bug
|
||||
## Table of Contents
|
||||
- [How to open an issue](#how-to-open-an-issue)
|
||||
- [Bug Report](#bug-report)
|
||||
- [Feature Request](#feature-request)
|
||||
- [Installation Help](#installation-help)
|
||||
- [How to make a Pull Request](#how-to-make-a-pull-request)
|
||||
- [Contributions beyond the scope](#contributions-beyond-the-scope)
|
||||
- [User Scripts](#user-scripts)
|
||||
- [Improve to the Documentation](#improve-to-the-documentation)
|
||||
- [Development Environment](#development-environment)
|
||||
---
|
||||
|
||||
If you notice something is not working as expected, check to see if it has been previously reported in the [open issues](https://github.com/tubearchivist/tubearchivist/issues).
|
||||
If it has not yet been disclosed, go ahead and create an issue.
|
||||
If the issue doesn't move forward due to a lack of response, I assume it's solved and will close it after some time to keep the list fresh.
|
||||
## How to open an issue
|
||||
Please read this carefully before opening any [issue](https://github.com/tubearchivist/tubearchivist/issues) on GitHub.
|
||||
|
||||
## Wiki
|
||||
**Do**:
|
||||
- Do provide details and context, this matters a lot and makes it easier for people to help.
|
||||
- Do familiarize yourself with the project first, some questions answer themselves when using the project for some time. Familiarize yourself with the [Readme](https://github.com/tubearchivist/tubearchivist) and the [documentation](https://docs.tubearchivist.com/), this covers a lot of the common questions, particularly the [FAQ](https://docs.tubearchivist.com/faq/).
|
||||
- Do respond to questions within a day or two so issues can progress. If the issue doesn't move forward due to a lack of response, we'll assume it's solved and we'll close it after some time to keep the list fresh.
|
||||
|
||||
The wiki is where all user functions are documented in detail. These pages are mirrored into the **docs** folder of the repo. This allows for pull requests and all other features like regular code. Make any changes there, and I'll sync them with the wiki tab.
|
||||
**Don't**:
|
||||
- Don't open *duplicates*, that includes open and closed issues.
|
||||
- Don't open an issue for something that's already on the [roadmap](https://github.com/tubearchivist/tubearchivist#roadmap), this needs your help to implement it, not another issue.
|
||||
- Don't open an issue for something that's a [known limitation](https://github.com/tubearchivist/tubearchivist#known-limitations). These are *known* by definition and don't need another reminder. Some limitations may be solved in the future, maybe by you?
|
||||
- Don't overwrite the *issue template*, they are there for a reason. Overwriting that shows that you don't really care about this project. It shows that you have a misunderstanding of how open source collaboration works and just want to push your ideas through. Overwriting the template may result in a ban.
|
||||
|
||||
### Bug Report
|
||||
Bug reports are highly welcome! This project has improved a lot due to your help by providing feedback when something doesn't work as expected. The developers can't possibly cover all edge cases in an ever changing environment like YouTube and yt-dlp.
|
||||
|
||||
Please keep in mind:
|
||||
- Docker logs are the easiest way to understand what's happening when something goes wrong, *always* provide the logs upfront.
|
||||
- Set the environment variable `DJANGO_DEBUG=True` to Tube Archivist and reproduce the bug for a better log output. Don't forget to remove that variable again after.
|
||||
- A bug that can't be reproduced is difficult or sometimes even impossible to fix. Provide very clear steps *how to reproduce*.
|
||||
|
||||
### Feature Request
|
||||
This project needs your help to grow further. There is no shortage of ideas, see the open [issues on GH](https://github.com/tubearchivist/tubearchivist/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement) and the [roadmap](https://github.com/tubearchivist/tubearchivist#roadmap), what this project lacks is contributors interested in helping with overall improvements of the application. Focus is *not* on adding new features, but improving existing ones.
|
||||
|
||||
Existing ideas are easily *multiple years* worth of development effort, at least at current speed. This project is *very* selective with accepting new feature requests at this point.
|
||||
|
||||
Good feature requests usually fall into one or more of these categories:
|
||||
- You want to work on your own small scoped idea within the next few days or weeks.
|
||||
- Your idea is beneficial for a wide range of users, not just for you.
|
||||
- Your idea extends the current project by building on and improving existing functionality.
|
||||
- Your idea is quick and easy to implement, for an experienced as well as for a first time contributor.
|
||||
|
||||
Your request is likely going to be rejected if:
|
||||
- Your idea requires multiple days worth of development time and is unrealistic to be implemented any time soon.
|
||||
- There are already other ways to do what you are trying to do.
|
||||
- You are trying to do something that only applies to your platform, your specific workflow or your specific setup.
|
||||
- Your idea would fundamentally change how the project works or it wouldn't be able to be implemented with backwards compatibility.
|
||||
- Your idea is not a good fit for this project.
|
||||
|
||||
### Installation Help
|
||||
GitHub is most likely not the best place to ask for installation help. That's inherently individual and one on one.
|
||||
1. First step is always, help yourself. Start at the [Readme](https://github.com/tubearchivist/tubearchivist) or the additional platform specific installation pages in the [docs](https://docs.tubearchivist.com/).
|
||||
2. If that doesn't answer your question, open a `#support` thread on [Discord](https://www.tubearchivist.com/discord).
|
||||
3. Only if that is not an option, open an issue here.
|
||||
|
||||
IMPORTANT: When receiving help, contribute back to the community by improving the installation instructions with your newly gained knowledge.
|
||||
|
||||
---
|
||||
|
||||
## How to make a Pull Request
|
||||
|
||||
Thank you for contributing and helping improve this project. Focus for the foreseeable future is on improving and building on existing functionality, *not* on adding and expanding the application.
|
||||
|
||||
This is a quick checklist to help streamline the process:
|
||||
|
||||
- For **code changes**, make your PR against the [testing branch](https://github.com/tubearchivist/tubearchivist/tree/testing). That's where all active development happens. This simplifies the later merging into *master*, minimizes any conflicts and usually allows for easy and convenient *fast-forward* merging.
|
||||
- For **documentation changes**, make your PR directly against the *master* branch.
|
||||
- Show off your progress, even if not yet complete, by creating a [draft](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests#draft-pull-requests) PR first and switch it as *ready* when you are ready.
|
||||
- Make sure all your code is linted and formatted correctly, see below. The automatic GH action unfortunately needs to be triggered manually by a maintainer for first time contributors, but will trigger automatically for existing contributors.
|
||||
|
||||
### Making changes to the JavaScript
|
||||
|
||||
The JavaScript does not require any build step; you just edit the files directly. However, there is config for eslint and prettier (a linter and formatter respectively); their use is recommended but not required. To use them, install `node`, run `npm i` from the root directory of this repository to install dependencies, then run `npm run lint` and `npm run format` to run eslint and prettier respectively.
|
||||
|
||||
### Code formatting and linting
|
||||
|
||||
To keep things clean and consistent for everybody, there is a GitHub action set up to lint and check the changes. You can test your code locally first if you want. For example if you made changes in the **video** module, run
|
||||
|
||||
```shell
|
||||
./deploy.sh validate tubearchivist/home/src/index/video.py
|
||||
```
|
||||
|
||||
to validate your changes. If you omit the path, all the project files will get checked. This is subject to change as the codebase improves.
|
||||
|
||||
---
|
||||
|
||||
## Contributions beyond the scope
|
||||
|
||||
As you have read the [FAQ](https://docs.tubearchivist.com/faq/) and the [known limitations](https://github.com/tubearchivist/tubearchivist#known-limitations) and have gotten an idea what this project tries to do, there will be some obvious shortcomings that stand out, that have been explicitly excluded from the scope of this project, at least for the time being.
|
||||
|
||||
Extending the scope of this project will only be feasible with more [regular contributors](https://github.com/tubearchivist/tubearchivist/graphs/contributors) that are willing to help improve this project in the long run. Contributors that have an overall improvement of the project in mind and not just about implementing this *one* thing.
|
||||
|
||||
Small minor additions, or making a PR for a documented feature request or bug, even if that was and will be your only contribution to this project, are always welcome and are *not* what this is about.
|
||||
|
||||
Beyond that, general rules to consider:
|
||||
|
||||
- Maintainability is key: It's not just about implementing something and being done with it, it's about maintaining it, fixing bugs as they occur, improving on it and supporting it in the long run.
|
||||
- Others can do it better: Some problems have been solved by very talented developers. These things don't need to be reinvented again here in this project.
|
||||
- Develop for the 80%: New features and additions *should* be beneficial for 80% of the users. If you are trying to solve your own problem that only applies to you, maybe that would be better to do in your own fork or if possible by a standalone implementation using the API.
|
||||
- If all of that sounds too strict for you, as stated above, start becoming a regular contributor to this project.
|
||||
|
||||
---
|
||||
|
||||
## User Scripts
|
||||
Some of you might have created useful scripts or API integrations around this project. Sharing is caring! Please add a link to your script to the Readme [here](https://github.com/tubearchivist/tubearchivist#user-scripts).
|
||||
- Your repo should have a `LICENSE` file with one of the common open source licenses. People are expected to fork, adapt and build upon your great work.
|
||||
- Your script should not modify the *official* files of Tube Archivist. E.g. your symlink script should build links *outside* of your `/youtube` folder. Or your fancy script that creates a beautiful artwork gallery should do that *outside* of the `/cache` folder. Modifying the *official* files and folders of TA is probably not supported.
|
||||
- On the top of the repo you should have a mention and a link back to the Tube Archivist repo. Clearly state to **not** open any issues on the main TA repo regarding your script.
|
||||
- Example template:
|
||||
- `[<user>/<repo>](https://linktoyourrepo.com)`: A short one line description.
|
||||
|
||||
---
|
||||
|
||||
## Improve to the Documentation
|
||||
|
||||
The documentation is available at [docs.tubearchivist.com](https://docs.tubearchivist.com/) and is built from a separate repo [tubearchivist/docs](https://github.com/tubearchivist/docs). The Readme has additional instructions on how to make changes.
|
||||
|
||||
---
|
||||
|
||||
## Development Environment
|
||||
|
||||
|
@ -35,18 +146,18 @@ Make your changes locally and re-run `docker compose up --build`. The `Dockerfil
|
|||
|
||||
You may find it nice to run everything inside of a VM, though this is not necessary. There's a `deploy.sh` script which has some helpers for this use case. YMMV, this is what one of the developers does:
|
||||
|
||||
- Clone the repo, work on it with your favorite code editor in your local filesystem. *testing* branch is the where all the changes are happening, might be unstable and is WIP.
|
||||
- Then I have a VM running standard Ubuntu Server LTS with docker installed. The VM keeps my projects separate and offers convenient snapshot functionality. The VM also offers ways to simulate lowend environments by limiting CPU cores and memory. You can use this [Ansible Docker Ubuntu](https://github.com/bbilly1/ansible-playbooks) playbook to get started quickly. But you could also just run docker on your host system.
|
||||
- Clone the repo, work on it with your favorite code editor in your local filesystem. *testing* branch is where all the changes are happening, might be unstable and is WIP.
|
||||
- Then I have a VM running standard Ubuntu Server LTS with docker installed. The VM keeps my projects separate and offers convenient snapshot functionality. The VM also offers ways to simulate low end environments by limiting CPU cores and memory. You can use this [Ansible Docker Ubuntu](https://github.com/bbilly1/ansible-playbooks) playbook to get started quickly. But you could also just run docker on your host system.
|
||||
- I have my local DNS resolve `tubearchivist.local` to the IP of the VM for convenience. To deploy the latest changes and rebuild the application to the testing VM run:
|
||||
```bash
|
||||
./deploy.sh test
|
||||
```
|
||||
- The command above will call the docker build command with `--build-arg INSTALL_DEBUG=1` to install additional useful debug tools.
|
||||
- The `test` argument takes another optional argument to build for a specific architecture. Valid options are: `amd64`, `arm64` and `multi`; the default is `amd64`.
|
||||
- This `deploy.sh` script is not meant to be universally usable for every possible environment but could serve as an idea on how to automatically rebuild containers to test changes - customize to your liking.
|
||||
- This `deploy.sh` script is not meant to be universally usable for every possible environment but could serve as an idea on how to automatically rebuild containers to test changes - customize to your liking.
|
||||
|
||||
## Working with Elasticsearch
|
||||
Additionally to the required services as listed in the example docker-compose file, the **Dev Tools** of [Kibana](https://www.elastic.co/guide/en/kibana/current/docker.html) are invaluable for running and testing Elasticsearch queries.
|
||||
### Working with Elasticsearch
|
||||
Additionally to the required services as listed in the example docker-compose file, the **Dev Tools** of [Kibana](https://www.elastic.co/guide/en/kibana/current/docker.html) are invaluable for running and testing Elasticsearch queries.
|
||||
|
||||
**Quick start**
|
||||
Generate your access token in Elasticsearch:
|
||||
|
@ -56,45 +167,15 @@ bin/elasticsearch-service-tokens create elastic/kibana kibana
|
|||
|
||||
Example docker compose, use same version as for Elasticsearch:
|
||||
```yml
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:0.0.0
|
||||
container_name: kibana
|
||||
environment:
|
||||
services:
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:0.0.0
|
||||
container_name: kibana
|
||||
environment:
|
||||
- "ELASTICSEARCH_HOSTS=http://archivist-es:9200"
|
||||
- "ELASTICSEARCH_SERVICEACCOUNTTOKEN=<your-token-here>"
|
||||
ports:
|
||||
ports:
|
||||
- "5601:5601"
|
||||
```
|
||||
|
||||
If you want to run queries on the Elasticsearch container directly from your host with for example `curl` or something like *postman*, you might want to **publish** the port 9200 instead of just **exposing** it.
|
||||
|
||||
## Implementing a new feature
|
||||
|
||||
Do you see anything on the roadmap that you would like to take a closer look at but you are not sure, what's the best way to tackle that? Or anything not on there yet you'd like to implement but are not sure how? Reach out on Discord and we'll look into it together.
|
||||
|
||||
## Making changes
|
||||
|
||||
To fix a bug or implement a feature, fork the repository and make all changes to the testing branch. When ready, create a pull request.
|
||||
|
||||
## Making changes to the JavaScript
|
||||
|
||||
The JavaScript does not require any build step; you just edit the files directly. However, there is config for eslint and prettier (a linter and formatter respectively); their use is recommended but not required. To use them, install `node`, run `npm i` from the root directory of this repository to install dependencies, then run `npm run lint` and `npm run format` to run eslint and prettier respectively.
|
||||
|
||||
## Releases
|
||||
|
||||
There are three different docker tags:
|
||||
- **latest**: As the name implies is the latest multiarch release for regular usage.
|
||||
- **unstable**: Intermediate amd64 builds for quick testing and improved collaboration. Don't mix with a *latest* installation, for your testing environment only. This is untested and WIP and will have breaking changes between commits that might require a reset to resolve.
|
||||
- **semantic versioning**: There will be a handful of named version tags that will also have a matching release and tag on github.
|
||||
|
||||
If you want to see what's in your container, check out the matching release tag. A merge to **master** usually means a *latest* or *unstable* release. If you want to preview changes in your testing environment, pull the *unstable* tag or clone the repository and build the docker container with the Dockerfile from the **testing** branch.
|
||||
|
||||
## Code formatting and linting
|
||||
|
||||
To keep things clean and consistent for everybody, there is a github action set up to lint and check the changes. You can test your code locally first if you want. For example if you made changes in the **video** module, run
|
||||
|
||||
```shell
|
||||
./deploy.sh validate tubearchivist/home/src/index/video.py
|
||||
```
|
||||
|
||||
to validate your changes. If you omit the path, all the project files will get checked. This is subject to change as the codebase improves.
|
||||
|
|
38
Dockerfile
|
@ -1,20 +1,25 @@
|
|||
# multi stage to build tube archivist
|
||||
# first stage to build python wheel, copy into final image
|
||||
# build python wheel, download and extract ffmpeg, copy into final image
|
||||
|
||||
|
||||
# First stage to build python wheel
|
||||
FROM python:3.10.9-slim-bullseye AS builder
|
||||
FROM python:3.11.3-slim-bullseye AS builder
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential gcc libldap2-dev libsasl2-dev libssl-dev
|
||||
build-essential gcc libldap2-dev libsasl2-dev libssl-dev git
|
||||
|
||||
# install requirements
|
||||
COPY ./tubearchivist/requirements.txt /requirements.txt
|
||||
RUN pip install --user -r requirements.txt
|
||||
|
||||
# build ffmpeg
|
||||
FROM python:3.11.3-slim-bullseye as ffmpeg-builder
|
||||
COPY docker_assets/ffmpeg_download.py ffmpeg_download.py
|
||||
RUN python ffmpeg_download.py $TARGETPLATFORM
|
||||
|
||||
# build final image
|
||||
FROM python:3.10.9-slim-bullseye as tubearchivist
|
||||
FROM python:3.11.3-slim-bullseye as tubearchivist
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
ARG INSTALL_DEBUG
|
||||
|
@ -25,30 +30,15 @@ ENV PYTHONUNBUFFERED 1
|
|||
COPY --from=builder /root/.local /root/.local
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
|
||||
# copy ffmpeg
|
||||
COPY --from=ffmpeg-builder ./ffmpeg/ffmpeg /usr/bin/ffmpeg
|
||||
COPY --from=ffmpeg-builder ./ffprobe/ffprobe /usr/bin/ffprobe
|
||||
|
||||
# install distro packages needed
|
||||
RUN apt-get clean && apt-get -y update && apt-get -y install --no-install-recommends \
|
||||
nginx \
|
||||
atomicparsley \
|
||||
curl \
|
||||
xz-utils && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# install patched ffmpeg build, default to linux64
|
||||
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
|
||||
curl -s https://api.github.com/repos/yt-dlp/FFmpeg-Builds/releases/latest \
|
||||
| grep browser_download_url \
|
||||
| grep ".*master.*linuxarm64.*tar.xz" \
|
||||
| cut -d '"' -f 4 \
|
||||
| xargs curl -L --output ffmpeg.tar.xz ; \
|
||||
else \
|
||||
curl -s https://api.github.com/repos/yt-dlp/FFmpeg-Builds/releases/latest \
|
||||
| grep browser_download_url \
|
||||
| grep ".*master.*linux64.*tar.xz" \
|
||||
| cut -d '"' -f 4 \
|
||||
| xargs curl -L --output ffmpeg.tar.xz ; \
|
||||
fi && \
|
||||
tar -xf ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/bin/ "ffmpeg" && \
|
||||
tar -xf ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/bin/ "ffprobe" && \
|
||||
rm ffmpeg.tar.xz
|
||||
curl && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# install debug tools for testing environment
|
||||
RUN if [ "$INSTALL_DEBUG" ] ; then \
|
||||
|
|
316
README.md
|
@ -1,6 +1,6 @@
|
|||
![Tube Archivist](assets/tube-archivist-banner.jpg?raw=true "Tube Archivist Banner")
|
||||
![Tube Archivist](assets/tube-archivist-front.jpg?raw=true "Tube Archivist Banner")
|
||||
[*more screenshots and video*](SHOWCASE.MD)
|
||||
|
||||
<h1 align="center">Your self hosted YouTube media server</h1>
|
||||
<div align="center">
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-docker.png" alt="tubearchivist-docker" title="Tube Archivist Docker Pulls" height="50" width="190"/></a>
|
||||
<a href="https://github.com/bbilly1/tilefy" target="_blank"><img src="https://tiles.tilefy.me/t/tubearchivist-github-star.png" alt="tubearchivist-github-star" title="Tube Archivist GitHub Stars" height="50" width="190"/></a>
|
||||
|
@ -9,202 +9,84 @@
|
|||
</div>
|
||||
|
||||
## Table of contents:
|
||||
* [Wiki](https://github.com/tubearchivist/tubearchivist/wiki) with [FAQ](https://github.com/tubearchivist/tubearchivist/wiki/FAQ)
|
||||
* [Docs](https://docs.tubearchivist.com/) with [FAQ](https://docs.tubearchivist.com/faq/), and API documentation
|
||||
* [Core functionality](#core-functionality)
|
||||
* [Screenshots](#screenshots)
|
||||
* [Problem Tube Archivist tries to solve](#problem-tube-archivist-tries-to-solve)
|
||||
* [Connect](#connect)
|
||||
* [Extended Universe](#extended-universe)
|
||||
* [Installing and updating](#installing-and-updating)
|
||||
* [Resources](#resources)
|
||||
* [Installing](#installing)
|
||||
* [Getting Started](#getting-started)
|
||||
* [Known limitations](#known-limitations)
|
||||
* [Port Collisions](#port-collisions)
|
||||
* [Common Errors](#common-errors)
|
||||
* [Roadmap](#roadmap)
|
||||
* [Known limitations](#known-limitations)
|
||||
* [Donate](#donate)
|
||||
|
||||
------------------------
|
||||
|
||||
## Core functionality
|
||||
Once your YouTube video collection grows, it becomes hard to search and find a specific video. That's where Tube Archivist comes in: By indexing your video collection with metadata from YouTube, you can organize, search and enjoy your archived YouTube videos without hassle offline through a convenient web interface. This includes:
|
||||
* Subscribe to your favorite YouTube channels
|
||||
* Download Videos using **yt-dlp**
|
||||
* Index and make videos searchable
|
||||
* Play videos
|
||||
* Keep track of viewed and unviewed videos
|
||||
|
||||
## Tube Archivist on YouTube
|
||||
[![ibracorp-youtube-video-thumb](assets/tube-archivist-ibracorp-O8H8Z01c0Ys.jpg)](https://www.youtube.com/watch?v=O8H8Z01c0Ys)
|
||||
|
||||
## Screenshots
|
||||
![home screenshot](assets/tube-archivist-screenshot-home.png?raw=true "Tube Archivist Home")
|
||||
*Home Page*
|
||||
|
||||
![channels screenshot](assets/tube-archivist-screenshot-channels.png?raw=true "Tube Archivist Channels")
|
||||
*All Channels*
|
||||
|
||||
![single channel screenshot](assets/tube-archivist-screenshot-single-channel.png?raw=true "Tube Archivist Single Channel")
|
||||
*Single Channel*
|
||||
|
||||
![video page screenshot](assets/tube-archivist-screenshot-video.png?raw=true "Tube Archivist Video Page")
|
||||
*Video Page*
|
||||
|
||||
![video page screenshot](assets/tube-archivist-screenshot-download.png?raw=true "Tube Archivist Video Page")
|
||||
*Downloads Page*
|
||||
|
||||
## Problem Tube Archivist tries to solve
|
||||
Once your YouTube video collection grows, it becomes hard to search and find a specific video. That's where Tube Archivist comes in: By indexing your video collection with metadata from YouTube, you can organize, search and enjoy your archived YouTube videos without hassle offline through a convenient web interface.
|
||||
|
||||
## Connect
|
||||
## Resources
|
||||
- [Discord](https://www.tubearchivist.com/discord): Connect with us on our Discord server.
|
||||
- [r/TubeArchivist](https://www.reddit.com/r/TubeArchivist/): Join our Subreddit.
|
||||
|
||||
## Extended Universe
|
||||
- [Browser Extension](https://github.com/tubearchivist/browser-extension) Tube Archivist Companion, for [Firefox](https://addons.mozilla.org/addon/tubearchivist-companion/) and [Chrome](https://chrome.google.com/webstore/detail/tubearchivist-companion/jjnkmicfnfojkkgobdfeieblocadmcie)
|
||||
- [Tube Archivist Metrics](https://github.com/tubearchivist/tubearchivist-metrics) to create statistics in Prometheus/OpenMetrics format.
|
||||
- [Jellyfin Plugin](https://github.com/tubearchivist/tubearchivist-jf-plugin): Add your videos to Jellyfin
|
||||
- [Plex Plugin](https://github.com/tubearchivist/tubearchivist-plex): Add your videos to Plex
|
||||
|
||||
## Installing and updating
|
||||
There's dedicated user-contributed install steps under [docs/Installation.md](./docs/Installation.md) for podman, Unraid, Truenas and Synology which you can use instead of this section if you happen to be using one of those. Otherwise, continue on.
|
||||
## Installing
|
||||
For minimal system requirements, the Tube Archivist stack needs around 2GB of available memory for a small testing setup and around 4GB of available memory for a mid to large sized installation. Minimal with dual core with 4 threads, better quad core plus.
|
||||
This project requires docker. Ensure it is installed and running on your system.
|
||||
|
||||
For minimal system requirements, the Tube Archivist stack needs around 2GB of available memory for a small testing setup and around 4GB of available memory for a mid to large sized installation. Minimal with dual core with 4 threads, better quad core plus.
|
||||
The documentation has additional user provided instructions for [Unraid](https://docs.tubearchivist.com/installation/unraid/), [Synology](https://docs.tubearchivist.com/installation/synology/), [Podman](https://docs.tubearchivist.com/installation/podman/) and [True NAS](https://docs.tubearchivist.com/installation/truenas-scale/).
|
||||
|
||||
Note for arm64 hosts: The Tube Archivist container is multi arch, so is Elasticsearch. RedisJSON doesn't offer arm builds, but you can use the image `bbilly1/rejson`, an unofficial rebuild for arm64.
|
||||
The instructions here should get you up and running quickly, for Docker beginners and full explanation about each environment variable, see the [docs](https://docs.tubearchivist.com/installation/docker-compose/).
|
||||
|
||||
This project requires docker. Ensure it is installed and running on your system.
|
||||
Take a look at the example [docker-compose.yml](https://github.com/tubearchivist/tubearchivist/blob/master/docker-compose.yml) and configure the required environment variables.
|
||||
|
||||
Save the [docker-compose.yml](./docker-compose.yml) file from this repository somewhere permanent on your system, keeping it named `docker-compose.yml`. You'll need to refer to it whenever starting this application.
|
||||
**TubeArchivist**:
|
||||
| Environment Var | Value | |
|
||||
| ----------- | ----------- | ----------- |
|
||||
| TA_HOST | Server IP or hostname | Required |
|
||||
| TA_USERNAME | Initial username when logging into TA | Required |
|
||||
| TA_PASSWORD | Initial password when logging into TA | Required |
|
||||
| ELASTIC_PASSWORD | Password for ElasticSearch | Required |
|
||||
| REDIS_HOST | Hostname for Redis | Required |
|
||||
| TZ | Set your timezone for the scheduler | Required |
|
||||
| TA_PORT | Overwrite Nginx port | Optional |
|
||||
| TA_UWSGI_PORT | Overwrite container internal uwsgi port | Optional |
|
||||
| TA_ENABLE_AUTH_PROXY | Enables support for forwarding auth in reverse proxies | [Read more](https://docs.tubearchivist.com/configuration/forward-auth/) |
|
||||
| TA_AUTH_PROXY_USERNAME_HEADER | Header containing username to log in | Optional |
|
||||
| TA_AUTH_PROXY_LOGOUT_URL | Logout URL for forwarded auth | Optional |
|
||||
| ES_URL | URL That ElasticSearch runs on | Optional |
|
||||
| ES_DISABLE_VERIFY_SSL | Disable ElasticSearch SSL certificate verification | Optional |
|
||||
| ES_SNAPSHOT_DIR | Custom path where elastic search stores snapshots for master/data nodes | Optional |
|
||||
| HOST_GID | Allow TA to own the video files instead of container user | Optional |
|
||||
| HOST_UID | Allow TA to own the video files instead of container user | Optional |
|
||||
| ELASTIC_USER | Change the default ElasticSearch user | Optional |
|
||||
| REDIS_PORT | Port that Redis runs on | Optional |
|
||||
| TA_LDAP | Configure TA to use LDAP Authentication | [Read more](https://docs.tubearchivist.com/configuration/ldap/) |
|
||||
| ENABLE_CAST | Enable casting support | [Read more](https://docs.tubearchivist.com/configuration/cast/) |
|
||||
| DJANGO_DEBUG | Return additional error messages, for debug only | |
|
||||
|
||||
Edit the following values from that file:
|
||||
- under `tubearchivist`->`environment`:
|
||||
- `HOST_UID`: your UID, if you want TubeArchivist to create files with your UID. Remove if you are OK with files being owned by the container user.
|
||||
- `HOST_GID`: as above but GID.
|
||||
- `TA_HOST`: change it to the address of the machine you're running this on. This can be an IP address or a domain name.
|
||||
- `TA_PASSWORD`: pick a password to use when logging in.
|
||||
- `ELASTIC_PASSWORD`: pick a password for the elastic service. You won't need to type this yourself.
|
||||
- `TZ`: your time zone. If you don't know yours, you can look it up [here](https://www.timezoneconverter.com/cgi-bin/findzone/findzone).
|
||||
- under `archivist-es`->`environment`:
|
||||
- `"ELASTIC_PASSWORD=verysecret"`: change `verysecret` to match the ELASTIC_PASSWORD you picked above.
|
||||
**ElasticSearch**
|
||||
| Environment Var | Value | State |
|
||||
| ----------- | ----------- | ----------- |
|
||||
| ELASTIC_PASSWORD | Matching password `ELASTIC_PASSWORD` from TubeArchivist | Required |
|
||||
| http.port | Change the port ElasticSearch runs on | Optional |
|
||||
|
||||
By default Docker will store all data, including downloaded data, in its own data-root directory (which you can find by running `docker info` and looking for the "Docker Root Dir"). If you want to use other locations, you can replace the `media:`, `cache:`, `redis:`, and `es:` volume names with absolute paths; if you do, remove them from the `volumes:` list at the bottom of the file.
|
||||
|
||||
From a terminal, `cd` into the directory you saved the `docker-compose.yml` file in and run `docker compose up --detach`. The first time you do this it will download the appropriate images, which can take a minute.
|
||||
## Update
|
||||
Always use the *latest* (the default) or a named semantic version tag for the docker images. The *unstable* tags are only for your testing environment, there might not be an update path for these testing builds.
|
||||
|
||||
You can follow the logs with `docker compose logs -f`. Once it's ready it will print something like `celery@1234567890ab ready`. At this point you should be able to go to `http://your-host:8000` and log in with the `TA_USERNAME`/`TA_PASSWORD` credentials.
|
||||
You will see the current version number of **Tube Archivist** in the footer of the interface. There is a daily version check task querying tubearchivist.com, notifying you of any new releases in the footer. To update, you need to update the docker images, the method for which will depend on your platform. For example, if you're using `docker-compose`, run `docker-compose pull` and then restart with `docker-compose up -d`. After updating, check the footer to verify you are running the expected version.
|
||||
|
||||
You can bring the application down by running `docker compose down` in the same directory.
|
||||
|
||||
Use the *latest* (the default) or a named semantic version tag for the docker images. The *unstable* tag is for intermediate testing and as the name implies, is **unstable** and should not be used on your main installation but in a [testing environment](CONTRIBUTING.md).
|
||||
|
||||
## Installation Details
|
||||
|
||||
Tube Archivist depends on three main components split up into separate docker containers:
|
||||
|
||||
### Tube Archivist
|
||||
The main Python application that displays and serves your video collection, built with Django.
|
||||
- Serves the interface on port `8000`
|
||||
- Needs a volume for the video archive at **/youtube**
|
||||
- And another volume to save application data at **/cache**.
|
||||
- The environment variables `ES_URL` and `REDIS_HOST` are needed to tell Tube Archivist where Elasticsearch and Redis respectively are located.
|
||||
- The environment variables `HOST_UID` and `HOST_GID` allow Tube Archivist to `chown` the video files to the main host system user instead of the container user. Those two variables are optional, not setting them will disable that functionality. That might be needed if the underlying filesystem doesn't support `chown` like *NFS*.
|
||||
- Set the environment variable `TA_HOST` to match with the system running Tube Archivist. This can be a domain like *example.com*, a subdomain like *ta.example.com* or an IP address like *192.168.1.20*, add without the protocol and without the port. You can add multiple hostnames separated with a space. Any wrong configurations here will result in a `Bad Request (400)` response.
|
||||
- Change the environment variables `TA_USERNAME` and `TA_PASSWORD` to create the initial credentials.
|
||||
- `ELASTIC_PASSWORD` is for the password for Elasticsearch. The environment variable `ELASTIC_USER` is optional, should you want to change the username from the default *elastic*.
|
||||
- For the scheduler to know what time it is, set your timezone with the `TZ` environment variable, defaults to *UTC*.
|
||||
- Set the environment variable `ENABLE_CAST=True` to send videos to your cast device, [read more](#enable-cast).
|
||||
|
||||
### Port collisions
|
||||
If you have a collision on port `8000`, best solution is to use dockers *HOST_PORT* and *CONTAINER_PORT* distinction: To for example change the interface to port 9000 use `9000:8000` in your docker-compose file.
|
||||
|
||||
Should that not be an option, the Tube Archivist container takes these two additional environment variables:
|
||||
- **TA_PORT**: To actually change the port where nginx listens, make sure to also change the ports value in your docker-compose file.
|
||||
- **TA_UWSGI_PORT**: To change the default uwsgi port 8080 used for container internal networking between uwsgi serving the django application and nginx.
|
||||
|
||||
Changing any of these two environment variables will change the files *nginx.conf* and *uwsgi.ini* at startup using `sed` in your container.
|
||||
|
||||
### LDAP Authentication
|
||||
You can configure LDAP with the following environment variables:
|
||||
|
||||
- `TA_LDAP` (ex: `true`) Set to anything besides empty string to use LDAP authentication **instead** of local user authentication.
|
||||
- `TA_LDAP_SERVER_URI` (ex: `ldap://ldap-server:389`) Set to the uri of your LDAP server.
|
||||
- `TA_LDAP_DISABLE_CERT_CHECK` (ex: `true`) Set to anything besides empty string to disable certificate checking when connecting over LDAPS.
|
||||
- `TA_LDAP_BIND_DN` (ex: `uid=search-user,ou=users,dc=your-server`) DN of the user that is able to perform searches on your LDAP account.
|
||||
- `TA_LDAP_BIND_PASSWORD` (ex: `yoursecretpassword`) Password for the search user.
|
||||
- `TA_LDAP_USER_ATTR_MAP_USERNAME` (default: `uid`) Bind attribute used to map LDAP user's username
|
||||
- `TA_LDAP_USER_ATTR_MAP_PERSONALNAME` (default: `givenName`) Bind attribute used to match LDAP user's First Name/Personal Name.
|
||||
- `TA_LDAP_USER_ATTR_MAP_SURNAME` (default: `sn`) Bind attribute used to match LDAP user's Last Name/Surname.
|
||||
- `TA_LDAP_USER_ATTR_MAP_EMAIL` (default: `mail`) Bind attribute used to match LDAP user's EMail address
|
||||
- `TA_LDAP_USER_BASE` (ex: `ou=users,dc=your-server`) Search base for user filter.
|
||||
- `TA_LDAP_USER_FILTER` (ex: `(objectClass=user)`) Filter for valid users. Login usernames are matched using the attribute specified in `TA_LDAP_USER_ATTR_MAP_USERNAME` and should not be specified in this filter.
|
||||
|
||||
When LDAP authentication is enabled, django passwords (e.g. the password defined in TA_PASSWORD), will not allow you to login, only the LDAP server is used.
|
||||
|
||||
### Enable Cast
|
||||
As Cast doesn't support authentication, enabling this functionality will make your static files like artwork and media files accessible by guessing the links. That's read only access, the application itself is still protected.
|
||||
|
||||
Enabling this integration will embed an additional third party JS library from **Google**.
|
||||
|
||||
**Requirements**:
|
||||
- HTTPS: To use the cast integration HTTPS needs to be enabled, which can be done using a reverse proxy. This is a requirement by Google as communication to the cast device is required to be encrypted, but the content itself is not.
|
||||
- Supported Browser: A supported browser is required for this integration such as Google Chrome. Other browsers, especially Chromium-based browsers, may support casting by enabling it in the settings.
|
||||
- Subtitles: Subtitles are supported however they do not work out of the box and require additional configuration. Due to requirements by Google, to use subtitles you need additional headers which will need to be configured in your reverse proxy. See this [page](https://developers.google.com/cast/docs/web_sender/advanced#cors_requirements) for the specific requirements.
|
||||
You need the following headers: Content-Type, Accept-Encoding, and Range. Note that the last two headers, Accept-Encoding and Range, are additional headers that you may not have needed previously.
|
||||
Wildcards "*" can not be used for the Access-Control-Allow-Origin header. If the page has protected media content, it must use a domain instead of a wildcard.
|
||||
|
||||
### Elasticsearch
|
||||
**Note**: Tube Archivist depends on Elasticsearch 8.
|
||||
|
||||
Use `bbilly1/tubearchivist-es` to automatically get the recommended version, or use the official image with the version tag in the docker-compose file.
|
||||
|
||||
Stores video meta data and makes everything searchable. Also keeps track of the download queue.
|
||||
- Needs to be accessible over the default port `9200`
|
||||
- Needs a volume at **/usr/share/elasticsearch/data** to store data
|
||||
|
||||
Follow the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) for additional installation details.
|
||||
|
||||
### Redis JSON
|
||||
Functions as a cache and temporary link between the application and the file system. Used to store and display messages and configuration variables.
|
||||
- Needs to be accessible over the default port `6379`
|
||||
- Needs a volume at **/data** to make your configuration changes permanent.
|
||||
|
||||
### Redis on a custom port
|
||||
For some architectures it might be required to run Redis JSON on a nonstandard port. To for example change the Redis port to **6380**, set the following values:
|
||||
- Set the environment variable `REDIS_PORT=6380` to the *tubearchivist* service.
|
||||
- For the *archivist-redis* service, change the ports to `6380:6380`
|
||||
- Additionally set the following value to the *archivist-redis* service: `command: --port 6380 --loadmodule /usr/lib/redis/modules/rejson.so`
|
||||
|
||||
### Updating Tube Archivist
|
||||
You will see the current version number of **Tube Archivist** in the footer of the interface. There is a daily version check task querying tubearchivist.com, notifying you of any new releases in the footer. To take advantage of the latest fixes and improvements, make sure you are running the *latest and greatest*.
|
||||
* There can be breaking changes between updates, particularly as the application grows, new environment variables or settings might be required for you to set in your docker-compose file. *Always* check the **release notes**: Any breaking changes will be marked there.
|
||||
* All testing and development is done with the Elasticsearch version number as mentioned in the provided *docker-compose.yml* file. This will be updated when a new release of Elasticsearch is available. Running an older version of Elasticsearch is most likely not going to result in any issues, but it's still recommended to run the same version as mentioned. Use `bbilly1/tubearchivist-es` to automatically get the recommended version.
|
||||
|
||||
### Helm charts
|
||||
|
||||
There is a Helm Chart available at https://github.com/insuusvenerati/helm-charts. Mostly self-explanatory but feel free to ask questions in the discord / subreddit.
|
||||
|
||||
## Common Errors
|
||||
### vm.max_map_count
|
||||
**Elastic Search** in Docker requires the kernel setting of the host machine `vm.max_map_count` to be set to at least 262144.
|
||||
|
||||
To temporarily set the value, run:
|
||||
```
|
||||
sudo sysctl -w vm.max_map_count=262144
|
||||
```
|
||||
|
||||
To apply the change permanently depends on your host operating system:
|
||||
- For example on Ubuntu Server add `vm.max_map_count = 262144` to the file */etc/sysctl.conf*.
|
||||
- On Arch based systems create a file */etc/sysctl.d/max_map_count.conf* with the content `vm.max_map_count = 262144`.
|
||||
- On any other platform look up in the documentation on how to pass kernel parameters.
|
||||
|
||||
### Permissions for elasticsearch
|
||||
If you see a message similar to `Unable to access 'path.repo' (/usr/share/elasticsearch/data/snapshot)` or `failed to obtain node locks, tried [/usr/share/elasticsearch/data]` and `maybe these locations are not writable` when initially starting elasticsearch, that probably means the container is not allowed to write files to the volume.
|
||||
To fix that issue, shutdown the container and on your host machine run:
|
||||
```
|
||||
chown 1000:0 -R /path/to/mount/point
|
||||
```
|
||||
This will match the permissions with the **UID** and **GID** of elasticsearch process within the container and should fix the issue.
|
||||
|
||||
### Disk usage
|
||||
The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again, you will see error messages like `disk usage exceeded flood-stage watermark`, [link](https://github.com/tubearchivist/tubearchivist#disk-usage).
|
||||
|
||||
Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download.
|
||||
- This project is tested for updates between one or two releases maximum. Further updates back may or may not be supported and you might have to reset your index and configurations to update. Ideally apply new updates at least once per month.
|
||||
- There can be breaking changes between updates, particularly as the application grows, new environment variables or settings might be required for you to set in your docker-compose file. *Always* check the **release notes**: Any breaking changes will be marked there.
|
||||
- All testing and development is done with the Elasticsearch version number as mentioned in the provided *docker-compose.yml* file. This will be updated when a new release of Elasticsearch is available. Running an older version of Elasticsearch is most likely not going to result in any issues, but it's still recommended to run the same version as mentioned. Use `bbilly1/tubearchivist-es` to automatically get the recommended version.
|
||||
|
||||
## Getting Started
|
||||
1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example:
|
||||
|
@ -215,24 +97,74 @@ bestvideo[vcodec*=avc1]+bestaudio[acodec*=mp4a]/mp4
|
|||
3. On the **downloads** page, click on *Rescan subscriptions* to add videos from the subscribed channels to your Download queue or click on *Add to download queue* to manually add Video IDs, links, channels or playlists.
|
||||
4. Click on *Start download* and let **Tube Archivist** do its thing.
|
||||
5. Enjoy your archived collection!
|
||||
|
||||
|
||||
|
||||
### Port Collisions
|
||||
If you have a collision on port `8000`, best solution is to use dockers *HOST_PORT* and *CONTAINER_PORT* distinction: To for example change the interface to port 9000 use `9000:8000` in your docker-compose file.
|
||||
|
||||
For more information on port collisions, check the docs.
|
||||
|
||||
## Common Errors
|
||||
Here is a list of common errors and their solutions.
|
||||
|
||||
### `vm.max_map_count`
|
||||
**Elastic Search** in Docker requires the kernel setting of the host machine `vm.max_map_count` to be set to at least 262144.
|
||||
|
||||
To temporarily set the value, run:
|
||||
```
|
||||
sudo sysctl -w vm.max_map_count=262144
|
||||
```
|
||||
How to apply the change permanently depends on your host operating system:
|
||||
|
||||
- For example on Ubuntu Server add `vm.max_map_count = 262144` to the file `/etc/sysctl.conf`.
|
||||
- On Arch based systems create a file `/etc/sysctl.d/max_map_count.conf` with the content `vm.max_map_count = 262144`.
|
||||
- On any other platform look up in the documentation on how to pass kernel parameters.
|
||||
|
||||
|
||||
### Permissions for elasticsearch
|
||||
If you see a message similar to `Unable to access 'path.repo' (/usr/share/elasticsearch/data/snapshot)` or `failed to obtain node locks, tried [/usr/share/elasticsearch/data]` and `maybe these locations are not writable` when initially starting elasticsearch, that probably means the container is not allowed to write files to the volume.
|
||||
To fix that issue, shutdown the container and on your host machine run:
|
||||
```
|
||||
chown 1000:0 -R /path/to/mount/point
|
||||
```
|
||||
This will match the permissions with the **UID** and **GID** of elasticsearch process within the container and should fix the issue.
|
||||
|
||||
|
||||
### Disk usage
|
||||
The Elasticsearch index will turn to ***read only*** if the disk usage of the container goes above 95% until the usage drops below 90% again, you will see error messages like `disk usage exceeded flood-stage watermark`.
|
||||
|
||||
Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download.
|
||||
|
||||
## `error setting rlimit`
|
||||
If you are seeing errors like `failed to create shim: OCI runtime create failed` and `error during container init: error setting rlimits`, this means docker can't set these limits, usually because they are set at another place or are incompatible because of other reasons. Solution is to remove the `ulimits` key from the ES container in your docker compose and start again.
|
||||
|
||||
This can happen if you have nested virtualizations, e.g. LXC running Docker in Proxmox.
|
||||
|
||||
## Known limitations
|
||||
- Video files created by Tube Archivist need to be playable in your browser of choice. Not every codec is compatible with every browser and might require some testing with format selection.
|
||||
- Every limitation of **yt-dlp** will also be present in Tube Archivist. If **yt-dlp** can't download or extract a video for any reason, Tube Archivist won't be able to either.
|
||||
- There is no flexibility in naming of the media files.
|
||||
|
||||
## Roadmap
|
||||
We have come far, nonetheless we are not short of ideas on how to improve and extend this project. Issues waiting for you to be tackled in no particular order:
|
||||
|
||||
- [ ] User roles
|
||||
- [ ] Audio download
|
||||
- [ ] Podcast mode to serve channel as mp3
|
||||
- [ ] Implement [PyFilesystem](https://github.com/PyFilesystem/pyfilesystem2) for flexible video storage
|
||||
- [ ] Implement [Apprise](https://github.com/caronc/apprise) for notifications ([#97](https://github.com/tubearchivist/tubearchivist/issues/97))
|
||||
- [ ] User created playlists, random and repeat controls ([#108](https://github.com/tubearchivist/tubearchivist/issues/108), [#220](https://github.com/tubearchivist/tubearchivist/issues/220))
|
||||
- [ ] Random and repeat controls ([#108](https://github.com/tubearchivist/tubearchivist/issues/108), [#220](https://github.com/tubearchivist/tubearchivist/issues/220))
|
||||
- [ ] Auto play or play next link ([#226](https://github.com/tubearchivist/tubearchivist/issues/226))
|
||||
- [ ] Multi language support
|
||||
- [ ] Show total video downloaded vs total videos available in channel
|
||||
- [ ] Add statistics of index
|
||||
- [ ] Download speed schedule ([#198](https://github.com/tubearchivist/tubearchivist/issues/198))
|
||||
- [ ] Auto ignore videos by keyword ([#163](https://github.com/tubearchivist/tubearchivist/issues/163))
|
||||
- [ ] Download or Ignore videos by keyword ([#163](https://github.com/tubearchivist/tubearchivist/issues/163))
|
||||
- [ ] Custom searchable notes to videos, channels, playlists ([#144](https://github.com/tubearchivist/tubearchivist/issues/144))
|
||||
- [ ] Search comments
|
||||
- [ ] Search download queue
|
||||
- [ ] Configure shorts, streams and video sizes per channel
|
||||
|
||||
Implemented:
|
||||
- [X] User created playlists [2024-04-10]
|
||||
- [X] Add statistics of index [2023-09-03]
|
||||
- [X] Implement [Apprise](https://github.com/caronc/apprise) for notifications [2023-08-05]
|
||||
- [X] Download video comments [2022-11-30]
|
||||
- [X] Show similar videos on video page [2022-11-30]
|
||||
- [X] Implement complete offline media file import from json file [2022-08-20]
|
||||
|
@ -256,11 +188,18 @@ Implemented:
|
|||
- [X] Backup and restore [2021-09-22]
|
||||
- [X] Scan your file system to index already downloaded videos [2021-09-14]
|
||||
|
||||
## Known limitations
|
||||
- Video files created by Tube Archivist need to be playable in your browser of choice. Not every codec is compatible with every browser and might require some testing with format selection.
|
||||
- Every limitation of **yt-dlp** will also be present in Tube Archivist. If **yt-dlp** can't download or extract a video for any reason, Tube Archivist won't be able to either.
|
||||
- There is currently no flexibility in naming of the media files.
|
||||
## User Scripts
|
||||
This is a list of useful user scripts, generously created by folks like you to extend this project and its functionality. Make sure to check the respective repository links for detailed license information.
|
||||
|
||||
This is your time to shine, [read this](https://github.com/tubearchivist/tubearchivist/blob/master/CONTRIBUTING.md#user-scripts) then open a PR to add your script here.
|
||||
|
||||
- [danieljue/ta_dl_page_script](https://github.com/danieljue/ta_dl_page_script): Helper browser script to prioritize a channels' videos in download queue.
|
||||
- [dot-mike/ta-scripts](https://github.com/dot-mike/ta-scripts): A collection of personal scripts for managing TubeArchivist.
|
||||
- [DarkFighterLuke/ta_base_url_nginx](https://gist.github.com/DarkFighterLuke/4561b6bfbf83720493dc59171c58ac36): Set base URL with Nginx when you can't use subdomains.
|
||||
- [lamusmaser/ta_migration_helper](https://github.com/lamusmaser/ta_migration_helper): Advanced helper script for migration issues to TubeArchivist v0.4.4 or later.
|
||||
- [lamusmaser/create_info_json](https://gist.github.com/lamusmaser/837fb58f73ea0cad784a33497932e0dd): Script to generate `.info.json` files using `ffmpeg` collecting information from downloaded videos.
|
||||
- [lamusmaser/ta_fix_for_video_redirection](https://github.com/lamusmaser/ta_fix_for_video_redirection): Script to fix videos that were incorrectly indexed by YouTube's "Video is Unavailable" response.
|
||||
- [RoninTech/ta-helper](https://github.com/RoninTech/ta-helper): Helper script to provide a symlink association to reference TubeArchivist videos with their original titles.
|
||||
|
||||
## Donate
|
||||
The best donation to **Tube Archivist** is your time, take a look at the [contribution page](CONTRIBUTING.md) to get started.
|
||||
|
@ -270,6 +209,20 @@ Second best way to support the development is to provide for caffeinated beverag
|
|||
* [Paypal Subscription](https://www.paypal.com/webapps/billing/plans/subscribe?plan_id=P-03770005GR991451KMFGVPMQ) for a monthly coffee
|
||||
* [ko-fi.com](https://ko-fi.com/bbilly1) for an alternative platform
|
||||
|
||||
## Notable mentions
|
||||
This is a selection of places where this project has been featured on reddit, in the news, blogs or any other online media, newest on top.
|
||||
* **ycombinator**: Tube Archivist on Hackernews front page, [2023-07-16][[link](https://news.ycombinator.com/item?id=36744395)]
|
||||
* **linux-community.de**: Tube Archivist bringt Ordnung in die Youtube-Sammlung, [German][2023-05-01][[link](https://www.linux-community.de/ausgaben/linuxuser/2023/05/tube-archivist-bringt-ordnung-in-die-youtube-sammlung/)]
|
||||
* **noted.lol**: Dev Debrief, An Interview With the Developer of Tube Archivist, [2023-03-30] [[link](https://noted.lol/dev-debrief-tube-archivist/)]
|
||||
* **console.substack.com**: Interview With Simon of Tube Archivist, [2023-01-29] [[link](https://console.substack.com/p/console-142#%C2%A7interview-with-simon-of-tube-archivist)]
|
||||
* **reddit.com**: Tube Archivist v0.3.0 - Now Archiving Comments, [2022-12-02] [[link](https://www.reddit.com/r/selfhosted/comments/zaonzp/tube_archivist_v030_now_archiving_comments/)]
|
||||
* **reddit.com**: Tube Archivist v0.2 - Now with Full Text Search, [2022-07-24] [[link](https://www.reddit.com/r/selfhosted/comments/w6jfa1/tube_archivist_v02_now_with_full_text_search/)]
|
||||
* **noted.lol**: How I Control What Media My Kids Watch Using Tube Archivist, [2022-03-27] [[link](https://noted.lol/how-i-control-what-media-my-kids-watch-using-tube-archivist/)]
|
||||
* **thehomelab.wiki**: Tube Archivist - A Youtube-DL Alternative on Steroids, [2022-01-27] [[link](https://thehomelab.wiki/books/news/page/tube-archivist-a-youtube-dl-alternative-on-steroids)]
|
||||
* **reddit.com**: Celebrating TubeArchivist v0.1, [2022-01-09] [[link](https://www.reddit.com/r/selfhosted/comments/rzh084/celebrating_tubearchivist_v01/)]
|
||||
* **linuxunplugged.com**: Pick: tubearchivist — Your self-hosted YouTube media server, [2021-09-11] [[link](https://linuxunplugged.com/425)] and [2021-10-05] [[link](https://linuxunplugged.com/426)]
|
||||
* **reddit.com**: Introducing Tube Archivist, your self hosted Youtube media server, [2021-09-12] [[link](https://www.reddit.com/r/selfhosted/comments/pmj07b/introducing_tube_archivist_your_self_hosted/)]
|
||||
|
||||
|
||||
## Sponsor
|
||||
Big thank you to [Digitalocean](https://www.digitalocean.com/) for generously donating credit for the tubearchivist.com VPS and buildserver.
|
||||
|
@ -278,3 +231,4 @@ Big thank you to [Digitalocean](https://www.digitalocean.com/) for generously do
|
|||
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/PoweredByDO/DO_Powered_by_Badge_blue.svg" width="201px">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
## Tube Archivist on YouTube
|
||||
[![ibracorp-youtube-video-thumb](assets/tube-archivist-ibracorp-O8H8Z01c0Ys.jpg)](https://www.youtube.com/watch?v=O8H8Z01c0Ys)
|
||||
Video featuring Tube Archivist generously created by [IBRACORP](https://www.youtube.com/@IBRACORP).
|
||||
|
||||
## Screenshots
|
||||
![login screenshot](assets/tube-archivist-login.png?raw=true "Tube Archivist Login")
|
||||
*Login Page*: Secure way to access your media collection.
|
||||
|
||||
![home screenshot](assets/tube-archivist-home.png?raw=true "Tube Archivist Home")
|
||||
*Home Page*: Your recent videos, continue watching incomplete videos.
|
||||
|
||||
![channels screenshot](assets/tube-archivist-channels.png?raw=true "Tube Archivist Channels")
|
||||
*All Channels*: A list of all your indexed channels, filtered by subscribed only.
|
||||
|
||||
![single channel screenshot](assets/tube-archivist-single-channel.png?raw=true "Tube Archivist Single Channel")
|
||||
*Single Channel*: Single channel page with additional metadata and sub pages.
|
||||
|
||||
![video page screenshot](assets/tube-archivist-video.png?raw=true "Tube Archivist Video Page")
|
||||
*Video Page*: Stream your video directly from the interface.
|
||||
|
||||
![downloads page screenshot](assets/tube-archivist-download.png?raw=true "Tube Archivist Downloads Page")
|
||||
*Downloads Page*: Add, control, and monitor your download queue.
|
||||
|
||||
![search page screenshot](assets/tube-archivist-search.png?raw=true "Tube Archivist Search Page")
|
||||
*Search Page*: Use expressions to quickly search through your collection.
|
Before Width: | Height: | Size: 49 KiB |
After Width: | Height: | Size: 516 KiB |
After Width: | Height: | Size: 541 KiB |
After Width: | Height: | Size: 1.6 MiB |
After Width: | Height: | Size: 578 KiB |
After Width: | Height: | Size: 106 KiB |
Before Width: | Height: | Size: 131 KiB |
Before Width: | Height: | Size: 79 KiB |
Before Width: | Height: | Size: 174 KiB |
Before Width: | Height: | Size: 166 KiB |
Before Width: | Height: | Size: 238 KiB |
After Width: | Height: | Size: 96 KiB |
After Width: | Height: | Size: 716 KiB |
After Width: | Height: | Size: 684 KiB |
18
deploy.sh
|
@ -25,7 +25,9 @@ function sync_blackhole {
|
|||
--exclude ".gitignore" \
|
||||
--exclude "**/cache" \
|
||||
--exclude "**/__pycache__/" \
|
||||
--exclude ".venv" \
|
||||
--exclude "db.sqlite3" \
|
||||
--exclude ".mypy_cache" \
|
||||
. -e ssh "$host":tubearchivist
|
||||
|
||||
ssh "$host" 'docker build -t bbilly1/tubearchivist --build-arg TARGETPLATFORM="linux/amd64" tubearchivist'
|
||||
|
@ -48,7 +50,9 @@ function sync_test {
|
|||
--exclude ".gitignore" \
|
||||
--exclude "**/cache" \
|
||||
--exclude "**/__pycache__/" \
|
||||
--exclude ".venv" \
|
||||
--exclude "db.sqlite3" \
|
||||
--exclude ".mypy_cache" \
|
||||
. -e ssh "$host":tubearchivist
|
||||
|
||||
# copy default docker-compose file if not exist
|
||||
|
@ -85,14 +89,14 @@ function validate {
|
|||
# note: this logic is duplicated in the `./.github/workflows/lint_python.yml` config
|
||||
# if you update this file, you should update that as well
|
||||
echo "running black"
|
||||
black --diff --color --check -l 79 "$check_path"
|
||||
black --force-exclude "migrations/*" --diff --color --check -l 79 "$check_path"
|
||||
echo "running codespell"
|
||||
codespell --skip="./.git,./package.json,./package-lock.json,./node_modules" "$check_path"
|
||||
codespell --skip="./.git,./.venv,./package.json,./package-lock.json,./node_modules,./.mypy_cache" "$check_path"
|
||||
echo "running flake8"
|
||||
flake8 "$check_path" --count --max-complexity=10 --max-line-length=79 \
|
||||
--show-source --statistics
|
||||
flake8 "$check_path" --exclude "migrations,.venv" --count --max-complexity=10 \
|
||||
--max-line-length=79 --show-source --statistics
|
||||
echo "running isort"
|
||||
isort --check-only --diff --profile black -l 79 "$check_path"
|
||||
isort --skip "migrations" --skip ".venv" --check-only --diff --profile black -l 79 "$check_path"
|
||||
printf " \n> all validations passed\n"
|
||||
|
||||
}
|
||||
|
@ -214,8 +218,6 @@ elif [[ $1 == "test" ]]; then
|
|||
sync_test "$2"
|
||||
elif [[ $1 == "validate" ]]; then
|
||||
validate "$2"
|
||||
elif [[ $1 == "versioncheck" ]]; then
|
||||
python version_check.py
|
||||
elif [[ $1 == "docker" ]]; then
|
||||
sync_docker
|
||||
elif [[ $1 == "unstable" ]]; then
|
||||
|
@ -223,7 +225,7 @@ elif [[ $1 == "unstable" ]]; then
|
|||
elif [[ $1 == "es" ]]; then
|
||||
sync_latest_es
|
||||
else
|
||||
echo "valid options are: blackhole | test | validate | versioncheck | docker | unstable | es"
|
||||
echo "valid options are: blackhole | test | validate | docker | unstable | es"
|
||||
fi
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
version: '3.3'
|
||||
version: '3.5'
|
||||
|
||||
services:
|
||||
tubearchivist:
|
||||
|
@ -20,11 +20,17 @@ services:
|
|||
- TA_PASSWORD=verysecret # your initial TA credentials
|
||||
- ELASTIC_PASSWORD=verysecret # set password for Elasticsearch
|
||||
- TZ=America/New_York # set your time zone
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
interval: 2m
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
depends_on:
|
||||
- archivist-es
|
||||
- archivist-redis
|
||||
archivist-redis:
|
||||
image: redislabs/rejson # for arm64 use bbilly1/rejson
|
||||
image: redis/redis-stack-server
|
||||
container_name: archivist-redis
|
||||
restart: unless-stopped
|
||||
expose:
|
||||
|
@ -34,7 +40,7 @@ services:
|
|||
depends_on:
|
||||
- archivist-es
|
||||
archivist-es:
|
||||
image: bbilly1/tubearchivist-es # only for amd64, or use official es 8.6.0
|
||||
image: bbilly1/tubearchivist-es # only for amd64, or use official es 8.13.2
|
||||
container_name: archivist-es
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
"""
|
||||
ffmpeg link builder
|
||||
copied as is into the build step in Dockerfile
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import urllib.request
|
||||
from enum import Enum
|
||||
|
||||
API_URL = "https://api.github.com/repos/yt-dlp/FFmpeg-Builds/releases/latest"
|
||||
BINARIES = ["ffmpeg", "ffprobe"]
|
||||
|
||||
|
||||
class PlatformFilter(Enum):
    """Asset name fragments, keyed by upper-cased architecture name.

    The member name is looked up from the architecture part of a platform
    string such as "linux/amd64"; the member value is the substring that
    is searched for in the file names of the release assets.
    """

    # name fragment used to select the arm64 asset
    ARM64 = "linuxarm64"
    # name fragment used to select the amd64 asset
    AMD64 = "linux64"
|
||||
|
||||
|
||||
def get_assets():
    """Request API_URL and return the decoded JSON response.

    The response body is read completely, decoded as UTF-8 and parsed
    with the json module; the parsed object is returned unchanged.
    """
    with urllib.request.urlopen(API_URL) as response:
        payload = response.read()
        release = json.loads(payload.decode("utf-8"))

    return release
|
||||
|
||||
|
||||
def pick_url(all_links, platform):
    """Return the download url of the first asset matching the platform.

    all_links: parsed release data, read via its "assets" list where each
        asset provides a "name" and a "browser_download_url".
    platform: platform string like "linux/amd64"; the part after the first
        slash is upper-cased and looked up in PlatformFilter.

    Raises ValueError if no asset name contains the platform's fragment.
    """
    name_fragment = PlatformFilter[platform.split("/")[1].upper()].value

    matches = []
    for asset in all_links["assets"]:
        if name_fragment in asset["name"]:
            matches.append(asset)

    if matches:
        # assets keep their release order, the first match wins
        return matches[0]["browser_download_url"]

    raise ValueError(f"no valid asset found for filter {name_fragment}")
|
||||
|
||||
|
||||
def download_extract(url):
    """Download the archive behind url and extract the wanted binaries.

    Every archive member whose base name is listed in BINARIES is
    extracted into a directory of the same name below the current working
    directory, resulting in paths like ``./ffmpeg/ffmpeg``. All other
    members are skipped.

    url: download link of an xz compressed tar archive.
    """
    print("download file")
    filename, _ = urllib.request.urlretrieve(url)
    try:
        print("extract file")
        with tarfile.open(filename, "r:xz") as tar:
            for member in tar.getmembers():
                # drop the folder structure of the archive, keep file name
                member.name = os.path.basename(member.name)
                if member.name in BINARIES:
                    print(f"extract {member.name}")
                    # second argument is the target directory
                    tar.extract(member, member.name)
    finally:
        # urlretrieve leaves its temporary download on disk, remove it
        urllib.request.urlcleanup()
|
||||
|
||||
|
||||
def main():
    """Run the builder: resolve platform, pick asset, download, extract.

    The platform is taken from the first command line argument and falls
    back to "linux/amd64" when the script is called without arguments.
    """
    argv = sys.argv
    platform = "linux/amd64" if len(argv) == 1 else argv[1]

    release = get_assets()
    download_extract(pick_url(release, platform))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,73 +1,23 @@
|
|||
#!/bin/bash
|
||||
# startup script inside the container for tubearchivist
|
||||
|
||||
if [[ -z "$ELASTIC_USER" ]]; then
|
||||
export ELASTIC_USER=elastic
|
||||
fi
|
||||
set -e
|
||||
|
||||
cachedir=/cache
|
||||
[[ -d $cachedir ]] || cachedir=.
|
||||
lockfile=${cachedir}/initsu.lock
|
||||
|
||||
required="Missing required environment variable"
|
||||
[[ -f $lockfile ]] || : "${TA_USERNAME:?$required}"
|
||||
: "${TA_PASSWORD:?$required}"
|
||||
: "${ELASTIC_PASSWORD:?$required}"
|
||||
: "${TA_HOST:?$required}"
|
||||
|
||||
# ugly nginx and uwsgi port overwrite with env vars
|
||||
if [[ -n "$TA_PORT" ]]; then
|
||||
sed -i "s/8000/$TA_PORT/g" /etc/nginx/sites-available/default
|
||||
fi
|
||||
|
||||
if [[ -n "$TA_UWSGI_PORT" ]]; then
|
||||
sed -i "s/8080/$TA_UWSGI_PORT/g" /etc/nginx/sites-available/default
|
||||
sed -i "s/8080/$TA_UWSGI_PORT/g" /app/uwsgi.ini
|
||||
fi
|
||||
|
||||
# disable auth on static files for cast support
|
||||
if [[ -n "$ENABLE_CAST" ]]; then
|
||||
sed -i "/auth_request/d" /etc/nginx/sites-available/default
|
||||
fi
|
||||
|
||||
# wait for elasticsearch
|
||||
counter=0
|
||||
until curl -u "$ELASTIC_USER":"$ELASTIC_PASSWORD" "$ES_URL" -fs; do
|
||||
echo "waiting for elastic search to start"
|
||||
counter=$((counter+1))
|
||||
if [[ $counter -eq 12 ]]; then
|
||||
# fail after 2 min
|
||||
echo "failed to connect to elastic search, exiting..."
|
||||
curl -v -u "$ELASTIC_USER":"$ELASTIC_PASSWORD" "$ES_URL"?pretty
|
||||
exit 1
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
|
||||
# start python application
|
||||
python manage.py makemigrations
|
||||
# django setup
|
||||
python manage.py migrate
|
||||
|
||||
if [[ -f $lockfile ]]; then
|
||||
echo -e "\e[33;1m[WARNING]\e[0m This is not the first run! Skipping" \
|
||||
"superuser creation.\nTo force it, remove $lockfile"
|
||||
else
|
||||
export DJANGO_SUPERUSER_PASSWORD=$TA_PASSWORD
|
||||
output="$(python manage.py createsuperuser --noinput --name "$TA_USERNAME" 2>&1)"
|
||||
|
||||
case "$output" in
|
||||
*"Superuser created successfully"*)
|
||||
echo "$output" && touch $lockfile ;;
|
||||
*"That name is already taken."*)
|
||||
echo "Superuser already exists. Creation will be skipped on next start."
|
||||
touch $lockfile ;;
|
||||
*) echo "$output" && exit 1
|
||||
esac
|
||||
if [[ -z "$DJANGO_DEBUG" ]]; then
|
||||
python manage.py collectstatic --noinput -c
|
||||
fi
|
||||
|
||||
python manage.py collectstatic --noinput -c
|
||||
# ta setup
|
||||
python manage.py ta_envcheck
|
||||
python manage.py ta_connection
|
||||
python manage.py ta_startup
|
||||
|
||||
# start all tasks
|
||||
nginx &
|
||||
celery -A home.tasks worker --loglevel=INFO &
|
||||
celery -A home.tasks worker --loglevel=INFO --max-tasks-per-child 10 &
|
||||
celery -A home beat --loglevel=INFO \
|
||||
-s "${BEAT_SCHEDULE_PATH:-${cachedir}/celerybeat-schedule}" &
|
||||
uwsgi --ini uwsgi.ini
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
# Channels Overview and Channel Detail Page
|
||||
|
||||
The channels are organized on two different levels, similar to the [playlists](Playlists):
|
||||
|
||||
## Channels Overview
|
||||
Accessible at `/channel/` of your Tube Archivist, the **Overview Page** shows a list of all channels you have indexed.
|
||||
- You can filter that list to show or hide subscribed channels with the toggle. Clicking on the channel banner or the channel name will direct you to the *Channel Detail Page*.
|
||||
- If you are subscribed to a channel, an *Unsubscribe* button will show; if you aren't subscribed, a *Subscribe* button will show instead.
|
||||
|
||||
The **Subscribe to Channels** button <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to subscribe to a channel. You have a few options:
|
||||
- Enter the YouTube channel ID, a 24 character string. For example *UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Enter the URL to the channel page on YouTube. For example *https://www.youtube.com/channel/UCBa659QWEk1AI4Tg--mrJ2A* or alias url *https://www.youtube.com/@TomScottGo*
|
||||
- Enter a channel alias starting with *@*, for example: *@TomScottGo*
|
||||
- Enter the video URL for any video and let Tube Archivist extract the channel ID for you, for example *https://www.youtube.com/watch?v=2tdiKTSdE9Y*
|
||||
- Add one per line.
|
||||
|
||||
To search your channels, click on the search icon <img src="assets/icon-search.png?raw=true" alt="search icon" width="20px" style="margin:0 5px;"> to reach the search page. Start your query with `channel:`, learn more on the [search](Search) page.
|
||||
|
||||
## Channel Detail
|
||||
Each channel gets a set of channel detail pages.
|
||||
- If you are subscribed to the channel, an *Unsubscribe* button will show, else the *Subscribe* button will show.
|
||||
- The **Mark as Watched** button will mark all videos of this channel as watched.
|
||||
|
||||
### Videos
|
||||
Accessible at `/channel/<channel-id>/`, this page shows all the videos you have downloaded from this channel.
|
||||
|
||||
### Streams
|
||||
If you have any streams indexed, this page will become accessible at `/channel/<channel-id>/streams/`. It shows all available live streams of that channel.
|
||||
|
||||
### Shorts
|
||||
If you have any shorts videos indexed, this page will become accessible at `/channel/<channel-id>/shorts/`. It shows all the shorts videos of that channel.
|
||||
|
||||
### Playlists
|
||||
If you have playlists from this channel indexed, this page will become accessible at `/channel/<channel-id>/playlist/`. Activate channel playlist indexing on the about page.
|
||||
|
||||
### About
|
||||
On the *Channel About* page, accessible at `/channel/<channel-id>/about/`, you can see additional metadata.
|
||||
- The button **Delete Channel** will delete the channel plus all videos of this channel, both media files and metadata. Additionally, this will also delete playlist metadata belonging to that channel.
|
||||
- The button **Reindex** will reindex all channel metadata. This will also categorize existing videos as shorts or streams.
|
||||
- The button **Reindex Videos** will reindex metadata for all videos in this channel.
|
||||
|
||||
The channel customize form gives options to change settings on a per channel basis. Any configurations here will overwrite your configurations from the [settings](Settings) page.
|
||||
- **Download Format**: Overwrite the download quality for videos from this channel.
|
||||
- **Auto Delete**: Automatically delete watched videos from this channel after selected days.
|
||||
- **Index Playlists**: Automatically add all Playlists with at least a video downloaded to your index. Only do this for channels where you care about playlists as this will slow down indexing new videos for having to check which playlist this belongs to.
|
||||
- **SponsorBlock**: Using [SponsorBlock](https://sponsor.ajay.app/) to get and skip sponsored content. Customize per channel: You can *disable* or *enable* SponsorBlock for certain channels only to overwrite the behavior set on the [Settings](settings) page. Selecting *unset* will remove the overwrite and your setting will fall back to the default on the settings page.
|
||||
|
||||
### Downloads
|
||||
If you have any videos from this channel pending in the download queue, a *Downloads* link will show, bringing you directly to the [downloads](Downloads) page, filtering the list by the selected channel.
|
|
@ -1,59 +0,0 @@
|
|||
# Downloads Page
|
||||
Accessible at `/downloads/` of your Tube Archivist, this page handles all the download functionality.
|
||||
|
||||
|
||||
## Rescan Subscriptions
|
||||
The **Rescan Subscriptions** icon <img src="assets/icon-rescan.png?raw=true" alt="rescan icon" width="20px" style="margin:0 5px;"> will start a background task to look for new videos from the channels and playlists you are subscribed to.
|
||||
Tube Archivist will get available *videos*, *shorts* and *streams* from each channel, you can define the channel and playlist page size on the [settings page](Settings#subscriptions). With the default page size, expect this process to take around 2-3 seconds for each channel or playlist you are subscribed to. A status message will show the progress.
|
||||
|
||||
Then for every video found, **Tube Archivist** will skip the video if it has already been downloaded or if you added it to the *ignored* list before. All the other videos will get added to the download queue. Expect this to take around 2 seconds for each video as **Tube Archivist** needs to grab some additional metadata and artwork. New videos will get added at the bottom of the download queue.
|
||||
|
||||
## Download Queue
|
||||
The **Start Download** icon <img src="assets/icon-download.png?raw=true" alt="download icon" width="20px" style="margin:0 5px;"> will start the download process starting from the top of the queue. Take a look at the relevant settings on the [Settings Page](Settings#downloads). Once the process started, a progress message will show with additional details and controls:
|
||||
- The stop icon <img src="assets/icon-stop.png?raw=true" alt="stop icon" width="20px" style="margin:0 5px;"> will gracefully stop the download process, once the current video has been finished successfully.
|
||||
- The cancel icon <img src="assets/icon-close-red.png?raw=true" alt="close icon" width="20px" style="margin:0 5px;"> is equivalent to killing the process and will stop the download immediately. Any leftover files will get deleted, the canceled video will still be available in the download queue.
|
||||
|
||||
After downloading, Tube Archivist tries to add new videos to already indexed playlists and if activated on the settings page, get comments for the new videos.
|
||||
|
||||
## Add to Download Queue
|
||||
The **Add to Download Queue** icon <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to manually add videos to the download queue. Add one item per line. You have a few options:
|
||||
|
||||
### Videos
|
||||
- Add a YouTube video ID, for example *2tdiKTSdE9Y*
|
||||
- Add a link to a YouTube video, for example *https://www.youtube.com/watch?v=2tdiKTSdE9Y*
|
||||
- Add a link to a YouTube video by providing the shortened URL, for example *https://youtu.be/2tdiKTSdE9Y*
|
||||
- Add a link to a shorts video, for example *https://www.youtube.com/shorts/UOfe6e0k7cQ*
|
||||
|
||||
### Channels
|
||||
- When adding a channel, Tube Archivist will ignore the channel page size as described above, this is meant for an initial download of the whole channel. You can still ignore selected videos from the queue before starting the download.
|
||||
- Download a complete channel including shorts and streams by entering:
|
||||
- Channel ID: *UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Channel URL: *https://www.youtube.com/channel/UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Channel *@* alias handler: For example *@TomScottGo*
|
||||
- Channel alias URL: *https://www.youtube.com/@TomScottGo*
|
||||
- Download videos, live streams or shorts only, by providing a partial channel URL:
|
||||
- Videos only: *https://www.youtube.com/@IBRACORP/videos*
|
||||
- Shorts only: *https://www.youtube.com/@IBRACORP/shorts*
|
||||
- Streams only: *https://www.youtube.com/@IBRACORP/streams*
|
||||
- Every other channel sub page will default to download all, for example *https://www.youtube.com/@IBRACORP/featured* will download videos and shorts and streams.
|
||||
|
||||
### Playlist
|
||||
- Add a playlist ID or URL to add every available video in the list to the download queue, for example *https://www.youtube.com/playlist?list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha* or *PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*.
|
||||
- When adding a playlist to the queue, this playlist will automatically get [indexed](Playlists#playlist-detail).
|
||||
- When you add a link to a video in a playlist, Tube Archivist assumes you want to download only the specific video and not the whole playlist, for example *https://www.youtube.com/watch?v=CINVwWHlzTY&list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha* will only add one video *CINVwWHlzTY* to the queue.
|
||||
|
||||
## The Download Queue
|
||||
Below the three buttons you find the download queue. New items will get added at the bottom of the queue, the next video to download once you click on **Start Download** will be the first in the list.
|
||||
|
||||
You can filter the download queue with the **filter** dropdown box, the filter will show once you have more than one channel in the download queue. Select the channel to filter by name, the number in parentheses indicates how many videos you have pending from this channel. Reset the filter by selecting *all* from the dropdown. This will generate links for the top 30 channels with pending videos.
|
||||
|
||||
Every video in the download queue has two buttons:
|
||||
- **Ignore**: This will remove that video from the download queue and this video will not get added again, even when you **Rescan Subscriptions**.
|
||||
- **Download now**: This will give priority to this video. If the download process is already running, the prioritized video will get downloaded as soon as the current video is finished. If there is no download process running, this will start downloading this single video and stop after that.
|
||||
|
||||
You can flip the view by activating **Show Only Ignored Videos**. This will show all videos you have previously *ignored*.
|
||||
Every video in the ignored list has two buttons:
|
||||
- **Forget**: This will delete the item from the ignored list.
|
||||
- **Add to Queue**: This will add the ignored video back to the download queue.
|
||||
|
||||
You can delete your download queue from the [Settings](Settings#actions) page.
|
36
docs/FAQ.md
|
@ -1,36 +0,0 @@
|
|||
# Frequently Asked Questions
|
||||
|
||||
## 1. Scope of this project
|
||||
Tube Archivist is *Your self hosted YouTube media server*, which also defines the primary scope of what this project tries to do:
|
||||
- **Self hosted**: This assumes you have full control over the underlying operating system and hardware and can configure things to work properly with Docker, its volumes and networks as well as whatever disk storage and filesystem you choose to use.
|
||||
- **YouTube**: Downloading, indexing and playing videos from YouTube, there are currently no plans to expand this to any additional platforms.
|
||||
- **Media server**: This project tries to be a stand alone media server with its own web interface.
|
||||
|
||||
Additionally to that, progress is also happening on:
|
||||
- **API**: Endpoints for additional integrations.
|
||||
- **Browser Extension**: To integrate between youtube.com and Tube Archivist.
|
||||
|
||||
Defining the scope is important for the success of any project:
|
||||
- A scope too broad will result in development effort spreading too thin and will run into danger that this project tries to do too many things and none of them well.
|
||||
- A too narrow scope will make this project uninteresting and will exclude audiences that could also benefit from this project.
|
||||
- Not defining a scope will easily lead to misunderstandings and false hopes of where this project tries to go.
|
||||
|
||||
Of course this is subject to change: The scope can be expanded as this project continues to grow and more people contribute.
|
||||
|
||||
## 2. Emby/Plex/Jellyfin/Kodi integrations
|
||||
Although there are similarities between these excellent projects and Tube Archivist, they have a very different use case. Trying to fit the metadata relations and database structure of a YouTube archival project into these media servers that specialize in Movies and TV shows is always going to be limiting.
|
||||
|
||||
Part of the scope is to be its own media server, so that's where the focus and effort of this project is. That being said, the nature of self hosted and open source software gives you all the possible freedom to use your media as you wish.
|
||||
|
||||
## 3. To Docker or not to Docker
|
||||
This project is a classical docker application: There are multiple moving parts that need to be able to interact with each other and need to be compatible with multiple architectures and operating systems. Additionally Docker also drastically reduces development complexity which is highly appreciated.
|
||||
|
||||
So Docker is the only supported installation method. If you don't have any experience with Docker, consider investing the time to learn this very useful technology.
|
||||
|
||||
## 4. Finetuning Elasticsearch
|
||||
A minimal configuration of Elasticsearch (ES) is provided in the example docker-compose.yml file. ES is highly configurable and very interesting to learn more about. Refer to the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html) if you want to get into it.
|
||||
|
||||
## 5. When I subscribe to a channel it only downloads the most recent 50 videos
|
||||
Subscribing to a channel is a different operation from downloading it. You can [add the channel to the download queue](https://github.com/tubearchivist/tubearchivist/wiki/Downloads#add-to-download-queue) to download all past videos.
|
||||
|
||||
If you want to download the existing videos and also automatically download new videos, then you should both download the channel and subscribe to it.
|
41
docs/Home.md
|
@ -1,41 +0,0 @@
|
|||
# Tube Archivist Wiki
|
||||
Welcome to the official Tube Archivist Wiki. This is an up-to-date documentation of user functionality.
|
||||
|
||||
Table of contents:
|
||||
* [FAQ](FAQ): Frequently asked questions about what this project is and tries to do
|
||||
* [Channels](Channels): Browse your channels, handle channel subscriptions
|
||||
* [Playlists](Playlists): Browse your indexed playlists, handle playlist subscriptions
|
||||
* [Downloads](Downloads): Scanning subscriptions, handle download queue
|
||||
* [Settings](Settings): All the configuration options
|
||||
* [Video](Video): All details of a single video and playlist navigation.
|
||||
* [Users](Users): User management admin interface
|
||||
* [Search](Search): Search your archive
|
||||
* [Installation](Installation): Detailed installation instructions for various platforms.
|
||||
|
||||
## Getting Started
|
||||
1. [Subscribe](Channels#channels-overview) to some of your favourite YouTube channels.
|
||||
2. [Scan](Downloads#rescan-subscriptions) subscriptions to add the latest videos to the download queue.
|
||||
3. [Add](Downloads#add-to-download-queue) additional videos, channels or playlists - ignore the ones you don't want to download.
|
||||
4. [Download](Downloads#download-queue) and let **Tube Archivist** do its thing.
|
||||
5. Sit back and enjoy your archived and indexed collection!
|
||||
|
||||
## General Navigation
|
||||
* Clicking on the channel name or the channel icon brings you to the dedicated channel page to show videos from that channel.
|
||||
* Clicking on a video title brings you to the dedicated video page and shows additional details.
|
||||
* Clicking on a video thumbnail opens the video player and starts streaming the selected video.
|
||||
* Clicking on the search icon <img src="assets/icon-search.png?raw=true" alt="search icon" width="20px" style="margin:0 5px;"> will open a dedicated search page to search over your complete index.
|
||||
* The pagination - if available - builds links for up to 10'000 results, use the search, sort or filter functionality to find what you are looking for.
|
||||
|
||||
|
||||
An empty checkbox icon <img src="assets/icon-unseen.png?raw=true" alt="unseen icon" width="20px" style="margin:0 5px;"> will show for videos you haven't marked as watched. Click on it and the icon will change to a filled checkbox <img src="assets/icon-seen.png?raw=true" alt="seen icon" width="20px" style="margin:0 5px;"> indicating it as watched - click again to revert.
|
||||
|
||||
When available the <img src="assets/icon-gridview.png?raw=true" alt="gridview icon" width="20px" style="margin:0 5px;"> gridview icon will display the list in a grid. A grid row holds 3 items by default, use the <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> icon to add more or the <img src="assets/icon-substract.png?raw=true" alt="subtract icon" width="20px" style="margin:0 5px;"> icon to remove items per row, depending on your screen size. The <img src="assets/icon-listview.png?raw=true" alt="listview icon" width="20px" style="margin:0 5px;"> listview icon will arrange the items in a list. The sort icon <img src="assets/icon-sort.png?raw=true" alt="sort icon" width="20px" style="margin:0 5px;"> will open additional sort options.
|
||||
|
||||
You can control the video player with the following keyboard shortcuts:
|
||||
- `?`: Show help
|
||||
- `m`: toggle mute
|
||||
- `c`: toggle subtitles if available
|
||||
- `>`: increase playback speed
|
||||
- `<`: decrease playback speed
|
||||
- `←` (left arrow): jump back 5 seconds
|
||||
- `→` (right arrow): jump forward 5 seconds
|
|
@ -1,288 +0,0 @@
|
|||
# Detailed Installation Instructions for Various Platforms
|
||||
|
||||
## Table of Contents
|
||||
- [Podman](#podman)
|
||||
- [Unraid](#unraid)
|
||||
- [Truenas Scale](#truenas-scale)
|
||||
- [Synology](#synology)
|
||||
|
||||
These are beginner-friendly installation instructions for additional platforms, generously provided by users of these platforms. When in doubt, verify the details with the [Readme](https://github.com/tubearchivist/tubearchivist#installing-and-updating). If you see any issues here while using these instructions, please contribute.
|
||||
|
||||
## Podman
|
||||
Podman handles container hostname resolving slightly differently than docker, so you need to make a few changes to the `docker-compose.yml` to get up and running.
|
||||
|
||||
### Step 1: Follow the installation instructions from the [README](https://github.com/tubearchivist/tubearchivist#installing-and-updating), with a few additional changes to the `docker-compose.yml`.
|
||||
|
||||
Edit these additional changes to the `docker-compose.yml`:
|
||||
- under `tubearchivist`->`image`:
|
||||
- prefix the container name with `docker.io/` (or the url of your repo of choice).
|
||||
- under `tubearchivist`->`environment`:
|
||||
- `ES_URL`: change `archivist-es` to the internal IP of the computer that will be running the containers.
|
||||
- `REDIS_HOST`: change `archivist-redis` to the internal IP of the computer that will be running the containers (should be the same as above).
|
||||
- under `archivist-redis`->`image`:
|
||||
- prefix the container name with `docker.io/` again.
|
||||
- under `archivist-redis`->`expose`:
|
||||
- change the whole entry from `expose: ["<PORT>"]` into `ports: ["<PORT>:<PORT>"]`.
|
||||
- under `archivist-es`->`image`:
|
||||
- prefix the container name with `docker.io/` again.
|
||||
- under `archivist-es`->`expose`:
|
||||
- change the whole entry from `expose: ["<PORT>"]` into `ports: ["<PORT>:<PORT>"]`.
|
||||
|
||||
### Step 2: Create service files (optional)
|
||||
|
||||
Since podman doesn't run as a service, it can't start containers after reboots, at least not without some help.
|
||||
|
||||
If you want to enable this behavior, you can follow [this example](https://techblog.jeppson.org/2020/04/create-podman-services-with-podman-compose/) to have `systemd` start up the containers with `podman-compose` when the computer boots up.
|
||||
|
||||
|
||||
## Unraid
|
||||
|
||||
Tube Archivist and all of its dependencies are located in the [community applications](https://unraid.net/community/apps?q=tubearchivist) store. The three containers you will need are as follows:
|
||||
|
||||
- **TubeArchivist-RedisJSON**: This container acts as a cache and temporary link between the application and the file system. Used to store and display messages and configuration variables.
|
||||
- **TubeArchivist-ES**: ElasticSearch stores video metadata and makes everything searchable. Also keeps track of the download queue.
|
||||
- **TubeArchivist**: Once your YouTube video collection grows, it becomes hard to search and find a specific video. That's where Tube Archivist comes in: By indexing your video collection with metadata from YouTube, you can organize, search and enjoy your archived YouTube videos without hassle offline through a convenient web interface.
|
||||
|
||||
### Step 1: Install `TubeArchivist-RedisJSON`
|
||||
|
||||
![TubeArchivist-RedisJSON](https://i.imgur.com/iGJZKYd.png)
|
||||
This is the easiest container to set up of the three, just make sure that you do not have any port conflicts, and that your `/data` is mounted to the correct path. The other containers will map to the same root directory (/mnt/user/appdata/TubeArchivist).
|
||||
|
||||
If you need to install `TubeArchivist-RedisJSON` on a different port, you'll have to follow [these steps](https://github.com/tubearchivist/tubearchivist#redis-on-a-custom-port) later on when installing the `TubeArchivist` container.
|
||||
|
||||
|
||||
### Step 2: Install `TubeArchivist-ES`
|
||||
![TubeArchivist-ES](https://i.imgur.com/RC38b3u.png)
|
||||
ElasticSearch is also pretty easy to set up. Again, make sure you have no port conflicts, make sure that you mapped the ElasticSearch Data to the same root directory as `RedisJSON` (/mnt/user/appdata/TubeArchivist), and make sure to change the default password to something more secure.
|
||||
|
||||
There are four additional settings in the "show more settings" area, but they don't require any changes.
|
||||
|
||||
|
||||
### Step 3: Install `TubeArchivist`
|
||||
|
||||
![TubeArchivist](https://i.imgur.com/oLsmwBZ.png)
|
||||
It's finally time to set up TubeArchivist!
|
||||
|
||||
- `HOST:` This is a list of IP addresses that you will host TA from. For example, 192.168.0.14 is the IP address of my Unraid server. If I was going to access TA from a VPN, or domain name, I'd put those next to my host IP with just a space separating the different addresses. More information [here.](https://github.com/tubearchivist/tubearchivist#installing-and-updating)
|
||||
|
||||
|
||||
- `Port:`Again, make sure that you have no port conflicts on 8000.
|
||||
|
||||
- `Youtube Media Path:` is where you'll download all of your videos to.
|
||||
Make sure that this is an empty directory to not cause confusion when
|
||||
starting the application. If you have existing videos that you'd like
|
||||
to import into Tube Archivist, please checkout the [settings
|
||||
wiki.](https://github.com/tubearchivist/tubearchivist/wiki/Settings#manual-media-files-import)
|
||||
|
||||
|
||||
- `Appdata:` This should be the same base path as the other two containers (/mnt/user/appdata/TubeArchivist).
|
||||
|
||||
- `TA Username:`This will be your username for TubeArchivist.
|
||||
|
||||
- `TA Password:`This will be your password for TubeArchivist.
|
||||
|
||||
- `Redis` This will be JUST the ip address of your redis container.
|
||||
|
||||
- `ElasticSearch Password:`This is the password you defined in the `TubeArchivist-ES` container.
|
||||
- `ElasticSearch:` This seems to cause some confusion, but it's a pretty simple step, just replace the IP and Port to match your `TubeArchivist-ES` container.
|
||||
|
||||
(example: if your IP is 192.168.0.14, the value should be http://192.168.0.14:9200)
|
||||
|
||||
- `Time Zone:` This is an important step for your scheduler, to find your timezone, use a site like [TimeZoneConverter](http://www.timezoneconverter.com/cgi-bin/findzone.tzc)
|
||||
|
||||
### From there, you should be able to start up your containers and you're good to go!
|
||||
If you're still having trouble, join us on [discord](https://www.tubearchivist.com/discord) and come to the #support channel.
|
||||
|
||||
<br />
|
||||
<br />
|
||||
|
||||
## Truenas Scale
|
||||
|
||||
Truenas Scale can be a bit confusing, with its k3s kubernetes implementation.
|
||||
|
||||
However, there is a step by step guide available for its users here:
|
||||
|
||||
https://heavysetup.info/applications/tube-archivist/dataset/
|
||||
|
||||
- Ensure you are navigating the columns under `Tube Archivist` on the left hand side of the screen
|
||||
|
||||
<br />
|
||||
<br />
|
||||
|
||||
## Synology
|
||||
|
||||
There are several different methods to install TubeArchivist on Synology platforms. This will focus on the available `docker` package and `docker-compose` implementations.
|
||||
|
||||
### Prepare Directories/Folders
|
||||
Before we set up TubeArchivist, we need to set up the directories/folders. You are assumed to be logged into the Synology NAS.
|
||||
#### 1. Docker Base Folder
|
||||
1. Open the `File Station` utility.
|
||||
2. Click on the **Create🔽** button and choose *Create New Shared Folder*.
|
||||
3. **Name** the folder "Docker".
|
||||
4. Add a **Description**.
|
||||
5. Select the **Volume Location**.
|
||||
> Note: By default, this will be where all data is stored. Change the folders as best meets your requirements.
|
||||
6. Select the appropriate options from the remaining checkbox configurations.
|
||||
![Synology - Create Docker Folder](assets/Synology_0.2.0_Docker-Folder-Create.png)
|
||||
7. Click the **Next** button.
|
||||
8. If you are going to **Encrypt** your folder, check the appropriate box and provide the Encryption Key and its confirmation.
|
||||
9. Click the **Next** button.
|
||||
10. On the **Advanced Settings** page, you can select the *Enable data checksum for advanced data integrity* setting. This may cause a performance impact, but will allow for potential file self-healing. **This cannot be changed later.**
|
||||
> Note: This is not recommended, as we will be hosting databases within this folder.
|
||||
11. If you are enabling a quota for how large the folder can get, you can select the *Enabled shared folder quota* setting and choose the maximum size this folder can grow. This can be changed later.
|
||||
12. Click the **Next** button.
|
||||
13. Confirm the settings, then click the **Apply** button. This will create the folder.
|
||||
#### 2. TubeArchivist Base Folder
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Click on the `Create🔽` button and choose *create Folder*.
|
||||
4. **Name** the folder "TubeArchivist".
|
||||
#### 3. Redis Data
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "redis".
|
||||
#### 4. Elastic Search Data
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "es".
|
||||
#### 5. TubeArchivist Cache
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "cache".
|
||||
#### 6. TubeArchivist Output
|
||||
1. Open the `File Station` utility.
|
||||
2. Select the "Docker" folder on the left-hand side.
|
||||
3. Select the "TubeArchivist" folder beneath "Docker".
|
||||
4. Click on the `Create🔽` button and choose *create Folder*.
|
||||
5. **Name** the folder "media".
|
||||
#### 7. Confirm Folder Structure
|
||||
Once all of the folders have been created, it should have a folder structure within Docker\TubeArchivist that includes "cache", "es", "media", and "redis" folders.
|
||||
![Synology - Docker Folder Structure](assets/Synology_0.2.0_Docker-Folder-Structure.png)
|
||||
|
||||
#### 8. Change Permissions - CLI Required
|
||||
> If you do not have SSH access enabled for CLI, [enable it](https://kb.synology.com/en-sg/DSM/tutorial/How_to_login_to_DSM_with_root_permission_via_SSH_Telnet) before continuing.
|
||||
1. Open the SSH connection to the Synology. Login as your primary `Admin` user, or the user that was enabled for SSH access.
|
||||
2. Elevate your access to `root`. Steps are provided [here](https://kb.synology.com/en-sg/DSM/tutorial/How_to_login_to_DSM_with_root_permission_via_SSH_Telnet).
|
||||
3. Change directories to the **Volume** where the "Docker" folder resides.
|
||||
<br />Example: `cd /volume1`
|
||||
4. Change directories to the "Docker" folder.
|
||||
<br />Example: `cd Docker`
|
||||
5. Change directories to the "TubeArchivist" folder.
|
||||
<br />Example: `cd TubeArchivist`
|
||||
6. Change the owner of the "redis" folder. *If correct, this does not have an output.*
|
||||
<br />Example: `chown 999:100 redis`
|
||||
7. Change the owner of the "es" folder. *If correct, this does not have an output.*
|
||||
<br />Example: `chown 1000:1000 es`
|
||||
8. Confirm that the folders have the correct permissions.
|
||||
<br />Example: `ls -hl`
|
||||
![Synology - Docker Folder Permissions Command](assets/Synology_0.2.0_Docker-Folder-Permissions-Commands.png)
|
||||
9. Logout from root.
|
||||
<br />Example: `logout`
|
||||
10. Disconnect from the SSH connection.
|
||||
<br />Example: `exit`
|
||||
### Docker Setup
|
||||
1. Install the `Docker` Synology Package.
|
||||
1. Log in to your Synology NAS.
|
||||
2. Open the `Package Center` utility.
|
||||
3. Search for `Docker`.
|
||||
4. Click `Install`.
|
||||
|
||||
![Synology - Install Docker Utility](assets/Synology_0.2.0_Docker-Install.png)
|
||||
|
||||
2. After `Docker` is installed, open the `Docker` utility.
|
||||
3. Go to the `Registry` tab.
|
||||
4. Search for the following `images` and download them. Follow the recommended versions for each of the images.
|
||||
- `redislabs/rejson`
|
||||
![Synology - Redis Image Search](assets/Synology_0.2.0_Docker-Redis-Search.png)
|
||||
- `bbilly1/tubearchivist-es`
|
||||
![Synology - ElasticSearch Image Search](assets/Synology_0.2.0_Docker-ES-Search.png)
|
||||
- `bbilly1/tubearchivist`
|
||||
![Synology - TubeArchivist Image Search](assets/Synology_0.2.0_Docker-TA-Search.png)
|
||||
|
||||
|
||||
|
||||
5. Go to the `Image` tab. From here, create a container based on each image with the associated configurations below.
|
||||
- ElasticSearch
|
||||
1. Select the associated image.
|
||||
2. Click the **Launch** button in the top.
|
||||
3. Edit the **Container Name** to be "tubearchivist-es".
|
||||
4. Click on the **Advanced Settings** button.
|
||||
5. In the **Advanced Settings** tab, check the box for `Enable auto-restart`.
|
||||
6. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/es" folder, then type in `/usr/share/elasticsearch/data` for the mount path.
|
||||
7. In the **Network** tab, leave the default `bridge` Network (unless you have a specific Network design that you know how to implement).
|
||||
8. In the **Port Settings** tab, replace the "Auto" entry under **Local Port** with the port that will be used to connect to ElasticSearch (default is 9200).
|
||||
9. In the **Port Settings** tab, select the entry line for port 9300 and **➖ delete** the line. It is not needed for this container.
|
||||
10. The **Links** tab does not require configuration for this container.
|
||||
11. In the **Environment** tab, add in the following ElasticSearch specific environment variables that may apply.
|
||||
- "discovery.type=single-node"
|
||||
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
|
||||
- "UID=1000"
|
||||
- "GID=0"
|
||||
- "xpack.security.enabled=true"
|
||||
- "ELASTIC_PASSWORD=verysecret"
|
||||
- "path.repo=/usr/share/elasticsearch/data/snapshot"
|
||||
> Do not use the default password as it is very insecure.
|
||||
> Activating snapshots for backups should only be done *after* setting the `path.repo` setting.
|
||||
![Synology - ElasticSearch Environment Configurations](assets/Synology_0.2.0_Docker-ES-Env-Conf.png)
|
||||
12. Click on the **Apply** button.
|
||||
13. Back on the **Create Container** screen, click the **Next** button.
|
||||
14. Review the settings to confirm, then click the **Apply** button.
|
||||
- Redis
|
||||
1. Select the associated image.
|
||||
2. Click the **Launch** button in the top.
|
||||
3. Edit the **Container Name** to be "tubearchivist-redis".
|
||||
4. Click on the **Advanced Settings** button.
|
||||
5. In the **Advanced Settings** tab, check the box for `Enable auto-restart`.
|
||||
6. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/redis" folder, then type in `/data` for the mount path.
|
||||
7. In the **Network** tab, leave the default `bridge` Network (unless you have a specific Network design that you know how to implement).
|
||||
8. In the **Port Settings** tab, replace the "Auto" entry under **Local Port** with the port that will be used to connect to Redis (default is 6379).
|
||||
9. In the **Links** tab, select the "tubearchivist-es" container from the **Container Name** dropdown and provide it the same alias, "tubearchivist-es".
|
||||
10. In the **Environment** tab, add in any Redis specific environment variables that may apply (none by default).
|
||||
11. Click on the **Apply** button.
|
||||
12. Back on the **Create Container** screen, click the **Next** button.
|
||||
13. Review the settings to confirm, then click the **Apply** button.
|
||||
|
||||
- TubeArchivist
|
||||
1. Select the associated image.
|
||||
2. Click the **Launch** button in the top.
|
||||
3. Edit the **Container Name** to be "tubearchivist".
|
||||
4. Click on the **Advanced Settings** button.
|
||||
5. In the **Advanced Settings** tab, check the box for `Enable auto-restart`.
|
||||
6. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/cache" folder, then type in `/cache` for the mount path.
|
||||
7. In the **Volume** tab, click the **Add Folder** button and select the "Docker/TubeArchivist/media" folder, then type in `/youtube` for the mount path.
|
||||
8. In the **Network** tab, leave the default `bridge` Network (unless you have a specific Network design that you know how to implement).
|
||||
9. In the **Port Settings** tab, replace the "Auto" entry under **Local Port** with the port that will be used to connect to TubeArchivist (default is 8000).
|
||||
10. In the **Links** tab, select the "tubearchivist-es" container from the **Container Name** dropdown and provide it the same alias, "tubearchivist-es".
|
||||
11. In the **Links** tab, select the "tubearchivist-redis" container from the **Container Name** dropdown and provide it the same alias, "tubearchivist-redis".
|
||||
12. In the **Environment** tab, add in the following TubeArchivist specific environment variables that may apply. **Change the variables as is appropriate to your use case. Follow the [README section](https://github.com/tubearchivist/tubearchivist#tube-archivist) for details on what to set each variable.**
|
||||
- "TA_HOST=synology.local"
|
||||
- "ES_URL=http://tubearchivist-es:9200"
|
||||
- "REDIS_HOST=tubearchivist-redis"
|
||||
- "HOST_UID=1000"
|
||||
- "HOST_GID=1000"
|
||||
- "TA_USERNAME=tubearchivist"
|
||||
- "TA_PASSWORD=verysecret"
|
||||
- "ELASTIC_PASSWORD=verysecret"
|
||||
- "TZ=America/New_York"
|
||||
> Do not use the default password as it is very insecure.
|
||||
> Ensure that ELASTIC_PASSWORD matches the password used on the tubearchivist-es container.
|
||||
![Synology - TubeArchivist Environment Configurations](assets/Synology_0.2.0_Docker-TA-Env-Conf.png)
|
||||
13. Click on the **Apply** button.
|
||||
14. Back on the **Create Container** screen, click the **Next** button.
|
||||
15. Review the settings to confirm, then click the **Apply** button.
|
||||
6. After the containers have been configured and started, you can go to the **Container** tab and monitor the containers.
|
||||
7. To review the logs to ensure that the system has started successfully, select the "tubearchivist" container and click on the **Details** button. In the new window, go to the **Log** tab. Monitor the logs until either an error occurs or the message `celery@tubearchivist ready.` is in the logs. This may take a few minutes, especially for a first time setup.
|
||||
> Note: Synology Docker presents the logs in a pagination format. If you are not seeing the logs update, check if there are additional pages.
|
||||
8. After it has started, go to the location in the `TA_HOST`. This should give you the standard TubeArchivist login screen.
|
||||
<!--
|
||||
### Docker-Compose Setup -->
|
||||
<!-- This section is a Work In Progress -->
|
||||
|
||||
### From there, you should be able to start up your containers and you're good to go!
|
||||
If you're still having trouble, join us on [discord](https://www.tubearchivist.com/discord) and come to the #support channel.
|
|
@ -1,25 +0,0 @@
|
|||
# Playlist Overview and Playlist Detail Page
|
||||
The playlists are organized in two different levels, similar to the [channels](Channels):
|
||||
|
||||
## Playlist Overview
|
||||
Accessible at `/playlist/` of your Tube Archivist, this **Overview Page** shows a list of all playlists you have indexed over all your channels.
|
||||
- You can filter that list to show only subscribed to playlists with the toggle.
|
||||
|
||||
You can index playlists of a channel from the channel detail page as described [here](Channels#channel-detail).
|
||||
|
||||
The **Subscribe to Playlist** button <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to subscribe to playlists. You have a few options:
|
||||
- Enter the YouTube playlist id, for example: *PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*
|
||||
- Enter the YouTube dedicated playlist url, for example: *https://www.youtube.com/playlist?list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*
|
||||
- Add one per line.
|
||||
- NOTE: It doesn't make sense to subscribe to a playlist if you are already subscribed to the corresponding channel as this will slow down the **Rescan Subscriptions** [task](Downloads#rescan-subscriptions).
|
||||
|
||||
You can search your indexed playlists by clicking on the search icon <img src="assets/icon-search.png?raw=true" alt="search icon" width="20px" style="margin:0 5px;">. This will open a dedicated page.
|
||||
|
||||
## Playlist Detail
|
||||
Each playlist will get a dedicated playlist detail page accessible at `/playlist/<playlist-id>/` of your Tube Archivist. This page shows all the videos you have downloaded from this playlist.
|
||||
|
||||
- If you are subscribed to the playlist, an Unsubscribe button will show, else the Subscribe button will show.
|
||||
- The **Mark as Watched** button will mark all videos of this playlist as watched.
|
||||
- The button **Reindex** will reindex the playlist metadata.
|
||||
- The button **Reindex Videos** will reindex all videos from this playlist.
|
||||
- The **Delete Playlist** button will give you the option to delete just the *metadata* which won't delete any media files or *delete all* which will delete metadata plus all videos belonging to this playlist.
|
|
@ -1,57 +0,0 @@
|
|||
# Search Page
|
||||
Accessible at `/search/` of your **Tube Archivist**, search your archive for Videos, Channels and Playlists - or even full text search throughout your indexed subtitles.
|
||||
|
||||
- All your queries are case insensitive and are normalized to lowercase.
|
||||
- All your queries are analyzed for the English language. This means *singular*, *plural* and word variations like *-ing*, *-ed*, *-able* etc. are treated as synonyms.
|
||||
- Fuzzy search is activated for all your searches by default. This can catch typos in your queries or in the matching documents with one to two letters difference, depending on the query length. You can configure fuzziness with the secondary keyword `fuzzy:`, e.g:
|
||||
- `fuzzy:0` or `fuzzy:no`: Deactivate fuzzy matching.
|
||||
- `fuzzy:1`: Set fuzziness to one letter difference.
|
||||
- `fuzzy:2`: Set fuzziness to two letters difference.
|
||||
- All text searches are ranked, meaning the better a match the higher ranked the result. Unless otherwise stated, queries with multiple words are processed with the `and` operator, meaning all words need to match so each word will narrow down the result.
|
||||
- This will return 30 results per query, pagination is not implemented yet.
|
||||
|
||||
Just start typing to start a *simple* search or start your query with a primary keyword to search for a specific type and narrow down the result with secondary keywords. Secondary keywords can be in any order. Use *yes* or *no* for boolean values.
|
||||
|
||||
## Simple
|
||||
Start your query without a keyword to make a simple query. This will search in *video titles*, *channel names* and *playlist titles* and will return matching videos, channels and playlists. Keyword searches will return more results in a particular category due to the fact that more fields are searched for matches.
|
||||
|
||||
## Video
|
||||
Start your query with the primary keyword `video:` to search for videos only. This will search through the *video titles*, *tags* and *category* fields. Narrow your search down with secondary keywords:
|
||||
- `channel:` search for videos matching the channel name.
|
||||
- `active:` is a boolean value, to search for videos that are still active on YouTube or that are not active any more.
|
||||
|
||||
**Example**:
|
||||
- `video:learn python channel:corey shafer active:yes`: This will return all videos with the term *Learn Python* from the channel *Corey Shafer* that are still *Active* on YouTube.
|
||||
- `video: channel:tom scott active:no`: Note the omitted term after the primary key, this will show all videos from the channel *Tom Scott* that are no longer active on YouTube.
|
||||
|
||||
## Channel
|
||||
Start with the `channel:` primary keyword to search for channels matching your query. This will search through the *channel name* and *channel description* fields. Narrow your search down with secondary keywords:
|
||||
- `subscribed:` is a boolean value, search for channels that you are subscribed to or not.
|
||||
- `active:` is a boolean value, to search for channels that are still active on YouTube or that are no longer active.
|
||||
|
||||
**Example**:
|
||||
- `channel:linux subscribed:yes`: Search for channels with the term *Linux* that you are subscribed to.
|
||||
- `channel: active:no`: Note the omitted term after the primary key, this will return all channels that are no longer active on YouTube.
|
||||
|
||||
## Playlist
|
||||
Start your query with the primary keyword `playlist:` to search for playlists only. This will search through the *playlist title* and *playlist description* fields. Narrow down your search with these secondary keywords:
|
||||
- `subscribed:` is a boolean value, search for playlists that you are subscribed to or not.
|
||||
- `active:` is a boolean value, to search for playlists that are still active on YouTube or that are no longer active.
|
||||
|
||||
**Example**:
|
||||
- `playlist:backend engineering subscribed:yes`: Search for playlists about *Backend Engineering* that you are subscribed to.
|
||||
- `playlist: active:yes subscribed:yes`: Note the omitted primary search term, this will return all playlists active on YouTube that you are subscribed to.
|
||||
- `playlist:html css active:yes`: Search for playlists containing *HTML CSS* that are still active on YouTube.
|
||||
|
||||
## Full
|
||||
Start a full text search by beginning your query with the primary keyword `full:`. This will search through your indexed Subtitles showing segments with possible matches. This will only show any results if you have activated *subtitle download and index* on the settings page. The operator for full text searches is `or` meaning when searching for multiple words not all words need to match, but additional words will change the ranking of the result, the more words match and the better they match, the higher ranked the result. The matching words will get highlighted in the text preview.
|
||||
|
||||
Clicking the play button on the thumbnail will open the inplace player at the timestamp from where the segment starts. Same when clicking the video title, this will open the video page and put the player at the segment timestamp. This will overwrite any previous playback position.
|
||||
|
||||
Narrow down your search with these secondary keywords:
|
||||
- `lang`: Search for matches only within a language. Use the same two-letter ISO language code as you have set on the settings page.
|
||||
- `source`: Can either be *auto* to search through auto generated subtitles only or *user* to search through user uploaded subtitles only.
|
||||
|
||||
**Example**:
|
||||
- `full:contribute to open source lang:en` search for subtitle segments matching with the words *Contribute to Open Source* in the language *en*.
|
||||
- `full:flight simulator cockpit source:user` to search for the words *Flight Simulator Cockpit* from *user* uploaded subtitle segments.
|
190
docs/Settings.md
|
@ -1,190 +0,0 @@
|
|||
# Settings Page
|
||||
Accessible at `/settings/` of your **Tube Archivist**, this page holds all the configurations and additional functionality related to the database.
|
||||
|
||||
Click on **Update Settings** at the bottom of the form to apply your configurations.
|
||||
|
||||
## Color scheme
|
||||
Switch between the easy on the eyes dark theme and the burning bright theme.
|
||||
|
||||
## Archive View
|
||||
- **Page Size**: Defines how many results get displayed on a given page. Same value goes for all archive views.
|
||||
|
||||
## Subscriptions
|
||||
Settings related to the channel management.
|
||||
- **Channel Page Size**: Defines how many pages will get analyzed by **Tube Archivist** each time you click on *Rescan Subscriptions*. The default page size used by yt-dlp is **50**, that's also the recommended value to set here. Any value higher will slow down the rescan process, for example if you set the value to 51, that means yt-dlp will have to go through 2 pages of results instead of 1, thereby doubling the time that process takes.
|
||||
- **Live Page Size**: Same as above, but for channel live streams.
|
||||
- **Shorts page Size**: Same as above, but for shorts videos.
|
||||
|
||||
## Downloads
|
||||
Settings related to the download process.
|
||||
- **Download Limit**: Stop the download process after downloading the set quantity of videos.
|
||||
- **Download Speed Limit**: Set your download speed limit in KB/s. This will pass the option `--limit-rate` to yt-dlp.
|
||||
- **Throttled Rate Limit**: Restart download if the download speed drops below this value in KB/s. This will pass the option `--throttled-rate` to yt-dlp. Using this option might have a negative effect if you have an unstable or slow internet connection.
|
||||
- **Sleep Interval**: Time in seconds to sleep between requests to YouTube. It's a good idea to set this to **3** seconds. Might be necessary to avoid throttling.
|
||||
- **Auto Delete Watched Videos**: Automatically delete videos marked as watched after selected days. If activated, checks your videos after download task is finished.
|
||||
|
||||
## Download Format
|
||||
Additional settings passed to yt-dlp.
|
||||
- **Format**: This controls which streams get downloaded and is equivalent to passing `--format` to yt-dlp. Use one of the recommended ones or look at the documentation of [yt-dlp](https://github.com/yt-dlp/yt-dlp#format-selection). Please note: The option `--merge-output-format mp4` is automatically passed to yt-dlp to guarantee browser compatibility. Similar to that, `--check-formats` is passed as well to check that the selected formats are actually downloadable.
|
||||
- **Embed Metadata**: This saves the available tags directly into the media file by passing `--embed-metadata` to yt-dlp.
|
||||
- **Embed Thumbnail**: This will save the thumbnail into the media file by passing `--embed-thumbnail` to yt-dlp.
|
||||
|
||||
## Subtitles
|
||||
- **Download Setting**: Select the subtitle language you'd like to download. Add a comma separated list for multiple languages.
|
||||
- **Source Settings**: User created subtitles are provided from the uploader and are usually the video script. Auto generated is from YouTube, quality varies, particularly for auto translated tracks.
|
||||
- **Index Settings**: Enabling subtitle indexing will add the lines to Elasticsearch and will make subtitles searchable. This will increase the index size and is not recommended on low-end hardware.
|
||||
|
||||
## Comments
|
||||
- **Download and index comments**: Set your configuration for downloading and indexing comments. This takes the same values as documented in the `max_comments` section for the youtube extractor of [yt-dlp](https://github.com/yt-dlp/yt-dlp#youtube). Add without space between the four different fields: *max-comments,max-parents,max-replies,max-replies-per-thread*. Example:
|
||||
- `all,100,all,30`: Get 100 max-parents and 30 max-replies-per-thread.
|
||||
- `1000,all,all,50`: Get a total of 1000 comments over all, 50 replies per thread.
|
||||
- **Comment sort method**: Change sort method between *top* or *new*. The default is *top*, as decided by YouTube.
|
||||
- The [Refresh Metadata](#refresh-metadata) background task will get comments from your already archived videos, spreading the requests out over time.
|
||||
|
||||
Archiving comments is slow as only very few comments get returned per request with yt-dlp. Choose your configuration above wisely. Tube Archivist will download comments after the download queue finishes, your videos will be already available while the comments are getting downloaded.
|
||||
|
||||
## Cookie
|
||||
Importing your YouTube Cookie into Tube Archivist allows yt-dlp to bypass age restrictions, gives access to private videos and your *watch later* or *liked videos*.
|
||||
|
||||
### Security concerns
|
||||
Cookies are used to store your session and contain your access token to your google account, this information can be used to take over your account. Treat that data with utmost care as you would any other password or credential. *Tube Archivist* stores your cookie in Redis and will automatically append it to yt-dlp for every request.
|
||||
|
||||
### Auto import
|
||||
Easiest way to import your cookie is to use the **Tube Archivist Companion** [browser extension](https://github.com/tubearchivist/browser-extension) for Firefox and Chrome.
|
||||
|
||||
### Alternative Manual Export your cookie
|
||||
- Install **Cookies.txt** addon for [chrome](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid) or [firefox](https://addons.mozilla.org/firefox/addon/cookies-txt).
|
||||
- Visit YouTube and login with whichever YouTube account you wish to use to generate the cookies.
|
||||
- Click on the extension icon in the toolbar - it will drop down showing the active cookies for YT.
|
||||
- Click Export to export the cookies, filename is by default *cookies.google.txt*.
|
||||
|
||||
### Alternative Manual Import your cookie
|
||||
Place the file *cookies.google.txt* into the *cache/import* folder of Tube Archivist and enable the cookie import. Once you click on *Update Application Configurations* to save your changes, your cookie will get imported and stored internally.
|
||||
|
||||
Once imported, a **Validate Cookie File** button will show, where you can confirm if your cookie is working or not.
|
||||
|
||||
### Use your cookie
|
||||
Once imported, additionally to the advantages above, your [Watch Later](https://www.youtube.com/playlist?list=WL) and [Liked Videos](https://www.youtube.com/playlist?list=LL) become a regular playlist you can download and subscribe to as any other [playlist](Playlists).
|
||||
|
||||
### Limitation
|
||||
There is only one cookie per Tube Archivist instance, this will be shared between all users.
|
||||
|
||||
## Integrations
|
||||
All third party integrations of TubeArchivist will **always** be *opt in*.
|
||||
- **API**: Your access token for the Tube Archivist API.
|
||||
- **returnyoutubedislike.com**: This will get dislikes and average ratings for each video by integrating with the API from [returnyoutubedislike.com](https://www.returnyoutubedislike.com/).
|
||||
- **SponsorBlock**: Using [SponsorBlock](https://sponsor.ajay.app/) to get and skip sponsored content. If a video doesn't have timestamps, or has unlocked timestamps, use the browser addon to contribute to this excellent project. Can also be activated and deactivated as a per [channel overwrite](Settings#channel-customize).
|
||||
|
||||
## Snapshots
|
||||
System snapshots will automatically make daily snapshots of the Elasticsearch index. The task will start at 12pm your local time. Snapshots are deduplicated, meaning that each snapshot will only have to backup changes since the last snapshot. The initial snapshot may be slow, but subsequent runs will be much faster. There is also a cleanup function implemented, that will remove snapshots older than 30 days.
|
||||
|
||||
This will make a snapshot of your metadata index only, no media files or additional configuration variables you have set on the settings page will be backed up.
|
||||
|
||||
Due to these improvements compared to the previous backup solution, system snapshots will replace the current backup system in a future version.
|
||||
|
||||
Before activating system snapshots, you'll have to add one additional environment variable to the *archivist-es* container:
|
||||
```
|
||||
path.repo=/usr/share/elasticsearch/data/snapshot
|
||||
```
|
||||
The variable `path.repo` will set the folder where the snapshots will go inside the Elasticsearch container, you can't change it, but the variable needs to be set. Rebuild the container for changes to take effect, e.g `docker compose up -d`.
|
||||
|
||||
- **Create snapshot now**: Will start the snapshot process now, outside of the regular daily schedule.
|
||||
- **Restore**: Restore your index to that point in time.
|
||||
|
||||
# Scheduler Setup
|
||||
Schedule settings expect a cron like format, where the first value is minute, second is hour and third is day of the week. Day 0 is Sunday, day 1 is Monday etc.
|
||||
|
||||
Examples:
|
||||
- **0 15 \***: Run task every day at 15:00 in the afternoon.
|
||||
- **30 8 \*/2**: Run task every second day of the week (Sun, Tue, Thu, Sat) at 08:30 in the morning.
|
||||
- **0 \*/3,8-17 \***: Execute every hour divisible by 3, and every hour during office hours (8 in the morning - 5 in the afternoon).
|
||||
- **0 8,16 \***: Execute every day at 8 in the morning and at 4 in the afternoon.
|
||||
- **auto**: Sensible default.
|
||||
- **0**: (zero), deactivate that task.
|
||||
|
||||
NOTE:
|
||||
- Changes in the scheduler settings require a container restart to take effect.
|
||||
- Cron formats written as *number*/*number* are non-standard cron and are not supported by the scheduler, for example **0 0/12 \*** is invalid, use **0 \*/12 \*** instead.
|
||||
- Avoid an unnecessarily frequent schedule to not get blocked by YouTube. For that reason, the scheduler doesn't support schedules that trigger more than once per hour.
|
||||
|
||||
## Rescan Subscriptions
|
||||
This is the same task as the one run from the downloads page: it looks through your channels and playlists and adds missing videos to the download queue.
|
||||
|
||||
## Start download
|
||||
Start downloading all videos currently in the download queue.
|
||||
|
||||
## Refresh Metadata
|
||||
Rescan videos, channels and playlists on YouTube and update metadata periodically. This will also refresh your subtitles based on your current settings. If an item is no longer available on YouTube, this will deactivate it and exclude it from future refreshes. This task is meant to be run once per day, set your schedule accordingly.
|
||||
|
||||
The field **Refresh older than x days** takes a number where TubeArchivist will consider an item as *outdated*. This value is used to calculate how many items need to be refreshed today based on the total indexed. This will spread out the requests to YouTube. Sensible value here is **90** days.
|
||||
|
||||
## Thumbnail check
|
||||
This will check if all expected thumbnails are there and will delete any artwork without matching video.
|
||||
|
||||
## Index backup
|
||||
Create a zip file of the metadata and select **Max auto backups to keep** to automatically delete old backups created from this task.
|
||||
|
||||
|
||||
# Actions
|
||||
Additional database functionality.
|
||||
|
||||
## Delete download queue
|
||||
The button **Delete all queued** will delete all pending videos from the download queue. The button **Delete all ignored** will delete all videos you have previously ignored.
|
||||
|
||||
## Manual Media Files Import
|
||||
NOTE: This is inherently error prone, as there are many variables, some outside of the control of this project. Read this carefully and use at your own risk.
|
||||
|
||||
Add the files you'd like to import to the */cache/import* folder. Only add files, don't add subdirectories. All files you are adding, need to have the same *base name* as the media file. Then start the process from the settings page *Manual Media Files Import*.
|
||||
|
||||
Valid media extensions are *.mp4*, *.mkv* or *.webm*. If you have other file extensions or incompatible codecs, convert them first to mp4. **Tube Archivist** can identify the videos with one of the following methods.
|
||||
|
||||
### Method 1:
|
||||
Add a matching *.info.json* file with the media file. Both files need to have the same base name, for example:
|
||||
- For the media file: `<base-name>.mp4`
|
||||
- For the JSON file: `<base-name>.info.json`
|
||||
|
||||
The import process then looks for the 'id' key within the JSON file to identify the video.
|
||||
|
||||
### Method 2:
|
||||
Detect the YouTube ID from filename, this accepts the default yt-dlp naming convention for file names like:
|
||||
- `<base-name>[<youtube-id>].mp4`
|
||||
- The YouTube ID in square brackets at the end of the filename is the crucial part.
|
||||
|
||||
### Offline import:
|
||||
If the video you are trying to import is not available on YouTube any more, **Tube Archivist** can import the required metadata:
|
||||
- The file `<base-name>.info.json` is required to extract the required information.
|
||||
- Add the thumbnail as `<base-name>.<ext>`, where valid file extensions are *.jpg*, *.png* or *.webp*. If there is no thumbnail file, **Tube Archivist** will try to extract the embedded cover from the media file or will fallback to a default thumbnail.
|
||||
- Add subtitles as `<base-name>.<lang>.vtt` where *lang* is the two-letter ISO language code. This will archive all subtitle files you add to the import folder, independent from your configurations. Subtitles can be archived and used in the player, but they can't be indexed or made searchable because they have a very different structure than the subtitles as **Tube Archivist** needs them.
|
||||
- For videos, where the whole channel is not available any more, you can add the `<channel-id>.info.json` file as generated by *youtube-dl/yt-dlp* to get the full metadata. Alternatively **Tube Archivist** will extract as much info as possible from the video info.json file.
|
||||
|
||||
### Some notes:
|
||||
- This will **consume** the files you put into the import folder: Files will get converted to mp4 if needed (this might take a long time...) and moved to the archive, *.json* files will get deleted upon completion to avoid having duplicates on the next run.
|
||||
- For best file transcoding quality, convert your media files with desired settings first before importing.
|
||||
- Maybe start with a subset of your files to import to make sure everything goes well...
|
||||
- Follow the logs to monitor progress and errors: `docker-compose logs -f tubearchivist`.
|
||||
|
||||
## Embed thumbnails into media file
|
||||
This will write or overwrite all thumbnails in the media file using the downloaded thumbnail. This is only necessary if you didn't download the files with the option *Embed Thumbnail* enabled or want to make sure all media files get the newest thumbnail. Follow the docker-compose logs to monitor progress.
|
||||
|
||||
## Backup Database
|
||||
This will backup your metadata into a zip file. The file will get stored at *cache/backup* and will contain the necessary files to restore the Elasticsearch index, formatted as **nd-json** files.
|
||||
|
||||
BE AWARE: This will **not** backup any media files, just the metadata from the Elasticsearch.
|
||||
|
||||
## Restore From Backup
|
||||
The restore functionality will expect the same zip file in *cache/backup* as created from the **Backup database** function. This will recreate the index from the snapshot. There will be a list of all available backups to choose from. The *source* tag can have these different values:
|
||||
- **manual**: For backups manually created from here on the settings page.
|
||||
- **auto**: For backups automatically created via a scheduled task.
|
||||
- **update**: For backups created after a Tube Archivist update due to changes in the index.
|
||||
- **False**: Undefined.
|
||||
|
||||
BE AWARE: This will **replace** your current index with the one from the backup file. This won't restore any media files.
|
||||
|
||||
## Rescan Filesystem
|
||||
This function will go through all your media files and look at the whole index to try to find any issues:
|
||||
- Should the filename not match with the indexed media url, this will rename the video files correctly and update the index with the new link.
|
||||
- When you delete media files from the filesystem outside of the Tube Archivist interface, this will delete leftover metadata from the index.
|
||||
- When you have media files that are not indexed yet, this will grab the metadata from YouTube like it was a newly downloaded video. This can be useful when restoring from an older backup file with missing metadata but already downloaded mediafiles. NOTE: This only works if the media files are named in the same convention as Tube Archivist does, particularly the YouTube ID needs to be at the same index in the filename, alternatively see above for *Manual Media Files Import*.
|
||||
- This will also check all of your thumbnails and download any that are missing.
|
||||
|
||||
BE AWARE: There is no undo.
|
|
@ -1,20 +0,0 @@
|
|||
# User Management
|
||||
|
||||
For now, **Tube Archivist** is a single user application. You can create multiple users with different names and passwords, they will share the same videos and permissions but some interface configurations are on a per user basis. *More is on the roadmap*.
|
||||
|
||||
## Superuser
|
||||
The first user gets created with the environment variables **TA_USERNAME** and **TA_PASSWORD** from your docker-compose file. That first user will automatically have *superuser* privileges.
|
||||
|
||||
## Admin Interface
|
||||
When logged in from your *superuser* account, you are able to access the admin interface from the settings page or at `/admin/`. This interface holds all functionality for user management.
|
||||
|
||||
## Create additional users
|
||||
From the admin interface when you click on *Accounts* you will get a list of all users. From there you can create additional users by clicking on *Add Account*, provide a name and confirm password and click on *Save* to create the user.
|
||||
|
||||
## Changing users
|
||||
You can delete or change permissions and password of a user by clicking on the username from the *Accounts* list page and follow the interface from there. Changing the password of the *superuser* here will overwrite the password originally set with the environment variables.
|
||||
|
||||
## Reset
|
||||
Delete all user configurations by deleting the file `cache/db.sqlite3` and restart the container. This will create the superuser again from the environment variables.
|
||||
|
||||
NOTE: Future improvements here will most likely require such a reset.
|
|
@ -1,25 +0,0 @@
|
|||
# Video Page
|
||||
Every video downloaded gets a dedicated page accessible at `/video/<video-id>/` of your Tube Archivist.
|
||||
|
||||
Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail).
|
||||
|
||||
- The button **Reindex** will reindex the metadata of this video.
|
||||
- The button **Download File** will download the media file in the browser.
|
||||
- The button **Delete Video** will delete that video including the media file.
|
||||
|
||||
If available, a tag cloud will show, representing the tags set by the uploader.
|
||||
|
||||
The video description is truncated to the first few lines, click on *show more* to expand the whole description.
|
||||
|
||||
## Playlist
|
||||
When available, a playlist navigation will show at the bottom. Clicking on the playlist name will bring you to the dedicated [Playlist Detail](Playlists#playlist-detail) page showing all videos downloaded from that playlist. The number in square brackets indicates the position of the current video in that playlist.
|
||||
|
||||
Clicking on the next or previous video name or thumbnail will bring you to that dedicated video page.
|
||||
|
||||
## Similar Videos
|
||||
Tube Archivist will show up to six similar videos in a grid. Similarity is detected from the **video title** and the **video tags**. This naturally will show some videos from the same channel, but can also return videos about the same topic from other channels.
|
||||
|
||||
When playing a video from the similar section with the inline player, the current video will get replaced, refresh the page to reset that or click on the video title to avoid that behavior.
|
||||
|
||||
## Comments
|
||||
If activated on the settings page, this will show the indexed comments. Reveal the threads by clicking the *+ Replies* button. Comments with a heart symbol are favorited by the uploader, comments by the uploader are highlighted in a different color.
|
Before Width: | Height: | Size: 29 KiB |
Before Width: | Height: | Size: 26 KiB |
Before Width: | Height: | Size: 62 KiB |
Before Width: | Height: | Size: 18 KiB |
Before Width: | Height: | Size: 27 KiB |
Before Width: | Height: | Size: 133 KiB |
Before Width: | Height: | Size: 26 KiB |
Before Width: | Height: | Size: 29 KiB |
Before Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 58 KiB |
Before Width: | Height: | Size: 48 KiB |
Before Width: | Height: | Size: 69 KiB |
Before Width: | Height: | Size: 2.5 KiB |
Before Width: | Height: | Size: 4.3 KiB |
Before Width: | Height: | Size: 4.3 KiB |
Before Width: | Height: | Size: 2.7 KiB |
Before Width: | Height: | Size: 3.1 KiB |
Before Width: | Height: | Size: 2.8 KiB |
Before Width: | Height: | Size: 4.4 KiB |
Before Width: | Height: | Size: 5.1 KiB |
Before Width: | Height: | Size: 3.3 KiB |
Before Width: | Height: | Size: 2.0 KiB |
Before Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 2.2 KiB |
|
@ -1,438 +1,3 @@
|
|||
# TubeArchivist API
|
||||
Documentation of available API endpoints.
|
||||
|
||||
Note:
|
||||
- This is in very early stages and will change!
|
||||
- Check the commit history to see if a documented feature is already in your release
|
||||
|
||||
## Table of contents
|
||||
- [Authentication](#authentication)
|
||||
- [Pagination](#pagination)
|
||||
|
||||
**Video**
|
||||
- [Video List](#video-list-view)
|
||||
- [Video Single](#video-item-view)
|
||||
- [Video Comments](#video-comment-view)
|
||||
- [Video Similar](#video-similar-view)
|
||||
- [Video Single Progress](#video-progress-view)
|
||||
- [Video Single Sponsorblock](#sponsor-block-view) WIP
|
||||
|
||||
**Channel**
|
||||
- [Channel List](#channel-list-view)
|
||||
- [Channel Single](#channel-item-view)
|
||||
- [Channel Video List](#channel-videos-view)
|
||||
|
||||
**Playlist**
|
||||
- [Playlist List](#playlist-list-view)
|
||||
- [Playlist Single](#playlist-item-view)
|
||||
- [Playlist Videos List](#playlist-videos-view)
|
||||
|
||||
**Download queue**
|
||||
- [Download Queue List](#download-queue-list-view)
|
||||
- [Download Queue Single](#download-queue-item-view)
|
||||
|
||||
**Snapshot management**
|
||||
- [Snapshot List](#snapshot-list-view)
|
||||
- [Snapshot Single](#snapshot-item-view)
|
||||
|
||||
**Additional**
|
||||
- [Login](#login-view)
|
||||
- [Task](#task-view) WIP
|
||||
- [Refresh](#refresh-view)
|
||||
- [Cookie](#cookie-view)
|
||||
- [Search](#search-view)
|
||||
- [Watched](#watched-view)
|
||||
- [Ping](#ping-view)
|
||||
|
||||
## Authentication
|
||||
API token will get automatically created, accessible on the settings page. Token needs to be passed as an authorization header with every request. Additionally session based authentication is enabled too: When you are logged into your TubeArchivist instance, you'll have access to the api in the browser for testing.
|
||||
|
||||
Curl example:
|
||||
```shell
|
||||
curl -v /api/video/<video-id>/ \
|
||||
-H "Authorization: Token xxxxxxxxxx"
|
||||
```
|
||||
|
||||
Python requests example:
|
||||
```python
|
||||
import requests
|
||||
|
||||
url = "/api/video/<video-id>/"
|
||||
headers = {"Authorization": "Token xxxxxxxxxx"}
|
||||
response = requests.get(url, headers=headers)
|
||||
```
|
||||
|
||||
## Pagination
|
||||
The list views return a paginate object with the following keys:
|
||||
- page_size: *int* current page size set in config
|
||||
- page_from: *int* first result idx
|
||||
- prev_pages: *array of ints* of previous pages, if available
|
||||
- current_page: *int* current page from query
|
||||
- max_hits: *bool* if max of 10k results is reached
|
||||
- params: *str* additional url encoded query parameters
|
||||
- last_page: *int* of last page link
|
||||
- next_pages: *array of ints* of next pages
|
||||
- total_hits: *int* total results
|
||||
|
||||
Pass page number as a query parameter: `page=2`. Defaults to *0*, `page=1` is redundant and falls back to *0*. If a page query doesn't return any results, you'll get `HTTP 404 Not Found`.
|
||||
|
||||
## Video List View
|
||||
/api/video/
|
||||
|
||||
## Video Item View
|
||||
GET: /api/video/\<video_id>/
|
||||
DELETE: /api/video/\<video_id>/
|
||||
|
||||
## Video Comment View
|
||||
/api/video/\<video_id>/comment/
|
||||
|
||||
## Video Similar View
|
||||
/api/video/\<video_id>/similar/
|
||||
|
||||
## Video Progress View
|
||||
/api/video/\<video_id>/progress/
|
||||
|
||||
Progress is stored for each user.
|
||||
|
||||
### Get last player position of a video
|
||||
GET /api/video/\<video_id>/progress/
|
||||
```json
|
||||
{
|
||||
"youtube_id": "<video_id>",
|
||||
"user_id": 1,
|
||||
"position": 100
|
||||
}
|
||||
```
|
||||
|
||||
### Post player position of video
|
||||
POST /api/video/\<video_id>/progress/
|
||||
```json
|
||||
{
|
||||
"position": 100
|
||||
}
|
||||
```
|
||||
|
||||
### Delete player position of video
|
||||
DELETE /api/video/\<video_id>/progress/
|
||||
|
||||
|
||||
## Sponsor Block View
|
||||
/api/video/\<video_id>/sponsor/
|
||||
|
||||
Integrate with sponsorblock
|
||||
|
||||
### Get list of segments
|
||||
GET /api/video/\<video_id>/sponsor/
|
||||
|
||||
|
||||
### Vote on existing segment
|
||||
**This only simulates the request**
|
||||
POST /api/video/\<video_id>/sponsor/
|
||||
```json
|
||||
{
|
||||
"vote": {
|
||||
"uuid": "<uuid>",
|
||||
"yourVote": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
yourVote needs to be *int*: 0 for downvote, 1 for upvote, 20 to undo vote
|
||||
|
||||
### Create new segment
|
||||
**This only simulates the request**
|
||||
POST /api/video/\<video_id>/sponsor/
|
||||
```json
|
||||
{
|
||||
"segment": {
|
||||
"startTime": 5,
|
||||
"endTime": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
Timestamps either *int* or *float*, end time can't be before start time.
|
||||
|
||||
|
||||
## Channel List View
|
||||
/api/channel/
|
||||
|
||||
### Subscribe to a list of channels
|
||||
POST /api/channel/
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{"channel_id": "UC9-y-6csu5WGm29I7JiwpnA", "channel_subscribed": true}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Channel Item View
|
||||
GET: /api/channel/\<channel_id>/
|
||||
DELETE: /api/channel/\<channel_id>/
|
||||
- Will delete the channel together with all its videos
|
||||
|
||||
## Channel Videos View
|
||||
/api/channel/\<channel_id>/video/
|
||||
|
||||
## Playlist List View
|
||||
/api/playlist/
|
||||
|
||||
## Playlist Item View
|
||||
/api/playlist/\<playlist_id>/
|
||||
|
||||
## Playlist Videos View
|
||||
/api/playlist/\<playlist_id>/video/
|
||||
|
||||
## Download Queue List View
|
||||
GET /api/download/
|
||||
|
||||
Parameter:
|
||||
- filter: pending, ignore
|
||||
- channel: channel-id
|
||||
|
||||
### Add list of videos to download queue
|
||||
POST /api/download/
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{"youtube_id": "NYj3DnI81AQ", "status": "pending"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Delete download queue items by filter
|
||||
DELETE /api/download/?filter=ignore
|
||||
DELETE /api/download/?filter=pending
|
||||
|
||||
## Download Queue Item View
|
||||
GET /api/download/\<video_id>/
|
||||
POST /api/download/\<video_id>/
|
||||
|
||||
Ignore video in download queue:
|
||||
```json
|
||||
{
|
||||
"status": "ignore"
|
||||
}
|
||||
```
|
||||
|
||||
Add to queue previously ignored video:
|
||||
```json
|
||||
{
|
||||
"status": "pending"
|
||||
}
|
||||
```
|
||||
|
||||
Download existing video now:
|
||||
```json
|
||||
{
|
||||
"status": "priority"
|
||||
}
|
||||
```
|
||||
|
||||
DELETE /api/download/\<video_id>/
|
||||
Forget or delete from download queue
|
||||
|
||||
## Snapshot List View
|
||||
GET /api/snapshot/
|
||||
Return snapshot config and a list of available snapshots.
|
||||
|
||||
```json
|
||||
{
|
||||
"next_exec": epoch,
|
||||
"next_exec_str": "date_str",
|
||||
"expire_after": "30d",
|
||||
"snapshots": []
|
||||
}
|
||||
```
|
||||
|
||||
POST /api/snapshot/
|
||||
Create new snapshot now, will return immediately, task will run async in the background, will return snapshot name:
|
||||
```json
|
||||
{
|
||||
"snapshot_name": "ta_daily_<random-id>"
|
||||
}
|
||||
```
|
||||
|
||||
## Snapshot Item View
|
||||
GET /api/snapshot/\<snapshot-id>/
|
||||
Return metadata of a single snapshot
|
||||
```json
|
||||
{
|
||||
"id": "ta_daily_<random-id>",
|
||||
"state": "SUCCESS",
|
||||
"es_version": "0.0.0",
|
||||
"start_date": "date_str",
|
||||
"end_date": "date_str",
|
||||
"end_stamp": epoch,
|
||||
"duration_s": 0
|
||||
}
|
||||
```
|
||||
|
||||
POST /api/snapshot/\<snapshot-id>/
|
||||
Restore this snapshot
|
||||
|
||||
DELETE /api/snapshot/\<snapshot-id>/
|
||||
Remove this snapshot from index
|
||||
|
||||
## Login View
|
||||
Return token and user ID for username and password:
|
||||
POST /api/login/
|
||||
```json
|
||||
{
|
||||
"username": "tubearchivist",
|
||||
"password": "verysecret"
|
||||
}
|
||||
```
|
||||
|
||||
after successful login returns
|
||||
```json
|
||||
{
|
||||
"token": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Task View
|
||||
GET /api/task/
|
||||
POST /api/task/
|
||||
|
||||
Check if there is an ongoing task:
|
||||
GET /api/task/
|
||||
|
||||
Returns:
|
||||
```json
|
||||
{
|
||||
"rescan": false,
|
||||
"downloading": false
|
||||
}
|
||||
```
|
||||
|
||||
Start a background task
|
||||
POST /api/task/
|
||||
```json
|
||||
{
|
||||
"run": "task_name"
|
||||
}
|
||||
```
|
||||
|
||||
List of valid task names:
|
||||
- **download_pending**: Start the download queue
|
||||
- **rescan_pending**: Rescan your subscriptions
|
||||
|
||||
## Refresh View
|
||||
GET /api/refresh/
|
||||
parameters:
|
||||
- **type**: one of *video*, *channel*, *playlist*, optional
|
||||
- **id**: item id, optional
|
||||
|
||||
without specifying type: return total for all queued items:
|
||||
```json
|
||||
{
|
||||
"total_queued": 2,
|
||||
"type": "all",
|
||||
"state": "running"
|
||||
}
|
||||
```
|
||||
|
||||
specify type: return total items queue of this type:
|
||||
```json
|
||||
{
|
||||
"total_queued": 2,
|
||||
"type": "video",
|
||||
"state": "running"
|
||||
}
|
||||
```
|
||||
|
||||
specify type *and* id to get state of item in queue:
|
||||
```json
|
||||
{
|
||||
"total_queued": 2,
|
||||
"type": "video",
|
||||
"state": "in_queue",
|
||||
"id": "video-id"
|
||||
}
|
||||
```
|
||||
|
||||
POST /api/refresh/
|
||||
Parameter:
|
||||
- extract_videos: to refresh all videos for channels/playlists, default False
|
||||
|
||||
Manually start a refresh task: post list of *video*, *channel*, *playlist* IDs.
|
||||
```json
|
||||
{
|
||||
"video": ["video1", "video2", "video3"],
|
||||
"channel": ["channel1", "channel2", "channel3"],
|
||||
"playlist": ["playlist1", "playlist2"]
|
||||
}
|
||||
```
|
||||
|
||||
## Cookie View
|
||||
Check your youtube cookie settings, *status* turns to `true` if cookie has been validated.
|
||||
GET /api/cookie/
|
||||
```json
|
||||
{
|
||||
"cookie_enabled": true,
|
||||
"status": true,
|
||||
"validated": <timestamp>,
|
||||
"validated_str": "timestamp"
|
||||
}
|
||||
```
|
||||
|
||||
POST /api/cookie/
|
||||
Send empty post request to validate cookie.
|
||||
```json
|
||||
{
|
||||
"cookie_validated": true
|
||||
}
|
||||
```
|
||||
|
||||
PUT /api/cookie/
|
||||
Send put request containing the cookie as a string:
|
||||
```json
|
||||
{
|
||||
"cookie": "your-cookie-as-string"
|
||||
}
|
||||
```
|
||||
Imports and validates cookie, returns on success:
|
||||
```json
|
||||
{
|
||||
"cookie_import": "done",
|
||||
"cookie_validated": true
|
||||
}
|
||||
```
|
||||
Or returns status code 400 on failure:
|
||||
```json
|
||||
{
|
||||
"cookie_import": "fail",
|
||||
"cookie_validated": false
|
||||
}
|
||||
```
|
||||
|
||||
## Search View
|
||||
GET /api/search/?query=\<query>
|
||||
|
||||
Returns search results from your query.
|
||||
|
||||
## Watched View
|
||||
POST /api/watched/
|
||||
|
||||
Change watched state, where the `id` can be a single video, or channel/playlist to change all videos belonging to that channel/playlist.
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "xxxxxxx",
|
||||
"is_watched": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Ping View
|
||||
Validate your connection with the API
|
||||
GET /api/ping/
|
||||
|
||||
When valid returns message with user id:
|
||||
```json
|
||||
{
|
||||
"response": "pong",
|
||||
"user": 1
|
||||
}
|
||||
```
|
||||
All API documentation has moved to [docs.tubearchivist.com](https://docs.tubearchivist.com/).
|
||||
|
|
|
@ -0,0 +1,351 @@
|
|||
"""aggregations"""
|
||||
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.helper import get_duration_str
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class AggBase:
    """shared plumbing for aggregation requests"""

    # endpoint handed to ElasticWrap, payload passed to its get call and
    # label used in the timing log line, subclasses set all three
    path: str = ""
    data: dict = {}
    name: str = ""

    def get(self):
        """send the request, log the took value, return the aggregations"""
        es_response, _ = ElasticWrap(self.path).get(self.data)
        took = es_response.get("took")
        print(f"[agg][{self.name}] took {took} ms to process")

        return es_response.get("aggregations")

    def process(self):
        """build the final response, needs to be implemented in subclasses"""
        raise NotImplementedError
|
||||
|
||||
|
||||
class Video(AggBase):
    """aggregate stats over the video index"""

    name = "video_stats"
    path = "ta_video/_search"
    data = {
        "size": 0,
        "aggs": {
            "video_type": {
                "terms": {"field": "vid_type"},
                "aggs": {
                    "media_size": {"sum": {"field": "media_size"}},
                    "duration": {"sum": {"field": "player.duration"}},
                },
            },
            "video_active": {
                "terms": {"field": "active"},
                "aggs": {
                    "media_size": {"sum": {"field": "media_size"}},
                    "duration": {"sum": {"field": "player.duration"}},
                },
            },
            "video_media_size": {"sum": {"field": "media_size"}},
            "video_count": {"value_count": {"field": "youtube_id"}},
            "duration": {"sum": {"field": "player.duration"}},
        },
    }

    def process(self):
        """return totals plus one entry per vid_type and active bucket"""
        aggregations = self.get()

        total_duration = int(aggregations["duration"]["value"])
        response = {
            "doc_count": aggregations["video_count"]["value"],
            "media_size": int(aggregations["video_media_size"]["value"]),
            "duration": total_duration,
            "duration_str": get_duration_str(total_duration),
        }

        for type_bucket in aggregations["video_type"]["buckets"]:
            key = f"type_{type_bucket['key']}"
            response[key] = self._parse_bucket(type_bucket)

        for active_bucket in aggregations["video_active"]["buckets"]:
            key = f"active_{active_bucket['key_as_string']}"
            response[key] = self._parse_bucket(active_bucket)

        return response

    @staticmethod
    def _parse_bucket(bucket):
        """extract doc count, media size and duration of a single bucket"""
        duration = int(bucket["duration"].get("value"))

        return {
            "doc_count": bucket.get("doc_count"),
            "media_size": int(bucket["media_size"].get("value")),
            "duration": duration,
            "duration_str": get_duration_str(duration),
        }
|
||||
|
||||
|
||||
class Channel(AggBase):
    """aggregate stats over the channel index"""

    name = "channel_stats"
    path = "ta_channel/_search"
    data = {
        "size": 0,
        "aggs": {
            "channel_count": {"value_count": {"field": "channel_id"}},
            "channel_active": {"terms": {"field": "channel_active"}},
            "channel_subscribed": {"terms": {"field": "channel_subscribed"}},
        },
    }

    def process(self):
        """return total count plus counts per active and subscribed state"""
        aggregations = self.get()

        response = {"doc_count": aggregations["channel_count"].get("value")}
        # active buckets first, subscribed second, keeps the key order
        for prefix in ("active", "subscribed"):
            for bucket in aggregations[f"channel_{prefix}"]["buckets"]:
                key = f"{prefix}_{bucket['key_as_string']}"
                response[key] = bucket.get("doc_count")

        return response
|
||||
|
||||
|
||||
class Playlist(AggBase):
    """aggregate stats over the playlist index"""

    name = "playlist_stats"
    path = "ta_playlist/_search"
    data = {
        "size": 0,
        "aggs": {
            "playlist_count": {"value_count": {"field": "playlist_id"}},
            "playlist_active": {"terms": {"field": "playlist_active"}},
            "playlist_subscribed": {"terms": {"field": "playlist_subscribed"}},
        },
    }

    def process(self):
        """return total count plus counts per active and subscribed state"""
        aggregations = self.get()

        response = {"doc_count": aggregations["playlist_count"].get("value")}
        # active buckets first, subscribed second, keeps the key order
        for prefix in ("active", "subscribed"):
            for bucket in aggregations[f"playlist_{prefix}"]["buckets"]:
                key = f"{prefix}_{bucket['key_as_string']}"
                response[key] = bucket.get("doc_count")

        return response
|
||||
|
||||
|
||||
class Download(AggBase):
    """aggregate stats over the download queue index"""

    name = "download_queue_stats"
    path = "ta_download/_search"
    data = {
        "size": 0,
        "aggs": {
            "status": {"terms": {"field": "status"}},
            "video_type": {
                "filter": {"term": {"status": "pending"}},
                "aggs": {"type_pending": {"terms": {"field": "vid_type"}}},
            },
        },
    }

    def process(self):
        """return counts per status and per vid_type of pending items"""
        aggregations = self.get()
        by_status = aggregations["status"]["buckets"]
        by_type = aggregations["video_type"]["type_pending"]["buckets"]

        response = {i["key"]: i.get("doc_count") for i in by_status}
        response.update(
            {f"pending_{i['key']}": i.get("doc_count") for i in by_type}
        )

        return response
|
||||
|
||||
|
||||
class WatchProgress(AggBase):
    """aggregate watched vs unwatched duration and item counts"""

    name = "watch_progress"
    path = "ta_video/_search"
    data = {
        "size": 0,
        "aggs": {
            name: {
                "terms": {"field": "player.watched"},
                "aggs": {
                    "watch_docs": {
                        "filter": {"terms": {"player.watched": [True, False]}},
                        "aggs": {
                            "true_count": {"value_count": {"field": "_index"}},
                            "duration": {"sum": {"field": "player.duration"}},
                        },
                    },
                },
            },
            "total_duration": {"sum": {"field": "player.duration"}},
            "total_vids": {"value_count": {"field": "_index"}},
        },
    }

    def process(self):
        """return the total and one entry per watched state bucket"""
        aggregations = self.get()

        all_duration = int(aggregations["total_duration"].get("value"))
        response = {
            "total": {
                "duration": all_duration,
                "duration_str": get_duration_str(all_duration),
                "items": aggregations["total_vids"].get("value"),
            }
        }
        for bucket in aggregations[self.name]["buckets"]:
            response.update(self._build_bucket(bucket, all_duration))

        return response

    @staticmethod
    def _build_bucket(bucket, all_duration):
        """parse a single bucket, keyed as watched or unwatched"""
        watch_docs = bucket["watch_docs"]
        duration = int(watch_docs["duration"]["value"])
        is_unwatched = bucket["key_as_string"] == "false"
        # avoid dividing by zero on an empty index
        progress = duration / all_duration if all_duration else 0

        return {
            "unwatched" if is_unwatched else "watched": {
                "duration": duration,
                "duration_str": get_duration_str(duration),
                "progress": progress,
                "items": watch_docs["true_count"]["value"],
            }
        }
|
||||
|
||||
|
||||
class DownloadHist(AggBase):
    """daily histogram of downloaded videos, query limited to now-7d/d"""

    name = "videos_last_week"
    path = "ta_video/_search"
    data = {
        "size": 0,
        "aggs": {
            name: {
                "date_histogram": {
                    "field": "date_downloaded",
                    "calendar_interval": "day",
                    "format": "yyyy-MM-dd",
                    "order": {"_key": "desc"},
                    "time_zone": EnvironmentSettings.TZ,
                },
                "aggs": {
                    "total_videos": {"value_count": {"field": "youtube_id"}},
                    "media_size": {"sum": {"field": "media_size"}},
                },
            }
        },
        "query": {
            "range": {
                "date_downloaded": {
                    "gte": "now-7d/d",
                    "time_zone": EnvironmentSettings.TZ,
                }
            }
        },
    }

    def process(self):
        """return a list with date, count and media size per bucket"""
        aggregations = self.get()

        response = []
        for day in aggregations[self.name]["buckets"]:
            response.append(
                {
                    "date": day.get("key_as_string"),
                    "count": day.get("doc_count"),
                    "media_size": day["media_size"].get("value"),
                }
            )

        return response
|
||||
|
||||
|
||||
class BiggestChannel(AggBase):
    """get channel aggregations, ordered by a selectable metric"""

    name = "channel_stats"
    path = "ta_video/_search"
    # class level template, never modified, each instance gets its own copy
    data = {
        "size": 0,
        "aggs": {
            name: {
                "multi_terms": {
                    "terms": [
                        {"field": "channel.channel_name.keyword"},
                        {"field": "channel.channel_id"},
                    ],
                    "order": {"doc_count": "desc"},
                },
                "aggs": {
                    "doc_count": {"value_count": {"field": "_index"}},
                    "duration": {"sum": {"field": "player.duration"}},
                    "media_size": {"sum": {"field": "media_size"}},
                },
            },
        },
    }
    order_choices = ["doc_count", "duration", "media_size"]

    def __init__(self, order):
        """set the sort order, order is the name of the sub aggregation"""
        # function scope import, copy is not imported at module level
        from copy import deepcopy

        # assigning into the class level dict would leak the requested
        # order into all other instances, so work on a private copy
        self.data = deepcopy(self.data)
        self.data["aggs"][self.name]["multi_terms"]["order"] = {order: "desc"}

    def process(self):
        """process aggregation, order_by validated in the view"""
        aggregations = self.get()
        buckets = aggregations[self.name]["buckets"]

        # multi_terms key is a list, name first and id second as defined
        # in the terms list of the query
        response = [
            {
                "id": i["key"][1],
                "name": i["key"][0].title(),
                "doc_count": i["doc_count"]["value"],
                "duration": i["duration"]["value"],
                "duration_str": get_duration_str(int(i["duration"]["value"])),
                "media_size": i["media_size"]["value"],
            }
            for i in buckets
        ]

        return response
|
|
@ -7,15 +7,14 @@ Functionality:
|
|||
import urllib.parse
|
||||
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import date_praser
|
||||
from home.src.ta.helper import date_praser, get_duration_str
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class SearchProcess:
|
||||
"""process search results"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
|
||||
def __init__(self, response):
|
||||
self.response = response
|
||||
|
@ -50,6 +49,16 @@ class SearchProcess:
|
|||
processed = self._process_download(result["_source"])
|
||||
if index == "ta_comment":
|
||||
processed = self._process_comment(result["_source"])
|
||||
if index == "ta_subtitle":
|
||||
processed = self._process_subtitle(result)
|
||||
|
||||
if isinstance(processed, dict):
|
||||
processed.update(
|
||||
{
|
||||
"_index": index,
|
||||
"_score": round(result.get("_score") or 0, 2),
|
||||
}
|
||||
)
|
||||
|
||||
return processed
|
||||
|
||||
|
@ -64,7 +73,7 @@ class SearchProcess:
|
|||
"channel_last_refresh": date_str,
|
||||
"channel_banner_url": f"{art_base}_banner.jpg",
|
||||
"channel_thumb_url": f"{art_base}_thumb.jpg",
|
||||
"channel_tvart_url": False,
|
||||
"channel_tvart_url": f"{art_base}_tvart.jpg",
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -139,3 +148,29 @@ class SearchProcess:
|
|||
processed_comments[-1]["comment_replies"].append(comment)
|
||||
|
||||
return processed_comments
|
||||
|
||||
def _process_subtitle(self, result):
    """build subtitle result, needs the complete hit to read the highlight"""
    source = result["_source"]

    highlight = result.get("highlight")
    if highlight:
        # swap in the first highlighted fragment for the plain line
        source["subtitle_line"] = highlight.get("subtitle_line")[0]

    thumb_path = ThumbManager(source["youtube_id"]).vid_thumb_path()
    source["vid_thumb_url"] = f"/cache/{thumb_path}"

    return source
|
||||
|
||||
|
||||
def process_aggs(response):
    """add value_str to total_duration aggregation, modifies in place"""
    aggs = response.get("aggregations")
    if not aggs or "total_duration" not in aggs:
        # nothing to convert
        return

    total_duration = aggs["total_duration"]
    duration_sec = int(total_duration["value"])
    total_duration.update({"value_str": get_duration_str(duration_sec)})
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
"""
|
||||
Functionality:
|
||||
- process tasks from API
|
||||
- validate
|
||||
- handover to celery
|
||||
"""
|
||||
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.tasks import download_pending, update_subscribed
|
||||
|
||||
|
||||
class TaskHandler:
    """dispatch task requests coming in over the api"""

    def __init__(self, data):
        self.data = data

    def run_task(self):
        """look up task from the run key, execute and return its response"""
        task_name = self.data["run"]
        try:
            to_run = self.exec_map(task_name)
        except KeyError as err:
            print(f"invalid task name {task_name}")
            raise ValueError from err

        response = to_run()
        response["task"] = task_name
        return response

    def exec_map(self, task_name):
        """return callable for task_name, raises KeyError if unknown"""
        tasks = dict(
            download_pending=self._download_pending,
            rescan_pending=self._rescan_pending,
        )

        return tasks[task_name]

    @staticmethod
    def _rescan_pending():
        """queue the update_subscribed task"""
        print("rescan subscribed channels")
        update_subscribed.delay()
        return {"success": True}

    @staticmethod
    def _download_pending():
        """queue the download_pending task, store its id in redis"""
        print("download pending")
        running = download_pending.delay()
        print("set task id: " + running.id)
        RedisArchivist().set_message("dl_queue_id", running.id)
        return {"success": True}
|
|
@ -1,138 +1,189 @@
|
|||
"""all api urls"""
|
||||
|
||||
from api.views import (
|
||||
ChannelApiListView,
|
||||
ChannelApiVideoView,
|
||||
ChannelApiView,
|
||||
CookieView,
|
||||
DownloadApiListView,
|
||||
DownloadApiView,
|
||||
LoginApiView,
|
||||
PingView,
|
||||
PlaylistApiListView,
|
||||
PlaylistApiVideoView,
|
||||
PlaylistApiView,
|
||||
RefreshView,
|
||||
SearchView,
|
||||
SnapshotApiListView,
|
||||
SnapshotApiView,
|
||||
TaskApiView,
|
||||
VideoApiListView,
|
||||
VideoApiView,
|
||||
VideoCommentView,
|
||||
VideoProgressView,
|
||||
VideoSimilarView,
|
||||
VideoSponsorView,
|
||||
WatchedView,
|
||||
)
|
||||
from api import views
|
||||
from django.urls import path
|
||||
|
||||
urlpatterns = [
|
||||
path("ping/", PingView.as_view(), name="ping"),
|
||||
path("login/", LoginApiView.as_view(), name="api-login"),
|
||||
path("ping/", views.PingView.as_view(), name="ping"),
|
||||
path("login/", views.LoginApiView.as_view(), name="api-login"),
|
||||
path(
|
||||
"video/",
|
||||
VideoApiListView.as_view(),
|
||||
views.VideoApiListView.as_view(),
|
||||
name="api-video-list",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/",
|
||||
VideoApiView.as_view(),
|
||||
views.VideoApiView.as_view(),
|
||||
name="api-video",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/progress/",
|
||||
VideoProgressView.as_view(),
|
||||
views.VideoProgressView.as_view(),
|
||||
name="api-video-progress",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/comment/",
|
||||
VideoCommentView.as_view(),
|
||||
views.VideoCommentView.as_view(),
|
||||
name="api-video-comment",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/similar/",
|
||||
VideoSimilarView.as_view(),
|
||||
views.VideoSimilarView.as_view(),
|
||||
name="api-video-similar",
|
||||
),
|
||||
path(
|
||||
"video/<slug:video_id>/sponsor/",
|
||||
VideoSponsorView.as_view(),
|
||||
views.VideoSponsorView.as_view(),
|
||||
name="api-video-sponsor",
|
||||
),
|
||||
path(
|
||||
"channel/",
|
||||
ChannelApiListView.as_view(),
|
||||
views.ChannelApiListView.as_view(),
|
||||
name="api-channel-list",
|
||||
),
|
||||
path(
|
||||
"channel/search/",
|
||||
views.ChannelApiSearchView.as_view(),
|
||||
name="api-channel-search",
|
||||
),
|
||||
path(
|
||||
"channel/<slug:channel_id>/",
|
||||
ChannelApiView.as_view(),
|
||||
views.ChannelApiView.as_view(),
|
||||
name="api-channel",
|
||||
),
|
||||
path(
|
||||
"channel/<slug:channel_id>/video/",
|
||||
ChannelApiVideoView.as_view(),
|
||||
views.ChannelApiVideoView.as_view(),
|
||||
name="api-channel-video",
|
||||
),
|
||||
path(
|
||||
"playlist/",
|
||||
PlaylistApiListView.as_view(),
|
||||
views.PlaylistApiListView.as_view(),
|
||||
name="api-playlist-list",
|
||||
),
|
||||
path(
|
||||
"playlist/<slug:playlist_id>/",
|
||||
PlaylistApiView.as_view(),
|
||||
views.PlaylistApiView.as_view(),
|
||||
name="api-playlist",
|
||||
),
|
||||
path(
|
||||
"playlist/<slug:playlist_id>/video/",
|
||||
PlaylistApiVideoView.as_view(),
|
||||
views.PlaylistApiVideoView.as_view(),
|
||||
name="api-playlist-video",
|
||||
),
|
||||
path(
|
||||
"download/",
|
||||
DownloadApiListView.as_view(),
|
||||
views.DownloadApiListView.as_view(),
|
||||
name="api-download-list",
|
||||
),
|
||||
path(
|
||||
"download/<slug:video_id>/",
|
||||
DownloadApiView.as_view(),
|
||||
views.DownloadApiView.as_view(),
|
||||
name="api-download",
|
||||
),
|
||||
path(
|
||||
"refresh/",
|
||||
RefreshView.as_view(),
|
||||
views.RefreshView.as_view(),
|
||||
name="api-refresh",
|
||||
),
|
||||
path(
|
||||
"task/",
|
||||
TaskApiView.as_view(),
|
||||
name="api-task",
|
||||
),
|
||||
path(
|
||||
"snapshot/",
|
||||
SnapshotApiListView.as_view(),
|
||||
views.SnapshotApiListView.as_view(),
|
||||
name="api-snapshot-list",
|
||||
),
|
||||
path(
|
||||
"snapshot/<slug:snapshot_id>/",
|
||||
SnapshotApiView.as_view(),
|
||||
views.SnapshotApiView.as_view(),
|
||||
name="api-snapshot",
|
||||
),
|
||||
path(
|
||||
"backup/",
|
||||
views.BackupApiListView.as_view(),
|
||||
name="api-backup-list",
|
||||
),
|
||||
path(
|
||||
"backup/<str:filename>/",
|
||||
views.BackupApiView.as_view(),
|
||||
name="api-backup",
|
||||
),
|
||||
path(
|
||||
"task-name/",
|
||||
views.TaskListView.as_view(),
|
||||
name="api-task-list",
|
||||
),
|
||||
path(
|
||||
"task-name/<slug:task_name>/",
|
||||
views.TaskNameListView.as_view(),
|
||||
name="api-task-name-list",
|
||||
),
|
||||
path(
|
||||
"task-id/<slug:task_id>/",
|
||||
views.TaskIDView.as_view(),
|
||||
name="api-task-id",
|
||||
),
|
||||
path(
|
||||
"config/user/",
|
||||
views.UserConfigView.as_view(),
|
||||
name="api-config-user",
|
||||
),
|
||||
path(
|
||||
"cookie/",
|
||||
CookieView.as_view(),
|
||||
views.CookieView.as_view(),
|
||||
name="api-cookie",
|
||||
),
|
||||
path(
|
||||
"watched/",
|
||||
WatchedView.as_view(),
|
||||
views.WatchedView.as_view(),
|
||||
name="api-watched",
|
||||
),
|
||||
path(
|
||||
"search/",
|
||||
SearchView.as_view(),
|
||||
views.SearchView.as_view(),
|
||||
name="api-search",
|
||||
),
|
||||
path(
|
||||
"token/",
|
||||
views.TokenView.as_view(),
|
||||
name="api-token",
|
||||
),
|
||||
path(
|
||||
"notification/",
|
||||
views.NotificationView.as_view(),
|
||||
name="api-notification",
|
||||
),
|
||||
path(
|
||||
"stats/video/",
|
||||
views.StatVideoView.as_view(),
|
||||
name="api-stats-video",
|
||||
),
|
||||
path(
|
||||
"stats/channel/",
|
||||
views.StatChannelView.as_view(),
|
||||
name="api-stats-channel",
|
||||
),
|
||||
path(
|
||||
"stats/playlist/",
|
||||
views.StatPlaylistView.as_view(),
|
||||
name="api-stats-playlist",
|
||||
),
|
||||
path(
|
||||
"stats/download/",
|
||||
views.StatDownloadView.as_view(),
|
||||
name="api-stats-download",
|
||||
),
|
||||
path(
|
||||
"stats/watch/",
|
||||
views.StatWatchProgress.as_view(),
|
||||
name="api-stats-watch",
|
||||
),
|
||||
path(
|
||||
"stats/downloadhist/",
|
||||
views.StatDownloadHist.as_view(),
|
||||
name="api-stats-downloadhist",
|
||||
),
|
||||
path(
|
||||
"stats/biggestchannels/",
|
||||
views.StatBiggestChannel.as_view(),
|
||||
name="api-stats-biggestchannels",
|
||||
),
|
||||
]
|
||||
|
|
|
@ -1,43 +1,95 @@
|
|||
"""all API views"""
|
||||
|
||||
from api.src.aggs import (
|
||||
BiggestChannel,
|
||||
Channel,
|
||||
Download,
|
||||
DownloadHist,
|
||||
Playlist,
|
||||
Video,
|
||||
WatchProgress,
|
||||
)
|
||||
from api.src.search_processor import SearchProcess
|
||||
from api.src.task_processor import TaskHandler
|
||||
from home.src.download.queue import PendingInteract
|
||||
from home.src.download.subscriptions import (
|
||||
ChannelSubscription,
|
||||
PlaylistSubscription,
|
||||
)
|
||||
from home.src.download.yt_dlp_base import CookieHandler
|
||||
from home.src.es.backup import ElasticBackup
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.snapshot import ElasticSnapshot
|
||||
from home.src.frontend.searching import SearchForm
|
||||
from home.src.frontend.watched import WatchState
|
||||
from home.src.index.channel import YoutubeChannel
|
||||
from home.src.index.generic import Pagination
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.index.reindex import ReindexProgress
|
||||
from home.src.index.video import SponsorBlock, YoutubeVideo
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||
from home.src.ta.config import AppConfig, ReleaseVersion
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.task_manager import TaskCommand, TaskManager
|
||||
from home.src.ta.urlparser import Parser
|
||||
from home.tasks import check_reindex, download_single, extrac_dl, subscribe_to
|
||||
from home.src.ta.users import UserConfig
|
||||
from home.tasks import (
|
||||
BaseTask,
|
||||
check_reindex,
|
||||
download_pending,
|
||||
extrac_dl,
|
||||
run_restore_backup,
|
||||
subscribe_to,
|
||||
)
|
||||
from rest_framework import permissions, status
|
||||
from rest_framework.authentication import (
|
||||
SessionAuthentication,
|
||||
TokenAuthentication,
|
||||
)
|
||||
from rest_framework.authtoken.models import Token
|
||||
from rest_framework.authtoken.views import ObtainAuthToken
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.views import APIView
|
||||
|
||||
|
||||
def check_admin(user):
    """tell whether user may access admin restricted views

    staff users qualify directly, everybody else needs to be a member
    of the group named "admin"
    """
    staff_flag = user.is_staff
    if staff_flag:
        return staff_flag

    admin_groups = user.groups.filter(name="admin")
    return admin_groups.exists()
|
||||
|
||||
|
||||
class AdminOnly(permissions.BasePermission):
    """permission class, lets only admin users through"""

    def has_permission(self, request, view):
        """grant access if check_admin accepts the requesting user"""
        is_admin = check_admin(request.user)
        return is_admin
|
||||
|
||||
|
||||
class AdminWriteOnly(permissions.BasePermission):
    """permission class, reads for authenticated users, writes for admin"""

    def has_permission(self, request, view):
        """safe methods need authentication, all other methods need admin"""
        if request.method not in permissions.SAFE_METHODS:
            return check_admin(request.user)

        authenticated = permissions.IsAuthenticated()
        return authenticated.has_permission(request, view)
|
||||
|
||||
|
||||
class ApiBaseView(APIView):
|
||||
"""base view to inherit from"""
|
||||
|
||||
authentication_classes = [SessionAuthentication, TokenAuthentication]
|
||||
permission_classes = [IsAuthenticated]
|
||||
search_base = False
|
||||
data = False
|
||||
permission_classes = [permissions.IsAuthenticated]
|
||||
search_base = ""
|
||||
data = ""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.response = {"data": False, "config": AppConfig().config}
|
||||
self.response = {
|
||||
"data": False,
|
||||
"config": {
|
||||
"enable_cast": EnvironmentSettings.ENABLE_CAST,
|
||||
"downloads": AppConfig().config["downloads"],
|
||||
},
|
||||
}
|
||||
self.data = {"query": {"match_all": {}}}
|
||||
self.status_code = False
|
||||
self.context = False
|
||||
|
@ -90,6 +142,7 @@ class VideoApiView(ApiBaseView):
|
|||
"""
|
||||
|
||||
search_base = "ta_video/_doc/"
|
||||
permission_classes = [AdminWriteOnly]
|
||||
|
||||
def get(self, request, video_id):
|
||||
# pylint: disable=unused-argument
|
||||
|
@ -128,7 +181,7 @@ class VideoApiListView(ApiBaseView):
|
|||
|
||||
|
||||
class VideoProgressView(ApiBaseView):
|
||||
"""resolves to /api/video/<video_id>/
|
||||
"""resolves to /api/video/<video_id>/progress/
|
||||
handle progress status for video
|
||||
"""
|
||||
|
||||
|
@ -153,7 +206,6 @@ class VideoProgressView(ApiBaseView):
|
|||
message = {"position": position, "youtube_id": video_id}
|
||||
RedisArchivist().set_message(key, message)
|
||||
self.response = request.data
|
||||
|
||||
return Response(self.response)
|
||||
|
||||
def delete(self, request, video_id):
|
||||
|
@ -183,7 +235,7 @@ class VideoCommentView(ApiBaseView):
|
|||
|
||||
class VideoSimilarView(ApiBaseView):
|
||||
"""resolves to /api/video/<video-id>/similar/
|
||||
GET: return max 3 videos similar to this
|
||||
GET: return max 6 videos similar to this
|
||||
"""
|
||||
|
||||
search_base = "ta_video/_search/"
|
||||
|
@ -217,6 +269,10 @@ class VideoSponsorView(ApiBaseView):
|
|||
# pylint: disable=unused-argument
|
||||
|
||||
self.get_document(video_id)
|
||||
if not self.response.get("data"):
|
||||
message = {"message": "video not found"}
|
||||
return Response(message, status=404)
|
||||
|
||||
sponsorblock = self.response["data"].get("sponsorblock")
|
||||
|
||||
return Response(sponsorblock)
|
||||
|
@ -260,6 +316,7 @@ class ChannelApiView(ApiBaseView):
|
|||
"""
|
||||
|
||||
search_base = "ta_channel/_doc/"
|
||||
permission_classes = [AdminWriteOnly]
|
||||
|
||||
def get(self, request, channel_id):
|
||||
# pylint: disable=unused-argument
|
||||
|
@ -289,19 +346,32 @@ class ChannelApiListView(ApiBaseView):
|
|||
"""
|
||||
|
||||
search_base = "ta_channel/_search/"
|
||||
valid_filter = ["subscribed"]
|
||||
permission_classes = [AdminWriteOnly]
|
||||
|
||||
def get(self, request):
|
||||
"""get request"""
|
||||
self.get_document_list(request)
|
||||
self.data.update(
|
||||
{"sort": [{"channel_name.keyword": {"order": "asc"}}]}
|
||||
)
|
||||
|
||||
query_filter = request.GET.get("filter", False)
|
||||
must_list = []
|
||||
if query_filter:
|
||||
if query_filter not in self.valid_filter:
|
||||
message = f"invalid url query filder: {query_filter}"
|
||||
print(message)
|
||||
return Response({"message": message}, status=400)
|
||||
|
||||
must_list.append({"term": {"channel_subscribed": {"value": True}}})
|
||||
|
||||
self.data["query"] = {"bool": {"must": must_list}}
|
||||
self.get_document_list(request)
|
||||
|
||||
return Response(self.response)
|
||||
|
||||
@staticmethod
|
||||
def post(request):
|
||||
"""subscribe to list of channels"""
|
||||
def post(self, request):
|
||||
"""subscribe/unsubscribe to list of channels"""
|
||||
data = request.data
|
||||
try:
|
||||
to_add = data["data"]
|
||||
|
@ -310,12 +380,58 @@ class ChannelApiListView(ApiBaseView):
|
|||
print(message)
|
||||
return Response({"message": message}, status=400)
|
||||
|
||||
pending = [i["channel_id"] for i in to_add if i["channel_subscribed"]]
|
||||
url_str = " ".join(pending)
|
||||
subscribe_to.delay(url_str)
|
||||
pending = []
|
||||
for channel_item in to_add:
|
||||
channel_id = channel_item["channel_id"]
|
||||
if channel_item["channel_subscribed"]:
|
||||
pending.append(channel_id)
|
||||
else:
|
||||
self._unsubscribe(channel_id)
|
||||
|
||||
if pending:
|
||||
url_str = " ".join(pending)
|
||||
subscribe_to.delay(url_str, expected_type="channel")
|
||||
|
||||
return Response(data)
|
||||
|
||||
@staticmethod
def _unsubscribe(channel_id: str):
    """flip the subscription of a single channel to unsubscribed"""
    print(f"[{channel_id}] unsubscribe from channel")
    subscription = ChannelSubscription()
    subscription.change_subscribe(channel_id, channel_subscribed=False)
|
||||
|
||||
|
||||
class ChannelApiSearchView(ApiBaseView):
    """resolves to /api/channel/search/
    GET: look up a single channel from the q query parameter
    """

    search_base = "ta_channel/_doc/"

    @staticmethod
    def _fail(message, status_code):
        """build the error response shared by all failure paths"""
        return Response({"message": message, "data": False}, status=status_code)

    def get(self, request):
        """handle get request, search with q parameter"""
        query = request.GET.get("q")
        if not query:
            return self._fail("missing expected q parameter", 400)

        try:
            # only the first parsed item is considered
            parsed = Parser(query).parse()[0]
        except (ValueError, IndexError, AttributeError):
            return self._fail(f"channel not found: {query}", 404)

        if parsed["type"] != "channel":
            return self._fail("expected type channel", 400)

        self.get_document(parsed["url"])

        return Response(self.response, status=self.status_code)
|
||||
|
||||
|
||||
class ChannelApiVideoView(ApiBaseView):
|
||||
"""resolves to /api/channel/<channel-id>/video
|
||||
|
@ -345,15 +461,62 @@ class PlaylistApiListView(ApiBaseView):
|
|||
"""
|
||||
|
||||
search_base = "ta_playlist/_search/"
|
||||
permission_classes = [AdminWriteOnly]
|
||||
valid_playlist_type = ["regular", "custom"]
|
||||
|
||||
def get(self, request):
|
||||
"""handle get request"""
|
||||
self.data.update(
|
||||
{"sort": [{"playlist_name.keyword": {"order": "asc"}}]}
|
||||
)
|
||||
playlist_type = request.GET.get("playlist_type", None)
|
||||
query = {"sort": [{"playlist_name.keyword": {"order": "asc"}}]}
|
||||
if playlist_type is not None:
|
||||
if playlist_type not in self.valid_playlist_type:
|
||||
message = f"invalid playlist_type {playlist_type}"
|
||||
return Response({"message": message}, status=400)
|
||||
|
||||
query.update(
|
||||
{
|
||||
"query": {
|
||||
"term": {"playlist_type": {"value": playlist_type}}
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
self.data.update(query)
|
||||
self.get_document_list(request)
|
||||
return Response(self.response)
|
||||
|
||||
def post(self, request):
    """subscribe/unsubscribe to list of playlists

    expects a body with a "data" list, each item carrying "playlist_id"
    and "playlist_subscribed". Items flagged as subscribed are handed to
    the subscribe_to task in one call, all others get unsubscribed here.
    Responds 400 if the data key is missing or an item is malformed; the
    whole list is validated before any subscription is changed.
    """
    data = request.data
    try:
        to_add = data["data"]
    except KeyError:
        message = "missing expected data key"
        print(message)
        return Response({"message": message}, status=400)

    try:
        # validate everything up front so a broken item further down the
        # list can not leave the request half applied
        items = [
            (item["playlist_id"], item["playlist_subscribed"])
            for item in to_add
        ]
    except (KeyError, TypeError):
        message = "expected playlist_id and playlist_subscribed per item"
        print(message)
        return Response({"message": message}, status=400)

    pending = []
    for playlist_id, playlist_subscribed in items:
        if playlist_subscribed:
            pending.append(playlist_id)
        else:
            self._unsubscribe(playlist_id)

    if pending:
        url_str = " ".join(pending)
        subscribe_to.delay(url_str, expected_type="playlist")

    return Response(data)
|
||||
|
||||
@staticmethod
def _unsubscribe(playlist_id: str):
    """flip the subscription of a single playlist to unsubscribed"""
    print(f"[{playlist_id}] unsubscribe from playlist")
    subscription = PlaylistSubscription()
    subscription.change_subscribe(playlist_id, subscribe_status=False)
|
||||
|
||||
|
||||
class PlaylistApiView(ApiBaseView):
|
||||
"""resolves to /api/playlist/<playlist_id>/
|
||||
|
@ -361,6 +524,8 @@ class PlaylistApiView(ApiBaseView):
|
|||
"""
|
||||
|
||||
search_base = "ta_playlist/_doc/"
|
||||
permission_classes = [AdminWriteOnly]
|
||||
valid_custom_actions = ["create", "remove", "up", "down", "top", "bottom"]
|
||||
|
||||
def get(self, request, playlist_id):
|
||||
# pylint: disable=unused-argument
|
||||
|
@ -368,6 +533,38 @@ class PlaylistApiView(ApiBaseView):
|
|||
self.get_document(playlist_id)
|
||||
return Response(self.response, status=self.status_code)
|
||||
|
||||
def post(self, request, playlist_id):
    """modify the videos of a custom playlist

    expects "action" (one of valid_custom_actions) and "video_id" in the
    request body. "create" adds the video to the playlist, every other
    action is forwarded to move_video together with the hide_watched
    setting of the requesting user.
    Responds 400 if the playlist is not custom, the action is invalid
    or the video_id is missing.
    """
    playlist = YoutubePlaylist(playlist_id)
    if not playlist.is_custom_playlist():
        message = f"playlist with ID {playlist_id} is not custom"
        return Response({"message": message}, status=400)

    action = request.data.get("action")
    if action not in self.valid_custom_actions:
        message = f"invalid action: {action}"
        return Response({"message": message}, status=400)

    video_id = request.data.get("video_id")
    if not video_id:
        # both playlist calls below need a video to act on
        message = "missing expected video_id"
        return Response({"message": message}, status=400)

    if action == "create":
        playlist.add_video_to_playlist(video_id)
    else:
        hide = UserConfig(request.user.id).get_value("hide_watched")
        playlist.move_video(video_id, action, hide_watched=hide)

    return Response({"success": True}, status=status.HTTP_201_CREATED)
|
||||
|
||||
def delete(self, request, playlist_id):
    """delete playlist

    with the delete-videos query parameter set, the videos of the
    playlist are deleted through delete_videos_playlist, otherwise only
    delete_metadata is called. Explicit negatives like
    ?delete-videos=false or ?delete-videos=0 count as not set.
    """
    print(f"{playlist_id}: delete playlist")
    # query parameters arrive as strings, so "false" would be truthy
    raw_flag = str(request.GET.get("delete-videos", ""))
    negatives = ("", "0", "false", "no", "off")
    delete_videos = raw_flag.strip().lower() not in negatives

    playlist = YoutubePlaylist(playlist_id)
    if delete_videos:
        playlist.delete_videos_playlist()
    else:
        playlist.delete_metadata()

    return Response({"success": True})
|
||||
|
||||
|
||||
class PlaylistApiVideoView(ApiBaseView):
|
||||
"""resolves to /api/playlist/<playlist_id>/video
|
||||
|
@ -396,6 +593,7 @@ class DownloadApiView(ApiBaseView):
|
|||
|
||||
search_base = "ta_download/_doc/"
|
||||
valid_status = ["pending", "ignore", "priority"]
|
||||
permission_classes = [AdminOnly]
|
||||
|
||||
def get(self, request, video_id):
|
||||
# pylint: disable=unused-argument
|
||||
|
@ -411,17 +609,15 @@ class DownloadApiView(ApiBaseView):
|
|||
print(message)
|
||||
return Response({"message": message}, status=400)
|
||||
|
||||
pending_video, status_code = PendingInteract(video_id).get_item()
|
||||
_, status_code = PendingInteract(video_id).get_item()
|
||||
if status_code == 404:
|
||||
message = f"{video_id}: item not found {status_code}"
|
||||
return Response({"message": message}, status=404)
|
||||
|
||||
print(f"{video_id}: change status to {item_status}")
|
||||
PendingInteract(video_id, item_status).update_status()
|
||||
if item_status == "priority":
|
||||
download_single.delay(pending_video)
|
||||
else:
|
||||
PendingInteract(video_id, item_status).update_status()
|
||||
RedisQueue(queue_name="dl_queue").clear_item(video_id)
|
||||
download_pending.delay(auto_only=True)
|
||||
|
||||
return Response(request.data)
|
||||
|
||||
|
@ -430,7 +626,7 @@ class DownloadApiView(ApiBaseView):
|
|||
# pylint: disable=unused-argument
|
||||
"""delete single video from queue"""
|
||||
print(f"{video_id}: delete from queue")
|
||||
PendingInteract(video_id=video_id).delete_item()
|
||||
PendingInteract(video_id).delete_item()
|
||||
|
||||
return Response({"success": True})
|
||||
|
||||
|
@ -444,6 +640,7 @@ class DownloadApiListView(ApiBaseView):
|
|||
|
||||
search_base = "ta_download/_search/"
|
||||
valid_filter = ["pending", "ignore"]
|
||||
permission_classes = [AdminOnly]
|
||||
|
||||
def get(self, request):
|
||||
"""get request"""
|
||||
|
@ -474,6 +671,7 @@ class DownloadApiListView(ApiBaseView):
|
|||
def post(request):
|
||||
"""add list of videos to download queue"""
|
||||
data = request.data
|
||||
auto_start = bool(request.GET.get("autostart"))
|
||||
try:
|
||||
to_add = data["data"]
|
||||
except KeyError:
|
||||
|
@ -490,7 +688,7 @@ class DownloadApiListView(ApiBaseView):
|
|||
print(message)
|
||||
return Response({"message": message}, status=400)
|
||||
|
||||
extrac_dl.delay(youtube_ids)
|
||||
extrac_dl.delay(youtube_ids, auto_start=auto_start)
|
||||
|
||||
return Response(data)
|
||||
|
||||
|
@ -517,7 +715,11 @@ class PingView(ApiBaseView):
|
|||
@staticmethod
|
||||
def get(request):
|
||||
"""get pong"""
|
||||
data = {"response": "pong", "user": request.user.id}
|
||||
data = {
|
||||
"response": "pong",
|
||||
"user": request.user.id,
|
||||
"version": ReleaseVersion().get_local_version(),
|
||||
}
|
||||
return Response(data)
|
||||
|
||||
|
||||
|
@ -541,35 +743,14 @@ class LoginApiView(ObtainAuthToken):
|
|||
return Response({"token": token.key, "user_id": user.pk})
|
||||
|
||||
|
||||
class TaskApiView(ApiBaseView):
|
||||
"""resolves to /api/task/
|
||||
GET: check if ongoing background task
|
||||
POST: start a new background task
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get(request):
|
||||
"""handle get request"""
|
||||
# pylint: disable=unused-argument
|
||||
response = {"rescan": False, "downloading": False}
|
||||
for key in response.keys():
|
||||
response[key] = RedisArchivist().is_locked(key)
|
||||
|
||||
return Response(response)
|
||||
|
||||
def post(self, request):
|
||||
"""handle post request"""
|
||||
response = TaskHandler(request.data).run_task()
|
||||
|
||||
return Response(response)
|
||||
|
||||
|
||||
class SnapshotApiListView(ApiBaseView):
|
||||
"""resolves to /api/snapshot/
|
||||
GET: returns snashot config plus list of existing snapshots
|
||||
GET: returns snapshot config plus list of existing snapshots
|
||||
POST: take snapshot now
|
||||
"""
|
||||
|
||||
permission_classes = [AdminOnly]
|
||||
|
||||
@staticmethod
|
||||
def get(request):
|
||||
"""handle get request"""
|
||||
|
@ -594,6 +775,8 @@ class SnapshotApiView(ApiBaseView):
|
|||
DELETE: delete a snapshot
|
||||
"""
|
||||
|
||||
permission_classes = [AdminOnly]
|
||||
|
||||
@staticmethod
|
||||
def get(request, snapshot_id):
|
||||
"""handle get request"""
|
||||
|
@ -628,12 +811,195 @@ class SnapshotApiView(ApiBaseView):
|
|||
return Response(response)
|
||||
|
||||
|
||||
class BackupApiListView(ApiBaseView):
    """resolves to /api/backup/
    GET: list the zip backups that are available
    POST: start the zip backup task right away
    """

    permission_classes = [AdminOnly]
    task_name = "run_backup"

    @staticmethod
    def get(request):
        """return all backup files"""
        # pylint: disable=unused-argument
        return Response(ElasticBackup().get_all_backup_files())

    def post(self, request):
        """start the backup task and report its task id"""
        # pylint: disable=unused-argument
        started = TaskCommand().start(self.task_name)

        return Response(
            {"message": "backup task started", "task_id": started["task_id"]}
        )
|
||||
|
||||
|
||||
class BackupApiView(ApiBaseView):
    """resolves to /api/backup/<filename>/
    GET: details of one backup file
    POST: start restoring from the backup file
    DELETE: remove the backup file
    """

    permission_classes = [AdminOnly]
    task_name = "restore_backup"

    @staticmethod
    def get(request, filename):
        """return the data of a single backup file, 404 if there is none"""
        # pylint: disable=unused-argument
        backup_file = ElasticBackup().build_backup_file_data(filename)
        if backup_file:
            return Response(backup_file)

        return Response({"message": "file not found"}, status=404)

    def post(self, request, filename):
        """queue the restore task for the backup file"""
        # pylint: disable=unused-argument
        task = run_restore_backup.delay(filename)

        return Response(
            {
                "message": "backup restore task started",
                "filename": filename,
                "task_id": task.id,
            }
        )

    @staticmethod
    def delete(request, filename):
        """delete the backup file, 404 if delete_file reports nothing"""
        # pylint: disable=unused-argument
        deleted = ElasticBackup().delete_file(filename)
        if deleted:
            return Response({"message": f"file {filename} deleted"})

        return Response({"message": "file not found"}, status=404)
|
||||
|
||||
|
||||
class TaskListView(ApiBaseView):
    """resolves to /api/task-name/
    GET: list every stored task result
    """

    permission_classes = [AdminOnly]

    def get(self, request):
        """return all results known to the task manager"""
        # pylint: disable=unused-argument
        return Response(TaskManager().get_all_results())
|
||||
|
||||
|
||||
class TaskNameListView(ApiBaseView):
    """resolves to /api/task-name/<task-name>/
    GET: list the stored results of one task
    POST: start a new background process for the task
    """

    permission_classes = [AdminOnly]

    def get(self, request, task_name):
        """return stored results for task_name, 404 for unknown names"""
        # pylint: disable=unused-argument
        if task_name in BaseTask.TASK_CONFIG:
            return Response(TaskManager().get_tasks_by_name(task_name))

        return Response({"message": "invalid task name"}, status=404)

    def post(self, request, task_name):
        """
        start the task
        404 for invalid task_name
        400 if task can't be started here without argument
        """
        # pylint: disable=unused-argument
        task_config = BaseTask.TASK_CONFIG.get(task_name)
        if not task_config:
            return Response({"message": "invalid task name"}, status=404)

        startable = task_config.get("api-start")
        if not startable:
            error = {"message": "can not start task through this endpoint"}
            return Response(error, status=400)

        started = TaskCommand().start(task_name)

        return Response({"message": started})
|
||||
|
||||
|
||||
class TaskIDView(ApiBaseView):
    """resolves to /api/task-id/<task-id>/
    GET: return details of task id
    POST: send a command from valid_commands to the task
    """

    valid_commands = ["stop", "kill"]
    permission_classes = [AdminOnly]

    def get(self, request, task_id):
        """handle get request, 404 if the task id is unknown"""
        # pylint: disable=unused-argument
        task_result = TaskManager().get_task(task_id)
        if not task_result:
            message = {"message": "task id not found"}
            return Response(message, status=404)

        return Response(task_result)

    def post(self, request, task_id):
        """post command to task

        400 for a missing or invalid command
        404 if the task id is unknown
        400 if the config of the task does not allow the command
        """
        command = request.data.get("command")
        if not command or command not in self.valid_commands:
            message = {"message": "no valid command found"}
            return Response(message, status=400)

        task_result = TaskManager().get_task(task_id)
        if not task_result:
            message = {"message": "task id not found"}
            return Response(message, status=404)

        # a stored result can carry a name without entry in TASK_CONFIG,
        # treat that like a task that allows no commands instead of
        # failing on None
        task_conf = BaseTask.TASK_CONFIG.get(task_result.get("name")) or {}
        if command == "stop":
            if not task_conf.get("api-stop"):
                message = {"message": "task can not be stopped"}
                return Response(message, status=400)

            message_key = self._build_message_key(task_conf, task_id)
            TaskCommand().stop(task_id, message_key)
        elif command == "kill":
            # kill is gated by the same api-stop flag as stop
            if not task_conf.get("api-stop"):
                message = {"message": "task can not be killed"}
                return Response(message, status=400)

            TaskCommand().kill(task_id)

        return Response({"message": "command sent"})

    def _build_message_key(self, task_conf, task_id):
        """build message key to forward command to notification

        the key is made of the task group and the first dash separated
        segment of the task id
        """
        return f"message:{task_conf.get('group')}:{task_id.split('-')[0]}"
|
||||
|
||||
|
||||
class RefreshView(ApiBaseView):
|
||||
"""resolves to /api/refresh/
|
||||
GET: get refresh progress
|
||||
POST: start a manual refresh task
|
||||
"""
|
||||
|
||||
permission_classes = [AdminOnly]
|
||||
|
||||
def get(self, request):
|
||||
"""handle get request"""
|
||||
request_type = request.GET.get("type")
|
||||
|
@ -660,6 +1026,42 @@ class RefreshView(ApiBaseView):
|
|||
return Response(data)
|
||||
|
||||
|
||||
class UserConfigView(ApiBaseView):
    """resolves to /api/config/user/
    GET: config of the requesting user
    POST: change config values of the requesting user
    """

    @staticmethod
    def _config_response(user_conf, user_id):
        """build response from the current config plus the user id"""
        payload = user_conf.get_config()
        payload.update({"user_id": user_id})
        return Response(payload)

    def get(self, request):
        """return config of the requesting user"""
        user_id = request.user.id
        return self._config_response(UserConfig(user_id), user_id)

    def post(self, request):
        """set every key of the request body, stop with 400 on first error"""
        user_id = request.user.id
        user_conf = UserConfig(user_id)

        for key, value in request.data.items():
            try:
                user_conf.set_value(key, value)
            except ValueError as err:
                error = {
                    "status": "Bad Request",
                    "message": f"failed updating {key} to '{value}', {err}",
                }
                return Response(error, status=400)

        return self._config_response(user_conf, user_id)
|
||||
|
||||
|
||||
class CookieView(ApiBaseView):
|
||||
"""resolves to /api/cookie/
|
||||
GET: check if cookie is enabled
|
||||
|
@ -667,6 +1069,8 @@ class CookieView(ApiBaseView):
|
|||
PUT: import cookie
|
||||
"""
|
||||
|
||||
permission_classes = [AdminOnly]
|
||||
|
||||
@staticmethod
|
||||
def get(request):
|
||||
"""handle get request"""
|
||||
|
@ -747,3 +1151,126 @@ class SearchView(ApiBaseView):
|
|||
|
||||
search_results = SearchForm().multi_search(search_query)
|
||||
return Response(search_results)
|
||||
|
||||
|
||||
class TokenView(ApiBaseView):
    """resolves to /api/token/
    DELETE: revoke the token
    """

    permission_classes = [AdminOnly]

    @staticmethod
    def delete(request):
        """revoke the API token of the requesting user

        the token is removed through the Token queryset, so the request
        also succeeds if the user has no token, for example when
        authenticated by session only
        """
        print("revoke API token")
        Token.objects.filter(user=request.user).delete()
        return Response({"success": True})
|
||||
|
||||
|
||||
class NotificationView(ApiBaseView):
    """resolves to /api/notification/
    GET: list of notifications
    the filter query parameter narrows the messages down to one group
    """

    valid_filters = ["download", "settings", "channel"]

    def get(self, request):
        """list notifications, values not in valid_filters are ignored"""
        key_parts = ["message"]
        filter_by = request.GET.get("filter", None)
        if filter_by in self.valid_filters:
            key_parts.append(filter_by)

        notifications = RedisArchivist().list_items(":".join(key_parts))

        return Response(notifications)
|
||||
|
||||
|
||||
class StatVideoView(ApiBaseView):
    """resolves to /api/stats/video/
    GET: video stats
    """

    def get(self, request):
        """return the processed Video aggregation"""
        # pylint: disable=unused-argument
        stats = Video().process()
        return Response(stats)
|
||||
|
||||
|
||||
class StatChannelView(ApiBaseView):
    """resolves to /api/stats/channel/
    GET: channel stats
    """

    def get(self, request):
        """return the processed Channel aggregation"""
        # pylint: disable=unused-argument
        stats = Channel().process()
        return Response(stats)
|
||||
|
||||
|
||||
class StatPlaylistView(ApiBaseView):
    """resolves to /api/stats/playlist/
    GET: playlist stats
    """

    def get(self, request):
        """return the processed Playlist aggregation"""
        # pylint: disable=unused-argument
        stats = Playlist().process()
        return Response(stats)
|
||||
|
||||
|
||||
class StatDownloadView(ApiBaseView):
    """resolves to /api/stats/download/
    GET: download stats
    """

    def get(self, request):
        """return the processed Download aggregation"""
        # pylint: disable=unused-argument
        stats = Download().process()
        return Response(stats)
|
||||
|
||||
|
||||
class StatWatchProgress(ApiBaseView):
    """resolves to /api/stats/watchprogress/
    GET: watch/unwatch progress stats
    """

    def get(self, request):
        """return the processed WatchProgress aggregation"""
        # pylint: disable=unused-argument
        stats = WatchProgress().process()
        return Response(stats)
|
||||
|
||||
|
||||
class StatDownloadHist(ApiBaseView):
    """resolves to /api/stats/downloadhist/
    GET: histogram of downloaded video counts for the last days
    """

    def get(self, request):
        """return the processed DownloadHist aggregation"""
        # pylint: disable=unused-argument
        histogram = DownloadHist().process()
        return Response(histogram)
|
||||
|
||||
|
||||
class StatBiggestChannel(ApiBaseView):
    """resolves to /api/stats/biggestchannels/
    GET: return biggest channels
    param: order, one of order_choices, defaults to doc_count
    """

    order_choices = ["doc_count", "duration", "media_size"]

    def get(self, request):
        """handle get request, 400 for an order not in order_choices"""
        # a missing or empty order parameter falls back to the default,
        # an empty string is never forwarded to the aggregation
        order = request.GET.get("order") or "doc_count"
        if order not in self.order_choices:
            message = {"message": f"invalid order parameter {order}"}
            return Response(message, status=400)

        return Response(BiggestChannel(order).process())
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
"""
|
||||
Functionality:
|
||||
- check that all connections are working
|
||||
"""
|
||||
|
||||
from time import sleep
|
||||
|
||||
import requests
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
TOPIC = """
|
||||
|
||||
#######################
|
||||
# Connection check #
|
||||
#######################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """management command, check the Redis and Elasticsearch connections

    every check writes its progress to stdout and raises CommandError
    when it fails
    """

    # seconds to keep polling Elasticsearch, checked in steps of 5
    TIMEOUT = 120
    # accepted Elasticsearch version range
    MIN_MAJOR, MAX_MAJOR = 8, 8
    MIN_MINOR = 0

    # pylint: disable=no-member
    help = "Check connections"

    def handle(self, *args, **options):
        """run all commands"""
        self.stdout.write(TOPIC)
        self._redis_connection_check()
        self._redis_config_set()
        self._es_connection_check()
        self._es_version_check()
        self._es_path_check()

    def _redis_connection_check(self):
        """check if redis connection is established

        sends PING up to 5 times, raises CommandError if none succeeds
        """
        self.stdout.write("[1] connect to Redis")
        redis_conn = RedisArchivist().conn
        for _ in range(5):
            try:
                pong = redis_conn.execute_command("PING")
                if pong:
                    self.stdout.write(
                        self.style.SUCCESS(" ✓ Redis connection verified")
                    )
                    return

            except Exception:  # pylint: disable=broad-except
                self.stdout.write(" ... retry Redis connection")
                sleep(2)

        message = " 🗙 Redis connection failed"
        self.stdout.write(self.style.ERROR(f"{message}"))
        # NOTE(review): presumably repeated outside the try block to
        # surface the underlying connection error, confirm
        RedisArchivist().exec("PING")
        sleep(60)
        raise CommandError(message)

    def _redis_config_set(self):
        """set config for redis if not set already"""
        self.stdout.write("[2] set Redis config")
        redis_conn = RedisArchivist().conn
        timeout_is = int(redis_conn.config_get("timeout").get("timeout"))
        if not timeout_is:
            # a timeout of 0 is replaced with 3600
            redis_conn.config_set("timeout", 3600)

        self.stdout.write(self.style.SUCCESS(" ✓ Redis config set"))

    def _es_connection_check(self):
        """wait for elasticsearch connection

        polls the ES root endpoint until it answers with status 200, then
        waits for the cluster health to reach yellow. Raises CommandError
        if ES did not answer with 200 within the retries.
        """
        self.stdout.write("[3] connect to Elastic Search")
        total = self.TIMEOUT // 5
        for i in range(total):
            self.stdout.write(f" ... waiting for ES [{i}/{total}]")
            try:
                _, status_code = ElasticWrap("/").get(
                    timeout=1, print_error=False
                )
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
            ):
                sleep(5)
                continue

            if status_code and status_code == 200:
                path = "_cluster/health?wait_for_status=yellow&timeout=60s"
                _, _ = ElasticWrap(path).get(timeout=60)
                self.stdout.write(
                    self.style.SUCCESS(" ✓ ES connection established")
                )
                return

            # ES answered but is not ready yet, wait like on a connection
            # error so the retries are not used up immediately
            sleep(5)

        try:
            # one more request to collect details for the error output
            response, status_code = ElasticWrap("/").get(
                timeout=1, print_error=False
            )
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ) as err:
            # still unreachable, report the exception instead of letting
            # it hide the failure message below
            response, status_code = str(err), None

        message = " 🗙 ES connection failed"
        self.stdout.write(self.style.ERROR(f"{message}"))
        self.stdout.write(f" error message: {response}")
        self.stdout.write(f" status code: {status_code}")
        sleep(60)
        raise CommandError(message)

    def _es_version_check(self):
        """check for minimal elasticsearch version

        the major version has to be between MIN_MAJOR and MAX_MAJOR, for
        MIN_MAJOR the minor version has to be at least MIN_MINOR
        """
        self.stdout.write("[4] Elastic Search version check")
        response, _ = ElasticWrap("/").get()
        version = response["version"]["number"]
        major, minor = (int(part) for part in version.split(".")[:2])

        major_ok = self.MIN_MAJOR <= major <= self.MAX_MAJOR
        minor_ok = major > self.MIN_MAJOR or minor >= self.MIN_MINOR
        if major_ok and minor_ok:
            self.stdout.write(
                self.style.SUCCESS(" ✓ ES version check passed")
            )
            return

        message = (
            " 🗙 ES version check failed. "
            + f"Expected {self.MIN_MAJOR}.{self.MIN_MINOR} but got {version}"
        )
        self.stdout.write(self.style.ERROR(f"{message}"))
        sleep(60)
        raise CommandError(message)

    def _es_path_check(self):
        """check that path.repo var is set

        passes as soon as one node with a data or master role reports a
        path.repo setting, raises CommandError otherwise
        """
        self.stdout.write("[5] check ES path.repo env var")
        response, _ = ElasticWrap("_nodes/_all/settings").get()
        snapshot_roles = {
            "data",
            "data_cold",
            "data_content",
            "data_frozen",
            "data_hot",
            "data_warm",
            "master",
        }
        for node in response["nodes"].values():
            if not snapshot_roles.intersection(node["roles"]):
                # nodes without any of these roles are not considered
                continue

            if node["settings"].get("path", {}).get("repo"):
                self.stdout.write(
                    self.style.SUCCESS(" ✓ path.repo env var is set")
                )
                return

        message = (
            " 🗙 path.repo env var not found. "
            + "set the following env var to the ES container:\n"
            + " path.repo="
            + EnvironmentSettings.ES_SNAPSHOT_DIR
        )
        self.stdout.write(self.style.ERROR(message))
        sleep(60)
        raise CommandError(message)
|
|
@ -0,0 +1,193 @@
|
|||
"""
|
||||
Functionality:
|
||||
- Check environment at startup
|
||||
- Process config file overwrites from env var
|
||||
- Stop startup on error
|
||||
- python manage.py ta_envcheck
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from home.models import Account
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
LOGO = """
|
||||
|
||||
.... .....
|
||||
...'',;:cc,. .;::;;,'...
|
||||
..,;:cccllclc, .:ccllllcc;,..
|
||||
..,:cllcc:;,'.',. ....'',;ccllc:,..
|
||||
..;cllc:,'.. ...,:cccc:'.
|
||||
.;cccc;.. ..,:ccc:'.
|
||||
.ckkkOkxollllllllllllc. .,:::;. .,cclc;
|
||||
.:0MMMMMMMMMMMMMMMMMMMX: .cNMMMWx. .;clc:
|
||||
.;lOXK0000KNMMMMX00000KO; ;KMMMMMNl. .;ccl:,.
|
||||
.;:c:'.....kMMMNo........ 'OMMMWMMMK: '::;;'.
|
||||
....... .xMMMNl .dWMMXdOMMMO' ........
|
||||
.:cc:;. .xMMMNc .lNMMNo.:XMMWx. .:cl:.
|
||||
.:llc,. .:xxxd, ;KMMMk. .oWMMNl. .:llc'
|
||||
.cll:. .;:;;:::,. 'OMMMK:';''kWMMK: .;llc,
|
||||
.cll:. .,;;;;;;,. .,xWMMNl.:l:.;KMMMO' .;llc'
|
||||
.:llc. .cOOOk; .lKNMMWx..:l:..lNMMWx. .:llc'
|
||||
.;lcc,. .xMMMNc :KMMMM0, .:lc. .xWMMNl.'ccl:.
|
||||
.cllc. .xMMMNc 'OMMMMXc...:lc...,0MMMKl:lcc,.
|
||||
.,ccl:. .xMMMNc .xWMMMWo.,;;:lc;;;.cXMMMXdcc;.
|
||||
.,clc:. .xMMMNc .lNMMMWk. .':clc:,. .dWMMW0o;.
|
||||
.,clcc,. .ckkkx; .okkkOx, .';,. 'kKKK0l.
|
||||
.':lcc:'..... . .. ..,;cllc,.
|
||||
.,cclc,.... ....;clc;..
|
||||
..,:,..,c:'.. ...';:,..,:,.
|
||||
....:lcccc:;,'''.....'',;;:clllc,....
|
||||
.'',;:cllllllccccclllllcc:,'..
|
||||
...'',,;;;;;;;;;,''...
|
||||
.....
|
||||
|
||||
"""
|
||||
|
||||
TOPIC = """
|
||||
#######################
|
||||
# Environment Setup #
|
||||
#######################
|
||||
|
||||
"""
|
||||
|
||||
# env vars that must be set, verified by Command._expected_vars
EXPECTED_ENV_VARS = [
    "TA_USERNAME",
    "TA_PASSWORD",
    "ELASTIC_PASSWORD",
    "ES_URL",
    "TA_HOST",
]
# installation docs link, appended to the missing env var error message
INST = "https://github.com/tubearchivist/tubearchivist#installing-and-updating"
# nginx site config, rewritten by the port and cast overwrites
NGINX = "/etc/nginx/sites-available/default"
# uwsgi config, rewritten by the TA_UWSGI_PORT overwrite
UWSGI = "/app/uwsgi.ini"
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """command framework

    Checks the environment before startup: verifies the expected env
    vars, applies the optional nginx/uwsgi config file overwrites and
    creates the initial superuser when none exists.
    """

    # pylint: disable=no-member
    help = "Check environment before startup"

    def handle(self, *args, **options):
        """run all commands"""
        self.stdout.write(LOGO)
        self.stdout.write(TOPIC)
        self._expected_vars()
        self._elastic_user_overwrite()
        self._ta_port_overwrite()
        self._ta_uwsgi_overwrite()
        self._enable_cast_overwrite()
        self._create_superuser()

    def _expected_vars(self):
        """check if expected env vars are set

        Raises:
            CommandError: for the first variable of EXPECTED_ENV_VARS
                that is missing or empty.
        """
        self.stdout.write("[1] checking expected env vars")
        env = os.environ
        for var in EXPECTED_ENV_VARS:
            if not env.get(var):
                message = f" 🗙 expected env var {var} not set\n {INST}"
                self.stdout.write(self.style.ERROR(message))
                raise CommandError(message)

        message = " ✓ all expected env vars are set"
        self.stdout.write(self.style.SUCCESS(message))

    def _elastic_user_overwrite(self):
        """check for ELASTIC_USER overwrite"""
        self.stdout.write("[2] check ES user overwrite")
        env = EnvironmentSettings.ES_USER
        self.stdout.write(self.style.SUCCESS(f" ✓ ES user is set to {env}"))

    def _ta_port_overwrite(self):
        """set TA_PORT overwrite for nginx"""
        self.stdout.write("[3] check TA_PORT overwrite")
        overwrite = EnvironmentSettings.TA_PORT
        if not overwrite:
            self.stdout.write(self.style.SUCCESS(" TA_PORT is not set"))
            return

        regex = re.compile(r"listen [0-9]{1,5}")
        to_overwrite = f"listen {overwrite}"
        changed = file_overwrite(NGINX, regex, to_overwrite)
        if changed:
            message = f" ✓ TA_PORT changed to {overwrite}"
        else:
            message = f" ✓ TA_PORT already set to {overwrite}"

        self.stdout.write(self.style.SUCCESS(message))

    def _ta_uwsgi_overwrite(self):
        """set TA_UWSGI_PORT overwrite

        The port is written to both the nginx and the uwsgi config. The
        "changed" message is reported when either file was modified.
        """
        self.stdout.write("[4] check TA_UWSGI_PORT overwrite")
        overwrite = EnvironmentSettings.TA_UWSGI_PORT
        if not overwrite:
            message = " TA_UWSGI_PORT is not set"
            self.stdout.write(self.style.SUCCESS(message))
            return

        # nginx
        regex = re.compile(r"uwsgi_pass localhost:[0-9]{1,5}")
        to_overwrite = f"uwsgi_pass localhost:{overwrite}"
        nginx_changed = file_overwrite(NGINX, regex, to_overwrite)

        # uwsgi
        regex = re.compile(r"socket = :[0-9]{1,5}")
        to_overwrite = f"socket = :{overwrite}"
        uwsgi_changed = file_overwrite(UWSGI, regex, to_overwrite)

        # keep both results, the second must not mask the first
        if nginx_changed or uwsgi_changed:
            message = f" ✓ TA_UWSGI_PORT changed to {overwrite}"
        else:
            message = f" ✓ TA_UWSGI_PORT already set to {overwrite}"

        self.stdout.write(self.style.SUCCESS(message))

    def _enable_cast_overwrite(self):
        """cast workaround, remove auth for static files in nginx"""
        self.stdout.write("[5] check ENABLE_CAST overwrite")
        overwrite = EnvironmentSettings.ENABLE_CAST
        if not overwrite:
            self.stdout.write(self.style.SUCCESS(" ENABLE_CAST is not set"))
            return

        # drop the whole auth_request line including its leading indent
        regex = re.compile(r"[^\S\r\n]*auth_request /api/ping/;\n")
        changed = file_overwrite(NGINX, regex, "")
        if changed:
            message = " ✓ process nginx to enable Cast"
        else:
            message = " ✓ Cast is already enabled in nginx"

        self.stdout.write(self.style.SUCCESS(message))

    def _create_superuser(self):
        """create superuser if not exist"""
        self.stdout.write("[6] create superuser")
        # exists() avoids loading all matching rows just for a truth test
        is_created = Account.objects.filter(is_superuser=True).exists()
        if is_created:
            message = " superuser already created"
            self.stdout.write(self.style.SUCCESS(message))
            return

        name = EnvironmentSettings.TA_USERNAME
        password = EnvironmentSettings.TA_PASSWORD
        Account.objects.create_superuser(name, password)
        message = f" ✓ new superuser with name {name} created"
        self.stdout.write(self.style.SUCCESS(message))
|
||||
|
||||
|
||||
def file_overwrite(file_path, regex, overwrite):
    """replace all regex matches in file with overwrite

    The file is only rewritten when the substitution changes its
    content. Returns True when the file was rewritten, False otherwise.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        original = handle.read()

    updated = re.sub(regex, overwrite, original)
    if updated != original:
        with open(file_path, "w", encoding="utf-8") as handle:
            handle.write(updated)

        return True

    return False
|
|
@ -0,0 +1,185 @@
|
|||
"""
|
||||
filepath migration from v0.3.6 to v0.3.7
|
||||
not getting called at startup any more, to run manually if needed:
|
||||
python manage.py ta_migpath
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
TOPIC = """
|
||||
|
||||
########################
|
||||
# Filesystem Migration #
|
||||
########################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """manual entry point for the filepath migration"""

    # pylint: disable=no-member

    def handle(self, *args, **options):
        """run the migration steps in order and report progress"""
        out = self.stdout
        success = self.style.SUCCESS
        out.write(TOPIC)

        migration = FolderMigration()
        pending = migration.get_to_migrate()
        if not pending:
            out.write(success(" no channel migration needed\n"))
            return

        out.write(success(" migrating channels"))
        channel_count = migration.create_folders(pending)
        out.write(success(f" created {channel_count} channels"))

        out.write(success(f" migrating {len(pending)} videos"))
        migration.migrate_videos(pending)
        out.write(success(" update videos in index"))
        # flush whatever is still queued after the last batch
        migration.send_bulk()

        out.write(success(" cleanup old folders"))
        migration.delete_old()

        out.write(success(" ✓ migration completed\n"))
|
||||
|
||||
|
||||
class FolderMigration:
    """move media files into per channel id folders and update the index"""

    def __init__(self):
        self.videos = EnvironmentSettings.MEDIA_DIR
        self.bulk_list = []

    def get_to_migrate(self):
        """query ta_video for documents not yet at channel_id/youtube_id.mp4"""
        expected_url_script = (
            "doc['media_url'].value == "
            "doc['channel.channel_id'].value + '/'"
            " + doc['youtube_id'].value + '.mp4'"
        )
        not_migrated = {
            "bool": {"must_not": [{"script": {"script": expected_url_script}}]}
        }
        fields = ["youtube_id", "media_url", "channel.channel_id", "subtitles"]
        data = {"query": not_migrated, "_source": fields}

        return IndexPaginate("ta_video", data).get_results()

    def create_folders(self, to_migrate):
        """create one folder per channel id, return number of channels"""
        uid = EnvironmentSettings.HOST_UID
        gid = EnvironmentSettings.HOST_GID
        channel_ids = {video["channel"]["channel_id"] for video in to_migrate}

        for channel_id in channel_ids:
            channel_folder = os.path.join(self.videos, channel_id)
            os.makedirs(channel_folder, exist_ok=True)
            # ownership is only changed when both ids are configured
            if uid and gid:
                os.chown(channel_folder, uid, gid)

        return len(channel_ids)

    def migrate_videos(self, to_migrate):
        """move files of all videos and queue the matching index updates"""
        total = len(to_migrate)
        for idx, video in enumerate(to_migrate):
            media_url = self._move_video_file(video)
            if not media_url:
                # source file missing, nothing to update for this video
                continue

            subtitles = self._move_subtitles(video)
            doc = {"media_url": media_url}
            if subtitles:
                doc["subtitles"] = subtitles

            action = {
                "update": {"_id": video["youtube_id"], "_index": "ta_video"}
            }
            self.bulk_list.extend(
                [json.dumps(action), json.dumps({"doc": doc})]
            )
            if not idx % 1000:
                print(f"processing migration [{idx}/{total}]")
                self.send_bulk()

    def _move_video_file(self, video):
        """move video file, return new media_url or False if not found"""
        source_path = os.path.join(self.videos, video["media_url"])
        if os.path.exists(source_path):
            media_url = os.path.join(
                video["channel"]["channel_id"], video["youtube_id"] + ".mp4"
            )
            os.rename(source_path, os.path.join(self.videos, media_url))

            return media_url

        print(f"did not find expected video at {source_path}")
        return False

    def _move_subtitles(self, video):
        """move subtitle files, return updated subtitle list or False"""
        subtitles = video.get("subtitles")
        if not subtitles:
            return False

        channel_id = video["channel"]["channel_id"]
        for item in subtitles:
            source_path = os.path.join(self.videos, item["media_url"])
            if os.path.exists(source_path):
                file_name = f"{video.get('youtube_id')}.{item.get('lang')}.vtt"
                media_url = os.path.join(channel_id, file_name)
                os.rename(source_path, os.path.join(self.videos, media_url))
                item["media_url"] = media_url
            else:
                # entry keeps its old media_url when the file is missing
                print(f"did not find expected subtitle at {source_path}")

        return subtitles

    def send_bulk(self):
        """post queued actions to the bulk endpoint and reset the queue"""
        if not self.bulk_list:
            print("nothing to update")
            return

        # trailing newline entry terminates the ndjson payload
        self.bulk_list.append("\n")
        payload = "\n".join(self.bulk_list)
        response, status = ElasticWrap("_bulk?refresh=true").post(
            data=payload, ndjson=True
        )
        if status != 200:
            print(response)

        self.bulk_list = []

    def delete_old(self):
        """remove folders that hold nothing but ignored files"""
        for name in ignore_filelist(os.listdir(self.videos)):
            candidate = os.path.join(self.videos, name)
            if os.path.isdir(candidate) and not ignore_filelist(
                os.listdir(candidate)
            ):
                shutil.rmtree(candidate)
|
|
@ -0,0 +1,279 @@
|
|||
"""
|
||||
Functionality:
|
||||
- Application startup
|
||||
- Apply migrations
|
||||
"""
|
||||
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.index_setup import ElasitIndexWrap
|
||||
from home.src.es.snapshot import ElasticSnapshot
|
||||
from home.src.ta.config import AppConfig, ReleaseVersion
|
||||
from home.src.ta.helper import clear_dl_cache
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.task_manager import TaskManager
|
||||
from home.src.ta.users import UserConfig
|
||||
|
||||
TOPIC = """
|
||||
|
||||
#######################
|
||||
# Application Start #
|
||||
#######################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """command framework

    Runs the application start tasks: sync config defaults, create the
    cache folders, clear leftover redis keys, tasks and download cache,
    then run the version check and the migrations.
    """

    # pylint: disable=no-member

    def handle(self, *args, **options):
        """run all commands"""
        self.stdout.write(TOPIC)
        self._sync_redis_state()
        self._make_folders()
        self._clear_redis_keys()
        self._clear_tasks()
        self._clear_dl_cache()
        self._mig_clear_failed_versioncheck()
        self._version_check()
        self._mig_index_setup()
        self._mig_snapshot_check()
        self._mig_move_users_to_es()
        self._mig_custom_playlist()

    def _sync_redis_state(self):
        """make sure redis gets new config.json values"""
        self.stdout.write("[1] set new config.json values")
        needs_update = AppConfig().load_new_defaults()
        if needs_update:
            self.stdout.write(
                self.style.SUCCESS(" ✓ new config values set")
            )
        else:
            self.stdout.write(self.style.SUCCESS(" no new config values"))

    def _make_folders(self):
        """make expected cache folders"""
        self.stdout.write("[2] create expected cache folders")
        folders = [
            "backup",
            "channels",
            "download",
            "import",
            "playlists",
            "videos",
        ]
        cache_dir = EnvironmentSettings.CACHE_DIR
        for folder in folders:
            folder_path = os.path.join(cache_dir, folder)
            os.makedirs(folder_path, exist_ok=True)

        self.stdout.write(self.style.SUCCESS(" ✓ expected folders created"))

    def _clear_redis_keys(self):
        """make sure there are no leftover locks or keys set in redis"""
        self.stdout.write("[3] clear leftover keys in redis")
        all_keys = [
            "dl_queue_id",
            "dl_queue",
            "downloading",
            "manual_import",
            "reindex",
            "rescan",
            "run_backup",
            "startup_check",
            "reindex:ta_video",
            "reindex:ta_channel",
            "reindex:ta_playlist",
        ]

        redis_con = RedisArchivist()
        has_changed = False
        for key in all_keys:
            if redis_con.del_message(key):
                self.stdout.write(
                    self.style.SUCCESS(f" ✓ cleared key {key}")
                )
                has_changed = True

        if not has_changed:
            self.stdout.write(self.style.SUCCESS(" no keys found"))

    def _clear_tasks(self):
        """clear tasks and messages"""
        self.stdout.write("[4] clear task leftovers")
        TaskManager().fail_pending()
        redis_con = RedisArchivist()
        to_delete = redis_con.list_keys("message:")
        if to_delete:
            for key in to_delete:
                redis_con.del_message(key)

            self.stdout.write(
                self.style.SUCCESS(f" ✓ cleared {len(to_delete)} messages")
            )

    def _clear_dl_cache(self):
        """clear leftover files from dl cache"""
        self.stdout.write("[5] clear leftover files from dl cache")
        leftover_files = clear_dl_cache(EnvironmentSettings.CACHE_DIR)
        if leftover_files:
            self.stdout.write(
                self.style.SUCCESS(f" ✓ cleared {leftover_files} files")
            )
        else:
            self.stdout.write(self.style.SUCCESS(" no files found"))

    def _version_check(self):
        """remove new release key if updated now"""
        self.stdout.write("[6] check for first run after update")
        new_version = ReleaseVersion().is_updated()
        if new_version:
            self.stdout.write(
                self.style.SUCCESS(f" ✓ update to {new_version} completed")
            )
        else:
            self.stdout.write(self.style.SUCCESS(" no new update found"))

    def _mig_index_setup(self):
        """migration: validate index mappings"""
        self.stdout.write("[MIGRATION] validate index mappings")
        ElasitIndexWrap().setup()

    def _mig_snapshot_check(self):
        """migration setup snapshots"""
        self.stdout.write("[MIGRATION] setup snapshots")
        ElasticSnapshot().setup()

    def _mig_clear_failed_versioncheck(self):
        """hotfix for v0.4.5, clearing faulty versioncheck"""
        ReleaseVersion().clear_fail()

    def _mig_move_users_to_es(self):
        """migration: update from 0.4.1 to 0.4.2 move user config to ES

        Every redis key "<user>:<suffix>" with a truthy "status" value
        is written to the user config under its new name and then
        deleted from redis.

        Raises:
            CommandError: when moving any setting fails, chained to the
                original exception.
        """
        self.stdout.write("[MIGRATION] move user configuration to ES")
        redis = RedisArchivist()

        # 1: Find all users in Redis
        users = {i.split(":")[0] for i in redis.list_keys("[0-9]*:")}
        if not users:
            self.stdout.write(" no users needed migrating to ES")
            return

        # (redis key suffix, user config key), processed in this order
        key_map = [
            ("color", "stylesheet"),
            ("sort_by", "sort_by"),
            ("page_size", "page_size"),
            ("sort_order", "sort_order"),
            ("grid_items", "grid_items"),
            ("hide_watched", "hide_watched"),
            ("show_ignored_only", "show_ignored_only"),
            ("show_subed_only", "show_subed_only"),
        ]
        for view in ["channel", "playlist", "home", "downloads"]:
            key_map.append((f"view:{view}", f"view_style_{view}"))

        # 2: Write all Redis user settings to ES
        # 3: Remove user settings from Redis
        try:
            for user in users:
                new_conf = UserConfig(user)
                for suffix, config_key in key_map:
                    redis_key = f"{user}:{suffix}"
                    value = redis.get_message(redis_key).get("status")
                    if value:
                        new_conf.set_value(config_key, value)
                        redis.del_message(redis_key)

                self.stdout.write(
                    self.style.SUCCESS(
                        f" ✓ Settings for user '{user}' migrated to ES"
                    )
                )
        except Exception as err:
            message = " 🗙 user migration to ES failed"
            self.stdout.write(self.style.ERROR(message))
            # stdout.write expects a str, an exception object would fail
            # inside the writer and mask the CommandError below
            self.stdout.write(self.style.ERROR(str(err)))
            sleep(60)
            raise CommandError(message) from err
        else:
            self.stdout.write(
                self.style.SUCCESS(
                    " ✓ Settings for all users migrated to ES"
                )
            )

    def _mig_custom_playlist(self):
        """migration for custom playlist

        Sets playlist_type to 'regular' on all ta_playlist documents
        that do not have the field yet.

        Raises:
            CommandError: when the update request does not return 200.
        """
        self.stdout.write("[MIGRATION] custom playlist")
        data = {
            "query": {
                "bool": {"must_not": [{"exists": {"field": "playlist_type"}}]}
            },
            "script": {"source": "ctx._source['playlist_type'] = 'regular'"},
        }
        path = "ta_playlist/_update_by_query"
        response, status_code = ElasticWrap(path).post(data=data)
        if status_code == 200:
            updated = response.get("updated", 0)
            if updated:
                self.stdout.write(
                    self.style.SUCCESS(
                        f" ✓ {updated} playlist_type updated in ta_playlist"
                    )
                )
            else:
                self.stdout.write(
                    self.style.SUCCESS(
                        " no playlist_type needed updating in ta_playlist"
                    )
                )
            return

        message = " 🗙 ta_playlist playlist_type update failed"
        self.stdout.write(self.style.ERROR(message))
        # response is not a str, stringify it before handing to stdout
        self.stdout.write(str(response))
        sleep(60)
        raise CommandError(message)
|
|
@ -17,7 +17,8 @@ from pathlib import Path
|
|||
import ldap
|
||||
from corsheaders.defaults import default_headers
|
||||
from django_auth_ldap.config import LDAPSearch
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import ta_host_parser
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
|
@ -26,21 +27,13 @@ BASE_DIR = Path(__file__).resolve().parent.parent
|
|||
# Quick-start development settings - unsuitable for production
|
||||
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/
|
||||
|
||||
PW_HASH = hashlib.sha256(environ.get("TA_PASSWORD").encode())
|
||||
PW_HASH = hashlib.sha256(EnvironmentSettings.TA_PASSWORD.encode())
|
||||
SECRET_KEY = PW_HASH.hexdigest()
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = bool(environ.get("DJANGO_DEBUG"))
|
||||
|
||||
ALLOWED_HOSTS = [i.strip() for i in environ.get("TA_HOST").split()]
|
||||
|
||||
CSRF_TRUSTED_ORIGINS = []
|
||||
for host in ALLOWED_HOSTS:
|
||||
if host.startswith("http://") or host.startswith("https://"):
|
||||
CSRF_TRUSTED_ORIGINS.append(host)
|
||||
else:
|
||||
CSRF_TRUSTED_ORIGINS.append(f"http://{host}")
|
||||
|
||||
ALLOWED_HOSTS, CSRF_TRUSTED_ORIGINS = ta_host_parser(environ["TA_HOST"])
|
||||
|
||||
# Application definition
|
||||
|
||||
|
@ -58,6 +51,7 @@ INSTALLED_APPS = [
|
|||
"rest_framework",
|
||||
"rest_framework.authtoken",
|
||||
"api",
|
||||
"config",
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
|
@ -70,6 +64,7 @@ MIDDLEWARE = [
|
|||
"django.contrib.auth.middleware.AuthenticationMiddleware",
|
||||
"django.contrib.messages.middleware.MessageMiddleware",
|
||||
"django.middleware.clickjacking.XFrameOptionsMiddleware",
|
||||
"home.src.ta.health.HealthCheckMiddleware",
|
||||
]
|
||||
|
||||
ROOT_URLCONF = "config.urls"
|
||||
|
@ -180,13 +175,12 @@ if bool(environ.get("TA_LDAP")):
|
|||
ldap.OPT_X_TLS_REQUIRE_CERT: ldap.OPT_X_TLS_NEVER,
|
||||
}
|
||||
|
||||
global AUTHENTICATION_BACKENDS
|
||||
AUTHENTICATION_BACKENDS = ("django_auth_ldap.backend.LDAPBackend",)
|
||||
|
||||
# Database
|
||||
# https://docs.djangoproject.com/en/3.2/ref/settings/#databases
|
||||
|
||||
CACHE_DIR = AppConfig().config["application"]["cache_dir"]
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
DB_PATH = path.join(CACHE_DIR, "db.sqlite3")
|
||||
DATABASES = {
|
||||
"default": {
|
||||
|
@ -216,12 +210,25 @@ AUTH_PASSWORD_VALIDATORS = [
|
|||
|
||||
AUTH_USER_MODEL = "home.Account"
|
||||
|
||||
# Forward-auth authentication
|
||||
if bool(environ.get("TA_ENABLE_AUTH_PROXY")):
|
||||
TA_AUTH_PROXY_USERNAME_HEADER = (
|
||||
environ.get("TA_AUTH_PROXY_USERNAME_HEADER") or "HTTP_REMOTE_USER"
|
||||
)
|
||||
TA_AUTH_PROXY_LOGOUT_URL = environ.get("TA_AUTH_PROXY_LOGOUT_URL")
|
||||
|
||||
MIDDLEWARE.append("home.src.ta.auth.HttpRemoteUserMiddleware")
|
||||
|
||||
AUTHENTICATION_BACKENDS = (
|
||||
"django.contrib.auth.backends.RemoteUserBackend",
|
||||
)
|
||||
|
||||
|
||||
# Internationalization
|
||||
# https://docs.djangoproject.com/en/3.2/topics/i18n/
|
||||
|
||||
LANGUAGE_CODE = "en-us"
|
||||
TIME_ZONE = environ.get("TZ") or "UTC"
|
||||
TIME_ZONE = EnvironmentSettings.TZ
|
||||
USE_I18N = True
|
||||
USE_L10N = True
|
||||
USE_TZ = True
|
||||
|
@ -262,4 +269,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [
|
|||
|
||||
# TA application settings
|
||||
TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist"
|
||||
TA_VERSION = "v0.3.1"
|
||||
TA_VERSION = "v0.4.8-unstable"
|
||||
|
|
|
@ -13,6 +13,7 @@ Including another URLconf
|
|||
1. Import the include() function: from django.urls import include, path
|
||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
||||
"""
|
||||
|
||||
from django.contrib import admin
|
||||
from django.urls import include, path
|
||||
|
||||
|
|
|
@ -1,158 +1,6 @@
|
|||
"""handle custom startup functions"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from django.apps import AppConfig
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.index_setup import ElasitIndexWrap
|
||||
from home.src.es.snapshot import ElasticSnapshot
|
||||
from home.src.ta.config import AppConfig as ArchivistConfig
|
||||
from home.src.ta.config import ReleaseVersion
|
||||
from home.src.ta.helper import clear_dl_cache
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class StartupCheck:
    """checks to run at application startup"""

    # supported elasticsearch major versions, inclusive on both ends
    MIN_MAJOR, MAX_MAJOR = 8, 8
    # lowest accepted minor version on the MIN_MAJOR release line
    MIN_MINOR = 0

    def __init__(self):
        self.config_handler = ArchivistConfig()
        self.redis_con = RedisArchivist()
        self.has_run = self.get_has_run()

    def run(self):
        """run all startup checks"""
        print("run startup checks")
        self.es_version_check()
        self.release_lock()
        self.sync_redis_state()
        self.set_redis_conf()
        ElasitIndexWrap().setup()
        self.make_folders()
        clear_dl_cache(self.config_handler.config)
        self.snapshot_check()
        self.ta_version_check()
        self.es_set_vid_type()
        self.set_has_run()

    def get_has_run(self):
        """validate if check has already executed"""
        return self.redis_con.get_message("startup_check")

    def set_has_run(self):
        """startup checks run"""
        message = {"status": True}
        self.redis_con.set_message("startup_check", message, expire=120)

    def sync_redis_state(self):
        """make sure redis gets new config.json values"""
        print("sync redis")
        self.config_handler.load_new_defaults()

    def set_redis_conf(self):
        """set conf values for redis"""
        self.redis_con.conn.config_set("timeout", 3600)

    def make_folders(self):
        """make needed cache folders here so docker doesn't mess it up"""
        folders = [
            "download",
            "channels",
            "videos",
            "playlists",
            "import",
            "backup",
        ]
        cache_dir = self.config_handler.config["application"]["cache_dir"]
        for folder in folders:
            folder_path = os.path.join(cache_dir, folder)
            os.makedirs(folder_path, exist_ok=True)

    def release_lock(self):
        """make sure there are no leftover locks set in redis"""
        all_locks = [
            "startup_check",
            "manual_import",
            "downloading",
            "dl_queue",
            "dl_queue_id",
            "reindex",
            "rescan",
            "run_backup",
        ]
        for lock in all_locks:
            response = self.redis_con.del_message(lock)
            if response:
                print("deleted leftover key from redis: " + lock)

    def snapshot_check(self):
        """setup snapshot config, create if needed"""
        app = self.config_handler.config["application"]
        if not app.get("enable_snapshot"):
            return

        ElasticSnapshot().setup()

    def is_invalid(self, version):
        """return true if es version is invalid, false if ok

        version is a dotted string, only its first two parts are read.
        The major part has to be within MIN_MAJOR and MAX_MAJOR. The
        MIN_MINOR floor is applied on the MIN_MAJOR release line only,
        so a later major with a low minor is not rejected.
        """
        major, minor = [int(i) for i in version.split(".")[:2]]
        if not self.MIN_MAJOR <= major <= self.MAX_MAJOR:
            return True

        if major == self.MIN_MAJOR and minor < self.MIN_MINOR:
            return True

        return False

    def es_version_check(self):
        """check for minimal elasticsearch version, exit with 1 if invalid"""
        response, _ = ElasticWrap("/").get()
        version = response["version"]["number"]
        invalid = self.is_invalid(version)

        if invalid:
            print(
                "required elasticsearch version: "
                + f"{self.MIN_MAJOR}.{self.MIN_MINOR}"
            )
            sys.exit(1)

        print("elasticsearch version check passed")

    def ta_version_check(self):
        """remove key if updated now"""
        ReleaseVersion().is_updated()

    def es_set_vid_type(self):
        """
        update path 0.3.0 to 0.3.1, set default vid_type to video
        fix unidentified vids in unstable
        """
        index_list = ["ta_video", "ta_download"]
        # match documents without vid_type or with vid_type "unknown"
        data = {
            "query": {
                "bool": {
                    "should": [
                        {
                            "bool": {
                                "must_not": [{"exists": {"field": "vid_type"}}]
                            }
                        },
                        {"term": {"vid_type": {"value": "unknown"}}},
                    ]
                }
            },
            "script": {"source": "ctx._source['vid_type'] = 'videos'"},
        }

        for index_name in index_list:
            path = f"{index_name}/_update_by_query"
            response, _ = ElasticWrap(path).post(data=data)
            print(f"{index_name} vid_type index update ran: {response}")
|
||||
|
||||
|
||||
class HomeConfig(AppConfig):
|
||||
|
@ -160,11 +8,3 @@ class HomeConfig(AppConfig):
|
|||
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "home"
|
||||
|
||||
def ready(self):
|
||||
startup = StartupCheck()
|
||||
if startup.has_run["status"]:
|
||||
print("startup checks run in other thread")
|
||||
return
|
||||
|
||||
startup.run()
|
||||
|
|
|
@ -1,29 +1,16 @@
|
|||
{
|
||||
"archive": {
|
||||
"sort_by": "published",
|
||||
"sort_order": "desc",
|
||||
"page_size": 12
|
||||
},
|
||||
"default_view": {
|
||||
"home": "grid",
|
||||
"channel": "list",
|
||||
"downloads": "list",
|
||||
"playlist": "grid",
|
||||
"grid_items": 3
|
||||
},
|
||||
"subscriptions": {
|
||||
"auto_search": false,
|
||||
"auto_download": false,
|
||||
"channel_size": 50,
|
||||
"live_channel_size": 50,
|
||||
"shorts_channel_size": 50
|
||||
"shorts_channel_size": 50,
|
||||
"auto_start": false
|
||||
},
|
||||
"downloads": {
|
||||
"limit_count": false,
|
||||
"limit_speed": false,
|
||||
"sleep_interval": 3,
|
||||
"autodelete_days": false,
|
||||
"format": false,
|
||||
"format_sort": false,
|
||||
"add_metadata": false,
|
||||
"add_thumbnail": false,
|
||||
"subtitle": false,
|
||||
|
@ -33,24 +20,23 @@
|
|||
"comment_sort": "top",
|
||||
"cookie_import": false,
|
||||
"throttledratelimit": false,
|
||||
"extractor_lang": false,
|
||||
"integrate_ryd": false,
|
||||
"integrate_sponsorblock": false
|
||||
},
|
||||
"application": {
|
||||
"app_root": "/app",
|
||||
"cache_dir": "/cache",
|
||||
"videos": "/youtube",
|
||||
"colors": "dark",
|
||||
"enable_cast": false,
|
||||
"enable_snapshot": false
|
||||
"enable_snapshot": true
|
||||
},
|
||||
"scheduler": {
|
||||
"update_subscribed": false,
|
||||
"update_subscribed_notify": false,
|
||||
"download_pending": false,
|
||||
"download_pending_notify": false,
|
||||
"check_reindex": {"minute": "0", "hour": "12", "day_of_week": "*"},
|
||||
"check_reindex_notify": false,
|
||||
"check_reindex_days": 90,
|
||||
"thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
|
||||
"run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
|
||||
"run_backup": false,
|
||||
"run_backup_rotate": 5,
|
||||
"version_check": "rand-d"
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
# Generated by Django 4.1.5 on 2023-02-02 06:49
|
||||
|
||||
from django.db import migrations, models
|
||||
import home.models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial migration: creates the custom ``Account`` user model."""

    # flags this as the initial migration of the app
    initial = True

    # the groups / user_permissions relations below point at auth models
    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
    ]

    operations = [
        migrations.CreateModel(
            name='Account',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('password', models.CharField(max_length=128, verbose_name='password')),
                ('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')),
                ('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')),
                # unique account name, at most 150 characters
                ('name', models.CharField(max_length=150, unique=True)),
                ('is_staff', models.BooleanField(default=False)),
                ('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')),
                ('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')),
            ],
            options={
                'abstract': False,
            },
            # default manager is the project's own AccountManager
            managers=[
                ('objects', home.models.AccountManager()),
            ],
        ),
    ]
|
|
@ -1,4 +1,5 @@
|
|||
"""custom models"""
|
||||
|
||||
from django.contrib.auth.models import (
|
||||
AbstractBaseUser,
|
||||
BaseUserManager,
|
||||
|
|
|
@ -17,8 +17,7 @@ from home.src.es.connect import ElasticWrap, IndexPaginate
|
|||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import DurationConverter, is_shorts
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.helper import get_duration_str, is_shorts
|
||||
|
||||
|
||||
class PendingIndex:
|
||||
|
@ -96,13 +95,13 @@ class PendingIndex:
|
|||
class PendingInteract:
|
||||
"""interact with items in download queue"""
|
||||
|
||||
def __init__(self, video_id=False, status=False):
|
||||
self.video_id = video_id
|
||||
def __init__(self, youtube_id=False, status=False):
|
||||
self.youtube_id = youtube_id
|
||||
self.status = status
|
||||
|
||||
def delete_item(self):
|
||||
"""delete single item from pending"""
|
||||
path = f"ta_download/_doc/{self.video_id}"
|
||||
path = f"ta_download/_doc/{self.youtube_id}"
|
||||
_, _ = ElasticWrap(path).delete(refresh=True)
|
||||
|
||||
def delete_by_status(self):
|
||||
|
@ -112,35 +111,63 @@ class PendingInteract:
|
|||
_, _ = ElasticWrap(path).post(data=data)
|
||||
|
||||
def update_status(self):
|
||||
"""update status field of pending item"""
|
||||
data = {"doc": {"status": self.status}}
|
||||
path = f"ta_download/_update/{self.video_id}"
|
||||
"""update status of pending item"""
|
||||
if self.status == "priority":
|
||||
data = {
|
||||
"doc": {
|
||||
"status": "pending",
|
||||
"auto_start": True,
|
||||
"message": None,
|
||||
}
|
||||
}
|
||||
else:
|
||||
data = {"doc": {"status": self.status}}
|
||||
|
||||
path = f"ta_download/_update/{self.youtube_id}/?refresh=true"
|
||||
_, _ = ElasticWrap(path).post(data=data)
|
||||
|
||||
def get_item(self):
|
||||
"""return pending item dict"""
|
||||
path = f"ta_download/_doc/{self.video_id}"
|
||||
path = f"ta_download/_doc/{self.youtube_id}"
|
||||
response, status_code = ElasticWrap(path).get()
|
||||
return response["_source"], status_code
|
||||
|
||||
def get_channel(self):
    """
    build channel metadata from a queue entry of that channel,
    so the channel itself does not need to be indexed yet,
    channel_name falls back to "NA" if nothing matches
    """
    query = {
        "size": 1,
        "query": {"term": {"channel_id": {"value": self.youtube_id}}},
    }
    response, _ = ElasticWrap("ta_download/_search").get(data=query)

    channel_name = "NA"
    matches = response["hits"]["hits"]
    if matches:
        # a single hit is enough, only the name is read from it
        channel_name = matches[0]["_source"].get("channel_name", "NA")

    return {"channel_id": self.youtube_id, "channel_name": channel_name}
|
||||
|
||||
|
||||
class PendingList(PendingIndex):
|
||||
"""manage the pending videos list"""
|
||||
|
||||
yt_obs = {
|
||||
"default_search": "ytsearch",
|
||||
"quiet": True,
|
||||
"check_formats": "selected",
|
||||
"noplaylist": True,
|
||||
"writethumbnail": True,
|
||||
"simulate": True,
|
||||
"socket_timeout": 3,
|
||||
"check_formats": None,
|
||||
}
|
||||
|
||||
def __init__(self, youtube_ids=False):
|
||||
def __init__(self, youtube_ids=False, task=False):
|
||||
super().__init__()
|
||||
self.config = AppConfig().config
|
||||
self.youtube_ids = youtube_ids
|
||||
self.task = task
|
||||
self.to_skip = False
|
||||
self.missing_videos = False
|
||||
|
||||
|
@ -149,16 +176,16 @@ class PendingList(PendingIndex):
|
|||
self.missing_videos = []
|
||||
self.get_download()
|
||||
self.get_indexed()
|
||||
for entry in self.youtube_ids:
|
||||
# notify
|
||||
mess_dict = {
|
||||
"status": "message:add",
|
||||
"level": "info",
|
||||
"title": "Adding to download queue.",
|
||||
"message": "Extracting lists",
|
||||
}
|
||||
RedisArchivist().set_message("message:add", mess_dict, expire=True)
|
||||
total = len(self.youtube_ids)
|
||||
for idx, entry in enumerate(self.youtube_ids):
|
||||
self._process_entry(entry)
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
self.task.send_progress(
|
||||
message_lines=[f"Extracting items {idx + 1}/{total}"],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
|
||||
def _process_entry(self, entry):
|
||||
"""process single entry from url list"""
|
||||
|
@ -201,24 +228,40 @@ class PendingList(PendingIndex):
|
|||
"""add all videos of playlist to list"""
|
||||
playlist = YoutubePlaylist(url)
|
||||
playlist.build_json()
|
||||
if not playlist.json_data:
|
||||
message = f"{playlist.youtube_id}: failed to extract metadata"
|
||||
print(message)
|
||||
raise ValueError(message)
|
||||
|
||||
video_results = playlist.json_data.get("playlist_entries")
|
||||
youtube_ids = [i["youtube_id"] for i in video_results]
|
||||
for video_id in youtube_ids:
|
||||
# match vid_type later
|
||||
self._add_video(video_id, VideoTypeEnum.UNKNOWN)
|
||||
|
||||
def add_to_pending(self, status="pending"):
|
||||
def add_to_pending(self, status="pending", auto_start=False):
|
||||
"""add missing videos to pending list"""
|
||||
self.get_channels()
|
||||
bulk_list = []
|
||||
|
||||
total = len(self.missing_videos)
|
||||
for idx, (youtube_id, vid_type) in enumerate(self.missing_videos):
|
||||
print(f"{youtube_id} ({vid_type}): add to download queue")
|
||||
if self.task and self.task.is_stopped():
|
||||
break
|
||||
|
||||
print(f"{youtube_id}: [{idx + 1}/{total}]: add to queue")
|
||||
self._notify_add(idx, total)
|
||||
video_details = self.get_youtube_details(youtube_id, vid_type)
|
||||
if not video_details:
|
||||
continue
|
||||
|
||||
video_details["status"] = status
|
||||
video_details.update(
|
||||
{
|
||||
"status": status,
|
||||
"auto_start": auto_start,
|
||||
}
|
||||
)
|
||||
|
||||
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
|
||||
bulk_list.append(json.dumps(action))
|
||||
bulk_list.append(json.dumps(video_details))
|
||||
|
@ -226,31 +269,34 @@ class PendingList(PendingIndex):
|
|||
url = video_details["vid_thumb_url"]
|
||||
ThumbManager(youtube_id).download_video_thumb(url)
|
||||
|
||||
self._notify_add(idx)
|
||||
if len(bulk_list) >= 20:
|
||||
self._ingest_bulk(bulk_list)
|
||||
bulk_list = []
|
||||
|
||||
if bulk_list:
|
||||
# add last newline
|
||||
bulk_list.append("\n")
|
||||
query_str = "\n".join(bulk_list)
|
||||
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
|
||||
self._ingest_bulk(bulk_list)
|
||||
|
||||
def _notify_add(self, idx):
|
||||
def _ingest_bulk(self, bulk_list):
|
||||
"""add items to queue in bulk"""
|
||||
if not bulk_list:
|
||||
return
|
||||
|
||||
# add last newline
|
||||
bulk_list.append("\n")
|
||||
query_str = "\n".join(bulk_list)
|
||||
_, _ = ElasticWrap("_bulk?refresh=true").post(query_str, ndjson=True)
|
||||
|
||||
def _notify_add(self, idx, total):
|
||||
"""send notification for adding videos to download queue"""
|
||||
progress = f"{idx + 1}/{len(self.missing_videos)}"
|
||||
mess_dict = {
|
||||
"status": "message:add",
|
||||
"level": "info",
|
||||
"title": "Adding new videos to download queue.",
|
||||
"message": "Progress: " + progress,
|
||||
}
|
||||
if idx + 1 == len(self.missing_videos):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
if not self.task:
|
||||
return
|
||||
|
||||
RedisArchivist().set_message("message:add", mess_dict, expire=expire)
|
||||
if idx + 1 % 25 == 0:
|
||||
print("adding to queue progress: " + progress)
|
||||
self.task.send_progress(
|
||||
message_lines=[
|
||||
"Adding new videos to download queue.",
|
||||
f"Extracting items {idx + 1}/{total}",
|
||||
],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
|
||||
def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEOS):
|
||||
"""get details from youtubedl for single pending video"""
|
||||
|
@ -264,6 +310,7 @@ class PendingList(PendingIndex):
|
|||
return False
|
||||
# stop if video is streaming live now
|
||||
if vid["live_status"] in ["is_upcoming", "is_live"]:
|
||||
print(f"{youtube_id}: skip is_upcoming or is_live")
|
||||
return False
|
||||
|
||||
if vid["live_status"] == "was_live":
|
||||
|
@ -292,9 +339,6 @@ class PendingList(PendingIndex):
|
|||
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS):
|
||||
"""parse response"""
|
||||
vid_id = vid.get("id")
|
||||
duration_str = DurationConverter.get_str(vid["duration"])
|
||||
if duration_str == "NA":
|
||||
print(f"skip extracting duration for: {vid_id}")
|
||||
published = datetime.strptime(vid["upload_date"], "%Y%m%d").strftime(
|
||||
"%Y-%m-%d"
|
||||
)
|
||||
|
@ -306,7 +350,7 @@ class PendingList(PendingIndex):
|
|||
"vid_thumb_url": vid["thumbnail"],
|
||||
"title": vid["title"],
|
||||
"channel_id": vid["channel_id"],
|
||||
"duration": duration_str,
|
||||
"duration": get_duration_str(vid["duration"]),
|
||||
"published": published,
|
||||
"timestamp": int(datetime.now().timestamp()),
|
||||
# Pulling enum value out so it is serializable
|
||||
|
|
|
@ -12,14 +12,15 @@ from home.src.index.channel import YoutubeChannel
|
|||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.urlparser import Parser
|
||||
|
||||
|
||||
class ChannelSubscription:
|
||||
"""manage the list of channels subscribed"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, task=False):
|
||||
self.config = AppConfig().config
|
||||
self.task = task
|
||||
|
||||
@staticmethod
|
||||
def get_channels(subscribed_only=True):
|
||||
|
@ -44,7 +45,7 @@ class ChannelSubscription:
|
|||
|
||||
last_videos = []
|
||||
|
||||
for vid_type, limit_amount in queries:
|
||||
for vid_type_enum, limit_amount in queries:
|
||||
obs = {
|
||||
"skip_download": True,
|
||||
"extract_flat": True,
|
||||
|
@ -52,9 +53,9 @@ class ChannelSubscription:
|
|||
if limit:
|
||||
obs["playlistend"] = limit_amount
|
||||
|
||||
path = vid_type.value
|
||||
vid_type = vid_type_enum.value
|
||||
channel = YtWrap(obs, self.config).extract(
|
||||
f"https://www.youtube.com/channel/{channel_id}/{path}"
|
||||
f"https://www.youtube.com/channel/{channel_id}/{vid_type}"
|
||||
)
|
||||
if not channel:
|
||||
continue
|
||||
|
@ -85,6 +86,10 @@ class ChannelSubscription:
|
|||
return queries
|
||||
|
||||
for query_item, default_limit in limit_map.items():
|
||||
if not default_limit:
|
||||
# is deactivated in config
|
||||
continue
|
||||
|
||||
if limit:
|
||||
query_limit = default_limit
|
||||
else:
|
||||
|
@ -97,12 +102,16 @@ class ChannelSubscription:
|
|||
def find_missing(self):
|
||||
"""add missing videos from subscribed channels to pending"""
|
||||
all_channels = self.get_channels()
|
||||
if not all_channels:
|
||||
return False
|
||||
|
||||
pending = queue.PendingList()
|
||||
pending.get_download()
|
||||
pending.get_indexed()
|
||||
|
||||
missing_videos = []
|
||||
|
||||
total = len(all_channels)
|
||||
for idx, channel in enumerate(all_channels):
|
||||
channel_id = channel["channel_id"]
|
||||
print(f"{channel_id}: find missing videos.")
|
||||
|
@ -112,21 +121,19 @@ class ChannelSubscription:
|
|||
for video_id, _, vid_type in last_videos:
|
||||
if video_id not in pending.to_skip:
|
||||
missing_videos.append((video_id, vid_type))
|
||||
# notify
|
||||
message = {
|
||||
"status": "message:rescan",
|
||||
"level": "info",
|
||||
"title": "Scanning channels: Looking for new videos.",
|
||||
"message": f"Progress: {idx + 1}/{len(all_channels)}",
|
||||
}
|
||||
if idx + 1 == len(all_channels):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
|
||||
RedisArchivist().set_message(
|
||||
"message:rescan", message=message, expire=expire
|
||||
)
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
if self.task:
|
||||
if self.task.is_stopped():
|
||||
self.task.send_progress(["Received Stop signal."])
|
||||
break
|
||||
|
||||
self.task.send_progress(
|
||||
message_lines=[f"Scanning Channel {idx + 1}/{total}"],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
|
||||
return missing_videos
|
||||
|
||||
|
@ -143,8 +150,9 @@ class ChannelSubscription:
|
|||
class PlaylistSubscription:
|
||||
"""manage the playlist download functionality"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, task=False):
|
||||
self.config = AppConfig().config
|
||||
self.task = task
|
||||
|
||||
@staticmethod
|
||||
def get_playlists(subscribed_only=True):
|
||||
|
@ -166,10 +174,7 @@ class PlaylistSubscription:
|
|||
|
||||
def process_url_str(self, new_playlists, subscribed=True):
|
||||
"""process playlist subscribe form url_str"""
|
||||
data = {
|
||||
"query": {"match_all": {}},
|
||||
"sort": [{"published": {"order": "desc"}}],
|
||||
}
|
||||
data = {"query": {"match_all": {}}, "_source": ["youtube_id"]}
|
||||
all_indexed = IndexPaginate("ta_video", data).get_results()
|
||||
all_youtube_ids = [i["youtube_id"] for i in all_indexed]
|
||||
|
||||
|
@ -182,6 +187,11 @@ class PlaylistSubscription:
|
|||
playlist_h = YoutubePlaylist(playlist_id)
|
||||
playlist_h.all_youtube_ids = all_youtube_ids
|
||||
playlist_h.build_json()
|
||||
if not playlist_h.json_data:
|
||||
message = f"{playlist_h.youtube_id}: failed to extract data"
|
||||
print(message)
|
||||
raise ValueError(message)
|
||||
|
||||
playlist_h.json_data["playlist_subscribed"] = subscribed
|
||||
playlist_h.upload_to_es()
|
||||
playlist_h.add_vids_to_playlist()
|
||||
|
@ -191,16 +201,13 @@ class PlaylistSubscription:
|
|||
thumb = ThumbManager(playlist_id, item_type="playlist")
|
||||
thumb.download_playlist_thumb(url)
|
||||
|
||||
# notify
|
||||
message = {
|
||||
"status": "message:subplaylist",
|
||||
"level": "info",
|
||||
"title": "Subscribing to Playlists",
|
||||
"message": f"Processing {idx + 1} of {len(new_playlists)}",
|
||||
}
|
||||
RedisArchivist().set_message(
|
||||
"message:subplaylist", message=message, expire=True
|
||||
)
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
message_lines=[
|
||||
f"Processing {idx + 1} of {len(new_playlists)}"
|
||||
],
|
||||
progress=(idx + 1) / len(new_playlists),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def channel_validate(channel_id):
|
||||
|
@ -228,14 +235,18 @@ class PlaylistSubscription:
|
|||
def find_missing(self):
|
||||
"""find videos in subscribed playlists not downloaded yet"""
|
||||
all_playlists = [i["playlist_id"] for i in self.get_playlists()]
|
||||
if not all_playlists:
|
||||
return False
|
||||
|
||||
to_ignore = self.get_to_ignore()
|
||||
|
||||
missing_videos = []
|
||||
total = len(all_playlists)
|
||||
for idx, playlist_id in enumerate(all_playlists):
|
||||
size_limit = self.config["subscriptions"]["channel_size"]
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
playlist.update_playlist()
|
||||
if not playlist:
|
||||
is_active = playlist.update_playlist()
|
||||
if not is_active:
|
||||
playlist.deactivate()
|
||||
continue
|
||||
|
||||
|
@ -245,19 +256,134 @@ class PlaylistSubscription:
|
|||
|
||||
all_missing = [i for i in playlist_entries if not i["downloaded"]]
|
||||
|
||||
message = {
|
||||
"status": "message:rescan",
|
||||
"level": "info",
|
||||
"title": "Scanning playlists: Looking for new videos.",
|
||||
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
|
||||
}
|
||||
RedisArchivist().set_message(
|
||||
"message:rescan", message=message, expire=True
|
||||
)
|
||||
|
||||
for video in all_missing:
|
||||
youtube_id = video["youtube_id"]
|
||||
if youtube_id not in to_ignore:
|
||||
missing_videos.append(youtube_id)
|
||||
|
||||
if not self.task:
|
||||
continue
|
||||
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
message_lines=[f"Scanning Playlists {idx + 1}/{total}"],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
if self.task.is_stopped():
|
||||
self.task.send_progress(["Received Stop signal."])
|
||||
break
|
||||
|
||||
return missing_videos
|
||||
|
||||
|
||||
class SubscriptionScanner:
    """
    collect videos missing from subscribed channels and playlists

    task: optional task object used for progress messages and
    stop signals, scanning works without one
    """

    def __init__(self, task=False):
        self.task = task
        self.missing_videos = False
        self.auto_start = AppConfig().config["subscriptions"].get("auto_start")

    def scan(self):
        """
        scan channels and playlists
        returns list of dicts with keys type, vid_type and url
        """
        if self.task:
            self.task.send_progress(["Rescanning channels and playlists."])

        self.missing_videos = []
        self.scan_channels()
        # only a task can signal a stop, without one always continue
        if not self.task or not self.task.is_stopped():
            self.scan_playlists()

        return self.missing_videos

    def scan_channels(self):
        """get missing from channels"""
        missing = ChannelSubscription(task=self.task).find_missing()
        if not missing:
            return

        self.missing_videos.extend(
            {"type": "video", "vid_type": vid_type, "url": vid_id}
            for vid_id, vid_type in missing
        )

    def scan_playlists(self):
        """get missing from playlists, always typed as regular videos"""
        missing = PlaylistSubscription(task=self.task).find_missing()
        if not missing:
            return

        self.missing_videos.extend(
            {
                "type": "video",
                "vid_type": VideoTypeEnum.VIDEOS.value,
                "url": video_id,
            }
            for video_id in missing
        )
|
||||
|
||||
|
||||
class SubscriptionHandler:
    """
    subscribe to channels and playlists from url_str

    url_str: raw form content, parsed into items on subscribe
    task: optional task object used for progress messages
    """

    def __init__(self, url_str, task=False):
        self.url_str = url_str
        self.task = task
        self.to_subscribe = False

    def subscribe(self, expected_type=False):
        """
        subscribe to url_str items
        expected_type: "channel" or "playlist" to enforce the item type,
        False to accept any
        """
        if self.task:
            self.task.send_progress(["Processing form content."])

        self.to_subscribe = Parser(self.url_str).parse()

        total = len(self.to_subscribe)
        for idx, item in enumerate(self.to_subscribe):
            if self.task:
                self._notify(idx, item, total)

            self.subscribe_type(item, expected_type=expected_type)

    def subscribe_type(self, item, expected_type):
        """
        process single item, a video item subscribes to its channel
        raises TypeError if item does not match expected_type
        raises ValueError if item can not be subscribed to
        """
        item_type = item["type"]
        if item_type == "playlist":
            self._validate_type(item, expected_type, "playlist")
            PlaylistSubscription().process_url_str([item])
            return

        if item_type not in ("video", "channel"):
            raise ValueError("failed to subscribe to: " + item["url"])

        # validate before the video lookup to not extract metadata
        # for an item that gets rejected anyway
        self._validate_type(item, expected_type, "channel")

        if item_type == "video":
            # extract channel id from video
            vid = queue.PendingList().get_youtube_details(item["url"])
            if not vid:
                # lookup returned nothing usable, fail with a clear message
                raise ValueError("failed to subscribe to: " + item["url"])

            channel_id = vid["channel_id"]
        else:
            channel_id = item["url"]

        self._subscribe(channel_id)

    @staticmethod
    def _validate_type(item, expected_type, resolved_type):
        """raise TypeError if expected_type is set and does not match"""
        if expected_type and expected_type != resolved_type:
            raise TypeError(
                f"expected {expected_type} url but got {item.get('type')}"
            )

    def _subscribe(self, channel_id):
        """subscribe to channel"""
        ChannelSubscription().change_subscribe(
            channel_id, channel_subscribed=True
        )

    def _notify(self, idx, item, total):
        """send progress message through the task"""
        subscribe_type = item["type"].title()
        message_lines = [
            f"Subscribe to {subscribe_type}",
            f"Progress: {idx + 1}/{total}",
        ]
        self.task.send_progress(message_lines, progress=(idx + 1) / total)
|
||||
|
|
|
@ -10,9 +10,8 @@ from io import BytesIO
|
|||
from time import sleep
|
||||
|
||||
import requests
|
||||
from home.src.download import queue # partial import
|
||||
from home.src.es.connect import IndexPaginate
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from mutagen.mp4 import MP4, MP4Cover
|
||||
from PIL import Image, ImageFile, ImageFilter, UnidentifiedImageError
|
||||
|
||||
|
@ -22,8 +21,7 @@ ImageFile.LOAD_TRUNCATED_IMAGES = True
|
|||
class ThumbManagerBase:
|
||||
"""base class for thumbnail management"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
|
||||
CHANNEL_DIR = os.path.join(CACHE_DIR, "channels")
|
||||
PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists")
|
||||
|
@ -43,33 +41,41 @@ class ThumbManagerBase:
|
|||
response = requests.get(url, stream=True, timeout=5)
|
||||
if response.ok:
|
||||
try:
|
||||
return Image.open(response.raw)
|
||||
except UnidentifiedImageError:
|
||||
img = Image.open(response.raw)
|
||||
if isinstance(img, Image.Image):
|
||||
return img
|
||||
return self.get_fallback()
|
||||
|
||||
except (UnidentifiedImageError, OSError):
|
||||
print(f"failed to open thumbnail: {url}")
|
||||
return self.get_fallback()
|
||||
|
||||
if response.status_code == 404:
|
||||
return self.get_fallback()
|
||||
|
||||
except requests.exceptions.RequestException:
|
||||
except (
|
||||
requests.exceptions.RequestException,
|
||||
requests.exceptions.ReadTimeout,
|
||||
):
|
||||
print(f"{self.item_id}: retry thumbnail download {url}")
|
||||
sleep((i + 1) ** i)
|
||||
|
||||
return False
|
||||
return self.get_fallback()
|
||||
|
||||
def get_fallback(self):
|
||||
"""get fallback thumbnail if not available"""
|
||||
print(f"{self.item_id}: failed to extract thumbnail, use fallback")
|
||||
if self.fallback:
|
||||
img_raw = Image.open(self.fallback)
|
||||
return img_raw
|
||||
|
||||
app_root = self.CONFIG["application"]["app_root"]
|
||||
app_root = EnvironmentSettings.APP_DIR
|
||||
default_map = {
|
||||
"video": os.path.join(
|
||||
app_root, "static/img/default-video-thumb.jpg"
|
||||
),
|
||||
"playlist": os.path.join(
|
||||
app_root, "static/img/default-video-thumb.jpg"
|
||||
app_root, "static/img/default-playlist-thumb.jpg"
|
||||
),
|
||||
"icon": os.path.join(
|
||||
app_root, "static/img/default-channel-icon.jpg"
|
||||
|
@ -77,6 +83,9 @@ class ThumbManagerBase:
|
|||
"banner": os.path.join(
|
||||
app_root, "static/img/default-channel-banner.jpg"
|
||||
),
|
||||
"tvart": os.path.join(
|
||||
app_root, "static/img/default-channel-art.jpg"
|
||||
),
|
||||
}
|
||||
|
||||
img_raw = Image.open(default_map[self.item_type])
|
||||
|
@ -145,9 +154,10 @@ class ThumbManager(ThumbManagerBase):
|
|||
|
||||
def download_channel_art(self, urls, skip_existing=False):
|
||||
"""pass tuple of channel thumbnails"""
|
||||
channel_thumb, channel_banner = urls
|
||||
channel_thumb, channel_banner, channel_tv = urls
|
||||
self._download_channel_thumb(channel_thumb, skip_existing)
|
||||
self._download_channel_banner(channel_banner, skip_existing)
|
||||
self._download_channel_tv(channel_tv, skip_existing)
|
||||
|
||||
def _download_channel_thumb(self, channel_thumb, skip_existing):
|
||||
"""download channel thumbnail"""
|
||||
|
@ -176,13 +186,34 @@ class ThumbManager(ThumbManagerBase):
|
|||
img_raw = self.download_raw(channel_banner)
|
||||
img_raw.convert("RGB").save(banner_path)
|
||||
|
||||
def _download_channel_tv(self, channel_tv, skip_existing):
|
||||
"""download channel tv art"""
|
||||
art_path = os.path.join(self.CHANNEL_DIR, self.item_id + "_tvart.jpg")
|
||||
self.item_type = "tvart"
|
||||
if skip_existing and os.path.exists(art_path):
|
||||
return
|
||||
|
||||
img_raw = self.download_raw(channel_tv)
|
||||
img_raw.convert("RGB").save(art_path)
|
||||
|
||||
def download_playlist_thumb(self, url, skip_existing=False):
|
||||
"""pass thumbnail url"""
|
||||
thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg")
|
||||
if skip_existing and os.path.exists(thumb_path):
|
||||
return
|
||||
|
||||
img_raw = self.download_raw(url)
|
||||
img_raw = (
|
||||
self.download_raw(url)
|
||||
if not isinstance(url, str) or url.startswith("http")
|
||||
else Image.open(os.path.join(self.CACHE_DIR, url))
|
||||
)
|
||||
width, height = img_raw.size
|
||||
|
||||
if not width / height == 16 / 9:
|
||||
new_height = width / 16 * 9
|
||||
offset = (height - new_height) / 2
|
||||
img_raw = img_raw.crop((0, offset, width, height - offset))
|
||||
img_raw = img_raw.resize((336, 189))
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
|
||||
def delete_video_thumb(self):
|
||||
|
@ -225,9 +256,10 @@ class ThumbManager(ThumbManagerBase):
|
|||
class ValidatorCallback:
|
||||
"""handle callback validate thumbnails page by page"""
|
||||
|
||||
def __init__(self, source, index_name):
|
||||
def __init__(self, source, index_name, counter=0):
|
||||
self.source = source
|
||||
self.index_name = index_name
|
||||
self.counter = counter
|
||||
|
||||
def run(self):
|
||||
"""run the task for page"""
|
||||
|
@ -252,6 +284,7 @@ class ValidatorCallback:
|
|||
urls = (
|
||||
channel["_source"]["channel_thumb_url"],
|
||||
channel["_source"]["channel_banner_url"],
|
||||
channel["_source"].get("channel_tvart_url", False),
|
||||
)
|
||||
handler = ThumbManager(channel["_source"]["channel_id"])
|
||||
handler.download_channel_art(urls, skip_existing=True)
|
||||
|
@ -267,102 +300,122 @@ class ValidatorCallback:
|
|||
class ThumbValidator:
|
||||
"""validate thumbnails"""
|
||||
|
||||
def download_missing(self):
|
||||
"""download all missing artwork"""
|
||||
self.download_missing_videos()
|
||||
self.download_missing_channels()
|
||||
self.download_missing_playlists()
|
||||
|
||||
def download_missing_videos(self):
|
||||
"""get all missing video thumbnails"""
|
||||
data = {
|
||||
"query": {"term": {"active": {"value": True}}},
|
||||
"sort": [{"youtube_id": {"order": "asc"}}],
|
||||
"_source": ["vid_thumb_url", "youtube_id"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_video", data, size=5000, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
def download_missing_channels(self):
|
||||
"""get all missing channel thumbnails"""
|
||||
data = {
|
||||
"query": {"term": {"channel_active": {"value": True}}},
|
||||
"sort": [{"channel_id": {"order": "asc"}}],
|
||||
"_source": {
|
||||
"excludes": ["channel_description", "channel_overwrites"]
|
||||
INDEX = [
|
||||
{
|
||||
"data": {
|
||||
"query": {"term": {"active": {"value": True}}},
|
||||
"_source": ["vid_thumb_url", "youtube_id"],
|
||||
},
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_channel", data, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
"name": "ta_video",
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"query": {"term": {"channel_active": {"value": True}}},
|
||||
"_source": {
|
||||
"excludes": ["channel_description", "channel_overwrites"]
|
||||
},
|
||||
},
|
||||
"name": "ta_channel",
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"query": {"term": {"playlist_active": {"value": True}}},
|
||||
"_source": ["playlist_id", "playlist_thumbnail"],
|
||||
},
|
||||
"name": "ta_playlist",
|
||||
},
|
||||
]
|
||||
|
||||
def download_missing_playlists(self):
|
||||
"""get all missing playlist artwork"""
|
||||
data = {
|
||||
"query": {"term": {"playlist_active": {"value": True}}},
|
||||
"sort": [{"playlist_id": {"order": "asc"}}],
|
||||
"_source": ["playlist_id", "playlist_thumbnail"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_playlist", data, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
def __init__(self, task):
|
||||
self.task = task
|
||||
|
||||
def validate(self):
    """
    run thumbnail validation for every index listed in INDEX,
    an index reporting no documents is skipped
    """
    for index in self.INDEX:
        total = self._get_total(index["name"])
        if total:
            # the callback handles each page, results are not needed here
            IndexPaginate(
                index_name=index["name"],
                data=index["data"],
                size=1000,
                callback=ValidatorCallback,
                task=self.task,
                total=total,
            ).get_results()
|
||||
|
||||
@staticmethod
|
||||
def _get_total(index_name):
|
||||
"""get total documents in index"""
|
||||
path = f"{index_name}/_count"
|
||||
response, _ = ElasticWrap(path).get()
|
||||
|
||||
return response.get("count")
|
||||
|
||||
|
||||
class ThumbFilesystem:
|
||||
"""filesystem tasks for thumbnails"""
|
||||
"""sync thumbnail files to media files"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
MEDIA_DIR = CONFIG["application"]["videos"]
|
||||
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
|
||||
INDEX_NAME = "ta_video"
|
||||
|
||||
def sync(self):
|
||||
"""embed thumbnails to mediafiles"""
|
||||
video_list = self.get_thumb_list()
|
||||
self._embed_thumbs(video_list)
|
||||
def __init__(self, task=False):
|
||||
self.task = task
|
||||
|
||||
def get_thumb_list(self):
|
||||
"""get list of mediafiles and matching thumbnails"""
|
||||
pending = queue.PendingList()
|
||||
pending.get_download()
|
||||
pending.get_indexed()
|
||||
def embed(self):
|
||||
"""entry point"""
|
||||
data = {
|
||||
"query": {"match_all": {}},
|
||||
"_source": ["media_url", "youtube_id"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
index_name=self.INDEX_NAME,
|
||||
data=data,
|
||||
size=200,
|
||||
callback=EmbedCallback,
|
||||
task=self.task,
|
||||
total=self._get_total(),
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
video_list = []
|
||||
for video in pending.all_videos:
|
||||
video_id = video["youtube_id"]
|
||||
media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
|
||||
def _get_total(self):
    """get total documents in index

    Sends a GET to "<INDEX_NAME>/_count" through ElasticWrap and returns
    the "count" entry of the response, or None when the key is missing.
    """
    response, _ = ElasticWrap(f"{self.INDEX_NAME}/_count").get()
    total = response.get("count")

    return total
|
||||
|
||||
|
||||
class EmbedCallback:
|
||||
"""callback class to embed thumbnails"""
|
||||
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
MEDIA_DIR = EnvironmentSettings.MEDIA_DIR
|
||||
FORMAT = MP4Cover.FORMAT_JPEG
|
||||
|
||||
def __init__(self, source, index_name, counter=0):
|
||||
self.source = source
|
||||
self.index_name = index_name
|
||||
self.counter = counter
|
||||
|
||||
def run(self):
|
||||
"""run embed"""
|
||||
for video in self.source:
|
||||
video_id = video["_source"]["youtube_id"]
|
||||
media_url = os.path.join(
|
||||
self.MEDIA_DIR, video["_source"]["media_url"]
|
||||
)
|
||||
thumb_path = os.path.join(
|
||||
self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path()
|
||||
)
|
||||
video_list.append(
|
||||
{
|
||||
"media_url": media_url,
|
||||
"thumb_path": thumb_path,
|
||||
}
|
||||
)
|
||||
if os.path.exists(thumb_path):
|
||||
self.embed(media_url, thumb_path)
|
||||
|
||||
return video_list
|
||||
def embed(self, media_url, thumb_path):
|
||||
"""embed thumb in single media file"""
|
||||
video = MP4(media_url)
|
||||
with open(thumb_path, "rb") as f:
|
||||
video["covr"] = [MP4Cover(f.read(), imageformat=self.FORMAT)]
|
||||
|
||||
@staticmethod
|
||||
def _embed_thumbs(video_list):
|
||||
"""rewrite the thumbnail into media file"""
|
||||
|
||||
counter = 1
|
||||
for video in video_list:
|
||||
# loop through all videos
|
||||
media_url = video["media_url"]
|
||||
thumb_path = video["thumb_path"]
|
||||
|
||||
mutagen_vid = MP4(media_url)
|
||||
with open(thumb_path, "rb") as f:
|
||||
mutagen_vid["covr"] = [
|
||||
MP4Cover(f.read(), imageformat=MP4Cover.FORMAT_JPEG)
|
||||
]
|
||||
mutagen_vid.save()
|
||||
if counter % 50 == 0:
|
||||
print(f"thumbnail write progress {counter}/{len(video_list)}")
|
||||
counter = counter + 1
|
||||
video.save()
|
||||
|
|
|
@ -10,6 +10,7 @@ from http import cookiejar
|
|||
from io import StringIO
|
||||
|
||||
import yt_dlp
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
|
@ -20,8 +21,9 @@ class YtWrap:
|
|||
"default_search": "ytsearch",
|
||||
"quiet": True,
|
||||
"check_formats": "selected",
|
||||
"socket_timeout": 3,
|
||||
"socket_timeout": 10,
|
||||
"extractor_retries": 3,
|
||||
"retries": 10,
|
||||
}
|
||||
|
||||
def __init__(self, obs_request, config=False):
|
||||
|
@ -47,21 +49,33 @@ class YtWrap:
|
|||
with yt_dlp.YoutubeDL(self.obs) as ydl:
|
||||
try:
|
||||
ydl.download([url])
|
||||
except yt_dlp.utils.DownloadError:
|
||||
print(f"{url}: failed to download.")
|
||||
return False
|
||||
except yt_dlp.utils.DownloadError as err:
|
||||
print(f"{url}: failed to download with message {err}")
|
||||
if "Temporary failure in name resolution" in str(err):
|
||||
raise ConnectionError("lost the internet, abort!") from err
|
||||
|
||||
return True
|
||||
return False, str(err)
|
||||
|
||||
return True, True
|
||||
|
||||
def extract(self, url):
|
||||
"""make extract request"""
|
||||
try:
|
||||
response = yt_dlp.YoutubeDL(self.obs).extract_info(url)
|
||||
except cookiejar.LoadError:
|
||||
print("cookie file is invalid")
|
||||
except cookiejar.LoadError as err:
|
||||
print(f"cookie file is invalid: {err}")
|
||||
return False
|
||||
except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError):
|
||||
print(f"{url}: failed to get info from youtube")
|
||||
except yt_dlp.utils.ExtractorError as err:
|
||||
print(f"{url}: failed to extract with message: {err}, continue...")
|
||||
return False
|
||||
except yt_dlp.utils.DownloadError as err:
|
||||
if "This channel does not have a" in str(err):
|
||||
return False
|
||||
|
||||
print(f"{url}: failed to get info from youtube with message {err}")
|
||||
if "Temporary failure in name resolution" in str(err):
|
||||
raise ConnectionError("lost the internet, abort!") from err
|
||||
|
||||
return False
|
||||
|
||||
return response
|
||||
|
@ -73,6 +87,7 @@ class CookieHandler:
|
|||
def __init__(self, config):
|
||||
self.cookie_io = False
|
||||
self.config = config
|
||||
self.cache_dir = EnvironmentSettings.CACHE_DIR
|
||||
|
||||
def get(self):
|
||||
"""get cookie io stream"""
|
||||
|
@ -82,8 +97,9 @@ class CookieHandler:
|
|||
|
||||
def import_cookie(self):
|
||||
"""import cookie from file"""
|
||||
cache_path = self.config["application"]["cache_dir"]
|
||||
import_path = os.path.join(cache_path, "import", "cookies.google.txt")
|
||||
import_path = os.path.join(
|
||||
self.cache_dir, "import", "cookies.google.txt"
|
||||
)
|
||||
|
||||
try:
|
||||
with open(import_path, encoding="utf-8") as cookie_file:
|
||||
|
@ -98,10 +114,10 @@ class CookieHandler:
|
|||
print("cookie: import successful")
|
||||
|
||||
def set_cookie(self, cookie):
|
||||
"""set cookie str and activate in cofig"""
|
||||
RedisArchivist().set_message("cookie", cookie)
|
||||
"""set cookie str and activate in config"""
|
||||
RedisArchivist().set_message("cookie", cookie, save=True)
|
||||
path = ".downloads.cookie_import"
|
||||
RedisArchivist().set_message("config", True, path=path)
|
||||
RedisArchivist().set_message("config", True, path=path, save=True)
|
||||
self.config["downloads"]["cookie_import"] = True
|
||||
print("cookie: activated and stored in Redis")
|
||||
|
||||
|
|
|
@ -6,14 +6,13 @@ functionality:
|
|||
- move to archive
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
from home.src.download.queue import PendingList
|
||||
from home.src.download.subscriptions import PlaylistSubscription
|
||||
from home.src.download.yt_dlp_base import CookieHandler, YtWrap
|
||||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.channel import YoutubeChannel
|
||||
from home.src.index.comments import CommentList
|
||||
|
@ -21,8 +20,8 @@ from home.src.index.playlist import YoutubePlaylist
|
|||
from home.src.index.video import YoutubeVideo, index_new_video
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import clean_string, ignore_filelist
|
||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class DownloadPostProcess:
|
||||
|
@ -51,7 +50,7 @@ class DownloadPostProcess:
|
|||
return
|
||||
|
||||
print(f"auto delete older than {autodelete_days} days")
|
||||
now_lte = self.now - autodelete_days * 24 * 60 * 60
|
||||
now_lte = str(self.now - autodelete_days * 24 * 60 * 60)
|
||||
data = {
|
||||
"query": {"range": {"player.watched_date": {"lte": now_lte}}},
|
||||
"sort": [{"player.watched_date": {"order": "asc"}}],
|
||||
|
@ -64,7 +63,7 @@ class DownloadPostProcess:
|
|||
if "autodelete_days" in value:
|
||||
autodelete_days = value.get("autodelete_days")
|
||||
print(f"{channel_id}: delete older than {autodelete_days}d")
|
||||
now_lte = self.now - autodelete_days * 24 * 60 * 60
|
||||
now_lte = str(self.now - autodelete_days * 24 * 60 * 60)
|
||||
must_list = [
|
||||
{"range": {"player.watched_date": {"lte": now_lte}}},
|
||||
{"term": {"channel.channel_id": {"value": channel_id}}},
|
||||
|
@ -96,7 +95,7 @@ class DownloadPostProcess:
|
|||
def validate_playlists(self):
|
||||
"""look for playlist needing to update"""
|
||||
for id_c, channel_id in enumerate(self.download.channels):
|
||||
channel = YoutubeChannel(channel_id)
|
||||
channel = YoutubeChannel(channel_id, task=self.download.task)
|
||||
overwrites = self.pending.channel_overwrites.get(channel_id, False)
|
||||
if overwrites and overwrites.get("index_playlists"):
|
||||
# validate from remote
|
||||
|
@ -125,28 +124,22 @@ class DownloadPostProcess:
|
|||
|
||||
def _notify_playlist_progress(self, all_channel_playlist, id_c, id_p):
|
||||
"""notify to UI"""
|
||||
title = (
|
||||
"Processing playlists for channels: "
|
||||
+ f"{id_c + 1}/{len(self.download.channels)}"
|
||||
)
|
||||
message = f"Progress: {id_p + 1}/{len(all_channel_playlist)}"
|
||||
key = "message:download"
|
||||
mess_dict = {
|
||||
"status": key,
|
||||
"level": "info",
|
||||
"title": title,
|
||||
"message": message,
|
||||
}
|
||||
if id_p + 1 == len(all_channel_playlist):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
if not self.download.task:
|
||||
return
|
||||
|
||||
RedisArchivist().set_message(key, mess_dict, expire=expire)
|
||||
total_channel = len(self.download.channels)
|
||||
total_playlist = len(all_channel_playlist)
|
||||
|
||||
message = [
|
||||
f"Post Processing Channels: {id_c}/{total_channel}",
|
||||
f"Validate Playlists {id_p + 1}/{total_playlist}",
|
||||
]
|
||||
progress = (id_c + 1) / total_channel
|
||||
self.download.task.send_progress(message, progress=progress)
|
||||
|
||||
def get_comments(self):
|
||||
"""get comments from youtube"""
|
||||
CommentList(self.download.videos).index(notify=True)
|
||||
CommentList(self.download.videos, task=self.download.task).index()
|
||||
|
||||
|
||||
class VideoDownloader:
|
||||
|
@ -155,159 +148,114 @@ class VideoDownloader:
|
|||
if not initiated with list, take from queue
|
||||
"""
|
||||
|
||||
MSG = "message:download"
|
||||
|
||||
def __init__(self, youtube_id_list=False):
|
||||
def __init__(self, youtube_id_list=False, task=False):
|
||||
self.obs = False
|
||||
self.video_overwrites = False
|
||||
self.youtube_id_list = youtube_id_list
|
||||
self.task = task
|
||||
self.config = AppConfig().config
|
||||
self.cache_dir = EnvironmentSettings.CACHE_DIR
|
||||
self.media_dir = EnvironmentSettings.MEDIA_DIR
|
||||
self._build_obs()
|
||||
self.channels = set()
|
||||
self.videos = set()
|
||||
|
||||
def run_queue(self):
|
||||
def run_queue(self, auto_only=False):
|
||||
"""setup download queue in redis loop until no more items"""
|
||||
self._setup_queue()
|
||||
|
||||
queue = RedisQueue(queue_name="dl_queue")
|
||||
|
||||
limit_queue = self.config["downloads"]["limit_count"]
|
||||
if limit_queue:
|
||||
queue.trim(limit_queue - 1)
|
||||
|
||||
self._get_overwrites()
|
||||
while True:
|
||||
youtube_data = queue.get_next()
|
||||
if not youtube_data:
|
||||
video_data = self._get_next(auto_only)
|
||||
if self.task.is_stopped() or not video_data:
|
||||
self._reset_auto()
|
||||
break
|
||||
|
||||
try:
|
||||
youtube_data = json.loads(youtube_data)
|
||||
except json.JSONDecodeError: # This many not be necessary
|
||||
continue
|
||||
|
||||
youtube_id = youtube_data.get("youtube_id")
|
||||
|
||||
tmp_vid_type = youtube_data.get(
|
||||
"vid_type", VideoTypeEnum.VIDEOS.value
|
||||
)
|
||||
video_type = VideoTypeEnum(tmp_vid_type)
|
||||
print(f"Downloading type: {video_type}")
|
||||
youtube_id = video_data.get("youtube_id")
|
||||
print(f"{youtube_id}: Downloading video")
|
||||
self._notify(video_data, "Validate download format")
|
||||
|
||||
success = self._dl_single_vid(youtube_id)
|
||||
if not success:
|
||||
continue
|
||||
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Indexing....",
|
||||
"message": "Add video metadata to index.",
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=60)
|
||||
self._notify(video_data, "Add video metadata to index", progress=1)
|
||||
|
||||
vid_dict = index_new_video(
|
||||
youtube_id,
|
||||
video_overwrites=self.video_overwrites,
|
||||
video_type=video_type,
|
||||
video_type=VideoTypeEnum(video_data["vid_type"]),
|
||||
)
|
||||
self.channels.add(vid_dict["channel"]["channel_id"])
|
||||
self.videos.add(vid_dict["youtube_id"])
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Moving....",
|
||||
"message": "Moving downloaded file to storage folder",
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict)
|
||||
|
||||
if queue.has_item():
|
||||
message = "Continue with next video."
|
||||
else:
|
||||
message = "Download queue is finished."
|
||||
|
||||
self._notify(video_data, "Move downloaded file to archive")
|
||||
self.move_to_archive(vid_dict)
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": "Completed",
|
||||
"message": message,
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=10)
|
||||
self._delete_from_pending(youtube_id)
|
||||
|
||||
# post processing
|
||||
self._add_subscribed_channels()
|
||||
DownloadPostProcess(self).run()
|
||||
|
||||
def _setup_queue(self):
|
||||
"""setup required and validate"""
|
||||
if self.config["downloads"]["cookie_import"]:
|
||||
valid = CookieHandler(self.config).validate()
|
||||
if not valid:
|
||||
return
|
||||
return self.videos
|
||||
|
||||
def _notify(self, video_data, message, progress=False):
    """send progress notification to task

    Does nothing when no task is attached. Otherwise sends two lines to
    the task: a "Processing <Type>: <title>" header built from the
    video_data entry, followed by the passed message.
    """
    if self.task:
        # enum value with trailing "s" removed and title-cased
        vid_type = VideoTypeEnum(video_data["vid_type"]).value
        typ = vid_type.rstrip("s").title()
        title = video_data.get("title")
        lines = [f"Processing {typ}: {title}", message]
        self.task.send_progress(lines, progress=progress)
|
||||
|
||||
def _get_next(self, auto_only):
    """get next item in queue

    Searches ta_download for a single document with status "pending" and
    no "message" field, sorted by auto_start descending, then timestamp
    ascending. With auto_only set, only documents with auto_start true
    match. Returns the _source of the first hit, or False without hits.
    """
    filters = [{"term": {"status": {"value": "pending"}}}]
    if auto_only:
        filters.append({"term": {"auto_start": {"value": True}}})

    query = {
        "bool": {
            "must": filters,
            # documents carrying a "message" field are excluded
            "must_not": [{"exists": {"field": "message"}}],
        }
    }
    body = {
        "size": 1,
        "query": query,
        "sort": [
            {"auto_start": {"order": "desc"}},
            {"timestamp": {"order": "asc"}},
        ],
    }
    response, _ = ElasticWrap("ta_download/_search").get(data=body)
    hits = response["hits"]["hits"]

    return hits[0]["_source"] if hits else False
|
||||
|
||||
def _get_overwrites(self):
    """get channel overwrites

    Builds a PendingList, loads its download and channel data and keeps
    the resulting video_overwrites on the instance.
    """
    pending_list = PendingList()
    pending_list.get_download()
    pending_list.get_channels()

    overwrites = pending_list.video_overwrites
    self.video_overwrites = overwrites
|
||||
|
||||
def add_pending(self):
    """add pending videos to download queue

    Publishes a "scanning" message, serializes every pending video to a
    json string with youtube_id and vid_type, and pushes the list to the
    "dl_queue" redis queue. With nothing pending, an error message is
    published instead and nothing is queued.
    """
    scanning = {
        "status": self.MSG,
        "level": "info",
        "title": "Looking for videos to download",
        "message": "Scanning your download queue.",
    }
    RedisArchivist().set_message(self.MSG, scanning, expire=True)

    pending = PendingList()
    pending.get_download()

    to_add = []
    for video in pending.all_pending:
        item = {
            "youtube_id": video["youtube_id"],
            # Using .value in default val to match what would be
            # decoded when parsing json if not set
            "vid_type": video.get("vid_type", VideoTypeEnum.VIDEOS.value),
        }
        to_add.append(json.dumps(item))

    if to_add:
        RedisQueue(queue_name="dl_queue").add_list(to_add)
        return

    # there is nothing pending
    print("download queue is empty")
    empty = {
        "status": self.MSG,
        "level": "error",
        "title": "Download queue is empty",
        "message": "Add some videos to the queue first.",
    }
    RedisArchivist().set_message(self.MSG, empty, expire=True)
|
||||
|
||||
def _progress_hook(self, response):
|
||||
"""process the progress_hooks from yt_dlp"""
|
||||
title = "Downloading: " + response["info_dict"]["title"]
|
||||
|
||||
progress = False
|
||||
try:
|
||||
size = response.get("_total_bytes_str")
|
||||
if size.strip() == "N/A":
|
||||
size = response.get("_total_bytes_estimate_str", "N/A")
|
||||
|
||||
percent = response["_percent_str"]
|
||||
size = response["_total_bytes_str"]
|
||||
progress = float(percent.strip("%")) / 100
|
||||
speed = response["_speed_str"]
|
||||
eta = response["_eta_str"]
|
||||
message = f"{percent} of {size} at {speed} - time left: {eta}"
|
||||
except KeyError:
|
||||
message = "processing"
|
||||
|
||||
mess_dict = {
|
||||
"status": self.MSG,
|
||||
"level": "info",
|
||||
"title": title,
|
||||
"message": message,
|
||||
}
|
||||
RedisArchivist().set_message(self.MSG, mess_dict, expire=True)
|
||||
if self.task:
|
||||
title = response["info_dict"]["title"]
|
||||
self.task.send_progress([title, message], progress=progress)
|
||||
|
||||
def _build_obs(self):
|
||||
"""collection to build all obs passed to yt-dlp"""
|
||||
|
@ -318,27 +266,23 @@ class VideoDownloader:
|
|||
def _build_obs_basic(self):
|
||||
"""initial obs"""
|
||||
self.obs = {
|
||||
"default_search": "ytsearch",
|
||||
"merge_output_format": "mp4",
|
||||
"outtmpl": (
|
||||
self.config["application"]["cache_dir"]
|
||||
+ "/download/%(id)s.mp4"
|
||||
),
|
||||
"outtmpl": (self.cache_dir + "/download/%(id)s.mp4"),
|
||||
"progress_hooks": [self._progress_hook],
|
||||
"noprogress": True,
|
||||
"quiet": True,
|
||||
"continuedl": True,
|
||||
"retries": 3,
|
||||
"writethumbnail": False,
|
||||
"noplaylist": True,
|
||||
"check_formats": "selected",
|
||||
"socket_timeout": 3,
|
||||
}
|
||||
|
||||
def _build_obs_user(self):
|
||||
"""build user customized options"""
|
||||
if self.config["downloads"]["format"]:
|
||||
self.obs["format"] = self.config["downloads"]["format"]
|
||||
if self.config["downloads"]["format_sort"]:
|
||||
format_sort = self.config["downloads"]["format_sort"]
|
||||
format_sort_list = [i.strip() for i in format_sort.split(",")]
|
||||
self.obs["format_sort"] = format_sort_list
|
||||
if self.config["downloads"]["limit_speed"]:
|
||||
self.obs["ratelimit"] = (
|
||||
self.config["downloads"]["limit_speed"] * 1024
|
||||
|
@ -398,7 +342,7 @@ class VideoDownloader:
|
|||
if format_overwrite:
|
||||
obs["format"] = format_overwrite
|
||||
|
||||
dl_cache = self.config["application"]["cache_dir"] + "/download/"
|
||||
dl_cache = self.cache_dir + "/download/"
|
||||
|
||||
# check if already in cache to continue from there
|
||||
all_cached = ignore_filelist(os.listdir(dl_cache))
|
||||
|
@ -406,7 +350,9 @@ class VideoDownloader:
|
|||
if youtube_id in file_name:
|
||||
obs["outtmpl"] = os.path.join(dl_cache, file_name)
|
||||
|
||||
success = YtWrap(obs, self.config).download(youtube_id)
|
||||
success, message = YtWrap(obs, self.config).download(youtube_id)
|
||||
if not success:
|
||||
self._handle_error(youtube_id, message)
|
||||
|
||||
if self.obs["writethumbnail"]:
|
||||
# webp files don't get cleaned up automatically
|
||||
|
@ -418,29 +364,28 @@ class VideoDownloader:
|
|||
|
||||
return success
|
||||
|
||||
@staticmethod
def _handle_error(youtube_id, message):
    """store error message

    Posts a partial document update to ta_download that sets the
    "message" field of the entry identified by youtube_id.
    """
    path = f"ta_download/_update/{youtube_id}"
    _, _ = ElasticWrap(path).post(data={"doc": {"message": message}})
|
||||
|
||||
def move_to_archive(self, vid_dict):
|
||||
"""move downloaded video from cache to archive"""
|
||||
videos = self.config["application"]["videos"]
|
||||
host_uid = self.config["application"]["HOST_UID"]
|
||||
host_gid = self.config["application"]["HOST_GID"]
|
||||
channel_name = clean_string(vid_dict["channel"]["channel_name"])
|
||||
if len(channel_name) <= 3:
|
||||
# fall back to channel id
|
||||
channel_name = vid_dict["channel"]["channel_id"]
|
||||
# make archive folder with correct permissions
|
||||
new_folder = os.path.join(videos, channel_name)
|
||||
if not os.path.exists(new_folder):
|
||||
os.makedirs(new_folder)
|
||||
host_uid = EnvironmentSettings.HOST_UID
|
||||
host_gid = EnvironmentSettings.HOST_GID
|
||||
# make folder
|
||||
folder = os.path.join(
|
||||
self.media_dir, vid_dict["channel"]["channel_id"]
|
||||
)
|
||||
if not os.path.exists(folder):
|
||||
os.makedirs(folder)
|
||||
if host_uid and host_gid:
|
||||
os.chown(new_folder, host_uid, host_gid)
|
||||
# find real filename
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
all_cached = ignore_filelist(os.listdir(cache_dir + "/download/"))
|
||||
for file_str in all_cached:
|
||||
if vid_dict["youtube_id"] in file_str:
|
||||
old_file = file_str
|
||||
old_path = os.path.join(cache_dir, "download", old_file)
|
||||
new_path = os.path.join(videos, vid_dict["media_url"])
|
||||
os.chown(folder, host_uid, host_gid)
|
||||
# move media file
|
||||
media_file = vid_dict["youtube_id"] + ".mp4"
|
||||
old_path = os.path.join(self.cache_dir, "download", media_file)
|
||||
new_path = os.path.join(self.media_dir, vid_dict["media_url"])
|
||||
# move media file and fix permission
|
||||
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
|
||||
if host_uid and host_gid:
|
||||
|
@ -449,7 +394,7 @@ class VideoDownloader:
|
|||
@staticmethod
|
||||
def _delete_from_pending(youtube_id):
|
||||
"""delete downloaded video from pending index if its there"""
|
||||
path = f"ta_download/_doc/{youtube_id}"
|
||||
path = f"ta_download/_doc/{youtube_id}?refresh=true"
|
||||
_, _ = ElasticWrap(path).delete()
|
||||
|
||||
def _add_subscribed_channels(self):
|
||||
|
@ -463,3 +408,18 @@ class VideoDownloader:
|
|||
self.channels.add(channel_id)
|
||||
|
||||
return
|
||||
|
||||
def _reset_auto(self):
    """reset autostart to defaults after queue stop

    Runs an update_by_query on ta_download that sets auto_start to false
    on every document where it is currently true, and prints the number
    of updated documents when the response reports a truthy count.
    """
    script = {
        "source": "ctx._source.auto_start = false",
        "lang": "painless",
    }
    body = {
        "query": {"term": {"auto_start": {"value": True}}},
        "script": script,
    }
    response, _ = ElasticWrap("ta_download/_update_by_query").post(data=body)

    updated = response.get("updated")
    if not updated:
        return

    print(f"[download] reset auto start on {updated} videos.")
|
||||
|
|
|
@ -13,17 +13,22 @@ from datetime import datetime
|
|||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import get_mapping, ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class ElasticBackup:
|
||||
"""dump index to nd-json files for later bulk import"""
|
||||
|
||||
def __init__(self, reason=False):
|
||||
INDEX_SPLIT = ["comment"]
|
||||
CACHE_DIR = EnvironmentSettings.CACHE_DIR
|
||||
BACKUP_DIR = os.path.join(CACHE_DIR, "backup")
|
||||
|
||||
def __init__(self, reason=False, task=False):
|
||||
self.config = AppConfig().config
|
||||
self.cache_dir = self.config["application"]["cache_dir"]
|
||||
self.timestamp = datetime.now().strftime("%Y%m%d")
|
||||
self.index_config = get_mapping()
|
||||
self.reason = reason
|
||||
self.task = task
|
||||
|
||||
def backup_all_indexes(self):
|
||||
"""backup all indexes, add reason to init"""
|
||||
|
@ -31,6 +36,8 @@ class ElasticBackup:
|
|||
if not self.reason:
|
||||
raise ValueError("missing backup reason in ElasticBackup")
|
||||
|
||||
if self.task:
|
||||
self.task.send_progress(["Scanning your index."])
|
||||
for index in self.index_config:
|
||||
index_name = index["index_name"]
|
||||
print(f"backup: export in progress for {index_name}")
|
||||
|
@ -40,33 +47,46 @@ class ElasticBackup:
|
|||
|
||||
self.backup_index(index_name)
|
||||
|
||||
if self.task:
|
||||
self.task.send_progress(["Compress files to zip archive."])
|
||||
self.zip_it()
|
||||
if self.reason == "auto":
|
||||
self.rotate_backup()
|
||||
|
||||
@staticmethod
|
||||
def backup_index(index_name):
|
||||
def backup_index(self, index_name):
|
||||
"""export all documents of a single index"""
|
||||
data = {
|
||||
"query": {"match_all": {}},
|
||||
"sort": [{"_doc": {"order": "desc"}}],
|
||||
paginate_kwargs = {
|
||||
"data": {"query": {"match_all": {}}},
|
||||
"keep_source": True,
|
||||
"callback": BackupCallback,
|
||||
"task": self.task,
|
||||
"total": self._get_total(index_name),
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
f"ta_{index_name}", data, keep_source=True, callback=BackupCallback
|
||||
)
|
||||
|
||||
if index_name in self.INDEX_SPLIT:
|
||||
paginate_kwargs.update({"size": 200})
|
||||
|
||||
paginate = IndexPaginate(f"ta_{index_name}", **paginate_kwargs)
|
||||
_ = paginate.get_results()
|
||||
|
||||
@staticmethod
def _get_total(index_name):
    """get total documents in index

    index_name is passed without the "ta_" prefix, which is added here.
    Returns the "count" entry of the _count response, or None when the
    key is missing.
    """
    response, _ = ElasticWrap(f"ta_{index_name}/_count").get()
    total = response.get("count")

    return total
|
||||
|
||||
def zip_it(self):
|
||||
"""pack it up into single zip file"""
|
||||
file_name = f"ta_backup-{self.timestamp}-{self.reason}.zip"
|
||||
folder = os.path.join(self.cache_dir, "backup")
|
||||
|
||||
to_backup = []
|
||||
for file in os.listdir(folder):
|
||||
for file in os.listdir(self.BACKUP_DIR):
|
||||
if file.endswith(".json"):
|
||||
to_backup.append(os.path.join(folder, file))
|
||||
to_backup.append(os.path.join(self.BACKUP_DIR, file))
|
||||
|
||||
backup_file = os.path.join(folder, file_name)
|
||||
backup_file = os.path.join(self.BACKUP_DIR, file_name)
|
||||
|
||||
comp = zipfile.ZIP_DEFLATED
|
||||
with zipfile.ZipFile(backup_file, "w", compression=comp) as zip_f:
|
||||
|
@ -79,7 +99,7 @@ class ElasticBackup:
|
|||
|
||||
def post_bulk_restore(self, file_name):
|
||||
"""send bulk to es"""
|
||||
file_path = os.path.join(self.cache_dir, file_name)
|
||||
file_path = os.path.join(self.CACHE_DIR, file_name)
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
data = f.read()
|
||||
|
||||
|
@ -90,9 +110,7 @@ class ElasticBackup:
|
|||
|
||||
def get_all_backup_files(self):
|
||||
"""build all available backup files for view"""
|
||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||
backup_files = os.listdir(backup_dir)
|
||||
all_backup_files = ignore_filelist(backup_files)
|
||||
all_backup_files = ignore_filelist(os.listdir(self.BACKUP_DIR))
|
||||
all_available_backups = [
|
||||
i
|
||||
for i in all_backup_files
|
||||
|
@ -101,24 +119,36 @@ class ElasticBackup:
|
|||
all_available_backups.sort(reverse=True)
|
||||
|
||||
backup_dicts = []
|
||||
for backup_file in all_available_backups:
|
||||
file_split = backup_file.split("-")
|
||||
if len(file_split) == 2:
|
||||
timestamp = file_split[1].strip(".zip")
|
||||
reason = False
|
||||
elif len(file_split) == 3:
|
||||
timestamp = file_split[1]
|
||||
reason = file_split[2].strip(".zip")
|
||||
|
||||
to_add = {
|
||||
"filename": backup_file,
|
||||
"timestamp": timestamp,
|
||||
"reason": reason,
|
||||
}
|
||||
backup_dicts.append(to_add)
|
||||
for filename in all_available_backups:
|
||||
data = self.build_backup_file_data(filename)
|
||||
backup_dicts.append(data)
|
||||
|
||||
return backup_dicts
|
||||
|
||||
def build_backup_file_data(self, filename):
    """build metadata of single backup file

    The file name is expected to look like "<prefix>-<timestamp>.zip" or
    "<prefix>-<timestamp>-<reason>.zip"; everything after the second
    hyphen is treated as the reason, so a reason may contain hyphens.

    Returns False when the file does not exist in BACKUP_DIR, otherwise
    a dict with filename, file_path, file_size, timestamp and reason.
    timestamp and reason are False when the name does not carry them.
    """
    file_path = os.path.join(self.BACKUP_DIR, filename)
    if not os.path.exists(file_path):
        return False

    # removesuffix drops exactly the extension; str.strip(".zip") would
    # remove any leading/trailing '.', 'z', 'i', 'p' characters and so
    # mangle values like "pip" or "zip"
    file_split = filename.removesuffix(".zip").split("-", maxsplit=2)
    timestamp = file_split[1] if len(file_split) > 1 else False
    reason = file_split[2] if len(file_split) > 2 else False

    data = {
        "filename": filename,
        "file_path": file_path,
        "file_size": os.path.getsize(file_path),
        "timestamp": timestamp,
        "reason": reason,
    }

    return data
|
||||
|
||||
def restore(self, filename):
|
||||
"""
|
||||
restore from backup zip file
|
||||
|
@ -129,22 +159,19 @@ class ElasticBackup:
|
|||
|
||||
def _unpack_zip_backup(self, filename):
|
||||
"""extract backup zip and return filelist"""
|
||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||
file_path = os.path.join(backup_dir, filename)
|
||||
file_path = os.path.join(self.BACKUP_DIR, filename)
|
||||
|
||||
with zipfile.ZipFile(file_path, "r") as z:
|
||||
zip_content = z.namelist()
|
||||
z.extractall(backup_dir)
|
||||
z.extractall(self.BACKUP_DIR)
|
||||
|
||||
return zip_content
|
||||
|
||||
def _restore_json_files(self, zip_content):
|
||||
"""go through the unpacked files and restore"""
|
||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||
|
||||
for json_f in zip_content:
|
||||
|
||||
file_name = os.path.join(backup_dir, json_f)
|
||||
for idx, json_f in enumerate(zip_content):
|
||||
self._notify_restore(idx, json_f, len(zip_content))
|
||||
file_name = os.path.join(self.BACKUP_DIR, json_f)
|
||||
|
||||
if not json_f.startswith("es_") or not json_f.endswith(".json"):
|
||||
os.remove(file_name)
|
||||
|
@ -154,6 +181,12 @@ class ElasticBackup:
|
|||
self.post_bulk_restore(file_name)
|
||||
os.remove(file_name)
|
||||
|
||||
def _notify_restore(self, idx, json_f, total_files):
|
||||
"""notify restore progress"""
|
||||
message = [f"Restore index from json backup file {json_f}."]
|
||||
progress = (idx + 1) / total_files
|
||||
self.task.send_progress(message_lines=message, progress=progress)
|
||||
|
||||
@staticmethod
|
||||
def index_exists(index_name):
|
||||
"""check if index already exists to skip"""
|
||||
|
@ -175,22 +208,32 @@ class ElasticBackup:
|
|||
print("no backup files to rotate")
|
||||
return
|
||||
|
||||
backup_dir = os.path.join(self.cache_dir, "backup")
|
||||
|
||||
all_to_delete = auto[rotate:]
|
||||
for to_delete in all_to_delete:
|
||||
file_path = os.path.join(backup_dir, to_delete["filename"])
|
||||
print(f"remove old backup file: {file_path}")
|
||||
os.remove(file_path)
|
||||
self.delete_file(to_delete["filename"])
|
||||
|
||||
def delete_file(self, filename):
    """delete backup file

    Removes filename from BACKUP_DIR. Returns the full path of the
    removed file, or False when no such file exists.
    """
    file_path = os.path.join(self.BACKUP_DIR, filename)
    if os.path.exists(file_path):
        print(f"remove old backup file: {file_path}")
        os.remove(file_path)
        return file_path

    print(f"backup file not found: {filename}")
    return False
|
||||
|
||||
|
||||
class BackupCallback:
|
||||
"""handle backup ndjson writer as callback for IndexPaginate"""
|
||||
|
||||
def __init__(self, source, index_name):
|
||||
def __init__(self, source, index_name, counter=0):
|
||||
self.source = source
|
||||
self.index_name = index_name
|
||||
self.counter = counter
|
||||
self.timestamp = datetime.now().strftime("%Y%m%d")
|
||||
self.cache_dir = EnvironmentSettings.CACHE_DIR
|
||||
|
||||
def run(self):
|
||||
"""run the junk task"""
|
||||
|
@ -217,8 +260,8 @@ class BackupCallback:
|
|||
|
||||
def _write_es_json(self, file_content):
|
||||
"""write nd-json file for es _bulk API to disk"""
|
||||
cache_dir = AppConfig().config["application"]["cache_dir"]
|
||||
file_name = f"es_{self.index_name.lstrip('ta_')}-{self.timestamp}.json"
|
||||
file_path = os.path.join(cache_dir, "backup", file_name)
|
||||
index = self.index_name.lstrip("ta_")
|
||||
file_name = f"es_{index}-{self.timestamp}-{self.counter}.json"
|
||||
file_path = os.path.join(self.cache_dir, "backup", file_name)
|
||||
with open(file_path, "a+", encoding="utf-8") as f:
|
||||
f.write(file_content)
|
||||
|
|
|
@ -4,10 +4,14 @@ functionality:
|
|||
- reusable search_after to extract total index
|
||||
"""
|
||||
|
||||
# pylint: disable=missing-timeout
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from home.src.ta.config import AppConfig
|
||||
import urllib3
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class ElasticWrap:
|
||||
|
@ -15,69 +19,94 @@ class ElasticWrap:
|
|||
returns response json and status code tuple
|
||||
"""
|
||||
|
||||
def __init__(self, path, config=False):
|
||||
self.url = False
|
||||
self.auth = False
|
||||
self.path = path
|
||||
self.config = config
|
||||
self._get_config()
|
||||
def __init__(self, path: str):
|
||||
self.url: str = f"{EnvironmentSettings.ES_URL}/{path}"
|
||||
self.auth: tuple[str, str] = (
|
||||
EnvironmentSettings.ES_USER,
|
||||
EnvironmentSettings.ES_PASS,
|
||||
)
|
||||
|
||||
def _get_config(self):
|
||||
"""add config if not passed"""
|
||||
if not self.config:
|
||||
self.config = AppConfig().config
|
||||
if EnvironmentSettings.ES_DISABLE_VERIFY_SSL:
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
es_url = self.config["application"]["es_url"]
|
||||
self.auth = self.config["application"]["es_auth"]
|
||||
self.url = f"{es_url}/{self.path}"
|
||||
|
||||
def get(self, data=False):
|
||||
def get(
|
||||
self,
|
||||
data: bool | dict = False,
|
||||
timeout: int = 10,
|
||||
print_error: bool = True,
|
||||
) -> tuple[dict, int]:
|
||||
"""get data from es"""
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"auth": self.auth,
|
||||
"timeout": timeout,
|
||||
}
|
||||
|
||||
if EnvironmentSettings.ES_DISABLE_VERIFY_SSL:
|
||||
kwargs["verify"] = False
|
||||
|
||||
if data:
|
||||
response = requests.get(
|
||||
self.url, json=data, auth=self.auth, timeout=10
|
||||
)
|
||||
else:
|
||||
response = requests.get(self.url, auth=self.auth, timeout=10)
|
||||
if not response.ok:
|
||||
kwargs["json"] = data
|
||||
|
||||
response = requests.get(self.url, **kwargs)
|
||||
|
||||
if print_error and not response.ok:
|
||||
print(response.text)
|
||||
|
||||
return response.json(), response.status_code
|
||||
|
||||
def post(self, data=False, ndjson=False):
|
||||
def post(
|
||||
self, data: bool | dict = False, ndjson: bool = False
|
||||
) -> tuple[dict, int]:
|
||||
"""post data to es"""
|
||||
if ndjson:
|
||||
headers = {"Content-type": "application/x-ndjson"}
|
||||
payload = data
|
||||
else:
|
||||
headers = {"Content-type": "application/json"}
|
||||
payload = json.dumps(data)
|
||||
|
||||
if data:
|
||||
response = requests.post(
|
||||
self.url,
|
||||
data=payload,
|
||||
headers=headers,
|
||||
auth=self.auth,
|
||||
timeout=10,
|
||||
kwargs: dict[str, Any] = {"auth": self.auth}
|
||||
|
||||
if ndjson and data:
|
||||
kwargs.update(
|
||||
{
|
||||
"headers": {"Content-type": "application/x-ndjson"},
|
||||
"data": data,
|
||||
}
|
||||
)
|
||||
else:
|
||||
response = requests.post(
|
||||
self.url, headers=headers, auth=self.auth, timeout=10
|
||||
elif data:
|
||||
kwargs.update(
|
||||
{
|
||||
"headers": {"Content-type": "application/json"},
|
||||
"data": json.dumps(data),
|
||||
}
|
||||
)
|
||||
|
||||
if EnvironmentSettings.ES_DISABLE_VERIFY_SSL:
|
||||
kwargs["verify"] = False
|
||||
|
||||
response = requests.post(self.url, **kwargs)
|
||||
|
||||
if not response.ok:
|
||||
print(response.text)
|
||||
|
||||
return response.json(), response.status_code
|
||||
|
||||
def put(self, data, refresh=False):
|
||||
def put(
|
||||
self,
|
||||
data: bool | dict = False,
|
||||
refresh: bool = False,
|
||||
) -> tuple[dict, Any]:
|
||||
"""put data to es"""
|
||||
|
||||
if refresh:
|
||||
self.url = f"{self.url}/?refresh=true"
|
||||
response = requests.put(
|
||||
f"{self.url}", json=data, auth=self.auth, timeout=10
|
||||
)
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"json": data,
|
||||
"auth": self.auth,
|
||||
}
|
||||
|
||||
if EnvironmentSettings.ES_DISABLE_VERIFY_SSL:
|
||||
kwargs["verify"] = False
|
||||
|
||||
response = requests.put(self.url, **kwargs)
|
||||
|
||||
if not response.ok:
|
||||
print(response.text)
|
||||
print(data)
|
||||
|
@ -85,16 +114,25 @@ class ElasticWrap:
|
|||
|
||||
return response.json(), response.status_code
|
||||
|
||||
def delete(self, data=False, refresh=False):
|
||||
def delete(
|
||||
self,
|
||||
data: bool | dict = False,
|
||||
refresh: bool = False,
|
||||
) -> tuple[dict, Any]:
|
||||
"""delete document from es"""
|
||||
|
||||
if refresh:
|
||||
self.url = f"{self.url}/?refresh=true"
|
||||
|
||||
kwargs: dict[str, Any] = {"auth": self.auth}
|
||||
|
||||
if data:
|
||||
response = requests.delete(
|
||||
self.url, json=data, auth=self.auth, timeout=10
|
||||
)
|
||||
else:
|
||||
response = requests.delete(self.url, auth=self.auth, timeout=10)
|
||||
kwargs["json"] = data
|
||||
|
||||
if EnvironmentSettings.ES_DISABLE_VERIFY_SSL:
|
||||
kwargs["verify"] = False
|
||||
|
||||
response = requests.delete(self.url, **kwargs)
|
||||
|
||||
if not response.ok:
|
||||
print(response.text)
|
||||
|
@ -106,8 +144,10 @@ class IndexPaginate:
|
|||
"""use search_after to go through whole index
|
||||
kwargs:
|
||||
- size: int, overwrite DEFAULT_SIZE
|
||||
- keep_source: bool, keep _source key from es resutls
|
||||
- callback: obj, Class with run method collback for every loop
|
||||
- keep_source: bool, keep _source key from es results
|
||||
- callback: obj, Class implementing run method callback for every loop
|
||||
- task: task object to send notification
|
||||
- total: int, total items in index for progress message
|
||||
"""
|
||||
|
||||
DEFAULT_SIZE = 500
|
||||
|
@ -116,12 +156,10 @@ class IndexPaginate:
|
|||
self.index_name = index_name
|
||||
self.data = data
|
||||
self.pit_id = False
|
||||
self.size = kwargs.get("size")
|
||||
self.keep_source = kwargs.get("keep_source")
|
||||
self.callback = kwargs.get("callback")
|
||||
self.kwargs = kwargs
|
||||
|
||||
def get_results(self):
|
||||
"""get all results"""
|
||||
"""get all results, add task and total for notifications"""
|
||||
self.get_pit()
|
||||
self.validate_data()
|
||||
all_results = self.run_loop()
|
||||
|
@ -136,10 +174,16 @@ class IndexPaginate:
|
|||
|
||||
def validate_data(self):
|
||||
"""add pit and size to data"""
|
||||
if not self.data:
|
||||
self.data = {}
|
||||
|
||||
if "query" not in self.data.keys():
|
||||
self.data.update({"query": {"match_all": {}}})
|
||||
|
||||
if "sort" not in self.data.keys():
|
||||
self.data.update({"sort": [{"_doc": {"order": "desc"}}]})
|
||||
|
||||
self.data["size"] = self.size or self.DEFAULT_SIZE
|
||||
self.data["size"] = self.kwargs.get("size") or self.DEFAULT_SIZE
|
||||
self.data["pit"] = {"id": self.pit_id, "keep_alive": "10m"}
|
||||
|
||||
def run_loop(self):
|
||||
|
@ -149,30 +193,39 @@ class IndexPaginate:
|
|||
while True:
|
||||
response, _ = ElasticWrap("_search").get(data=self.data)
|
||||
all_hits = response["hits"]["hits"]
|
||||
if all_hits:
|
||||
for hit in all_hits:
|
||||
if self.keep_source:
|
||||
source = hit
|
||||
else:
|
||||
source = hit["_source"]
|
||||
|
||||
if not self.callback:
|
||||
all_results.append(source)
|
||||
|
||||
if self.callback:
|
||||
self.callback(all_hits, self.index_name).run()
|
||||
if counter % 10 == 0:
|
||||
print(f"{self.index_name}: processing page {counter}")
|
||||
counter = counter + 1
|
||||
|
||||
# update search_after with last hit data
|
||||
self.data["search_after"] = all_hits[-1]["sort"]
|
||||
else:
|
||||
if not all_hits:
|
||||
break
|
||||
|
||||
for hit in all_hits:
|
||||
if self.kwargs.get("keep_source"):
|
||||
all_results.append(hit)
|
||||
else:
|
||||
all_results.append(hit["_source"])
|
||||
|
||||
if self.kwargs.get("callback"):
|
||||
self.kwargs.get("callback")(
|
||||
all_hits, self.index_name, counter=counter
|
||||
).run()
|
||||
|
||||
if self.kwargs.get("task"):
|
||||
print(f"{self.index_name}: processing page {counter}")
|
||||
self._notify(len(all_results))
|
||||
|
||||
counter += 1
|
||||
|
||||
# update search_after with last hit data
|
||||
self.data["search_after"] = all_hits[-1]["sort"]
|
||||
|
||||
return all_results
|
||||
|
||||
def _notify(self, processed):
    """send progress notification on the task passed via kwargs

    processed: int, number of results collected so far
    builds the message from the index name and the 'total' kwarg
    and hands it to the send_progress method of the 'task' kwarg,
    does nothing if 'total' is missing or zero
    """
    total = self.kwargs.get("total")
    if not total:
        # total is an optional kwarg, without it there is no ratio to
        # report and the division below would raise, skip notification
        return

    progress = processed / total
    # remove the literal prefix only, lstrip("ta_") strips any leading
    # 't', 'a' and '_' characters and can eat into the index name
    index_clean = self.index_name.removeprefix("ta_").title()
    message = [f"Processing {index_clean}s {processed}/{total}"]
    self.kwargs.get("task").send_progress(message, progress=progress)
|
||||
|
||||
def clean_pit(self):
|
||||
"""delete pit from elastic search"""
|
||||
data = {"id": self.pit_id}
|
||||
ElasticWrap("_pit").delete(data=data)
|
||||
ElasticWrap("_pit").delete(data={"id": self.pit_id})
|
||||
|
|
|
@ -1,5 +1,17 @@
|
|||
{
|
||||
"index_config": [{
|
||||
"index_name": "config",
|
||||
"expected_map": {
|
||||
"config": {
|
||||
"type": "object",
|
||||
"enabled": false
|
||||
}
|
||||
},
|
||||
"expected_set": {
|
||||
"number_of_replicas": "0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"index_name": "channel",
|
||||
"expected_map": {
|
||||
"channel_id": {
|
||||
|
@ -37,7 +49,18 @@
|
|||
"type": "text"
|
||||
},
|
||||
"channel_last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"channel_tags": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"channel_overwrites": {
|
||||
"properties": {
|
||||
|
@ -51,7 +74,7 @@
|
|||
"type": "boolean"
|
||||
},
|
||||
"integrate_sponsorblock": {
|
||||
"type" : "boolean"
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +103,8 @@
|
|||
"index": false
|
||||
},
|
||||
"date_downloaded": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"channel": {
|
||||
"properties": {
|
||||
|
@ -119,7 +143,18 @@
|
|||
"type": "text"
|
||||
},
|
||||
"channel_last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"channel_tags": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"channel_overwrites": {
|
||||
"properties": {
|
||||
|
@ -133,7 +168,7 @@
|
|||
"type": "boolean"
|
||||
},
|
||||
"integrate_sponsorblock": {
|
||||
"type" : "boolean"
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -146,6 +181,9 @@
|
|||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"media_size": {
|
||||
"type": "long"
|
||||
},
|
||||
"tags": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
|
@ -173,7 +211,8 @@
|
|||
}
|
||||
},
|
||||
"vid_last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"youtube_id": {
|
||||
"type": "keyword"
|
||||
|
@ -197,19 +236,37 @@
|
|||
"comment_count": {
|
||||
"type": "long"
|
||||
},
|
||||
"stats" : {
|
||||
"properties" : {
|
||||
"average_rating" : {
|
||||
"type" : "float"
|
||||
"stats": {
|
||||
"properties": {
|
||||
"average_rating": {
|
||||
"type": "float"
|
||||
},
|
||||
"dislike_count" : {
|
||||
"type" : "long"
|
||||
"dislike_count": {
|
||||
"type": "long"
|
||||
},
|
||||
"like_count" : {
|
||||
"type" : "long"
|
||||
"like_count": {
|
||||
"type": "long"
|
||||
},
|
||||
"view_count" : {
|
||||
"type" : "long"
|
||||
"view_count": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
},
|
||||
"player": {
|
||||
"properties": {
|
||||
"duration": {
|
||||
"type": "long"
|
||||
},
|
||||
"duration_str": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"watched": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"watched_date": {
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -239,10 +296,35 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"streams": {
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"index": {
|
||||
"type": "short",
|
||||
"index": false
|
||||
},
|
||||
"codec": {
|
||||
"type": "text"
|
||||
},
|
||||
"width": {
|
||||
"type": "short"
|
||||
},
|
||||
"height": {
|
||||
"type": "short"
|
||||
},
|
||||
"bitrate": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sponsorblock": {
|
||||
"properties": {
|
||||
"last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"has_unlocked": {
|
||||
"type": "boolean"
|
||||
|
@ -250,28 +332,28 @@
|
|||
"is_enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"segments" : {
|
||||
"properties" : {
|
||||
"UUID" : {
|
||||
"segments": {
|
||||
"properties": {
|
||||
"UUID": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"actionType" : {
|
||||
"actionType": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"category" : {
|
||||
"category": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"locked" : {
|
||||
"type" : "short"
|
||||
"locked": {
|
||||
"type": "short"
|
||||
},
|
||||
"segment" : {
|
||||
"type" : "float"
|
||||
"segment": {
|
||||
"type": "float"
|
||||
},
|
||||
"videoDuration" : {
|
||||
"type" : "float"
|
||||
"videoDuration": {
|
||||
"type": "float"
|
||||
},
|
||||
"votes" : {
|
||||
"type" : "long"
|
||||
"votes": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -294,7 +376,8 @@
|
|||
"index_name": "download",
|
||||
"expected_map": {
|
||||
"timestamp": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"channel_id": {
|
||||
"type": "keyword"
|
||||
|
@ -330,6 +413,12 @@
|
|||
},
|
||||
"vid_type": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"auto_start": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"message": {
|
||||
"type": "text"
|
||||
}
|
||||
},
|
||||
"expected_set": {
|
||||
|
@ -386,7 +475,43 @@
|
|||
"type": "keyword"
|
||||
},
|
||||
"playlist_last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"playlist_entries": {
|
||||
"properties": {
|
||||
"downloaded": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"idx": {
|
||||
"type": "long"
|
||||
},
|
||||
"title": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256,
|
||||
"normalizer": "to_lower"
|
||||
}
|
||||
}
|
||||
},
|
||||
"uploader": {
|
||||
"type": "text",
|
||||
"analyzer": "english",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256,
|
||||
"normalizer": "to_lower"
|
||||
}
|
||||
}
|
||||
},
|
||||
"youtube_id": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"expected_set": {
|
||||
|
@ -440,10 +565,11 @@
|
|||
"type": "text"
|
||||
},
|
||||
"subtitle_last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"subtitle_index": {
|
||||
"type" : "long"
|
||||
"type": "long"
|
||||
},
|
||||
"subtitle_lang": {
|
||||
"type": "keyword"
|
||||
|
@ -452,7 +578,7 @@
|
|||
"type": "keyword"
|
||||
},
|
||||
"subtitle_line": {
|
||||
"type" : "text",
|
||||
"type": "text",
|
||||
"analyzer": "english"
|
||||
}
|
||||
},
|
||||
|
@ -475,7 +601,8 @@
|
|||
"type": "keyword"
|
||||
},
|
||||
"comment_last_refresh": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"comment_channel_id": {
|
||||
"type": "keyword"
|
||||
|
@ -486,13 +613,14 @@
|
|||
"type": "keyword"
|
||||
},
|
||||
"comment_text": {
|
||||
"type" : "text"
|
||||
"type": "text"
|
||||
},
|
||||
"comment_timestamp": {
|
||||
"type": "date"
|
||||
"type": "date",
|
||||
"format": "epoch_second"
|
||||
},
|
||||
"comment_time_text": {
|
||||
"type" : "text"
|
||||
"type": "text"
|
||||
},
|
||||
"comment_likecount": {
|
||||
"type": "long"
|
||||
|
|
|
@ -4,12 +4,12 @@ functionality:
|
|||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from os import environ
|
||||
from time import sleep
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.helper import get_mapping
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class ElasticSnapshot:
|
||||
|
@ -19,7 +19,7 @@ class ElasticSnapshot:
|
|||
REPO_SETTINGS = {
|
||||
"compress": "true",
|
||||
"chunk_size": "1g",
|
||||
"location": "/usr/share/elasticsearch/data/snapshot",
|
||||
"location": EnvironmentSettings.ES_SNAPSHOT_DIR,
|
||||
}
|
||||
POLICY = "ta_daily"
|
||||
|
||||
|
@ -254,7 +254,7 @@ class ElasticSnapshot:
|
|||
expected_format = "%Y-%m-%dT%H:%M:%S.%fZ"
|
||||
date = datetime.strptime(date_utc, expected_format)
|
||||
local_datetime = date.replace(tzinfo=ZoneInfo("localtime"))
|
||||
converted = local_datetime.astimezone(ZoneInfo(environ.get("TZ")))
|
||||
converted = local_datetime.astimezone(ZoneInfo(EnvironmentSettings.TZ))
|
||||
converted_str = converted.strftime("%Y-%m-%d %H:%M")
|
||||
|
||||
return converted_str
|
||||
|
|
|
@ -1,240 +0,0 @@
|
|||
"""
|
||||
Functionality:
|
||||
- collection of functions and tasks from frontend
|
||||
- called via user input
|
||||
"""
|
||||
|
||||
from home.src.download.subscriptions import (
|
||||
ChannelSubscription,
|
||||
PlaylistSubscription,
|
||||
)
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||
from home.src.ta.urlparser import Parser
|
||||
from home.tasks import (
|
||||
download_pending,
|
||||
index_channel_playlists,
|
||||
kill_dl,
|
||||
re_sync_thumbs,
|
||||
rescan_filesystem,
|
||||
run_backup,
|
||||
run_manual_import,
|
||||
run_restore_backup,
|
||||
subscribe_to,
|
||||
update_subscribed,
|
||||
)
|
||||
|
||||
|
||||
class PostData:
    """
    dispatch frontend http post values to their backend function
    long running jobs are handed over to celery tasks

    post_dict: dict, only the first key/value pair is evaluated,
    the key selects the handler and the value is its argument
    current_user: prefix for the user specific redis keys
    """

    def __init__(self, post_dict, current_user):
        self.post_dict = post_dict
        first_item = list(post_dict.items())[0]
        self.to_exec, self.exec_val = first_item
        self.current_user = current_user

    def run_task(self):
        """look up the handler for the posted key and return its result"""
        return self.exec_map()()

    def exec_map(self):
        """return the handler matching self.to_exec, KeyError if unknown"""
        handlers = {
            "change_view": self._change_view,
            "change_grid": self._change_grid,
            "rescan_pending": self._rescan_pending,
            "dl_pending": self._dl_pending,
            "queue": self._queue_handler,
            "unsubscribe": self._unsubscribe,
            "subscribe": self._subscribe,
            "sort_order": self._sort_order,
            "hide_watched": self._hide_watched,
            "show_subed_only": self._show_subed_only,
            "show_ignored_only": self._show_ignored_only,
            "manual-import": self._manual_import,
            "re-embed": self._re_embed,
            "db-backup": self._db_backup,
            "db-restore": self._db_restore,
            "fs-rescan": self._fs_rescan,
            "delete-playlist": self._delete_playlist,
            "find-playlists": self._find_playlists,
        }
        selected = handlers[self.to_exec]

        return selected

    def _change_view(self):
        """store new view, value is expected as 'origin:new_view'"""
        origin, new_view = self.exec_val.split(":")
        key = f"{self.current_user}:view:{origin}"
        print(f"change view: {key} to {new_view}")
        message = {"status": new_view}
        RedisArchivist().set_message(key, message)
        return {"success": True}

    def _change_grid(self):
        """store grid item count, clamped between 3 and 7"""
        grid_items = min(max(int(self.exec_val), 3), 7)

        key = f"{self.current_user}:grid_items"
        print(f"change grid items: {grid_items}")
        message = {"status": grid_items}
        RedisArchivist().set_message(key, message)
        return {"success": True}

    @staticmethod
    def _rescan_pending():
        """queue task to look for new items in subscribed channels"""
        print("rescan subscribed channels")
        update_subscribed.delay()
        return {"success": True}

    @staticmethod
    def _dl_pending():
        """queue the download task and remember its task id in redis"""
        print("download pending")
        task_id = download_pending.delay().id
        print(f"{task_id}: set task id")
        RedisArchivist().set_message("dl_queue_id", task_id)
        return {"success": True}

    def _queue_handler(self):
        """handle 'stop' and 'kill' queue controls from frontend"""
        command = self.exec_val
        if command == "stop":
            print("stopping download queue")
            RedisQueue(queue_name="dl_queue").clear()
        elif command == "kill":
            task_id = RedisArchivist().get_message("dl_queue_id")
            # only a string task id can be killed, ignore anything else
            if isinstance(task_id, str):
                print("brutally killing " + task_id)
                kill_dl(task_id)

        return {"success": True}

    def _unsubscribe(self):
        """unsubscribe from every channel or playlist parsed from value"""
        id_unsub = self.exec_val
        print(f"{id_unsub}: unsubscribe")
        for parsed in Parser(id_unsub).parse():
            item_type = parsed["type"]
            item_id = parsed["url"]
            if item_type == "playlist":
                PlaylistSubscription().change_subscribe(
                    item_id, subscribe_status=False
                )
            elif item_type == "channel":
                ChannelSubscription().change_subscribe(
                    item_id, channel_subscribed=False
                )
            else:
                raise ValueError("failed to process " + id_unsub)

        return {"success": True}

    def _subscribe(self):
        """queue subscribe task for channel or playlist, from js buttons"""
        id_sub = self.exec_val
        print(f"{id_sub}: subscribe")
        subscribe_to.delay(id_sub)
        return {"success": True}

    def _sort_order(self):
        """store sort direction for asc/desc, else store as sort field"""
        if self.exec_val in ["asc", "desc"]:
            key = f"{self.current_user}:sort_order"
        else:
            key = f"{self.current_user}:sort_by"

        RedisArchivist().set_message(key, {"status": self.exec_val})
        return {"success": True}

    def _hide_watched(self):
        """store toggle to hide watched videos, value is '0' or '1'"""
        key = f"{self.current_user}:hide_watched"
        is_enabled = bool(int(self.exec_val))
        message = {"status": is_enabled}
        print(f"toggle {key}: {message}")
        RedisArchivist().set_message(key, message)
        return {"success": True}

    def _show_subed_only(self):
        """store toggle to show subscribed channels only on channels page"""
        key = f"{self.current_user}:show_subed_only"
        is_enabled = bool(int(self.exec_val))
        message = {"status": is_enabled}
        print(f"toggle {key}: {message}")
        RedisArchivist().set_message(key, message)
        return {"success": True}

    def _show_ignored_only(self):
        """store filter for /downloads/ to show ignored only, value as is"""
        key = f"{self.current_user}:show_ignored_only"
        print(f"Filter download view ignored only: {self.exec_val}")
        RedisArchivist().set_message(key, {"status": self.exec_val})
        return {"success": True}

    @staticmethod
    def _manual_import():
        """queue manual import task, triggered from settings page"""
        print("starting manual import")
        run_manual_import.delay()
        return {"success": True}

    @staticmethod
    def _re_embed():
        """queue task to rewrite thumbnails into media files"""
        print("start video thumbnail embed process")
        re_sync_thumbs.delay()
        return {"success": True}

    @staticmethod
    def _db_backup():
        """queue manual es backup task, triggered from settings page"""
        print("backing up database")
        run_backup.delay("manual")
        return {"success": True}

    def _db_restore(self):
        """queue restore task, value is the backup filename"""
        print("restoring index from backup zip")
        run_restore_backup.delay(self.exec_val)
        return {"success": True}

    @staticmethod
    def _fs_rescan():
        """queue file system rescan task"""
        print("start filesystem scan")
        rescan_filesystem.delay()
        return {"success": True}

    def _delete_playlist(self):
        """delete playlist, 'metadata' only or 'all' incl videos"""
        playlist_id = self.exec_val["playlist-id"]
        playlist_action = self.exec_val["playlist-action"]
        print(f"{playlist_id}: delete playlist {playlist_action}")
        # any other action value is ignored and still reports success
        if playlist_action == "metadata":
            YoutubePlaylist(playlist_id).delete_metadata()
        elif playlist_action == "all":
            YoutubePlaylist(playlist_id).delete_videos_playlist()

        return {"success": True}

    def _find_playlists(self):
        """queue task to add all playlists of a channel"""
        index_channel_playlists.delay(self.exec_val)
        return {"success": True}
|
|
@ -2,9 +2,12 @@
|
|||
- hold all form classes used in the views
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from django import forms
|
||||
from django.contrib.auth.forms import AuthenticationForm
|
||||
from django.forms.widgets import PasswordInput, TextInput
|
||||
from home.src.ta.helper import get_stylesheets
|
||||
|
||||
|
||||
class CustomAuthForm(AuthenticationForm):
|
||||
|
@ -29,14 +32,16 @@ class CustomAuthForm(AuthenticationForm):
|
|||
class UserSettingsForm(forms.Form):
|
||||
"""user configurations values"""
|
||||
|
||||
CHOICES = [
|
||||
("", "-- change color scheme --"),
|
||||
("dark", "Dark"),
|
||||
("light", "Light"),
|
||||
]
|
||||
STYLESHEET_CHOICES = [("", "-- change stylesheet --")]
|
||||
STYLESHEET_CHOICES.extend(
|
||||
[
|
||||
(stylesheet, os.path.splitext(stylesheet)[0].title())
|
||||
for stylesheet in get_stylesheets()
|
||||
]
|
||||
)
|
||||
|
||||
colors = forms.ChoiceField(
|
||||
widget=forms.Select, choices=CHOICES, required=False
|
||||
stylesheet = forms.ChoiceField(
|
||||
widget=forms.Select, choices=STYLESHEET_CHOICES, required=False
|
||||
)
|
||||
page_size = forms.IntegerField(required=False)
|
||||
|
||||
|
@ -44,6 +49,12 @@ class UserSettingsForm(forms.Form):
|
|||
class ApplicationSettingsForm(forms.Form):
|
||||
"""handle all application settings"""
|
||||
|
||||
AUTOSTART_CHOICES = [
|
||||
("", "-- change subscription autostart --"),
|
||||
("0", "disable auto start"),
|
||||
("1", "enable auto start"),
|
||||
]
|
||||
|
||||
METADATA_CHOICES = [
|
||||
("", "-- change metadata embed --"),
|
||||
("0", "don't embed metadata"),
|
||||
|
@ -94,8 +105,8 @@ class ApplicationSettingsForm(forms.Form):
|
|||
|
||||
COOKIE_IMPORT_CHOICES = [
|
||||
("", "-- change cookie settings"),
|
||||
("0", "disable cookie"),
|
||||
("1", "enable cookie"),
|
||||
("0", "remove cookie"),
|
||||
("1", "import cookie"),
|
||||
]
|
||||
|
||||
subscriptions_channel_size = forms.IntegerField(
|
||||
|
@ -107,12 +118,16 @@ class ApplicationSettingsForm(forms.Form):
|
|||
subscriptions_shorts_channel_size = forms.IntegerField(
|
||||
required=False, min_value=0
|
||||
)
|
||||
downloads_limit_count = forms.IntegerField(required=False)
|
||||
subscriptions_auto_start = forms.ChoiceField(
|
||||
widget=forms.Select, choices=AUTOSTART_CHOICES, required=False
|
||||
)
|
||||
downloads_limit_speed = forms.IntegerField(required=False)
|
||||
downloads_throttledratelimit = forms.IntegerField(required=False)
|
||||
downloads_sleep_interval = forms.IntegerField(required=False)
|
||||
downloads_autodelete_days = forms.IntegerField(required=False)
|
||||
downloads_format = forms.CharField(required=False)
|
||||
downloads_format_sort = forms.CharField(required=False)
|
||||
downloads_extractor_lang = forms.CharField(required=False)
|
||||
downloads_add_metadata = forms.ChoiceField(
|
||||
widget=forms.Select, choices=METADATA_CHOICES, required=False
|
||||
)
|
||||
|
@ -147,9 +162,41 @@ class ApplicationSettingsForm(forms.Form):
|
|||
class SchedulerSettingsForm(forms.Form):
|
||||
"""handle scheduler settings"""
|
||||
|
||||
HELP_TEXT = "Add Apprise notification URLs, one per line"
|
||||
|
||||
update_subscribed = forms.CharField(required=False)
|
||||
update_subscribed_notify = forms.CharField(
|
||||
label=False,
|
||||
widget=forms.Textarea(
|
||||
attrs={
|
||||
"rows": 2,
|
||||
"placeholder": HELP_TEXT,
|
||||
}
|
||||
),
|
||||
required=False,
|
||||
)
|
||||
download_pending = forms.CharField(required=False)
|
||||
download_pending_notify = forms.CharField(
|
||||
label=False,
|
||||
widget=forms.Textarea(
|
||||
attrs={
|
||||
"rows": 2,
|
||||
"placeholder": HELP_TEXT,
|
||||
}
|
||||
),
|
||||
required=False,
|
||||
)
|
||||
check_reindex = forms.CharField(required=False)
|
||||
check_reindex_notify = forms.CharField(
|
||||
label=False,
|
||||
widget=forms.Textarea(
|
||||
attrs={
|
||||
"rows": 2,
|
||||
"placeholder": HELP_TEXT,
|
||||
}
|
||||
),
|
||||
required=False,
|
||||
)
|
||||
check_reindex_days = forms.IntegerField(required=False)
|
||||
thumbnail_check = forms.CharField(required=False)
|
||||
run_backup = forms.CharField(required=False)
|
||||
|
@ -218,6 +265,20 @@ class SubscribeToPlaylistForm(forms.Form):
|
|||
)
|
||||
|
||||
|
||||
class CreatePlaylistForm(forms.Form):
    """single line text area to enter the name of a new custom playlist"""

    create = forms.CharField(
        label="Or create custom playlist",
        widget=forms.Textarea(
            attrs=dict(rows=1, placeholder="Input playlist name")
        ),
    )
|
||||
|
||||
|
||||
class ChannelOverwriteForm(forms.Form):
|
||||
"""custom overwrites for channel settings"""
|
||||
|
||||
|
|
|
@ -6,154 +6,18 @@ Functionality:
|
|||
- calculate pagination values
|
||||
"""
|
||||
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from api.src.search_processor import SearchProcess
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.config import AppConfig
|
||||
|
||||
|
||||
class SearchHandler:
|
||||
"""search elastic search"""
|
||||
|
||||
def __init__(self, path, config, data=False):
    """store query parameters, max_hits stays None until get_data ran"""
    self.max_hits = None
    self.path, self.config, self.data = path, config, data
|
||||
|
||||
def get_data(self):
    """query elastic search and return the list of cleaned up hits

    sets self.max_hits from the response total for search responses,
    returns False if there are no hits
    """
    response, _ = ElasticWrap(self.path, config=self.config).get(self.data)

    if "hits" not in response:
        # single document response, wrap in list to reuse rest of class
        hits = [response]
    else:
        self.max_hits = response["hits"]["total"]["value"]
        hits = response["hits"]["hits"]

    # stop if empty
    if not hits:
        return False

    # NOTE: the de-duplicated lists below are built but never returned
    seen_videos = []
    seen_channels = []
    for position, hit in enumerate(hits):
        hits[position] = self.hit_cleanup(hit)
        index_name = hit["_index"]
        if index_name == "ta_video":
            video_dict, channel_dict = self.vid_cache_link(hit)
            if video_dict not in seen_videos:
                seen_videos.append(video_dict)
            if channel_dict not in seen_channels:
                seen_channels.append(channel_dict)
        elif index_name == "ta_channel":
            channel_dict = self.channel_cache_link(hit)
            if channel_dict not in seen_channels:
                seen_channels.append(channel_dict)

    return hits
|
||||
|
||||
@staticmethod
|
||||
def vid_cache_link(hit):
    """collect thumbnail urls of a video hit and of its channel

    returns tuple of video_dict and channel_dict, chan_banner is
    False if the channel has no channel_banner_url key
    """
    source = hit["source"]
    vid_thumb = source["vid_thumb_url"]
    youtube_id = source["youtube_id"]
    channel = source["channel"]

    video_dict = {"youtube_id": youtube_id, "vid_thumb": vid_thumb}
    channel_dict = {
        "channel_id": channel["channel_id"],
        "chan_thumb": channel["channel_thumb_url"],
        "chan_banner": channel.get("channel_banner_url", False),
    }

    return video_dict, channel_dict
|
||||
|
||||
@staticmethod
|
||||
def channel_cache_link(hit):
    """collect thumbnail urls of a channel hit

    chan_banner is False if the hit has no channel_banner_url key
    """
    source = hit["source"]

    return {
        "channel_id": source["channel_id"],
        "chan_thumb": source["channel_thumb_url"],
        "chan_banner": source.get("channel_banner_url", False),
    }
|
||||
|
||||
@staticmethod
|
||||
def hit_cleanup(hit):
|
||||
"""clean up and parse data from a single hit"""
|
||||
hit["source"] = hit.pop("_source")
|
||||
hit_keys = hit["source"].keys()
|
||||
if "media_url" in hit_keys:
|
||||
parsed_url = urllib.parse.quote(hit["source"]["media_url"])
|
||||
hit["source"]["media_url"] = parsed_url
|
||||
|
||||
if "published" in hit_keys:
|
||||
published = hit["source"]["published"]
|
||||
date_pub = datetime.strptime(published, "%Y-%m-%d")
|
||||
date_str = datetime.strftime(date_pub, "%d %b, %Y")
|
||||
hit["source"]["published"] = date_str
|
||||
|
||||
if "vid_last_refresh" in hit_keys:
|
||||
vid_last_refresh = hit["source"]["vid_last_refresh"]
|
||||
date_refresh = datetime.fromtimestamp(vid_last_refresh)
|
||||
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
|
||||
hit["source"]["vid_last_refresh"] = date_str
|
||||
|
||||
if "playlist_last_refresh" in hit_keys:
|
||||
playlist_last_refresh = hit["source"]["playlist_last_refresh"]
|
||||
date_refresh = datetime.fromtimestamp(playlist_last_refresh)
|
||||
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
|
||||
hit["source"]["playlist_last_refresh"] = date_str
|
||||
|
||||
if "vid_thumb_url" in hit_keys:
|
||||
youtube_id = hit["source"]["youtube_id"]
|
||||
thumb_path = ThumbManager(youtube_id).vid_thumb_path()
|
||||
hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}"
|
||||
|
||||
if "channel_last_refresh" in hit_keys:
|
||||
refreshed = hit["source"]["channel_last_refresh"]
|
||||
date_refresh = datetime.fromtimestamp(refreshed)
|
||||
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
|
||||
hit["source"]["channel_last_refresh"] = date_str
|
||||
|
||||
if "channel" in hit_keys:
|
||||
channel_keys = hit["source"]["channel"].keys()
|
||||
if "channel_last_refresh" in channel_keys:
|
||||
refreshed = hit["source"]["channel"]["channel_last_refresh"]
|
||||
date_refresh = datetime.fromtimestamp(refreshed)
|
||||
date_str = datetime.strftime(date_refresh, "%d %b, %Y")
|
||||
hit["source"]["channel"]["channel_last_refresh"] = date_str
|
||||
|
||||
if "subtitle_fragment_id" in hit_keys:
|
||||
youtube_id = hit["source"]["youtube_id"]
|
||||
thumb_path = ThumbManager(youtube_id).vid_thumb_path()
|
||||
hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}"
|
||||
|
||||
return hit
|
||||
|
||||
|
||||
class SearchForm:
|
||||
"""build query from search form data"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
|
||||
def multi_search(self, search_query):
|
||||
"""searching through index"""
|
||||
path, query, query_type = SearchParser(search_query).run()
|
||||
look_up = SearchHandler(path, config=self.CONFIG, data=query)
|
||||
search_results = look_up.get_data()
|
||||
response, _ = ElasticWrap(path).get(data=query)
|
||||
search_results = SearchProcess(response).process()
|
||||
all_results = self.build_results(search_results)
|
||||
|
||||
return {"results": all_results, "queryType": query_type}
|
||||
|
@ -429,6 +293,7 @@ class QueryBuilder:
|
|||
"channel_name._2gram^2",
|
||||
"channel_name._3gram^2",
|
||||
"channel_name.search_as_you_type^2",
|
||||
"channel_tags",
|
||||
],
|
||||
}
|
||||
}
|
||||
|
@ -502,7 +367,6 @@ class QueryBuilder:
|
|||
|
||||
query = {
|
||||
"size": 30,
|
||||
"_source": {"excludes": "subtitle_line"},
|
||||
"query": {"bool": {"must": must_list}},
|
||||
"highlight": {
|
||||
"fields": {
|
||||
|
|
|
@ -6,159 +6,15 @@ functionality:
|
|||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from home.src.download import queue # partial import
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.generic import YouTubeItem
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.ta.helper import clean_string, requests_headers
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class ChannelScraper:
    """custom scraper using bs4 to scrape channel about page
    will be able to be integrated into yt-dlp
    once #2237 and #2350 are merged upstream

    get_json() is the entry point: it fetches the about page, extracts the
    embedded ytInitialData json and builds the channel dict in json_data.
    """

    def __init__(self, channel_id):
        self.channel_id = channel_id
        # all three stay False until the matching step of get_json has run
        self.soup = False
        self.yt_json = False
        self.json_data = False

    def get_json(self):
        """main method to return channel dict, False if page has alerts"""
        self.get_soup()
        self._extract_yt_json()
        if self._is_deactivated():
            return False

        self._parse_channel_main()
        self._parse_channel_meta()
        return self.json_data

    def get_soup(self):
        """request about page and store parsed soup

        Raises ConnectionError if the response is not ok.
        """
        print(f"{self.channel_id}: scrape channel data from youtube")
        url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
        cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
        response = requests.get(
            url, cookies=cookies, headers=requests_headers(), timeout=10
        )
        if not response.ok:
            print(f"{self.channel_id}: failed to extract channel info")
            raise ConnectionError

        self.soup = BeautifulSoup(response.text, "html.parser")

    def _extract_yt_json(self):
        """parse soup and get ytInitialData json

        Raises ValueError if no script tag carries the ytInitialData payload
        or if the payload is not valid json.
        """
        marker = "var ytInitialData = "
        script_content = None
        for script in self.soup.find("body").find_all("script"):
            script_text = str(script)
            if marker in script_text:
                script_content = script_text
                break

        if script_content is None:
            raise ValueError(
                f"{self.channel_id}: ytInitialData not found in channel page"
            )

        # extract payload: drop the closing tag as a suffix, rstrip with a
        # multi char argument would strip a character set instead
        json_raw = script_content.split(marker)[1].strip()
        closing_tag = "</script>"
        if json_raw.endswith(closing_tag):
            json_raw = json_raw[: -len(closing_tag)]

        self.yt_json = json.loads(json_raw.rstrip().rstrip(";"))

    def _is_deactivated(self):
        """check if channel is deactivated, any alert counts as deactivated"""
        alerts = self.yt_json.get("alerts")
        if not alerts:
            return False

        for alert in alerts:
            alert_text = alert["alertRenderer"]["text"]["simpleText"]
            print(f"{self.channel_id}: failed to extract, {alert_text}")
            return True

    def _parse_channel_main(self):
        """extract maintab values from scraped channel json data"""
        main_tab = self.yt_json["header"]["c4TabbedHeaderRenderer"]
        # build and store dict
        self.json_data = {
            "channel_active": True,
            "channel_last_refresh": int(datetime.now().timestamp()),
            "channel_subs": self._get_channel_subs(main_tab),
            "channel_name": main_tab["title"],
            "channel_banner_url": self._get_thumbnails(main_tab, "banner"),
            "channel_tvart_url": self._get_thumbnails(main_tab, "tvBanner"),
            "channel_id": self.channel_id,
            "channel_subscribed": False,
        }

    @staticmethod
    def _get_thumbnails(main_tab, thumb_name):
        """extract url of widest thumbnail from main_tab, False if missing"""
        try:
            all_banners = main_tab[thumb_name]["thumbnails"]
            banner = sorted(all_banners, key=lambda k: k["width"])[-1]["url"]
        except (KeyError, IndexError):
            # key not present or empty thumbnail list
            banner = False

        return banner

    @staticmethod
    def _get_channel_subs(main_tab):
        """process main_tab to get channel subs as int

        Understands plain numbers and the K, M and B suffixes. Returns 0 if
        the subscriber text is missing, prints a message and returns 0 if
        the text can not be parsed.
        """
        try:
            sub_text_simple = main_tab["subscriberCountText"]["simpleText"]
        except KeyError:
            return 0

        sub_text = sub_text_simple.split(" ")[0]
        if not sub_text:
            return 0

        multipliers = {"K": 1000, "M": 1000000, "B": 1000000000}
        suffix = sub_text[-1]
        try:
            if suffix in multipliers:
                number = float(sub_text.replace(suffix, ""))
                return int(number * multipliers[suffix])

            channel_subs = int(sub_text)
            if channel_subs >= 0:
                return channel_subs
        except ValueError:
            # not a number, e.g. "No subscribers"
            pass

        print(f"{sub_text} not dealt with")
        return 0

    def _parse_channel_meta(self):
        """extract meta tab values from channel payload"""
        # meta tab
        meta_tab = self.yt_json["metadata"]["channelMetadataRenderer"]
        all_thumbs = meta_tab["avatar"]["thumbnails"]
        thumb_url = sorted(all_thumbs, key=lambda k: k["width"])[-1]["url"]
        # stats tab, stays empty if the page has no About tab
        about_tab = {}
        renderer = "twoColumnBrowseResultsRenderer"
        all_tabs = self.yt_json["contents"][renderer]["tabs"]
        for tab in all_tabs:
            if "tabRenderer" not in tab:
                continue

            if tab["tabRenderer"]["title"] == "About":
                about_tab = tab["tabRenderer"]["content"][
                    "sectionListRenderer"
                ]["contents"][0]["itemSectionRenderer"]["contents"][0][
                    "channelAboutFullMetadataRenderer"
                ]
                break

        try:
            channel_views_text = about_tab["viewCountText"]["simpleText"]
            channel_views = int(re.sub(r"\D", "", channel_views_text))
        except KeyError:
            channel_views = 0

        self.json_data.update(
            {
                "channel_description": meta_tab["description"],
                "channel_thumb_url": thumb_url,
                "channel_views": channel_views,
            }
        )
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class YoutubeChannel(YouTubeItem):
|
||||
|
@ -167,12 +23,16 @@ class YoutubeChannel(YouTubeItem):
|
|||
es_path = False
|
||||
index_name = "ta_channel"
|
||||
yt_base = "https://www.youtube.com/channel/"
|
||||
msg = "message:playlistscan"
|
||||
yt_obs = {"playlist_items": "0,0"}
|
||||
|
||||
def __init__(self, youtube_id):
|
||||
def __init__(self, youtube_id, task=False):
|
||||
super().__init__(youtube_id)
|
||||
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
|
||||
self.all_playlists = False
|
||||
self.task = task
|
||||
|
||||
def build_yt_url(self):
    """overwrite base to use channel about page"""
    about_url = "{}{}/about".format(self.yt_base, self.youtube_id)
    return about_url
|
||||
|
||||
def build_json(self, upload=False, fallback=False):
|
||||
"""get from es or from youtube"""
|
||||
|
@ -180,23 +40,75 @@ class YoutubeChannel(YouTubeItem):
|
|||
if self.json_data:
|
||||
return
|
||||
|
||||
self.get_from_youtube(fallback)
|
||||
self.get_from_youtube()
|
||||
if not self.youtube_meta and fallback:
|
||||
self._video_fallback(fallback)
|
||||
else:
|
||||
self.process_youtube_meta()
|
||||
self.get_channel_art()
|
||||
|
||||
if upload:
|
||||
self.upload_to_es()
|
||||
return
|
||||
|
||||
def get_from_youtube(self, fallback=False):
|
||||
"""use bs4 to scrape channel about page"""
|
||||
self.json_data = ChannelScraper(self.youtube_id).get_json()
|
||||
def process_youtube_meta(self):
|
||||
"""extract relevant fields"""
|
||||
self.youtube_meta["thumbnails"].reverse()
|
||||
channel_subs = self.youtube_meta.get("channel_follower_count") or 0
|
||||
self.json_data = {
|
||||
"channel_active": True,
|
||||
"channel_description": self.youtube_meta.get("description", False),
|
||||
"channel_id": self.youtube_id,
|
||||
"channel_last_refresh": int(datetime.now().timestamp()),
|
||||
"channel_name": self.youtube_meta["uploader"],
|
||||
"channel_subs": channel_subs,
|
||||
"channel_subscribed": False,
|
||||
"channel_tags": self._parse_tags(self.youtube_meta.get("tags")),
|
||||
"channel_banner_url": self._get_banner_art(),
|
||||
"channel_thumb_url": self._get_thumb_art(),
|
||||
"channel_tvart_url": self._get_tv_art(),
|
||||
"channel_views": self.youtube_meta.get("view_count") or 0,
|
||||
}
|
||||
|
||||
if not self.json_data and fallback:
|
||||
self._video_fallback(fallback)
|
||||
def _parse_tags(self, tags):
|
||||
"""parse channel tags"""
|
||||
if not tags:
|
||||
return False
|
||||
|
||||
if not self.json_data:
|
||||
return
|
||||
joined = " ".join(tags)
|
||||
return [i.strip() for i in joined.split('"') if i and not i == " "]
|
||||
|
||||
self.get_channel_art()
|
||||
def _get_thumb_art(self):
|
||||
"""extract thumb art"""
|
||||
for i in self.youtube_meta["thumbnails"]:
|
||||
if not i.get("width"):
|
||||
continue
|
||||
if i.get("width") == i.get("height"):
|
||||
return i["url"]
|
||||
|
||||
return False
|
||||
|
||||
def _get_tv_art(self):
|
||||
"""extract tv artwork"""
|
||||
for i in self.youtube_meta["thumbnails"]:
|
||||
if i.get("id") == "banner_uncropped":
|
||||
return i["url"]
|
||||
for i in self.youtube_meta["thumbnails"]:
|
||||
if not i.get("width"):
|
||||
continue
|
||||
if i["width"] // i["height"] < 2 and not i["width"] == i["height"]:
|
||||
return i["url"]
|
||||
|
||||
return False
|
||||
|
||||
def _get_banner_art(self):
|
||||
"""extract banner artwork"""
|
||||
for i in self.youtube_meta["thumbnails"]:
|
||||
if not i.get("width"):
|
||||
continue
|
||||
if i["width"] // i["height"] > 5:
|
||||
return i["url"]
|
||||
|
||||
return False
|
||||
|
||||
def _video_fallback(self, fallback):
|
||||
"""use video metadata as fallback"""
|
||||
|
@ -210,6 +122,7 @@ class YoutubeChannel(YouTubeItem):
|
|||
"channel_tvart_url": False,
|
||||
"channel_id": self.youtube_id,
|
||||
"channel_subscribed": False,
|
||||
"channel_tags": False,
|
||||
"channel_description": False,
|
||||
"channel_thumb_url": False,
|
||||
"channel_views": 0,
|
||||
|
@ -219,7 +132,7 @@ class YoutubeChannel(YouTubeItem):
|
|||
def _info_json_fallback(self):
|
||||
"""read channel info.json for additional metadata"""
|
||||
info_json = os.path.join(
|
||||
self.config["application"]["cache_dir"],
|
||||
EnvironmentSettings.CACHE_DIR,
|
||||
"import",
|
||||
f"{self.youtube_id}.info.json",
|
||||
)
|
||||
|
@ -241,6 +154,7 @@ class YoutubeChannel(YouTubeItem):
|
|||
urls = (
|
||||
self.json_data["channel_thumb_url"],
|
||||
self.json_data["channel_banner_url"],
|
||||
self.json_data["channel_tvart_url"],
|
||||
)
|
||||
ThumbManager(self.youtube_id, item_type="channel").download(urls)
|
||||
|
||||
|
@ -261,12 +175,10 @@ class YoutubeChannel(YouTubeItem):
|
|||
|
||||
def get_folder_path(self):
|
||||
"""get folder where media files get stored"""
|
||||
channel_name = self.json_data["channel_name"]
|
||||
folder_name = clean_string(channel_name)
|
||||
if len(folder_name) <= 3:
|
||||
# fall back to channel id
|
||||
folder_name = self.json_data["channel_id"]
|
||||
folder_path = os.path.join(self.app_conf["videos"], folder_name)
|
||||
folder_path = os.path.join(
|
||||
EnvironmentSettings.MEDIA_DIR,
|
||||
self.json_data["channel_id"],
|
||||
)
|
||||
return folder_path
|
||||
|
||||
def delete_es_videos(self):
|
||||
|
@ -287,11 +199,21 @@ class YoutubeChannel(YouTubeItem):
|
|||
}
|
||||
_, _ = ElasticWrap("ta_comment/_delete_by_query").post(data)
|
||||
|
||||
def delete_es_subtitles(self):
    """delete all subtitles from this channel

    Posts a term query on subtitle_channel_id to ta_subtitle/_delete_by_query.
    """
    channel_term = {"subtitle_channel_id": {"value": self.youtube_id}}
    query = {"query": {"term": channel_term}}
    _response, _status = ElasticWrap("ta_subtitle/_delete_by_query").post(query)
|
||||
|
||||
def delete_playlists(self):
|
||||
"""delete all indexed playlist from es"""
|
||||
all_playlists = self.get_indexed_playlists()
|
||||
for playlist in all_playlists:
|
||||
playlist_id = playlist["playlist_id"]
|
||||
playlist = YoutubePlaylist(playlist_id)
|
||||
YoutubePlaylist(playlist_id).delete_metadata()
|
||||
|
||||
def delete_channel(self):
|
||||
|
@ -317,6 +239,7 @@ class YoutubeChannel(YouTubeItem):
|
|||
print(f"{self.youtube_id}: delete indexed videos")
|
||||
self.delete_es_videos()
|
||||
self.delete_es_comments()
|
||||
self.delete_es_subtitles()
|
||||
self.del_in_es()
|
||||
|
||||
def index_channel_playlists(self):
|
||||
|
@ -324,34 +247,29 @@ class YoutubeChannel(YouTubeItem):
|
|||
print(f"{self.youtube_id}: index all playlists")
|
||||
self.get_from_es()
|
||||
channel_name = self.json_data["channel_name"]
|
||||
mess_dict = {
|
||||
"status": self.msg,
|
||||
"level": "info",
|
||||
"title": "Looking for playlists",
|
||||
"message": f"{channel_name}: Scanning channel in progress",
|
||||
}
|
||||
RedisArchivist().set_message(self.msg, mess_dict, expire=True)
|
||||
self.task.send_progress([f"{channel_name}: Looking for Playlists"])
|
||||
self.get_all_playlists()
|
||||
if not self.all_playlists:
|
||||
print(f"{self.youtube_id}: no playlists found.")
|
||||
return
|
||||
|
||||
all_youtube_ids = self.get_all_video_ids()
|
||||
total = len(self.all_playlists)
|
||||
for idx, playlist in enumerate(self.all_playlists):
|
||||
self._notify_single_playlist(idx, playlist)
|
||||
self._index_single_playlist(playlist, all_youtube_ids)
|
||||
if self.task:
|
||||
self._notify_single_playlist(idx, total)
|
||||
|
||||
def _notify_single_playlist(self, idx, playlist):
|
||||
self._index_single_playlist(playlist, all_youtube_ids)
|
||||
print("add playlist: " + playlist[1])
|
||||
|
||||
def _notify_single_playlist(self, idx, total):
|
||||
"""send notification"""
|
||||
channel_name = self.json_data["channel_name"]
|
||||
mess_dict = {
|
||||
"status": self.msg,
|
||||
"level": "info",
|
||||
"title": f"{channel_name}: Scanning channel for playlists",
|
||||
"message": f"Progress: {idx + 1}/{len(self.all_playlists)}",
|
||||
}
|
||||
RedisArchivist().set_message(self.msg, mess_dict, expire=True)
|
||||
print("add playlist: " + playlist[1])
|
||||
message = [
|
||||
f"{channel_name}: Scanning channel for playlists",
|
||||
f"Progress: {idx + 1}/{total}",
|
||||
]
|
||||
self.task.send_progress(message, progress=(idx + 1) / total)
|
||||
|
||||
@staticmethod
|
||||
def _index_single_playlist(playlist, all_youtube_ids):
|
||||
|
|
|
@ -10,7 +10,6 @@ from datetime import datetime
|
|||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
class Comments:
|
||||
|
@ -24,14 +23,13 @@ class Comments:
|
|||
self.is_activated = False
|
||||
self.comments_format = False
|
||||
|
||||
def build_json(self, notify=False):
|
||||
def build_json(self):
|
||||
"""build json document for es"""
|
||||
print(f"{self.youtube_id}: get comments")
|
||||
self.check_config()
|
||||
if not self.is_activated:
|
||||
return
|
||||
|
||||
self._send_notification(notify)
|
||||
comments_raw, channel_id = self.get_yt_comments()
|
||||
if not comments_raw and not channel_id:
|
||||
return
|
||||
|
@ -52,23 +50,6 @@ class Comments:
|
|||
|
||||
self.is_activated = bool(self.config["downloads"]["comment_max"])
|
||||
|
||||
@staticmethod
|
||||
def _send_notification(notify):
|
||||
"""send notification for download post process message"""
|
||||
if not notify:
|
||||
return
|
||||
|
||||
key = "message:download"
|
||||
idx, total_videos = notify
|
||||
message = {
|
||||
"status": key,
|
||||
"level": "info",
|
||||
"title": "Download and index comments",
|
||||
"message": f"Progress: {idx + 1}/{total_videos}",
|
||||
}
|
||||
|
||||
RedisArchivist().set_message(key, message)
|
||||
|
||||
def build_yt_obs(self):
|
||||
"""
|
||||
get extractor config
|
||||
|
@ -79,13 +60,15 @@ class Comments:
|
|||
comment_sort = self.config["downloads"]["comment_sort"]
|
||||
|
||||
yt_obs = {
|
||||
"check_formats": None,
|
||||
"skip_download": True,
|
||||
"quiet": False,
|
||||
"getcomments": True,
|
||||
"ignoreerrors": True,
|
||||
"extractor_args": {
|
||||
"youtube": {
|
||||
"max_comments": max_comments_list,
|
||||
"comment_sort": [comment_sort],
|
||||
"player_client": ["ios", "web"], # workaround yt-dlp #9554
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -95,7 +78,7 @@ class Comments:
|
|||
def get_yt_comments(self):
|
||||
"""get comments from youtube"""
|
||||
yt_obs = self.build_yt_obs()
|
||||
info_json = YtWrap(yt_obs).extract(self.youtube_id)
|
||||
info_json = YtWrap(yt_obs, config=self.config).extract(self.youtube_id)
|
||||
if not info_json:
|
||||
return False, False
|
||||
|
||||
|
@ -110,12 +93,20 @@ class Comments:
|
|||
if comments_raw:
|
||||
for comment in comments_raw:
|
||||
cleaned_comment = self.clean_comment(comment)
|
||||
if not cleaned_comment:
|
||||
continue
|
||||
|
||||
comments.append(cleaned_comment)
|
||||
|
||||
self.comments_format = comments
|
||||
|
||||
def clean_comment(self, comment):
|
||||
"""parse metadata from comment for indexing"""
|
||||
if not comment.get("text"):
|
||||
# comment text can be empty
|
||||
print(f"{self.youtube_id}: Failed to extract text, {comment}")
|
||||
return False
|
||||
|
||||
time_text_datetime = datetime.utcfromtimestamp(comment["timestamp"])
|
||||
|
||||
if time_text_datetime.hour == 0 and time_text_datetime.minute == 0:
|
||||
|
@ -125,17 +116,22 @@ class Comments:
|
|||
|
||||
time_text = time_text_datetime.strftime(format_string)
|
||||
|
||||
if not comment.get("author"):
|
||||
comment["author"] = comment.get("author_id", "Unknown")
|
||||
|
||||
cleaned_comment = {
|
||||
"comment_id": comment["id"],
|
||||
"comment_text": comment["text"].replace("\xa0", ""),
|
||||
"comment_timestamp": comment["timestamp"],
|
||||
"comment_time_text": time_text,
|
||||
"comment_likecount": comment["like_count"],
|
||||
"comment_is_favorited": comment["is_favorited"],
|
||||
"comment_likecount": comment.get("like_count", None),
|
||||
"comment_is_favorited": comment.get("is_favorited", False),
|
||||
"comment_author": comment["author"],
|
||||
"comment_author_id": comment["author_id"],
|
||||
"comment_author_thumbnail": comment["author_thumbnail"],
|
||||
"comment_author_is_uploader": comment["author_is_uploader"],
|
||||
"comment_author_is_uploader": comment.get(
|
||||
"author_is_uploader", False
|
||||
),
|
||||
"comment_parent": comment["parent"],
|
||||
}
|
||||
|
||||
|
@ -193,35 +189,28 @@ class Comments:
|
|||
class CommentList:
|
||||
"""interact with comments in group"""
|
||||
|
||||
def __init__(self, video_ids):
|
||||
def __init__(self, video_ids, task=False):
|
||||
self.video_ids = video_ids
|
||||
self.task = task
|
||||
self.config = AppConfig().config
|
||||
|
||||
def index(self, notify=False):
|
||||
"""index group of videos"""
|
||||
def index(self):
|
||||
"""index comments for list, init with task object to notify"""
|
||||
if not self.config["downloads"].get("comment_max"):
|
||||
return
|
||||
|
||||
total_videos = len(self.video_ids)
|
||||
for idx, video_id in enumerate(self.video_ids):
|
||||
comment = Comments(video_id, config=self.config)
|
||||
if notify:
|
||||
notify = (idx, total_videos)
|
||||
comment.build_json(notify=notify)
|
||||
for idx, youtube_id in enumerate(self.video_ids):
|
||||
if self.task:
|
||||
self.notify(idx, total_videos)
|
||||
|
||||
comment = Comments(youtube_id, config=self.config)
|
||||
comment.build_json()
|
||||
if comment.json_data:
|
||||
comment.upload_comments()
|
||||
|
||||
if notify:
|
||||
self.notify_final(total_videos)
|
||||
|
||||
@staticmethod
def notify_final(total_videos):
    """send final notification, message is set with expire=4"""
    key = "message:download"
    title = "Download and index comments finished"
    text = f"added comments for {total_videos} videos"
    payload = {"status": key, "level": "info", "title": title, "message": text}
    RedisArchivist().set_message(key, payload, expire=4)
|
||||
def notify(self, idx, total_videos):
    """send notification on task

    idx is the zero based position in the video list, progress is sent
    as the fraction (idx + 1) / total_videos.
    """
    current = idx + 1
    self.task.send_progress(
        [f"Add comments for new videos {current}/{total_videos}"],
        progress=current / total_videos,
    )
|
||||
|
|
|
@ -1,609 +1,124 @@
|
|||
"""
|
||||
Functionality:
|
||||
- reindexing old documents
|
||||
- syncing updated values between indexes
|
||||
- scan the filesystem to delete or index
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from home.src.download.queue import PendingList
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.comments import CommentList
|
||||
from home.src.index.video import YoutubeVideo, index_new_video
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import clean_string, ignore_filelist
|
||||
from PIL import Image, ImageFile
|
||||
from yt_dlp.utils import ISO639Utils
|
||||
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
|
||||
|
||||
class FilesystemScanner:
|
||||
"""handle scanning and fixing from filesystem"""
|
||||
class Scanner:
|
||||
"""scan index and filesystem"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
VIDEOS = CONFIG["application"]["videos"]
|
||||
VIDEOS: str = EnvironmentSettings.MEDIA_DIR
|
||||
|
||||
def __init__(self):
|
||||
self.all_downloaded = self.get_all_downloaded()
|
||||
self.all_indexed = self.get_all_indexed()
|
||||
self.mismatch = None
|
||||
self.to_rename = None
|
||||
self.to_index = None
|
||||
self.to_delete = None
|
||||
def __init__(self, task=False) -> None:
|
||||
self.task = task
|
||||
self.to_delete: set[str] = set()
|
||||
self.to_index: set[str] = set()
|
||||
|
||||
def get_all_downloaded(self):
|
||||
"""get a list of all video files downloaded"""
|
||||
channels = os.listdir(self.VIDEOS)
|
||||
all_channels = ignore_filelist(channels)
|
||||
all_channels.sort()
|
||||
all_downloaded = []
|
||||
for channel_name in all_channels:
|
||||
channel_path = os.path.join(self.VIDEOS, channel_name)
|
||||
channel_files = os.listdir(channel_path)
|
||||
channel_files_clean = ignore_filelist(channel_files)
|
||||
all_videos = [i for i in channel_files_clean if i.endswith(".mp4")]
|
||||
for video in all_videos:
|
||||
youtube_id = video[9:20]
|
||||
all_downloaded.append((channel_name, video, youtube_id))
|
||||
def scan(self) -> None:
|
||||
"""scan the filesystem"""
|
||||
downloaded: set[str] = self._get_downloaded()
|
||||
indexed: set[str] = self._get_indexed()
|
||||
self.to_index = downloaded - indexed
|
||||
self.to_delete = indexed - downloaded
|
||||
|
||||
return all_downloaded
|
||||
def _get_downloaded(self) -> set[str]:
|
||||
"""get downloaded ids"""
|
||||
if self.task:
|
||||
self.task.send_progress(["Scan your filesystem for videos."])
|
||||
|
||||
@staticmethod
|
||||
def get_all_indexed():
|
||||
"""get a list of all indexed videos"""
|
||||
index_handler = PendingList()
|
||||
index_handler.get_download()
|
||||
index_handler.get_indexed()
|
||||
downloaded: set = set()
|
||||
channels = ignore_filelist(os.listdir(self.VIDEOS))
|
||||
for channel in channels:
|
||||
folder = os.path.join(self.VIDEOS, channel)
|
||||
files = ignore_filelist(os.listdir(folder))
|
||||
downloaded.update({i.split(".")[0] for i in files})
|
||||
|
||||
all_indexed = []
|
||||
for video in index_handler.all_videos:
|
||||
youtube_id = video["youtube_id"]
|
||||
media_url = video["media_url"]
|
||||
published = video["published"]
|
||||
title = video["title"]
|
||||
all_indexed.append((youtube_id, media_url, published, title))
|
||||
return all_indexed
|
||||
return downloaded
|
||||
|
||||
def list_comarison(self):
|
||||
"""compare the lists to figure out what to do"""
|
||||
self.find_unindexed()
|
||||
self.find_missing()
|
||||
self.find_bad_media_url()
|
||||
def _get_indexed(self) -> set:
|
||||
"""get all indexed ids"""
|
||||
if self.task:
|
||||
self.task.send_progress(["Get all videos indexed."])
|
||||
|
||||
def find_unindexed(self):
|
||||
"""find video files without a matching document indexed"""
|
||||
all_indexed_ids = [i[0] for i in self.all_indexed]
|
||||
to_index = []
|
||||
for downloaded in self.all_downloaded:
|
||||
if downloaded[2] not in all_indexed_ids:
|
||||
to_index.append(downloaded)
|
||||
data = {"query": {"match_all": {}}, "_source": ["youtube_id"]}
|
||||
response = IndexPaginate("ta_video", data).get_results()
|
||||
return {i["youtube_id"] for i in response}
|
||||
|
||||
self.to_index = to_index
|
||||
def apply(self) -> None:
|
||||
"""apply all changes"""
|
||||
self.delete()
|
||||
self.index()
|
||||
self.url_fix()
|
||||
|
||||
def find_missing(self):
    """find indexed videos without matching media file

    Stores all entries of self.all_indexed whose youtube id (first item)
    is not among the ids of self.all_downloaded (third item) in
    self.to_delete, keeping the order of self.all_indexed.
    """
    # set lookup instead of scanning a list for every indexed video
    downloaded_ids = {downloaded[2] for downloaded in self.all_downloaded}
    self.to_delete = [
        video for video in self.all_indexed if video[0] not in downloaded_ids
    ]
|
||||
|
||||
def find_bad_media_url(self):
    """rename media files not matching the indexed title

    Compares every downloaded file with its indexed document and builds:
    - self.to_rename: (channel, filename, expected_filename) for files
      whose name differs from the expected file name
    - self.mismatch: (youtube_id, new_media_url) for documents whose
      media_url differs from the expected relative path
    """
    # index by id once, first entry per id wins like a linear search would
    indexed_by_id = {}
    for indexed in self.all_indexed:
        indexed_by_id.setdefault(indexed[0], indexed)

    to_fix = []
    to_rename = []
    for channel, filename, downloaded_id in self.all_downloaded:
        indexed = indexed_by_id.get(downloaded_id)
        if indexed is None:
            # not indexed, nothing to compare against
            continue

        indexed_id, media_url, published, title = indexed
        title_c = clean_string(title)
        pub = published.replace("-", "")
        expected_filename = f"{pub}_{indexed_id}_{title_c}.mp4"
        new_url = os.path.join(channel, expected_filename)
        if expected_filename != filename:
            # file to rename
            to_rename.append((channel, filename, expected_filename))
        if media_url != new_url:
            # media_url to update in es
            to_fix.append((indexed_id, new_url))

    self.mismatch = to_fix
    self.to_rename = to_rename
|
||||
|
||||
def rename_files(self):
    """rename media files as identified by find_bad_media_url

    Each entry of self.to_rename is (channel, filename, expected_filename),
    paths are built below self.VIDEOS.
    """
    for channel, filename, expected_filename in self.to_rename:
        print(f"renaming [{filename}] to [{expected_filename}]")
        channel_dir = os.path.join(self.VIDEOS, channel)
        os.rename(
            os.path.join(channel_dir, filename),
            os.path.join(channel_dir, expected_filename),
        )
|
||||
|
||||
def send_mismatch_bulk(self):
    """build bulk update and post it to _bulk

    Each entry of self.mismatch is (youtube_id, media_url) and becomes an
    update action line followed by a doc line.
    """
    lines = []
    for youtube_id, media_url in self.mismatch:
        print(f"{youtube_id}: fixing media url {media_url}")
        lines.extend(
            (
                json.dumps({"update": {"_id": youtube_id, "_index": "ta_video"}}),
                json.dumps({"doc": {"media_url": media_url}}),
            )
        )

    # add last newline
    lines.append("\n")
    payload = "\n".join(lines)
    _response, _status = ElasticWrap("_bulk").post(data=payload, ndjson=True)
|
||||
|
||||
def delete_from_index(self):
    """delete documents of self.to_delete from ta_video

    The first item of each entry is used as the document id.
    """
    for entry in self.to_delete:
        video_id = entry[0]
        print(f"deleting {video_id} from index")
        _response, _status = ElasticWrap(f"ta_video/_doc/{video_id}").delete()
|
||||
|
||||
|
||||
class ImportFolderScanner:
|
||||
"""import and indexing existing video files
|
||||
- identify all media files belonging to a video
|
||||
- identify youtube id
|
||||
- convert if needed
|
||||
"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
IMPORT_DIR = os.path.join(CACHE_DIR, "import")
|
||||
|
||||
EXT_MAP = {
|
||||
"media": [".mp4", ".mkv", ".webm"],
|
||||
"metadata": [".json"],
|
||||
"thumb": [".jpg", ".png", ".webp"],
|
||||
"subtitle": [".vtt"],
|
||||
}
|
||||
|
||||
def __init__(self):
    # False until match_files replaces it with the list of videos to import
    self.to_import = False
|
||||
|
||||
def scan(self):
    """scan and match media files, return the list of videos to import"""
    self.match_files(self.get_all_files())
    self.process_videos()

    return self.to_import
|
||||
|
||||
def get_all_files(self):
    """get sorted absolute paths of all files in /import"""
    names = ignore_filelist(os.listdir(self.IMPORT_DIR))
    return sorted(os.path.join(self.IMPORT_DIR, name) for name in names)
|
||||
|
||||
@staticmethod
|
||||
def _get_template():
|
||||
"""base dict for video"""
|
||||
return {
|
||||
"media": False,
|
||||
"video_id": False,
|
||||
"metadata": False,
|
||||
"thumb": False,
|
||||
"subtitle": [],
|
||||
}
|
||||
|
||||
def match_files(self, all_files):
    """group the file list into one record per base name

    consecutive files sharing a base name end up in the same record,
    the result is stored in self.to_import
    """
    grouped = []
    self.to_import = grouped

    record = self._get_template()
    previous_base = False

    for candidate in all_files:
        base_name, ext = self._detect_base_name(candidate)
        key, matched_path = self._detect_type(candidate, ext)
        if not (key and matched_path):
            # extension not listed in EXT_MAP
            continue

        if base_name != previous_base:
            # a new base name closes the record collected so far
            if previous_base:
                print(f"manual import: {record}")
                grouped.append(record)

            record = self._get_template()
            previous_base = base_name

        if key == "subtitle":
            record["subtitle"].append(matched_path)
        else:
            record[key] = matched_path

    # the trailing record is only kept when it holds a media file
    if record.get("media"):
        print(f"manual import: {record}")
        grouped.append(record)
|
||||
|
||||
def _detect_base_name(self, file_path):
    """split file_path into the base name used for grouping and its ext

    a second extension that is a known language code or ".info" is not
    counted as part of the base name
    """
    stem, ext = os.path.splitext(file_path)
    inner_stem, infix = os.path.splitext(stem)

    if not infix:
        return stem, ext

    if ISO639Utils.short2long(infix.strip(".")) or infix == ".info":
        # valid secondary extension
        return inner_stem, ext

    return stem, ext
|
||||
|
||||
def _detect_type(self, file_path, ext):
    """detect metadata type for file

    returns (key, file_path) with the EXT_MAP key whose list holds ext,
    (False, False) when the extension is not listed
    """
    # EXT_MAP only lists lowercase extensions, compare case-insensitively
    # so files like "video.MP4" are not silently skipped
    normalized = ext.lower()

    for key, extensions in self.EXT_MAP.items():
        if normalized in extensions:
            return key, file_path

    return False, False
|
||||
|
||||
def process_videos(self):
    """prepare and import every matched video, in order

    raises ValueError for a record without a media file
    """
    prepare_steps = (
        self._detect_youtube_id,
        self._dump_thumb,
        self._convert_thumb,
        self._get_subtitles,
        self._convert_video,
    )

    for video in self.to_import:
        if not video["media"]:
            print(f"{video}: no matching media file found.")
            raise ValueError

        for step in prepare_steps:
            step(video)

        print(f"manual import: {video}")

        ManualImport(video, self.CONFIG).run()
|
||||
|
||||
def _detect_youtube_id(self, current_video):
|
||||
"""find video id from filename or json"""
|
||||
youtube_id = self._extract_id_from_filename(current_video["media"])
|
||||
if youtube_id:
|
||||
current_video["video_id"] = youtube_id
|
||||
def delete(self) -> None:
|
||||
"""delete videos from index"""
|
||||
if not self.to_delete:
|
||||
print("nothing to delete")
|
||||
return
|
||||
|
||||
youtube_id = self._extract_id_from_json(current_video["metadata"])
|
||||
if youtube_id:
|
||||
current_video["video_id"] = youtube_id
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
[f"Remove {len(self.to_delete)} videos from index."]
|
||||
)
|
||||
|
||||
for youtube_id in self.to_delete:
|
||||
YoutubeVideo(youtube_id).delete_media_file()
|
||||
|
||||
def index(self) -> None:
|
||||
"""index new"""
|
||||
if not self.to_index:
|
||||
print("nothing to index")
|
||||
return
|
||||
|
||||
raise ValueError("failed to find video id")
|
||||
|
||||
@staticmethod
def _extract_id_from_filename(file_name):
    """
    read the youtube id from the file name
    expects filename ending in [<youtube_id>].<ext>
    returns the 11 character id, False when the pattern does not match
    """
    stem = os.path.splitext(file_name)[0]
    match = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", stem)
    if not match:
        print(f"id extraction failed from filename: {file_name}")

        return False

    return match.group(1)
|
||||
|
||||
def _extract_id_from_json(self, json_file):
    """parse the json file and return the value of its "id" key"""
    # os.path.join returns json_file unchanged when it is already absolute
    json_path = os.path.join(self.CACHE_DIR, "import", json_file)
    with open(json_path, "r", encoding="utf-8") as handle:
        raw_json = handle.read()

    return json.loads(raw_json)["id"]
|
||||
|
||||
def _dump_thumb(self, current_video):
    """dump an embedded cover when no thumbnail file was matched

    only mkv and mp4 media are inspected, the path of the dumped file is
    stored under the "thumb" key
    """
    if current_video["thumb"]:
        return

    media_path = current_video["media"]
    ext = os.path.splitext(media_path)[1]

    dumped = False
    if ext == ".mkv":
        idx, thumb_type = self._get_mkv_thumb_stream(media_path)
        if idx is not None:
            dumped = self.dump_mpv_thumb(media_path, idx, thumb_type)
    elif ext == ".mp4":
        thumb_type = self.get_mp4_thumb_type(media_path)
        if thumb_type:
            dumped = self.dump_mp4_thumb(media_path, thumb_type)

    if dumped:
        current_video["thumb"] = dumped
|
||||
|
||||
def _get_mkv_thumb_stream(self, media_path):
    """get stream idx of thumbnail for mkv files

    returns (idx, ext): idx counts attachment streams only, ext is the
    extension of the attached file name including the dot.
    returns (None, None) when no attachment named cover* is found
    """
    streams = self._get_streams(media_path)
    attachments = [
        i for i in streams["streams"] if i.get("codec_type") == "attachment"
    ]

    for idx, stream in enumerate(attachments):
        # tolerate attachments without tags or without a filename tag
        # instead of failing with a KeyError
        tags = stream.get("tags", {})
        filename = tags.get("filename", "")
        if "mimetype" in tags and filename.startswith("cover"):
            _, ext = os.path.splitext(filename)
            return idx, ext

    return None, None
|
||||
|
||||
@staticmethod
def dump_mpv_thumb(media_path, idx, thumb_type):
    """write cover to disk for mkv

    idx selects the attachment stream, thumb_type is the target extension
    including the dot. returns the target path, the ffmpeg exit code is
    not checked
    """
    # splitext removes exactly the extension. str.rstrip() treats its
    # argument as a set of characters and would also eat the end of the
    # name itself, e.g. "film.mkv" -> "fil"
    base_path, _ = os.path.splitext(media_path)
    new_path = f"{base_path}{thumb_type}"
    subprocess.run(
        [
            "ffmpeg",
            "-v",
            "quiet",
            f"-dump_attachment:t:{idx}",
            new_path,
            "-i",
            media_path,
        ],
        check=False,
    )

    return new_path
|
||||
|
||||
def get_mp4_thumb_type(self, media_path):
    """detect filetype of embedded thumbnail

    returns the codec name of the first stream reported as "png" or
    "jpg", False when no stream matches
    """
    streams = self._get_streams(media_path)

    for stream in streams["streams"]:
        # streams without a codec_name entry are skipped instead of
        # raising a KeyError
        codec_name = stream.get("codec_name")
        if codec_name in ("png", "jpg"):
            return codec_name

    return False
|
||||
|
||||
def _convert_thumb(self, current_video):
    """convert all thumbnails to jpg

    a thumbnail not ending in .jpg is saved again as RGB jpg next to the
    original, the original is removed and the "thumb" key is updated
    """
    thumb_path = current_video["thumb"]
    if not thumb_path:
        return

    base_path, ext = os.path.splitext(thumb_path)
    if ext == ".jpg":
        return

    new_path = f"{base_path}.jpg"
    # close the source image before removing the file it was read from
    with Image.open(thumb_path) as img_raw:
        img_raw.convert("RGB").save(new_path)

    os.remove(thumb_path)
    current_video["thumb"] = new_path
|
||||
|
||||
def _get_subtitles(self, current_video):
    """find all subtitles in media file

    skipped when subtitle files were already matched. every embedded
    subtitle stream with a resolvable language is dumped to
    <base>.<lang>.vtt and appended to the "subtitle" list
    """
    if current_video["subtitle"]:
        return

    media_path = current_video["media"]
    base_path, ext = os.path.splitext(media_path)

    # bail out before probing, the stream list is not needed for webm
    if ext == ".webm":
        print(f"{media_path}: subtitle extract from webm not supported")
        return

    streams = self._get_streams(media_path)

    for idx, stream in enumerate(streams["streams"]):
        if stream.get("codec_type") != "subtitle":
            continue

        # the language tag may be missing, and long2short() yields None
        # for codes it does not know: both would produce a broken name
        lang_long = stream.get("tags", {}).get("language")
        lang = ISO639Utils.long2short(lang_long) if lang_long else None
        if not lang:
            print(f"{media_path}: skip subtitle stream {idx}, no language")
            continue

        sub_path = f"{base_path}.{lang}.vtt"
        self._dump_subtitle(idx, media_path, sub_path)
        current_video["subtitle"].append(sub_path)
|
||||
|
||||
@staticmethod
def _dump_subtitle(idx, media_path, sub_path):
    """let ffmpeg write stream idx of media_path to sub_path

    a non-zero ffmpeg exit raises subprocess.CalledProcessError
    """
    command = ["ffmpeg", "-i", media_path, "-map", f"0:{idx}", sub_path]
    subprocess.run(command, check=True)
|
||||
|
||||
@staticmethod
def _get_streams(media_path):
    """run ffprobe on media_path and return its parsed json output

    a non-zero ffprobe exit raises subprocess.CalledProcessError
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_streams",
        "-print_format",
        "json",
        media_path,
    ]
    probe = subprocess.run(command, capture_output=True, check=True)

    return json.loads(probe.stdout.decode())
|
||||
|
||||
@staticmethod
def dump_mp4_thumb(media_path, thumb_type):
    """save cover to disk

    thumb_type is the target extension without the dot. returns the
    target path, a non-zero ffmpeg exit raises CalledProcessError
    """
    # splitext removes exactly the extension. str.rstrip() treats its
    # argument as a set of characters and would also eat the end of the
    # name itself, e.g. "tmp4.mp4" -> "t"
    base_path, _ = os.path.splitext(media_path)
    new_path = f"{base_path}.{thumb_type}"

    subprocess.run(
        [
            "ffmpeg",
            "-i",
            media_path,
            "-map",
            "0:v",
            "-map",
            "-0:V",
            "-c",
            "copy",
            new_path,
        ],
        check=True,
    )

    return new_path
|
||||
|
||||
def _convert_video(self, current_video):
    """let ffmpeg rewrite any media not ending in .mp4 as mp4

    the "media" key is pointed at the new file and the source is removed
    """
    source_path = current_video["media"]
    stem, ext = os.path.splitext(source_path)
    if ext != ".mp4":
        target_path = stem + ".mp4"
        command = [
            "ffmpeg",
            "-i",
            source_path,
            target_path,
            "-loglevel",
            "warning",
            "-stats",
        ]
        subprocess.run(command, check=True)
        current_video["media"] = target_path
        os.remove(source_path)
|
||||
|
||||
|
||||
class ManualImport:
    """import single identified video"""

    def __init__(self, current_video, config):
        self.current_video = current_video
        self.config = config

    def run(self):
        """index the video, move its files to the archive, then clean up"""
        json_data = self.index_metadata()
        self._move_to_archive(json_data)
        self._cleanup(json_data)

    def index_metadata(self):
        """get metadata from yt or json

        returns the json_data of the indexed video,
        raises ValueError when no metadata could be built
        """
        video_id = self.current_video["video_id"]
        video = YoutubeVideo(video_id)
        video.build_json(
            youtube_meta_overwrite=self._get_info_json(),
            media_path=self.current_video["media"],
        )
        if not video.json_data:
            message = (
                f"{video_id}: manual import failed, and no metadata found."
            )
            print(message)
            # carry the reason in the exception, not only on stdout
            raise ValueError(message)

        video.check_subtitles(subtitle_files=self.current_video["subtitle"])
        video.upload_to_es()

        if video.offline_import and self.current_video["thumb"]:
            # offline import with a local thumbnail: move it into place
            old_path = self.current_video["thumb"]
            thumbs = ThumbManager(video_id)
            new_path = thumbs.vid_thumb_path(absolute=True, create_folder=True)
            shutil.move(old_path, new_path, copy_function=shutil.copyfile)
        else:
            url = video.json_data["vid_thumb_url"]
            ThumbManager(video_id).download_video_thumb(url)

        return video.json_data

    def _get_info_json(self):
        """read info_json from file, False when no metadata file matched"""
        metadata_path = self.current_video["metadata"]
        if not metadata_path:
            return False

        with open(metadata_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _move_to_archive(self, json_data):
        """move identified media file to archive

        the target folder and file name are taken from json_data["media_url"],
        subtitle files follow as <media base name>.<lang>.vtt
        """
        videos = self.config["application"]["videos"]

        channel, file_name = os.path.split(json_data["media_url"])
        channel_folder = os.path.join(videos, channel)
        # exist_ok avoids the gap between checking and creating the folder
        os.makedirs(channel_folder, exist_ok=True)

        old_path = self.current_video["media"]
        new_path = os.path.join(channel_folder, file_name)
        shutil.move(old_path, new_path, copy_function=shutil.copyfile)

        base_name, _ = os.path.splitext(new_path)
        for old_path in self.current_video["subtitle"]:
            # subtitle files are named <name>.<lang>.vtt
            lang = old_path.split(".")[-2]
            new_path = f"{base_name}.{lang}.vtt"
            shutil.move(old_path, new_path, copy_function=shutil.copyfile)

    def _cleanup(self, json_data):
        """cleanup leftover files"""
        meta_data = self.current_video["metadata"]
        if meta_data and os.path.exists(meta_data):
            os.remove(meta_data)

        thumb = self.current_video["thumb"]
        if thumb and os.path.exists(thumb):
            os.remove(thumb)

        for subtitle_file in self.current_video["subtitle"]:
            if os.path.exists(subtitle_file):
                os.remove(subtitle_file)

        channel_info = os.path.join(
            self.config["application"]["cache_dir"],
            "import",
            f"{json_data['channel']['channel_id']}.info.json",
        )
        if os.path.exists(channel_info):
            os.remove(channel_info)
|
||||
|
||||
|
||||
def scan_filesystem():
|
||||
"""grouped function to delete and update index"""
|
||||
filesystem_handler = FilesystemScanner()
|
||||
filesystem_handler.list_comarison()
|
||||
if filesystem_handler.to_rename:
|
||||
print("renaming files")
|
||||
filesystem_handler.rename_files()
|
||||
if filesystem_handler.mismatch:
|
||||
print("fixing media urls in index")
|
||||
filesystem_handler.send_mismatch_bulk()
|
||||
if filesystem_handler.to_delete:
|
||||
print("delete metadata from index")
|
||||
filesystem_handler.delete_from_index()
|
||||
if filesystem_handler.to_index:
|
||||
print("index new videos")
|
||||
video_ids = [i[2] for i in filesystem_handler.to_index]
|
||||
for youtube_id in video_ids:
|
||||
total = len(self.to_index)
|
||||
for idx, youtube_id in enumerate(self.to_index):
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
message_lines=[
|
||||
f"Index missing video {youtube_id}, {idx + 1}/{total}"
|
||||
],
|
||||
progress=(idx + 1) / total,
|
||||
)
|
||||
index_new_video(youtube_id)
|
||||
|
||||
CommentList(video_ids).index()
|
||||
CommentList(self.to_index, task=self.task).index()
|
||||
|
||||
def url_fix(self) -> None:
    """
    update path v0.3.6 to v0.3.7
    fix url not matching channel-videoid pattern

    rewrites media_url to <channel_id>/<youtube_id>.mp4 for every
    ta_video document where it differs, and reports the count
    """
    bool_must = (
        "doc['media_url'].value == "
        "(doc['channel.channel_id'].value + '/' + "
        "doc['youtube_id'].value) + '.mp4'"
    )
    to_update = (
        "ctx._source['media_url'] = "
        "ctx._source.channel['channel_id'] + '/' + "
        "ctx._source['youtube_id'] + '.mp4'"
    )
    data = {
        "query": {
            "bool": {
                "must_not": [{"script": {"script": {"source": bool_must}}}]
            }
        },
        "script": {"source": to_update},
    }
    response, _ = ElasticWrap("ta_video/_update_by_query").post(data=data)
    # the update by query response reports the count under "updated",
    # reading "updates" always gave None and silenced the report below
    updated = response.get("updated")
    if updated:
        print(f"updated {updated} bad media_url")
        if self.task:
            self.task.send_progress(
                [f"Updated {updated} wrong media urls."]
            )
|
||||
|
|
|
@ -8,15 +8,15 @@ import math
|
|||
from home.src.download.yt_dlp_base import YtWrap
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.users import UserConfig
|
||||
|
||||
|
||||
class YouTubeItem:
|
||||
"""base class for youtube"""
|
||||
|
||||
es_path = False
|
||||
index_name = False
|
||||
yt_base = False
|
||||
index_name = ""
|
||||
yt_base = ""
|
||||
yt_obs = {
|
||||
"skip_download": True,
|
||||
"noplaylist": True,
|
||||
|
@ -24,18 +24,26 @@ class YouTubeItem:
|
|||
|
||||
def __init__(self, youtube_id):
    """store the id, derive the index path and load the app config"""
    self.youtube_id = youtube_id
    self.es_path = f"{self.index_name}/_doc/{youtube_id}"
    config = AppConfig().config
    self.config = config
    self.app_conf = config["application"]
    # both start out as False until they are populated
    self.youtube_meta = False
    self.json_data = False
|
||||
|
||||
def build_yt_url(self):
    """return yt_base with the item id appended"""
    base_url = self.yt_base

    return base_url + self.youtube_id
|
||||
|
||||
def get_from_youtube(self):
    """use yt-dlp to get meta data from youtube

    stores the extracted result in self.youtube_meta
    """
    print(f"{self.youtube_id}: get metadata from youtube")

    # work on a copy so the request options never leak into the class
    # level yt_obs dict
    obs_request = self.yt_obs.copy()
    langs = self.config["downloads"]["extractor_lang"]
    if langs:
        langs_list = [i.strip() for i in langs.split(",")]
        obs_request["extractor_args"] = {"youtube": {"lang": langs_list}}

    # single extraction: an earlier extra call with the plain yt_obs only
    # produced a result that was overwritten right away
    url = self.build_yt_url()
    self.youtube_meta = YtWrap(obs_request, self.config).extract(url)
|
||||
|
||||
def get_from_es(self):
|
||||
"""get indexed data from elastic search"""
|
||||
|
@ -91,13 +99,7 @@ class Pagination:
|
|||
|
||||
def get_page_size(self):
    """get default or user modified page_size"""
    key = f"{self.request.user.id}:page_size"
    page_size = RedisArchivist().get_message(key)["status"]
    if not page_size:
        # falsy status for this user: fall back to the archive config
        page_size = AppConfig().config["archive"]["page_size"]

    # NOTE(review): a second return reading "page_size" from UserConfig
    # followed this one and could never run, it was dropped here.
    # confirm which lookup is the intended source
    return page_size
|
||||
|
||||
def first_guess(self):
|
||||
"""build first guess before api call"""
|
||||
|
|
|
@ -0,0 +1,481 @@
|
|||
"""
|
||||
Functionality:
|
||||
- Handle manual import task
|
||||
- Scan and identify media files in import folder
|
||||
- Process import media files
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.index.comments import CommentList
|
||||
from home.src.index.video import YoutubeVideo
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
from home.src.ta.settings import EnvironmentSettings
|
||||
from PIL import Image
|
||||
from yt_dlp.utils import ISO639Utils
|
||||
|
||||
|
||||
class ImportFolderScanner:
    """import and indexing existing video files
    - identify all media files belonging to a video
    - identify youtube id
    - convert if needed
    """

    CONFIG = AppConfig().config
    CACHE_DIR = EnvironmentSettings.CACHE_DIR
    IMPORT_DIR = os.path.join(CACHE_DIR, "import")

    # All extensions listed here must be lowercase: file extensions are
    # lowercased before they are compared. Described in Issue #502.
    EXT_MAP = {
        "media": [".mp4", ".mkv", ".webm"],
        "metadata": [".json"],
        "thumb": [".jpg", ".png", ".webp"],
        "subtitle": [".vtt"],
    }

    def __init__(self, task=False):
        self.task = task
        # replaced with a list by match_files()
        self.to_import = False

    def scan(self):
        """scan and match media files

        returns the list of grouped videos built by match_files()
        """
        if self.task:
            self.task.send_progress(["Scanning your import folder."])

        all_files = self.get_all_files()
        self.match_files(all_files)
        self.process_videos()

        return self.to_import

    def get_all_files(self):
        """get all files in /import, sorted, joined onto IMPORT_DIR"""
        rel_paths = ignore_filelist(os.listdir(self.IMPORT_DIR))

        return sorted(os.path.join(self.IMPORT_DIR, i) for i in rel_paths)

    @staticmethod
    def _get_template():
        """base dict for video"""
        return {
            "media": False,
            "video_id": False,
            "metadata": False,
            "thumb": False,
            "subtitle": [],
        }

    def match_files(self, all_files):
        """loop through all files, join what matches

        consecutive files sharing a base name end up in the same record,
        the result is stored in self.to_import
        """
        self.to_import = []

        current_video = self._get_template()
        last_base = False

        for file_path in all_files:
            base_name, ext = self._detect_base_name(file_path)
            key, file_path = self._detect_type(file_path, ext)
            if not key or not file_path:
                continue

            if base_name != last_base:
                # a new base name closes the record collected so far
                if last_base:
                    print(f"manual import: {current_video}")
                    self.to_import.append(current_video)

                current_video = self._get_template()
                last_base = base_name

            if key == "subtitle":
                current_video["subtitle"].append(file_path)
            else:
                current_video[key] = file_path

        # the trailing record is only kept when it holds a media file
        if current_video.get("media"):
            print(f"manual import: {current_video}")
            self.to_import.append(current_video)

    def _detect_base_name(self, file_path):
        """extract base_name and ext for matching

        a second extension that is a known language code or ".info" is
        not counted as part of the base name
        """
        base_name_raw, ext = os.path.splitext(file_path)
        base_name, ext2 = os.path.splitext(base_name_raw)

        if ext2:
            if ISO639Utils.short2long(ext2.strip(".")) or ext2 == ".info":
                # valid secondary extension
                return base_name, ext

        return base_name_raw, ext

    def _detect_type(self, file_path, ext):
        """detect metadata type for file

        returns (key, file_path) for an extension listed in EXT_MAP,
        (False, False) otherwise
        """
        normalized = ext.lower()
        for key, value in self.EXT_MAP.items():
            if normalized in value:
                return key, file_path

        return False, False

    def process_videos(self):
        """loop through all videos

        raises ValueError for a record without a media file
        """
        for idx, current_video in enumerate(self.to_import):
            if not current_video["media"]:
                message = f"{current_video}: no matching media file found."
                print(message)
                # carry the reason in the exception, not only on stdout
                raise ValueError(message)

            if self.task:
                self._notify(idx, current_video)

            self._detect_youtube_id(current_video)
            self._dump_thumb(current_video)
            self._convert_thumb(current_video)
            self._get_subtitles(current_video)
            self._convert_video(current_video)
            print(f"manual import: {current_video}")

            ManualImport(current_video, self.CONFIG).run()

        video_ids = [i["video_id"] for i in self.to_import]
        CommentList(video_ids, task=self.task).index()

    def _notify(self, idx, current_video):
        """send notification back to task"""
        filename = os.path.split(current_video["media"])[-1]
        if len(filename) > 50:
            # keep the progress message short
            filename = filename[:50] + "..."

        total = len(self.to_import)
        message = [
            f"Import queue processing video {idx + 1}/{total}",
            filename,
        ]
        progress = (idx + 1) / total
        self.task.send_progress(message, progress=progress)

    def _detect_youtube_id(self, current_video):
        """find video id from filename or json

        raises ValueError when neither source yields an id
        """
        youtube_id = self._extract_id_from_filename(current_video["media"])

        # the info json is optional: without this guard a missing file
        # ends in a TypeError from os.path.join instead of the ValueError
        if not youtube_id and current_video["metadata"]:
            youtube_id = self._extract_id_from_json(current_video["metadata"])

        if not youtube_id:
            raise ValueError("failed to find video id")

        current_video["video_id"] = youtube_id

    @staticmethod
    def _extract_id_from_filename(file_name):
        """
        look at the file name for the youtube id
        expects filename ending in [<youtube_id>].<ext>
        returns the id, False when the pattern does not match
        """
        base_name, _ = os.path.splitext(file_name)
        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", base_name)
        if id_search:
            return id_search.group(1)

        print(f"id extraction failed from filename: {file_name}")

        return False

    def _extract_id_from_json(self, json_file):
        """open json file and extract id, False when it has no "id" key"""
        # os.path.join returns json_file unchanged when it is absolute
        json_path = os.path.join(self.CACHE_DIR, "import", json_file)
        with open(json_path, "r", encoding="utf-8") as f:
            json_content = json.load(f)

        # a missing key falls through to the caller's ValueError
        return json_content.get("id", False)

    def _dump_thumb(self, current_video):
        """extract embedded thumb before converting

        skipped when a thumbnail file was already matched, only mkv and
        mp4 media are inspected
        """
        if current_video["thumb"]:
            return

        media_path = current_video["media"]
        _, ext = os.path.splitext(media_path)
        # _detect_type accepts any casing, compare the same way here
        ext = ext.lower()

        new_path = False
        if ext == ".mkv":
            idx, thumb_type = self._get_mkv_thumb_stream(media_path)
            if idx is not None:
                new_path = self.dump_mpv_thumb(media_path, idx, thumb_type)

        elif ext == ".mp4":
            thumb_type = self.get_mp4_thumb_type(media_path)
            if thumb_type:
                new_path = self.dump_mp4_thumb(media_path, thumb_type)

        if new_path:
            current_video["thumb"] = new_path

    def _get_mkv_thumb_stream(self, media_path):
        """get stream idx of thumbnail for mkv files

        returns (idx, ext): idx counts attachment streams only, ext is
        the extension of the attached file name including the dot.
        returns (None, None) when no attachment named cover* is found
        """
        streams = self._get_streams(media_path)
        attachments = [
            i
            for i in streams["streams"]
            if i.get("codec_type") == "attachment"
        ]

        for idx, stream in enumerate(attachments):
            # tolerate attachments without tags or without a filename tag
            tags = stream.get("tags", {})
            filename = tags.get("filename", "")
            if "mimetype" in tags and filename.startswith("cover"):
                _, ext = os.path.splitext(filename)
                return idx, ext

        return None, None

    @staticmethod
    def dump_mpv_thumb(media_path, idx, thumb_type):
        """write cover to disk for mkv

        thumb_type is the target extension including the dot. returns
        the target path, the ffmpeg exit code is not checked
        """
        # splitext removes exactly the extension. str.rstrip() treats its
        # argument as a set of characters and would also eat the end of
        # the name itself, e.g. "film.mkv" -> "fil"
        base_path, _ = os.path.splitext(media_path)
        new_path = f"{base_path}{thumb_type}"
        subprocess.run(
            [
                "ffmpeg",
                "-v",
                "quiet",
                f"-dump_attachment:t:{idx}",
                new_path,
                "-i",
                media_path,
            ],
            check=False,
        )

        return new_path

    def get_mp4_thumb_type(self, media_path):
        """detect filetype of embedded thumbnail

        returns the codec name of the first stream reported as "png" or
        "jpg", False when no stream matches
        """
        streams = self._get_streams(media_path)

        for stream in streams["streams"]:
            # streams without a codec_name entry are skipped
            codec_name = stream.get("codec_name")
            if codec_name in ("png", "jpg"):
                return codec_name

        return False

    def _convert_thumb(self, current_video):
        """convert all thumbnails to jpg"""
        thumb_path = current_video["thumb"]
        if not thumb_path:
            return

        base_path, ext = os.path.splitext(thumb_path)
        if ext.lower() == ".jpg":
            return

        new_path = f"{base_path}.jpg"
        # close the source image before removing the file it came from
        with Image.open(thumb_path) as img_raw:
            img_raw.convert("RGB").save(new_path)

        os.remove(thumb_path)
        current_video["thumb"] = new_path

    def _get_subtitles(self, current_video):
        """find all subtitles in media file

        skipped when subtitle files were already matched. every embedded
        subtitle stream with a resolvable language is dumped to
        <base>.<lang>.vtt and appended to the "subtitle" list
        """
        if current_video["subtitle"]:
            return

        media_path = current_video["media"]
        base_path, ext = os.path.splitext(media_path)

        # bail out before probing, the stream list is not needed for webm
        if ext.lower() == ".webm":
            print(f"{media_path}: subtitle extract from webm not supported")
            return

        streams = self._get_streams(media_path)

        for idx, stream in enumerate(streams["streams"]):
            if stream.get("codec_type") != "subtitle":
                continue

            # the language tag may be missing, and long2short() yields
            # None for codes it does not know
            lang_long = stream.get("tags", {}).get("language")
            lang = ISO639Utils.long2short(lang_long) if lang_long else None
            if not lang:
                print(f"{media_path}: skip subtitle stream {idx}, no language")
                continue

            sub_path = f"{base_path}.{lang}.vtt"
            self._dump_subtitle(idx, media_path, sub_path)
            current_video["subtitle"].append(sub_path)

    @staticmethod
    def _dump_subtitle(idx, media_path, sub_path):
        """extract subtitle from media file"""
        subprocess.run(
            ["ffmpeg", "-i", media_path, "-map", f"0:{idx}", sub_path],
            check=True,
        )

    @staticmethod
    def _get_streams(media_path):
        """return all streams from media_path, as parsed ffprobe json"""
        streams_raw = subprocess.run(
            [
                "ffprobe",
                "-v",
                "error",
                "-show_streams",
                "-print_format",
                "json",
                media_path,
            ],
            capture_output=True,
            check=True,
        )

        return json.loads(streams_raw.stdout.decode())

    @staticmethod
    def dump_mp4_thumb(media_path, thumb_type):
        """save cover to disk

        thumb_type is the target extension without the dot,
        returns the target path
        """
        # see dump_mpv_thumb: rstrip() would strip characters, not a suffix
        base_path, _ = os.path.splitext(media_path)
        new_path = f"{base_path}.{thumb_type}"

        subprocess.run(
            [
                "ffmpeg",
                "-i",
                media_path,
                "-map",
                "0:v",
                "-map",
                "-0:V",
                "-c",
                "copy",
                new_path,
            ],
            check=True,
        )

        return new_path

    def _convert_video(self, current_video):
        """convert if needed

        media not ending in .mp4 (any casing) is rewritten by ffmpeg as
        mp4, the "media" key is updated and the source file is removed
        """
        current_path = current_video["media"]
        base_path, ext = os.path.splitext(current_path)
        if ext.lower() == ".mp4":
            return

        new_path = base_path + ".mp4"
        subprocess.run(
            [
                "ffmpeg",
                "-i",
                current_path,
                new_path,
                "-loglevel",
                "warning",
                "-stats",
            ],
            check=True,
        )
        current_video["media"] = new_path
        os.remove(current_path)
|
||||
|
||||
|
||||
class ManualImport:
    """import single identified video"""

    def __init__(self, current_video, config):
        self.current_video = current_video
        self.config = config

    def run(self):
        """run all: index, move to archive, clean up"""
        json_data = self.index_metadata()
        self._move_to_archive(json_data)
        self._cleanup(json_data)

    def index_metadata(self):
        """get metadata from yt or json

        returns the json_data of the indexed video,
        raises ValueError when no metadata could be built
        """
        video_id = self.current_video["video_id"]
        video = YoutubeVideo(video_id)
        video.build_json(
            youtube_meta_overwrite=self._get_info_json(),
            media_path=self.current_video["media"],
        )
        if not video.json_data:
            message = (
                f"{video_id}: manual import failed, and no metadata found."
            )
            print(message)
            # carry the reason in the exception, not only on stdout
            raise ValueError(message)

        video.check_subtitles(subtitle_files=self.current_video["subtitle"])
        video.upload_to_es()

        if video.offline_import and self.current_video["thumb"]:
            # offline import with a local thumbnail: move it into place
            old_path = self.current_video["thumb"]
            thumbs = ThumbManager(video_id)
            new_path = thumbs.vid_thumb_path(absolute=True, create_folder=True)
            shutil.move(old_path, new_path, copy_function=shutil.copyfile)
        else:
            url = video.json_data["vid_thumb_url"]
            ThumbManager(video_id).download_video_thumb(url)

        return video.json_data

    def _get_info_json(self):
        """read info_json from file, False when no metadata file matched"""
        metadata_path = self.current_video["metadata"]
        if not metadata_path:
            return False

        with open(metadata_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _move_to_archive(self, json_data):
        """move identified media file to archive

        the target folder and file name are taken from
        json_data["media_url"], subtitle files follow as
        <media base name>.<lang>.vtt. ownership is set to HOST_UID and
        HOST_GID when both are configured
        """
        videos = EnvironmentSettings.MEDIA_DIR
        host_uid = EnvironmentSettings.HOST_UID
        host_gid = EnvironmentSettings.HOST_GID
        set_owner = bool(host_uid and host_gid)

        channel, file_name = os.path.split(json_data["media_url"])
        channel_folder = os.path.join(videos, channel)
        if not os.path.exists(channel_folder):
            # exist_ok covers the gap between the check and the creation
            os.makedirs(channel_folder, exist_ok=True)
            if set_owner:
                os.chown(channel_folder, host_uid, host_gid)

        old_path = self.current_video["media"]
        new_path = os.path.join(channel_folder, file_name)
        shutil.move(old_path, new_path, copy_function=shutil.copyfile)
        if set_owner:
            os.chown(new_path, host_uid, host_gid)

        base_name, _ = os.path.splitext(new_path)
        for old_path in self.current_video["subtitle"]:
            # subtitle files are named <name>.<lang>.vtt
            lang = old_path.split(".")[-2]
            new_path = f"{base_name}.{lang}.vtt"
            shutil.move(old_path, new_path, copy_function=shutil.copyfile)
            # keep subtitle ownership in line with the media file
            if set_owner:
                os.chown(new_path, host_uid, host_gid)

    def _cleanup(self, json_data):
        """cleanup leftover files"""
        meta_data = self.current_video["metadata"]
        if meta_data and os.path.exists(meta_data):
            os.remove(meta_data)

        thumb = self.current_video["thumb"]
        if thumb and os.path.exists(thumb):
            os.remove(thumb)

        for subtitle_file in self.current_video["subtitle"]:
            if os.path.exists(subtitle_file):
                os.remove(subtitle_file)

        channel_info = os.path.join(
            EnvironmentSettings.CACHE_DIR,
            "import",
            f"{json_data['channel']['channel_id']}.info.json",
        )
        if os.path.exists(channel_info):
            os.remove(channel_info)
|