basic auth for elasticsearch

This commit is contained in:
simon 2021-10-28 15:49:58 +07:00
parent ebd8368856
commit 254c518505
10 changed files with 131 additions and 49 deletions

View File

@ -17,6 +17,7 @@ services:
- HOST_GID=1000 - HOST_GID=1000
- TA_USERNAME=tubearchivist - TA_USERNAME=tubearchivist
- TA_PASSWORD=verysecret - TA_PASSWORD=verysecret
- ELASTIC_PASSWORD=verysecret
depends_on: depends_on:
- archivist-es - archivist-es
- archivist-redis - archivist-redis
@ -35,6 +36,8 @@ services:
container_name: archivist-es container_name: archivist-es
restart: always restart: always
environment: environment:
- "xpack.security.enabled=true"
- "ELASTIC_PASSWORD=verysecret"
- "discovery.type=single-node" - "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms512m -Xmx512m" - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
ulimits: ulimits:

6
run.sh
View File

@ -1,8 +1,12 @@
#!/bin/bash #!/bin/bash
# startup script inside the container for tubearchivist # startup script inside the container for tubearchivist
if [[ -z "$ELASTIC_USER" ]]; then
export ELASTIC_USER=elastic
fi
counter=0 counter=0
until curl "$ES_URL" -fs; do until curl -u "$ELASTIC_USER":"$ELASTIC_PASSWORD" "$ES_URL" -fs; do
echo "waiting for elastic search to start" echo "waiting for elastic search to start"
counter=$((counter+1)) counter=$((counter+1))
if [[ $counter -eq 12 ]]; then if [[ $counter -eq 12 ]]; then

View File

@ -51,9 +51,13 @@ class AppConfig:
else: else:
host_gid = False host_gid = False
es_pass = os.environ.get("ELASTIC_PASSWORD")
es_user = os.environ.get("ELASTIC_USER", default="elastic")
application = { application = {
"REDIS_HOST": os.environ.get("REDIS_HOST"), "REDIS_HOST": os.environ.get("REDIS_HOST"),
"es_url": os.environ.get("ES_URL"), "es_url": os.environ.get("ES_URL"),
"es_auth": (es_user, es_pass),
"HOST_UID": host_uid, "HOST_UID": host_uid,
"HOST_GID": host_gid, "HOST_GID": host_gid,
} }

View File

@ -29,6 +29,7 @@ class PendingList:
CONFIG = AppConfig().config CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"] ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
VIDEOS = CONFIG["application"]["videos"] VIDEOS = CONFIG["application"]["videos"]
@staticmethod @staticmethod
@ -107,7 +108,9 @@ class PendingList:
query_str = "\n".join(bulk_list) query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"} headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk" url = self.ES_URL + "/_bulk"
request = requests.post(url, data=query_str, headers=headers) request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request) print(request)
@ -155,7 +158,7 @@ class PendingList:
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
# get PIT ID # get PIT ID
url = self.ES_URL + "/ta_download/_pit?keep_alive=1m" url = self.ES_URL + "/ta_download/_pit?keep_alive=1m"
response = requests.post(url) response = requests.post(url, auth=self.ES_AUTH)
json_data = json.loads(response.text) json_data = json.loads(response.text)
pit_id = json_data["id"] pit_id = json_data["id"]
# query # query
@ -170,7 +173,9 @@ class PendingList:
all_pending = [] all_pending = []
all_ignore = [] all_ignore = []
while True: while True:
response = requests.get(url, data=query_str, headers=headers) response = requests.get(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
json_data = json.loads(response.text) json_data = json.loads(response.text)
all_hits = json_data["hits"]["hits"] all_hits = json_data["hits"]["hits"]
if all_hits: if all_hits:
@ -188,7 +193,12 @@ class PendingList:
break break
# clean up PIT # clean up PIT
query_str = json.dumps({"id": pit_id}) query_str = json.dumps({"id": pit_id})
requests.delete(self.ES_URL + "/_pit", data=query_str, headers=headers) requests.delete(
self.ES_URL + "/_pit",
data=query_str,
headers=headers,
auth=self.ES_AUTH,
)
return all_pending, all_ignore return all_pending, all_ignore
def get_all_indexed(self): def get_all_indexed(self):
@ -196,7 +206,7 @@ class PendingList:
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
# get PIT ID # get PIT ID
url = self.ES_URL + "/ta_video/_pit?keep_alive=1m" url = self.ES_URL + "/ta_video/_pit?keep_alive=1m"
response = requests.post(url) response = requests.post(url, auth=self.ES_AUTH)
json_data = json.loads(response.text) json_data = json.loads(response.text)
pit_id = json_data["id"] pit_id = json_data["id"]
# query # query
@ -210,7 +220,9 @@ class PendingList:
url = self.ES_URL + "/_search" url = self.ES_URL + "/_search"
all_indexed = [] all_indexed = []
while True: while True:
response = requests.get(url, data=query_str, headers=headers) response = requests.get(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
json_data = json.loads(response.text) json_data = json.loads(response.text)
all_hits = json_data["hits"]["hits"] all_hits = json_data["hits"]["hits"]
if all_hits: if all_hits:
@ -224,7 +236,12 @@ class PendingList:
break break
# clean up PIT # clean up PIT
query_str = json.dumps({"id": pit_id}) query_str = json.dumps({"id": pit_id})
requests.delete(self.ES_URL + "/_pit", data=query_str, headers=headers) requests.delete(
self.ES_URL + "/_pit",
data=query_str,
headers=headers,
auth=self.ES_AUTH,
)
return all_indexed return all_indexed
def get_all_downloaded(self): def get_all_downloaded(self):
@ -244,7 +261,7 @@ class PendingList:
def delete_from_pending(self, youtube_id): def delete_from_pending(self, youtube_id):
"""delete the youtube_id from ta_download""" """delete the youtube_id from ta_download"""
url = f"{self.ES_URL}/ta_download/_doc/{youtube_id}" url = f"{self.ES_URL}/ta_download/_doc/{youtube_id}"
response = requests.delete(url) response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -266,7 +283,9 @@ class PendingList:
headers = {"Content-type": "application/x-ndjson"} headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk" url = self.ES_URL + "/_bulk"
request = requests.post(url, data=query_str, headers=headers) request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
mess_dict = { mess_dict = {
"status": "ignore", "status": "ignore",
"level": "info", "level": "info",
@ -284,6 +303,7 @@ class ChannelSubscription:
def __init__(self): def __init__(self):
config = AppConfig().config config = AppConfig().config
self.es_url = config["application"]["es_url"] self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.channel_size = config["subscriptions"]["channel_size"] self.channel_size = config["subscriptions"]["channel_size"]
def get_channels(self, subscribed_only=True): def get_channels(self, subscribed_only=True):
@ -291,7 +311,7 @@ class ChannelSubscription:
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
# get PIT ID # get PIT ID
url = self.es_url + "/ta_channel/_pit?keep_alive=1m" url = self.es_url + "/ta_channel/_pit?keep_alive=1m"
response = requests.post(url) response = requests.post(url, auth=self.es_auth)
json_data = json.loads(response.text) json_data = json.loads(response.text)
pit_id = json_data["id"] pit_id = json_data["id"]
# query # query
@ -313,7 +333,9 @@ class ChannelSubscription:
url = self.es_url + "/_search" url = self.es_url + "/_search"
all_channels = [] all_channels = []
while True: while True:
response = requests.get(url, data=query_str, headers=headers) response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
json_data = json.loads(response.text) json_data = json.loads(response.text)
all_hits = json_data["hits"]["hits"] all_hits = json_data["hits"]["hits"]
if all_hits: if all_hits:
@ -328,7 +350,12 @@ class ChannelSubscription:
break break
# clean up PIT # clean up PIT
query_str = json.dumps({"id": pit_id}) query_str = json.dumps({"id": pit_id})
requests.delete(self.es_url + "/_pit", data=query_str, headers=headers) requests.delete(
self.es_url + "/_pit",
data=query_str,
headers=headers,
auth=self.es_auth,
)
return all_channels return all_channels
def get_last_youtube_videos(self, channel_id, limit=True): def get_last_youtube_videos(self, channel_id, limit=True):
@ -394,7 +421,9 @@ class ChannelSubscription:
url = self.es_url + "/ta_channel/_update/" + channel_id url = self.es_url + "/ta_channel/_update/" + channel_id
payload = json.dumps({"doc": channel_dict}) payload = json.dumps({"doc": channel_dict})
# update channel # update channel
request = requests.post(url, data=payload, headers=headers) request = requests.post(
url, data=payload, headers=headers, auth=self.es_auth
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
# sync to videos # sync to videos
@ -602,7 +631,8 @@ class VideoDownloader:
def delete_from_pending(self, youtube_id): def delete_from_pending(self, youtube_id):
"""delete downloaded video from pending index if its there""" """delete downloaded video from pending index if its there"""
es_url = self.config["application"]["es_url"] es_url = self.config["application"]["es_url"]
es_auth = self.config["application"]["es_auth"]
url = f"{es_url}/ta_download/_doc/{youtube_id}" url = f"{es_url}/ta_download/_doc/{youtube_id}"
response = requests.delete(url) response = requests.delete(url, auth=es_auth)
if not response.ok and not response.status_code == 404: if not response.ok and not response.status_code == 404:
print(response.text) print(response.text)

View File

@ -14,13 +14,13 @@ import redis
import requests import requests
def get_total_hits(index, es_url, match_field): def get_total_hits(index, es_url, es_auth, match_field):
"""get total hits from index""" """get total hits from index"""
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
data = {"query": {"match": {match_field: True}}} data = {"query": {"match": {match_field: True}}}
payload = json.dumps(data) payload = json.dumps(data)
url = f"{es_url}/{index}/_search?filter_path=hits.total" url = f"{es_url}/{index}/_search?filter_path=hits.total"
request = requests.post(url, data=payload, headers=headers) request = requests.post(url, data=payload, headers=headers, auth=es_auth)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
total_json = json.loads(request.text) total_json = json.loads(request.text)

View File

@ -24,6 +24,7 @@ class YoutubeChannel:
CONFIG = AppConfig().config CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"] ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
CACHE_DIR = CONFIG["application"]["cache_dir"] CACHE_DIR = CONFIG["application"]["cache_dir"]
VIDEOS = CONFIG["application"]["videos"] VIDEOS = CONFIG["application"]["videos"]
@ -51,7 +52,7 @@ class YoutubeChannel:
"""get from elastic search first if possible""" """get from elastic search first if possible"""
channel_id = self.channel_id channel_id = self.channel_id
url = f"{self.ES_URL}/ta_channel/_doc/{channel_id}" url = f"{self.ES_URL}/ta_channel/_doc/{channel_id}"
response = requests.get(url) response = requests.get(url, auth=self.ES_AUTH)
if response.ok: if response.ok:
channel_source = response.json()["_source"] channel_source = response.json()["_source"]
self.source = "elastic" self.source = "elastic"
@ -63,7 +64,7 @@ class YoutubeChannel:
channel_id = self.channel_id channel_id = self.channel_id
url = f"https://www.youtube.com/channel/{channel_id}/about?hl=en" url = f"https://www.youtube.com/channel/{channel_id}/about?hl=en"
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"} cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
response = requests.get(url, cookies=cookies) response = requests.get(url, cookies=cookies)
if response.ok: if response.ok:
channel_page = response.text channel_page = response.text
else: else:
@ -166,7 +167,7 @@ class YoutubeChannel:
def upload_to_es(self): def upload_to_es(self):
"""upload channel data to elastic search""" """upload channel data to elastic search"""
url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}" url = f"{self.ES_URL}/ta_channel/_doc/{self.channel_id}"
response = requests.put(url, json=self.channel_dict) response = requests.put(url, json=self.channel_dict, auth=self.ES_AUTH)
print(f"added {self.channel_id} to es") print(f"added {self.channel_id} to es")
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -183,14 +184,18 @@ class YoutubeChannel:
data = {"description": channel_id, "processors": processors} data = {"description": channel_id, "processors": processors}
payload = json.dumps(data) payload = json.dumps(data)
url = self.ES_URL + "/_ingest/pipeline/" + channel_id url = self.ES_URL + "/_ingest/pipeline/" + channel_id
request = requests.put(url, data=payload, headers=headers) request = requests.put(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
# apply pipeline # apply pipeline
data = {"query": {"match": {"channel.channel_id": channel_id}}} data = {"query": {"match": {"channel.channel_id": channel_id}}}
payload = json.dumps(data) payload = json.dumps(data)
url = self.ES_URL + "/ta_video/_update_by_query?pipeline=" + channel_id url = self.ES_URL + "/ta_video/_update_by_query?pipeline=" + channel_id
request = requests.post(url, data=payload, headers=headers) request = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
@ -211,7 +216,9 @@ class YoutubeChannel:
} }
payload = json.dumps(data) payload = json.dumps(data)
url = self.ES_URL + "/ta_video/_delete_by_query" url = self.ES_URL + "/ta_video/_delete_by_query"
response = requests.post(url, data=payload, headers=headers) response = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -230,7 +237,7 @@ class YoutubeChannel:
print("delete indexed videos") print("delete indexed videos")
self.delete_es_videos() self.delete_es_videos()
url = self.ES_URL + "/ta_channel/_doc/" + self.channel_id url = self.ES_URL + "/ta_channel/_doc/" + self.channel_id
response = requests.delete(url) response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -240,6 +247,7 @@ class YoutubeVideo:
CONFIG = AppConfig().config CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"] ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
CACHE_DIR = CONFIG["application"]["cache_dir"] CACHE_DIR = CONFIG["application"]["cache_dir"]
VIDEOS = CONFIG["application"]["videos"] VIDEOS = CONFIG["application"]["videos"]
@ -360,7 +368,7 @@ class YoutubeVideo:
def get_es_data(self): def get_es_data(self):
"""get current data from elastic search""" """get current data from elastic search"""
url = self.ES_URL + "/ta_video/_doc/" + self.youtube_id url = self.ES_URL + "/ta_video/_doc/" + self.youtube_id
response = requests.get(url) response = requests.get(url, auth=self.ES_AUTH)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
es_vid_dict = json.loads(response.text) es_vid_dict = json.loads(response.text)
@ -369,7 +377,7 @@ class YoutubeVideo:
def upload_to_es(self): def upload_to_es(self):
"""upload channel data to elastic search""" """upload channel data to elastic search"""
url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}" url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}"
response = requests.put(url, json=self.vid_dict) response = requests.put(url, json=self.vid_dict, auth=self.ES_AUTH)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -380,7 +388,9 @@ class YoutubeVideo:
url = f"{self.ES_URL}/ta_video/_update/{youtube_id}" url = f"{self.ES_URL}/ta_video/_update/{youtube_id}"
data = {"script": "ctx._source.active = false"} data = {"script": "ctx._source.active = false"}
json_str = json.dumps(data) json_str = json.dumps(data)
response = requests.post(url, data=json_str, headers=headers) response = requests.post(
url, data=json_str, headers=headers, auth=self.ES_AUTH
)
print(f"deactivated {youtube_id}") print(f"deactivated {youtube_id}")
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -395,7 +405,7 @@ class YoutubeVideo:
os.remove(to_delete) os.remove(to_delete)
# delete from index # delete from index
url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}" url = f"{self.ES_URL}/ta_video/_doc/{self.youtube_id}"
response = requests.delete(url) response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
# delete thumbs from cache # delete thumbs from cache
@ -407,6 +417,7 @@ class WatchState:
CONFIG = AppConfig().config CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"] ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
HEADERS = {"Content-type": "application/json"} HEADERS = {"Content-type": "application/json"}
def __init__(self, youtube_id): def __init__(self, youtube_id):
@ -450,7 +461,9 @@ class WatchState:
data["doc"]["player"]["watched"] = False data["doc"]["player"]["watched"] = False
payload = json.dumps(data) payload = json.dumps(data)
request = requests.post(url, data=payload, headers=self.HEADERS) request = requests.post(
url, data=payload, headers=self.HEADERS, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
@ -472,7 +485,9 @@ class WatchState:
payload = json.dumps(data) payload = json.dumps(data)
url = f"{es_url}/_ingest/pipeline/{youtube_id}" url = f"{es_url}/_ingest/pipeline/{youtube_id}"
request = requests.put(url, data=payload, headers=headers) request = requests.put(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
raise ValueError("failed to post ingest pipeline") raise ValueError("failed to post ingest pipeline")
@ -485,7 +500,9 @@ class WatchState:
data = {"query": {"bool": {"must": must_list}}} data = {"query": {"bool": {"must": must_list}}}
payload = json.dumps(data) payload = json.dumps(data)
url = f"{es_url}/ta_video/_update_by_query?pipeline={youtube_id}" url = f"{es_url}/ta_video/_update_by_query?pipeline={youtube_id}"
request = requests.post(url, data=payload, headers=headers) request = requests.post(
url, data=payload, headers=headers, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)

View File

@ -164,6 +164,7 @@ class ElasticIndex:
CONFIG = AppConfig().config CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"] ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
HEADERS = {"Content-type": "application/json"} HEADERS = {"Content-type": "application/json"}
def __init__(self, index_name, expected_map, expected_set): def __init__(self, index_name, expected_map, expected_set):
@ -176,7 +177,7 @@ class ElasticIndex:
"""check if index already exists and return mapping if it does""" """check if index already exists and return mapping if it does"""
index_name = self.index_name index_name = self.index_name
url = f"{self.ES_URL}/ta_{index_name}" url = f"{self.ES_URL}/ta_{index_name}"
response = requests.get(url) response = requests.get(url, auth=self.ES_AUTH)
exists = response.ok exists = response.ok
if exists: if exists:
@ -274,7 +275,9 @@ class ElasticIndex:
query = {"source": {"index": source}, "dest": {"index": destination}} query = {"source": {"index": source}, "dest": {"index": destination}}
data = json.dumps(query) data = json.dumps(query)
url = self.ES_URL + "/_reindex?refresh=true" url = self.ES_URL + "/_reindex?refresh=true"
response = requests.post(url=url, data=data, headers=self.HEADERS) response = requests.post(
url=url, data=data, headers=self.HEADERS, auth=self.ES_AUTH
)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -284,7 +287,7 @@ class ElasticIndex:
url = f"{self.ES_URL}/ta_{self.index_name}_backup" url = f"{self.ES_URL}/ta_{self.index_name}_backup"
else: else:
url = f"{self.ES_URL}/ta_{self.index_name}" url = f"{self.ES_URL}/ta_{self.index_name}"
response = requests.delete(url) response = requests.delete(url, auth=self.ES_AUTH)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -301,7 +304,9 @@ class ElasticIndex:
# create # create
url = f"{self.ES_URL}/ta_{self.index_name}" url = f"{self.ES_URL}/ta_{self.index_name}"
data = json.dumps(payload) data = json.dumps(payload)
response = requests.put(url=url, data=data, headers=self.HEADERS) response = requests.put(
url=url, data=data, headers=self.HEADERS, auth=self.ES_AUTH
)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
@ -319,9 +324,10 @@ class ElasticBackup:
"""export all documents of a single index""" """export all documents of a single index"""
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
es_url = self.config["application"]["es_url"] es_url = self.config["application"]["es_url"]
es_auth = self.config["application"]["es_auth"]
# get PIT ID # get PIT ID
url = f"{es_url}/ta_{index_name}/_pit?keep_alive=1m" url = f"{es_url}/ta_{index_name}/_pit?keep_alive=1m"
response = requests.post(url) response = requests.post(url, auth=es_auth)
json_data = json.loads(response.text) json_data = json.loads(response.text)
pit_id = json_data["id"] pit_id = json_data["id"]
# build query # build query
@ -336,7 +342,9 @@ class ElasticBackup:
# loop until nothing left # loop until nothing left
all_results = [] all_results = []
while True: while True:
response = requests.get(url, data=query_str, headers=headers) response = requests.get(
url, data=query_str, headers=headers, auth=es_auth
)
json_data = json.loads(response.text) json_data = json.loads(response.text)
all_hits = json_data["hits"]["hits"] all_hits = json_data["hits"]["hits"]
if all_hits: if all_hits:
@ -350,7 +358,9 @@ class ElasticBackup:
break break
# clean up PIT # clean up PIT
query_str = json.dumps({"id": pit_id}) query_str = json.dumps({"id": pit_id})
requests.delete(es_url + "/_pit", data=query_str, headers=headers) requests.delete(
es_url + "/_pit", data=query_str, headers=headers, auth=es_auth
)
return all_results return all_results
@ -416,6 +426,7 @@ class ElasticBackup:
"""send bulk to es""" """send bulk to es"""
cache_dir = self.config["application"]["cache_dir"] cache_dir = self.config["application"]["cache_dir"]
es_url = self.config["application"]["es_url"] es_url = self.config["application"]["es_url"]
es_auth = self.config["application"]["es_auth"]
headers = {"Content-type": "application/x-ndjson"} headers = {"Content-type": "application/x-ndjson"}
file_path = os.path.join(cache_dir, file_name) file_path = os.path.join(cache_dir, file_name)
@ -426,7 +437,9 @@ class ElasticBackup:
return return
url = es_url + "/_bulk" url = es_url + "/_bulk"
request = requests.post(url, data=query_str, headers=headers) request = requests.post(
url, data=query_str, headers=headers, auth=es_auth
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)

View File

@ -35,6 +35,7 @@ class Reindex:
config = AppConfig().config config = AppConfig().config
self.sleep_interval = config["downloads"]["sleep_interval"] self.sleep_interval = config["downloads"]["sleep_interval"]
self.es_url = config["application"]["es_url"] self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.refresh_interval = 90 self.refresh_interval = 90
# scan # scan
self.video_daily, self.channel_daily = self.get_daily() self.video_daily, self.channel_daily = self.get_daily()
@ -43,10 +44,12 @@ class Reindex:
def get_daily(self): def get_daily(self):
"""get daily refresh values""" """get daily refresh values"""
total_videos = get_total_hits("ta_video", self.es_url, "active") total_videos = get_total_hits(
"ta_video", self.es_url, self.es_auth, "active"
)
video_daily = ceil(total_videos / self.refresh_interval * 1.2) video_daily = ceil(total_videos / self.refresh_interval * 1.2)
total_channels = get_total_hits( total_channels = get_total_hits(
"ta_channel", self.es_url, "channel_active" "ta_channel", self.es_url, self.es_auth, "channel_active"
) )
channel_daily = ceil(total_channels / self.refresh_interval * 1.2) channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
return (video_daily, channel_daily) return (video_daily, channel_daily)
@ -72,7 +75,9 @@ class Reindex:
} }
query_str = json.dumps(data) query_str = json.dumps(data)
url = self.es_url + "/ta_video/_search" url = self.es_url + "/ta_video/_search"
response = requests.get(url, data=query_str, headers=headers) response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
response_dict = json.loads(response.text) response_dict = json.loads(response.text)
@ -100,7 +105,9 @@ class Reindex:
} }
query_str = json.dumps(data) query_str = json.dumps(data)
url = self.es_url + "/ta_channel/_search" url = self.es_url + "/ta_channel/_search"
response = requests.get(url, data=query_str, headers=headers) response = requests.get(
url, data=query_str, headers=headers, auth=self.es_auth
)
if not response.ok: if not response.ok:
print(response.text) print(response.text)
response_dict = json.loads(response.text) response_dict = json.loads(response.text)
@ -208,6 +215,7 @@ class FilesystemScanner:
CONFIG = AppConfig().config CONFIG = AppConfig().config
ES_URL = CONFIG["application"]["es_url"] ES_URL = CONFIG["application"]["es_url"]
ES_AUTH = CONFIG["application"]["es_auth"]
VIDEOS = CONFIG["application"]["videos"] VIDEOS = CONFIG["application"]["videos"]
def __init__(self): def __init__(self):
@ -329,7 +337,9 @@ class FilesystemScanner:
# make the call # make the call
headers = {"Content-type": "application/x-ndjson"} headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk" url = self.ES_URL + "/_bulk"
request = requests.post(url, data=query_str, headers=headers) request = requests.post(
url, data=query_str, headers=headers, auth=self.ES_AUTH
)
if not request.ok: if not request.ok:
print(request.text) print(request.text)
@ -339,7 +349,7 @@ class FilesystemScanner:
youtube_id = indexed[0] youtube_id = indexed[0]
print(f"deleting {youtube_id} from index") print(f"deleting {youtube_id} from index")
url = self.ES_URL + "/ta_video/_doc/" + youtube_id url = self.ES_URL + "/ta_video/_doc/" + youtube_id
request = requests.delete(url) request = requests.delete(url, auth=self.ES_AUTH)
if not request.ok: if not request.ok:
print(request.text) print(request.text)

View File

@ -20,6 +20,7 @@ class SearchHandler:
CONFIG = AppConfig().config CONFIG = AppConfig().config
CACHE_DIR = CONFIG["application"]["cache_dir"] CACHE_DIR = CONFIG["application"]["cache_dir"]
ES_AUTH = CONFIG["application"]["es_auth"]
def __init__(self, url, data): def __init__(self, url, data):
self.max_hits = None self.max_hits = None
@ -29,9 +30,11 @@ class SearchHandler:
def get_data(self): def get_data(self):
"""get the data""" """get the data"""
if self.data: if self.data:
response = requests.get(self.url, json=self.data).json() response = requests.get(
self.url, json=self.data, auth=self.ES_AUTH
).json()
else: else:
response = requests.get(self.url).json() response = requests.get(self.url, auth=self.ES_AUTH).json()
if "hits" in response.keys(): if "hits" in response.keys():
self.max_hits = response["hits"]["total"]["value"] self.max_hits = response["hits"]["total"]["value"]

View File

@ -115,8 +115,6 @@ class HomeView(View):
} }
data["query"] = query data["query"] = query
print(data)
return data return data
@staticmethod @staticmethod