From c186798e782a5c38e3714a6d358275ad1746fa08 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Wed, 9 Mar 2022 00:25:44 +0700
Subject: [PATCH 1/8] rewrite SubtitleParser, #180

---
 tubearchivist/home/src/index/video.py | 182 +++++++++++---------------
 1 file changed, 73 insertions(+), 109 deletions(-)

diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index bc6f272..461fb89 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -6,7 +6,6 @@ functionality:
 
 import json
 import os
-import re
 from datetime import datetime
 
 import requests
@@ -65,7 +64,7 @@ class YoutubeSubtitle:
         if not all_formats:
             return False
 
-        subtitle = [i for i in all_formats if i["ext"] == "vtt"][0]
+        subtitle = [i for i in all_formats if i["ext"] == "json3"][0]
         subtitle.update(
             {"lang": lang, "source": "auto", "media_url": media_url}
         )
@@ -102,7 +101,7 @@ class YoutubeSubtitle:
             # no user subtitles found
             return False
 
-        subtitle = [i for i in all_formats if i["ext"] == "vtt"][0]
+        subtitle = [i for i in all_formats if i["ext"] == "json3"][0]
         subtitle.update(
             {"lang": lang, "source": "user", "media_url": media_url}
         )
@@ -145,109 +144,65 @@ class YoutubeSubtitle:
 class SubtitleParser:
     """parse subtitle str from youtube"""
 
-    time_reg = r"^([0-9]{2}:?){3}\.[0-9]{3} --> ([0-9]{2}:?){3}\.[0-9]{3}"
-    stamp_reg = r"<([0-9]{2}:?){3}\.[0-9]{3}>"
-    tag_reg = r"</?c>"
-
     def __init__(self, subtitle_str, lang):
-        self.subtitle_str = subtitle_str
+        self.subtitle_raw = json.loads(subtitle_str)
         self.lang = lang
-        self.header = False
-        self.parsed_cue_list = False
-        self.all_text_lines = False
-        self.matched = False
+        self.all_cues = False
 
     def process(self):
-        """collection to process subtitle string"""
-        self._parse_cues()
-        self._match_text_lines()
-        self._add_id()
-        self._timestamp_check()
+        """extract relevant que data"""
+        self.all_cues = []
+        for idx, event in enumerate(self.subtitle_raw.get("events")):
+            cue = {
+                "start": self.ms_conv(event["tStartMs"]),
+                "end": self.ms_conv(event["tStartMs"] + event["dDurationMs"]),
+                "text": "".join([i.get("utf8") for i in event["segs"]]),
+                "idx": idx + 1,
+            }
+            self.all_cues.append(cue)
 
-    def _parse_cues(self):
-        """split into cues"""
-        all_cues = self.subtitle_str.replace("\n \n", "\n").split("\n\n")
-        self.header = all_cues[0]
-        self.all_text_lines = []
-        self.parsed_cue_list = [self._cue_cleaner(i) for i in all_cues[1:]]
+    @staticmethod
+    def ms_conv(ms):
+        """convert ms to timestamp"""
+        hours = str((ms // (1000 * 60 * 60)) % 24).zfill(2)
+        minutes = str((ms // (1000 * 60)) % 60).zfill(2)
+        secs = str((ms // 1000) % 60).zfill(2)
+        millis = str(ms % 1000).zfill(3)
 
-    def _cue_cleaner(self, cue):
-        """parse single cue"""
-        all_lines = cue.split("\n")
-        cue_dict = {"lines": []}
-
-        for line in all_lines:
-            if re.match(self.time_reg, line):
-                clean = re.search(self.time_reg, line).group()
-                start, end = clean.split(" --> ")
-                cue_dict.update({"start": start, "end": end})
-            else:
-                clean = re.sub(self.stamp_reg, "", line)
-                clean = re.sub(self.tag_reg, "", clean)
-                cue_dict["lines"].append(clean)
-                if clean.strip() and clean not in self.all_text_lines[-4:]:
-                    # remove immediate duplicates
-                    self.all_text_lines.append(clean)
-
-        return cue_dict
-
-    def _match_text_lines(self):
-        """match unique text lines with timestamps"""
-
-        self.matched = []
-
-        while self.all_text_lines:
-            check = self.all_text_lines[0]
-            matches = [i for i in self.parsed_cue_list if check in i["lines"]]
-            new_cue = matches[-1]
-            new_cue["start"] = matches[0]["start"]
-
-            for line in new_cue["lines"]:
-                try:
-                    self.all_text_lines.remove(line)
-                except ValueError:
-                    continue
-
-            self.matched.append(new_cue)
-
-    def _timestamp_check(self):
-        """check if end timestamp is bigger than start timestamp"""
-        for idx, cue in enumerate(self.matched):
-            # this
-            end = int(re.sub("[^0-9]", "", cue.get("end")))
-            # next
-            try:
-                next_cue = self.matched[idx + 1]
-            except IndexError:
-                continue
-
-            start_next = int(re.sub("[^0-9]", "", next_cue.get("start")))
-            if end > start_next:
-                self.matched[idx]["end"] = next_cue.get("start")
-
-    def _add_id(self):
-        """add id to matched cues"""
-        for idx, _ in enumerate(self.matched):
-            self.matched[idx]["id"] = idx + 1
+        return f"{hours}:{minutes}:{secs}.{millis}"
 
     def get_subtitle_str(self):
-        """stitch cues and return processed new string"""
-        new_subtitle_str = self.header + "\n\n"
+        """create vtt text str from cues"""
+        subtitle_str = f"WEBVTT\nKind: captions\nLanguage: {self.lang}"
 
-        for cue in self.matched:
-            timestamp = f"{cue.get('start')} --> {cue.get('end')}"
-            lines = "\n".join(cue.get("lines"))
-            cue_text = f"{cue.get('id')}\n{timestamp}\n{lines}\n\n"
-            new_subtitle_str = new_subtitle_str + cue_text
+        for cue in self.all_cues:
+            stamp = f"{cue.get('start')} --> {cue.get('end')}"
+            cue_text = f"\n\n{cue.get('idx')}\n{stamp}\n{cue.get('text')}"
+            subtitle_str = subtitle_str + cue_text
 
-        return new_subtitle_str
+        return subtitle_str
 
     def create_bulk_import(self, video, source):
-        """process matched for es import"""
+        """subtitle lines for es import"""
+        documents = self.create_documents(video, source)
         bulk_list = []
-        channel = video.json_data.get("channel")
 
-        document = {
+        for document in documents:
+            document_id = document.get("subtitle_fragment_id")
+            action = {"index": {"_index": "ta_subtitle", "_id": document_id}}
+            bulk_list.append(json.dumps(action))
+            bulk_list.append(json.dumps(document))
+
+        bulk_list.append("\n")
+        query_str = "\n".join(bulk_list)
+
+        return query_str
+
+    def create_documents(self, video, source):
+        """process documents"""
+        documents = self.chunk_list(video.youtube_id)
+        channel = video.json_data.get("channel")
+        meta_dict = {
             "youtube_id": video.youtube_id,
             "title": video.json_data.get("title"),
             "subtitle_channel": channel.get("channel_name"),
@@ -257,26 +212,35 @@ class SubtitleParser:
             "subtitle_source": source,
         }
 
-        for match in self.matched:
-            match_id = match.get("id")
-            document_id = f"{video.youtube_id}-{self.lang}-{match_id}"
-            action = {"index": {"_index": "ta_subtitle", "_id": document_id}}
-            document.update(
-                {
-                    "subtitle_fragment_id": document_id,
-                    "subtitle_start": match.get("start"),
-                    "subtitle_end": match.get("end"),
-                    "subtitle_index": match_id,
-                    "subtitle_line": " ".join(match.get("lines")),
+        _ = [i.update(meta_dict) for i in documents]
+
+        return documents
+
+    def chunk_list(self, youtube_id):
+        """join cues for bulk import"""
+        chunk_list = []
+
+        chunk = {}
+        for cue in self.all_cues:
+            if chunk:
+                text = f"{chunk.get('subtitle_line')} {cue.get('text')}\n"
+                chunk["subtitle_line"] = text
+            else:
+                idx = len(chunk_list) + 1
+                chunk = {
+                    "subtitle_index": idx,
+                    "subtitle_line": cue.get("text"),
+                    "subtitle_start": cue.get("start"),
                 }
-            )
-            bulk_list.append(json.dumps(action))
-            bulk_list.append(json.dumps(document))
 
-        bulk_list.append("\n")
-        query_str = "\n".join(bulk_list)
+            chunk["subtitle_fragment_id"] = f"{youtube_id}-{self.lang}-{idx}"
 
-        return query_str
+            if cue["idx"] % 5 == 0:
+                chunk["subtitle_end"] = cue.get("end")
+                chunk_list.append(chunk)
+                chunk = {}
+
+        return chunk_list
 
 
 class YoutubeVideo(YouTubeItem, YoutubeSubtitle):

From 6a6c8fa5d80b99ae4b10502ac53eec1da2b26466 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 10 Mar 2022 17:39:35 +0700
Subject: [PATCH 2/8] bump yt-dlp version

---
 tubearchivist/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt
index ecebf8c..3d10c3f 100644
--- a/tubearchivist/requirements.txt
+++ b/tubearchivist/requirements.txt
@@ -9,4 +9,4 @@ requests==2.27.1
 ryd-client==0.0.3
 uWSGI==2.0.20
 whitenoise==6.0.0
-yt_dlp==2022.2.4
+yt_dlp==2022.3.8.2

From 1ce832b846e590a4f424716558d82129e0d30b17 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 10 Mar 2022 19:58:13 +0700
Subject: [PATCH 3/8] fix autocaption extraction, flatten words, #180

---
 tubearchivist/home/src/index/video.py | 28 ++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index 461fb89..7fe20a0 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -114,12 +114,13 @@ class YoutubeSubtitle:
         for subtitle in relevant_subtitles:
             dest_path = os.path.join(videos_base, subtitle["media_url"])
             source = subtitle["source"]
+            lang = subtitle.get("lang")
             response = requests.get(subtitle["url"])
             if not response.ok:
                 print(f"{self.video.youtube_id}: failed to download subtitle")
                 continue
 
-            parser = SubtitleParser(response.text, subtitle.get("lang"))
+            parser = SubtitleParser(response.text, lang, source)
             parser.process()
             subtitle_str = parser.get_subtitle_str()
             self._write_subtitle_file(dest_path, subtitle_str)
@@ -144,15 +145,20 @@ class YoutubeSubtitle:
 class SubtitleParser:
     """parse subtitle str from youtube"""
 
-    def __init__(self, subtitle_str, lang):
+    def __init__(self, subtitle_str, lang, source):
         self.subtitle_raw = json.loads(subtitle_str)
         self.lang = lang
+        self.source = source
         self.all_cues = False
 
     def process(self):
         """extract relevant que data"""
+        all_events = self.subtitle_raw.get("events")
+        if self.source == "auto":
+            all_events = self._flat_auto_caption(all_events)
+
         self.all_cues = []
-        for idx, event in enumerate(self.subtitle_raw.get("events")):
+        for idx, event in enumerate(all_events):
             cue = {
                 "start": self.ms_conv(event["tStartMs"]),
                 "end": self.ms_conv(event["tStartMs"] + event["dDurationMs"]),
@@ -161,6 +167,22 @@ class SubtitleParser:
             }
             self.all_cues.append(cue)
 
+    @staticmethod
+    def _flat_auto_caption(all_events):
+        """flatten autocaption segments"""
+        flatten = []
+        for event in all_events:
+            if "segs" not in event.keys():
+                continue
+            text = "".join([i.get("utf8") for i in event.get("segs")])
+            if not text.strip():
+                continue
+
+            event.update({"segs": [{"utf8": text}]})
+            flatten.append(event)
+
+        return flatten
+
     @staticmethod
     def ms_conv(ms):
         """convert ms to timestamp"""

From 6e3df21f8c959640a3c50687641a58fb12f3e557 Mon Sep 17 00:00:00 2001
From: Nathan DeTar <n8detar@gmail.com>
Date: Thu, 10 Mar 2022 05:20:23 -0800
Subject: [PATCH 4/8] Continue Watching Section (#188)

* Replaced isWatched() function.

* Switched to `updateVideoWatchStatus()` function

* Updated Onclick to `updateVideoWatchStatus(this)`

* Handle `this` input in `updateVideoWatchStatus()`
---
 .../home/templates/home/channel_id.html       |   4 +-
 tubearchivist/home/templates/home/home.html   |   8 +-
 .../home/templates/home/playlist_id.html      |   4 +-
 tubearchivist/home/templates/home/video.html  |   4 +-
 tubearchivist/static/script.js                | 142 ++++++++++++------
 5 files changed, 107 insertions(+), 55 deletions(-)

diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html
index 1198a15..3a84037 100644
--- a/tubearchivist/home/templates/home/channel_id.html
+++ b/tubearchivist/home/templates/home/channel_id.html
@@ -124,9 +124,9 @@
                     <div class="video-desc {{ view_style }}">
                         <div class="video-desc-player" id="video-info-{{ video.source.youtube_id }}">
                             {% if video.source.player.watched %}
-                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="isUnwatched(this.id)" class="watch-button" title="Mark as unwatched">
+                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
                             {% else %}
-                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="isWatched(this.id)" class="watch-button" title="Mark as watched">
+                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
                             {% endif %}
                             <span>{{ video.source.published }} | {{ video.source.player.duration_str }}</span>
                         </div>
diff --git a/tubearchivist/home/templates/home/home.html b/tubearchivist/home/templates/home/home.html
index 042ceed..0eb2926 100644
--- a/tubearchivist/home/templates/home/home.html
+++ b/tubearchivist/home/templates/home/home.html
@@ -27,9 +27,9 @@
                     <div class="video-desc {{ view_style }}">
                         <div class="video-desc-player" id="video-info-{{ video.youtube_id }}">
                             {% if video.player.watched %}
-                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.youtube_id }}" data-status="watched" onclick="isUnwatched(this.id)" class="watch-button" title="Mark as unwatched">
+                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
                             {% else %}
-                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.youtube_id }}" data-status="unwatched" onclick="isWatched(this.id)" class="watch-button" title="Mark as watched">
+                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
                             {% endif %}
                             <span>{{ video.published }} | {{ video.player.duration_str }}</span>
                         </div>
@@ -103,9 +103,9 @@
                     <div class="video-desc {{ view_style }}">
                         <div class="video-desc-player" id="video-info-{{ video.source.youtube_id }}">
                             {% if video.source.player.watched %}
-                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="isUnwatched(this.id)" class="watch-button" title="Mark as unwatched">
+                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
                             {% else %}
-                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="isWatched(this.id)" class="watch-button" title="Mark as watched">
+                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
                             {% endif %}
                             <span>{{ video.source.published }} | {{ video.source.player.duration_str }}</span>
                         </div>
diff --git a/tubearchivist/home/templates/home/playlist_id.html b/tubearchivist/home/templates/home/playlist_id.html
index 635bdca..f9e987e 100644
--- a/tubearchivist/home/templates/home/playlist_id.html
+++ b/tubearchivist/home/templates/home/playlist_id.html
@@ -105,9 +105,9 @@
                     <div class="video-desc {{ view_style }}">
                         <div class="video-desc-player" id="video-info-{{ video.source.youtube_id }}">
                             {% if video.source.player.watched %}
-                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="isUnwatched(this.id)" class="watch-button" title="Mark as unwatched">
+                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
                             {% else %}
-                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="isWatched(this.id)" class="watch-button" title="Mark as watched">
+                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
                             {% endif %}
                             <span>{{ video.source.published }} | {{ video.source.player.duration_str }}</span>
                         </div>
diff --git a/tubearchivist/home/templates/home/video.html b/tubearchivist/home/templates/home/video.html
index 7a8b522..324d293 100644
--- a/tubearchivist/home/templates/home/video.html
+++ b/tubearchivist/home/templates/home/video.html
@@ -32,9 +32,9 @@
                 <p>Last refreshed: {{ video.vid_last_refresh }}</p>
                 <p class="video-info-watched">Watched:
                     {% if video.player.watched %}
-                        <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" id="{{ video.youtube_id }}" onclick="isUnwatched(this.id)" class="seen-icon" title="Mark as unwatched">
+                        <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
                     {% else %}
-                        <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" id="{{ video.youtube_id }}" onclick="isWatched(this.id)" class="unseen-icon" title="Mark as watched.">
+                        <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
                     {% endif %}
                 </p>
                 {% if video.active %}
diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js
index acc592a..6e5c818 100644
--- a/tubearchivist/static/script.js
+++ b/tubearchivist/static/script.js
@@ -8,21 +8,62 @@ function sortChange(sortValue) {
     }, 500);
 }
 
-function isWatched(youtube_id) {
-    postVideoProgress(youtube_id, 0); // Reset video progress on watched;
-    removeProgressBar(youtube_id);
-    var payload = JSON.stringify({'watched': youtube_id});
-    sendPost(payload);
-    var seenIcon = document.createElement('img');
-    seenIcon.setAttribute('src', "/static/img/icon-seen.svg");
-    seenIcon.setAttribute('alt', 'seen-icon');
-    seenIcon.setAttribute('id', youtube_id);
-    seenIcon.setAttribute('title', "Mark as unwatched");
-    seenIcon.setAttribute('onclick', "isUnwatched(this.id)");
-    seenIcon.classList = 'seen-icon';
-    document.getElementById(youtube_id).replaceWith(seenIcon);
+// Updates video watch status when passed a video id and its current state (e.g. if the video was unwatched but you want to mark it as watched you will pass "unwatched")
+function updateVideoWatchStatus(input1, videoCurrentWatchStatus) {
+    if (videoCurrentWatchStatus) {
+        videoId = input1;
+    } else if (input1.getAttribute("data-id")) {
+        videoId = input1.getAttribute("data-id");
+        videoCurrentWatchStatus = input1.getAttribute("data-status");
+    }
+
+    postVideoProgress(videoId, 0); // Reset video progress on watched/unwatched;
+    removeProgressBar(videoId);
+
+    if (videoCurrentWatchStatus == "watched") {
+        var watchStatusIndicator = createWatchStatusIndicator(videoId, "unwatched");
+        var payload = JSON.stringify({'un_watched': videoId});
+        sendPost(payload);
+    } else if (videoCurrentWatchStatus == "unwatched") {
+        var watchStatusIndicator = createWatchStatusIndicator(videoId, "watched");
+        var payload = JSON.stringify({'watched': videoId});
+        sendPost(payload);
+    }
+
+    var watchButtons = document.getElementsByClassName("watch-button");
+    for (let i = 0; i < watchButtons.length; i++) {
+        if (watchButtons[i].getAttribute("data-id") == videoId) {
+            watchButtons[i].outerHTML = watchStatusIndicator;
+        }
+    }
 }
 
+// Creates a watch status indicator when passed a video id and the video's watch status
+function createWatchStatusIndicator(videoId, videoWatchStatus) {
+    if (videoWatchStatus == "watched") {
+        var seen = "seen";
+        var title = "Mark as unwatched";
+    } else if (videoWatchStatus == "unwatched") {
+        var seen = "unseen";
+        var title = "Mark as watched";
+    }
+    var watchStatusIndicator = `<img src="/static/img/icon-${seen}.svg" alt="${seen}-icon" data-id="${videoId}" data-status="${videoWatchStatus}" onclick="updateVideoWatchStatus(this)" class="watch-button" title="${title}">`;
+    return watchStatusIndicator;
+}
+
+// function isWatched(youtube_id) {
+//     var payload = JSON.stringify({'watched': youtube_id});
+//     sendPost(payload);
+//     var seenIcon = document.createElement('img');
+//     seenIcon.setAttribute('src', "/static/img/icon-seen.svg");
+//     seenIcon.setAttribute('alt', 'seen-icon');
+//     seenIcon.setAttribute('id', youtube_id);
+//     seenIcon.setAttribute('title', "Mark as unwatched");
+//     seenIcon.setAttribute('onclick', "isUnwatched(this.id)");
+//     seenIcon.classList = 'seen-icon';
+//     document.getElementById(youtube_id).replaceWith(seenIcon);
+// }
+
 // Removes the progress bar when passed a video id
 function removeProgressBar(videoId) {
     setProgressBar(videoId, 0, 1);
@@ -39,19 +80,19 @@ function isWatchedButton(button) {
     }, 1000);
 }
 
-function isUnwatched(youtube_id) {
-    postVideoProgress(youtube_id, 0); // Reset video progress on unwatched;
-    var payload = JSON.stringify({'un_watched': youtube_id});
-    sendPost(payload);
-    var unseenIcon = document.createElement('img');
-    unseenIcon.setAttribute('src', "/static/img/icon-unseen.svg");
-    unseenIcon.setAttribute('alt', 'unseen-icon');
-    unseenIcon.setAttribute('id', youtube_id);
-    unseenIcon.setAttribute('title', "Mark as watched");
-    unseenIcon.setAttribute('onclick', "isWatched(this.id)");
-    unseenIcon.classList = 'unseen-icon';
-    document.getElementById(youtube_id).replaceWith(unseenIcon);
-}
+// function isUnwatched(youtube_id) {
+//     postVideoProgress(youtube_id, 0); // Reset video progress on unwatched;
+//     var payload = JSON.stringify({'un_watched': youtube_id});
+//     sendPost(payload);
+//     var unseenIcon = document.createElement('img');
+//     unseenIcon.setAttribute('src', "/static/img/icon-unseen.svg");
+//     unseenIcon.setAttribute('alt', 'unseen-icon');
+//     unseenIcon.setAttribute('id', youtube_id);
+//     unseenIcon.setAttribute('title', "Mark as watched");
+//     unseenIcon.setAttribute('onclick', "isWatched(this.id)");
+//     unseenIcon.classList = 'unseen-icon';
+//     document.getElementById(youtube_id).replaceWith(unseenIcon);
+// }
 
 function unsubscribe(id_unsub) {
     var payload = JSON.stringify({'unsubscribe': id_unsub});
@@ -327,7 +368,7 @@ function createPlayer(button) {
     var channelName = videoData.data.channel.channel_name;
 
     removePlayer();
-    document.getElementById(videoId).outerHTML = ''; // Remove watch indicator from video info
+    // document.getElementById(videoId).outerHTML = ''; // Remove watch indicator from video info
 
     // If cast integration is enabled create cast button
     var castButton = '';
@@ -337,12 +378,11 @@ function createPlayer(button) {
 
     // Watched indicator
     if (videoData.data.player.watched) {
-        var playerState = "seen";
-        var watchedFunction = "Unwatched";
+        var watchStatusIndicator = createWatchStatusIndicator(videoId, "watched");
     } else {
-        var playerState = "unseen";
-        var watchedFunction = "Watched";  
+        var watchStatusIndicator = createWatchStatusIndicator(videoId, "unwatched");
     }
+    
 
     var playerStats = `<div class="thumb-icon player-stats"><img src="/static/img/icon-eye.svg" alt="views icon"><span>${videoViews}</span>`;
     if (videoData.data.stats.like_count) {
@@ -360,7 +400,7 @@ function createPlayer(button) {
         ${videoTag}
         <div class="player-title boxed-content">
             <img class="close-button" src="/static/img/icon-close.svg" alt="close-icon" data="${videoId}" onclick="removePlayer()" title="Close player">
-            <img src="/static/img/icon-${playerState}.svg" alt="${playerState}-icon" id="${videoId}" onclick="is${watchedFunction}(this.id)" class="${playerState}-icon" title="Mark as ${watchedFunction}">
+            ${watchStatusIndicator}
             ${castButton}
             ${playerStats}
             <div class="player-channel-playlist">
@@ -444,8 +484,12 @@ function getVideoPlayerDuration() {
 function getVideoPlayerWatchStatus() {
     var videoId = getVideoPlayerVideoId();
     var watched = false;
-    if(document.getElementById(videoId) != null && document.getElementById(videoId).className != "unseen-icon") {
-        watched = true;
+
+    var watchButtons = document.getElementsByClassName("watch-button");
+    for (let i = 0; i < watchButtons.length; i++) {
+        if (watchButtons[i].getAttribute("data-id") == videoId && watchButtons[i].getAttribute("data-status") == "watched") {
+            watched = true;
+        }
     }
     return watched;
 }
@@ -459,7 +503,7 @@ function onVideoProgress() {
         postVideoProgress(videoId, currentTime);
         if (!getVideoPlayerWatchStatus()) { // Check if video is already marked as watched
             if (watchedThreshold(currentTime, duration)) {
-                isWatched(videoId);
+                updateVideoWatchStatus(videoId, "unwatched");
             }
         }
     }
@@ -469,7 +513,7 @@ function onVideoProgress() {
 function onVideoEnded() {
     var videoId = getVideoPlayerVideoId();
     if (!getVideoPlayerWatchStatus()) { // Check if video is already marked as watched
-        isWatched(videoId);
+        updateVideoWatchStatus(videoId, "unwatched");
     }
 }
 
@@ -606,13 +650,21 @@ function removePlayer() {
 
 // Sets the progress bar when passed a video id, video progress and video duration
 function setProgressBar(videoId, currentTime, duration) {
-    progressBar = document.getElementById("progress-" + videoId);
-    progressBarWidth = (currentTime / duration) * 100 + "%";
-    if (progressBar && !getVideoPlayerWatchStatus()) {
-        progressBar.style.width = progressBarWidth;
-    } else if (progressBar) {
-        progressBar.style.width = "0%";
+    var progressBarWidth = (currentTime / duration) * 100 + "%";
+    var progressBars = document.getElementsByClassName("video-progress-bar");
+    for (let i = 0; i < progressBars.length; i++) {
+        if (progressBars[i].id == "progress-" + videoId) {
+            if (!getVideoPlayerWatchStatus()) {
+                progressBars[i].style.width = progressBarWidth;
+            } else {
+                progressBars[i].style.width = "0%";
+            }
+        }
     }
+
+    // progressBar = document.getElementById("progress-" + videoId);
+    
+    
 }
 
 // multi search form
@@ -681,9 +733,9 @@ function createVideo(video, viewStyle) {
     const videoPublished = video.published;
     const videoDuration = video.player.duration_str;
     if (video.player.watched) {
-        var playerState = "seen";
+        var watchStatusIndicator = createWatchStatusIndicator(videoId, "watched");
     } else {
-        var playerState = "unseen";
+        var watchStatusIndicator = createWatchStatusIndicator(videoId, "unwatched");
     };
     const channelId = video.channel.channel_id;
     const channelName = video.channel.channel_name;
@@ -701,7 +753,7 @@ function createVideo(video, viewStyle) {
     </a>
     <div class="video-desc ${viewStyle}">
         <div class="video-desc-player" id="video-info-${videoId}">
-                <img src="/static/img/icon-${playerState}.svg" alt="${playerState}-icon" id="${videoId}" onclick="isWatched(this.id)" class="${playerState}-icon">
+                ${watchStatusIndicator}
             <span>${videoPublished} | ${videoDuration}</span>
         </div>
         <div>

From d3e9646fb6daf3a5e01c0284639eaad655dd5871 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Thu, 10 Mar 2022 20:45:13 +0700
Subject: [PATCH 5/8] private methods for YoutubeSubtitle and SubtitleParser

---
 tubearchivist/home/src/index/video.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index 7fe20a0..a840ea8 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -23,7 +23,7 @@ class YoutubeSubtitle:
         self.video = video
         self.languages = False
 
-    def sub_conf_parse(self):
+    def _sub_conf_parse(self):
         """add additional conf values to self"""
         languages_raw = self.video.config["downloads"]["subtitle"]
         if languages_raw:
@@ -31,26 +31,26 @@ class YoutubeSubtitle:
 
     def get_subtitles(self):
         """check what to do"""
-        self.sub_conf_parse()
+        self._sub_conf_parse()
         if not self.languages:
             # no subtitles
             return False
 
         relevant_subtitles = []
         for lang in self.languages:
-            user_sub = self.get_user_subtitles(lang)
+            user_sub = self._get_user_subtitles(lang)
             if user_sub:
                 relevant_subtitles.append(user_sub)
                 continue
 
             if self.video.config["downloads"]["subtitle_source"] == "auto":
-                auto_cap = self.get_auto_caption(lang)
+                auto_cap = self._get_auto_caption(lang)
                 if auto_cap:
                     relevant_subtitles.append(auto_cap)
 
         return relevant_subtitles
 
-    def get_auto_caption(self, lang):
+    def _get_auto_caption(self, lang):
         """get auto_caption subtitles"""
         print(f"{self.video.youtube_id}-{lang}: get auto generated subtitles")
         all_subtitles = self.video.youtube_meta.get("automatic_captions")
@@ -87,7 +87,7 @@ class YoutubeSubtitle:
 
         return all_subtitles
 
-    def get_user_subtitles(self, lang):
+    def _get_user_subtitles(self, lang):
         """get subtitles uploaded from channel owner"""
         print(f"{self.video.youtube_id}-{lang}: get user uploaded subtitles")
         all_subtitles = self._normalize_lang()
@@ -160,8 +160,8 @@ class SubtitleParser:
         self.all_cues = []
         for idx, event in enumerate(all_events):
             cue = {
-                "start": self.ms_conv(event["tStartMs"]),
-                "end": self.ms_conv(event["tStartMs"] + event["dDurationMs"]),
+                "start": self._ms_conv(event["tStartMs"]),
+                "end": self._ms_conv(event["tStartMs"] + event["dDurationMs"]),
                 "text": "".join([i.get("utf8") for i in event["segs"]]),
                 "idx": idx + 1,
             }
@@ -184,7 +184,7 @@ class SubtitleParser:
         return flatten
 
     @staticmethod
-    def ms_conv(ms):
+    def _ms_conv(ms):
         """convert ms to timestamp"""
         hours = str((ms // (1000 * 60 * 60)) % 24).zfill(2)
         minutes = str((ms // (1000 * 60)) % 60).zfill(2)
@@ -206,7 +206,7 @@ class SubtitleParser:
 
     def create_bulk_import(self, video, source):
         """subtitle lines for es import"""
-        documents = self.create_documents(video, source)
+        documents = self._create_documents(video, source)
         bulk_list = []
 
         for document in documents:
@@ -220,9 +220,9 @@ class SubtitleParser:
 
         return query_str
 
-    def create_documents(self, video, source):
+    def _create_documents(self, video, source):
         """process documents"""
-        documents = self.chunk_list(video.youtube_id)
+        documents = self._chunk_list(video.youtube_id)
         channel = video.json_data.get("channel")
         meta_dict = {
             "youtube_id": video.youtube_id,
@@ -238,7 +238,7 @@ class SubtitleParser:
 
         return documents
 
-    def chunk_list(self, youtube_id):
+    def _chunk_list(self, youtube_id):
         """join cues for bulk import"""
         chunk_list = []
 

From aff0cfb794c09d900a18782e332de35787ae04b5 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Fri, 11 Mar 2022 17:47:04 +0700
Subject: [PATCH 6/8] fix retiming issue for auto subtitles

---
 tubearchivist/home/src/index/video.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py
index a840ea8..9968cc7 100644
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@@ -178,6 +178,14 @@ class SubtitleParser:
             if not text.strip():
                 continue
 
+            if flatten:
+                # fix overlapping retiming issue
+                last_end = flatten[-1]["tStartMs"] + flatten[-1]["dDurationMs"]
+                if event["tStartMs"] < last_end:
+                    joined = flatten[-1]["segs"][0]["utf8"] + "\n" + text
+                    flatten[-1]["segs"][0]["utf8"] = joined
+                    continue
+
             event.update({"segs": [{"utf8": text}]})
             flatten.append(event)
 

From f6950a2ca5005536c40fb467fef2ba16063a9157 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Sat, 12 Mar 2022 17:29:34 +0700
Subject: [PATCH 7/8] list all in progress videos

---
 tubearchivist/home/templates/home/home.html | 24 ++++++++---------
 tubearchivist/home/views.py                 | 30 +++++++++++++++++----
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/tubearchivist/home/templates/home/home.html b/tubearchivist/home/templates/home/home.html
index 0eb2926..445de5b 100644
--- a/tubearchivist/home/templates/home/home.html
+++ b/tubearchivist/home/templates/home/home.html
@@ -9,14 +9,14 @@
         <div class="video-list {{ view_style }}">
             {% for video in continue_vids %}
                 <div class="video-item {{ view_style }}">
-                    <a href="#player" data-id="{{ video.youtube_id }}" onclick="createPlayer(this)">
+                    <a href="#player" data-id="{{ video.source.youtube_id }}" onclick="createPlayer(this)">
                         <div class="video-thumb-wrap {{ view_style }}">
                             <div class="video-thumb">
-                                <img src="/cache/{{ video.vid_thumb_url }}" alt="video-thumb">
-                                {% if video.player.progress %}
-                                    <div class="video-progress-bar" id="progress-{{ video.youtube_id }}" style="width: {{video.player.progress}}%;"></div>
+                                <img src="/cache/{{ video.source.vid_thumb_url }}" alt="video-thumb">
+                                {% if video.source.player.progress %}
+                                    <div class="video-progress-bar" id="progress-{{ video.source.youtube_id }}" style="width: {{video.source.player.progress}}%;"></div>
                                 {% else %}
-                                    <div class="video-progress-bar" id="progress-{{ video.youtube_id }}" style="width: 0%;"></div>
+                                    <div class="video-progress-bar" id="progress-{{ video.source.youtube_id }}" style="width: 0%;"></div>
                                 {% endif %}
                             </div>
                             <div class="video-play">
@@ -25,17 +25,17 @@
                         </div>
                     </a>
                     <div class="video-desc {{ view_style }}">
-                        <div class="video-desc-player" id="video-info-{{ video.youtube_id }}">
-                            {% if video.player.watched %}
-                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
+                        <div class="video-desc-player" id="video-info-{{ video.source.youtube_id }}">
+                            {% if video.source.player.watched %}
+                                <img src="{% static 'img/icon-seen.svg' %}" alt="seen-icon" data-id="{{ video.source.youtube_id }}" data-status="watched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as unwatched">
                             {% else %}
-                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
+                                <img src="{% static 'img/icon-unseen.svg' %}" alt="unseen-icon" data-id="{{ video.source.youtube_id }}" data-status="unwatched" onclick="updateVideoWatchStatus(this)" class="watch-button" title="Mark as watched">
                             {% endif %}
-                            <span>{{ video.published }} | {{ video.player.duration_str }}</span>
+                            <span>{{ video.source.published }} | {{ video.source.player.duration_str }}</span>
                         </div>
                         <div>
-                            <a href="{% url 'channel_id' video.channel.channel_id %}"><h3>{{ video.channel.channel_name }}</h3></a>
-                            <a class="video-more" href="{% url 'video' video.youtube_id %}"><h2>{{ video.title }}</h2></a>
+                            <a href="{% url 'channel_id' video.source.channel.channel_id %}"><h3>{{ video.source.channel.channel_name }}</h3></a>
+                            <a class="video-more" href="{% url 'video' video.source.youtube_id %}"><h2>{{ video.source.title }}</h2></a>
                         </div>
                     </div>
                 </div>
diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py
index 781b690..08e5df8 100644
--- a/tubearchivist/home/views.py
+++ b/tubearchivist/home/views.py
@@ -175,15 +175,35 @@ class ArchivistResultsView(ArchivistViewConfig):
         if not results or not self.context["results"]:
             return
 
-        self.context["continue_vids"] = []
-        progress = {i["youtube_id"]: i["position"] for i in results}
+        self.context["continue_vids"] = self.get_in_progress(results)
+
+        in_progress = {i["youtube_id"]: i["position"] for i in results}
         for hit in self.context["results"]:
             video = hit["source"]
-            if video["youtube_id"] in progress:
-                played_sec = progress.get(video["youtube_id"])
+            if video["youtube_id"] in in_progress:
+                played_sec = in_progress.get(video["youtube_id"])
                 total = video["player"]["duration"]
                 video["player"]["progress"] = 100 * (played_sec / total)
-                self.context["continue_vids"].append(video)
+
+    def get_in_progress(self, results):
+        """get all videos in progress, add player progress in percent"""
+        ids = [{"match": {"youtube_id": i.get("youtube_id")}} for i in results]
+        data = {
+            "size": self.default_conf["archive"]["page_size"],
+            "query": {"bool": {"should": ids}},
+        }
+        search = SearchHandler(
+            "ta_video/_search", self.default_conf, data=data
+        )
+        videos = search.get_data()
+        positions = {i["youtube_id"]: i["position"] for i in results}
+        for video in videos:
+            player = video["source"]["player"]
+            played_sec = positions[video["source"]["youtube_id"]]
+            total = player["duration"]
+            # zero or empty duration: report 0 instead of raising on division
+            player["progress"] = 100 * (played_sec / total) if total else 0
+        return videos
 
     def single_lookup(self, es_path):
         """retrieve a single item from url"""

From 7595e7501f19b33d8cea828f80cab1f11838ac12 Mon Sep 17 00:00:00 2001
From: simon <simobilleter@gmail.com>
Date: Sat, 12 Mar 2022 20:29:26 +0700
Subject: [PATCH 8/8] sort continue watching videos

---
 tubearchivist/home/views.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py
index 08e5df8..0587704 100644
--- a/tubearchivist/home/views.py
+++ b/tubearchivist/home/views.py
@@ -191,6 +191,7 @@ class ArchivistResultsView(ArchivistViewConfig):
         data = {
             "size": self.default_conf["archive"]["page_size"],
             "query": {"bool": {"should": ids}},
+            "sort": [{"published": {"order": "desc"}}],
         }
         search = SearchHandler(
             "ta_video/_search", self.default_conf, data=data