From 92226a31644f45134bdb800b629eeceea9c6ee2c Mon Sep 17 00:00:00 2001 From: Blondel MONDESIR Date: Wed, 27 Mar 2024 00:15:04 -0400 Subject: [PATCH 1/3] Sort videos by views-per-day + download the top ones Videos are sorted by views-per-day and limited by MAX_VIDEOS_PER_DOWNLOAD --- cps/tasks/metadata_extract.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cps/tasks/metadata_extract.py b/cps/tasks/metadata_extract.py index d45b0467b3..202f4f31b9 100644 --- a/cps/tasks/metadata_extract.py +++ b/cps/tasks/metadata_extract.py @@ -5,7 +5,7 @@ from datetime import datetime from flask_babel import lazy_gettext as N_, gettext as _ -from cps.constants import XKLB_DB_FILE +from cps.constants import XKLB_DB_FILE, MAX_VIDEOS_PER_DOWNLOAD from cps.services.worker import WorkerThread from cps.tasks.download import TaskDownload from cps.services.worker import CalibreTask, STAT_FINISH_SUCCESS, STAT_FAIL, STAT_STARTED, STAT_WAITING @@ -129,6 +129,9 @@ def _calculate_views_per_day(self, requested_urls, conn): log.error("An error occurred during the calculation of views per day for %s: %s", requested_url, e) self.message = f"{requested_url} failed: {e}" + def _sort_and_limit_requested_urls(self, requested_urls): + return dict(sorted(requested_urls.items(), key=lambda item: item[1]["views_per_day"], reverse=True)[:min(MAX_VIDEOS_PER_DOWNLOAD, len(requested_urls))]) + def _add_download_tasks_to_worker(self, requested_urls): for index, requested_url in enumerate(requested_urls.keys()): task_download = TaskDownload(_("Downloading %(url)s...", url=requested_url), @@ -165,6 +168,7 @@ def run(self, worker_thread): self._send_shelf_title() self._update_metadata(requested_urls) self._calculate_views_per_day(requested_urls, conn) + requested_urls = self._sort_and_limit_requested_urls(requested_urls) self._add_download_tasks_to_worker(requested_urls) conn.close() From 774710150415960d46f8aa1cbf4e48cb243033e1 Mon Sep 17 00:00:00 2001 From: Blondel MONDESIR Date: Wed, 
27 Mar 2024 00:51:19 -0400 Subject: [PATCH 2/3] Handle YouTube Shorts and Failed videos size and time_uploaded metadata are missing for YouTube Shorts. We don't count them, failed videos too. --- cps/tasks/metadata_extract.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cps/tasks/metadata_extract.py b/cps/tasks/metadata_extract.py index 202f4f31b9..6d420e7a38 100644 --- a/cps/tasks/metadata_extract.py +++ b/cps/tasks/metadata_extract.py @@ -117,6 +117,8 @@ def _update_metadata(self, requested_urls): self.message = f"{subprocess_args[2]} failed: {e}" failed_urls.append(subprocess_args[2]) + requested_urls = {url: requested_urls[url] for url in requested_urls.keys() if "shorts" not in url and url not in failed_urls} + def _calculate_views_per_day(self, requested_urls, conn): now = datetime.now() for requested_url in requested_urls.keys(): From 19284d5f92960dc09c97b58363a1cc9b1f231c6a Mon Sep 17 00:00:00 2001 From: Blondel MONDESIR Date: Wed, 27 Mar 2024 01:38:03 -0400 Subject: [PATCH 3/3] Prevent videos from other sessions from being included in the current session Other users might be testing or some videos did not download from a previous session. Focus on the requested video(s). --- cps/tasks/metadata_extract.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cps/tasks/metadata_extract.py b/cps/tasks/metadata_extract.py index 6d420e7a38..4f6ec53e96 100644 --- a/cps/tasks/metadata_extract.py +++ b/cps/tasks/metadata_extract.py @@ -171,6 +171,14 @@ def run(self, worker_thread): self._update_metadata(requested_urls) self._calculate_views_per_day(requested_urls, conn) requested_urls = self._sort_and_limit_requested_urls(requested_urls) + else: + try: + extractor_id = conn.execute("SELECT extractor_id FROM media WHERE ?
LIKE '%' || extractor_id || '%'", (self.media_url,)).fetchone()[0] + requested_urls = {url: requested_urls[url] for url in requested_urls.keys() if extractor_id in url} + except Exception as e: + log.error("An error occurred during the selection of the extractor ID: %s", e) + self.message = f"{self.media_url_link} failed: {e}" + return self._add_download_tasks_to_worker(requested_urls) conn.close()