diff --git a/cps/tasks/metadata_extract.py b/cps/tasks/metadata_extract.py index d45b0467b3..4f6ec53e96 100644 --- a/cps/tasks/metadata_extract.py +++ b/cps/tasks/metadata_extract.py @@ -5,7 +5,7 @@ from datetime import datetime from flask_babel import lazy_gettext as N_, gettext as _ -from cps.constants import XKLB_DB_FILE +from cps.constants import XKLB_DB_FILE, MAX_VIDEOS_PER_DOWNLOAD from cps.services.worker import WorkerThread from cps.tasks.download import TaskDownload from cps.services.worker import CalibreTask, STAT_FINISH_SUCCESS, STAT_FAIL, STAT_STARTED, STAT_WAITING @@ -117,6 +117,8 @@ def _update_metadata(self, requested_urls): self.message = f"{subprocess_args[2]} failed: {e}" failed_urls.append(subprocess_args[2]) + requested_urls = {url: requested_urls[url] for url in requested_urls.keys() if "shorts" not in url and url not in failed_urls} + def _calculate_views_per_day(self, requested_urls, conn): now = datetime.now() for requested_url in requested_urls.keys(): @@ -129,6 +131,9 @@ def _calculate_views_per_day(self, requested_urls, conn): log.error("An error occurred during the calculation of views per day for %s: %s", requested_url, e) self.message = f"{requested_url} failed: {e}" + def _sort_and_limit_requested_urls(self, requested_urls): + return dict(sorted(requested_urls.items(), key=lambda item: item[1]["views_per_day"], reverse=True)[:min(MAX_VIDEOS_PER_DOWNLOAD, len(requested_urls))]) + def _add_download_tasks_to_worker(self, requested_urls): for index, requested_url in enumerate(requested_urls.keys()): task_download = TaskDownload(_("Downloading %(url)s...", url=requested_url), @@ -165,6 +170,15 @@ def run(self, worker_thread): self._send_shelf_title() self._update_metadata(requested_urls) self._calculate_views_per_day(requested_urls, conn) + requested_urls = self._sort_and_limit_requested_urls(requested_urls) + else: + try: + extractor_id = conn.execute("SELECT extractor_id FROM media WHERE ? LIKE '%' || extractor_id || '%'", (self.media_url,)).fetchone()[0] + requested_urls = {url: requested_urls[url] for url in requested_urls.keys() if extractor_id in url} + except Exception as e: + log.error("An error occurred during the selection of the extractor ID: %s", e) + self.message = f"{self.media_url_link} failed: {e}" + return self._add_download_tasks_to_worker(requested_urls) conn.close()