Skip to content
3 changes: 3 additions & 0 deletions cps/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@
# an initial metadata manifest (prior to downloading videos or media) here:
XKLB_DB_FILE = "/library/calibre-web/xklb-metadata.db"

# Maximum number of videos to download (default: 100); the videos with the most views per day are kept
NUMBER_OF_VIDEOS = 100

if HOME_CONFIG:
home_dir = os.path.join(os.path.expanduser("~"), ".calibre-web")
if not os.path.exists(home_dir):
Expand Down
43 changes: 34 additions & 9 deletions cps/tasks/metadata_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime
from flask_babel import lazy_gettext as N_, gettext as _

from cps.constants import XKLB_DB_FILE
from cps.constants import XKLB_DB_FILE, NUMBER_OF_VIDEOS
from cps.services.worker import WorkerThread
from cps.tasks.download import TaskDownload
from cps.services.worker import CalibreTask, STAT_FINISH_SUCCESS, STAT_FAIL, STAT_STARTED, STAT_WAITING
Expand Down Expand Up @@ -48,7 +48,7 @@ def run(self, worker_thread):
subprocess_args = [lb_executable, "tubeadd", self.media_url]
log.info("Subprocess args: %s", subprocess_args)

# Execute the download process using process_open
# Execute the metadata fetching process using process_open
try:
p = process_open(subprocess_args, newlines=True)

Expand Down Expand Up @@ -115,16 +115,41 @@ def run(self, worker_thread):
self.progress = 0
finally:
log.info("Shelf title: %s", self.shelf_title)
response = requests.get(self.original_url, params={"current_user_name": self.current_user_name, "shelf_title": self.shelf_title})
if response.status_code == 200:
self.shelf_id = response.json()["shelf_id"]
else:
log.error("An error occurred while trying to send the shelf title to %s", self.original_url)

# update the metadata of every video in the shelf
for index, requested_url in enumerate(requested_urls.keys()):
try:
p = process_open([lb_executable, "tubeadd", requested_url], newlines=True)
p.wait()
except Exception as e:
log.error("An error occurred during updating the metadata of %s: %s", requested_url, e)
self.message = f"{requested_url} failed: {e}"
for index, requested_url in enumerate(requested_urls.keys()):
try:
view_count = conn.execute("SELECT view_count FROM media WHERE path = ?", (requested_url,)).fetchone()[0]
time_uploaded = conn.execute("SELECT time_uploaded FROM media WHERE path = ?", (requested_url,)).fetchone()[0]
time_uploaded = datetime.utcfromtimestamp(time_uploaded)
now = datetime.now()
# calculate views per day
days_since_publish = (now - time_uploaded).days

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we force days_since_publish to be 1 at minimum (i.e. 1 day) to avoid divide-by-zero errors?

try:
requested_urls[requested_url]["views_per_day"] = view_count / days_since_publish
except ZeroDivisionError:
requested_urls[requested_url]["views_per_day"] = 0
except Exception as e:
log.error("An error occurred during the subprocess execution: %s", e)
self.message = f"{requested_url} failed: {e}"

# sort the videos by views per day (highest first) and keep at most NUMBER_OF_VIDEOS of them (all of them if there are fewer)
requested_urls = dict(sorted(requested_urls.items(), key=lambda item: item[1]["views_per_day"], reverse=True)[:min(NUMBER_OF_VIDEOS, len(requested_urls))])

conn.close()

if self.shelf_title:
response = requests.get(self.original_url, params={"current_user_name": self.current_user_name, "shelf_title": self.shelf_title})
if response.status_code == 200:
self.shelf_id = response.json()["shelf_id"]
else:
log.error("An error occurred while trying to send the shelf title to %s", self.original_url)

num_requested_urls = len(requested_urls.keys())
total_duration = 0

Expand Down