--- /dev/null
+from tabulate import tabulate
+from bs4 import BeautifulSoup
+import requests
+import sys
+import os
+
+
+# =============== CONFIGURATION ================
+
+
+# folder where the seasons / arcs will be created
+arcs_folders = "/home/something/something/Anime/One Piece (1999) [tvdbid-81797]"
+# folder where the html files are located
+html_files_path = "/home/Downloads/One Pace Episode Guide/"
+# default season cover image; used when an arc image is missing or invalid. MUST BE A JPG
+default_image_path = os.path.join(html_files_path, "default_image.jpg")
+
+
+# =============== COLOR CLASS ================
+
+
+class bcolors:
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+ UNDERLINE = '\033[4m'
+ PURPLE = '\033[95m'
+ BLUE = '\033[94m'
+ CYAN = '\033[96m'
+ GREEN = '\033[92m'
+ YELLOW = '\033[93m'
+ RED = '\033[91m'
+ LIGHT_GRAY = '\033[37m'
+ DARK_GRAY = '\033[90m'
+ BG_RED = '\033[41m'
+
+
+# =============== PRINT HELP AND INFO ================
+
+
+def help_n_info():
+ print()
+ print(f"{bcolors.DARK_GRAY} Author: {bcolors.BOLD}EduFdezSoy ({bcolors.UNDERLINE}https://edufdez.es/{bcolors.ENDC}{bcolors.BOLD}{bcolors.DARK_GRAY}) {bcolors.ENDC}")
+ print(f"{bcolors.DARK_GRAY} Version: {bcolors.BOLD}2.1 {bcolors.ENDC}")
+ print(f"{bcolors.DARK_GRAY} Date: {bcolors.BOLD}2025/05/04 {bcolors.ENDC}")
+ print(f"{bcolors.DARK_GRAY} License: {bcolors.BOLD}MIT {bcolors.ENDC}")
+ print()
+ print()
+ print(f"{bcolors.UNDERLINE}{bcolors.BOLD}{bcolors.PURPLE}One Pace Arc Formatter{bcolors.ENDC}")
+ print()
+    print(f"{bcolors.BLUE}This script creates a folder for each arc and downloads its cover image,{bcolors.ENDC}")
+    print(f"{bcolors.BLUE}then, if the root folder contains [One Pace] files, it moves each one into its matching arc folder and renames it to a more readable title for Jellyfin (and possibly other media servers).{bcolors.ENDC}")
+ print()
+    print(f"{bcolors.BLUE}You can download the needed HTML files from the official One Pace Doc {bcolors.DARK_GRAY}by clicking File > Download > Web Page (.html){bcolors.ENDC}")
+ print(f"{bcolors.CYAN}{bcolors.UNDERLINE}https://docs.google.com/spreadsheets/d/1HQRMJgu_zArp-sLnvFMDzOyjdsht87eFLECxMK858lA/edit{bcolors.ENDC}")
+ print()
+ print()
+ print(f"{bcolors.YELLOW}⚠️ Make sure to set the correct paths in the configuration section {bcolors.LIGHT_GRAY}(At the top of the file){bcolors.ENDC}")
+ print()
+ print()
+ print(f"{bcolors.UNDERLINE}Options:{bcolors.ENDC}")
+ print()
+ print("-h, -v shows this message")
+    print("      no arg       executes all: creates the folders, downloads the images, orders the chapters and prints the chapter report")
+    print("-f                 creates the folders and downloads the images")
+ print("-o orders the chapters")
+ print("-c reports missing chapters")
+ print()
+
+
+# =============== CHECK IF FOLDERS EXIST ================
+
+
+def check_folders():
+    if not os.path.exists(arcs_folders):
+        print(f"{bcolors.RED}Error: The directory does not exist: {arcs_folders}{bcolors.ENDC}")
+        exit(1)
+
+    if not os.path.exists(html_files_path):
+        print(f"{bcolors.RED}Error: The directory does not exist: {html_files_path}{bcolors.ENDC}")
+        exit(1)
+
+    if not os.path.exists(default_image_path):
+        print(f"{bcolors.RED}Error: The default image does not exist: {default_image_path}{bcolors.ENDC}")
+        exit(1)
+
+    html_file_path = os.path.join(html_files_path, "Arc Overview.html")
+    if not os.path.exists(html_file_path):
+        print(f"{bcolors.RED}Error: The HTML file does not exist: {html_file_path}{bcolors.ENDC}")
+        exit(1)
+
+
+# =============== PARSE ARC TABLE ================
+
+
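+# shared arc table parsed from "Arc Overview.html"; later steps clean it, append cover image URLs and read arc data from it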
+data = []
+def parse_arc_table():
+ global data
+ # Load the HTML file
+    html_file_path = os.path.join(html_files_path, "Arc Overview.html")
+
+ with open(html_file_path, "r", encoding="utf-8") as file:
+ soup = BeautifulSoup(file, "html.parser")
+
+    # Find the spreadsheet grid (Google Sheets HTML exports render it as a <table class="waffle">)
+ table = soup.find("table", class_="waffle")
+
+ # Parse the table into a 2D array
+ if table:
+ rows = table.find_all("tr")
+ for row in rows:
+ cells = row.find_all(["td", "th"]) # Include both <td> and <th>
+ data.append([cell.get_text(strip=True) for cell in cells])
+
+
+# =============== SANITIZE ARC TABLE ================
+
+
+def sanitize_table():
+ global data
+ # remove empty rows
+ data = [row for row in data if any(cell.strip() for cell in row)]
+
+ # now I want to delete the first row and the first column
+ if data:
+ # Remove the first row
+ data.pop(0)
+ # Remove the first column from each row
+ for i in range(len(data)):
+ if len(data[i]) > 0:
+ data[i].pop(0)
+
+    # remove up to three trailing rows if their first column is not a number
+ for i in range(3):
+ if data:
+ # Check if the first column of the last row is not a number
+ if not data[-1][0].isdigit():
+ # Remove the last row
+ data.pop(-1)
+ else:
+ break
+
+ # Clean up all cells in the data
+ for i in range(len(data)):
+ for j in range(len(data[i])):
+ # Remove any HTML specifics like \n and trim whitespace
+ data[i][j] = data[i][j].replace("\n", "").strip()
+ data[i][j] = ' '.join(data[i][j].split())
+
+
+# =============== ADD IMAGES TO ARRAY ================
+
+
+# default_image = "https://onepace.net/_next/static/media/logo.0bbcd6da.svg"
+
+def add_images_to_data():
+ global data
+ # second column is equal to the file name of the html with the data of the arc
+ for i in range(len(data)):
+ if i == 0: continue # Skip the first row (header)
+ # first get the arc name and remove whatever is in the brackets
+ arc_name = data[i][1].split('(')[0].strip()
+ # now get the file name
+        file_name = os.path.join(html_files_path, arc_name + ".html")
+ # now get the image, file only contains one img tag
+ if os.path.exists(file_name):
+ with open(file_name, "r", encoding="utf-8") as file:
+ arc_soup = BeautifulSoup(file, "html.parser")
+ img_tag = arc_soup.find("img")
+ if img_tag and "src" in img_tag.attrs:
+ data[i].append(img_tag["src"])
+                else:
+                    data[i].append(None)  # no image found in the arc page
+        else:
+            data[i].append(None)  # arc page not downloaded; the default cover will be used
+
+
+# ================= CREATE FOLDERS AND DOWNLOAD IMAGES ===================
+
+
+def create_arc_folders_and_download_images():
+ global data
+ # Create folders in the arcs_folders directory
+ for i in range(len(data)):
+ if i == 0: continue # Skip the first row (header)
+ if len(data[i]) > 1:
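+            # zero-pad the arc number (e.g. "03"); non-integer arcs (e.g. "9.5") are formatted as "09.5"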
+ folder_number = f"{int(data[i][0]):02}" if data[i][0].isdigit() else f"{float(data[i][0]):04.1f}"
+ folder_name = f"{folder_number} - {data[i][1]}"
+ folder_path = os.path.join(arcs_folders, folder_name)
+ os.makedirs(folder_path, exist_ok=True)
+            # Download the cover image and save it as "cover" inside the folder
+            image_url = data[i][-1]
+
+            # Fall back to the default cover if the image URL is missing or invalid
+            if not image_url or not image_url.startswith("http"):
+                print(f"Invalid image URL for {folder_name}: {image_url}, default image will be used.")
+                # copy the default image (html_files_path + "default_image.jpg")
+                if os.path.exists(default_image_path):
+                    with open(default_image_path, "rb") as default_img_file:
+                        with open(os.path.join(folder_path, "cover.jpg"), "wb") as img_file:
+                            img_file.write(default_img_file.read())
+                else:
+                    print(f"Default image not found: {default_image_path}")
+                continue
+
+            # Determine the file extension from the image URL
+            extension = os.path.splitext(image_url)[-1] or ".jpg"
+            image_path = os.path.join(folder_path, f"cover{extension}")
+            try:
+                response = requests.get(image_url, stream=True, timeout=30)
+                if response.status_code == 200:
+                    with open(image_path, "wb") as img_file:
+                        for chunk in response.iter_content(1024):
+                            img_file.write(chunk)
+                else:
+                    print(f"Failed to download image for {folder_name}: HTTP {response.status_code}")
+            except Exception as e:
+                print(f"Failed to download image for {folder_name}: {e}")
+
+
+# ================= ORDER AND RENAME FILES ===================
+
+
+def order_and_rename():
+ global data
+ # List files in the specified directory that contain "[one pace]" in the title
+ anime_directory = arcs_folders
+ pace_files = []
+ arcs_names = []
+
+ # get files from [one pace]
+ if os.path.exists(anime_directory):
+ anime_files = os.listdir(anime_directory)
+ for file in anime_files:
+ if "[one pace]" in file.lower():
+ pace_files.append(file)
+ else:
+ print(f"Directory does not exist: {anime_directory}")
+
+ # get arcs names from data array
+ for i in range(len(data)):
+ if i == 0: continue # Skip the first row (header)
+ if len(data[i]) > 1:
+ name = data[i][1]
+ # remove the brackets and everything inside
+ name = name.split('(')[0].strip()
+ arcs_names.append(name)
+
+ for arc in arcs_names:
+ arc_files = []
+ arc_folder = ""
+ # search arc files in anime_files
+ for file in pace_files:
+ if arc.lower() in file.lower():
+ arc_files.append(file)
+
+ # get the folder that contains the arc
+ for folder in os.listdir(anime_directory):
+ if arc.lower() in folder.lower():
+ # check if the folder is a directory!
+ if not os.path.isdir(os.path.join(anime_directory, folder)):
+ continue
+ arc_folder = folder
+ break
+
+ # move and rename files
+ for file in arc_files:
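+            # assumed release naming, e.g. "[One Pace][42-45] Arc Name 03 [1080p][hash].mkv" -> episode number "03"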
+ # get the number of the file
+ # Extract the number after the arc name in the file
+ number = file.lower().split(arc.lower())[-1].strip().split()[0]
+ # Rename the file
+ new_name = f"{number} - {file}"
+            # replace [One Pace] with (One Pace)
+            new_name = new_name.replace("[One Pace]", "(One Pace) ")
+            # turn the first remaining [...] group (the manga chapters) into a "Manga Ch. ... -" prefix
+            new_name = new_name.replace("[", "Manga Ch. ", 1).replace("]", " -", 1)
+ # rename the file
+ os.rename(os.path.join(anime_directory, file), os.path.join(anime_directory, arc_folder, new_name))
+
+ # print the arc name and the files
+ to_print = f"Arc: {arc}, Files: {arc_files}, Folder: {arc_folder}"
+
+ if not arc_files:
+ to_print = f"{bcolors.DARK_GRAY} {to_print} {bcolors.ENDC}"
+
+ print(to_print)
+
+
+# ============== CREATE CHAPTER STATUS REPORT ==============
+
+
+def chapter_report():
+ tabulate_headers = [f"{bcolors.BOLD}Arc", "Expected", "Got", f"Missing{bcolors.ENDC}"]
+ tabulate_data = []
+
+ for i in range(len(data)):
+ if i == 0: continue # Skip the first row (header)
+ folder_number = f"{int(data[i][0]):02}" if data[i][0].isdigit() else f"{float(data[i][0]):04.1f}"
+ folder_name = f"{folder_number} - {data[i][1]}"
+ arc = data[i][1]
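+        # column 8 of the cleaned table holds the expected number of One Pace episodes for this arc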
+ n_ch = data[i][8]
+ # get the number of files in the folder
+ arc_folder = os.path.join(arcs_folders, folder_name)
+ n_files = 0
+ if os.path.exists(arc_folder):
+            n_files = len([f for f in os.listdir(arc_folder) if not f.startswith("cover")])  # don't count the cover image
+
+ missing_chapters = int(n_ch) - n_files
+
+ if missing_chapters > 0: # there are missing episodes
+ tabulate_data.append([f'{bcolors.RED}{arc}{bcolors.ENDC}', n_ch, n_files, f'{bcolors.RED}{bcolors.BOLD}{missing_chapters}{bcolors.ENDC}'])
+ elif missing_chapters == 0: # no missing episodes
+ tabulate_data.append([f'{bcolors.GREEN}{arc}{bcolors.ENDC}', n_ch, n_files, f'{bcolors.GREEN}{missing_chapters}{bcolors.ENDC}'])
+ elif missing_chapters < 0: # more episodes than expected
+ tabulate_data.append([f'{bcolors.YELLOW}{arc}{bcolors.ENDC}', f'{bcolors.BOLD}{n_ch}{bcolors.ENDC}', f'{bcolors.YELLOW}{bcolors.BOLD}{n_files}{bcolors.ENDC}', f'{bcolors.YELLOW}{missing_chapters}{bcolors.ENDC}'])
+
+ print(tabulate(tabulate_data, headers=tabulate_headers, tablefmt="fancy_grid"))
+ print()
+
+
+# =============== PARAMS BEHAVIOUR SELECTOR ================
+
+
+# -h, -v params, print the help and exit
+if len(sys.argv) > 1 and (sys.argv[1].lower() == "-h" or sys.argv[1].lower() == "-v"):
+ help_n_info()
+ exit(0)
+
+# -f only creates the folders
+if len(sys.argv) > 1 and sys.argv[1].lower() == "-f":
+ print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+    check_folders()
+    parse_arc_table()
+    sanitize_table()
+    add_images_to_data()
+    create_arc_folders_and_download_images()
+ exit(0)
+
+# -o, orders the chapters only
+if len(sys.argv) > 1 and sys.argv[1].lower() == "-o":
+ print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+    check_folders()
+    parse_arc_table()
+    sanitize_table()
+ order_and_rename()
+ exit(0)
+
+# -c, reports missing chapters
+if len(sys.argv) > 1 and sys.argv[1].lower() == "-c":
+ print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+    check_folders()
+    parse_arc_table()
+    sanitize_table()
+ chapter_report()
+ exit(0)
+
+# if no params are given, run all the script
+help_n_info()
+print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+check_folders()
+parse_arc_table()
+sanitize_table()
+add_images_to_data()
+create_arc_folders_and_download_images()
+order_and_rename()
+chapter_report()