why didnt I set git before?
authorEduardo <[email protected]>
Sun, 4 May 2025 12:27:43 +0000 (14:27 +0200)
committerEduardo <[email protected]>
Sun, 4 May 2025 12:27:43 +0000 (14:27 +0200)
.gitignore [new file with mode: 0644]
LICENSE [new file with mode: 0644]
README.MD [new file with mode: 0644]
default_image.jpg [new file with mode: 0644]
formator.py [new file with mode: 0755]
requirements.txt [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..4d7f227
--- /dev/null
@@ -0,0 +1,3 @@
+*.html
+.venv
+resources/
diff --git a/LICENSE b/LICENSE
new file mode 100644 (file)
index 0000000..73d60ef
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,7 @@
+Copyright 2025 EduFdezSoy (Eduardo Fernandez)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.MD b/README.MD
new file mode 100644 (file)
index 0000000..bc7feb3
--- /dev/null
+++ b/README.MD
@@ -0,0 +1,53 @@
+# One Pace Arc Formatter
+
+This script creates a folder for each arc and downloads the cover image for each one. Then, if the root folder contains `[One Pace]` files, it moves each one into its arc folder and renames it to a more readable title for [Jellyfin](https://jellyfin.org/) _(and possibly others)_.  
+It will then report how many episodes there are per arc so you can easily see if there are missing episodes and where.
+
+**Note about the rename:**  
+It does not destroy info from the title. It changes some [] and adds a numbering order.  
+**Example:**
+`[One Pace][123-456] Arabasta 01 [1080p][ABCD1234]` >> `01 - (One Pace) Manga Ch. 123-456 - Arabasta 01 [1080p][ABCD1234]`
+
+## Usage
+
+> You will need [Python](https://www.python.org/) if you don't have it installed already.
+
+Install the dependencies listed in **requirements.txt**:  
+`pip install -r requirements.txt`
+
+Download the official One Pace Doc in HTML format:
+
+> You can download the needed HTML files from the official One Pace Doc by clicking:  
+> File > Download > Web Page (.html)  
+> https://docs.google.com/spreadsheets/d/1HQRMJgu_zArp-sLnvFMDzOyjdsht87eFLECxMK858lA/edit
+
+Then open the **formator.py** file and edit the **CONFIGURATION** section at the top of the file. You will need to set:
+
+- the folder paths where it will create the folders for each arc,
+- the path where the HTML files you just downloaded are
+- and the default image, for those arcs without cover image. There is a default image in this repo, you can use it if you want.
+
+Then you can launch it. You may want to add `-h` to see all the options.  
+Something like: `python formator.py -h`
+
+### Current options (may be outdated)
+
+| Options: |                                                                               |
+| -------- | ----------------------------------------------------------------------------- |
+| -h, -v   | shows the help message                                                        |
+|          | no arg executes all: creates the folders, adds images and orders the chapters |
+| -f       | creates the folders and downloads the images                                  |
+| -o       | orders the chapters                                                           |
+| -c       | reports missing chapters                                                      |
+
+> I use `-f` only when a new arc is added to the docs, otherwise I mainly use `-o` and `-c` to check.
+
+## Donations
+
+If you really liked it and feel like I deserve some money, you can buy me a [coffee](https://ko-fi.com/EduFdezSoy) and I'll continue transforming caffeine into code!
+
+## Copyright
+
+Copyright &copy; 2025 Eduardo Fernandez.
+
+**One Pace Arc Formatter** is released under the MIT License; see _LICENSE_ for further details.
diff --git a/default_image.jpg b/default_image.jpg
new file mode 100644 (file)
index 0000000..6b751e5
Binary files /dev/null and b/default_image.jpg differ
diff --git a/formator.py b/formator.py
new file mode 100755 (executable)
index 0000000..9d3b449
--- /dev/null
@@ -0,0 +1,368 @@
+from tabulate import tabulate
+from bs4 import BeautifulSoup
+import requests
+import sys
+import os
+
+
+# =============== CONFIGURATION ================
+
+
+# Folder where the season / arc folders will be created. It is also the folder
+# that is scanned for loose "[One Pace]" files to sort into those arc folders.
+arcs_folders = "/home/something/something/Anime/One Piece (1999) [tvdbid-81797]"
+# Folder where the downloaded HTML files are located. Keep the trailing slash:
+# some file paths are built from this value by plain string concatenation.
+html_files_path = "/home/Downloads/One Pace Episode Guide/"
+# Default season cover image, used when an arc's image is not found or is
+# invalid. It is written to the arc folder as "cover.jpg", so it MUST BE JPG.
+default_image_path = os.path.join(html_files_path, "default_image.jpg")
+
+
+# =============== COLOR CLASS ================
+
+
+class bcolors:
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+    PURPLE = '\033[95m'
+    BLUE = '\033[94m'
+    CYAN = '\033[96m'
+    GREEN = '\033[92m'
+    YELLOW = '\033[93m'
+    RED = '\033[91m'
+    LIGHT_GRAY = '\033[37m'
+    DARK_GRAY = '\033[90m'
+    BG_RED = '\033[41m'
+
+
+# =============== PRINT HELP AND INFO ================
+
+
+def help_n_info():
+    print()
+    print(f"{bcolors.DARK_GRAY} Author: {bcolors.BOLD}EduFdezSoy ({bcolors.UNDERLINE}https://edufdez.es/{bcolors.ENDC}{bcolors.BOLD}{bcolors.DARK_GRAY}) {bcolors.ENDC}")
+    print(f"{bcolors.DARK_GRAY} Version: {bcolors.BOLD}2.1 {bcolors.ENDC}")
+    print(f"{bcolors.DARK_GRAY} Date: {bcolors.BOLD}2025/05/04 {bcolors.ENDC}")
+    print(f"{bcolors.DARK_GRAY} License: {bcolors.BOLD}MIT {bcolors.ENDC}")
+    print()
+    print()
+    print(f"{bcolors.UNDERLINE}{bcolors.BOLD}{bcolors.PURPLE}One Pace Arc Formatter{bcolors.ENDC}")
+    print()
+    print(f"{bcolors.BLUE}This script creates folders for each arc and downloads the cover image for each one{bcolors.ENDC}")
+    print(f"{bcolors.BLUE}then if the root folder contains [One Pace] files it will move each one to their right arc and rename it to a more readable title for Jellyfin (and posibly others).{bcolors.ENDC}")
+    print()
+    print(f"{bcolors.BLUE}You can download the needed HTML files from the oficial One Pace Doc {bcolors.DARK_GRAY}by clicking Archive > Download > Web Page (.html){bcolors.ENDC}")
+    print(f"{bcolors.CYAN}{bcolors.UNDERLINE}https://docs.google.com/spreadsheets/d/1HQRMJgu_zArp-sLnvFMDzOyjdsht87eFLECxMK858lA/edit{bcolors.ENDC}")
+    print()
+    print()
+    print(f"{bcolors.YELLOW}⚠️  Make sure to set the correct paths in the configuration section {bcolors.LIGHT_GRAY}(At the top of the file){bcolors.ENDC}")
+    print()
+    print()
+    print(f"{bcolors.UNDERLINE}Options:{bcolors.ENDC}")
+    print()
+    print("-h, -v   shows this message")
+    print("         no arg executes all: creates the folders, downloads the images and orders the chapters")
+    print("-f       creates the folders and donwloads the images")
+    print("-o       orders the chapters")
+    print("-c       reports missing chapters")
+    print()
+
+
+# =============== CHECK IF FOLDERS EXIST ================
+
+
+def chech_folders():
+    if not os.path.exists(arcs_folders):
+        print(f"{bcolors.FAIL}Error: The directory does not exist: {arcs_folders}{bcolors.ENDC}")
+        exit(1)
+
+    if not os.path.exists(html_files_path):
+        print(f"{bcolors.FAIL}Error: The directory does not exist: {html_files_path}{bcolors.ENDC}")
+        exit(1)
+
+    if not os.path.exists(default_image_path):
+        print(f"{bcolors.FAIL}Error: The default image does not exist: {default_image_path}{bcolors.ENDC}")
+        exit(1)
+
+    html_file_path = html_files_path + "Arc Overview.html"
+    if not os.path.exists(html_file_path):
+        print(f"{bcolors.FAIL}Error: The HTML file does not exist: {html_file_path}{bcolors.ENDC}")
+        exit(1)
+
+
+# =============== PARSE ARC TABLE ================
+
+
+data = []
+def parse_arc_table():
+    global data
+    # Load the HTML file
+    html_file_path = html_files_path + "Arc Overview.html"
+
+    with open(html_file_path, "r", encoding="utf-8") as file:
+        soup = BeautifulSoup(file, "html.parser")
+
+    # Find the table
+    table = soup.find("table", class_="waffle")
+
+    # Parse the table into a 2D array
+    if table:
+        rows = table.find_all("tr")
+        for row in rows:
+            cells = row.find_all(["td", "th"])  # Include both <td> and <th>
+            data.append([cell.get_text(strip=True) for cell in cells])
+
+
+# =============== SANITIZE ARC TABLE ================
+
+
+def sanityze_table():
+    global data
+    # remove empty rows
+    data = [row for row in data if any(cell.strip() for cell in row)]
+
+    # now I want to delete the first row and the first column
+    if data:
+        # Remove the first row
+        data.pop(0)
+        # Remove the first column from each row
+        for i in range(len(data)):
+            if len(data[i]) > 0:
+                data[i].pop(0)
+
+    # remove last thre rows only if the first column is not a number
+    for i in range(3):
+        if data:
+            # Check if the first column of the last row is not a number
+            if not data[-1][0].isdigit():
+                # Remove the last row
+                data.pop(-1)
+            else:
+                break
+
+    # Clean up all cells in the data
+    for i in range(len(data)):
+        for j in range(len(data[i])):
+            # Remove any HTML specifics like \n and trim whitespace
+            data[i][j] = data[i][j].replace("\n", "").strip()
+            data[i][j] = ' '.join(data[i][j].split())
+
+
+# =============== ADD IMAGES TO ARRAY ================
+
+
+# default_image = "https://onepace.net/_next/static/media/logo.0bbcd6da.svg"
+
+def add_images_to_data():
+    global data
+    # second column is equal to the file name of the html with the data of the arc
+    for i in range(len(data)):
+        if i == 0: continue # Skip the first row (header)
+        # first get the arc name and remove whatever is in the brackets
+        arc_name = data[i][1].split('(')[0].strip()
+        # now get the file name
+        file_name = html_files_path + arc_name + ".html"
+        # now get the image, file only contains one img tag
+        if os.path.exists(file_name):
+            with open(file_name, "r", encoding="utf-8") as file:
+                arc_soup = BeautifulSoup(file, "html.parser")
+                img_tag = arc_soup.find("img")
+                if img_tag and "src" in img_tag.attrs:
+                    data[i].append(img_tag["src"])
+                else:
+                    data[i].append('none')  # Append none if no image is found
+        else:
+            data[i].append(None)  # Append None if the file does not exist
+
+
+# ================= CREATE FOLDERS AND DOWNLOAD IMAGES ===================
+
+
+def create_arc_folders_and_dd_images():
+    global data
+    # Create folders in the arcs_folders directory
+    for i in range(len(data)):
+        if i == 0: continue # Skip the first row (header)
+        if len(data[i]) > 1:
+            folder_number = f"{int(data[i][0]):02}" if data[i][0].isdigit() else f"{float(data[i][0]):04.1f}"
+            folder_name = f"{folder_number} - {data[i][1]}"
+            folder_path = os.path.join(arcs_folders, folder_name)
+            os.makedirs(folder_path, exist_ok=True)
+            # Download the image and save it as "cover" inside the folder
+            if data[i][-1]:  # Check if the image URL exists
+                image_url = data[i][-1]
+
+                # Check if the image URL is valid
+                if not image_url.startswith("http"):
+                    print(f"Invalid image URL for {folder_name}: {image_url}, default image will be used.")
+                    # copy the default image, html_files_path + "default_image.jpg"
+                    if os.path.exists(default_image_path):
+                        with open(default_image_path, "rb") as default_img_file:
+                            with open(os.path.join(folder_path, "cover.jpg"), "wb") as img_file:
+                                img_file.write(default_img_file.read())
+                    else:
+                        print(f"Default image not found: {default_image_path}")
+                    continue
+
+                # Determine the file extension from the image URL
+                extension = os.path.splitext(image_url)[-1] or ".jpg"
+                image_path = os.path.join(folder_path, f"cover{extension}")
+                try:
+                    response = requests.get(image_url, stream=True)
+                    if response.status_code == 200:
+                        with open(image_path, "wb") as img_file:
+                            for chunk in response.iter_content(1024):
+                                img_file.write(chunk)
+                except Exception as e:
+                    print(f"Failed to download image for {folder_name}: {e}")
+
+
+# ================= ORDER AND RENAME FILES ===================
+
+
+def order_and_rename():
+    global data
+    # List files in the specified directory that contain "[one pace]" in the title
+    anime_directory = arcs_folders
+    pace_files = []
+    arcs_names = []
+
+    # get files from [one pace]
+    if os.path.exists(anime_directory):
+        anime_files = os.listdir(anime_directory)
+        for file in anime_files:
+            if "[one pace]" in file.lower():
+                pace_files.append(file)
+    else:
+        print(f"Directory does not exist: {anime_directory}")
+
+    # get arcs names from data array
+    for i in range(len(data)):
+        if i == 0: continue # Skip the first row (header)
+        if len(data[i]) > 1:
+            name = data[i][1]
+            # remove the brackets and everything inside
+            name = name.split('(')[0].strip()
+            arcs_names.append(name)
+
+    for arc in arcs_names:
+        arc_files = []
+        arc_folder = ""
+        # search arc files in anime_files
+        for file in pace_files:
+            if arc.lower() in file.lower():
+                arc_files.append(file)
+        
+        # get the folder that contains the arc
+        for folder in os.listdir(anime_directory):
+            if arc.lower() in folder.lower():
+                # check if the folder is a directory!
+                if not os.path.isdir(os.path.join(anime_directory, folder)):
+                    continue
+                arc_folder = folder
+                break
+        
+        # move and rename files
+        for file in arc_files:
+            # get the number of the file
+            # Extract the number after the arc name in the file
+            number = file.lower().split(arc.lower())[-1].strip().split()[0]
+            # Rename the file
+            new_name = f"{number} - {file}"
+            # repalce [One Pace] with (One Pace)
+            new_name = new_name.replace("[One Pace]", "(One Pace) ")
+            # remove only the first ocurences of [] and add at the start "Manga Chapters "
+            new_name = new_name.replace("[", "Manga Ch. ", 1).replace("]", " -", 1)
+            # rename the file
+            os.rename(os.path.join(anime_directory, file), os.path.join(anime_directory, arc_folder, new_name))
+
+        # print the arc name and the files
+        to_print = f"Arc: {arc}, Files: {arc_files}, Folder: {arc_folder}"
+
+        if not arc_files:
+            to_print = f"{bcolors.DARK_GRAY} {to_print} {bcolors.ENDC}"
+
+        print(to_print)
+
+
+# ============== CREATE CHAPTER STATUS REPORT ==============
+
+
+def chapter_report():
+    tabulate_headers = [f"{bcolors.BOLD}Arc", "Expected", "Got", f"Missing{bcolors.ENDC}"]
+    tabulate_data = []
+    
+    for i in range(len(data)):
+        if i == 0: continue # Skip the first row (header)
+        folder_number = f"{int(data[i][0]):02}" if data[i][0].isdigit() else f"{float(data[i][0]):04.1f}"
+        folder_name = f"{folder_number} - {data[i][1]}"
+        arc = data[i][1]
+        n_ch = data[i][8]
+        # get the number of files in the folder
+        arc_folder = os.path.join(arcs_folders, folder_name)
+        n_files = 0
+        if os.path.exists(arc_folder):
+            n_files = len(os.listdir(arc_folder)) - 1 # -1 for the cover image
+        
+        missing_chapters = int(n_ch) - n_files
+
+        line = ""
+        
+        if missing_chapters > 0: # there are missing episodes
+            tabulate_data.append([f'{bcolors.RED}{arc}{bcolors.ENDC}', n_ch, n_files, f'{bcolors.RED}{bcolors.BOLD}{missing_chapters}{bcolors.ENDC}'])
+        elif missing_chapters == 0: # no missing episodes
+            tabulate_data.append([f'{bcolors.GREEN}{arc}{bcolors.ENDC}', n_ch, n_files, f'{bcolors.GREEN}{missing_chapters}{bcolors.ENDC}'])
+        elif missing_chapters < 0: # more episodes than expected
+            tabulate_data.append([f'{bcolors.YELLOW}{arc}{bcolors.ENDC}', f'{bcolors.BOLD}{n_ch}{bcolors.ENDC}', f'{bcolors.YELLOW}{bcolors.BOLD}{n_files}{bcolors.ENDC}', f'{bcolors.YELLOW}{missing_chapters}{bcolors.ENDC}'])
+
+    print(tabulate(tabulate_data, headers=tabulate_headers, tablefmt="fancy_grid"))
+    print()
+
+
+
+
+# =============== PARAMS BEHAVIOUR SELECTOR ================
+
+
+# -h, -v params, print the help and exit
+if len(sys.argv) > 1 and (sys.argv[1].lower() == "-h" or sys.argv[1].lower() == "-v"):
+    help_n_info()
+    exit(0)
+
+# -f only creates the folders
+if len(sys.argv) > 1 and sys.argv[1].lower() == "-f":
+    print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+    chech_folders()
+    parse_arc_table()
+    sanityze_table()
+    add_images_to_data()
+    create_arc_folders_and_dd_images()
+    exit(0)
+
+# -o, orders the chapters only
+if len(sys.argv) > 1 and sys.argv[1].lower() == "-o":
+    print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+    chech_folders()
+    parse_arc_table()
+    sanityze_table()
+    order_and_rename()
+    exit(0)
+
+# -c, reports missing chapters
+if len(sys.argv) > 1 and sys.argv[1].lower() == "-c":
+    print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+    chech_folders()
+    parse_arc_table()
+    sanityze_table()
+    chapter_report()
+    exit(0)
+
+# if no params are given, run all the script
+help_n_info()
+print(f"\n{bcolors.LIGHT_GRAY}Running...{bcolors.ENDC}\n")
+chech_folders()
+parse_arc_table()
+sanityze_table()
+add_images_to_data()
+create_arc_folders_and_dd_images()
+order_and_rename()
+chapter_report()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644 (file)
index 0000000..9545532
--- /dev/null
@@ -0,0 +1,10 @@
+beautifulsoup4==4.13.3
+bs4==0.0.2
+certifi==2025.1.31
+charset-normalizer==3.4.1
+idna==3.10
+requests==2.32.3
+soupsieve==2.6
+tabulate==0.9.0
+typing_extensions==4.13.1
+urllib3==2.3.0