-
-
Save moi15moi/cbc9b29bba40e1eae035a4bc212c60c7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import logging | |
import os | |
import shutil | |
import subprocess | |
from aenum import MultiValueEnum | |
from enum import Enum | |
from ass import parse_file, Document | |
from argparse import ArgumentParser | |
from decimal import Decimal | |
from font_collector import Font, AssDocument, Helpers, set_loglevel | |
set_loglevel(logging.CRITICAL) | |
from pathlib import Path | |
from tempfile import gettempdir | |
from typing import Any, Dict, List | |
# MIME types used to decide whether an attachment stream of the video is a font.
# From mpv: https://github.com/mpv-player/mpv/blob/305332f8a06e174c5c45c9c4547293502ac7ecdb/sub/sd_ass.c#L101
FONT_MIMETYPE = [
    "application/x-truetype-font",
    "application/vnd.ms-opentype",
    "application/x-font-ttf",
    "application/x-font",
    "application/font-sfnt",
    "font/collection",
    "font/otf",
    "font/sfnt",
    "font/ttf",
]
class ColorSpaceFFmpeg(Enum):
    """Color space names, spelled the way FFmpeg spells them.

    A member is built from the "color_space" string that ffprobe reports for
    a video stream.
    """

    # https://github.com/FFmpeg/FFmpeg/blob/10f1bbfe117e990a6fe56c03085966fa0c0f0f2e/libavfilter/vf_setparams.c#L104-L117
    RGB = "gbr"
    BT709 = "bt709"
    UNSPECIFIED = "unknown"
    FCC = "fcc"
    BT470BG = "bt470bg"
    SMPTE170M = "smpte170m"
    SMPTE240M = "smpte240m"
    YCGCO = "ycgco"
    BT2020_NCL = "bt2020nc"
    BT2020_CL = "bt2020c"
    SMPTE2085 = "smpte2085"
    CHROMA_DERIVED_NCL = "chroma-derived-nc"
    CHROMA_DERIVED_CL = "chroma-derived-c"
    ICTCP = "ictcp"
class ColorRangeFFmpeg(MultiValueEnum):
    """Color range names, spelled the way FFmpeg spells them.

    Each member lists several spellings of the same range. A member is built
    from the "color_range" string that ffprobe reports for a video stream.
    NOTE(review): presumably any listed spelling resolves to the member, as
    aenum's MultiValueEnum is documented to do - confirm against aenum.
    """

    # https://github.com/FFmpeg/FFmpeg/blob/10f1bbfe117e990a6fe56c03085966fa0c0f0f2e/libavfilter/vf_setparams.c#L57-L64
    UNSPECIFIED = "unspecified", "unknown"
    MPEG = "limited", "tv", "mpeg"
    JPEG = "full", "pc", "jpeg"
def get_color_matrix_name(color_space: ColorSpaceFFmpeg, color_range: ColorRangeFFmpeg):
    """Return the .ass "YCbCr Matrix" value for a video color space and range.

    The mapping follows Aegisub:
    https://github.com/arch1t3cht/Aegisub/blob/26a5f00c8af92b6e6460d35411b71cf8835c6503/src/video_provider_ffmpegsource.cpp#L124-L143

    Parameters:
        color_space (ColorSpaceFFmpeg): Color space of the video stream.
        color_range (ColorRangeFFmpeg): Color range of the video stream.
    Returns:
        "None" for an RGB video. Otherwise "<range>.<matrix>", where <range>
        is "PC" for the JPEG range and "TV" for any other range.
    Raises:
        Exception: If the color space has no equivalent in the .ass format.
    """
    # An RGB video has no YCbCr matrix, so the range doesn't matter.
    if color_space == ColorSpaceFFmpeg.RGB:
        return "None"

    matrix_by_color_space = {
        ColorSpaceFFmpeg.BT709: "709",
        ColorSpaceFFmpeg.FCC: "FCC",
        ColorSpaceFFmpeg.BT470BG: "601",
        ColorSpaceFFmpeg.SMPTE170M: "601",
        ColorSpaceFFmpeg.SMPTE240M: "240M",
    }
    try:
        matrix = matrix_by_color_space[color_space]
    except KeyError:
        raise Exception(f'The ColorSpace "{color_space.name}" cannot be handle by .ass format') from None

    range_prefix = "PC" if color_range == ColorRangeFFmpeg.JPEG else "TV"
    return f"{range_prefix}.{matrix}"
def print_warning(message: str):
    """Print a message on the standard output, prefixed with "WARNING: "."""
    print("WARNING: {}".format(message))
def print_error(message: str):
    """Print a message on the standard output, prefixed with "ERROR: "."""
    print("ERROR: {}".format(message))
def get_subtitles(video_path: Path) -> Dict[int, Document]:
    """Extract and parse every .ass subtitle stream of a video.

    Each stream is written by ffmpeg to a temporary "<index>.ass" file, which
    is parsed and then deleted.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        A dictionary where:
            - Key (int): Video's index of the subtitle stream.
            - Value (Document): The subtitle.
    """
    resolved_video_path = str(video_path.resolve())

    # The command is an argument list, not one string: without shell=True,
    # a string holding arguments is only accepted on Windows. A list also
    # needs no manual quoting of the path.
    probe_cmd = [
        "ffprobe",
        "-select_streams", "s",
        "-show_entries", "stream=index,codec_name",
        "-print_format", "json",
        resolved_video_path,
    ]
    probe_result = subprocess.run(probe_cmd, capture_output=True, text=True)
    streams = json.loads(probe_result.stdout)["streams"]

    if len(streams) == 0:
        print_warning(f'The file "{video_path}" doesn\'t contain any subtitle stream.')

    temp_dir = gettempdir()
    documents = {}
    for stream in streams:
        if stream["codec_name"] != "ass":
            continue

        stream_index = stream["index"]
        subs_path = os.path.join(temp_dir, f'{stream_index}.ass')
        extract_cmd = [
            "ffmpeg",
            "-y",
            "-i", resolved_video_path,
            "-map", f"0:{stream_index}",
            subs_path,
        ]
        subprocess.run(extract_cmd, capture_output=True)

        try:
            with open(subs_path, encoding="utf_8_sig") as file:
                documents[stream_index] = parse_file(file)
        finally:
            # Delete the temporary file even when the parsing fails. It may
            # not exist if ffmpeg could not extract the stream.
            if os.path.exists(subs_path):
                os.remove(subs_path)

    return documents
def get_video_muxed_fonts(video_path: Path) -> List[Font]:
    """Extract the fonts attached to a video and load them.

    Each attachment whose mimetype is listed in FONT_MIMETYPE is written by
    ffmpeg to a temporary "<index>.font" file and loaded from there.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        The fonts loaded from the attachments. One attachment can give more
        than one font. The temporary font files are NOT deleted here.
    """
    resolved_video_path = str(video_path.resolve())

    # The command is an argument list, not one string: without shell=True,
    # a string holding arguments is only accepted on Windows. A list also
    # needs no manual quoting of the path.
    probe_cmd = [
        "ffprobe",
        "-select_streams", "t",
        "-show_entries", "stream=index : stream_tags=mimetype",
        "-print_format", "json",
        resolved_video_path,
    ]
    probe_result = subprocess.run(probe_cmd, capture_output=True, text=True)
    streams = json.loads(probe_result.stdout)["streams"]

    if len(streams) == 0:
        print_warning(f'The file "{video_path}" doesn\'t contain any attachments.')

    temp_dir = gettempdir()
    fonts: List[Font] = []
    for stream in streams:
        # An attachment can have no tags or no mimetype. It is then not
        # considered as a font instead of raising a KeyError.
        mimetype = stream.get("tags", {}).get("mimetype")
        if mimetype not in FONT_MIMETYPE:
            continue

        stream_index = stream["index"]
        font_path = os.path.join(temp_dir, f'{stream_index}.font')
        # No check on the return code. NOTE(review): ffmpeg presumably exits
        # with an error here since no output file is given - confirm.
        dump_cmd = [
            "ffmpeg",
            "-y",
            f"-dump_attachment:{stream_index}", font_path,
            "-i", resolved_video_path,
        ]
        subprocess.run(dump_cmd, capture_output=True, text=True)

        fonts.extend(Font.from_font_path(font_path))
        # We don't directly delete the font file, since we can need it to verify if the font contain glyph

    return fonts
def get_video_timestamps_from_index(video_path: Path, index: int) -> List[int]:
    """Return the timestamps of every packet of a video stream.

    Parameters:
        video_path (Path): Video path.
        index (int): Stream index of the video.
    Returns:
        The list of timestamps, sorted in ascending order. Each one is the
        "pts_time" reported by ffprobe multiplied by 1000 and rounded.
    Raises:
        Exception: If ffprobe reports nothing for the file.
        ValueError: If the index is not in the file or is not a video stream.
    """

    def get_pts(packets) -> List[int]:
        # + 0.5 to round the result
        half = Decimal("0.5")
        return sorted(int(Decimal(packet["pts_time"]) * 1000 + half) for packet in packets)

    # The command is an argument list, not one string: without shell=True,
    # a string holding arguments is only accepted on Windows. A list also
    # needs no manual quoting of the path.
    cmd = [
        "ffprobe",
        "-select_streams", str(index),
        "-show_entries", "packet=pts_time:stream=codec_type",
        "-print_format", "json",
        str(video_path.resolve()),
    ]
    ffprobe_output = subprocess.run(cmd, capture_output=True, text=True)
    ffprobe_output_dict = json.loads(ffprobe_output.stdout)

    if len(ffprobe_output_dict) == 0:
        raise Exception(f"The file {video_path} is not a video file or the file does not exist.")

    streams = ffprobe_output_dict["streams"]
    if len(streams) == 0:
        raise ValueError(f"The index {index} is not in the file {video_path}.")

    codec_type = streams[0]["codec_type"]
    if codec_type != "video":
        raise ValueError(f'The index {index} is not a video stream. It is an {codec_type} stream.')

    return get_pts(ffprobe_output_dict["packets"])
def get_mkv_video_track_detail(video_path: Path) -> List[Dict[str, Any]]:
    """Return the details that ffprobe reports for the video streams of a file.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        One dictionary per stream selected by the "V" specifier, with the
        requested entries: index, color_space, color_range, width, height
        and start_time.
    """
    # The command is an argument list, not one string: without shell=True,
    # a string holding arguments is only accepted on Windows. A list also
    # needs no manual quoting of the path.
    cmd = [
        "ffprobe",
        "-select_streams", "V",
        "-show_entries", "stream=index,color_space,color_range,width,height,start_time",
        "-show_optional_fields", "always",
        "-print_format", "json",
        str(video_path.resolve()),
    ]
    ffprobe_output = subprocess.run(cmd, capture_output=True, text=True)
    return json.loads(ffprobe_output.stdout)["streams"]
def get_mkv_audio_track_detail(video_path: Path) -> List[Dict[str, Any]]:
    """Return the details that ffprobe reports for the audio streams of a file.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        One dictionary per audio stream, with the requested entries: index
        and start_time.
    """
    # The command is an argument list, not one string: without shell=True,
    # a string holding arguments is only accepted on Windows. A list also
    # needs no manual quoting of the path.
    cmd = [
        "ffprobe",
        "-select_streams", "a",
        "-show_entries", "stream=index,start_time",
        "-print_format", "json",
        str(video_path.resolve()),
    ]
    ffprobe_output = subprocess.run(cmd, capture_output=True, text=True)
    return json.loads(ffprobe_output.stdout)["streams"]
def get_mkv_subtitle_track_detail(video_path: Path) -> List[Dict[str, Any]]:
    """Return the details that ffprobe reports for the subtitle streams of a file.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        One dictionary per subtitle stream, with the requested entries: index
        and start_time.
    """
    # The command is an argument list, not one string: without shell=True,
    # a string holding arguments is only accepted on Windows. A list also
    # needs no manual quoting of the path.
    cmd = [
        "ffprobe",
        "-select_streams", "s",
        "-show_entries", "stream=index,start_time",
        "-print_format", "json",
        str(video_path.resolve()),
    ]
    ffprobe_output = subprocess.run(cmd, capture_output=True, text=True)
    return json.loads(ffprobe_output.stdout)["streams"]
def _check_ycbcr_matrix(video_tracks, subtitles: Dict[int, Document]) -> bool:
    """Print an error for each subtitle whose YCbCr Matrix doesn't match a video track.

    Returns:
        True if at least one error has been printed.
    """
    has_problem = False
    for track in video_tracks:
        color_space = ColorSpaceFFmpeg(track["color_space"])
        color_range = ColorRangeFFmpeg(track["color_range"])

        if color_space == ColorSpaceFFmpeg.UNSPECIFIED:
            # From: https://github.com/arch1t3cht/Aegisub/blob/26a5f00c8af92b6e6460d35411b71cf8835c6503/src/video_provider_ffmpegsource.cpp#L267
            color_space = ColorSpaceFFmpeg.BT709 if track["width"] > 1024 or track["height"] >= 600 else ColorSpaceFFmpeg.BT470BG

        video_stream_color_matrix = get_color_matrix_name(color_space, color_range)

        for sub_index, subtitle in subtitles.items():
            try:
                subtitle_matrix = subtitle.info["YCbCr Matrix"]
            except KeyError:
                has_problem = True
                print_error(f"The subtitle at the index {sub_index} doesn't contain an YCbCr Matrix. It should contain the YCbCr Matrix \"{video_stream_color_matrix}\"")
                continue

            if subtitle_matrix != video_stream_color_matrix:
                has_problem = True
                print_error(f"The YCbCr Matrix of the subtitle ({subtitle_matrix}) at the index {sub_index} doesn't correspond to the video color matrix \"{video_stream_color_matrix}\"")
    return has_problem


def _collect_nonzero_start_times(tracks, codec_type: str, start_times: Dict[int, Dict[str, Any]]) -> None:
    """Add to start_times each track whose start time is not 0.

    The start time is stored in milliseconds, under the index of the track.
    """
    for track in tracks:
        raw_start_time = track.get("start_time")
        # Without a usable start time there is no offset to report. Skipping
        # the track avoids a KeyError or a decimal.InvalidOperation.
        if raw_start_time is None or raw_start_time == "N/A":
            continue

        start_time = Decimal(raw_start_time)
        if start_time != 0:
            start_times[int(track["index"])] = {
                "codec_type": codec_type,
                "start_time": int(start_time * 1000),
            }


def _check_start_times(video_path: Path, start_times: Dict[int, Dict[str, Any]]) -> bool:
    """Print an error and mkvmerge commands if at least one track doesn't start at 0.

    Returns:
        True if an error has been printed.
    """
    if not start_times:
        return False

    print_error("There is a least one track where the start isn't 0:")
    # The opposite of the start time is used, to shift each track back to 0.
    sync_command = " ".join([f"--sync {index}:{track['start_time'] * -1}" for index, track in start_times.items()])
    sync_command_without_subtitle = " ".join([f"--sync {index}:{track['start_time'] * -1}" for index, track in start_times.items() if track['codec_type'] != "subtitle"])

    # The first command is only useful if it shifts at least one track and
    # if it is not the same as the command for all the tracks.
    if sync_command_without_subtitle and sync_command != sync_command_without_subtitle:
        print("\tHere is an command you can try to change the video timestamps of the video and audio")
        print(f'\t\tmkvmerge --output "{video_path.stem} - Timestamps corrected.mkv" {sync_command_without_subtitle} "{video_path}"')

    print("\tHere is an command you can try to change the video timestamps of all the tracks:")
    print(f'\t\tmkvmerge --output "{video_path.stem} - Timestamps corrected.mkv" {sync_command} "{video_path}"')
    return True


def _check_fonts(subtitles: Dict[int, Document], font_collection: List[Font]) -> bool:
    """Print the fonts problems of each style used by the subtitles.

    A missing font or a missing glyph is an error. A weight or italic
    mismatch is only a warning.

    Returns:
        True if at least one error has been printed.
    """
    has_problem = False
    for subtitle_index, subtitle in subtitles.items():
        ass_doc = AssDocument(subtitle)
        styles = ass_doc.get_used_style()

        for style, usage_data in styles.items():
            font_result = Helpers.get_used_font_by_style(font_collection, style)

            if font_result is None:
                print_error(f"Subtitle Index {subtitle_index} - You need to mux the font named: '{style.fontname}'")
                print(f"\tUsed on lines: {' '.join(str(line) for line in usage_data.ordered_lines)}")
                has_problem = True
                continue

            if font_result.mismatch_bold:
                print_warning(f"Subtitle Index {subtitle_index} - The font has an different weight then the style. Font weight is {font_result.font.weight} and the style is {style.weight}.")
            if font_result.mismatch_italic:
                if style.italic:
                    print_warning(f"Subtitle Index {subtitle_index} - The font is not italic, but the style is.")
                else:
                    print_warning(f"Subtitle Index {subtitle_index} - The font is italic, but the style is not.")

            if font_result.mismatch_bold or font_result.mismatch_italic:
                print(f"\tUsed on lines: {' '.join(str(line) for line in usage_data.ordered_lines)}")

            missing_glyphs = font_result.font.get_missing_glyphs(
                usage_data.characters_used
            )
            if len(missing_glyphs) != 0:
                has_problem = True
                print_error(f"Subtitle Index {subtitle_index} - '{style.fontname}' is missing the following glyphs: {missing_glyphs}")
    return has_problem


def verify_video_errors_mediainfo(video_path: Path, subtitles: Dict[int, Document], font_collection: List[Font]):
    """Print every problem found in a video, or a message saying there is none.

    Parameters:
        video_path (Path): The path of an mkv file.
        subtitles (Dict[int, Document]): The subtitles of the video, by stream index.
        font_collection (List[Font]): The fonts muxed in the video.
    """
    video_tracks = get_mkv_video_track_detail(video_path)
    audio_tracks = get_mkv_audio_track_detail(video_path)
    subtitle_tracks = get_mkv_subtitle_track_detail(video_path)

    mkv_has_problem = False

    # Verify if
    #   - the subtitle YCbCr Matrix is the same has the video.
    if _check_ycbcr_matrix(video_tracks, subtitles):
        mkv_has_problem = True

    # Verify if
    #   - the first timestamps is equals to 0. It is necessary in order not to have a lag with the subtitles.
    track_codec_type_and_start_time: Dict[int, Dict[str, Any]] = {}
    _collect_nonzero_start_times(video_tracks, "video", track_codec_type_and_start_time)
    _collect_nonzero_start_times(audio_tracks, "audio", track_codec_type_and_start_time)
    _collect_nonzero_start_times(subtitle_tracks, "subtitle", track_codec_type_and_start_time)
    if _check_start_times(video_path, track_codec_type_and_start_time):
        mkv_has_problem = True

    # Verify if
    #   - the mkv contain all the needed fonts
    if _check_fonts(subtitles, font_collection):
        mkv_has_problem = True

    if not mkv_has_problem:
        print(f'The file "{video_path}" has no problem.')
def main():
    """Verify every .mkv file of a directory and print the problems found.

    The directory is given with -d/--directory and defaults to the current
    working directory.

    Raises:
        Exception: If ffprobe or ffmpeg cannot be found, or if a listed file
            doesn't exist anymore.
    """
    parser = ArgumentParser(
        prog="Verify if timestamps are normalize",
        description="Normalize timestamps, so there is no difference between Aegisub and any player. See the issue: https://github.com/TypesettingTools/Aegisub/issues/21",
    )
    parser.add_argument(
        "-d",
        "--directory",
        default=os.getcwd(),
        help="""
    Folder where all mkv are store.
    """,
    )
    args = parser.parse_args()

    files = [
        Path(os.path.join(args.directory, f))
        for f in os.listdir(args.directory)
        if os.path.isfile(os.path.join(args.directory, f)) and f.endswith(".mkv")
    ]

    # Verify if ffprobe and ffmpeg are installed. Both are needed: ffprobe
    # reads the streams and ffmpeg extracts the subtitles and the fonts.
    for tool in ("ffprobe", "ffmpeg"):
        if shutil.which(tool) is None:
            raise Exception(f"{tool} is not in the environment variable.")

    for file in files:
        if not file.is_file():
            raise Exception(f'The file {file} doesn\'t exist.')

        print(f"\nVerification of: {file}")
        fonts = []
        try:
            subtitles = get_subtitles(file)
            fonts = get_video_muxed_fonts(file)
            verify_video_errors_mediainfo(file, subtitles, fonts)
        finally:
            # Several fonts can come from the same extracted file, so each
            # file is deleted only once, even if the verification failed.
            for font_filename in {font.filename for font in fonts}:
                try:
                    os.remove(font_filename)
                except FileNotFoundError:
                    # Already deleted: nothing left to clean.
                    pass
# Run the verification only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script verifies if:
To use this script, you need to install 3 dependencies:
ffmpeg - at least the version from May 5, 2021
Then, run the script: