Content-Length: 217133 | pFad | https://gist.github.com/moi15moi/cbc9b29bba40e1eae035a4bc212c60c7

9F Verify mkv release.py · GitHub
Skip to content

Instantly share code, notes, and snippets.

@moi15moi
Last active July 15, 2023 17:10
Show Gist options
  • Save moi15moi/cbc9b29bba40e1eae035a4bc212c60c7 to your computer and use it in GitHub Desktop.
Save moi15moi/cbc9b29bba40e1eae035a4bc212c60c7 to your computer and use it in GitHub Desktop.
import json
import logging
import os
import shutil
import subprocess
from aenum import MultiValueEnum
from enum import Enum
from ass import parse_file, Document
from argparse import ArgumentParser
from decimal import Decimal
from font_collector import Font, AssDocument, Helpers, set_loglevel
set_loglevel(logging.CRITICAL)
from pathlib import Path
from tempfile import gettempdir
from typing import Any, Dict, List
# MIME types under which a font attachment can be declared.
# The list is taken from mpv:
# https://github.com/mpv-player/mpv/blob/305332f8a06e174c5c45c9c4547293502ac7ecdb/sub/sd_ass.c#L101
FONT_MIMETYPE = [
    "application/x-truetype-font",
    "application/vnd.ms-opentype",
    "application/x-font-ttf",
    "application/x-font",
    "application/font-sfnt",
    "font/collection",
    "font/otf",
    "font/sfnt",
    "font/ttf",
]
class ColorSpaceFFmpeg(Enum):
    """Color space names, spelled the way FFmpeg spells them.

    The values come from the option table of FFmpeg's setparams filter:
    https://github.com/FFmpeg/FFmpeg/blob/10f1bbfe117e990a6fe56c03085966fa0c0f0f2e/libavfilter/vf_setparams.c#L104-L117
    """

    RGB = "gbr"
    BT709 = "bt709"
    UNSPECIFIED = "unknown"
    FCC = "fcc"
    BT470BG = "bt470bg"
    SMPTE170M = "smpte170m"
    SMPTE240M = "smpte240m"
    YCGCO = "ycgco"
    BT2020_NCL = "bt2020nc"
    BT2020_CL = "bt2020c"
    SMPTE2085 = "smpte2085"
    CHROMA_DERIVED_NCL = "chroma-derived-nc"
    CHROMA_DERIVED_CL = "chroma-derived-c"
    ICTCP = "ictcp"
class ColorRangeFFmpeg(MultiValueEnum):
    """Color range names, spelled the way FFmpeg spells them.

    Every member lists all the spellings of FFmpeg's setparams filter:
    https://github.com/FFmpeg/FFmpeg/blob/10f1bbfe117e990a6fe56c03085966fa0c0f0f2e/libavfilter/vf_setparams.c#L57-L64
    """

    UNSPECIFIED = "unspecified", "unknown"
    MPEG = "limited", "tv", "mpeg"
    JPEG = "full", "pc", "jpeg"
def get_color_matrix_name(color_space: ColorSpaceFFmpeg, color_range: ColorRangeFFmpeg):
    """
    Build the "YCbCr Matrix" name that corresponds to a color space and range.

    Parameters:
        color_space (ColorSpaceFFmpeg): The color space of the video.
        color_range (ColorRangeFFmpeg): The color range of the video.
    Returns:
        "None" for RGB, otherwise "<range>.<matrix>" where <range> is "PC"
        for the JPEG range and "TV" for any other range.
    Raises:
        Exception: If the color space has no equivalent name.
    """
    # Same mapping as Aegisub:
    # https://github.com/arch1t3cht/Aegisub/blob/26a5f00c8af92b6e6460d35411b71cf8835c6503/src/video_provider_ffmpegsource.cpp#L124-L143
    if color_space == ColorSpaceFFmpeg.RGB:
        return "None"

    range_prefix = "TV"
    if color_range == ColorRangeFFmpeg.JPEG:
        range_prefix = "PC"

    matrix_by_color_space = (
        (ColorSpaceFFmpeg.BT709, "709"),
        (ColorSpaceFFmpeg.FCC, "FCC"),
        (ColorSpaceFFmpeg.BT470BG, "601"),
        (ColorSpaceFFmpeg.SMPTE170M, "601"),
        (ColorSpaceFFmpeg.SMPTE240M, "240M"),
    )
    for known_color_space, matrix in matrix_by_color_space:
        if color_space == known_color_space:
            return f"{range_prefix}.{matrix}"

    raise Exception(f'The ColorSpace "{color_space.name}" cannot be handle by .ass format')
def print_warning(message: str):
    """Print ``message`` on stdout, prefixed with "WARNING: "."""
    line = "WARNING: {}".format(message)
    print(line)
def print_error(message: str):
    """Print ``message`` on stdout, prefixed with "ERROR: "."""
    line = "ERROR: {}".format(message)
    print(line)
def get_subtitles(video_path: Path) -> Dict[int, Document]:
    """
    Extract and parse every .ass subtitle stream of a video.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        A dictionary where:
            - Key (int): Video's index of the subtitle stream.
            - Value (Document): The subtitle.
    Raises:
        RuntimeError: If ffprobe cannot read the file or ffmpeg cannot extract a subtitle.
    """
    source = str(video_path.resolve())

    # The command is an argument list: on POSIX, a single command string is
    # taken as the program name unless shell=True is used.
    probe = subprocess.run(
        [
            "ffprobe",
            "-select_streams", "s",
            "-show_entries", "stream=index,codec_name",
            source,
            "-print_format", "json",
        ],
        capture_output=True,
        text=True,
    )
    if probe.returncode != 0:
        raise RuntimeError(f'ffprobe failed on the file "{video_path}": {probe.stderr.strip()}')

    streams = json.loads(probe.stdout).get("streams", [])
    if not streams:
        print_warning(f'The file "{video_path}" doesn\'t contain any subtitle stream.')

    temp_dir = gettempdir()
    documents = {}
    for stream in streams:
        if stream.get("codec_name") != "ass":
            continue

        stream_index = stream["index"]
        subs_path = os.path.join(temp_dir, f"{stream_index}.ass")
        try:
            extraction = subprocess.run(
                ["ffmpeg", "-y", "-i", source, "-map", f"0:{stream_index}", subs_path],
                capture_output=True,
            )
            if extraction.returncode != 0:
                error_output = extraction.stderr.decode(errors="replace").strip()
                raise RuntimeError(
                    f'ffmpeg failed to extract the subtitle at the index {stream_index} of "{video_path}": {error_output}'
                )
            with open(subs_path, encoding="utf_8_sig") as file:
                documents[stream_index] = parse_file(file)
        finally:
            # Never leave the extracted file behind, even when parsing fails.
            if os.path.exists(subs_path):
                os.remove(subs_path)
    return documents
def get_video_muxed_fonts(video_path: Path) -> List[Font]:
    """
    Extract the font attachments of a video and load them.

    The extracted files are written in the temporary directory and are NOT
    deleted here; the caller has to remove them when it is done with the fonts.

    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        The fonts loaded from every attachment whose mimetype is in FONT_MIMETYPE.
    Raises:
        RuntimeError: If ffprobe cannot read the file or an attachment cannot be extracted.
    """
    source = str(video_path.resolve())

    # The command is an argument list: on POSIX, a single command string is
    # taken as the program name unless shell=True is used.
    probe = subprocess.run(
        [
            "ffprobe",
            "-select_streams", "t",
            "-show_entries", "stream=index : stream_tags=mimetype",
            source,
            "-print_format", "json",
        ],
        capture_output=True,
        text=True,
    )
    if probe.returncode != 0:
        raise RuntimeError(f'ffprobe failed on the file "{video_path}": {probe.stderr.strip()}')

    streams = json.loads(probe.stdout).get("streams", [])
    if not streams:
        print_warning(f'The file "{video_path}" doesn\'t contain any attachments.')

    temp_dir = gettempdir()
    fonts = []
    for stream in streams:
        # An attachment can have no tags at all; it is then not considered a font.
        if stream.get("tags", {}).get("mimetype") not in FONT_MIMETYPE:
            continue

        stream_index = stream["index"]
        font_path = os.path.join(temp_dir, f"{stream_index}.font")
        # Remove a leftover of a previous run, so a failed extraction can't
        # silently load the wrong font.
        if os.path.exists(font_path):
            os.remove(font_path)

        dump = subprocess.run(
            ["ffmpeg", "-y", f"-dump_attachment:{stream_index}", font_path, "-i", source],
            capture_output=True,
            text=True,
        )
        # NOTE(review): ffmpeg is expected to exit with an error here since no
        # output file is specified, so the presence of the file is checked
        # instead of the return code - confirm against the ffmpeg documentation.
        if not os.path.isfile(font_path):
            raise RuntimeError(
                f'ffmpeg failed to extract the attachment at the index {stream_index} of "{video_path}": {dump.stderr.strip()}'
            )

        # from_font_path returns several fonts for one file, hence extend.
        fonts.extend(Font.from_font_path(font_path))
        # We don't directly delete the font file, since we can need it to verify if the font contain glyph
    return fonts
def get_video_timestamps_from_index(video_path: Path, index: int) -> List[int]:
    """
    Read the timestamps of every packet of a video stream.

    Parameters:
        video_path (Path): Video path.
        index (int): Stream index of the video.
    Returns:
        The list of timestamps, sorted in ascending order. Each timestamp is
        the "pts_time" reported by ffprobe multiplied by 1000 and rounded.
    Raises:
        Exception: If ffprobe reports nothing for the file.
        ValueError: If the index isn't in the file or isn't a video stream.
    """
    # The command is an argument list: on POSIX, a single command string is
    # taken as the program name unless shell=True is used.
    ffprobe_output = subprocess.run(
        [
            "ffprobe",
            "-select_streams", str(index),
            "-show_entries", "packet=pts_time:stream=codec_type",
            str(video_path.resolve()),
            "-print_format", "json",
        ],
        capture_output=True,
        text=True,
    )
    ffprobe_output_dict = json.loads(ffprobe_output.stdout)

    if not ffprobe_output_dict:
        raise Exception(f"The file {video_path} is not a video file or the file does not exist.")

    streams = ffprobe_output_dict["streams"]
    if not streams:
        raise ValueError(f"The index {index} is not in the file {video_path}.")

    codec_type = streams[0]["codec_type"]
    if codec_type != "video":
        raise ValueError(f"The index {index} is not a video stream. It is an {codec_type} stream.")

    # + 0.5 to round the result
    half = Decimal("0.5")
    return sorted(
        int(Decimal(packet["pts_time"]) * 1000 + half)
        for packet in ffprobe_output_dict["packets"]
    )
def _probe_stream_entries(video_path: Path, stream_specifier: str, entries: str, *extra_args: str):
    """Return the "streams" list ffprobe reports for the selected streams of ``video_path``."""
    # The command is an argument list: on POSIX, a single command string is
    # taken as the program name unless shell=True is used.
    ffprobe_output = subprocess.run(
        [
            "ffprobe",
            "-select_streams", stream_specifier,
            "-show_entries", f"stream={entries}",
            *extra_args,
            str(video_path.resolve()),
            "-print_format", "json",
        ],
        capture_output=True,
        text=True,
    )
    if ffprobe_output.returncode != 0:
        raise RuntimeError(f'ffprobe failed on the file "{video_path}": {ffprobe_output.stderr.strip()}')
    return json.loads(ffprobe_output.stdout).get("streams", [])


def get_mkv_video_track_detail(video_path: Path):
    """
    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        One dictionary per video stream (stream specifier "V") with the
        entries index, color_space, color_range, width, height and start_time.
    Raises:
        RuntimeError: If ffprobe cannot read the file.
    """
    # "-show_optional_fields always" asks ffprobe to also print the fields it
    # has no value for.
    return _probe_stream_entries(
        video_path,
        "V",
        "index,color_space,color_range,width,height,start_time",
        "-show_optional_fields", "always",
    )


def get_mkv_audio_track_detail(video_path: Path):
    """
    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        One dictionary per audio stream with the entries index and start_time.
    Raises:
        RuntimeError: If ffprobe cannot read the file.
    """
    return _probe_stream_entries(video_path, "a", "index,start_time")


def get_mkv_subtitle_track_detail(video_path: Path):
    """
    Parameters:
        video_path (Path): The path of an mkv file.
    Returns:
        One dictionary per subtitle stream with the entries index and start_time.
    Raises:
        RuntimeError: If ffprobe cannot read the file.
    """
    return _probe_stream_entries(video_path, "s", "index,start_time")
def _verify_color_matrix(video_tracks, subtitles: Dict[int, Document]) -> bool:
    """Print an error for each subtitle whose YCbCr Matrix differs from a video track; return True if any."""
    has_problem = False
    for track in video_tracks:
        color_space = ColorSpaceFFmpeg(track["color_space"])
        color_range = ColorRangeFFmpeg(track["color_range"])

        if color_space == ColorSpaceFFmpeg.UNSPECIFIED:
            # From: https://github.com/arch1t3cht/Aegisub/blob/26a5f00c8af92b6e6460d35411b71cf8835c6503/src/video_provider_ffmpegsource.cpp#L267
            color_space = ColorSpaceFFmpeg.BT709 if track["width"] > 1024 or track["height"] >= 600 else ColorSpaceFFmpeg.BT470BG

        video_stream_color_matrix = get_color_matrix_name(color_space, color_range)

        for sub_index, subtitle in subtitles.items():
            try:
                subtitle_color_matrix = subtitle.info["YCbCr Matrix"]
            except KeyError:
                has_problem = True
                print_error(f"The subtitle at the index {sub_index} doesn't contain an YCbCr Matrix. It should contain the YCbCr Matrix \"{video_stream_color_matrix}\"")
                continue

            if subtitle_color_matrix != video_stream_color_matrix:
                has_problem = True
                print_error(f"The YCbCr Matrix of the subtitle ({subtitle_color_matrix}) at the index {sub_index} doesn't correspond to the video color matrix \"{video_stream_color_matrix}\"")
    return has_problem


def _collect_nonzero_start_times(tracks, codec_type: str, offsets: Dict[int, Dict[str, Any]]) -> None:
    """Record in ``offsets`` the codec type and start time (x1000, truncated) of each track not starting at 0."""
    for track in tracks:
        raw_start_time = track.get("start_time")
        # A track can have no usable start time: the entry is either absent or,
        # with "-show_optional_fields always", presumably the literal "N/A".
        # Such a track cannot be checked, so it is skipped instead of crashing.
        if raw_start_time is None or raw_start_time == "N/A":
            continue

        start_time = Decimal(raw_start_time)
        if start_time != 0:
            offsets[int(track["index"])] = {
                "codec_type": codec_type,
                "start_time": int(start_time * 1000),
            }


def _report_start_times(video_path: Path, offsets: Dict[int, Dict[str, Any]]) -> None:
    """Print the tracks-don't-start-at-0 error and the mkvmerge commands that may correct it."""
    print_error("There is a least one track where the start isn't 0:")

    sync_command = " ".join(
        f"--sync {index}:{track['start_time'] * -1}"
        for index, track in offsets.items()
    )
    sync_command_without_subtitle = " ".join(
        f"--sync {index}:{track['start_time'] * -1}"
        for index, track in offsets.items()
        if track["codec_type"] != "subtitle"
    )

    # Only propose the video/audio-only command when it differs from the
    # command for all the tracks, i.e. when a subtitle track is shifted too.
    if sync_command != sync_command_without_subtitle:
        print("\tHere is an command you can try to change the video timestamps of the video and audio")
        print(f'\t\tmkvmerge --output "{video_path.stem} - Timestamps corrected.mkv" {sync_command_without_subtitle} "{video_path}"')

    print("\tHere is an command you can try to change the video timestamps of all the tracks:")
    print(f'\t\tmkvmerge --output "{video_path.stem} - Timestamps corrected.mkv" {sync_command} "{video_path}"')


def _verify_fonts(subtitles: Dict[int, Document], font_collection: List[Font]) -> bool:
    """Print the missing fonts/glyphs and the weight/italic mismatches; return True if a font or glyph is missing."""
    has_problem = False
    for subtitle_index, subtitle in subtitles.items():
        styles = AssDocument(subtitle).get_used_style()

        for style, usage_data in styles.items():
            font_result = Helpers.get_used_font_by_style(font_collection, style)

            if font_result is None:
                used_lines = " ".join(str(line) for line in usage_data.ordered_lines)
                print_error(f"Subtitle Index {subtitle_index} - You need to mux the font named: '{style.fontname}'")
                print(f"\tUsed on lines: {used_lines}")
                has_problem = True
                continue

            # A weight or italic mismatch is only a warning, it doesn't mark
            # the file as having a problem.
            if font_result.mismatch_bold:
                print_warning(f"Subtitle Index {subtitle_index} - The font has an different weight then the style. Font weight is {font_result.font.weight} and the style is {style.weight}.")
            if font_result.mismatch_italic:
                if style.italic:
                    print_warning(f"Subtitle Index {subtitle_index} - The font is not italic, but the style is.")
                else:
                    print_warning(f"Subtitle Index {subtitle_index} - The font is italic, but the style is not.")
            if font_result.mismatch_bold or font_result.mismatch_italic:
                used_lines = " ".join(str(line) for line in usage_data.ordered_lines)
                print(f"\tUsed on lines: {used_lines}")

            missing_glyphs = font_result.font.get_missing_glyphs(usage_data.characters_used)
            if len(missing_glyphs) != 0:
                has_problem = True
                print_error(f"Subtitle Index {subtitle_index} - '{style.fontname}' is missing the following glyphs: {missing_glyphs}")
    return has_problem


def verify_video_errors_mediainfo(video_path: Path, subtitles: Dict[int, Document], font_collection: List[Font]):
    """
    Verify a video and print every problem found. It verifies if
        - the subtitle YCbCr Matrix is the same as the video.
        - the start time of the video, audio and subtitle tracks is 0.
          It is necessary in order not to have a lag with the subtitles.
        - the fonts contain everything the subtitles need.

    Parameters:
        video_path (Path): The path of an mkv file.
        subtitles (Dict[int, Document]): The subtitles of the video, by stream index.
        font_collection (List[Font]): The fonts muxed in the video.
    """
    video_tracks = get_mkv_video_track_detail(video_path)
    audio_tracks = get_mkv_audio_track_detail(video_path)
    subtitle_tracks = get_mkv_subtitle_track_detail(video_path)

    mkv_has_problem = _verify_color_matrix(video_tracks, subtitles)

    track_codec_type_and_start_time: Dict[int, Dict[str, Any]] = {}
    _collect_nonzero_start_times(video_tracks, "video", track_codec_type_and_start_time)
    _collect_nonzero_start_times(audio_tracks, "audio", track_codec_type_and_start_time)
    _collect_nonzero_start_times(subtitle_tracks, "subtitle", track_codec_type_and_start_time)
    if track_codec_type_and_start_time:
        mkv_has_problem = True
        _report_start_times(video_path, track_codec_type_and_start_time)

    if _verify_fonts(subtitles, font_collection):
        mkv_has_problem = True

    if not mkv_has_problem:
        print(f'The file "{video_path}" has no problem.')
def main():
    """
    Verify every .mkv file of a directory and print the problems found.

    The directory is taken from the -d/--directory argument and defaults to
    the current working directory.

    Raises:
        Exception: If ffprobe or ffmpeg cannot be found.
    """
    parser = ArgumentParser(
        prog="Verify mkv release",
        description="Verify if the subtitle YCbCr Matrix is the same as the video, if the start time of all the tracks is 0 and if the mkv contains all the fonts needed by the .ass subtitles.",
    )
    parser.add_argument(
        "-d",
        "--directory",
        default=os.getcwd(),
        help="""
        Folder where all mkv are store.
        """,
    )
    args = parser.parse_args()

    # Sorted, so the files are always verified in the same order.
    files = sorted(
        path
        for path in Path(args.directory).iterdir()
        if path.is_file() and path.name.endswith(".mkv")
    )

    # Verify if ffprobe and ffmpeg are installed. Both are needed: ffprobe to
    # inspect the tracks, ffmpeg to extract the subtitles and the fonts.
    for tool in ("ffprobe", "ffmpeg"):
        if shutil.which(tool) is None:
            raise Exception(f"{tool} is not in the environment variable.")

    for file in files:
        print(f"\nVerification of: {file}")
        subtitles = get_subtitles(file)
        fonts = get_video_muxed_fonts(file)
        try:
            verify_video_errors_mediainfo(file, subtitles, fonts)
        finally:
            # Remove each extracted font file once, even if the verification
            # fails. Several fonts can come from one file (presumably sharing
            # its filename), so the filenames are de-duplicated first.
            for font_filename in {font.filename for font in fonts}:
                try:
                    os.remove(font_filename)
                except FileNotFoundError:
                    pass
# Only run the verification when this file is executed as a script.
if __name__ == "__main__":
    main()
@moi15moi
Copy link
Author

moi15moi commented Jun 24, 2023

This script verifies that:

  • the subtitle YCbCr Matrix is the same as the video's.
  • the video, audio and subtitle "start_time" are 0. This is necessary so that the subtitles are displayed on the right frame.
  • the mkv contains all the fonts needed to display the .ass subtitles correctly.

To use this script, you need to install 3 dependencies:

ffmpeg - a version from May 5, 2021 or later

pip install aenum -U
pip install FontCollector -U

Then, run the script:

py "Verify mkv release.py" -h

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://gist.github.com/moi15moi/cbc9b29bba40e1eae035a4bc212c60c7

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy