#!/usr/bin/python3
# FFmpeg HDR Metadata Extractor is an HDR metadata extractor that uses ffprobe output.
# Copyright (C) 2024 Kirill Harmatulla Shakirov
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import subprocess
import json
import argparse
# Colour-space names that ColorData.to_x265_params() passes through unchanged
# as the value of x265's "colormatrix" parameter.
x265_valid_color_matrix = [
    "gbr",
    "bt709",
    "unknown",
    "reserved",
    "fcc",
    "bt470bg",
    "smpte170m",
    "smpte240m",
    "ycgco",
    "bt2020nc",
    "bt2020c",
    "smpte2085",
    "chroma-derived-nc",
    "chroma-derived-c",
    "ictcp",
]

# Alternate colour-space spellings, keyed by the alternate name and mapped to
# the corresponding entry of x265_valid_color_matrix.
x265_color_matrix_mapping = {
    "bt2020_ncl": "bt2020nc",
    "bt2020_cl": "bt2020c",
}
# aomenc --help
# https://ffmpeg.org/ffmpeg-codecs.html
libaom_valid_matrix_coefficients = [
"bt709", "fcc73", "bt470bg", "bt601", "smpte240", "ycgco",
"bt2020ncl", "bt2020cl", "smpte2085", "chromncl", "chromcl", "ictcp"
]
libaom_matrix_coefficients_mapping = {
"fcc": "fcc73",
"smpte240m": "smpte240",
"bt2020nc": "bt2020ncl",
"bt2020_ncl": "bt2020ncl",
"bt2020c": "bt2020cl",
"bt2020_cl": "bt2020cl",
"chroma-derived-nc": "chromncl",
"chroma-derived-c": "chromcl"
}
def libaom_get_matrix_coefficients(color_space: str) -> str | None:
if color_space in libaom_valid_matrix_coefficients:
return color_space
elif color_space in libaom_matrix_coefficients_mapping:
return libaom_matrix_coefficients_mapping[color_space]
return None
# https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Parameters.md
# Maps a colour-primaries name to the numeric code passed to SVT-AV1.
libsvtav1_color_primaries_mapping = {
    # SVT-AV1 spellings
    "bt709": 1,
    "bt470m": 4,
    "bt470bg": 5,
    "bt601": 6,
    "smpte240": 7,
    "film": 8,
    "bt2020": 9,
    "xyz": 10,
    "smpte431": 11,
    "smpte432": 12,
    "ebu3213": 22,
    # FFmpeg/ffprobe spellings of the same code points. The lookup key comes
    # from ffprobe output, so without these aliases such streams would be
    # reported as "unspecified".
    "smpte170m": 6,
    "smpte240m": 7,
    "smpte428": 10,
}


def libsvtav1_get_cp_code(color_primaries: str) -> int:
    """Return the SVT-AV1 numeric code for a colour-primaries name.

    :param color_primaries: colour-primaries name (SVT-AV1 or FFmpeg spelling).
    :return: the matching code, or 2 ("unspecified", the default) when the
        name is not in ``libsvtav1_color_primaries_mapping``.
    """
    # 2: unspecified, default
    return libsvtav1_color_primaries_mapping.get(color_primaries, 2)
# https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Parameters.md
# Maps a transfer-characteristics name to the numeric code passed to SVT-AV1.
libsvtav1_transfer_characteristics_mapping = {
    # SVT-AV1 spellings
    "bt709": 1,
    "bt470m": 4,
    "bt470bg": 5,
    "bt601": 6,
    "smpte240": 7,
    "linear": 8,
    "log100": 9,
    "log100-sqrt10": 10,
    "iec61966": 11,
    "bt1361": 12,
    "srgb": 13,
    "bt2020-10": 14,
    "bt2020-12": 15,
    "smpte2084": 16,
    "smpte428": 17,
    "hlg": 18,
    # FFmpeg/ffprobe spellings of the same code points. The lookup key comes
    # from ffprobe output; in particular HLG is reported as "arib-std-b67" and
    # would otherwise be emitted as "unspecified".
    "smpte170m": 6,
    "smpte240m": 7,
    "log316": 10,
    "iec61966-2-4": 11,
    "bt1361e": 12,
    "iec61966-2-1": 13,
    "arib-std-b67": 18,
}


def libsvtav1_get_tch_code(transfer_characteristics: str) -> int:
    """Return the SVT-AV1 numeric code for a transfer-characteristics name.

    :param transfer_characteristics: transfer name (SVT-AV1 or FFmpeg spelling).
    :return: the matching code, or 2 ("unspecified", the default) when the
        name is not in ``libsvtav1_transfer_characteristics_mapping``.
    """
    # 2: unspecified, default
    return libsvtav1_transfer_characteristics_mapping.get(transfer_characteristics, 2)
class MDItem:
    """A rational metadata value given as ``"numerator/denominator"`` text.

    Attributes:
        raw_value: the text exactly as received.
        numerator: integer parsed from the part before the first ``/``.
        denominator: integer parsed from the part after the first ``/``.
        float_value: ``numerator / denominator`` as a float.
    """

    def __init__(self, raw_val: str):
        """Parse ``raw_val``.

        :raises IndexError: if ``raw_val`` contains no ``/``.
        :raises ValueError: if either part is not an integer.
        :raises ZeroDivisionError: if the denominator is zero.
        """
        self.raw_value: str = raw_val
        val_list = raw_val.split("/")
        self.numerator: int = int(val_list[0])
        self.denominator: int = int(val_list[1])
        self.float_value = self.numerator / self.denominator

    def __str__(self) -> str:
        """Return the original, unparsed text."""
        return self.raw_value

    # *If your data for colors is not divided by /50000 or luminance not divided by 10000 and has been simplified,
    # you will have to expand it back out to the full ratio. For example if yours lists 'red_x': '17/25',
    # 'red_y': '8/25' you will have to divide 50000 by the current denominator (25) to get the ratio (2000)
    # and multiply that by the numerator (17 and 8) to get the proper R(34000,16000).
    def expand_to_ratio(self, denominator: int) -> int:
        """Return the numerator this value has when expressed over ``denominator``.

        The result is ``numerator * denominator / self.denominator`` truncated
        toward zero. It is computed with integer arithmetic: going through a
        float quotient can land just below the exact integer (for example
        ``49 * (1 / 49)`` is ``0.9999999999999999``) and truncate one too low.

        :param denominator: target denominator, e.g. 50000 or 10000.
        :return: the rescaled numerator as an ``int``.
        """
        scaled = self.numerator * denominator
        magnitude = int(abs(scaled) // abs(self.denominator))
        # Floor division rounds toward -inf; restore the sign afterwards so the
        # result truncates toward zero as int() did.
        same_sign = (scaled >= 0) == (self.denominator > 0)
        return magnitude if same_sign else -magnitude
class MDItemColorXY:
    """An x/y pair of rational values read from a side-data dictionary.

    The pair is stored under the keys ``<prefix>_x`` and ``<prefix>_y``.
    """

    def __init__(self, side_data: dict, prefix: str):
        """Read ``<prefix>_x`` and ``<prefix>_y`` from ``side_data``.

        :param side_data: dictionary holding the two rational strings.
        :param prefix: key prefix, e.g. ``"red"`` or ``"white_point"``.
        :raises KeyError: if either key is missing.
        """
        x_key = prefix + "_x"
        y_key = prefix + "_y"
        self.prefix: str = prefix
        self.x_data = MDItem(side_data[x_key])
        self.y_data = MDItem(side_data[y_key])

    def __str__(self) -> str:
        """Return both coordinates, one ``key: value`` per line."""
        x_line = f"{self.prefix}_x: {self.x_data}"
        y_line = f"{self.prefix}_y: {self.y_data}"
        return x_line + "\n" + y_line

    def to_x265(self) -> str:
        """Return ``(x,y)`` with both coordinates rescaled to a /50000 numerator."""
        x_scaled = self.x_data.expand_to_ratio(50000)
        y_scaled = self.y_data.expand_to_ratio(50000)
        return "({},{})".format(x_scaled, y_scaled)

    def to_libsvtav1(self) -> str:
        """Return ``(x,y)`` with both coordinates as floats with four decimals."""
        x_text = format(round(self.x_data.float_value, 4), ".4f")
        y_text = format(round(self.y_data.float_value, 4), ".4f")
        return "(" + x_text + "," + y_text + ")"
class MasteringDisplayData:
    """Mastering display metadata: three primaries, white point and luminance range."""

    def __init__(self, side_data: dict):
        """Read the chromaticity pairs and luminance bounds from ``side_data``.

        :param side_data: dictionary providing ``red``, ``green``, ``blue`` and
            ``white_point`` coordinate pairs plus ``min_luminance`` and
            ``max_luminance``, all as rational strings.
        :raises KeyError: if a required key is missing.
        """
        self.red = MDItemColorXY(side_data, "red")
        self.green = MDItemColorXY(side_data, "green")
        self.blue = MDItemColorXY(side_data, "blue")
        self.white_point = MDItemColorXY(side_data, "white_point")
        self.min_luminance = MDItem(side_data["min_luminance"])
        self.max_luminance = MDItem(side_data["max_luminance"])

    def __str__(self) -> str:
        """Return all values in their raw form, one entry per line."""
        return f"{self.red}\n{self.green}\n{self.blue}\n{self.white_point}\n" + \
            f"min_luminance: {self.min_luminance}\nmax_luminance: {self.max_luminance}"

    def to_x265_params(self) -> str:
        """Return the x265 ``master-display=G(..)B(..)R(..)WP(..)L(max,min)`` parameter.

        Chromaticities are rescaled to a /50000 numerator and luminance to a
        /10000 numerator. The key is ``master-display``, which is the name of
        the x265 option.
        """
        return f"master-display=G{self.green.to_x265()}B{self.blue.to_x265()}R{self.red.to_x265()}" + \
            f"WP{self.white_point.to_x265()}" + \
            f"L({self.max_luminance.expand_to_ratio(10000)},{self.min_luminance.expand_to_ratio(10000)})"

    def to_libsvtav1_params(self) -> str:
        """Return the SVT-AV1 ``mastering-display=...`` parameter.

        All values are written as floats with four decimals; luminance is
        ordered ``L(max,min)``.
        """
        return f"mastering-display=G{self.green.to_libsvtav1()}B{self.blue.to_libsvtav1()}" + \
            f"R{self.red.to_libsvtav1()}WP{self.white_point.to_libsvtav1()}" + \
            f"L({round(self.max_luminance.float_value, 4):.4f},{round(self.min_luminance.float_value, 4):.4f})"
class ContentLightLevelData:
    """Content light level metadata: ``max_content`` and ``max_average``."""

    def __init__(self, side_data: dict):
        """Read ``max_content`` and ``max_average`` from ``side_data``.

        :raises KeyError: if either key is missing.
        """
        self.max_content: int = side_data["max_content"]
        self.max_average: int = side_data["max_average"]

    def __str__(self) -> str:
        """Return both values on one line as ``key: value`` pairs."""
        return f"max_content: {self.max_content}, max_average: {self.max_average}"

    # This data, as well as the Content light level <max_content>,<max_average> of 0,0
    # will be fed into the encoder command options.
    # max-cll=1000,239
    def to_x265_params(self) -> str:
        """Return the x265 ``max-cll=<max_content>,<max_average>`` parameter."""
        return f"max-cll={self.max_content},{self.max_average}"

    def to_libsvtav1_params(self) -> str:
        """Return the SVT-AV1 ``content-light=<max_content>,<max_average>`` parameter."""
        return f"content-light={self.max_content},{self.max_average}"
class ColorData:
    """Pixel format and colour description of a frame, with encoder-option formatters."""

    def __init__(self, frame_data: dict):
        """Copy the four colour fields out of ``frame_data``.

        :param frame_data: dictionary with ``pix_fmt``, ``color_space``,
            ``color_primaries`` and ``color_transfer`` entries.
        :raises KeyError: if one of the entries is missing.
        """
        self.pix_fmt = frame_data["pix_fmt"]
        self.color_space = frame_data["color_space"]
        self.color_primaries = frame_data["color_primaries"]
        self.color_transfer = frame_data["color_transfer"]

    def __str__(self) -> str:
        """Return the four fields, one ``name: value`` per line."""
        lines = (
            "pix_fmt: " + self.pix_fmt,
            "color_space: " + self.color_space,
            "color_primaries: " + self.color_primaries,
            "color_transfer: " + self.color_transfer,
        )
        return "\n".join(lines)

    def to_ffmpeg_options(self) -> str:
        """Return the fields as ffmpeg command-line options."""
        options = (
            f"-pix_fmt {self.pix_fmt}",
            f"-colorspace {self.color_space}",
            f"-color_trc {self.color_transfer}",
            f"-color_primaries {self.color_primaries}",
        )
        return " ".join(options)

    def to_x265_params(self) -> str:
        """Return ``colormatrix=<name>`` for x265, or ``""`` when the colour space is unknown."""
        space = self.color_space
        if space in x265_valid_color_matrix:
            matrix = space
        elif space in x265_color_matrix_mapping:
            matrix = x265_color_matrix_mapping[space]
        else:
            return ""
        return f"colormatrix={matrix}"

    # matrix-coefficients=<arg> Matrix coefficients (CICP) of input content:
    # identity, bt709, unspecified, fcc73, bt470bg, bt601, smpte240,
    # ycgco, bt2020ncl, bt2020cl, smpte2085, chromncl, chromcl, ictcp
    def to_libaom_av1_params(self) -> str:
        """Return the libaom colour parameters joined with ``:``.

        ``matrix-coefficients`` is included only when the colour space has a
        libaom name.
        """
        parts = [
            f"color-primaries={self.color_primaries}",
            f"transfer-characteristics={self.color_transfer}",
        ]
        matrix = libaom_get_matrix_coefficients(self.color_space)
        if matrix is not None:
            parts.append(f"matrix-coefficients={matrix}")
        return ":".join(parts)

    # From Fastflix
    # if (fastflix.current_video.color_space and "bt2020" in fastflix.current_video.color_space):
    # svtav1_params.append(f"matrix-coefficients=9")
    def to_libsvtav1_params(self) -> str:
        """Return the SVT-AV1 colour parameters (numeric codes) joined with ``:``.

        ``matrix-coefficients=9`` is appended when the colour-space name
        contains ``bt2020``.
        """
        parts = [
            f"color-primaries={libsvtav1_get_cp_code(self.color_primaries)}",
            f"transfer-characteristics={libsvtav1_get_tch_code(self.color_transfer)}",
        ]
        if "bt2020" in self.color_space:
            parts.append("matrix-coefficients=9")
        return ":".join(parts)
def parse_frame_data(frame_data: dict):
    """Print the colour metadata of a frame and the matching encoder options.

    Prints the colour fields, any mastering display and content light level
    side data, and the resulting option strings for ffmpeg, x265, libsvtav1
    and libaom-av1. If a required colour field is missing, a diagnostic is
    printed and nothing else is done.

    :param frame_data: one frame dictionary from ffprobe's JSON output. A
        missing or empty ``side_data_list`` is treated as "no side data".
    :return: ``None``.
    """
    color_params = ["pix_fmt", "color_space", "color_primaries", "color_transfer"]
    missing_params = [x for x in color_params if x not in frame_data]
    if missing_params:
        print(f"Missing {missing_params} parameters in frame metadata!")
        print("Probably not an HDR stream!")
        print("Exit!")
        return

    color_data = ColorData(frame_data)
    print("Color Data:")
    print(color_data)
    print("")

    # Collect option fragments and join them at the end, so an empty colour
    # fragment never produces a leading ":" separator.
    x265_parts: list[str] = [color_data.to_x265_params()]
    libsvtav1_parts: list[str] = [color_data.to_libsvtav1_params()]
    libaom_av1_params: str = color_data.to_libaom_av1_params()

    # Frames without side data have no "side_data_list" entry at all.
    side_data_list = frame_data.get("side_data_list") or []
    for side_data in side_data_list:
        side_data_type = side_data.get("side_data_type")
        if side_data_type == "Mastering display metadata":
            mastering_display_data = MasteringDisplayData(side_data)
            x265_parts.append(mastering_display_data.to_x265_params())
            libsvtav1_parts.append(mastering_display_data.to_libsvtav1_params())
            print("Mastering display metadata:")
            print(mastering_display_data)
            print("")
        elif side_data_type == "Content light level metadata":
            content_light_level_data = ContentLightLevelData(side_data)
            x265_parts.append(content_light_level_data.to_x265_params())
            libsvtav1_parts.append(content_light_level_data.to_libsvtav1_params())
            print("Content light level metadata:")
            print(content_light_level_data)
            print("")

    x265_params = ":".join(part for part in x265_parts if part)
    libsvtav1_params = ":".join(part for part in libsvtav1_parts if part)

    print(f"\nFFmpeg options: {color_data.to_ffmpeg_options()}\n")
    print(f"x265 params: {x265_params}\n")
    print(f"libsvtav1 params: {libsvtav1_params}\n")
    print(f"libaom-av1 params: {libaom_av1_params}\n")
    print("Done!")
if __name__ == '__main__':
    # Initialize arguments parser
    parser = argparse.ArgumentParser(
        prog="get_hdr_metadata.py",
        description="This program parse HDR metadata from ffprobe output and generates ffmpeg parameters based on it.",
        epilog="Have a nice day!")
    parser.add_argument("-i", "--input-file",
                        action="store",
                        default=None,
                        help="video file name from which hdr metadata needs to be extracted.",
                        required=True)
    parser.add_argument("-s", "--input-stream",
                        action="store",
                        type=int,
                        default=0,
                        help="video stream number in the input file, default 0.",
                        required=False)
    parser.add_argument("-e", "--ffprobe-binary",
                        action="store",
                        default="ffprobe",
                        help="specify ffprobe binary to use, default - \"ffprobe\".",
                        required=False)
    arguments = parser.parse_args()

    print(f"Reading data from file: {arguments.input_file}")
    print(f"Stream: {arguments.input_stream}")
    print("")

    # https://codecalamity.com/encoding-uhd-4k-hdr10-videos-with-ffmpeg/
    # https://www.reddit.com/r/AV1/comments/yb0eck/getting_accurate_hdr_data_from_h264_265_to_encode/
    # https://www.reddit.com/r/AV1/comments/ut2y4l/svtav1_hdr_encoding_libsvtav1_with_ffmpeg/
    # -hide_banner -loglevel warning   Don't display what we don't need
    # -select_streams <index>          Only inspect the stream chosen with -s/--input-stream
    # -print_format json               Make it easier to parse
    # -read_intervals "%+#1"           Only grab data from the first frame
    # -show_entries ...                Pick only the relevant data we want
    # -i <file>                        The input file given with -i/--input-file
    ffprobe_cmd: list[str] = [arguments.ffprobe_binary, "-hide_banner", "-loglevel", "warning",
                              "-select_streams", str(arguments.input_stream),
                              "-print_format", "json", "-show_frames", "-read_intervals", "%+#1",
                              "-show_entries",
                              "stream=codec_type:" +
                              "frame=pix_fmt,color_space,color_primaries,color_transfer,side_data_list",
                              "-i", arguments.input_file]
    try:
        result = subprocess.run(ffprobe_cmd, capture_output=True, encoding="UTF-8")
        if result.returncode == 0 and result.stdout is not None:
            metadata = json.loads(result.stdout)
            # Either list can be missing or empty (e.g. the requested stream
            # does not exist); report that instead of failing on [0].
            streams = metadata.get("streams") or []
            frames = metadata.get("frames") or []
            if not streams:
                print(f"Stream {arguments.input_stream} was not found in the input file!")
                print("Exit!")
            else:
                stream_codec_type = streams[0]["codec_type"]
                if stream_codec_type != "video":
                    print(f"Selected stream type is \"{stream_codec_type}\"")
                    print("Not a video stream!")
                    print("Exit!")
                elif not frames:
                    print("No frames were read from the selected stream!")
                    print("Exit!")
                else:
                    parse_frame_data(frames[0])
        else:
            print("Error executing ffprobe binary!")
            print(result.stderr)
            print("Exit!")
    except FileNotFoundError as err:
        print("Cannot find ffprobe binary!")
        print("Try to specify it thru -e command line argument.")
        print(err)
        print("Exit!")
    except Exception as ex:
        # Top-level boundary: report anything unexpected instead of a traceback.
        print("Unknown error!")
        print(ex)
        print("Exit!")