ffmpeg_normalize
from ._errors import FFmpegNormalizeError
from ._ffmpeg_normalize import FFmpegNormalize
from ._media_file import MediaFile
from ._streams import AudioStream, MediaStream, SubtitleStream, VideoStream
from ._version import __version__

# Name of this package as a string.
__module_name__ = "ffmpeg_normalize"

# Names re-exported from the private submodules as the package's public API.
__all__ = [
    "FFmpegNormalize",
    "FFmpegNormalizeError",
    "MediaFile",
    "AudioStream",
    "VideoStream",
    "SubtitleStream",
    "MediaStream",
    "__version__",
]
class FFmpegNormalize:
    """
    ffmpeg-normalize class.

    Holds the normalization settings shared by all media files and runs the
    normalization for every file registered through ``add_media_file``.

    Args:
        normalization_type (str, optional): Normalization type. Defaults to "ebu".
        target_level (float, optional): Target level. Defaults to -23.0.
        print_stats (bool, optional): Print loudnorm stats. Defaults to False.
        loudness_range_target (float, optional): Loudness range target. Defaults to 7.0.
        keep_loudness_range_target (bool, optional): Keep loudness range target. Defaults to False.
        keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
        true_peak (float, optional): True peak. Defaults to -2.0.
        offset (float, optional): Offset. Defaults to 0.0.
        lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
        auto_lower_loudness_target (bool, optional): Automatically lower EBU Integrated Loudness Target. Defaults to False.
        dual_mono (bool, optional): Dual mono. Defaults to False.
        dynamic (bool, optional): Dynamic. Defaults to False.
        audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
        audio_bitrate (float, optional): Audio bitrate. Defaults to None.
        sample_rate (float | int, optional): Sample rate, stored as an int. Defaults to None.
        audio_channels (int | None, optional): Audio channels. Defaults to None.
        keep_original_audio (bool, optional): Keep original audio. Defaults to False.
        pre_filter (str, optional): Pre filter. Defaults to None.
        post_filter (str, optional): Post filter. Defaults to None.
        video_codec (str, optional): Video codec. Defaults to "copy".
        video_disable (bool, optional): Disable video. Defaults to False.
        subtitle_disable (bool, optional): Disable subtitles. Defaults to False.
        metadata_disable (bool, optional): Disable metadata. Defaults to False.
        chapters_disable (bool, optional): Disable chapters. Defaults to False.
        extra_input_options (list, optional): Extra input options. Defaults to None.
        extra_output_options (list, optional): Extra output options. Defaults to None.
        output_format (str, optional): Output format. Defaults to None.
        extension (str, optional): Output file extension to use for output files that were not explicitly specified. Defaults to "mkv".
        dry_run (bool, optional): Dry run. Defaults to False.
        debug (bool, optional): Debug. Defaults to False.
        progress (bool, optional): Progress. Defaults to False.

    Raises:
        FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter,
            if the normalization type is unknown, if the two keep-loudness-range options are combined,
            or if the output format cannot hold PCM audio.
    """

    def __init__(
        self,
        normalization_type: Literal["ebu", "rms", "peak"] = "ebu",
        target_level: float = -23.0,
        print_stats: bool = False,
        loudness_range_target: float = 7.0,
        keep_loudness_range_target: bool = False,
        keep_lra_above_loudness_range_target: bool = False,
        true_peak: float = -2.0,
        offset: float = 0.0,
        lower_only: bool = False,
        auto_lower_loudness_target: bool = False,
        dual_mono: bool = False,
        dynamic: bool = False,
        audio_codec: str = "pcm_s16le",
        audio_bitrate: float | None = None,
        sample_rate: float | int | None = None,
        audio_channels: int | None = None,
        keep_original_audio: bool = False,
        pre_filter: str | None = None,
        post_filter: str | None = None,
        video_codec: str = "copy",
        video_disable: bool = False,
        subtitle_disable: bool = False,
        metadata_disable: bool = False,
        chapters_disable: bool = False,
        extra_input_options: list[str] | None = None,
        extra_output_options: list[str] | None = None,
        output_format: str | None = None,
        extension: str = "mkv",
        dry_run: bool = False,
        debug: bool = False,
        progress: bool = False,
    ):
        self.ffmpeg_exe = get_ffmpeg_exe()
        self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()

        if normalization_type not in NORMALIZATION_TYPES:
            raise FFmpegNormalizeError(
                "Normalization type must be: 'ebu', 'rms', or 'peak'"
            )
        self.normalization_type = normalization_type

        if not self.has_loudnorm_capabilities and self.normalization_type == "ebu":
            raise FFmpegNormalizeError(
                "Your ffmpeg does not support the 'loudnorm' EBU R128 filter. "
                "Please install ffmpeg v4.2 or above, or choose another normalization type."
            )

        # EBU normalization accepts a narrower target range than peak/RMS.
        if self.normalization_type == "ebu":
            self.target_level = check_range(target_level, -70, -5, name="target_level")
        else:
            self.target_level = check_range(target_level, -99, 0, name="target_level")

        self.print_stats = print_stats

        self.loudness_range_target = check_range(
            loudness_range_target, 1, 50, name="loudness_range_target"
        )

        self.keep_loudness_range_target = keep_loudness_range_target

        # 7.0 is the default, so any other value was set explicitly by the caller.
        if self.keep_loudness_range_target and loudness_range_target != 7.0:
            _logger.warning(
                "Setting --keep-loudness-range-target will override your set loudness range target value! "
                "Remove --keep-loudness-range-target or remove the --lrt/--loudness-range-target option."
            )

        self.keep_lra_above_loudness_range_target = keep_lra_above_loudness_range_target

        if (
            self.keep_loudness_range_target
            and self.keep_lra_above_loudness_range_target
        ):
            raise FFmpegNormalizeError(
                "Options --keep-loudness-range-target and --keep-lra-above-loudness-range-target are mutually exclusive! "
                "Please choose just one of the two options."
            )

        self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
        self.offset = check_range(offset, -99, 99, name="offset")
        self.lower_only = lower_only
        self.auto_lower_loudness_target = auto_lower_loudness_target

        # Ensure library user is passing correct types.
        # NOTE: kept as asserts so the raised exception type stays AssertionError;
        # they are skipped when Python runs with -O.
        assert isinstance(dual_mono, bool), "dual_mono must be bool"
        assert isinstance(dynamic, bool), "dynamic must be bool"

        self.dual_mono = dual_mono
        self.dynamic = dynamic
        self.sample_rate = None if sample_rate is None else int(sample_rate)
        self.audio_channels = None if audio_channels is None else int(audio_channels)

        self.audio_codec = audio_codec
        self.audio_bitrate = audio_bitrate
        self.keep_original_audio = keep_original_audio
        self.video_codec = video_codec
        self.video_disable = video_disable
        self.subtitle_disable = subtitle_disable
        self.metadata_disable = metadata_disable
        self.chapters_disable = chapters_disable

        self.extra_input_options = extra_input_options
        self.extra_output_options = extra_output_options
        self.pre_filter = pre_filter
        self.post_filter = post_filter

        self.output_format = output_format
        self.extension = extension
        self.dry_run = dry_run
        self.debug = debug
        self.progress = progress

        if (
            self.audio_codec is None or "pcm" in self.audio_codec
        ) and self.output_format in PCM_INCOMPATIBLE_FORMATS:
            raise FFmpegNormalizeError(
                f"Output format {self.output_format} does not support PCM audio. "
                "Please choose a suitable audio codec with the -c:a option."
            )

        self.stats: list[LoudnessStatisticsWithMetadata] = []
        self.media_files: list[MediaFile] = []
        self.file_count = 0

    def add_media_file(self, input_file: str, output_file: str) -> None:
        """
        Add a media file to normalize

        Args:
            input_file (str): Path to input file
            output_file (str): Path to output file

        Raises:
            FFmpegNormalizeError: If the input file does not exist, or if a PCM
                audio codec is combined with an output extension that cannot hold PCM.
        """
        if not os.path.exists(input_file):
            raise FFmpegNormalizeError(f"file {input_file} does not exist")

        ext = os.path.splitext(output_file)[1][1:]
        # Compare case-insensitively, as MediaFile does with output_ext.lower().
        # NOTE(review): assumes PCM_INCOMPATIBLE_EXTS holds lowercase entries.
        if (
            self.audio_codec is None or "pcm" in self.audio_codec
        ) and ext.lower() in PCM_INCOMPATIBLE_EXTS:
            raise FFmpegNormalizeError(
                f"Output extension {ext} does not support PCM audio. "
                "Please choose a suitable audio codec with the -c:a option."
            )

        self.media_files.append(MediaFile(self, input_file, output_file))
        self.file_count += 1

    def run_normalization(self) -> None:
        """
        Run the normalization procedures

        Every added media file is normalized in order. When more than one file
        was added, a failing file is logged and skipped; with a single file the
        error is re-raised. If ``print_stats`` is set, the statistics of all
        files are written to stdout as JSON afterwards.
        """
        for index, media_file in enumerate(
            tqdm(self.media_files, desc="File", disable=not self.progress, position=0)
        ):
            _logger.info(
                f"Normalizing file {media_file} ({index + 1} of {self.file_count})"
            )

            try:
                media_file.run_normalization()
            except Exception as e:
                if len(self.media_files) > 1:
                    # simply warn and do not die
                    _logger.error(
                        f"Error processing input file {media_file}, will "
                        f"continue batch-processing. Error was: {e}"
                    )
                    # the file failed, so do not report it as written
                    continue
                # raise the error so the program will exit
                raise

            _logger.info(f"Normalized file written to {media_file.output_file}")

        if self.print_stats:
            all_stats = chain.from_iterable(
                media_file.get_stats() for media_file in self.media_files
            )
            json.dump(list(all_stats), sys.stdout, indent=4)
            print()
ffmpeg-normalize class.
Arguments:
- normalization_type (str, optional): Normalization type. Defaults to "ebu".
- target_level (float, optional): Target level. Defaults to -23.0.
- print_stats (bool, optional): Print loudnorm stats. Defaults to False.
- loudness_range_target (float, optional): Loudness range target. Defaults to 7.0.
- keep_loudness_range_target (bool, optional): Keep loudness range target. Defaults to False.
- keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
- true_peak (float, optional): True peak. Defaults to -2.0.
- offset (float, optional): Offset. Defaults to 0.0.
- lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
- auto_lower_loudness_target (bool, optional): Automatically lower EBU Integrated Loudness Target. Defaults to False.
- dual_mono (bool, optional): Dual mono. Defaults to False.
- dynamic (bool, optional): Dynamic. Defaults to False.
- audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
- audio_bitrate (float, optional): Audio bitrate. Defaults to None.
- sample_rate (int, optional): Sample rate. Defaults to None.
- audio_channels (int | None, optional): Audio channels. Defaults to None.
- keep_original_audio (bool, optional): Keep original audio. Defaults to False.
- pre_filter (str, optional): Pre filter. Defaults to None.
- post_filter (str, optional): Post filter. Defaults to None.
- video_codec (str, optional): Video codec. Defaults to "copy".
- video_disable (bool, optional): Disable video. Defaults to False.
- subtitle_disable (bool, optional): Disable subtitles. Defaults to False.
- metadata_disable (bool, optional): Disable metadata. Defaults to False.
- chapters_disable (bool, optional): Disable chapters. Defaults to False.
- extra_input_options (list, optional): Extra input options. Defaults to None.
- extra_output_options (list, optional): Extra output options. Defaults to None.
- output_format (str, optional): Output format. Defaults to None.
- extension (str, optional): Output file extension to use for output files that were not explicitly specified. Defaults to "mkv".
- dry_run (bool, optional): Dry run. Defaults to False.
- debug (bool, optional): Debug. Defaults to False.
- progress (bool, optional): Progress. Defaults to False.
Raises:
- FFmpegNormalizeError: If the ffmpeg executable is not found or does not support the loudnorm filter.
def __init__(
    self,
    normalization_type: Literal["ebu", "rms", "peak"] = "ebu",
    target_level: float = -23.0,
    print_stats: bool = False,
    loudness_range_target: float = 7.0,
    keep_loudness_range_target: bool = False,
    keep_lra_above_loudness_range_target: bool = False,
    true_peak: float = -2.0,
    offset: float = 0.0,
    lower_only: bool = False,
    auto_lower_loudness_target: bool = False,
    dual_mono: bool = False,
    dynamic: bool = False,
    audio_codec: str = "pcm_s16le",
    audio_bitrate: float | None = None,
    sample_rate: float | int | None = None,
    audio_channels: int | None = None,
    keep_original_audio: bool = False,
    pre_filter: str | None = None,
    post_filter: str | None = None,
    video_codec: str = "copy",
    video_disable: bool = False,
    subtitle_disable: bool = False,
    metadata_disable: bool = False,
    chapters_disable: bool = False,
    extra_input_options: list[str] | None = None,
    extra_output_options: list[str] | None = None,
    output_format: str | None = None,
    extension: str = "mkv",
    dry_run: bool = False,
    debug: bool = False,
    progress: bool = False,
):
    """Validate the normalization settings and store them on the instance."""
    self.ffmpeg_exe = get_ffmpeg_exe()
    self.has_loudnorm_capabilities = ffmpeg_has_loudnorm()

    if normalization_type not in NORMALIZATION_TYPES:
        raise FFmpegNormalizeError(
            "Normalization type must be: 'ebu', 'rms', or 'peak'"
        )
    self.normalization_type = normalization_type
    uses_ebu = normalization_type == "ebu"

    if uses_ebu and not self.has_loudnorm_capabilities:
        raise FFmpegNormalizeError(
            "Your ffmpeg does not support the 'loudnorm' EBU R128 filter. "
            "Please install ffmpeg v4.2 or above, or choose another normalization type."
        )

    # The allowed target range depends on the normalization type.
    level_min, level_max = (-70, -5) if uses_ebu else (-99, 0)
    self.target_level = check_range(
        target_level, level_min, level_max, name="target_level"
    )
    self.print_stats = print_stats
    self.loudness_range_target = check_range(
        loudness_range_target, 1, 50, name="loudness_range_target"
    )

    self.keep_loudness_range_target = keep_loudness_range_target
    custom_lrt = loudness_range_target != 7.0
    if keep_loudness_range_target and custom_lrt:
        _logger.warning(
            "Setting --keep-loudness-range-target will override your set loudness range target value! "
            "Remove --keep-loudness-range-target or remove the --lrt/--loudness-range-target option."
        )

    self.keep_lra_above_loudness_range_target = keep_lra_above_loudness_range_target
    if keep_loudness_range_target and keep_lra_above_loudness_range_target:
        raise FFmpegNormalizeError(
            "Options --keep-loudness-range-target and --keep-lra-above-loudness-range-target are mutually exclusive! "
            "Please choose just one of the two options."
        )

    self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
    self.offset = check_range(offset, -99, 99, name="offset")
    self.lower_only = lower_only
    self.auto_lower_loudness_target = auto_lower_loudness_target

    # Library users must pass real booleans for these two flags.
    assert isinstance(dual_mono, bool), "dual_mono must be bool"
    assert isinstance(dynamic, bool), "dynamic must be bool"
    self.dual_mono = dual_mono
    self.dynamic = dynamic

    if sample_rate is None:
        self.sample_rate = None
    else:
        self.sample_rate = int(sample_rate)
    if audio_channels is None:
        self.audio_channels = None
    else:
        self.audio_channels = int(audio_channels)

    # Audio settings.
    self.audio_codec = audio_codec
    self.audio_bitrate = audio_bitrate
    self.keep_original_audio = keep_original_audio

    # Video, subtitle, metadata and chapter settings.
    self.video_codec = video_codec
    self.video_disable = video_disable
    self.subtitle_disable = subtitle_disable
    self.metadata_disable = metadata_disable
    self.chapters_disable = chapters_disable

    # Extra ffmpeg options and filters.
    self.extra_input_options = extra_input_options
    self.extra_output_options = extra_output_options
    self.pre_filter = pre_filter
    self.post_filter = post_filter

    # Output and runtime settings.
    self.output_format = output_format
    self.extension = extension
    self.dry_run = dry_run
    self.debug = debug
    self.progress = progress

    wants_pcm = audio_codec is None or "pcm" in audio_codec
    if wants_pcm and output_format in PCM_INCOMPATIBLE_FORMATS:
        raise FFmpegNormalizeError(
            f"Output format {self.output_format} does not support PCM audio. "
            "Please choose a suitable audio codec with the -c:a option."
        )

    self.stats: list[LoudnessStatisticsWithMetadata] = []
    self.media_files: list[MediaFile] = []
    self.file_count = 0
def add_media_file(self, input_file: str, output_file: str) -> None:
    """
    Add a media file to normalize

    Args:
        input_file (str): Path to input file
        output_file (str): Path to output file

    Raises:
        FFmpegNormalizeError: If the input file does not exist, or if a PCM
            audio codec is combined with an output extension that cannot hold PCM.
    """
    if not os.path.exists(input_file):
        raise FFmpegNormalizeError(f"file {input_file} does not exist")

    ext = os.path.splitext(output_file)[1][1:]
    # Compare case-insensitively so that e.g. "OUT.MP3" is rejected like "out.mp3".
    # NOTE(review): assumes PCM_INCOMPATIBLE_EXTS holds lowercase entries.
    if (
        self.audio_codec is None or "pcm" in self.audio_codec
    ) and ext.lower() in PCM_INCOMPATIBLE_EXTS:
        raise FFmpegNormalizeError(
            f"Output extension {ext} does not support PCM audio. "
            "Please choose a suitable audio codec with the -c:a option."
        )

    self.media_files.append(MediaFile(self, input_file, output_file))
    self.file_count += 1
Add a media file to normalize
Arguments:
- input_file (str): Path to input file
- output_file (str): Path to output file
def run_normalization(self) -> None:
    """
    Run the normalization procedures

    Every added media file is normalized in order. When more than one file
    was added, a failing file is logged and skipped; with a single file the
    error is re-raised. If ``print_stats`` is set, the statistics of all
    files are written to stdout as JSON afterwards.
    """
    for index, media_file in enumerate(
        tqdm(self.media_files, desc="File", disable=not self.progress, position=0)
    ):
        _logger.info(
            f"Normalizing file {media_file} ({index + 1} of {self.file_count})"
        )

        try:
            media_file.run_normalization()
        except Exception as e:
            if len(self.media_files) > 1:
                # simply warn and do not die
                _logger.error(
                    f"Error processing input file {media_file}, will "
                    f"continue batch-processing. Error was: {e}"
                )
                # the file failed, so do not report it as written
                continue
            # raise the error so the program will exit
            raise

        _logger.info(f"Normalized file written to {media_file.output_file}")

    if self.print_stats:
        all_stats = chain.from_iterable(
            media_file.get_stats() for media_file in self.media_files
        )
        json.dump(list(all_stats), sys.stdout, indent=4)
        print()
Run the normalization procedures
Common base class for all non-exit exceptions.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
- add_note
- args
class MediaFile:
    """
    Class that holds a file, its streams and adjustments
    """

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str
    ):
        """
        Initialize a media file for later normalization by parsing the streams.

        Args:
            ffmpeg_normalize (FFmpegNormalize): reference to overall settings
            input_file (str): Path to input file
            output_file (str): Path to output file
        """
        self.ffmpeg_normalize = ffmpeg_normalize
        self.skip = False
        self.input_file = input_file
        self.output_file = output_file
        current_ext = os.path.splitext(output_file)[1][1:]
        # we need to check if it's empty, e.g. /dev/null or NUL
        if current_ext == "" or self.output_file == os.devnull:
            self.output_ext = self.ffmpeg_normalize.extension
        else:
            self.output_ext = current_ext
        self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}}

        self.parse_streams()

    def _stream_ids(self) -> list[int]:
        """
        Get all stream IDs of this file.

        Returns:
            list: List of stream IDs
        """
        return (
            list(self.streams["audio"].keys())
            + list(self.streams["video"].keys())
            + list(self.streams["subtitle"].keys())
        )

    def __repr__(self) -> str:
        return os.path.basename(self.input_file)

    def parse_streams(self) -> None:
        """
        Try to parse all input streams from file and set them in self.streams.

        Raises:
            FFmpegNormalizeError: If no audio streams are found
        """
        _logger.debug(f"Parsing streams of {self.input_file}")

        # Copy zero seconds of all streams to a null output; the stream
        # information is then read from the command's text output.
        cmd = [
            self.ffmpeg_normalize.ffmpeg_exe,
            "-i",
            self.input_file,
            "-c",
            "copy",
            "-t",
            "0",
            "-map",
            "0",
            "-f",
            "null",
            os.devnull,
        ]

        output = CommandRunner().run_command(cmd).get_output()

        _logger.debug("Stream parsing command output:")
        _logger.debug(output)

        output_lines = [line.strip() for line in output.split("\n")]

        duration = None
        for line in output_lines:
            if "Duration" in line:
                if duration_search := DUR_REGEX.search(line):
                    duration = _to_ms(**duration_search.groupdict()) / 1000
                    _logger.debug(f"Found duration: {duration} s")
                else:
                    _logger.warning("Could not extract duration from input file!")

            if not line.startswith("Stream"):
                continue

            stream_id_match = re.search(r"#0:([\d]+)", line)
            if not stream_id_match:
                continue
            stream_id = int(stream_id_match.group(1))
            # skip streams that were already registered
            if stream_id in self._stream_ids():
                continue

            if "Audio" in line:
                _logger.debug(f"Found audio stream at index {stream_id}")
                sample_rate_match = re.search(r"(\d+) Hz", line)
                sample_rate = (
                    int(sample_rate_match.group(1)) if sample_rate_match else None
                )
                bit_depth_match = re.search(r"[sfu](\d+)(p|le|be)?", line)
                bit_depth = int(bit_depth_match.group(1)) if bit_depth_match else None
                self.streams["audio"][stream_id] = AudioStream(
                    self.ffmpeg_normalize,
                    self,
                    stream_id,
                    sample_rate,
                    bit_depth,
                    duration,
                )

            elif "Video" in line:
                _logger.debug(f"Found video stream at index {stream_id}")
                self.streams["video"][stream_id] = VideoStream(
                    self.ffmpeg_normalize, self, stream_id
                )

            elif "Subtitle" in line:
                _logger.debug(f"Found subtitle stream at index {stream_id}")
                self.streams["subtitle"][stream_id] = SubtitleStream(
                    self.ffmpeg_normalize, self, stream_id
                )

        if not self.streams["audio"]:
            raise FFmpegNormalizeError(
                f"Input file {self.input_file} does not contain any audio streams"
            )

        if self.output_ext.lower() in ONE_STREAM and len(self.streams["audio"]) > 1:
            _logger.warning(
                "Output file only supports one stream. Keeping only first audio stream."
            )
            first_stream = next(iter(self.streams["audio"].values()))
            self.streams["audio"] = {first_stream.stream_id: first_stream}
            self.streams["video"] = {}
            self.streams["subtitle"] = {}

    def run_normalization(self) -> None:
        """
        Run the normalization process for this file.
        """
        _logger.debug(f"Running normalization for {self.input_file}")

        # run the first pass to get loudness stats
        self._first_pass()

        # run the second pass as a whole
        if self.ffmpeg_normalize.progress:
            with tqdm(
                total=100,
                position=1,
                desc="Second Pass",
                bar_format=TQDM_BAR_FORMAT,
            ) as pbar:
                for progress in self._second_pass():
                    pbar.update(progress - pbar.n)
        else:
            for _ in self._second_pass():
                pass

    def _can_write_output_video(self) -> bool:
        """
        Determine whether the output file can contain video at all.

        Returns:
            bool: True if the output file can contain video, False otherwise
        """
        if self.output_ext.lower() in AUDIO_ONLY_FORMATS:
            return False

        return not self.ffmpeg_normalize.video_disable

    def _first_pass(self) -> None:
        """
        Run the first pass of the normalization process.
        """
        _logger.debug(f"Parsing normalization info for {self.input_file}")

        audio_streams = self.streams["audio"]
        for index, audio_stream in enumerate(audio_streams.values()):
            if self.ffmpeg_normalize.normalization_type == "ebu":
                fun = audio_stream.parse_loudnorm_stats
            else:
                fun = audio_stream.parse_astats

            if self.ffmpeg_normalize.progress:
                with tqdm(
                    total=100,
                    position=1,
                    desc=f"Stream {index + 1}/{len(audio_streams)}",
                    bar_format=TQDM_BAR_FORMAT,
                ) as pbar:
                    for progress in fun():
                        pbar.update(progress - pbar.n)
            else:
                for _ in fun():
                    pass

    def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
        """
        Return the audio filter command and output labels needed.

        Returns:
            tuple[str, list[str]]: filter_complex command and the required output labels
        """
        filter_chains = []
        output_labels = []

        for audio_stream in self.streams["audio"].values():
            # With lower_only, a stream measured below the target is passed
            # through unchanged instead of being raised in loudness.
            skip_normalization = False
            if self.ffmpeg_normalize.lower_only:
                if self.ffmpeg_normalize.normalization_type == "ebu":
                    if (
                        audio_stream.loudness_statistics["ebu_pass1"] is not None
                        and audio_stream.loudness_statistics["ebu_pass1"]["input_i"]
                        < self.ffmpeg_normalize.target_level
                    ):
                        skip_normalization = True
                elif self.ffmpeg_normalize.normalization_type == "peak":
                    if (
                        audio_stream.loudness_statistics["max"] is not None
                        and audio_stream.loudness_statistics["max"]
                        < self.ffmpeg_normalize.target_level
                    ):
                        skip_normalization = True
                elif self.ffmpeg_normalize.normalization_type == "rms":
                    if (
                        audio_stream.loudness_statistics["mean"] is not None
                        and audio_stream.loudness_statistics["mean"]
                        < self.ffmpeg_normalize.target_level
                    ):
                        skip_normalization = True

            if skip_normalization:
                _logger.warning(
                    f"Stream {audio_stream.stream_id} had measured input loudness lower than target, skipping normalization."
                )
                normalization_filter = "acopy"
            elif self.ffmpeg_normalize.normalization_type == "ebu":
                normalization_filter = audio_stream.get_second_pass_opts_ebu()
            else:
                normalization_filter = audio_stream.get_second_pass_opts_peakrms()

            input_label = f"[0:{audio_stream.stream_id}]"
            output_label = f"[norm{audio_stream.stream_id}]"
            output_labels.append(output_label)

            # pre filter -> normalization -> post filter, joined into one chain
            filter_chain = []

            if self.ffmpeg_normalize.pre_filter:
                filter_chain.append(self.ffmpeg_normalize.pre_filter)

            filter_chain.append(normalization_filter)

            if self.ffmpeg_normalize.post_filter:
                filter_chain.append(self.ffmpeg_normalize.post_filter)

            filter_chains.append(input_label + ",".join(filter_chain) + output_label)

        filter_complex_cmd = ";".join(filter_chains)

        return filter_complex_cmd, output_labels

    def _second_pass(self) -> Iterator[float]:
        """
        Construct the second pass command and run it.

        Yields the progress values reported by the command runner (or a single
        100 in dry-run mode). Unless the output is the null device, ffmpeg
        writes to a temporary directory and the result is moved to
        ``output_file`` on success; the temporary directory is always removed.

        FIXME: make this method simpler
        """
        _logger.info(f"Running second pass for {self.input_file}")

        # get the target output stream types depending on the options
        output_stream_types: list[Literal["audio", "video", "subtitle"]] = ["audio"]
        if self._can_write_output_video():
            output_stream_types.append("video")
        if not self.ffmpeg_normalize.subtitle_disable:
            output_stream_types.append("subtitle")

        # base command, here we will add all other options
        cmd = [self.ffmpeg_normalize.ffmpeg_exe, "-hide_banner", "-y"]

        # extra options (if any)
        if self.ffmpeg_normalize.extra_input_options:
            cmd.extend(self.ffmpeg_normalize.extra_input_options)

        # get complex filter command
        audio_filter_cmd, output_labels = self._get_audio_filter_cmd()

        # add input file and basic filter
        cmd.extend(["-i", self.input_file, "-filter_complex", audio_filter_cmd])

        # map metadata, only if needed
        if self.ffmpeg_normalize.metadata_disable:
            cmd.extend(["-map_metadata", "-1"])
        else:
            # map global metadata
            cmd.extend(["-map_metadata", "0"])
            # map per-stream metadata (e.g. language tags)
            for stream_type in output_stream_types:
                stream_key = stream_type[0]
                if stream_type not in self.streams:
                    continue
                for idx in range(len(self.streams[stream_type])):
                    cmd.extend(
                        [
                            f"-map_metadata:s:{stream_key}:{idx}",
                            f"0:s:{stream_key}:{idx}",
                        ]
                    )

        # map chapters if needed
        if self.ffmpeg_normalize.chapters_disable:
            cmd.extend(["-map_chapters", "-1"])
        else:
            cmd.extend(["-map_chapters", "0"])

        # collect all '-map' and codecs needed for output video based on input video
        if self.streams["video"]:
            if self._can_write_output_video():
                for s in self.streams["video"]:
                    cmd.extend(["-map", f"0:{s}"])
                # set codec (copy by default)
                cmd.extend(["-c:v", self.ffmpeg_normalize.video_codec])
            elif not self.ffmpeg_normalize.video_disable:
                _logger.warning(
                    f"The chosen output extension {self.output_ext} does not support video/cover art. It will be disabled."
                )

        # ... and map the output of the normalization filters
        for ol in output_labels:
            cmd.extend(["-map", ol])

        # set audio codec (never copy)
        if self.ffmpeg_normalize.audio_codec:
            cmd.extend(["-c:a", self.ffmpeg_normalize.audio_codec])
        else:
            for index, audio_stream in enumerate(self.streams["audio"].values()):
                cmd.extend([f"-c:a:{index}", audio_stream.get_pcm_codec()])

        # other audio options (if any)
        if self.ffmpeg_normalize.audio_bitrate:
            if self.ffmpeg_normalize.audio_codec == "libvorbis":
                # libvorbis takes just a "-b" option, for some reason
                # https://github.com/slhck/ffmpeg-normalize/issues/277
                cmd.extend(["-b", str(self.ffmpeg_normalize.audio_bitrate)])
            else:
                cmd.extend(["-b:a", str(self.ffmpeg_normalize.audio_bitrate)])
        if self.ffmpeg_normalize.sample_rate:
            cmd.extend(["-ar", str(self.ffmpeg_normalize.sample_rate)])
        if self.ffmpeg_normalize.audio_channels:
            cmd.extend(["-ac", str(self.ffmpeg_normalize.audio_channels)])

        # ... and subtitles
        if not self.ffmpeg_normalize.subtitle_disable:
            for s in self.streams["subtitle"]:
                cmd.extend(["-map", f"0:{s}"])
            # copy subtitles
            cmd.extend(["-c:s", "copy"])

        if self.ffmpeg_normalize.keep_original_audio:
            # the copied originals are appended after the normalized streams
            highest_index = len(self.streams["audio"])
            for index in range(highest_index):
                cmd.extend(["-map", f"0:a:{index}"])
                cmd.extend([f"-c:a:{highest_index + index}", "copy"])

        # extra options (if any)
        if self.ffmpeg_normalize.extra_output_options:
            cmd.extend(self.ffmpeg_normalize.extra_output_options)

        # output format (if any)
        if self.ffmpeg_normalize.output_format:
            cmd.extend(["-f", self.ffmpeg_normalize.output_format])

        # if dry run, only show sample command
        if self.ffmpeg_normalize.dry_run:
            cmd.append(self.output_file)
            _logger.warning("Dry run used, not actually running second-pass command")
            CommandRunner(dry=True).run_command(cmd)
            yield 100
            return

        # special case: if output is a null device, write directly to it
        temp_dir: str | None = None
        temp_file: str | None = None
        if self.output_file == os.devnull:
            cmd.append(self.output_file)
        else:
            temp_dir = mkdtemp()
            temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")
            cmd.append(temp_file)

        cmd_runner = CommandRunner()
        try:
            try:
                yield from cmd_runner.run_ffmpeg_command(cmd)
            except Exception as e:
                _logger.error(
                    f"Error while running command {shlex.join(cmd)}! Error: {e}"
                )
                raise
            if temp_file is not None:
                _logger.debug(
                    f"Moving temporary file from {temp_file} to {self.output_file}"
                )
                move(temp_file, self.output_file)
        finally:
            # Runs on success, on errors, and also when the generator is closed
            # early or interrupted, so the temporary directory is never left behind.
            if temp_dir is not None:
                rmtree(temp_dir, ignore_errors=True)

        output = cmd_runner.get_output()
        # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
        # overall output (which includes multiple loudnorm stats)
        if self.ffmpeg_normalize.normalization_type == "ebu":
            all_stats = AudioStream.prune_and_parse_loudnorm_output(output)
            for stream_id, audio_stream in self.streams["audio"].items():
                if stream_id in all_stats:
                    audio_stream.set_second_pass_stats(all_stats[stream_id])

        # warn if linear normalization was requested but any of the second pass
        # stats report "normalization_type" == "dynamic"
        if self.ffmpeg_normalize.dynamic is False:
            for audio_stream in self.streams["audio"].values():
                pass2_stats = audio_stream.get_stats()["ebu_pass2"]
                if pass2_stats is None:
                    continue
                if pass2_stats["normalization_type"] == "dynamic":
                    _logger.warning(
                        "You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. "
                        "This may lead to unexpected results. "
                        "Consider your input settings, e.g. choose a lower target level or higher target loudness range."
                    )

        _logger.debug("Normalization finished")

    def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]:
        """
        Return the statistics of every audio stream of this file.

        Returns:
            Iterable[LoudnessStatisticsWithMetadata]: lazily evaluated stats, one entry per audio stream
        """
        return (
            audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
        )
Class that holds a file, its streams and adjustments
def __init__(
    self, ffmpeg_normalize: FFmpegNormalize, input_file: str, output_file: str
):
    """
    Store the paths and settings for one file, then parse its streams.

    Args:
        ffmpeg_normalize (FFmpegNormalize): reference to overall settings
        input_file (str): Path to input file
        output_file (str): Path to output file
    """
    self.ffmpeg_normalize = ffmpeg_normalize
    self.skip = False
    self.input_file = input_file
    self.output_file = output_file

    _, dotted_suffix = os.path.splitext(output_file)
    suffix = dotted_suffix[1:]
    # A missing suffix or the null device (e.g. /dev/null or NUL) says nothing
    # about the container, so the configured extension is used instead.
    lacks_usable_suffix = suffix == "" or self.output_file == os.devnull
    self.output_ext = (
        self.ffmpeg_normalize.extension if lacks_usable_suffix else suffix
    )

    self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}}

    self.parse_streams()
Initialize a media file for later normalization by parsing the streams.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): reference to overall settings
- input_file (str): Path to input file
- output_file (str): Path to output file
def parse_streams(self) -> None:
    """
    Run ffmpeg on the input file and register every stream it reports in self.streams.

    When the output extension is listed in ONE_STREAM and more than one audio
    stream was found, only the first audio stream is kept and all video and
    subtitle streams are dropped.

    Raises:
        FFmpegNormalizeError: If no audio streams are found
    """
    _logger.debug(f"Parsing streams of {self.input_file}")

    # Zero-length stream copy to the null muxer: only ffmpeg's textual report is needed.
    probe_cmd = [
        self.ffmpeg_normalize.ffmpeg_exe,
        "-i",
        self.input_file,
        "-c",
        "copy",
        "-t",
        "0",
        "-map",
        "0",
        "-f",
        "null",
        os.devnull,
    ]
    output = CommandRunner().run_command(probe_cmd).get_output()

    _logger.debug("Stream parsing command output:")
    _logger.debug(output)

    duration = None
    for raw_line in output.split("\n"):
        line = raw_line.strip()

        if "Duration" in line:
            duration_search = DUR_REGEX.search(line)
            if duration_search:
                duration = _to_ms(**duration_search.groupdict()) / 1000
                _logger.debug(f"Found duration: {duration} s")
            else:
                _logger.warning("Could not extract duration from input file!")

        if not line.startswith("Stream"):
            continue

        stream_id_match = re.search(r"#0:([\d]+)", line)
        if stream_id_match is None:
            continue
        stream_id = int(stream_id_match.group(1))
        # a stream index that is already registered is not parsed again
        if stream_id in self._stream_ids():
            continue

        if "Audio" in line:
            _logger.debug(f"Found audio stream at index {stream_id}")
            sample_rate_match = re.search(r"(\d+) Hz", line)
            sample_rate = None
            if sample_rate_match:
                sample_rate = int(sample_rate_match.group(1))
            bit_depth_match = re.search(r"[sfu](\d+)(p|le|be)?", line)
            bit_depth = None
            if bit_depth_match:
                bit_depth = int(bit_depth_match.group(1))
            self.streams["audio"][stream_id] = AudioStream(
                self.ffmpeg_normalize,
                self,
                stream_id,
                sample_rate,
                bit_depth,
                duration,
            )
        elif "Video" in line:
            _logger.debug(f"Found video stream at index {stream_id}")
            self.streams["video"][stream_id] = VideoStream(
                self.ffmpeg_normalize, self, stream_id
            )
        elif "Subtitle" in line:
            _logger.debug(f"Found subtitle stream at index {stream_id}")
            self.streams["subtitle"][stream_id] = SubtitleStream(
                self.ffmpeg_normalize, self, stream_id
            )

    audio_streams = self.streams["audio"]
    if not audio_streams:
        raise FFmpegNormalizeError(
            f"Input file {self.input_file} does not contain any audio streams"
        )

    if self.output_ext.lower() in ONE_STREAM and len(audio_streams) > 1:
        _logger.warning(
            "Output file only supports one stream. Keeping only first audio stream."
        )
        first_stream = next(iter(audio_streams.values()))
        self.streams["audio"] = {first_stream.stream_id: first_stream}
        self.streams["video"] = {}
        self.streams["subtitle"] = {}
Try to parse all input streams from file and set them in self.streams.
Raises:
- FFmpegNormalizeError: If no audio streams are found
def run_normalization(self) -> None:
    """
    Normalize this file: run the first pass, then the second pass.

    The second pass is a generator of progress values; it is drained either
    silently or through a progress bar, depending on the `progress` setting.
    """
    _logger.debug(f"Running normalization for {self.input_file}")

    # first pass: collect the loudness statistics
    self._first_pass()

    # second pass without a progress bar: just exhaust the generator
    if not self.ffmpeg_normalize.progress:
        for _ in self._second_pass():
            pass
        return

    # second pass with a progress bar
    with tqdm(
        total=100,
        position=1,
        desc="Second Pass",
        bar_format=TQDM_BAR_FORMAT,
    ) as pbar:
        for progress in self._second_pass():
            # tqdm expects an increment, the generator yields absolute values
            pbar.update(progress - pbar.n)
Run the normalization process for this file.
class AudioStream(MediaStream):
    """
    An audio stream of a media file, together with the loudness statistics
    measured for it and the ffmpeg filter strings derived from them.
    """

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_id: int,
        sample_rate: int | None,
        bit_depth: int | None,
        duration: float | None,
    ):
        """
        Create an AudioStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
            sample_rate (int | None): sample rate in Hz
            bit_depth (int | None): bit depth in bits
            duration (float | None): duration in seconds
        """
        super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)

        # every entry stays None until the corresponding pass has filled it in
        self.loudness_statistics: LoudnessStatistics = {
            "ebu_pass1": None,
            "ebu_pass2": None,
            "mean": None,
            "max": None,
        }

        self.sample_rate = sample_rate
        self.bit_depth = bit_depth

        self.duration = duration

    @staticmethod
    def _constrain(
        number: float, min_range: float, max_range: float, name: str | None = None
    ) -> float:
        """
        Constrain a number between two values.

        Args:
            number (float): The number to constrain.
            min_range (float): The minimum value.
            max_range (float): The maximum value.
            name (str | None): The name of the number; when given, a warning
                is logged if the number had to be changed.

        Returns:
            float: The constrained number.

        Raises:
            ValueError: If min_range is greater than max_range.
        """
        if min_range > max_range:
            raise ValueError("min must be smaller than max")
        result = max(min(number, max_range), min_range)
        if result != number and name is not None:
            _logger.warning(
                f"Constraining {name} to range of [{min_range}, {max_range}]: {number} -> {result}"
            )
        return result

    def get_stats(self) -> LoudnessStatisticsWithMetadata:
        """
        Return loudness statistics for the stream.

        Returns:
            dict: The loudness statistics plus the input file, output file
                and stream ID they belong to.
        """
        stats: LoudnessStatisticsWithMetadata = {
            "input_file": self.media_file.input_file,
            "output_file": self.media_file.output_file,
            "stream_id": self.stream_id,
            "ebu_pass1": self.loudness_statistics["ebu_pass1"],
            "ebu_pass2": self.loudness_statistics["ebu_pass2"],
            "mean": self.loudness_statistics["mean"],
            "max": self.loudness_statistics["max"],
        }
        return stats

    def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
        """
        Set the EBU loudness statistics for the second pass.

        Args:
            stats (dict): The EBU loudness statistics.
        """
        self.loudness_statistics["ebu_pass2"] = stats

    def get_pcm_codec(self) -> str:
        """
        Get the PCM codec string for the stream.

        Returns:
            str: The PCM codec string; "pcm_s16le" when the bit depth is
                unknown or not one of the handled values.
        """
        if not self.bit_depth:
            return "pcm_s16le"
        elif self.bit_depth <= 8:
            return "pcm_s8"
        elif self.bit_depth in [16, 24, 32, 64]:
            return f"pcm_s{self.bit_depth}le"
        else:
            _logger.warning(
                f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
            )
            return "pcm_s16le"

    def _get_filter_str_with_pre_filter(self, current_filter: str) -> str:
        """
        Get a filter string for current_filter, with the pre-filter
        added before. Applies the input label before.

        Args:
            current_filter (str): The current filter.

        Returns:
            str: The filter string.
        """
        input_label = f"[0:{self.stream_id}]"
        filter_chain = []
        if self.media_file.ffmpeg_normalize.pre_filter:
            filter_chain.append(self.media_file.ffmpeg_normalize.pre_filter)
        filter_chain.append(current_filter)
        filter_str = input_label + ",".join(filter_chain)
        return filter_str

    def parse_astats(self) -> Iterator[float]:
        """
        Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.

        Yields:
            float: The progress of the command.

        Raises:
            FFmpegNormalizeError: If the mean or max volume is not found in the output.
        """
        _logger.info(f"Running first pass astats filter for stream {self.stream_id}")

        filter_str = self._get_filter_str_with_pre_filter(
            "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
        )

        cmd = [
            self.media_file.ffmpeg_normalize.ffmpeg_exe,
            "-hide_banner",
            "-y",
            "-i",
            self.media_file.input_file,
            "-filter_complex",
            filter_str,
            "-vn",
            "-sn",
            "-f",
            "null",
            os.devnull,
        ]

        cmd_runner = CommandRunner()
        yield from cmd_runner.run_ffmpeg_command(cmd)
        output = cmd_runner.get_output()

        _logger.debug(
            f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
        )

        # The pattern cannot capture letters, so a non-numeric value that starts
        # with "-" (presumably "-inf" for silence) is captured as a bare "-".
        mean_volume_matches = re.findall(r"RMS level dB: ([\-\d\.]+)", output)
        if mean_volume_matches:
            if mean_volume_matches[0] == "-":
                self.loudness_statistics["mean"] = float("-inf")
            else:
                self.loudness_statistics["mean"] = float(mean_volume_matches[0])
        else:
            raise FFmpegNormalizeError(
                f"Could not get mean volume for {self.media_file.input_file}"
            )

        max_volume_matches = re.findall(r"Peak level dB: ([\-\d\.]+)", output)
        if max_volume_matches:
            if max_volume_matches[0] == "-":
                self.loudness_statistics["max"] = float("-inf")
            else:
                self.loudness_statistics["max"] = float(max_volume_matches[0])
        else:
            raise FFmpegNormalizeError(
                f"Could not get max volume for {self.media_file.input_file}"
            )

    def parse_loudnorm_stats(self) -> Iterator[float]:
        """
        Run a first pass loudnorm filter to get measured data.

        Yields:
            float: The progress of the command.

        Raises:
            FFmpegNormalizeError: If no loudnorm statistics could be found in
                the command output.
        """
        _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")

        opts = {
            "i": self.media_file.ffmpeg_normalize.target_level,
            "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
            "tp": self.media_file.ffmpeg_normalize.true_peak,
            "offset": self.media_file.ffmpeg_normalize.offset,
            "print_format": "json",
        }

        if self.media_file.ffmpeg_normalize.dual_mono:
            opts["dual_mono"] = "true"

        filter_str = self._get_filter_str_with_pre_filter(
            "loudnorm=" + dict_to_filter_opts(opts)
        )

        cmd = [
            self.media_file.ffmpeg_normalize.ffmpeg_exe,
            "-hide_banner",
            "-y",
            "-i",
            self.media_file.input_file,
            "-map",
            f"0:{self.stream_id}",
            "-filter_complex",
            filter_str,
            "-vn",
            "-sn",
            "-f",
            "null",
            os.devnull,
        ]

        cmd_runner = CommandRunner()
        yield from cmd_runner.run_ffmpeg_command(cmd)
        output = cmd_runner.get_output()

        _logger.debug(
            f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
        )

        # only one stream was mapped, so the first parsed entry is the one we want
        parsed_stats = AudioStream.prune_and_parse_loudnorm_output(output)
        if not parsed_stats:
            # A bare next() on an empty iterator would raise StopIteration, which
            # a generator turns into an opaque RuntimeError (PEP 479).
            raise FFmpegNormalizeError(
                f"Could not find loudnorm stats for stream {self.stream_id} "
                f"of {self.media_file.input_file}"
            )
        self.loudness_statistics["ebu_pass1"] = next(iter(parsed_stats.values()))

    @staticmethod
    def prune_and_parse_loudnorm_output(
        output: str,
    ) -> dict[int, EbuLoudnessStatistics]:
        """
        Prune ffmpeg progress lines from output and parse the loudnorm filter output.
        There may be multiple outputs if multiple streams were processed.

        Args:
            output (str): The output from ffmpeg.

        Returns:
            dict: The EBU loudness statistics of each parsed block, keyed by
                the stream index found in its header line.
        """
        pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
        output_lines = [line.strip() for line in pruned_output.split("\n")]
        return AudioStream._parse_loudnorm_output(output_lines)

    @staticmethod
    def _parse_loudnorm_output(
        output_lines: list[str],
    ) -> dict[int, EbuLoudnessStatistics]:
        """
        Parse the output of a loudnorm filter to get the EBU loudness statistics.

        Args:
            output_lines (list[str]): The output lines of the loudnorm filter.

        Raises:
            FFmpegNormalizeError: When the output could not be parsed.

        Returns:
            dict: The EBU loudness statistics of each parsed block, keyed by
                stream index; empty if no block was found.
        """
        result = dict[int, EbuLoudnessStatistics]()
        # -1 means "currently outside of a loudnorm block"
        stream_index = -1
        loudnorm_start = 0
        for index, line in enumerate(output_lines):
            if stream_index < 0:
                # look for the header line that announces a block
                if m := _loudnorm_pattern.match(line):
                    loudnorm_start = index + 1
                    stream_index = int(m.group(1))
            else:
                # inside a block: the JSON ends at the first line starting with "}"
                if line.startswith("}"):
                    loudnorm_end = index + 1
                    loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end])

                    try:
                        loudnorm_stats = json.loads(loudnorm_data)

                        _logger.debug(
                            f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}"
                        )

                        for key in [
                            "input_i",
                            "input_tp",
                            "input_lra",
                            "input_thresh",
                            "output_i",
                            "output_tp",
                            "output_lra",
                            "output_thresh",
                            "target_offset",
                            "normalization_type",
                        ]:
                            if key not in loudnorm_stats:
                                continue
                            if key == "normalization_type":
                                loudnorm_stats[key] = loudnorm_stats[key].lower()
                            # handle infinite values
                            elif float(loudnorm_stats[key]) == -float("inf"):
                                loudnorm_stats[key] = -99
                            elif float(loudnorm_stats[key]) == float("inf"):
                                loudnorm_stats[key] = 0
                            else:
                                # convert to floats
                                loudnorm_stats[key] = float(loudnorm_stats[key])

                        result[stream_index] = cast(
                            EbuLoudnessStatistics, loudnorm_stats
                        )
                        stream_index = -1
                    except Exception as e:
                        # keep the original error attached as the cause
                        raise FFmpegNormalizeError(
                            f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
                        ) from e
        return result

    def get_second_pass_opts_ebu(self) -> str:
        """
        Return second pass loudnorm filter options string for ffmpeg.

        The loudness range target is resolved per stream from the configured
        value and the first pass measurement. The shared settings object is
        left untouched, so one stream's result cannot leak into another's.

        Returns:
            str: The loudnorm filter string.

        Raises:
            FFmpegNormalizeError: If the first pass statistics are missing.
        """

        if not self.loudness_statistics["ebu_pass1"]:
            raise FFmpegNormalizeError(
                "First pass not run, you must call parse_loudnorm_stats first"
            )

        stats = self.loudness_statistics["ebu_pass1"]

        if float(stats["input_i"]) > 0:
            _logger.warning(
                "Input file had measured input loudness greater than zero "
                f"({stats['input_i']}), capping at 0"
            )
            stats["input_i"] = 0

        will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic

        # Stream-local copy of the configured target; the settings object is
        # shared by all streams and files and must not be overwritten here.
        loudness_range_target = self.media_file.ffmpeg_normalize.loudness_range_target

        if self.media_file.ffmpeg_normalize.keep_loudness_range_target:
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )
            input_lra = stats["input_lra"]
            if input_lra < 1 or input_lra > 50:
                _logger.warning(
                    "Input file had measured loudness range outside of [1,50] "
                    f"({input_lra}), capping to allowed range"
                )

            loudness_range_target = self._constrain(stats["input_lra"], 1, 50)

        if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
            if stats["input_lra"] <= loudness_range_target:
                _logger.debug(
                    "Setting loudness range target in second pass loudnorm filter"
                )
            else:
                loudness_range_target = stats["input_lra"]
                _logger.debug(
                    "Keeping target loudness range in second pass loudnorm filter"
                )

        if loudness_range_target < stats["input_lra"] and not will_use_dynamic_mode:
            _logger.warning(
                f"Input file had loudness range of {stats['input_lra']}. "
                f"This is larger than the loudness range target ({loudness_range_target}). "
                "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
                "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
                "the input."
            )
            will_use_dynamic_mode = True

        if will_use_dynamic_mode and not self.ffmpeg_normalize.sample_rate:
            _logger.warning(
                "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
                "Specify -ar/--sample-rate to override it."
            )

        target_level = self.ffmpeg_normalize.target_level
        if self.ffmpeg_normalize.auto_lower_loudness_target:
            safe_target = (
                stats["input_i"]
                - stats["input_tp"]
                + self.ffmpeg_normalize.true_peak
                - 0.1
            )
            if safe_target < self.ffmpeg_normalize.target_level:
                target_level = safe_target
                _logger.warning(
                    f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
                )

        opts = {
            "i": target_level,
            "lra": loudness_range_target,
            "tp": self.media_file.ffmpeg_normalize.true_peak,
            "offset": self._constrain(
                stats["target_offset"], -99, 99, name="target_offset"
            ),
            "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
            "measured_lra": self._constrain(
                stats["input_lra"], 0, 99, name="input_lra"
            ),
            "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
            "measured_thresh": self._constrain(
                stats["input_thresh"], -99, 0, name="input_thresh"
            ),
            "linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true",
            "print_format": "json",
        }

        if self.media_file.ffmpeg_normalize.dual_mono:
            opts["dual_mono"] = "true"

        return "loudnorm=" + dict_to_filter_opts(opts)

    def get_second_pass_opts_peakrms(self) -> str:
        """
        Set the adjustment gain based on chosen option and mean/max volume,
        return the matching ffmpeg volume filter.

        Returns:
            str: ffmpeg volume filter string

        Raises:
            FFmpegNormalizeError: If the mean/max volume has not been measured,
                or the normalization type is neither "peak" nor "rms".
        """
        if (
            self.loudness_statistics["max"] is None
            or self.loudness_statistics["mean"] is None
        ):
            raise FFmpegNormalizeError(
                "First pass not run, no mean/max volume to normalize to"
            )

        normalization_type = self.media_file.ffmpeg_normalize.normalization_type
        target_level = self.media_file.ffmpeg_normalize.target_level

        if normalization_type == "peak":
            adjustment = 0 + target_level - self.loudness_statistics["max"]
        elif normalization_type == "rms":
            adjustment = target_level - self.loudness_statistics["mean"]
        else:
            raise FFmpegNormalizeError(
                "Can only set adjustment for peak and RMS normalization"
            )

        _logger.info(
            f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
        )

        # a peak above 0 dB after the gain is applied means the signal will clip
        clip_amount = self.loudness_statistics["max"] + adjustment
        if clip_amount > 0:
            _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")

        return f"volume={adjustment}dB"
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_id: int,
    sample_rate: int | None,
    bit_depth: int | None,
    duration: float | None,
):
    """
    Build an audio stream with empty loudness statistics.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
        sample_rate (int | None): sample rate in Hz
        bit_depth (int | None): bit depth in bits
        duration (float | None): duration in seconds
    """
    super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)

    self.sample_rate = sample_rate
    self.bit_depth = bit_depth
    self.duration = duration

    # all four slots start out as None until a pass fills them in
    self.loudness_statistics: LoudnessStatistics = dict.fromkeys(
        ("ebu_pass1", "ebu_pass2", "mean", "max")
    )
Create an AudioStream object.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_id (int): The stream ID.
- sample_rate (int): sample rate in Hz
- bit_depth (int): bit depth in bits
- duration (float): duration in seconds
def get_stats(self) -> LoudnessStatisticsWithMetadata:
    """
    Collect the stream's loudness statistics together with identifying metadata.

    Returns:
        dict: The input file, output file and stream ID, followed by the
            "ebu_pass1", "ebu_pass2", "mean" and "max" statistics.
    """
    return {
        "input_file": self.media_file.input_file,
        "output_file": self.media_file.output_file,
        "stream_id": self.stream_id,
        **{
            key: self.loudness_statistics[key]
            for key in ("ebu_pass1", "ebu_pass2", "mean", "max")
        },
    }
Return loudness statistics for the stream.
Returns:
dict: A dictionary containing the loudness statistics.
def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
    """
    Store the EBU loudness statistics measured in the second pass.

    Args:
        stats (dict): The EBU loudness statistics.
    """
    self.loudness_statistics.update(ebu_pass2=stats)
Set the EBU loudness statistics for the second pass.
Arguments:
- stats (dict): The EBU loudness statistics.
def get_pcm_codec(self) -> str:
    """
    Pick the PCM codec name that matches the stream's bit depth.

    Returns:
        str: "pcm_s8" for depths up to 8 bits, "pcm_s<depth>le" for 16, 24,
            32 and 64 bits, and "pcm_s16le" when the depth is unknown or
            not one of those values.
    """
    fallback = "pcm_s16le"
    depth = self.bit_depth

    # unknown (None or 0) bit depth
    if not depth:
        return fallback
    if depth <= 8:
        return "pcm_s8"
    if depth in (16, 24, 32, 64):
        return f"pcm_s{depth}le"

    _logger.warning(
        f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
    )
    return fallback
Get the PCM codec string for the stream.
Returns:
str: The PCM codec string.
def parse_astats(self) -> Iterator[float]:
    """
    Measure the stream's mean (RMS) and max (peak) volume with ffmpeg's astats filter.

    The results are stored in self.loudness_statistics["mean"] and ["max"].

    Yields:
        float: The progress of the command.

    Raises:
        FFmpegNormalizeError: If the mean or max volume is not found in the output.
    """
    _logger.info(f"Running first pass astats filter for stream {self.stream_id}")

    astats_filter = self._get_filter_str_with_pre_filter(
        "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
    )

    runner = CommandRunner()
    yield from runner.run_ffmpeg_command(
        [
            self.media_file.ffmpeg_normalize.ffmpeg_exe,
            "-hide_banner",
            "-y",
            "-i",
            self.media_file.input_file,
            "-filter_complex",
            astats_filter,
            "-vn",
            "-sn",
            "-f",
            "null",
            os.devnull,
        ]
    )
    output = runner.get_output()

    _logger.debug(
        f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
    )

    # The pattern cannot capture letters, so a non-numeric value that starts
    # with "-" (presumably "-inf" for silence) is captured as a bare "-".
    rms_match = re.search(r"RMS level dB: ([\-\d\.]+)", output)
    if rms_match is None:
        raise FFmpegNormalizeError(
            f"Could not get mean volume for {self.media_file.input_file}"
        )
    rms_text = rms_match.group(1)
    self.loudness_statistics["mean"] = (
        float("-inf") if rms_text == "-" else float(rms_text)
    )

    peak_match = re.search(r"Peak level dB: ([\-\d\.]+)", output)
    if peak_match is None:
        raise FFmpegNormalizeError(
            f"Could not get max volume for {self.media_file.input_file}"
        )
    peak_text = peak_match.group(1)
    self.loudness_statistics["max"] = (
        float("-inf") if peak_text == "-" else float(peak_text)
    )
Use ffmpeg with astats filter to get the mean (RMS) and max (peak) volume of the input file.
Yields:
float: The progress of the command.
def parse_loudnorm_stats(self) -> Iterator[float]:
    """
    Run a first pass loudnorm filter to get measured data.

    The parsed statistics are stored in self.loudness_statistics["ebu_pass1"].

    Yields:
        float: The progress of the command.

    Raises:
        FFmpegNormalizeError: If no loudnorm statistics could be found in
            the command output.
    """
    _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")

    opts = {
        "i": self.media_file.ffmpeg_normalize.target_level,
        "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
        "tp": self.media_file.ffmpeg_normalize.true_peak,
        "offset": self.media_file.ffmpeg_normalize.offset,
        "print_format": "json",
    }

    if self.media_file.ffmpeg_normalize.dual_mono:
        opts["dual_mono"] = "true"

    filter_str = self._get_filter_str_with_pre_filter(
        "loudnorm=" + dict_to_filter_opts(opts)
    )

    cmd = [
        self.media_file.ffmpeg_normalize.ffmpeg_exe,
        "-hide_banner",
        "-y",
        "-i",
        self.media_file.input_file,
        "-map",
        f"0:{self.stream_id}",
        "-filter_complex",
        filter_str,
        "-vn",
        "-sn",
        "-f",
        "null",
        os.devnull,
    ]

    cmd_runner = CommandRunner()
    yield from cmd_runner.run_ffmpeg_command(cmd)
    output = cmd_runner.get_output()

    _logger.debug(
        f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
    )

    # only one stream was mapped, so the first parsed entry is the one we want
    parsed_stats = AudioStream.prune_and_parse_loudnorm_output(output)
    if not parsed_stats:
        # A bare next() on an empty iterator would raise StopIteration, which
        # a generator turns into an opaque RuntimeError (PEP 479).
        raise FFmpegNormalizeError(
            f"Could not find loudnorm stats for stream {self.stream_id} "
            f"of {self.media_file.input_file}"
        )
    self.loudness_statistics["ebu_pass1"] = next(iter(parsed_stats.values()))
Run a first pass loudnorm filter to get measured data.
Yields:
float: The progress of the command.
@staticmethod
def prune_and_parse_loudnorm_output(
    output: str,
) -> dict[int, EbuLoudnessStatistics]:
    """
    Strip ffmpeg progress lines from the output, then parse the loudnorm blocks in it.
    The output may contain several blocks if multiple streams were processed.

    Args:
        output (str): The output from ffmpeg.

    Returns:
        dict: The parsed EBU loudness statistics, as returned by
            _parse_loudnorm_output for the stripped lines.
    """
    stripped_lines = [
        raw_line.strip()
        for raw_line in CommandRunner.prune_ffmpeg_progress_from_output(output).split(
            "\n"
        )
    ]
    return AudioStream._parse_loudnorm_output(stripped_lines)
Prune ffmpeg progress lines from output and parse the loudnorm filter output. There may be multiple outputs if multiple streams were processed.
Arguments:
- output (str): The output from ffmpeg.
Returns:
dict: The EBU loudness statistics of each processed stream, keyed by stream index.
def get_second_pass_opts_ebu(self) -> str:
    """
    Return second pass loudnorm filter options string for ffmpeg.

    The loudness range target is resolved per stream from the configured
    value and the first pass measurement. The shared settings object is
    left untouched, so one stream's result cannot leak into another's.

    Returns:
        str: The loudnorm filter string.

    Raises:
        FFmpegNormalizeError: If the first pass statistics are missing.
    """

    if not self.loudness_statistics["ebu_pass1"]:
        raise FFmpegNormalizeError(
            "First pass not run, you must call parse_loudnorm_stats first"
        )

    stats = self.loudness_statistics["ebu_pass1"]

    if float(stats["input_i"]) > 0:
        _logger.warning(
            "Input file had measured input loudness greater than zero "
            f"({stats['input_i']}), capping at 0"
        )
        stats["input_i"] = 0

    will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic

    # Stream-local copy of the configured target; the settings object is
    # shared by all streams and files and must not be overwritten here.
    loudness_range_target = self.media_file.ffmpeg_normalize.loudness_range_target

    if self.media_file.ffmpeg_normalize.keep_loudness_range_target:
        _logger.debug(
            "Keeping target loudness range in second pass loudnorm filter"
        )
        input_lra = stats["input_lra"]
        if input_lra < 1 or input_lra > 50:
            _logger.warning(
                "Input file had measured loudness range outside of [1,50] "
                f"({input_lra}), capping to allowed range"
            )

        loudness_range_target = self._constrain(stats["input_lra"], 1, 50)

    if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
        if stats["input_lra"] <= loudness_range_target:
            _logger.debug(
                "Setting loudness range target in second pass loudnorm filter"
            )
        else:
            loudness_range_target = stats["input_lra"]
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )

    if loudness_range_target < stats["input_lra"] and not will_use_dynamic_mode:
        _logger.warning(
            f"Input file had loudness range of {stats['input_lra']}. "
            f"This is larger than the loudness range target ({loudness_range_target}). "
            "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
            "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
            "the input."
        )
        will_use_dynamic_mode = True

    if will_use_dynamic_mode and not self.ffmpeg_normalize.sample_rate:
        _logger.warning(
            "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
            "Specify -ar/--sample-rate to override it."
        )

    target_level = self.ffmpeg_normalize.target_level
    if self.ffmpeg_normalize.auto_lower_loudness_target:
        safe_target = (
            stats["input_i"]
            - stats["input_tp"]
            + self.ffmpeg_normalize.true_peak
            - 0.1
        )
        if safe_target < self.ffmpeg_normalize.target_level:
            target_level = safe_target
            _logger.warning(
                f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
            )

    opts = {
        "i": target_level,
        "lra": loudness_range_target,
        "tp": self.media_file.ffmpeg_normalize.true_peak,
        "offset": self._constrain(
            stats["target_offset"], -99, 99, name="target_offset"
        ),
        "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
        "measured_lra": self._constrain(
            stats["input_lra"], 0, 99, name="input_lra"
        ),
        "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
        "measured_thresh": self._constrain(
            stats["input_thresh"], -99, 0, name="input_thresh"
        ),
        "linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true",
        "print_format": "json",
    }

    if self.media_file.ffmpeg_normalize.dual_mono:
        opts["dual_mono"] = "true"

    return "loudnorm=" + dict_to_filter_opts(opts)
Return second pass loudnorm filter options string for ffmpeg
def get_second_pass_opts_peakrms(self) -> str:
    """
    Work out the gain needed to reach the target level from the measured
    mean/max volume and return the matching ffmpeg volume filter.

    Returns:
        str: ffmpeg volume filter string

    Raises:
        FFmpegNormalizeError: If the mean/max volume has not been measured,
            or the normalization type is neither "peak" nor "rms".
    """
    peak_level = self.loudness_statistics["max"]
    mean_level = self.loudness_statistics["mean"]
    if peak_level is None or mean_level is None:
        raise FFmpegNormalizeError(
            "First pass not run, no mean/max volume to normalize to"
        )

    settings = self.media_file.ffmpeg_normalize
    normalization_type = settings.normalization_type
    target_level = settings.target_level

    if normalization_type not in ("peak", "rms"):
        raise FFmpegNormalizeError(
            "Can only set adjustment for peak and RMS normalization"
        )
    if normalization_type == "peak":
        adjustment = 0 + target_level - peak_level
    else:
        adjustment = target_level - mean_level

    _logger.info(
        f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
    )

    # a peak above 0 dB after the gain is applied means the signal will clip
    clip_amount = peak_level + adjustment
    if clip_amount > 0:
        _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")

    return f"volume={adjustment}dB"
Set the adjustment gain based on chosen option and mean/max volume, return the matching ffmpeg volume filter.
Returns:
str: ffmpeg volume filter string
Inherited Members
class VideoStream(MediaStream):
    """A video stream of a media file."""

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
    ):
        """
        Create a VideoStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="video",
            stream_id=stream_id,
        )
def __init__(
    self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
):
    """
    Create a VideoStream object.

    The stream type handed to the parent constructor is always "video".

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="video",
        stream_id=stream_id,
    )
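Create a VideoStream object (a MediaStream whose stream type is fixed to "video").
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_type (Literal["audio", "video", "subtitle"]): The type of the stream. Not a constructor argument here; it is always passed to MediaStream as "video".
- stream_id (int): The stream ID.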
Inherited Members
class SubtitleStream(MediaStream):
    """A MediaStream whose stream type is fixed to "subtitle"."""

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
    ):
        """
        Create a SubtitleStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="subtitle",
            stream_id=stream_id,
        )
def __init__(
    self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
):
    """
    Create a SubtitleStream object.

    The stream type handed to the parent constructor is always "subtitle".

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="subtitle",
        stream_id=stream_id,
    )
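Create a SubtitleStream object (a MediaStream whose stream type is fixed to "subtitle").
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_type (Literal["audio", "video", "subtitle"]): The type of the stream. Not a constructor argument here; it is always passed to MediaStream as "subtitle".
- stream_id (int): The stream ID.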
Inherited Members
class MediaStream:
    """
    One stream of a media file.

    Holds the owning FFmpegNormalize object, the MediaFile the stream
    belongs to, the stream type and the stream ID.
    """

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_type: Literal["audio", "video", "subtitle"],
        stream_id: int,
    ):
        """
        Create a MediaStream object.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
            stream_id (int): The stream ID.
        """
        # Paired assignments run left to right, so attribute order is kept.
        self.ffmpeg_normalize, self.media_file = ffmpeg_normalize, media_file
        self.stream_type, self.stream_id = stream_type, stream_id

    def __repr__(self) -> str:
        """Return "<file name, stream type stream ID>" for this stream."""
        file_name = os.path.basename(self.media_file.input_file)
        return f"<{file_name}, {self.stream_type} stream {self.stream_id}>"
49 def __init__( 50 self, 51 ffmpeg_normalize: FFmpegNormalize, 52 media_file: MediaFile, 53 stream_type: Literal["audio", "video", "subtitle"], 54 stream_id: int, 55 ): 56 """ 57 Create a MediaStream object. 58 59 Args: 60 ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object. 61 media_file (MediaFile): The MediaFile object. 62 stream_type (Literal["audio", "video", "subtitle"]): The type of the stream. 63 stream_id (int): The stream ID. 64 """ 65 self.ffmpeg_normalize = ffmpeg_normalize 66 self.media_file = media_file 67 self.stream_type = stream_type 68 self.stream_id = stream_id
Create a MediaStream object.
Arguments:
- ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
- media_file (MediaFile): The MediaFile object.
- stream_type (Literal["audio", "video", "subtitle"]): The type of the stream.
- stream_id (int): The stream ID.