# Adaptive loudness averaging.
# Hens Zimmerman, 02-05-2023.
# Python 3.

import matplotlib.pyplot as plt
import numpy
import os
import pyloudnorm
import regex
import scipy
import soundfile
import sys
import warnings

# These next parameters for the stereo limiter are fixed for now.

delay = 40              # samples
signal_length = 1       # second
release_coeff = 0.9995  # release time factor
attack_coeff = 0.9      # attack time factor
block_length = 1024     # samples

# End of fixed parameters.


class StereoLimiter:
    """Peak limiter for two-channel audio with a ganged (shared) gain.

    Each channel has its own peak envelope and smoothed gain; the smaller of
    the two gains is applied to both channels so the stereo image does not
    shift. The gain is applied to a delayed copy of the input, so the gain
    computed from the current sample acts on audio that entered the limiter
    ``delay - 1`` samples earlier. The output is therefore ``delay - 1``
    samples late relative to the input.

    State (envelopes, gains, delay lines) is kept on the instance, so
    consecutive calls to :meth:`limit` continue where the previous one ended.
    """

    def __init__(self, attack_coeff, release_coeff, delay, threshold):
        """Create a limiter.

        Args:
            attack_coeff: One-pole smoothing factor for the gain; each sample
                the gain keeps this fraction of its previous value.
            release_coeff: Per-sample decay factor of the peak envelope.
            delay: Length of the delay lines in samples (at least 1).
            threshold: Linear amplitude above which gain reduction starts.

        Raises:
            ValueError: If ``delay`` is smaller than 1.
        """
        # A zero-length delay line cannot be indexed and the modulo in
        # limit() would divide by zero, so refuse it right away.
        if delay < 1:
            raise ValueError("delay must be at least 1 sample")

        self.delay_index = 0
        self.envelope_left = 0
        self.envelope_right = 0
        self.gain = 1
        self.gain_left = 1
        self.gain_right = 1
        self.delay = delay
        self.delay_line_left = numpy.zeros(delay)
        self.delay_line_right = numpy.zeros(delay)
        self.release_coeff = release_coeff
        self.attack_coeff = attack_coeff
        self.threshold = threshold

    def _target_gain(self, envelope):
        """Return the gain that would bring ``envelope`` down to the threshold."""
        if envelope > self.threshold:
            return self.threshold / envelope
        return 1.0

    def limit(self, signal):
        """Limit ``signal`` in place and return it.

        Args:
            signal: Sequence of frames; index 0 of a frame is the left sample
                and index 1 the right sample. The frames are overwritten with
                the limited, delayed audio.

        Returns:
            The same ``signal`` object that was passed in.
        """
        for idx, sample in enumerate(signal):
            # Copy the input values first: the frame is overwritten below.
            left_in = sample[0]
            right_in = sample[1]

            self.delay_line_left[self.delay_index] = left_in
            self.delay_line_right[self.delay_index] = right_in

            # After this step the index points at the oldest stored frame.
            self.delay_index = (self.delay_index + 1) % self.delay

            # Calculate an envelope of the signal: instant attack, exponential release.
            self.envelope_left = max(abs(left_in), self.envelope_left * self.release_coeff)
            self.envelope_right = max(abs(right_in), self.envelope_right * self.release_coeff)

            target_gain_left = self._target_gain(self.envelope_left)
            target_gain_right = self._target_gain(self.envelope_right)

            # Have the channel gains go towards the desired limiter gain.
            self.gain_left = (self.gain_left * self.attack_coeff +
                              target_gain_left * (1 - self.attack_coeff))
            self.gain_right = (self.gain_right * self.attack_coeff +
                               target_gain_right * (1 - self.attack_coeff))

            # Gang stereo channels.
            self.gain = min(self.gain_left, self.gain_right)

            # Limit the delayed signal.
            signal[idx][0] = self.delay_line_left[self.delay_index] * self.gain
            signal[idx][1] = self.delay_line_right[self.delay_index] * self.gain

        return signal


# Suppress pyloudnorm warning about clipping.
# Since we compute in float64, we can fix this ourselves.
# Note that this silences every warning category for the whole process.
warnings.simplefilter("ignore")

# What command line args did we get?
arg_count = len(sys.argv)

if arg_count < 2:
    print("python dyn_adapt.py file div:xx loudness:-xx xfade:xx lower:xx max-up:x max-down:x oversample:x limit:-x")
    # sys.exit instead of the site-provided exit(); non-zero status signals the failure.
    sys.exit(1)

# Name of input file.
filename = sys.argv[1]

# Does this file exist at all?
if not os.path.isfile(filename):
    print(filename + " doesn't appear to exist\n")
    sys.exit(1)

# Default division of file into blocks.
division = 10
seconds = False

# Default crossfade ratio into previous block.
xfade = 0.5

# Default target loudness.
final_loudness = -16.0

# Default no block loudness adaptation if loudness for block below a certain value.
lower = 12.0

# Default max upwards gain.
max_upwards = 6.0

# Default max downwards gain.
max_downwards = 6.0

# Oversampling factor.
oversampling = 4

# dBFS for limiter.
limit = -1

# Scan through optional arguments that override defaults
# div:10 div:10s loudness:-16 xfade:90 lower:12 max-up:6 max-down:6 oversample:4 limit:-2
# Every argument is tested against every pattern; text that matches none is ignored.
for arg in sys.argv[2:]:
    match = regex.search(r"div:(\d+)", arg, regex.IGNORECASE)
    if match:
        division = int(match.group(1))

    # A trailing "s" turns the div value into a block length in seconds.
    match = regex.search(r"div:(\d+)s", arg, regex.IGNORECASE)
    if match:
        seconds = True

    match = regex.search(r"loudness:-(\d+)", arg, regex.IGNORECASE)
    if match:
        final_loudness = -int(match.group(1))

    # xfade is given as a percentage of the block size.
    match = regex.search(r"xfade:(\d+)", arg, regex.IGNORECASE)
    if match:
        xfade = int(match.group(1)) / 100

    match = regex.search(r"lower:(\d+)", arg, regex.IGNORECASE)
    if match:
        lower = int(match.group(1))

    match = regex.search(r"max-up:(\d+)", arg, regex.IGNORECASE)
    if match:
        max_upwards = int(match.group(1))

    match = regex.search(r"max-down:(\d+)", arg, regex.IGNORECASE)
    if match:
        max_downwards = int(match.group(1))

    match = regex.search(r"oversample:(\d+)", arg, regex.IGNORECASE)
    if match:
        oversampling = int(match.group(1))

    match = regex.search(r"limit:-(\d+)", arg, regex.IGNORECASE)
    if match:
        limit = -int(match.group(1))

# div:0 would divide by zero when the block size is computed below.
if division < 1:
    print("div must be at least 1")
    sys.exit(1)

# A crossfade longer than one block would reach outside the previous block.
xfade = min(xfade, 1.0)

# Blocks quieter than this are left untouched.
lower_threshold = final_loudness - lower

# Read entire file into ndarray.
audio, samplerate = soundfile.read(filename, frames=-1, dtype='float64', always_2d=True)

# Basic stats about file we got from soundfile.
samples = audio.shape[0]

# Is it a mono file or a multichannel file?
# always_2d=True gives a (frames, channels) array even for mono input, so the
# channel count has to be checked explicitly; the number of dimensions says nothing.
channels = audio.shape[1]

if channels == 1:
    print("Mono files are not supported")
    sys.exit(1)

if channels > 2:
    print("Only stereo audio is currently supported")
    sys.exit(1)

# Division of file into blocks of size blocksize.
# If user supplied argument in seconds, divide into blocks of that many seconds.
if seconds:
    blocksize = division * samplerate
    # A file shorter than one block is still processed as a single block.
    division = max(1, samples // blocksize)
else:
    blocksize = int(samples / division)

print(str(blocksize))

# This leads to an integer size for the crossfade.
# The crossfade is capped at one block so it never reaches outside the previous block.
fadesize = min(int(blocksize * xfade), blocksize)

# create BS.1770 meter
meter = pyloudnorm.Meter(samplerate)

# Linear crossfade ramps as column vectors so they broadcast over all channels.
# fade_in runs from 0 up to (fadesize - 1) / fadesize, fade_out is its complement.
if fadesize > 0:
    fade_in = (numpy.arange(fadesize) * (1.0 / fadesize))[:, numpy.newaxis]
    fade_out = 1.0 - fade_in

# Finished blocks are collected here and joined once after the loop, which
# avoids re-copying the whole output for every block.
finished_blocks = []
prev_audio = numpy.empty((0, channels))

for idx in range(0, division):
    # Create this block. The last block also takes whatever samples remain.
    print("Processing block {0} of {1}".format(idx + 1, division))

    # Every block except the first starts fadesize samples early; that lead-in
    # is blended into the tail of the previous block further down.
    start_idx = (idx * blocksize) - fadesize
    stop_idx = start_idx + blocksize + fadesize

    # First block does not require a crossfade section at the start.
    if start_idx < 0:
        start_idx = 0

    if idx == division - 1:
        sub_audio = audio[start_idx:]
    else:
        sub_audio = audio[start_idx:stop_idx]

    # Loudness adapt this block.
    loudness = meter.integrated_loudness(sub_audio)

    # Do not change "silent" portions of the mix.
    if loudness > lower_threshold:
        # Move towards the final loudness, but by no more than the allowed gain change.
        if loudness > final_loudness:
            target_loudness = max(loudness - max_downwards, final_loudness)
        else:
            target_loudness = min(loudness + max_upwards, final_loudness)

        # This may push samples outside [-1.0 .. 1.0]. Nothing is clipped or
        # corrected here; the data stays in float64 and peaks are dealt with
        # once all blocks have been assembled.
        sub_audio = pyloudnorm.normalize.loudness(sub_audio, loudness, target_loudness)

    # Crossfade into previous block, then drop the lead-in from this block.
    if idx > 0 and fadesize > 0:
        tail = slice(blocksize - fadesize, blocksize)
        prev_audio[tail] = fade_out * prev_audio[tail] + fade_in * sub_audio[:fadesize]
        sub_audio = sub_audio[fadesize:]

    # Previous block is complete now.
    finished_blocks.append(prev_audio)

    # This block becomes previous block for next iteration.
    prev_audio = sub_audio

# Out of the loop we still need to concat the last block.
finished_blocks.append(prev_audio)
new_audio = numpy.concatenate(finished_blocks, axis=0)

# Gain scale final buffer to requested loudness norm.
loudness = meter.integrated_loudness(new_audio)
new_audio = pyloudnorm.normalize.loudness(new_audio, loudness, final_loudness)

# Largest absolute sample value over all channels, expressed in dBFS.
peak_dB = 20.0 * numpy.log10(numpy.max(numpy.abs(new_audio)))
print("Sample peak at " + str(peak_dB) + " dBFS")

if oversampling > 1:
    # Make sure the signal submodule is loaded; a bare "import scipy" does not
    # guarantee that on every SciPy version.
    import scipy.signal

    print("Oversampling... hold on to your seats...")

    # Resample along the time axis to estimate peaks between the samples.
    oversampled_new_audio = scipy.signal.resample(new_audio, new_audio.shape[0] * oversampling)
    peak_dB = 20.0 * numpy.log10(numpy.max(numpy.abs(oversampled_new_audio)))
    print("Oversampled peak at " + str(peak_dB) + " dBFS")

# Remove extension from filename, whatever its length (.wav, .flac, .aiff, ...).
new_name = os.path.splitext(filename)[0] + '_new.wav'

if peak_dB > limit:
    # Limiter ceiling as a linear amplitude.
    threshold = 10**(limit / 20)
    limiter = StereoLimiter(attack_coeff, release_coeff, delay, threshold)

    # The limiter outputs each sample (delay - 1) samples late. Feed it that
    # many extra zero frames and drop the same number from the start, so the
    # result lines up with the input and the end of the file is not cut off.
    latency = delay - 1
    padding = numpy.zeros((latency, new_audio.shape[1]))
    padded_audio = numpy.concatenate((new_audio, padding), axis=0)
    limited_new_audio = limiter.limit(padded_audio)[latency:]

    soundfile.write(new_name, limited_new_audio, samplerate, 'PCM_24')
else:
    soundfile.write(new_name, new_audio, samplerate, 'PCM_24')