Updates reflecting the discussion on issue #196 (305f1331) · Commits · IVAS Codec Public Collaboration / IVAS Codec

scripts/reverb/generate_scene_metadata.py

0 → 100644

+399 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3

		"""
		(C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
		Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		contributors to this repository. All Rights Reserved.

		This software is protected by copyright law and by international treaties.
		The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
		Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		contributors to this repository retain full ownership rights in their respective contributions in
		the software. This notice grants no license of any kind, including but not limited to patent
		license, nor is any license granted by implication, estoppel or otherwise.

		Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
		contributions.

		This software is provided "AS IS", without any express or implied warranties. The software is in the
		development stage. It is intended exclusively for experts who have experience with such software and
		solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
		and fitness for a particular purpose are hereby disclaimed and excluded.

		Any dispute, controversy or claim arising under or in relation to providing this software shall be
		submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
		accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		the United Nations Convention on Contracts on the International Sales of Goods.
		"""

		#
		# Generate binary render configuration output files for testing purposes
		# The binary code generation is based on the MPEG-I audio standard
		# which defines functions to decode raw bitstream into internal parameters
		#


		from bitarray import bitarray, test as bitarray_test
		import math
		from enum import Enum
		import numpy as np


		# Debug switch read by get_duration_code, get_frequency_code and get_dsr_code:
		# set to True to print values suitable for inclusion into .cfg configuration files
		print_cfg = False

		def get_id_code(id):
		    """Return the bit string for an ID as a chain of 8-bit groups.

		    Each group holds 7 value bits followed by one flag bit. Groups are ordered
		    most-significant first; the flag is '1' on every group except the last,
		    whose flag is '0'.
		    """
		    # build the groups least-significant first, then reverse them for output
		    groups = [format(id % 128, '07b') + '0']
		    remaining = id // 128
		    while remaining > 0:
		        groups.append(format(remaining % 128, '07b') + '1')
		        remaining //= 128
		    return ''.join(reversed(groups))


		def get_count_or_index_code(n):
		    """Return the bit string for a count or index n with 0 <= n < 1024.

		    The code is the table entry for n % 64, followed by '0' when n < 64 or by
		    '1' plus the table entry for n // 64 otherwise.
		    """
		    # codes for the low part n % 64: 0, 1, ... 63
		    low_part_codes = [
		        '0111', '100', '01100', '01101', '01010', '01011', '01000', '01001', '001111', '001110',
		        '001101', '001100', '001011', '001010', '001001', '001000', '000111', '000110', '000101', '000100',
		        '000011', '000010', '000001', '000000', '111111', '111110', '111101', '111100', '111011', '111010',
		        '111001', '111000', '1101111', '1101110', '1101101', '1101100', '1101011', '1101010', '1101001', '1101000',
		        '1100111', '1100110', '1100101', '1100100', '1100011', '1100010', '1100001', '1100000', '1011111', '1011110',
		        '1011101', '1011100', '1011011', '1011010', '1011001', '1011000', '1010111', '1010110', '1010101', '1010100',
		        '1010011', '1010010', '1010001', '1010000']

		    # codes for the high part n // 64: 1, 2, ... 15
		    high_part_codes = [
		        '001', '000', '110', '101', '100', '0111', '0101', '1111', '1110', '01101',
		        '01001', '01000', '011001', '0110001', '0110000']

		    assert 0 <= n < 16 * 64
		    high, low = divmod(n, 64)
		    suffix = '0' if n < 64 else '1' + high_part_codes[high - 1]
		    return low_part_codes[low] + suffix


		def get_duration_code(duration):
		    """Return the bit string for a duration given in seconds.

		    The duration is rounded (in float32 arithmetic) to a whole number of
		    10 microsecond units and split into seconds, deciseconds, milliseconds and
		    10 microsecond steps. The deciseconds code always comes first; each further
		    part is preceded by a '1' flag when present, or replaced by a single '0'.
		    """
		    # 1, 2, ... 30
		    seconds_table = [
		        '0011', '0001', '0000', '1111', '1101', '1100', '1011', '1001', '1000', '01110',
		        '01101', '01100', '01011', '01001', '01000', '00101', '11101', '11100', '10101', '011111',
		        '011110', '010101', '001001', '001000', '101001', '0101001', '0101000', '1010001', '10100001', '10100000']

		    # 0, 0.1, ... 1.0
		    deciseconds_table = [
		        '110', '100', '101', '0110', '0111', '111', '0100', '0101', '0010', '0011', '000']

		    # 0, 1, ..., 99
		    milliseconds_table = [
		        '1111010', '1111011', '1111000', '1111001', '1111110', '1111111', '1111100', '1111101', '1110010', '1110011',
		        '11001', '1110000', '1110001', '1110110', '1110111', '1110100', '1110101', '0101010', '0101011', '0101000',
		        '10010', '0101001', '0101110', '0101111', '0101100', '0101101', '0100010', '0100011', '0100000', '0100001',
		        '10011', '0100110', '0100111', '0100100', '0100101', '0111010', '0111011', '0111000', '0111001', '0111110',
		        '10000', '0111111', '0111100', '0111101', '0110010', '0110011', '0110000', '0110001', '0110110', '0110111',
		        '10001', '0110100', '0110101', '0001010', '0001011', '0001000', '0001001', '0001110', '0001111', '0001100',
		        '10110', '0001101', '0000010', '0000011', '0000000', '0000001', '0000110', '0000111', '0000100', '0000101',
		        '10111', '0011010', '0011011', '0011000', '0011001', '0011110', '0011111', '0011100', '0011101', '0010010',
		        '10100', '0010011', '0010000', '0010001', '0010110', '0010111', '0010100', '0010101', '1101010', '1101011',
		        '10101', '1101000', '1101001', '1101110', '1101111', '1101100', '1101101', '1100010', '1100011', '110000']

		    # 10, 20, ... 990
		    microseconds_table = [
		        '110111100', '10010', '110111101', '10011', '1101111110', '10000', '1101111111', '10001', '1101111100', '10110',
		        '1101111101', '10111', '110110010', '10100', '110110011', '10101', '110110000', '001010', '110110001', '001011',
		        '110110110', '001000', '110110111', '001001', '110110100', '001110', '110110101', '001111', '110011010', '001100',
		        '110011011', '001101', '110011000', '000010', '110011001', '000011', '110011110', '000000', '110011111', '000001',
		        '110011100', '000110', '110011101', '000111', '110010010', '000100', '110010011', '000101', '110010000', '011010',
		        '110010001', '011011', '110010110', '011000', '110010111', '011001', '110010100', '011110', '110010101', '011111',
		        '110101010', '011100', '110101011', '011101', '110101000', '010010', '110101001', '010011', '110101110', '010000',
		        '110101111', '010001', '110101100', '010110', '110101101', '010111', '110100010', '010100', '110100011', '010101',
		        '110100000', '111010', '110100001', '111011', '110100110', '111000', '110100111', '111001', '110100100', '111110',
		        '110100101', '111111', '110111010', '111100', '110111011', '111101', '110111000', '11000', '110111001']

		    total_dus = int(round(np.float32(duration) * np.float32(100000)))  # [deca us]
		    if print_cfg:
		        print('duration: ', total_dus)

		    s, below_second = divmod(total_dus, 100000)  # s: 0, 1, ... 30 [s]
		    ms, dus = divmod(below_second, 100)          # ms: 0 ... 999 [ms], dus: 0 ... 99 [deca us]
		    ds, ms = divmod(ms, 100)                     # ds: 0 ... 9 [deci s], ms: 0 ... 99 [ms]
		    if s >= 1 and ds == 0:
		        # move one full second into the deciseconds field (table entry 1.0)
		        s, ds = s - 1, 10

		    assert 0 <= s <= 30
		    assert 0 <= ds <= 10
		    assert 0 <= ms <= 99
		    assert 0 <= dus <= 99
		    assert total_dus == s * 100000 + ds * 10000 + ms * 100 + dus

		    parts = [deciseconds_table[ds]]
		    if ms == 0 and dus == 0:
		        parts.append('0')
		    else:
		        parts.append('1' + milliseconds_table[ms])
		        parts.append(('1' + microseconds_table[dus - 1]) if dus > 0 else '0')
		    # long range mode not implemented
		    parts.append(('1' + seconds_table[s - 1]) if s > 0 else '0')

		    return ''.join(parts)


		def get_frequency_code(f):
		    """Return the bit string for frequency f, with 16 <= f <= 40000.

		    A frequency listed in the table is emitted as its code followed by '0'.
		    Any other frequency is approximated from the nearest lower table frequency
		    f_low as f_low * 2 ** ((refinement + 1) / 51), emitted as the code of f_low,
		    a '1' flag and the 4 bit refinement value (0 ... 15). If f is too close to
		    f_low for a refinement step, f_low itself is emitted; if it is beyond the
		    largest refinement step, the next higher table frequency is emitted.

		    Raises AssertionError when f is outside the supported range.
		    """
		    frequencyCode = {
		        16 : '100011', 20 : '001110', 25 : '001111', 31.5 : '1001', 40 : '001100',
		        50 : '001101', 63 : '0000', 80 : '011010', 100 : '011011', 125 : '0001',
		        160 : '011000', 200 : '011001', 250 : '1110', 315 : '011110', 400 : '011111',
		        500 : '1111', 630 : '011100', 800 : '011101', 1000 : '1100', 1250 : '010010',
		        1600 : '010011', 2000 : '1101', 2500 : '010000', 3150 : '010001', 4000 : '1010',
		        5000 : '010110', 6300 : '010111', 8000 : '1011', 10000: '010100', 12500: '010101',
		        16000: '0010', 20000: '10000', 25000: '10001010', 31500: '10001011', 40000: '1000100', }

		    assert 16 <= f <= 40000
		    if f in frequencyCode:
		        if print_cfg:
		            print('frequency:', f)
		        return frequencyCode[f] + '0'

		    # exact frequency not found, use frequency refinement to approximate
		    # (largest relative deviation seen for range(16, 40000) was 0.006818)
		    # find frequencies enveloping f
		    f_low = max((key for key in frequencyCode if key < f), default=16)
		    f_high = min((key for key in frequencyCode if key >= f), default=40000)

		    refinement = round(51 * math.log(f / f_low, 2)) - 1
		    if refinement >= 16:
		        # beyond the largest refinement step: choose next higher frequency
		        if print_cfg:
		            print('frequency:', f_high)
		        return frequencyCode[f_high] + '0'
		    if refinement < 0:
		        # f rounds to f_low itself; a negative value cannot be written as a
		        # 4 bit field (format() would emit a '-' sign), so send f_low unrefined
		        if print_cfg:
		            print('frequency:', f_low)
		        return frequencyCode[f_low] + '0'

		    if print_cfg:
		        print('frequency:', f_low, ', refined: ', f_low * 2 ** ((refinement + 1) / 51))
		    return frequencyCode[f_low] + '1' + format(refinement, '04b')


		def get_frequency_hop_code(index):
		    """Return the bit string for the frequency hop table entry `index` (0 ... 8)."""
		    hop_codes = (
		        '1100',  # 2^(1/8)
		        '1101',  # 2^(1/7)
		        '0010',  # 2^(1/6)
		        '0011',  # 2^(1/5)
		        '0000',  # 2^(1/4)
		        '01',    # 2^(1/3)
		        '0001',  # 2^(1/2)
		        '10',    # 2^1
		        '111',   # 2^2
		    )
		    assert 0 <= index < 9
		    return hop_codes[index]


		def get_dsr_code(dsr):
		    """Return the bit string for a linear DSR value.

		    The value is converted with 10 * log10(dsr), rounded to the nearest integer
		    and looked up in a table covering -150 ... -10 in steps of 1.
		    Raises AssertionError when the rounded value falls outside that range.
		    """
		    # -150.0, -149.0, ... -10.0
		    code_table = [
		        '10001100', '10001101', '100011110', '100011111', '100011100', '100011101', '10000010', '10000011', '10000000', '10000001',
		        '10000110', '10000111', '10000100', '10000101', '011101010', '011101011', '011101000', '011101001', '011101110', '011101111',
		        '011101100', '011101101', '011100010', '011100011', '011100000', '011100001', '011100110', '011100111', '011100100', '011100101',
		        '011111010', '011111011', '011111000', '011111001', '011111110', '011111111', '011111100', '011111101', '011110010', '011110011',
		        '011110000', '011110001', '011110110', '011110111', '011110100', '011110101', '011001010', '011001011', '011001000', '011001001',
		        '011001110', '011001111', '011001100', '011001101', '011000010', '011000011', '011000000', '011000001', '011000110', '011000111',
		        '011000100', '011000101', '011011010', '011011011', '011011000', '011011001', '011011110', '011011111', '011011100', '011011101',
		        '010100', '010101', '100110', '100111', '100100', '100101', '111010', '111011', '111000', '111001',
		        '111110', '111111', '111100', '111101', '110010', '110011', '110000', '110001', '110110', '110111',
		        '110100', '110101', '001010', '001011', '001000', '001001', '001110', '001111', '001100', '001101',
		        '000010', '000011', '000000', '000001', '000110', '000111', '000100', '000101', '101010', '101011',
		        '101000', '101001', '101110', '101111', '101100', '101101', '010010', '010011', '010000', '010001',
		        '010110', '011010010', '011010011', '011010000', '011010001', '011010110', '011010111', '011010100', '011010101', '010111010',
		        '010111011', '010111000', '010111001', '010111110', '010111111', '010111100', '010111101', '10001010', '10001011', '10001000',
		        '10001001']

		    level = math.log10(dsr) * 10
		    table_index = round(level + 150)
		    assert 0 <= table_index <= 140
		    if print_cfg:
		        # C decoder uses float precision math
		        quantized = np.float32(np.power(np.float32(10), np.float32(table_index - 150) / np.float32(10)))
		        print('dsr:', quantized)
		    return code_table[table_index]


		class fgdMethod(Enum):
		    """Bit strings available for the fgdMethod field of a payload.

		    Each member's value is a two-character string of '0'/'1' that is
		    concatenated verbatim into the payload bit string.
		    """

		    Individual_Frequencies = '00'
		    Start_Hop_Amount = '01'
		    Default_Banding = '10'


		def concatenate(function, data):
		    """Apply `function` to every element of `data` and join the resulting strings."""
		    pieces = []
		    for element in data:
		        pieces.append(function(element))
		    return ''.join(pieces)


		def test():
		    """Write 'test_python.dat' covering the full input range of each encoder.

		    The generated binary output can be compared with the Matlab implementation
		    output. The file is written relative to the current working directory.
		    """
		    string = ''

		    # count or index encoding
		    string += concatenate(get_count_or_index_code, range(0, 16 * 64))

		    # duration encoding
		    string += concatenate(get_duration_code, [d / 1000 for d in range(0, 30 * 1000)])
		    string += concatenate(get_duration_code, [d / 10000 for d in range(0, 30 * 1000)])
		    string += concatenate(get_duration_code, [d / 100000 for d in range(0, 30 * 1000)])

		    # frequency encoding
		    string += concatenate(get_frequency_code,
		        [16   , 20   , 25   , 31.5 , 40   , 50   , 63   , 80  , 100  , 125  ,
		         160  , 200  , 250  , 315  , 400  , 500  , 630  , 800 , 1000 , 1250 ,
		         1600 , 2000 , 2500 , 3150 , 4000 , 5000 , 6300 , 8000, 10000, 12500,
		         16000, 20000, 25000, 31500, 40000])

		    # frequency hop encoding
		    string += concatenate(get_frequency_hop_code, range(0, 9))

		    # DSR encoding
		    string += concatenate(get_dsr_code, [math.pow(10, dsr / 10) for dsr in range(-150, -10 + 1)])

		    data = bitarray(string, endian='big')

		    # the context manager closes the file even if writing fails
		    with open('test_python.dat', 'wb') as file:
		        data.tofile(file)


		def generate_reverb_payload_equivalent_to_rend_config_renderer_cfg():
		    """Write 'rend_config_renderer.dat', the binary payload based on config_renderer.cfg.

		    Note that because of encoding, resolution is lost and behaviour may not be
		    bit-exact compared to .cfg file based values. The file is written relative
		    to the current working directory.
		    """
		    payload = (
		        get_count_or_index_code(1)                      # fgdNrGrids
		        + fgdMethod.Individual_Frequencies.value        # fgdMethod
		        + get_count_or_index_code(31)                   # fgdNrBands

		        + concatenate(get_frequency_code,               # fgdCenterFreq
		            [ 20.0, 25.0, 31.5, 40.0, 50.0, 63.0, 80.0, 100.0, 125.0, 160.0,
		              200.0, 250.0, 315.0, 400.0, 500.0, 630.0, 800.0, 1000.0, 1250.0, 1600.0,
		              2000.0, 2500.0, 3150.0, 4000.0, 5000.0, 6300.0, 8000.0, 10000.0, 12500.0, 16000.0,
		              20000.0 ])

		        + get_count_or_index_code(1)                    # AcousticEnvCount
		        + get_id_code(0)                                # ID
		        + get_count_or_index_code(0)                    # FreqGridID
		        + get_duration_code(0.1)                        # (input)Predelay

		        + concatenate(get_duration_code,                # RT60
		            [ 1.3622, 1.4486, 1.3168, 1.5787, 1.4766, 1.3954, 1.2889, 1.3462, 1.0759, 1.0401,
		              1.0970, 1.0850, 1.0910, 1.0404, 1.0499, 1.0699, 1.1028, 1.1714, 1.1027, 1.0666,
		              1.0550, 1.0553, 1.0521, 1.0569, 1.0421, 0.97822, 0.80487, 0.75944, 0.71945, 0.61682,
		              0.60031 ])

		        + concatenate(get_dsr_code,                     # DSR
		            [ 1.8811e-08, 2.1428e-08, 1.3972e-08, 1.51e-08, 1.287e-08, 1.8747e-08, 2.413e-08, 3.9927e-08, 8.9719e-08, 1.902e-07,
		              3.702e-07, 6.1341e-07, 7.1432e-07, 6.5331e-07, 4.6094e-07, 5.4683e-07, 7.0134e-07, 6.856e-07, 7.114e-07, 6.9604e-07,
		              5.2939e-07, 5.699e-07, 6.1773e-07, 5.7488e-07, 4.7748e-07, 2.7213e-07, 1.3681e-07, 1.0941e-07, 6.2001e-08, 2.8483e-08,
		              2.6267e-08 ])
		    )
		    data = bitarray(payload, endian='big')

		    # the context manager closes the file even if writing fails
		    with open('rend_config_renderer.dat', 'wb') as file:
		        data.tofile(file)


		def generate_reverb_payload_equivalent_to_rend_config_hospital_patientroom_cfg():
		    """Write 'rend_config_hospital_patientroom.dat', based on config_hospital_patientroom.cfg.

		    Note that because of encoding, resolution is lost and behaviour may not be
		    bit-exact compared to .cfg file based values. The file is written relative
		    to the current working directory.
		    """
		    payload = (
		        get_count_or_index_code(1)                      # fgdNrGrids
		        + fgdMethod.Individual_Frequencies.value        # fgdMethod
		        + get_count_or_index_code(31)                   # fgdNrBands

		        + concatenate(get_frequency_code,               # fgdCenterFreq
		            [ 20.0, 25.0, 31.5, 40.0, 50.0, 63.0, 80.0, 100.0, 125.0, 160.0,
		              200.0, 250.0, 315.0, 400.0, 500.0, 630.0, 800.0, 1000.0, 1250.0, 1600.0,
		              2000.0, 2500.0, 3150.0, 4000.0, 5000.0, 6300.0, 8000.0, 10000.0, 12500.0, 16000.0,
		              20000.0 ])

		        + get_count_or_index_code(1)                    # AcousticEnvCount
		        + get_id_code(0)                                # ID
		        + get_count_or_index_code(0)                    # FreqGridID
		        + get_duration_code(0.08163)                    # (input)Predelay

		        + concatenate(get_duration_code,                # RT60
		            [ 0.81275, 0.61888, 0.45111, 0.34672, 0.46683, 0.53987, 0.61874, 0.70291, 0.66657, 0.73037,
		              0.75090, 0.72470, 0.75486, 0.75857, 0.76844, 0.74999, 0.77622, 0.78227, 0.77441, 0.74688,
		              0.73521, 0.73782, 0.71928, 0.71708, 0.71465, 0.60592, 0.52031, 0.51768, 0.52102, 0.37956,
		              0.30786 ])

		        + concatenate(get_dsr_code,                     # DSR
		            [ 0.000219780698, 0.000205275364, 7.18711e-05, 4.5745977e-05, 8.381106e-06, 6.884964e-06, 6.532765e-06, 8.296928e-06, 1.0005793e-05, 9.191127e-06,
		              8.635287e-06, 9.627704e-06, 1.0806965e-05, 1.0041916e-05, 7.77047e-06, 9.695803e-06, 9.594324e-06, 8.32215e-06, 7.564813e-06, 6.215871e-06,
		              6.379496e-06, 6.358105e-06, 6.6696e-06, 6.369334e-06, 6.378474e-06, 3.339913e-06, 3.129318e-06, 2.892564e-06, 6.00202e-07, 3.40124e-07,
		              3.37705e-07 ])
		    )
		    data = bitarray(payload, endian='big')

		    # the context manager closes the file even if writing fails
		    with open('rend_config_hospital_patientroom.dat', 'wb') as file:
		        data.tofile(file)


		def generate_reverb_payload_equivalent_to_rend_config_recreation_cfg():
		    """Write 'rend_config_recreation.dat', the binary payload based on config_recreation.cfg.

		    Note that because of encoding, resolution is lost and behaviour may not be
		    bit-exact compared to .cfg file based values. The file is written relative
		    to the current working directory.
		    """
		    payload = (
		        get_count_or_index_code(1)                      # fgdNrGrids
		        + fgdMethod.Individual_Frequencies.value        # fgdMethod
		        + get_count_or_index_code(31)                   # fgdNrBands

		        + concatenate(get_frequency_code,               # fgdCenterFreq
		            [ 20.0, 25.0, 31.5, 40.0, 50.0, 63.0, 80.0, 100.0, 125.0, 160.0,
		              200.0, 250.0, 315.0, 400.0, 500.0, 630.0, 800.0, 1000.0, 1250.0, 1600.0,
		              2000.0, 2500.0, 3150.0, 4000.0, 5000.0, 6300.0, 8000.0, 10000.0, 12500.0, 16000.0,
		              20000.0 ])

		        + get_count_or_index_code(1)                    # AcousticEnvCount
		        + get_id_code(0)                                # ID
		        + get_count_or_index_code(0)                    # FreqGridID
		        + get_duration_code(0.43031)                    # (input)Predelay

		        + concatenate(get_duration_code,                # RT60
		            [ 4.51916, 4.89553, 4.83276, 5.00198, 5.34468, 5.76026, 6.36818, 6.95503, 7.27557, 7.62559,
		              8.08892, 8.16002, 8.13900, 8.17919, 8.16280, 8.46226, 9.61806, 9.93048, 9.81353, 8.59340,
		              8.38885, 8.36823, 6.51845, 3.76089, 3.75374, 3.57451, 1.28724, 1.22174, 1.22448, 1.71631,
		              2.14343 ])

		        + concatenate(get_dsr_code,                     # DSR
		            [ 9.18578e-07, 7.63803e-07, 9.23183e-07, 1.048656e-06, 1.61449e-06, 2.13745e-06, 2.854805e-06, 3.979651e-06, 6.229977e-06, 7.782421e-06,
		              9.091754e-06, 8.545798e-06, 7.482083e-06, 7.351071e-06, 7.947039e-06, 8.152676e-06, 5.201189e-06, 4.744103e-06, 4.397069e-06, 3.017449e-06,
		              2.958383e-06, 2.725911e-06, 7.94912e-07, 6.20198e-07, 5.71181e-07, 5.5546e-08, 1.3987e-08, 1.338e-08, 1.322e-09, 1.3e-11,
		              4e-12 ])
		    )
		    data = bitarray(payload, endian='big')

		    # the context manager closes the file even if writing fails
		    with open('rend_config_recreation.dat', 'wb') as file:
		        data.tofile(file)


		# Script body: these calls run whenever the file is executed (or imported) and
		# each one writes its .dat payload file relative to the current working directory.
		# Uncomment test() to additionally write 'test_python.dat' for comparison with
		# the Matlab implementation output.
		#test()
		generate_reverb_payload_equivalent_to_rend_config_renderer_cfg()
		generate_reverb_payload_equivalent_to_rend_config_hospital_patientroom_cfg()
		generate_reverb_payload_equivalent_to_rend_config_recreation_cfg()

tests/test_param_file.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -57,7 +57,7 @@ VALID_DEC_OUTPUT_CONF = [
		"HOA2",
		"HOA3",
		"BINAURAL",
		"BINAURAL_ROOM",
		"BINAURAL_ROOM_IR",
		"EXT",
		]