Commit 2755c583 authored by malenovsky's avatar malenovsky
Browse files

Merge branch 'ericsson/review-item-creation-ism' into 'main'

Item generation: align with test plan for P800-6 and P800-7

See merge request !103
parents f5a9aeb5 25136843
Loading
Loading
Loading
Loading
+254 −254
Original line number Diff line number Diff line
@@ -31,11 +31,12 @@ postamble: 1.0
add_low_level_random_noise: true

### File designators, default is "l" for listening lab, "EN" for language, "p06" for exp and "g" for provider
listening_lab: "l"
language: "EN"
listening_lab: "a"
language: "JP"
exp: "p06"
provider: "g"


### Use prefix for all input filenames (default: "")
### l stands for the 'listening_lab' designator, L stands for the 'language' designator, e stands for the 'exp' designator (the number of consecutive letters defines the length of the field)
use_input_prefix: "lLLeee"
@@ -92,55 +93,55 @@ scenes:
        source: ["m1s01.wav", "m1s07.wav"]
        azimuth: 0 
        elevation: 0 
        overlap: -1.0
        overlap: -0.5
        
    cat1_2: 
        name: "cat1/a1s02.wav"
        description: "Talker sitting at a table"
        source: ["f3s02.wav", "f3s08.wav"]
        azimuth: 60 
        elevation: 0 
        overlap: -1.0
        description: "Standing talker."
        source: ["m1s02.wav", "m1s08.wav"]
        azimuth: 180 
        elevation: 35 
        overlap: -0.5

    cat1_3: 
        name: "cat1/a1s03.wav"
        description: "Talker sitting at a table"
        source: ["m3s03.wav", "m3s09.wav"]
        azimuth: 120 
        description: "Smaller talker (child) walking around a table."
        source: ["m1s03.wav", "m1s09.wav"]
        azimuth: "120:1:120+360" 
        elevation: 0 
        overlap: -1.0
        overlap: -0.5

    cat1_4: 
        name: "cat1/a1s04.wav"
        description: "Talker sitting at a table"
        source: ["f2s04.wav", "f2s10.wav"]
        azimuth: 180 
        elevation: 0 
        overlap: -1.0
        description: "Talker walking around the table."
        source: ["m1s04.wav", "m1s10.wav"]
        azimuth: "180:-1:180-360" 
        elevation: 35 
        overlap: -0.5

    cat1_5: 
        name: "cat1/a1s05.wav"
        description: "Talker sitting at a table"
        source: ["m2s05.wav", "m2s11.wav"]
        azimuth: 240 
        elevation: 0 
        overlap: -1.0
        description: "Elevation displacement."
        source: ["m1s05.wav", "m1s11.wav"]
        azimuth: 120 
        elevation: "-90:0.3:90"  
        overlap: -0.5

    cat1_6: 
        name: "cat1/a1s06.wav"
        description: "Talker sitting at a table"
        source: ["f1s06.wav", "f1s12.wav"]
        azimuth: 300 
        elevation: 0 
        overlap: -1.0
        description: "Azimuth and elevation displacement."
        source: ["m1s06.wav", "m1s12.wav"]
        azimuth: "0:0.5:0+180" 
        elevation: "35:-0.2:-35" 
        overlap: -0.5
        
    cat1_7:
        name: "cat1/a1s07.wav"
        description: "Preliminary: Talker sitting at a table"
        source: ["f1s13.wav", "f1s14.wav"]
        azimuth: 0 
        elevation: 0 
        overlap: -1.0
        description: "Preliminary: Standing talker."
        source: ["m1s13.wav", "m1s14.wav"]
        azimuth: 180 
        elevation: 35 
        overlap: -0.5        

    cat2_1: 
        name: "cat2/a2s01.wav"
@@ -148,55 +149,55 @@ scenes:
        source: ["f1s01.wav", "f1s07.wav"]
        azimuth: 120 
        elevation: 35 
        overlap: -1.0
        overlap: -0.5

    cat2_2: 
        name: "cat2/a2s02.wav"
        description: "Standing talker."
        source: ["m1s02.wav", "m1s08.wav"]
        azimuth: 180 
        elevation: 35 
        overlap: -1.0
        description: "Smaller talker (child) walking around a table."
        source: ["f1s02.wav", "f1s08.wav"]
        azimuth: "60:1:60+360" 
        elevation: 0 
        overlap: -0.5

    cat2_3: 
        name: "cat2/a2s03.wav"
        description: "Standing talker."
        source: ["f3s03.wav", "f3s09.wav"]
        azimuth: 240 
        description: "Talker walking around the table."
        source: ["f1s03.wav", "f1s09.wav"]
        azimuth: "120:-1:120-360" 
        elevation: 35 
        overlap: -1.0
        overlap: -0.5

    cat2_4: 
        name: "cat2/a2s04.wav"
        description: "Standing talker."
        source: ["m3s04.wav", "m3s10.wav"]
        azimuth: 300 
        elevation: 35 
        overlap: -1.0
        description: "Elevation displacement."
        source: ["f1s04.wav", "f1s10.wav"]
        azimuth: 60 
        elevation: "-90:0.3:90"  
        overlap: -0.5

    cat2_5: 
        name: "cat2/a2s05.wav"
        description: "Standing talker."
        source: ["f2s05.wav", "f2s11.wav"]
        azimuth: 0 
        elevation: 35 
        overlap: -1.0
        description: "Azimuth and elevation displacement."
        source: ["f1s05.wav", "f1s11.wav"]
        azimuth: "300:0.5:300+180" 
        elevation: "35:-0.2:-35" 
        overlap: -0.5

    cat2_6: 
        name: "cat2/a2s06.wav"
        description: "Standing talker."
        source: ["m2s06.wav", "m2s12.wav"]
        azimuth: 60 
        elevation: 35 
        overlap: -1.0
        description: "Talker sitting at a table"
        source: ["f1s06.wav", "f1s12.wav"]
        azimuth: 300 
        elevation: 0 
        overlap: -0.5
        
    cat2_7: 
        name: "cat2/a2s07.wav"
        description: "Preliminary: Standing talker."
        source: ["m1s13.wav", "m1s14.wav"]
        azimuth: 180 
        elevation: 35 
        overlap: -1.0
        description: "Preliminary: Talker sitting at a table"
        source: ["f1s13.wav", "f1s14.wav"]
        azimuth: 0 
        elevation: 0 
        overlap: -0.5        

    cat3_1: 
        name: "cat3/a3s01.wav"
@@ -204,55 +205,55 @@ scenes:
        source: ["m2s01.wav", "m2s07.wav"]
        azimuth: "0:1:360"
        elevation: 0 
        overlap: -1.0
        overlap: -0.5

    cat3_2: 
        name: "cat3/a3s02.wav"
        description: "Smaller talker (child) walking around a table."
        source: ["f1s02.wav", "f1s08.wav"]
        azimuth: "60:1:60+360" 
        elevation: 0 
        overlap: -1.0
        description: "Talker walking around the table."
        source: ["m2s02.wav", "m2s08.wav"]
        azimuth: "60:-1:60-360" 
        elevation: 35 
        overlap: -0.5

    cat3_3: 
        name: "cat3/a3s03.wav"
        description: "Smaller talker (child) walking around a table."
        source: ["m1s03.wav", "m1s09.wav"]
        azimuth: "120:1:120+360" 
        elevation: 0 
        overlap: -1.0
        description: "Elevation displacement."
        source: ["m2s03.wav", "m2s09.wav"]
        azimuth: 0 
        elevation: "-90:0.3:90"  
        overlap: -0.5

    cat3_4: 
        name: "cat3/a3s04.wav"
        description: "Smaller talker (child) walking around a table."
        source: ["f3s04.wav", "f3s10.wav"]
        azimuth: "180:1:180+360" 
        elevation: 0 
        overlap: -1.0
        description: "Azimuth and elevation displacement."
        source: ["m2s04.wav", "m2s10.wav"]
        azimuth: "240:0.5:240+180" 
        elevation: "35:-0.2:-35"
        overlap: -0.5

    cat3_5: 
        name: "cat3/a3s05.wav"
        description: "Smaller talker (child) walking around a table."
        source: ["m3s05.wav", "m3s11.wav"]
        azimuth: "240:1:240+360"
        description: "Talker sitting at a table"
        source: ["m2s05.wav", "m2s11.wav"]
        azimuth: 240 
        elevation: 0 
        overlap: -1.0
        overlap: -0.5

    cat3_6: 
        name: "cat3/a3s06.wav"
        description: "Smaller talker (child) walking around a table."
        source: ["f2s06.wav", "f2s12.wav"]
        azimuth: "300:1:300+360" 
        elevation: 0 
        overlap: -1.0
        description: "Standing talker."
        source: ["m2s06.wav", "m2s12.wav"]
        azimuth: 60 
        elevation: 35 
        overlap: -0.5
        
    cat3_7:
        name: "cat3/a3s07.wav"
        description: "Preliminary: Smaller talker (child) walking around a table."
        source: ["f2s13.wav", "f2s14.wav"]
        azimuth: "120:1:120+360" 
        elevation: 0 
        overlap: -1.0
        description: "Preliminary: Talker walking around the table."
        source: ["m2s13.wav", "m2s14.wav"]
        azimuth: "180:-1:180-360" 
        elevation: 35 
        overlap: -0.5

    cat4_1: 
        name: "cat4/a4s01.wav"
@@ -260,111 +261,111 @@ scenes:
        source: ["f2s01.wav", "f2s07.wav"]
        azimuth: "0:-1:-360"
        elevation: 35 
        overlap: -1.0
        overlap: -0.5

    cat4_2: 
        name: "cat4/a4s02.wav"
        description: "Talker walking around the table."
        source: ["m2s02.wav", "m2s08.wav"]
        azimuth: "60:-1:60-360" 
        elevation: 35 
        overlap: -1.0
        description: "Elevation displacement."
        source: ["f2s02.wav", "f2s08.wav"]
        azimuth: 300 
        elevation: "-90:0.3:90" 
        overlap: -0.5

    cat4_3: 
        name: "cat4/a4s03.wav"
        description: "Talker walking around the table."
        source: ["f1s03.wav", "f1s09.wav"]
        azimuth: "120:-1:120-360" 
        elevation: 35 
        overlap: -1.0
        description: "Azimuth and elevation displacement."
        source: ["f2s03.wav", "f2s09.wav"]
        azimuth: "180:0.5:180+180" 
        elevation: "35:-0.2:-35" 
        overlap: -0.5

    cat4_4: 
        name: "cat4/a4s04.wav"
        description: "Talker walking around the table."
        source: ["m1s04.wav", "m1s10.wav"]
        azimuth: "180:-1:180-360" 
        elevation: 35 
        overlap: -1.0
        description: "Talker sitting at a table"
        source: ["f2s04.wav", "f2s10.wav"]
        azimuth: 180 
        elevation: 0 
        overlap: -0.5

    cat4_5: 
        name: "cat4/a4s05.wav"
        description: "Talker walking around the table."
        source: ["f3s05.wav", "f3s11.wav"]
        azimuth: "240:-1:240-360"
        description: "Standing talker."
        source: ["f2s05.wav", "f2s11.wav"]
        azimuth: 0 
        elevation: 35 
        overlap: -1.0
        overlap: -0.5

    cat4_6: 
        name: "cat4/a4s06.wav"
        description: "Talker walking around the table."
        source: ["m3s06.wav", "m3s12.wav"]
        azimuth: "300:-1:300-360" 
        elevation: 35
        overlap: -1.0
        description: "Smaller talker (child) walking around a table."
        source: ["f2s06.wav", "f2s12.wav"]
        azimuth: "300:1:300+360" 
        elevation: 0 
        overlap: -0.5
        
    cat4_7:
        name: "cat4/a4s07.wav"
        description: "Preliminary: Talker walking around the table."
        source: ["m2s13.wav", "m2s14.wav"]
        azimuth: "180:-1:180-360" 
        elevation: 35 
        overlap: -1.0
        description: "Preliminary: Smaller talker (child) walking around a table."
        source: ["f2s13.wav", "f2s14.wav"]
        azimuth: "120:1:120+360" 
        elevation: 0 
        overlap: -0.5        

    cat5_1: 
        name: "cat5/a5s01.wav"
        description: "Elevation displacement."
        source: ["m3s01.wav", "m3s07.wav"]
        azimuth: 240 
        elevation: "-90:0.5:90" 
        overlap: -1.0
        elevation: "-90:0.3:90" 
        overlap: -0.5

    cat5_2: 
        name: "cat5/a5s02.wav"
        description: "Elevation displacement."
        source: ["f2s02.wav", "f2s08.wav"]
        azimuth: 300 
        elevation: 0 
        overlap: -1.0
        description: "Azimuth and elevation displacement."
        source: ["m3s02.wav", "m3s08.wav"]
        azimuth: "120:0.5:120+180" 
        elevation: "35:-0.2:-35" 
        overlap: -0.5

    cat5_3: 
        name: "cat5/a5s03.wav"
        description: "Elevation displacement."
        source: ["m2s03.wav", "m2s09.wav"]
        azimuth: 0 
        elevation: "-90:0.5:90"  
        overlap: -1.0
        description: "Talker sitting at a table"
        source: ["m3s03.wav", "m3s09.wav"]
        azimuth: 120 
        elevation: 0 
        overlap: -0.5

    cat5_4: 
        name: "cat5/a5s04.wav"
        description: "Elevation displacement."
        source: ["f1s04.wav", "f1s10.wav"]
        azimuth: 60 
        elevation: "-90:0.5:90"  
        overlap: -1.0
        description: "Standing talker."
        source: ["m3s04.wav", "m3s10.wav"]
        azimuth: 300 
        elevation: 35 
        overlap: -0.5

    cat5_5: 
        name: "cat5/a5s05.wav"
        description: "Elevation displacement."
        source: ["m1s05.wav", "m1s11.wav"]
        azimuth: 120 
        elevation: "-90:0.5:90"  
        overlap: -1.0
        description: "Smaller talker (child) walking around a table."
        source: ["m3s05.wav", "m3s11.wav"]
        azimuth: "240:1:240+360"
        elevation: 0 
        overlap: -0.5

    cat5_6: 
        name: "cat5/a5s06.wav"
        description: "Elevation displacement."
        source: ["f3s06.wav", "f3s12.wav"]
        azimuth: 180 
        elevation: "-90:0.5:90"  
        overlap: -1.0
        description: "Talker walking around the table."
        source: ["m3s06.wav", "m3s12.wav"]
        azimuth: "300:-1:300-360" 
        elevation: 35
        overlap: -0.5
        
    cat5_7:
        name: "cat5/a5s07.wav"
        description: "Preliminary: Elevation displacement."
        source: ["f3s13.wav", "f3s14.wav"]
        azimuth: 120 
        elevation: "-90:0.5:90"  
        overlap: -1.0
        description: "Preliminary: Azimuth and elevation displacement."
        source: ["m3s13.wav", "m3s14.wav"]
        azimuth: "0:0.5:0+180" 
        elevation: "35:-0.2:-35" 
        overlap: -0.5        

    cat6_1: 
        name: "cat6/a6s01.wav"
@@ -372,53 +373,52 @@ scenes:
        source: ["f3s01.wav", "f3s07.wav"]
        azimuth: "60:0.5:60+180" 
        elevation: "35:-0.2:-35"
        overlap: -1.0
        overlap: -0.5
        
    cat6_2: 
        name: "cat6/a6s02.wav"
        description: "Azimuth and elevation displacement."
        source: ["m3s02.wav", "m3s08.wav"]
        azimuth: "120:0.5:120+180" 
        elevation: "35:-0.2:-35" 
        overlap: -1.0
        description: "Talker sitting at a table"
        source: ["f3s02.wav", "f3s08.wav"]
        azimuth: 60 
        elevation: 0 
        overlap: -0.5
        
    cat6_3: 
        name: "cat6/a6s03.wav"
        description: "Azimuth and elevation displacement."
        source: ["f2s03.wav", "f2s09.wav"]
        azimuth: "180:0.5:180+180" 
        elevation: "35:-0.2:-35" 
        overlap: -1.0
        description: "Standing talker."
        source: ["f3s03.wav", "f3s09.wav"]
        azimuth: 240 
        elevation: 35 
        overlap: -0.5

    cat6_4: 
        name: "cat6/a6s04.wav"
        description: "Azimuth and elevation displacement."
        source: ["m2s04.wav", "m2s10.wav"]
        azimuth: "240:0.5:240+180" 
        elevation: "35:-0.2:-35"
        overlap: -1.0
        description: "Smaller talker (child) walking around a table."
        source: ["f3s04.wav", "f3s10.wav"]
        azimuth: "180:1:180+360" 
        elevation: 0 
        overlap: -0.5

    cat6_5: 
        name: "cat6/a6s05.wav"
        description: "Azimuth and elevation displacement."
        source: ["f1s05.wav", "f1s11.wav"]
        azimuth: "300:0.5:300+180" 
        elevation: "35:-0.2:-35" 
        overlap: -1.0
        description: "Talker walking around the table."
        source: ["f3s05.wav", "f3s11.wav"]
        azimuth: "240:-1:240-360"
        elevation: 35 
        overlap: -0.5

    cat6_6: 
        name: "cat6/a6s06.wav"
        description: "Azimuth and elevation displacement."
        source: ["m1s06.wav", "m1s12.wav"]
        azimuth: "0:0.5:0+180" 
        elevation: "35:-0.2:-35" 
        overlap: -1.0
        description: "Elevation displacement."
        source: ["f3s06.wav", "f3s12.wav"]
        azimuth: 180 
        elevation: "-90:0.3:90"  
        overlap: -0.5

    cat6_7:
        name: "cat6/a6s07.wav"
        description: "Preliminary: Azimuth and elevation displacement."
        source: ["m3s13.wav", "m3s14.wav"]
        azimuth: "0:0.5:0+180" 
        elevation: "35:-0.2:-35" 
        overlap: -1.0
  
 No newline at end of file
        description: "Preliminary: Elevation displacement."
        source: ["f3s13.wav", "f3s14.wav"]
        azimuth: 120 
        elevation: "-90:0.3:90"  
        overlap: -0.5
+199 −199

File changed.

Preview size limit exceeded, changes collapsed.

+48 −68
Original line number Diff line number Diff line
@@ -234,64 +234,85 @@ def generate_ism1_scene(
            y.audio.resize(x.audio.shape, refcheck=False)
            y.audio += x.audio

    # append pre-amble and post-amble to all sources
    if cfg.preamble != 0.0:
        # ensure that pre-amble is a multiple of 20ms
        N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

        # insert all-zero preamble to all sources
        pre = np.zeros((N_pre, y.audio.shape[1]))
        y.audio = np.concatenate([pre, y.audio])

    if cfg.postamble != 0.0:
        # ensure that post-amble is a multiple of 20ms
        N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

        # append all-zero postamble to all sources
        post = np.zeros((N_post, y.audio.shape[1]))
        y.audio = np.concatenate([y.audio, post])

    # add random noise
    if cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

        # superimpose
        y.audio += noise

    # process azimuth and elevation
    source_azi = scene["azimuth"]
    source_ele = scene["elevation"]

    N_frames = int(len(y.audio) / y.fs * 50)

    # read azimuth information and create array
    # read azimuth information and convert to an array
    if isinstance(source_azi, str):
        if ":" in source_azi:
            # start with the initial azimuth value and apply step N_frames times
            source_azi = source_azi.split(":")
            azi = np.arange(
                float(eval(source_azi[0])),
                float(eval(source_azi[2])),
                float(eval(source_azi[0])) + N_frames * float(eval(source_azi[1])),
                float(eval(source_azi[1])),
            )
        else:
            azi = np.array(float(eval(source_azi)), ndmin=1)[:N_frames]
            # replicate static azimuth value N_frames times
            azi = np.repeat(float(eval(source_azi)), N_frames)
    else:
        azi = np.array(source_azi, ndmin=1)[:N_frames]

    # ensure that azimuth array has N_frames values
    if len(azi) > N_frames:
        # cut the array of azimuth values
        azi = azi[:N_frames]
    elif len(azi) < N_frames:
        # replicate the last azimuth value
        azi = np.append(azi, np.full(N_frames - len(azi), azi[-1]))
        # replicate static azimuth value N_frames times
        azi = np.repeat(float(source_azi), N_frames)

    # convert azimuth from 0 .. 360 to -180 .. +180
    azi = (azi + 180) % 360 - 180

    # check if azimuth is from -180 .. +180
    # check that azimuth is within -180 .. +180
    if any(azi > 180) or any(azi < -180):
        logger.error(
            f"Incorrect value(s) of azimuth: {azi[(azi > 180) | (azi < -180)]}"
        )

    # read elevation information and create array
    # read elevation information and convert to an array
    if isinstance(source_ele, str):
        if ":" in source_ele:
            # convert into array (initial_value:step:stop_value)
            # note: the stop value is set to +90 or -90 degrees, depending on the sign of the step
            source_ele = source_ele.split(":")
            ele = np.arange(
                float(eval(source_ele[0])),
                float(eval(source_ele[2])),
                np.sign(float(eval(source_ele[1]))) * 90,
                float(eval(source_ele[1])),
            )
            )[:N_frames]

            # repeat the last elevation value, if array is shorter than N_frames
            if len(ele) < N_frames:
                ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
        else:
            ele = np.array(float(eval(source_ele)), ndmin=1)[:N_frames]
            # replicate static elevation value N_frames times
            ele = np.repeat(float(eval(source_ele)), N_frames)
    else:
        ele = np.array(source_ele, ndmin=1)[:N_frames]

    # ensure that elevation array has N_frames values
    if len(ele) > N_frames:
        # cut the array of elevation values
        ele = ele[:N_frames]
    elif len(ele) < N_frames:
        # replicate the last elevation
        ele = np.append(ele, np.full(N_frames - len(ele), ele[-1]))
        # replicate static elevation value N_frames times
        ele = np.repeat(float(source_ele), N_frames)

    # check if elevation is from -90 .. +90
    if any(ele > 90) or any(ele < -90):
@@ -299,49 +320,8 @@ def generate_ism1_scene(
            f"Incorrect value(s) of elevation: {ele[(ele > 90) | (ele < -90)]}"
        )

    # additional metadata (default values)
    radius = np.ones(N_frames)
    spread = np.zeros(N_frames)
    gain = np.ones(N_frames)

    # arrange all metadata fields column-wise into a matrix
    y_meta = np.column_stack((azi, ele, radius, spread, gain))

    # append pre-amble and post-amble to all sources
    if cfg.preamble != 0.0:
        # ensure that pre-amble is a multiple of 20ms
        N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs)

        # insert all-zero preamble to all sources
        pre = np.zeros((N_pre, y.audio.shape[1]))
        y.audio = np.concatenate([pre, y.audio])

        # insert neutral position as a pre-amble to all sources
        N_pre = int(N_pre / frame_len)
        pre = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_pre, 1))
        y_meta = np.concatenate([pre, y_meta], axis=0)

    if cfg.postamble != 0.0:
        # ensure that post-amble is a multiple of 20ms
        N_post = int(floor(cfg.postamble * 50) / 50 * y.fs)

        # append all-zero postamble to all sources
        post = np.zeros((N_post, y.audio.shape[1]))
        y.audio = np.concatenate([y.audio, post])

        # append neutral position as a post-amble to all sources
        N_post = int(N_post / frame_len)
        post = np.tile([0.00, 0.00, 1.00, 0.00, 1.00], (N_post, 1))
        y_meta = np.concatenate([y_meta, post], axis=0)

    # add random noise
    if cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

        # superimpose
        y.audio += noise
    y_meta = np.column_stack((azi, ele))

    # write ISM audio stream to the output file
    audiofile.write(
+79 −119

File changed.

Preview size limit exceeded, changes collapsed.