Commit 81628b69 authored by Vladimir Malenovsky
Browse files

support of .raw format

parent 872d533c
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -91,6 +91,7 @@ def main(args):
            cfg.output_path,
            cfg.scenes,
            logger,
            fs=cfg.fs
        )

    # copy configuration to output directory
+39 −39
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ format: "ISM1"
# delete_tmp: true

### Output sampling rate in Hz needed for headerless audio files; default = 48000
# fs: 32000
fs: 48000

### Any relative paths will be interpreted relative to the working directory the script is called from!
### Usage of absolute paths is recommended.
@@ -21,10 +21,10 @@ format: "ISM1"
### For Windows users: please use a double backslash '\\' in paths and add '.exe' to executable definitions

### Input path to mono files
input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono"
input_path: "./items_mono"

### Output path for generated test items and metadata files
output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output"
output_path: "./output"

### Target loudness in LKFS; default = null (no loudness normalization applied)
loudness: -26
@@ -51,7 +51,7 @@ scenes:
    a1: 
        name: "G1S1.wav"
        description: "Talker sitting at a table"
        source: "f2s5a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 0 
        elevation: 0 
        delay: 0 
@@ -59,7 +59,7 @@ scenes:
    a2: 
        name: "G6S2.wav"
        description: "Talker sitting at a table"
        source: "f5s10a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 60 
        elevation: 0 
        delay: 0 
@@ -67,7 +67,7 @@ scenes:
    a3: 
        name: "G5S3.wav"
        description: "Talker sitting at a table"
        source: "f2s5a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 120 
        elevation: 0 
        delay: 0 
@@ -75,7 +75,7 @@ scenes:
    a4: 
        name: "G4S4.wav"
        description: "Talker sitting at a table"
        source: "m4s11b_Talker1.wav"
        source: "test_single.wav"
        azimuth: 180 
        elevation: 0 
        delay: 0 
@@ -83,7 +83,7 @@ scenes:
    a5: 
        name: "G3S5.wav"
        description: "Talker sitting at a table"
        source: "m1s4a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 240 
        elevation: 0 
        delay: 0 
@@ -91,7 +91,7 @@ scenes:
    a6: 
        name: "G2S6.wav"
        description: "Talker sitting at a table"
        source: "f5s10a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 300 
        elevation: 0 
        delay: 0 
@@ -99,7 +99,7 @@ scenes:
    b1: 
        name: "G2S1.wav"
        description: "standing talker."
        source: "f5s10b_Talker1.wav"
        source: "test_single.wav"
        azimuth: 120 
        elevation: 35 
        delay: 0 
@@ -107,7 +107,7 @@ scenes:
    b2: 
        name: "G1S2.wav"
        description: "standing talker."
        source: "f2s1a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 180 
        elevation: 35 
        delay: 0 
@@ -115,7 +115,7 @@ scenes:
    b3: 
        name: "G6S3.wav"
        description: "standing talker."
        source: "f5s10b_Talker1.wav"
        source: "test_single.wav"
        azimuth: 240 
        elevation: 35 
        delay: 0 
@@ -123,7 +123,7 @@ scenes:
    b4: 
        name: "G5S4.wav"
        description: "standing talker."
        source: "f2s1a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 300 
        elevation: 35 
        delay: 0 
@@ -131,7 +131,7 @@ scenes:
    b5: 
        name: "G4S5.wav"
        description: "standing talker."
        source: "m4s11a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 0 
        elevation: 35 
        delay: 0 
@@ -139,7 +139,7 @@ scenes:
    b6: 
        name: "G3S6.wav"
        description: "standing talker."
        source: "m1s2b_Talker1.wav"
        source: "test_single.wav"
        azimuth: 60 
        elevation: 35 
        delay: 0 
@@ -147,7 +147,7 @@ scenes:
    c1: 
        name: "G3S1.wav"
        description: "Smaller talker (child) walking around a table."
        source: "m1s6b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "0:1:360"
        elevation: 0 
        delay: 0 
@@ -155,7 +155,7 @@ scenes:
    c2: 
        name: "G2S2.wav"
        description: "Smaller talker (child) walking around a table."
        source: "f5s14a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "60:1:60+360" 
        elevation: 0 
        delay: 0 
@@ -163,7 +163,7 @@ scenes:
    c3: 
        name: "G1S3.wav"
        description: "Smaller talker (child) walking around a table."
        source: "f2s6a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "120:1:120+360" 
        elevation: 0 
        delay: 0 
@@ -171,7 +171,7 @@ scenes:
    c4: 
        name: "G6S4.wav"
        description: "Smaller talker (child) walking around a table."
        source: "f5s14a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "180:1:180+360" 
        elevation: 0 
        delay: 0 
@@ -179,7 +179,7 @@ scenes:
    c5: 
        name: "G5S5.wav"
        description: "Smaller talker (child) walking around a table."
        source: "f2s6a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "240:1:240+360"
        elevation: 0 
        delay: 0 
@@ -187,7 +187,7 @@ scenes:
    c6: 
        name: "G4S6.wav"
        description: "Smaller talker (child) walking around a table."
        source: "m4s13a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "300:1:300+360" 
        elevation: 0 
        delay: 0 
@@ -195,7 +195,7 @@ scenes:
    d1: 
        name: "G4S1.wav"
        description: "Talker walking around the table."
        source: "m4s12b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "0:-1:-360"
        elevation: 35 
        delay: 0 
@@ -203,7 +203,7 @@ scenes:
    d2: 
        name: "G3S2.wav"
        description: "Talker walking around the table."
        source: "m1s12a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "60:-1:60-360" 
        elevation: 35 
        delay: 0 
@@ -211,7 +211,7 @@ scenes:
    d3: 
        name: "G2S3.wav"
        description: "Talker walking around the table."
        source: "f5s15b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "120:-1:120-360" 
        elevation: 35 
        delay: 0 
@@ -219,7 +219,7 @@ scenes:
    d4: 
        name: "G1S4.wav"
        description: "Talker walking around the table."
        source: "f2s3b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "180:-1:180-360" 
        elevation: 35 
        delay: 0 
@@ -227,7 +227,7 @@ scenes:
    d5: 
        name: "G6S5.wav"
        description: "Talker walking around the table."
        source: "f5s15b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "240:-1:240-360"
        elevation: 35 
        delay: 0 
@@ -235,7 +235,7 @@ scenes:
    d6: 
        name: "G5S6.wav"
        description: "Talker walking around the table."
        source: "f2s3b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "300:-1:300-360" 
        elevation: 35
        delay: 0 
@@ -243,7 +243,7 @@ scenes:
    e1: 
        name: "G5S1.wav"
        description: "Elevation displacement."
        source: "f2s4a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 240 
        elevation: "-90:0.5:90" 
        delay: 0 
@@ -251,7 +251,7 @@ scenes:
    e2: 
        name: "G4S2.wav"
        description: "Elevation displacement."
        source: "m4s16a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 300 
        elevation: 0 
        delay: 0 
@@ -259,7 +259,7 @@ scenes:
    e3: 
        name: "G3S3.wav"
        description: "Elevation displacement."
        source: "m1s16b_Talker1.wav"
        source: "test_single.wav"
        azimuth: 0 
        elevation: "-90:0.5:90"  
        delay: 0 
@@ -267,7 +267,7 @@ scenes:
    e4: 
        name: "G2S4.wav"
        description: "Elevation displacement."
        source: "f5s19a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 60 
        elevation: "-90:0.5:90"  
        delay: 0 
@@ -275,7 +275,7 @@ scenes:
    e5: 
        name: "G1S5.wav"
        description: "Elevation displacement."
        source: "f2s4a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 120 
        elevation: "-90:0.5:90"  
        delay: 0 
@@ -283,7 +283,7 @@ scenes:
    e6: 
        name: "G6S6.wav"
        description: "Elevation displacement."
        source: "f5s19a_Talker1.wav"
        source: "test_single.wav"
        azimuth: 180 
        elevation: "-90:0.5:90"  
        delay: 0 
@@ -291,7 +291,7 @@ scenes:
    f1: 
        name: "G6S1.wav"
        description: "Azimuth and elevation displacement."
        source: "f5s15a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "60:0.5:60+180" 
        elevation: "35:-0.2:-35"
        delay: 0 
@@ -299,7 +299,7 @@ scenes:
    f2: 
        name: "G5S2.wav"
        description: "Azimuth and elevation displacement."
        source: "f2s7b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "120:0.5:120+180" 
        elevation: "35:-0.2:-35" 
        delay: 0 
@@ -307,7 +307,7 @@ scenes:
    f3: 
        name: "G4S3.wav"
        description: "Azimuth and elevation displacement."
        source: "m4s14a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "180:0.5:180+180" 
        elevation: "35:-0.2:-35" 
        delay: 0 
@@ -315,7 +315,7 @@ scenes:
    f4: 
        name: "G3S4.wav"
        description: "Azimuth and elevation displacement."
        source: "m1s7a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "240:0.5:240+180" 
        elevation: "35:-0.2:-35"
        delay: 0 
@@ -323,7 +323,7 @@ scenes:
    f5: 
        name: "G2S5.wav"
        description: "Azimuth and elevation displacement."
        source: "f5s15a_Talker1.wav"
        source: "test_single.wav"
        azimuth: "300:0.5:300+180" 
        elevation: "35:-0.2:-35" 
        delay: 0 
@@ -331,7 +331,7 @@ scenes:
    f6: 
        name: "G1S6.wav"
        description: "Azimuth and elevation displacement."
        source: "f2s7b_Talker1.wav"
        source: "test_single.wav"
        azimuth: "0:0.5:0+180" 
        elevation: "35:-0.2:-35" 
        delay: 0 
+3 −3
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ format: "ISM2"
# delete_tmp: true

### Output sampling rate in Hz needed for headerless audio files; default = 48000
# fs: 32000
fs: 48000

### Any relative paths will be interpreted relative to the working directory the script is called from!
### Usage of absolute paths is recommended.
@@ -21,10 +21,10 @@ format: "ISM2"
### For Windows users: please use a double backslash '\\' in paths and add '.exe' to executable definitions

### Input path to mono files
input_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/items_mono"
input_path: "./items_mono"

### Output path for generated test items and metadata files
output_path: "/mnt/c/Work/IVAS/3gpp_forge_gitlab/ivas-processing-scripts/output"
output_path: "./output"

### Target loudness in LKFS; default = null (no loudness normalization applied)
loudness: -26
+1 −4
Original line number Diff line number Diff line
@@ -35,10 +35,7 @@ from pathlib import Path

import yaml

from item_generation_scripts.constants import (
    DEFAULT_CONFIG,
    REQUIRED_KEYS,
)
from item_generation_scripts.constants import DEFAULT_CONFIG, REQUIRED_KEYS


def merge_dicts(base: dict, other: dict) -> None:
+4 −9
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@ import csv
import logging
import os
from pathlib import Path
from typing import Optional

import numpy as np

@@ -55,6 +56,7 @@ def generate_ism_items(
    output_path: Path,
    scenes: dict,
    logger: logging.Logger,
    fs: Optional[int] = 48000,
):
    """Generate ISM items with metadata from mono items based on scene description"""

@@ -73,7 +75,6 @@ def generate_ism_items(
            source_file = np.atleast_1d(scene["source"])[i]
            source_azi = np.atleast_1d(scene["azimuth"])[i]
            source_ele = np.atleast_1d(scene["elevation"])[i]
            # source_type = "speech"  # !!!! TBD - support generic audio + background noise and speech in the .yml file
            source_delay = np.atleast_1d(scene["delay"])[i]

            logger.info(
@@ -81,10 +82,7 @@ def generate_ism_items(
            )

            # read source file
            # x, fs = audiofile.read(os.path.join(input_path, source_file))    # !!!! TBD - check the support for headerless .raw files
            # pdb.set_trace()
            audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file))

            audio_object = audio.fromfile("MONO", os.path.join(input_path, source_file), fs=fs)
            x = audio_object.audio
            fs = audio_object.fs

@@ -93,7 +91,6 @@ def generate_ism_items(

            # adjust the level of the source file
            _, scale_factor = get_loudness(audio_object, target_level, "MONO")
            # print(f"Scaling loudness with factor: {scale_factor}")
            x *= scale_factor

            # read azimuth information and create array
@@ -156,9 +153,7 @@ def generate_ism_items(
                )

            # additional metadata
            dist = np.ones(
                N_frames
            )  # !!!! TBD - check what to do with these metadata
            dist = np.ones(N_frames)  # !!!! TBD - check what to do with these metadata
            spread = np.zeros(N_frames)
            gain = np.ones(N_frames)