Commit 5a18ed43 authored by Archit Tamarapu's avatar Archit Tamarapu
Browse files

Merge branch 'main' of forge.3gpp.org:ivas-codec-pc/ivas-processing-scripts...

Merge branch 'main' of forge.3gpp.org:ivas-codec-pc/ivas-processing-scripts into 13-huge-rendering-time-for-concatenated-files
parents 27876683 5d9dd9d6
Loading
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -6,17 +6,17 @@ venv/
.vscode/
.idea/
.DS_Store
*.wav
!tests/data/**/*.wav
*.pcm
*.bs
*.192
mc.double
proc_input/*.wav
proc_input/*.pcm
proc_output/
experiments/selection/*/proc_input/*.wav
experiments/selection/*/proc_input/*.pcm
experiments/selection/*/proc_output/
experiments/selection/*/config/*cat*-lab_*.yml
*~
tests/tmp_output_*
tests/temp_output_*
tests/cut
tests/ref
tests/concatenation_folder
 No newline at end of file
+41 −3
Original line number Diff line number Diff line
@@ -6,6 +6,17 @@ default:
  interruptible: true


# Pipeline-level rules: decide whether a pipeline is created at all.
# Rules are evaluated top to bottom and the first matching rule wins;
# if no rule matches, no pipeline is created.
workflow:
  rules:
    # see https://docs.gitlab.com/ee/ci/yaml/workflow.html#switch-between-branch-pipelines-and-merge-request-pipelines
    # Skip the branch pipeline for a push when the branch already has an open
    # merge request (the merge request pipeline below covers it).
    - if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS && $CI_PIPELINE_SOURCE == "push"
      when: never
    # Run for merge request events.
    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
    # Do not run for pushes to the default branch.
    - if: $CI_PIPELINE_SOURCE == 'push' && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH # Pushes to main
      when: never
    # Allow pipelines started manually from the GitLab web UI.
    - if: $CI_PIPELINE_SOURCE == 'web' # for testing


stages:
  - check
  - test
@@ -14,10 +25,10 @@ stages:

# print some info on the runner setup
.print-common-info: &print-common-info
  - echo "--------------------------------------------\n"
  - echo "--------------------------------------------"
  - python3 --version
  - pip3 freeze | grep "numpy\|scipy\|YAML\|pytest\|black\|isort\|flake8"
  - echo "--------------------------------------------\n\n"
  - echo "--------------------------------------------"


# script anchor for updating the codec repo
@@ -26,7 +37,12 @@ stages:
  # NOTE: CODEC_DIR has to be in PATH
  - cd $CODEC_DIR
  # make sure that we are at latest main
  - git pull
  # TODO: temporarily use the RC1a tag
  - git restore .
  - git checkout 20230511-RC1a-listening-tests
  - echo "--------------------------------------------"
  - echo "Building codec on commit $(git rev-parse HEAD --short)"
  - echo "--------------------------------------------"
  # only builds if code has actually changed
  - make -j
  - cd $dir
@@ -57,9 +73,29 @@ test_audiotools_convert:
    - *print-common-info
    - python3 -m pytest -n auto tests/test_audiotools_convert.py

# run the test configs for the selection experiments
# Runs in the "test" stage on runners tagged "linux".
experiments:
  stage: test
  tags:
    - linux
  script:
    # prints the Python version and the versions of selected pip packages
    - *print-common-info
    # NOTE(review): anchor is defined outside this view; presumably updates and
    # builds the codec binaries in $CODEC_DIR - confirm against its definition
    - *get-codec-binaries
    # run only test_generate_test_items, distributed over workers (-n auto);
    # the output is additionally copied to log.txt for the artifacts below
    # NOTE(review): the job status relies on the shell propagating pytest's exit
    # code through the pipe (pipefail) - confirm for the runner's shell
    - python3 -m pytest tests/test_experiments.py::test_generate_test_items -n auto | tee log.txt
  # keep the processing logs and the pytest output for one week, but only if the job failed
  artifacts:
    paths:
      - experiments/selection/*/proc_output/*.log
      - log.txt
    when: on_failure
    expire_in: 1 week

# run some test configs for item creation
test_processing:
  stage: test
  rules:
    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
    - if: $CI_PIPELINE_SOURCE == 'push'
      when: never
  tags:
    - linux
  script:
@@ -73,6 +109,7 @@ test_processing:

lint:
  stage: analyze
  needs: []
  tags:
    - linux
  allow_failure: true
@@ -81,6 +118,7 @@ lint:

format:
  stage: analyze
  needs: []
  variables:
    ARTIFACT_BASE_NAME: "mr-$CI_MERGE_REQUEST_IID--sha-$CI_COMMIT_SHORT_SHA--formatting-fix"
    ARTIFACT_FOLDER: "formatting-patch"
+68 −10
Original line number Diff line number Diff line
@@ -47,6 +47,13 @@

---

# Item generation

The `item_generation_scripts` module may be used to generate audio items for the P.800 listening test according to the scene description. All scenes must be fully described in the `SCENE.yml` file. The module takes monophonic audio
files from the specified input directory as input and stores the audio items in the requested format in the specified output directory. The module also generates the associated metadata files if the target format requires them.

This module may be executed from the command-line with `python -m ivas_processing_scripts.generation item_gen_configs/SCENE.YML`.

# Listening test generation

The `ivas_processing_scripts` module helps to quickly set up listening tests with multiple (pre-)processing and post-processing options.
@@ -133,6 +140,11 @@ postprocessing:
# delete_tmp: true
### Master seed for random processes like bitstream error pattern generation; default = 0
# master_seed: 5
### Additional seed to specify number of preruns (used for background noise delay and FER bitstream processing); default = 0
# prerun_seed: 2
### flag for linux to use windows-built binaries with wine: default = false
### this requires the wine binary to be available and will be ignored on windows
# use_windows_codec_binaries: true

### Any relative paths will be interpreted relative to the working directory the script is called from!
### Usage of absolute paths is recommended.
@@ -163,6 +175,10 @@ output_path: "./tmp_output"
### searches for the specified substring in found filenames; default = null
# input_select:
#  - "48kHz"

### Include the condition number in the item name; default = false
### e.g. abcxyz.wav --> abcxyz.cXX.wav
# condition_in_output_filename: true
```

</details>
@@ -200,7 +216,7 @@ input:
    ### Target loudness in LKFS; default = null (no loudness change applied)
    # loudness: -26
    ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null);
    ### default = null (uses preprocessing fmt if possible)
    ### default = null (uses postprocessing fmt)
    # loudness_fmt: "BINAURAL"
    ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0
    # trim:
@@ -237,12 +253,15 @@ input:
    # preamble_noise: true
    ### Additive background noise
    # background_noise:
        ### REQUIRED: SNR for background noise in dB
        ### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise
        # snr: 10
        ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s)
        ### REQUIRED: Either background noise path or low level noise flag
        ### Path to background noise, must have same format and sampling rate as input signal(s); default = null
        # background_noise_path: ".../noise.wav"
        ### Seed for delay offset; default = 0
        # seed_delay: 10
        ### Flag for using low level [-4,+4] background noise; default = false
        # low_level_noise: true
    ### Flag for repeating the whole signal once and discarding the first half after processing
    # repeat_signal: true
```

</details>
@@ -275,8 +294,6 @@ input:
    # error_pattern: "path/pattern.192"
    ### Error rate in percent
    # error_rate: 5
	### Additional seed to specify number of preruns; default = 0
    # prerun_seed: 2
```
</details>

@@ -348,6 +365,8 @@ conditions_to_generate:
          # fs: 48000
          ### Additional commandline options; default = null
          # opts: ["-q", "-no_delay_cmp"]
      ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
      # sba_fmt: "PLANARFOA"

  ### IVAS condition ###############################
  c07:
@@ -375,6 +394,8 @@ conditions_to_generate:
          # fs: 48000
          ### Additional commandline options; default = null
          # opts: ["-q", "-no_delay_cmp"]
      ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
      # sba_fmt: "PLANARFOA"
            
  ### EVS condition ################################
  c08:
@@ -398,6 +419,8 @@ conditions_to_generate:
          bin: ~/git/ivas-codec/EVS_dec
          ### Decoder output sampling rate; default = null (same as input)
          # fs: 48000
      ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
      # sba_fmt: "PLANARFOA"
```

</details>
@@ -470,6 +493,7 @@ This configuration has to match the channel configuration. If the provided list
For the encoding stage `cod` and the decoding stage `dec`, the path to the IVAS_cod and IVAS_dec binaries can be specified under the key `bin`.
Additionally some resampling can be applied by using the key `fs` followed by the desired sampling rate.
The general bitstream processing configuration can be locally overwritten for each EVS and IVAS condition with the key `tx`.
For IVAS and EVS conditions, the `sba_fmt` key is available to specify an SBA format of lower or same order compared to the input for SBA input formats.
The additional key `evs_lfe_9k6bps_nb` is only available for EVS conditions and ensures a bitrate of 9.6kbps and narrow band processing of the LFE channel(s).
#### IVAS
The configuration of the IVAS condition is similar to the EVS condition. However, only one bitrate for all channels (and metadata) can be specified.
@@ -512,15 +536,16 @@ The following additional executables are needed for the different processing ste

| Processing step                                 | Executable            | Where to find                                                                                               |
|-------------------------------------------------|-----------------------|-------------------------------------------------------------------------------------------------------------|
| Loudness measurement and adjustment             | bs1770demo            | https://github.com/openitu/STL                                                                              |
| Loudness measurement and adjustment             | bs1770demo            | https://github.com/ErikNorvell-Ericsson/STL (Note branch)                                                                             |
| MNRU                                            | p50fbmnru             | https://github.com/openitu/STL                                                                              |
| ESDRU                                           | esdru                 | https://github.com/openitu/STL                                                                              |
| Frame error pattern application                 | eid-xor               | https://github.com/openitu/STL                                                                              |
| Error pattern generation                        | gen-patt              | https://www.itu.int/rec/T-REC-G.191-201003-S/en (Note: Version in https://github.com/openitu/STL is buggy!) |
| Reverberation module                            | reverb                | https://github.com/openitu/STL                                                                              |
| Filtering, Resampling                           | filter                | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| Random offset/seed generation (necessary for background noise and FER bitstream processing)   | random                | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| JBM network simulator                           | networkSimulator_g192 | https://www.3gpp.org/ftp/tsg_sa/WG4_CODEC/TSGS4_76/docs/S4-131277.zip                                       |
| MASA rendering (also used in loudness measurement of MASA items)        | masaRenderer        | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip         |
| MASA rendering (also used in loudness measurement of MASA items)        | masaRenderer, masaAnalyzer   | https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/TSGS4_122_Athens/Docs/S4-230221.zip         |
| EVS reference conditions        | EVS_cod, EVS_dec      | https://www.3gpp.org/ftp/Specs/archive/26_series/26.443/26443-h00.zip                                       |

The necessary binaries have to be either placed in the [ivas_processing_scripts/bin](./ivas_processing_scripts/bin) folder or the path has to be specified in 
@@ -661,6 +686,39 @@ ISM

---

# audiotools CLI for format conversion and rendering
# Audiotools CLI for format conversion and rendering

Please refer to [the notebook](./examples/audiotools.ipynb) for an overview.

# How to generate the configs and process items for the selection test experiments

The script `generate_test.py` is used to generate config files and process items for the selection test experiments:
```
usage: generate_test.py [-h] [--no_parallel] [--create_cfg_only] exp_lab_pairs [exp_lab_pairs ...]

Generate config files and process files for selecton experiments. Experiment names and lab ids must be given as comma-separated pairs (e.g. 'P800-5,b BS1534-4a,d ...')

positional arguments:
  exp_lab_pairs      The combinations of experiment/lab-id that you want to generate, separated by whitespace. Experiment and lab id need to be separated by a comma.

options:
  -h, --help         show this help message and exit
  --no_parallel      If given, configs will not be run in parallel
  --create_cfg_only  If given, only create the configs and folder structure without processing items
```
Before running the script, one needs to put the input files in the respective input folder (including the background noise files, see below). If input files are missing, the script will complain and stop. For example, for processing tests P800-3 and BS1534-4a for labs b and d, respectively, the command line would look like this (no whitespace around the commas!):
```
python3 generate_test.py P800-3,b BS1534-4a,d
```

Tests are processed separately per category and per lab (as some values in the configs are dependent on category and lab). For each experiment, a static base config is stored from which the actual configs are generated (identified by the suffix `catX-lab_Y.yml`). For P800 tests, there are 6 categories each. The BS1534 experiments do not define categories, except for the MASA ones (BS1534-7a/b) - there one might mix FOA and HOA2 input material, so there are 2 categories for those in the scripts (category 1 for FOA, category 2 for HOA2). In `experiments/selection/` there is a folder structure prepared for all selection experiments, in which you have to put the input files for your test. For example, for P800-1:
```
experiments/selection/P800-1/
├── background_noise    <--- put your background files in here and name them as background_noisecatX.wav. Not all experiments use background noise
├── config              <--- contains base config, generated configs will be stored here, too
│   ├── P800-1.yml
├── proc_input
│   ├── catX            <--- put your input files for cat X in here
└── proc_output         <--- collect your output from here, example subfolder below
│   ├── catX-lab_Y      <--- NOTE: this is only generated by the script and not checked in in the repository
```
 No newline at end of file
+41 −11
Original line number Diff line number Diff line
@@ -19,15 +19,21 @@
# delete_tmp: true
### Master seed for random processes like bitstream error pattern generation; default = 0
# master_seed: 5
### Additional seed to specify number of preruns (used for background noise delay and FER bitstream processing); default = 0
# prerun_seed: 2
### flag for linux to use windows-built binaries with wine: default = false
### this requires the wine binary to be available and will be ignored on windows
# use_windows_codec_binaries: true

### Any relative paths will be interpreted relative to the working directory the script is called from!
### Usage of absolute paths is recommended.
### Do not use file names with dots "." in them! This is not supported, use "_" instead
### For Windows user: please use double back slash '\\' in paths and add '.exe' to executable definitions
### Do not use "tmp_" in file or folder names ("temp_" is fine)
### For Windows users: please use double backslashes '\\' in paths
### REQUIRED: Input path or file
input_path: ".../ivas/items/HOA3"
### REQUIRED: Output path or file
output_path: ".../tmp_output"
output_path: ".../temp_output"
### Metadata path or file(s)
### If input format is ISM{1-4} a path for the metadata files can be specified;
### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherwise ignored)
@@ -50,6 +56,10 @@ output_path: ".../tmp_output"
# input_select:
#  - "48kHz"

### Include the condition number in the item name; default = false
### e.g. abcxyz.wav --> abcxyz.cXX.wav
# condition_in_output_filename: true

################################################
### Input configuration
################################################
@@ -58,6 +68,13 @@ input:
    fmt: "HOA3"
    ### Input sampling rate in Hz needed for headerless audio files; default = 48000
    # fs: 32000
    ### Enable check for input files being aligned to an integer multiple of a given length in ms.
    ### If a file is not aligned, a warning will be issued. If the input format has metadata or force is true, an error is raised instead.
    # aligned_to:
    ### alignment length in ms, is needed if aligned_to is used
    #     len: 20
    ### default: false
    #     force: true

################################################
### Pre-processing on individual items
@@ -74,7 +91,7 @@ input:
    ### Target loudness in LKFS; default = null (no loudness change applied)
    # loudness: -26
    ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null);
    ### default = null (uses preprocessing fmt if possible)
    ### default = null (uses postprocessing fmt)
    # loudness_fmt: "BINAURAL"
    ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0
    # trim:
@@ -105,14 +122,21 @@ input:
    # preamble: 10000
    ### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence)
    # preamble_noise: true
    ### Specify postamble duration in ms. Postamble is added after concatenation and possible signal repetition; default = 0
    # postamble: 20
    ### Flag whether to use noise (amplitude +-4) for the postamble or silence; default = false (silence)
    # postamble_noise: true
    ### Additive background noise
    # background_noise:
        ### REQUIRED: SNR for background noise in dB
        ### SNR for background noise in dB; REQUIRED for prerecorded background noise and ignored for low level noise
        # snr: 10
        ### REQUIRED: Path to background noise, must have same format and sampling rate as input signal(s)
        ### REQUIRED: Either background noise path or low level noise flag
        ### Path to background noise, must have same format and sampling rate as input signal(s); default = null
        # background_noise_path: ".../noise.wav"
        ### Seed for delay offset; default = 0
        # seed_delay: 10
        ### Flag for using low level [-4,+4] background noise; default = false
        # low_level_noise: true
    ### Flag for repeating the whole signal once and discarding the first half after processing
    # repeat_signal: true

#################################################
### Bitstream processing
@@ -139,8 +163,6 @@ input:
    # error_pattern: "path/pattern.192"
    ### Error rate in percent
    # error_rate: 5
    ### Additional seed to specify number of preruns; default = 0
    # prerun_seed: 2
    
################################################
### Configuration for conditions under test
@@ -209,6 +231,8 @@ conditions_to_generate:
      ### Bitstream options
      # tx:
          ### For possible arguments see overall bitstream modification
      ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
      # sba_fmt: "PLANARFOA"

  ### IVAS condition ###############################
  c07:
@@ -239,6 +263,8 @@ conditions_to_generate:
      ### Bitstream options
      # tx:
          ### For possible arguments see overall bitstream modification
      ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
      # sba_fmt: "PLANARFOA"
            
  ### EVS condition ################################
  c08:
@@ -251,7 +277,7 @@ conditions_to_generate:
          # - 9600
          - [13200, 13200, 8000, 13200, 9600]
      ### for multi-channel configs, code LFE with 9.6 kbps NB (as mandated by IVAS-3)
      evs_lfe_9k6bps_nb: true
      # evs_lfe_9k6bps_nb: true
      ### Encoder options
      cod:
          ### Path to encoder binary; default search for EVS_cod in bin folder (primary) and PATH (secondary)
@@ -267,6 +293,8 @@ conditions_to_generate:
      ### Bitstream options
      # tx:
          ### For possible arguments see overall bitstream modification
      ### Option to use SBA format of lower or same order (planar also possible) for SBA input formats
      # sba_fmt: "PLANARFOA"

################################################
### Post-processing
@@ -274,7 +302,9 @@ conditions_to_generate:
### Post-processing step performed after core processing for all conditions
### Post-processing is required and can not be omitted
postprocessing:
    ### REQUIRED: Target format for output
    ### REQUIRED: Target format for output, this can be a string as below, or a list, e.g. ["FOA", "BINAURAL"].
    ###           Conversion will be applied in order and the last format is the final output format. This was introduced to
    ###           accommodate the MASA tests where masaRenderer is used as binaural renderer for all conditions.
    fmt: "BINAURAL"
    ### REQUIRED: Target sampling rate in Hz for resampling
    fs: 48000
+101 −0
Original line number Diff line number Diff line
---
################################################
# General configuration
################################################
### Base config for selection experiment BS1534-1a: stereo input, reference and
### anchor conditions, three EVS conditions and two IVAS conditions.

name: BS1534-1a
### Master seed for random processes like bitstream error pattern generation
master_seed: 5
### Additional seed to specify number of preruns
### (used for background noise delay and FER bitstream processing)
prerun_seed: 2

### Relative paths are interpreted relative to the working directory the script is called from
input_path: "experiments/selection/BS1534-1a/proc_input"
output_path: "experiments/selection/BS1534-1a/proc_output"
### On linux, use windows-built codec binaries with wine (requires the wine binary); ignored on windows
use_windows_codec_binaries: true
### Include the condition number in the item name, e.g. abcxyz.wav --> abcxyz.cXX.wav
condition_in_output_filename: true

################################################
### Input configuration
################################################
input:
    fmt: "STEREO"
    ### Input sampling rate in Hz (needed for headerless audio files)
    fs: 48000
    ### Check that input files are aligned to an integer multiple of len (in ms);
    ### "force" is not set here, so its default (false) applies
    aligned_to:
        len: 20

################################################
### Pre-processing on individual items
################################################
preprocessing:
    ### NOTE(review): presumably selects a 20 kHz band-pass mask filter - confirm against the filter options
    mask: "20KBP"
    ### Target loudness in LKFS
    loudness: -26
    ### NOTE(review): presumably a windowing (fade) length in ms - confirm against the config template
    window: 100

################################################
### Pre-processing on whole signal(s)
################################################
preprocessing_2:
    concatenate_input: false
    ### false = silence is used for the preamble instead of noise
    preamble_noise: false
    ### Postamble duration in ms; added after concatenation and possible signal repetition
    postamble: 20
    ### Use noise (amplitude +-4) for the postamble instead of silence
    postamble_noise: true
    ### Repeat the whole signal once and discard the first half after processing
    repeat_signal: true

#################################################
### Bitstream processing
#################################################
### (no bitstream processing is configured for this experiment)

################################################
### Configuration for conditions under test
################################################
conditions_to_generate:
    ### Reference and anchor conditions ##########################
    c01:
        type: ref
    ### NOTE(review): presumably the 7 kHz low-pass anchor - confirm against the supported condition types
    c02:
        type: lp7k

    ### EVS condition ################################
    ### c03-c05: EVS at 24400, 32000 and 48000; the encoder gets the additional
    ### commandline options "-max_band FB", the decoder section is left empty
    c03:
        type: evs
        bitrates:
            - 24400
        cod:
            opts: ["-max_band", "FB"]
        dec:
    c04:
        type: evs
        bitrates:
            - 32000
        cod:
            opts: ["-max_band", "FB"]
        dec:
    c05:
        type: evs
        bitrates:
            - 48000
        cod:
            opts: ["-max_band", "FB"]
        dec:

    ### IVAS condition ###############################
    ### c06-c07: IVAS at 48000 and 64000; the encoder section is left empty,
    ### the decoder format is set to "STEREO"
    c06:
        type: ivas
        bitrates:
            - 48000
        cod:
        dec:
            fmt: "STEREO"
    c07:
        type: ivas
        bitrates:
            - 64000
        cod:
        dec:
            fmt: "STEREO"

################################################
### Post-processing
################################################
postprocessing:
    ### Target format for output
    fmt: "STEREO"
    ### Target sampling rate in Hz for resampling
    fs: 48000
    ### NOTE(review): presumably the target loudness in LKFS, as in preprocessing - confirm
    loudness: -26
Loading