Commit 8e6a4aae authored by Tapani Pihlajakuja
Browse files

Implements LTV regression test based on the MR version mostly via copy paste.

parent 05ce3fbb
Loading
Loading
Loading
Loading
Loading
+242 −0
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ variables:
      - 'long-term-logs'
      - 'backup-long-term-logs'
      - 'test-long-self-test'
      - 'check-regressions-long'
      - 'ivas-conformance-linux'

# This sets when pipelines are created. Jobs have more specific rules to restrict them.
@@ -186,6 +187,9 @@ workflow:
    - if: $CI_PIPELINE_SOURCE == 'web' && $MANUAL_PIPELINE_TYPE == 'test-long-self-test'
      variables:
        IVAS_PIPELINE_NAME: 'Test long self-test against main pipeline: $CI_COMMIT_BRANCH'
    - if: $CI_PIPELINE_SOURCE == 'web' && $MANUAL_PIPELINE_TYPE == 'check-regressions-long'
      variables:
        IVAS_PIPELINE_NAME: 'LTV regression check: $CI_COMMIT_BRANCH'
    - if: $CI_PIPELINE_SOURCE == 'web' && $MANUAL_PIPELINE_TYPE == 'ivas-conformance-linux'
      variables:
        IVAS_PIPELINE_NAME: 'IVAS Conformance Linux: $CI_COMMIT_BRANCH'
@@ -208,6 +212,8 @@ workflow:
  rules:
    - if: $CI_PIPELINE_SOURCE == 'web' && $MANUAL_PIPELINE_TYPE == "ivas-conformance-linux"
      when: never
    - if: $CI_PIPELINE_SOURCE == 'web' && $MANUAL_PIPELINE_TYPE == "check-regressions-long"
      when: never
    - if: $CI_PIPELINE_SOURCE == 'web'
    - if: $CI_PIPELINE_SOURCE == 'schedule'
    - if: $CI_PIPELINE_SOURCE == 'push'
@@ -257,6 +263,14 @@ workflow:
    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
      when: never

# Rule set for the long (LTV) regression check jobs: they are only added to
# pipelines started from the web UI with MANUAL_PIPELINE_TYPE set to
# 'check-regressions-long'.
.rules-check-regressions-long:
  rules:
    - if: $CI_PIPELINE_SOURCE == 'web' && $MANUAL_PIPELINE_TYPE == 'check-regressions-long'
    # explicitly keep the jobs out of merge request and push pipelines
    - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
      when: never
    - if: $CI_PIPELINE_SOURCE == 'push'
      when: never

.rules-coverage:
  rules:
    - if: $COVERAGE_TEST # Set by scheduled pipeline
@@ -713,6 +727,136 @@ workflow:
        - $XML_REPORT_BRANCH
        - $XML_REPORT_MAIN

# Template for the long test vector (LTV) regression check jobs.
#
# The job builds four sets of binaries (float-ref, merge-target, dut and
# float-ref-merge-source), runs the pytest suite given in $TEST_SUITE first for
# the branch under test and then for the merge target, converts both junit
# reports to csv and compares them for regressions.
#
# Exit status of the script:
#   - $EXIT_CODE_FAIL   if the branch run reported errors,
#   - $EXIT_CODE_NON_BE if the comparison script reported regressions,
#   - 0                 otherwise.
#
# Concrete jobs extending this template set TEST_SUITE and override the
# encoder/decoder path variables as needed.
.check-regressions-ltv-job:
  extends:
    - .test-job-linux
    - .rules-check-regressions-long
  stage: test
  needs: ["build-codec-linux-clang-make", "check-regressions-long-pre"]
  timeout: "900 minutes"
  variables:
    XML_REPORT_BRANCH: "report-junit-branch-$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA.xml"
    XML_REPORT_MAIN: "report-junit-main-$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA.xml"
    HTML_REPORT_BRANCH: "report-junit-branch-$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA.html"
    HTML_REPORT_MAIN: "report-junit-main-$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA.html"
    CSV_BRANCH: "scores-branch-$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA.csv"
    CSV_MAIN: "scores-main-$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA.csv"
    IMAGES_ARTIFACT_NAME: "images_$CI_JOB_NAME"
    SUMMARY_HTML_ARTIFACT_NAME: "summary_$CI_JOB_NAME.html"
    # quoted so that the value is an explicit string
    USE_LTV: "1"
  before_script:
    - !reference [ .test-job-linux, before_script ]
    # Remove leftovers of earlier runs. tests/dut_branch is created by the script
    # below; if a stale copy survived, "mv tests/dut tests/dut_branch" would move
    # the new outputs *into* the old directory instead of replacing it.
    - rm -rf tests/dut tests/dut_branch tests/ref
    - if [ "$DISABLE_HRTF" = "true" ]; then
    -   export PYTEST_ADDOPTS="${PYTEST_ADDOPTS} -k 'not model'"
    - fi
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/merge-request-print-pinned-commits.sh
  script:
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/print-common-info.sh

    # create empty files for all artifacts to suppress warnings in case of no regressions found or all is BE
    - touch $XML_REPORT_BRANCH $XML_REPORT_MAIN $HTML_REPORT_BRANCH $HTML_REPORT_MAIN $CSV_BRANCH $CSV_MAIN $SUMMARY_HTML_ARTIFACT_NAME $FLOAT_REF_COMMIT_FILE $CUT_COMMIT_FILE $MERGE_TARGET_COMMIT_FILE $MERGE_SOURCE_FLOAT_REF_COMMIT_FILE regressions_crashes.csv regressions_MLD.csv regressions_MAX_ABS_DIFF.csv regressions_MIN_SSNR.csv regressions_MIN_ODG.csv improvements_crashes.csv improvements_MLD.csv improvements_MAX_ABS_DIFF.csv improvements_MIN_SSNR.csv improvements_MIN_ODG.csv
    # -p: do not fail if the directory is still present from an earlier run
    - mkdir -p "$IMAGES_ARTIFACT_NAME"

    - set -euxo pipefail

    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/update-ltv-repo.sh
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/copy-ltv-files-to-testv-dir.sh
    - testcase_timeout=$TESTCASE_TIMEOUT_LTV

    - if [ $LEVEL_SCALING != "1.0" ];then
    -   bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/apply-testv-scaling.sh
    - fi

    - comp_args="--mld --ssnr --odg --mld-playback-level $MLD_PLAYBACK_LEVEL"

    # build merge target branch and corresponding float reference
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/build-binaries.sh float-ref
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/build-binaries.sh merge-target
    # build to-be-merged branch and corresponding float ref branch
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/build-binaries.sh dut
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/build-binaries.sh float-ref-merge-source

    # set pytest args for ref creation
    - enc_stats_arg=""
    - if [ "$ENCODER_TEST" = "true" ]; then
    -   enc_stats_arg="--enc_stats"
    - fi

    ### ----- run pytest for branch (dut) first -----
    # create float reference outputs with the float reference of the branch.
    # "|| exit_code=$?" only keeps "set -e" from aborting the job here; the stored
    # value is not evaluated, exit_code is reset to 0 before the final check below.
    - python3 -m pytest $TEST_SUITE --update_ref 1 $enc_stats_arg --create_ref -n auto --ref_encoder_path $MERGE_SOURCE_FLOAT_REF_ENCODER_PATH --ref_decoder_path $MERGE_SOURCE_FLOAT_REF_DECODER_PATH || exit_code=$?
    # create dut outputs
    - python3 -m pytest --tb=no -q $TEST_SUITE --keep_files --create_cut --html=$HTML_REPORT_BRANCH --self-contained-html --junit-xml=$XML_REPORT_BRANCH $comp_args --dut_encoder_path $DUT_ENCODER_PATH --dut_decoder_path $DUT_DECODER_PATH -n auto --testcase_timeout $testcase_timeout || true
    # number of lines in the branch report containing errors="0"; the final check expects exactly 1
    - zero_errors_branch=$(cat $XML_REPORT_BRANCH | grep -c 'errors="0"') || true
    - python3 scripts/parse_xml_report.py $XML_REPORT_BRANCH $CSV_BRANCH
    # Store branch outputs for later comparison
    - mv tests/dut tests/dut_branch
    # create the summary based on the branch only
    - python3 scripts/create_histograms.py $CSV_BRANCH $IMAGES_ARTIFACT_NAME --measures $MEASURES_FOR_REPORT
    - python3 ci/basop-pages/create_summary_page.py $SUMMARY_HTML_ARTIFACT_NAME $CI_JOB_ID $CI_JOB_NAME $IMAGES_ARTIFACT_NAME --measures $MEASURES_FOR_REPORT

    ### ----- run pytest for merge target now -----
    # create float reference outputs with the float reference of the merge target
    # (exit code handling as for the branch run above)
    - python3 -m pytest $TEST_SUITE --update_ref 1 $enc_stats_arg --create_ref -n auto --ref_encoder_path $REF_ENCODER_PATH --ref_decoder_path $REF_DECODER_PATH || exit_code=$?
    # create merge-target outputs
    - python3 -m pytest --tb=no -q $TEST_SUITE --keep_files --create_cut --html=$HTML_REPORT_MAIN --self-contained-html --junit-xml=$XML_REPORT_MAIN $comp_args --dut_encoder_path $MERGE_TARGET_ENCODER_PATH --dut_decoder_path $MERGE_TARGET_DECODER_PATH -n auto --testcase_timeout $testcase_timeout || true
    - python3 scripts/parse_xml_report.py $XML_REPORT_MAIN $CSV_MAIN

    ### compare the two csv files for regressions
    - regressions_found=0
    - python3 scripts/basop_check_for_changes_in_testcases.py --show_improvements --xml_report $XML_REPORT_BRANCH $CSV_BRANCH $CSV_MAIN > regression_log.txt || regressions_found=$?

    # run errors take precedence over regressions when choosing the exit code
    - exit_code=0
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/print-results-banner.sh
    - if [ $zero_errors_branch != 1 ]; then
    -   echo "Run errors encountered!"
    -   exit_code=$EXIT_CODE_FAIL
    - elif [ $regressions_found != 0 ]; then
    -    cat regression_log.txt
    -    echo "Detected regressions wrt to basop main"
    -    exit_code=$EXIT_CODE_NON_BE
    - fi

    - exit $exit_code
  after_script:
    # tests/dut_branch holds the moved branch outputs and is not part of the
    # artifacts, so remove it together with the other output directories
    - rm -rf tests/dut tests/dut_branch tests/ref
  allow_failure:
    exit_codes:
      # presumably the value of $EXIT_CODE_NON_BE — TODO confirm
      - 123
  artifacts:
    name: "$CI_JOB_NAME--sha-$CI_COMMIT_SHORT_SHA--results"
    expire_in: 1 week
    when: always
    paths:
      - $XML_REPORT_BRANCH
      - $XML_REPORT_MAIN
      - $HTML_REPORT_BRANCH
      - $HTML_REPORT_MAIN
      - $CSV_BRANCH
      - $CSV_MAIN
      - $SUMMARY_HTML_ARTIFACT_NAME
      - $IMAGES_ARTIFACT_NAME
      - $FLOAT_REF_COMMIT_FILE
      - $CUT_COMMIT_FILE
      - $MERGE_TARGET_COMMIT_FILE
      - $MERGE_SOURCE_FLOAT_REF_COMMIT_FILE
      - regressions_crashes.csv
      - regressions_MLD.csv
      - regressions_MAX_ABS_DIFF.csv
      - regressions_MIN_SSNR.csv
      - regressions_MIN_ODG.csv
      - improvements_crashes.csv
      - improvements_MLD.csv
      - improvements_MAX_ABS_DIFF.csv
      - improvements_MIN_SSNR.csv
      - improvements_MIN_ODG.csv
    # NOTE(review): the GitLab documentation states that exposed artifacts are not
    # displayed when artifacts:paths uses CI/CD variables, and expose_as targets the
    # merge request UI while this job only runs in web pipelines — confirm that this
    # key has the intended effect here.
    expose_as: "LTV regression test results"
    reports:
      junit:
        - $XML_REPORT_BRANCH
        - $XML_REPORT_MAIN

.ivas-pytest-compare-to-input-anchor: &ivas-pytest-compare-to-input-anchor
  stage: test
  needs: ["build-codec-linux-clang-make", "pytest-to-ref-pre"]
@@ -908,6 +1052,32 @@ pytest-to-ref-pre:
    reports:
      dotenv: commits.env


# This job replicates the functionality of the branch-is-up-to-date-with-target-pre job from the MR
# pipeline so that it can run in the manual pipeline for long regression testing. Note that the
# merge-related commit names are used although this is not part of an MR pipeline. This helps
# sharing code for now but could be improved later.
check-regressions-long-pre:
  extends:
    - .job-linux
    - .rules-check-regressions-long
  stage: prevalidate
  # empty needs list: the job does not wait for any other job
  needs: []
  script:
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/print-common-info.sh
    - bash "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/update-scripts-repo.sh
    # The MR variables do not exist in a web pipeline, so they are set here to be able to share
    # the script below: the current ref acts as merge source, "main" as merge target.
    - export CI_MERGE_REQUEST_SOURCE_BRANCH_NAME="$CI_COMMIT_REF_NAME"
    - export CI_MERGE_REQUEST_TARGET_BRANCH_NAME="main"
    # sourced (not executed) so that it runs in the same shell as the exports above
    - source "${CI_PROJECT_DIR}"/ivas-codec-ci/snippets/basop/branch-is-up-to-date-with-target-pre.sh
  artifacts:
    paths:
      - $MERGE_TARGET_COMMIT_FILE
      - $FLOAT_REF_COMMIT_FILE
      - $MERGE_SOURCE_FLOAT_REF_COMMIT_FILE
    reports:
      # presumably written by the sourced script above — TODO confirm
      dotenv: commits.env


# ---------------------------------------------------------------
# Test jobs for merge requests
# ---------------------------------------------------------------
@@ -1082,6 +1252,78 @@ check-regressions-short-dec--10db:
    MERGE_TARGET_ENCODER_PATH: "$REF_ENCODER_PATH"
    USE_LTV: 0

# LTV encoder regression check without a level scaling override.
check-regressions-long-enc-0db:
  extends: .check-regressions-ltv-job
  # jobs in the same resource group do not run concurrently
  resource_group: 'long-regression-enc'
  variables:
    TEST_SUITE: '$LONG_TEST_SUITE_ENCODER'
    # encoder test: use the float reference decoders for decoding
    DUT_DECODER_PATH: '$MERGE_SOURCE_FLOAT_REF_DECODER_PATH_FOR_BUILD_DO_NOT_MODIFY'
    MERGE_TARGET_DECODER_PATH: '$REF_DECODER_PATH'

# LTV encoder regression check with the input level raised by 10 dB.
check-regressions-long-enc-+10db:
  extends: .check-regressions-ltv-job
  # jobs in the same resource group do not run concurrently
  resource_group: 'long-regression-enc'
  variables:
    TEST_SUITE: '$LONG_TEST_SUITE_ENCODER'
    # +10 dB as linear gain: 10^(10/20) ~ 3.162
    LEVEL_SCALING: '3.162'
    MLD_PLAYBACK_LEVEL: '82.0'
    # encoder test: use the float reference decoders for decoding
    DUT_DECODER_PATH: '$MERGE_SOURCE_FLOAT_REF_DECODER_PATH_FOR_BUILD_DO_NOT_MODIFY'
    MERGE_TARGET_DECODER_PATH: '$REF_DECODER_PATH'

# LTV encoder regression check with the input level lowered by 10 dB.
check-regressions-long-enc--10db:
  extends: .check-regressions-ltv-job
  # jobs in the same resource group do not run concurrently
  resource_group: 'long-regression-enc'
  variables:
    TEST_SUITE: '$LONG_TEST_SUITE_ENCODER'
    # -10 dB as linear gain: 10^(-10/20) ~ 0.3162
    LEVEL_SCALING: '0.3162'
    MLD_PLAYBACK_LEVEL: '102.0'
    # encoder test: use the float reference decoders for decoding
    DUT_DECODER_PATH: '$MERGE_SOURCE_FLOAT_REF_DECODER_PATH_FOR_BUILD_DO_NOT_MODIFY'
    MERGE_TARGET_DECODER_PATH: '$REF_DECODER_PATH'

# LTV decoder regression check without a level scaling override.
check-regressions-long-dec-0db:
  extends: .check-regressions-ltv-job
  # jobs in the same resource group do not run concurrently
  resource_group: 'long-regression-dec'
  variables:
    TEST_SUITE: '$LONG_TEST_SUITE_NO_RENDERER'
    # decoder test: use the float reference encoders for encoding
    DUT_ENCODER_PATH: '$MERGE_SOURCE_FLOAT_REF_ENCODER_PATH_FOR_BUILD_DO_NOT_MODIFY'
    MERGE_TARGET_ENCODER_PATH: '$REF_ENCODER_PATH'

# LTV decoder regression check with the input level raised by 10 dB.
check-regressions-long-dec-+10db:
  extends: .check-regressions-ltv-job
  # jobs in the same resource group do not run concurrently
  resource_group: 'long-regression-dec'
  variables:
    TEST_SUITE: '$LONG_TEST_SUITE_NO_RENDERER'
    # +10 dB as linear gain: 10^(10/20) ~ 3.162
    LEVEL_SCALING: '3.162'
    MLD_PLAYBACK_LEVEL: '82.0'
    # decoder test: use the float reference encoders for encoding
    DUT_ENCODER_PATH: '$MERGE_SOURCE_FLOAT_REF_ENCODER_PATH_FOR_BUILD_DO_NOT_MODIFY'
    MERGE_TARGET_ENCODER_PATH: '$REF_ENCODER_PATH'

# LTV decoder regression check with the input level lowered by 10 dB.
check-regressions-long-dec--10db:
  extends: .check-regressions-ltv-job
  # jobs in the same resource group do not run concurrently
  resource_group: 'long-regression-dec'
  variables:
    TEST_SUITE: '$LONG_TEST_SUITE_NO_RENDERER'
    # -10 dB as linear gain: 10^(-10/20) ~ 0.3162
    LEVEL_SCALING: '0.3162'
    MLD_PLAYBACK_LEVEL: '102.0'
    # decoder test: use the float reference encoders for encoding
    DUT_ENCODER_PATH: '$MERGE_SOURCE_FLOAT_REF_ENCODER_PATH_FOR_BUILD_DO_NOT_MODIFY'
    MERGE_TARGET_ENCODER_PATH: '$REF_ENCODER_PATH'

### jobs that compare bitexactness between merge target and source
renderer-pytest-on-merge-request:
  extends: