//#define FIX_1326_SPEEDUP_00 // make sqrt(1) a const - catch bitstreams // no occurrence in current bitstream
//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx: unroll the loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefit