| //======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======// |
| // |
| // Cell SPU math operations |
| // |
| // This target description file contains instruction sequences for various |
| // math operations, such as vector multiplies, i32 multiply, etc., for the |
| // SPU's i32, i16, i8 and corresponding vector types. |
| // |
| // Any resemblance to libsimdmath or the Cell SDK simdmath library is |
| // purely and completely coincidental. |
| //===----------------------------------------------------------------------===// |
| |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| // v16i8 multiply instruction sequence: |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| |
// Selection pattern for `mul` on two v16i8 operands ($rA, $rB).
// The result is an ORv4i32 of two sub-trees with the same overall shape:
//   1. A SELBv4i32 of MPYv8i16($rA, $rB) and a second MPYv8i16 whose operands
//      first go through ROTMAHIv8i16 with immediate 8 and whose result goes
//      through SHLHIv8i16 with immediate 8. The select mask is
//      FSMBIv8i16 0x2222, and the selected value is ANDed with
//      ILAv4i32 0x0000ffff.
//   2. The same SELBv4i32 structure, but with the operands first passed
//      through ROTMAIv4i32_i32 (immediates 16 and 8); the selected value goes
//      through SHLIv4i32 with immediate 16.
// NOTE(review): presumably sub-tree 1 supplies the two low-order bytes of each
// 32-bit word and sub-tree 2 the two high-order bytes, with the 0x2222 mask
// interleaving per-byte products -- confirm against the SPU ISA definitions
// of mpy/selb/fsmbi and the shift/rotate immediates before relying on this.
| def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), |
| (ORv4i32 |
| (ANDv4i32 |
| (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), |
| (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), |
| (ROTMAHIv8i16 VECREG:$rB, 8)), 8), |
| (FSMBIv8i16 0x2222)), |
| (ILAv4i32 0x0000ffff)), |
| (SHLIv4i32 |
| (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), |
| (ROTMAIv4i32_i32 VECREG:$rB, 16)), |
| (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), |
| (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), |
| (FSMBIv8i16 0x2222)), 16))>; |
| |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| // v8i16 multiply instruction sequence: |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| |
// Selection pattern for `mul` on two v8i16 operands ($rA, $rB).
// SELBv8i16 merges two products under the mask built by FSMBIv8i16 0xcccc:
//   - MPYv8i16($rA, $rB)
//   - MPYHHv8i16($rA, $rB) passed through SHLIv4i32 with immediate 16
// NOTE(review): presumably MPY multiplies the low halfword of each 32-bit
// word and MPYHH the high halfword, so the shifted MPYHH result fills the
// upper halfword and 0xcccc selects those upper two bytes of every word --
// confirm against the SPU ISA (mpy, mpyhh, selb, fsmbi).
| def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), |
| (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), |
| (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), |
| (FSMBIv8i16 0xcccc))>; |
| |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| // v4i32, v2i32, i32 multiply instruction sequence: |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| |
// Selection pattern for `mul` on two v4i32 operands ($rA, $rB).
// The product is built as a sum of three terms using two Av4i32 adds:
//   (MPYHv4i32($rA, $rB) + MPYHv4i32($rB, $rA)) + MPYUv4i32($rA, $rB)
// Each intermediate result is explicitly typed as v4i32.
// NOTE(review): presumably the two MPYH terms are the high*low cross products
// (already positioned in the upper halfword) and MPYU is the unsigned
// low*low product -- confirm against the SPU ISA (mpyh, mpyu).
| def MPYv4i32: |
| Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), |
| (Av4i32 |
| (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)), |
| (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))), |
| (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>; |
| |
// Selection pattern for `mul` on two v2i32 operands ($rA, $rB).
// Same three-term structure as the v4i32 pattern, using the v2i32 variants:
//   (MPYHv2i32($rA, $rB) + MPYHv2i32($rB, $rA)) + MPYUv2i32($rA, $rB)
// with both additions done by Av2i32 and every intermediate typed as v2i32.
// NOTE(review): presumably the MPYH terms are the high*low cross products and
// MPYU the unsigned low*low product -- confirm against the SPU ISA.
| def MPYv2i32: |
| Pat<(mul (v2i32 VECREG:$rA), (v2i32 VECREG:$rB)), |
| (Av2i32 |
| (v2i32 (Av2i32 (v2i32 (MPYHv2i32 VECREG:$rA, VECREG:$rB)), |
| (v2i32 (MPYHv2i32 VECREG:$rB, VECREG:$rA)))), |
| (v2i32 (MPYUv2i32 VECREG:$rA, VECREG:$rB)))>; |
| |
| |
// Selection pattern for scalar `mul` on two R32C operands ($rA, $rB).
// Scalar counterpart of the vector i32 multiply patterns:
//   (MPYHr32($rA, $rB) + MPYHr32($rB, $rA)) + MPYUr32($rA, $rB)
// with both additions done by Ar32.
// NOTE(review): presumably the MPYH terms are the high*low cross products and
// MPYU the unsigned low*low product -- confirm against the SPU ISA.
| def MPYi32: |
| Pat<(mul R32C:$rA, R32C:$rB), |
| (Ar32 |
| (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), |
| (MPYHr32 R32C:$rB, R32C:$rA)), |
| (MPYUr32 R32C:$rA, R32C:$rB))>; |
| |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| // f32, v4f32 divide instruction sequence: |
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |
| |
// Reusable CodeFrag pieces for the scalar f32 divide pattern. Each later
// fragment embeds the earlier ones through `.Fragment`, so the operand names
// $rA and $rB must match those bound by the fdiv pattern that uses them
// (there, fdiv's operands are $rA, $rB in that order).
//   Interpf32  = FIf32($rB, FRESTf32($rB))
//   DivEstf32  = FMf32($rA, Interpf32)
//   NRaphf32   = FMAf32(FNMSf32(DivEstf32, $rB, $rA), Interpf32, DivEstf32)
//   Epsilonf32 = AIf32(NRaphf32, 1)
// NOTE(review): presumably FNMSf32(a, b, c) computes c - a*b, so NRaphf32 is
// q + ($rA - q*$rB) * recip($rB) with q = DivEstf32; and AIf32 with 1
// presumably bumps the result by one unit in the last place by incrementing
// its bit pattern as an integer -- confirm against the SPU ISA.
| // Reciprocal estimate and interpolation |
| def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; |
| // Division estimate |
| def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; |
| // Newton-Raphson iteration |
| def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), |
| Interpf32.Fragment, |
| DivEstf32.Fragment)>; |
| // Epsilon addition |
| def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; |
| |
// Selection pattern for scalar `fdiv` on R32FP operands ($rA, $rB).
// SELBf32_cond chooses between the Newton-Raphson result (NRaphf32) and its
// epsilon-adjusted variant (Epsilonf32). The select condition is
//   CGTIf32(FNMSf32($rB, Epsilonf32, $rA), -1)
// NOTE(review): presumably the condition tests whether the residual
// $rA - $rB*Epsilonf32 has its sign bit clear (integer compare > -1), in
// which case the epsilon-adjusted quotient is taken -- confirm the selb
// operand order and FNMS semantics against the SPU ISA.
| def : Pat<(fdiv R32FP:$rA, R32FP:$rB), |
| (SELBf32_cond NRaphf32.Fragment, |
| Epsilonf32.Fragment, |
| (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; |
| |
// Reusable CodeFrag pieces for the v4f32 divide pattern; vector counterparts
// of the scalar f32 fragments. Each later fragment embeds the earlier ones
// through `.Fragment`, so the operand names $rA and $rB must match those
// bound by the fdiv pattern that uses them.
//   Interpv4f32  = FIv4f32($rB, FRESTv4f32($rB))
//   DivEstv4f32  = FMv4f32($rA, Interpv4f32)
//   NRaphv4f32   = FMAv4f32(FNMSv4f32(DivEstv4f32, $rB, $rA),
//                           Interpv4f32, DivEstv4f32)
//   Epsilonv4f32 = AIv4f32(NRaphv4f32, 1)
// NOTE(review): presumably FNMSv4f32(a, b, c) computes c - a*b per element
// and AIv4f32 with 1 bumps each element by one unit in the last place --
// confirm against the SPU ISA.
| // Reciprocal estimate and interpolation |
| def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; |
| // Division estimate |
| def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; |
| // Newton-Raphson iteration |
| def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, |
| (v4f32 VECREG:$rB), |
| (v4f32 VECREG:$rA)), |
| Interpv4f32.Fragment, |
| DivEstv4f32.Fragment)>; |
| // Epsilon addition |
| def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; |
| |
// Selection pattern for `fdiv` on two v4f32 operands ($rA, $rB).
// SELBv4f32_cond chooses between the Newton-Raphson result (NRaphv4f32) and
// its epsilon-adjusted variant (Epsilonv4f32). The select condition is
//   CGTIv4f32(FNMSv4f32($rB, Epsilonv4f32, $rA), -1)
// NOTE(review): presumably the condition tests, per element, whether the
// residual $rA - $rB*Epsilonv4f32 has its sign bit clear (integer compare
// > -1), in which case the epsilon-adjusted quotient is taken -- confirm the
// selb operand order and FNMS semantics against the SPU ISA.
| def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), |
| (SELBv4f32_cond NRaphv4f32.Fragment, |
| Epsilonv4f32.Fragment, |
| (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), |
| Epsilonv4f32.Fragment, |
| (v4f32 VECREG:$rA)), -1))>; |