Implement all vendor intrinsics used by the simd-json crate · rust-lang/rust@8649731 (original) (raw)
`@@ -590,6 +590,44 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
`
590
590
`}
`
591
591
`}
`
592
592
``
``
593
`+
"llvm.x86.sse41.packusdw" => {
`
``
594
`+
`
``
595
`+
intrinsic_args!(fx, args => (a, b); intrinsic);
`
``
596
+
``
597
`+
assert_eq!(a.layout(), b.layout());
`
``
598
`+
let layout = a.layout();
`
``
599
+
``
600
`+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
`
``
601
`+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
`
``
602
`+
assert_eq!(lane_ty, fx.tcx.types.i32);
`
``
603
`+
assert_eq!(ret_lane_ty, fx.tcx.types.u16);
`
``
604
`+
assert_eq!(lane_count * 2, ret_lane_count);
`
``
605
+
``
606
`+
let min_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MIN));
`
``
607
`+
let max_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MAX));
`
``
608
`+
let ret_lane_layout = fx.layout_of(fx.tcx.types.u16);
`
``
609
+
``
610
`+
for idx in 0..lane_count {
`
``
611
`+
let lane = a.value_lane(fx, idx).load_scalar(fx);
`
``
612
`+
let sat = fx.bcx.ins().umax(lane, min_u16);
`
``
613
`+
let sat = fx.bcx.ins().umin(sat, max_u16);
`
``
614
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
615
+
``
616
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
617
`+
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
`
``
618
`+
}
`
``
619
+
``
620
`+
for idx in 0..lane_count {
`
``
621
`+
let lane = b.value_lane(fx, idx).load_scalar(fx);
`
``
622
`+
let sat = fx.bcx.ins().umax(lane, min_u16);
`
``
623
`+
let sat = fx.bcx.ins().umin(sat, max_u16);
`
``
624
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
625
+
``
626
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
627
`+
ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
`
``
628
`+
}
`
``
629
`+
}
`
``
630
+
593
631
`"llvm.x86.avx2.packssdw" => {
`
594
632
`
595
633
`intrinsic_args!(fx, args => (a, b); intrinsic);
`
`@@ -648,6 +686,106 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
`
648
686
`}
`
649
687
`}
`
650
688
``
``
689
`+
"llvm.x86.pclmulqdq" => {
`
``
690
`+
`
``
691
`+
intrinsic_args!(fx, args => (a, b, imm8); intrinsic);
`
``
692
+
``
693
`+
assert_eq!(a.layout(), b.layout());
`
``
694
`+
let layout = a.layout();
`
``
695
+
``
696
`+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
`
``
697
`+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
`
``
698
`+
assert_eq!(lane_ty, fx.tcx.types.i64);
`
``
699
`+
assert_eq!(ret_lane_ty, fx.tcx.types.i64);
`
``
700
`+
assert_eq!(lane_count, 2);
`
``
701
`+
assert_eq!(ret_lane_count, 2);
`
``
702
+
``
703
`+
let imm8 = imm8.load_scalar(fx);
`
``
704
+
``
705
`+
let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001);
`
``
706
`+
let a_lane0 = a.value_lane(fx, 0).load_scalar(fx);
`
``
707
`+
let a_lane1 = a.value_lane(fx, 1).load_scalar(fx);
`
``
708
`+
let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0);
`
``
709
+
``
710
`+
let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000);
`
``
711
`+
let b_lane0 = b.value_lane(fx, 0).load_scalar(fx);
`
``
712
`+
let b_lane1 = b.value_lane(fx, 1).load_scalar(fx);
`
``
713
`+
let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0);
`
``
714
+
``
715
`+
fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value {
`
``
716
`+
let tmp = fx.bcx.ins().ushr_imm(val, bit);
`
``
717
`+
fx.bcx.ins().band_imm(tmp, 1)
`
``
718
`+
}
`
``
719
+
``
720
`+
let mut res1 = fx.bcx.ins().iconst(types::I64, 0);
`
``
721
`+
for i in 0..=63 {
`
``
722
`+
let x = extract_bit(fx, temp1, 0);
`
``
723
`+
let y = extract_bit(fx, temp2, i);
`
``
724
`+
let mut temp = fx.bcx.ins().band(x, y);
`
``
725
`+
for j in 1..=i {
`
``
726
`+
let x = extract_bit(fx, temp1, j);
`
``
727
`+
let y = extract_bit(fx, temp2, i - j);
`
``
728
`+
let z = fx.bcx.ins().band(x, y);
`
``
729
`+
temp = fx.bcx.ins().bxor(temp, z);
`
``
730
`+
}
`
``
731
`+
let temp = fx.bcx.ins().ishl_imm(temp, i);
`
``
732
`+
res1 = fx.bcx.ins().bor(res1, temp);
`
``
733
`+
}
`
``
734
`+
ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted());
`
``
735
+
``
736
`+
let mut res2 = fx.bcx.ins().iconst(types::I64, 0);
`
``
737
`+
for i in 64..=127 {
`
``
738
`+
let mut temp = fx.bcx.ins().iconst(types::I64, 0);
`
``
739
`+
for j in i - 63..=63 {
`
``
740
`+
let x = extract_bit(fx, temp1, j);
`
``
741
`+
let y = extract_bit(fx, temp2, i - j);
`
``
742
`+
let z = fx.bcx.ins().band(x, y);
`
``
743
`+
temp = fx.bcx.ins().bxor(temp, z);
`
``
744
`+
}
`
``
745
`+
let temp = fx.bcx.ins().ishl_imm(temp, i);
`
``
746
`+
res2 = fx.bcx.ins().bor(res2, temp);
`
``
747
`+
}
`
``
748
`+
ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted());
`
``
749
`+
}
`
``
750
+
``
751
`+
"llvm.x86.avx.ptestz.256" => {
`
``
752
`+
`
``
753
`+
intrinsic_args!(fx, args => (a, b); intrinsic);
`
``
754
+
``
755
`+
assert_eq!(a.layout(), b.layout());
`
``
756
`+
let layout = a.layout();
`
``
757
+
``
758
`+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
`
``
759
`+
assert_eq!(lane_ty, fx.tcx.types.i64);
`
``
760
`+
assert_eq!(ret.layout().ty, fx.tcx.types.i32);
`
``
761
`+
assert_eq!(lane_count, 4);
`
``
762
+
``
763
`+
let a_lane0 = a.value_lane(fx, 0).load_scalar(fx);
`
``
764
`+
let a_lane1 = a.value_lane(fx, 1).load_scalar(fx);
`
``
765
`+
let a_lane2 = a.value_lane(fx, 2).load_scalar(fx);
`
``
766
`+
let a_lane3 = a.value_lane(fx, 3).load_scalar(fx);
`
``
767
`+
let b_lane0 = b.value_lane(fx, 0).load_scalar(fx);
`
``
768
`+
let b_lane1 = b.value_lane(fx, 1).load_scalar(fx);
`
``
769
`+
let b_lane2 = b.value_lane(fx, 2).load_scalar(fx);
`
``
770
`+
let b_lane3 = b.value_lane(fx, 3).load_scalar(fx);
`
``
771
+
``
772
`+
let zero0 = fx.bcx.ins().band(a_lane0, b_lane0);
`
``
773
`+
let zero1 = fx.bcx.ins().band(a_lane1, b_lane1);
`
``
774
`+
let zero2 = fx.bcx.ins().band(a_lane2, b_lane2);
`
``
775
`+
let zero3 = fx.bcx.ins().band(a_lane3, b_lane3);
`
``
776
+
``
777
`+
let all_zero0 = fx.bcx.ins().bor(zero0, zero1);
`
``
778
`+
let all_zero1 = fx.bcx.ins().bor(zero2, zero3);
`
``
779
`+
let all_zero = fx.bcx.ins().bor(all_zero0, all_zero1);
`
``
780
+
``
781
`+
let res = fx.bcx.ins().icmp_imm(IntCC::Equal, all_zero, 0);
`
``
782
`+
let res = CValue::by_val(
`
``
783
`+
fx.bcx.ins().uextend(types::I32, res),
`
``
784
`+
fx.layout_of(fx.tcx.types.i32),
`
``
785
`+
);
`
``
786
`+
ret.write_cvalue(fx, res);
`
``
787
`+
}
`
``
788
+
651
789
` _ => {
`
652
790
` fx.tcx
`
653
791
`.sess
`