Implement all vendor intrinsics used by the fimg crate · rust-lang/rust@ecf79a3 (original) (raw)
`@@ -494,6 +494,160 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
`
494
494
`}
`
495
495
`}
`
496
496
``
``
497
`+
"llvm.x86.avx2.packuswb" => {
`
``
498
`+
`
``
499
`+
intrinsic_args!(fx, args => (a, b); intrinsic);
`
``
500
+
``
501
`+
assert_eq!(a.layout(), b.layout());
`
``
502
`+
let layout = a.layout();
`
``
503
+
``
504
`+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
`
``
505
`+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
`
``
506
`+
assert_eq!(lane_ty, fx.tcx.types.i16);
`
``
507
`+
assert_eq!(ret_lane_ty, fx.tcx.types.u8);
`
``
508
`+
assert_eq!(lane_count * 2, ret_lane_count);
`
``
509
+
``
510
`+
let zero = fx.bcx.ins().iconst(types::I16, 0);
`
``
511
`+
let max_u8 = fx.bcx.ins().iconst(types::I16, 255);
`
``
512
`+
let ret_lane_layout = fx.layout_of(fx.tcx.types.u8);
`
``
513
+
``
514
`+
for idx in 0..lane_count / 2 {
`
``
515
`+
let lane = a.value_lane(fx, idx).load_scalar(fx);
`
``
516
`+
let sat = fx.bcx.ins().smax(lane, zero);
`
``
517
`+
let sat = fx.bcx.ins().umin(sat, max_u8);
`
``
518
`+
let res = fx.bcx.ins().ireduce(types::I8, sat);
`
``
519
+
``
520
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
521
`+
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
`
``
522
`+
}
`
``
523
+
``
524
`+
for idx in 0..lane_count / 2 {
`
``
525
`+
let lane = b.value_lane(fx, idx).load_scalar(fx);
`
``
526
`+
let sat = fx.bcx.ins().smax(lane, zero);
`
``
527
`+
let sat = fx.bcx.ins().umin(sat, max_u8);
`
``
528
`+
let res = fx.bcx.ins().ireduce(types::I8, sat);
`
``
529
+
``
530
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
531
`+
ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane);
`
``
532
`+
}
`
``
533
+
``
534
`+
for idx in 0..lane_count / 2 {
`
``
535
`+
let lane = a.value_lane(fx, idx).load_scalar(fx);
`
``
536
`+
let sat = fx.bcx.ins().smax(lane, zero);
`
``
537
`+
let sat = fx.bcx.ins().umin(sat, max_u8);
`
``
538
`+
let res = fx.bcx.ins().ireduce(types::I8, sat);
`
``
539
+
``
540
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
541
`+
ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane);
`
``
542
`+
}
`
``
543
+
``
544
`+
for idx in 0..lane_count / 2 {
`
``
545
`+
let lane = b.value_lane(fx, idx).load_scalar(fx);
`
``
546
`+
let sat = fx.bcx.ins().smax(lane, zero);
`
``
547
`+
let sat = fx.bcx.ins().umin(sat, max_u8);
`
``
548
`+
let res = fx.bcx.ins().ireduce(types::I8, sat);
`
``
549
+
``
550
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
551
`+
ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane);
`
``
552
`+
}
`
``
553
`+
}
`
``
554
+
``
555
`+
"llvm.x86.sse2.packssdw.128" => {
`
``
556
`+
`
``
557
`+
intrinsic_args!(fx, args => (a, b); intrinsic);
`
``
558
+
``
559
`+
assert_eq!(a.layout(), b.layout());
`
``
560
`+
let layout = a.layout();
`
``
561
+
``
562
`+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
`
``
563
`+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
`
``
564
`+
assert_eq!(lane_ty, fx.tcx.types.i32);
`
``
565
`+
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
`
``
566
`+
assert_eq!(lane_count * 2, ret_lane_count);
`
``
567
+
``
568
`+
let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16));
`
``
569
`+
let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16));
`
``
570
`+
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
`
``
571
+
``
572
`+
for idx in 0..lane_count {
`
``
573
`+
let lane = a.value_lane(fx, idx).load_scalar(fx);
`
``
574
`+
let sat = fx.bcx.ins().smax(lane, min_i16);
`
``
575
`+
let sat = fx.bcx.ins().umin(sat, max_i16);
`
``
576
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
577
+
``
578
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
579
`+
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
`
``
580
`+
}
`
``
581
+
``
582
`+
for idx in 0..lane_count {
`
``
583
`+
let lane = b.value_lane(fx, idx).load_scalar(fx);
`
``
584
`+
let sat = fx.bcx.ins().smax(lane, min_i16);
`
``
585
`+
let sat = fx.bcx.ins().umin(sat, max_i16);
`
``
586
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
587
+
``
588
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
589
`+
ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
`
``
590
`+
}
`
``
591
`+
}
`
``
592
+
``
593
`+
"llvm.x86.avx2.packssdw" => {
`
``
594
`+
`
``
595
`+
intrinsic_args!(fx, args => (a, b); intrinsic);
`
``
596
+
``
597
`+
assert_eq!(a.layout(), b.layout());
`
``
598
`+
let layout = a.layout();
`
``
599
+
``
600
`+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
`
``
601
`+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
`
``
602
`+
assert_eq!(lane_ty, fx.tcx.types.i32);
`
``
603
`+
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
`
``
604
`+
assert_eq!(lane_count * 2, ret_lane_count);
`
``
605
+
``
606
`+
let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16));
`
``
607
`+
let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16));
`
``
608
`+
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
`
``
609
+
``
610
`+
for idx in 0..lane_count / 2 {
`
``
611
`+
let lane = a.value_lane(fx, idx).load_scalar(fx);
`
``
612
`+
let sat = fx.bcx.ins().smax(lane, min_i16);
`
``
613
`+
let sat = fx.bcx.ins().umin(sat, max_i16);
`
``
614
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
615
+
``
616
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
617
`+
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
`
``
618
`+
}
`
``
619
+
``
620
`+
for idx in 0..lane_count / 2 {
`
``
621
`+
let lane = b.value_lane(fx, idx).load_scalar(fx);
`
``
622
`+
let sat = fx.bcx.ins().smax(lane, min_i16);
`
``
623
`+
let sat = fx.bcx.ins().umin(sat, max_i16);
`
``
624
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
625
+
``
626
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
627
`+
ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane);
`
``
628
`+
}
`
``
629
+
``
630
`+
for idx in 0..lane_count / 2 {
`
``
631
`+
let lane = a.value_lane(fx, idx).load_scalar(fx);
`
``
632
`+
let sat = fx.bcx.ins().smax(lane, min_i16);
`
``
633
`+
let sat = fx.bcx.ins().umin(sat, max_i16);
`
``
634
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
635
+
``
636
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
637
`+
ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane);
`
``
638
`+
}
`
``
639
+
``
640
`+
for idx in 0..lane_count / 2 {
`
``
641
`+
let lane = b.value_lane(fx, idx).load_scalar(fx);
`
``
642
`+
let sat = fx.bcx.ins().smax(lane, min_i16);
`
``
643
`+
let sat = fx.bcx.ins().umin(sat, max_i16);
`
``
644
`+
let res = fx.bcx.ins().ireduce(types::I16, sat);
`
``
645
+
``
646
`+
let res_lane = CValue::by_val(res, ret_lane_layout);
`
``
647
`+
ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane);
`
``
648
`+
}
`
``
649
`+
}
`
``
650
+
497
651
` _ => {
`
498
652
` fx.tcx
`
499
653
`.sess
`