--- vc1_block-good.s 2020-09-18 09:44:54.611070615 +0300 +++ vc1_block-bad.s 2020-09-18 09:44:42.735341627 +0300 @@ -651,7 +651,7 @@ ldrsh w11, [x13, #2] ldrsh w29, [x13] tbz w11, #31, .LBB1_33 -// %bb.32: // %if.then.i72 +// %bb.32: // %if.then.i69 // in Loop: Header=BB1_28 Depth=2 add w10, w10, #9 // =9 cmp w9, w10 @@ -975,7 +975,7 @@ ldrb w11, [x9, x12] cmp w14, w11 b.ne .LBB1_71 -// %bb.70: // %if.then.i145 +// %bb.70: // %if.then.i142 // in Loop: Header=BB1_68 Depth=3 ldrb w11, [x9, x10] .LBB1_71: // %vc1_coded_block_pred.exit @@ -1017,7 +1017,7 @@ cmp x23, #3 // =3 lsl w15, w3, w13 b.hi .LBB1_76 -// %bb.73: // %if.then.i46 +// %bb.73: // %if.then.i44 // in Loop: Header=BB1_68 Depth=3 ldr w0, [x19, #8560] ldr x13, [x19, #8544] @@ -1460,7 +1460,7 @@ // in Loop: Header=BB1_68 Depth=3 add w17, w11, w10 subs w16, w17, #2 // =2 - b.ge .LBB1_124 + b.hs .LBB1_124 .LBB1_123: // %cleanup334.thread.i // in Loop: Header=BB1_68 Depth=3 ldr x24, [sp, #152] // 8-byte Folded Reload @@ -1504,7 +1504,7 @@ ldr w29, [sp, #180] // 4-byte Folded Reload add w11, w11, w10 subs w10, w11, #2 // =2 - b.lt .LBB1_67 + b.lo .LBB1_67 // %bb.129: // %if.end375.i // in Loop: Header=BB1_68 Depth=3 cbz w15, .LBB1_148 @@ -2678,7 +2678,7 @@ subs w16, w16, w14 cneg w23, w16, mi cmp w27, w23 - csel w14, w15, w14, gt + csel w14, w15, w14, hi add w9, w14, w9 strh w9, [x13] mul w9, w9, w10 @@ -2687,7 +2687,7 @@ ldrb w9, [x29, #388] lsl x8, x8, #4 mov x10, #-16 - csneg x10, x10, x8, le + csneg x10, x10, x8, ls add x8, x12, x2, lsl #5 add x13, x8, x10, lsl #1 add w28, w9, w11, lsl #1 @@ -2702,7 +2702,7 @@ cmp w27, w23 mov w25, #1 str x2, [sp, #72] // 8-byte Folded Spill - csel x9, x10, x9, gt + csel x9, x10, x9, hi cmp w8, #0 // =0 ldr x8, [sp, #8] // 8-byte Folded Reload csel x29, x8, x9, eq @@ -2748,12 +2748,12 @@ add x9, x13, #16 // =16 cmp w27, w23 add x10, x12, x2, lsl #5 - csel x9, x9, x13, gt + csel x9, x9, x13, hi ldp x13, x12, [sp, #32] // 16-byte Folded Reload ldr q0, [x9] add x10, x10, #16 // =16 - csel x8, x10, x8, gt - csel x12, x13, x12, gt + csel x8, x10, x8, hi + csel x12, x13, x12, hi ldr w10, [x12] str q0, [x8] ldrh w12, [x9, #2] @@ -2942,14 +2942,14 @@ // %bb.86: // %if.then116.i // in Loop: Header=BB2_23 Depth=3 cmp w27, w23 - csel x10, x4, x3, gt + csel x10, x4, x3, hi ldr x11, [sp, #56] // 8-byte Folded Reload ldr w10, [x10] madd x9, x21, x16, x5 add x9, x9, x26, lsl #7 add x8, x11, #16 // =16 lsl w12, w14, w10 - csel x8, x8, x11, gt + csel x8, x8, x11, hi sbfiz x12, x12, #1, #32 ldrh w11, [x8, #2] ldrh w13, [x9, x12] @@ -9547,12 +9547,12 @@ str w8, [x19, #13200] csel w8, w12, w11, lt b .LBB4_6 -.LBB4_4: // %sw.epilog.thread766 +.LBB4_4: // %sw.epilog.thread765 mov w8, #4 str w8, [x19, #13200] mov w8, #5 b .LBB4_6 -.LBB4_5: // %sw.epilog.thread765 +.LBB4_5: // %sw.epilog.thread764 str wzr, [x19, #13200] .LBB4_6: // %sw.epilog15.sink.split str w8, [x19, #13204] @@ -10690,7 +10690,7 @@ ubfx w25, w9, #7, #1 str w8, [x19, #8560] cbz w25, .LBB4_99 -.LBB4_94: // %if.then405.i560 +.LBB4_94: // %if.then405.i559 // in Loop: Header=BB4_13 Depth=2 ldr x8, [x19, #2448] str w24, [sp, #60] // 4-byte Folded Spill @@ -10703,7 +10703,7 @@ mov w1, #24 add x2, x2, .L.str.12 bl av_log -.LBB4_96: // %if.end409.i564 +.LBB4_96: // %if.end409.i563 // in Loop: Header=BB4_13 Depth=2 ldr x15, [x19, #1536] ldrsw x14, [x19, #6736] @@ -10737,7 +10737,7 @@ lsl w11, w12, w11 str w10, [x19, #8560] tbz w11, #7, .LBB4_144 -// %bb.100: // %decode012.exit.i595 +// %bb.100: // %decode012.exit.i594 // in Loop: Header=BB4_13 Depth=2 mov w11, w10 lsr x11, x11, #3 @@ -11191,7 +11191,7 @@ mov w26, #1 str w11, [x14] b .LBB4_66 -.LBB4_144: // %sw.bb.i596 +.LBB4_144: // %sw.bb.i595 // in Loop: Header=BB4_13 Depth=2 ldr x11, [sp, #72] // 8-byte Folded Reload ldrsh w11, [x11, #82] @@ -14485,7 +14485,7 @@ mov w17, #71 csel w14, w17, w14, eq tbz w15, #31, .LBB4_596 -// %bb.457: // %if.then.i.i.i638 +// %bb.457: // %if.then.i.i.i637 // in Loop: Header=BB4_13 Depth=2 add w16, w16, #9 // =9 cmp w8, w16 @@ -15648,7 +15648,7 @@ // in Loop: Header=BB4_13 Depth=2 ldrb w24, [x29] b .LBB4_511 -.LBB4_595: // %if.then39.i.i.i656 +.LBB4_595: // %if.then39.i.i.i655 // in Loop: Header=BB4_13 Depth=2 sub w15, w16, w15 cmp w8, w15 @@ -15666,7 +15666,7 @@ ldrsh w13, [x12] ldrsh w15, [x12, #2] // kill: def $w16 killed $w16 killed $x16 def $x16 -.LBB4_596: // %get_vlc2.exit.i.i663 +.LBB4_596: // %get_vlc2.exit.i.i662 // in Loop: Header=BB4_13 Depth=2 add w12, w15, w16 cmp w8, w12 @@ -15675,7 +15675,7 @@ ldr x14, [sp, #64] // 8-byte Folded Reload str w12, [x14] b.ne .LBB4_599 -// %bb.597: // %if.then6.i.i688 +// %bb.597: // %if.then6.i.i687 // in Loop: Header=BB4_13 Depth=2 ldr w13, [x19, #12856] mov w11, w12 @@ -15707,16 +15707,16 @@ lsr w3, w10, w11 str w8, [x19, #8560] cbz w9, .LBB4_606 -// %bb.598: // %if.then11.i.i690 +// %bb.598: // %if.then11.i.i689 // in Loop: Header=BB4_13 Depth=2 and w8, w3, #0x1 add w8, w8, w3 asr w3, w8, #1 b .LBB4_606 -.LBB4_599: // %do.body.i.i692 +.LBB4_599: // %do.body.i.i691 // in Loop: Header=BB4_13 Depth=2 b.ge .LBB4_651 -// %bb.600: // %do.end.i.i697 +// %bb.600: // %do.end.i.i696 // in Loop: Header=BB4_13 Depth=2 add w14, w13, #1 // =1 mov w13, #36409 @@ -15728,7 +15728,7 @@ add w15, w13, w13, lsl #3 subs w14, w14, w15 b.eq .LBB4_602 -// %bb.601: // %if.then27.i.i718 +// %bb.601: // %if.then27.i.i717 // in Loop: Header=BB4_13 Depth=2 and x15, x11, #0x1 adrp x1, offset_table @@ -15760,11 +15760,11 @@ b .LBB4_603 .LBB4_602: // in Loop: Header=BB4_13 Depth=2 mov w2, wzr -.LBB4_603: // %if.end38.i.i722 +.LBB4_603: // %if.end38.i.i721 // in Loop: Header=BB4_13 Depth=2 cmp w13, w9 b.le .LBB4_605 -// %bb.604: // %if.then43.i.i745 +// %bb.604: // %if.then43.i.i744 // in Loop: Header=BB4_13 Depth=2 asr w9, w13, w9 mov w13, w12 @@ -17004,10 +17004,10 @@ cbz w3, .LBB6_6 // %bb.3: // %if.then39 add x17, x13, w15, sxtw - ldursb w17, [x17, #-1] - cmp w17, #0 // =0 - cneg w17, w17, mi - cbz w17, .LBB6_6 + ldursb w6, [x17, #-1] + cmp w6, #0 // =0 + cneg w17, w6, mi + cbz w6, .LBB6_6 // %bb.4: // %if.then39 cmp w17, w12 b.eq .LBB6_6 @@ -17030,10 +17030,10 @@ mov w17, w15 sxtw x17, w17 sub x17, x17, x14 - ldrsb w17, [x13, x17] - cmp w17, #0 // =0 - cneg w17, w17, mi - cbz w17, .LBB6_11 + ldrsb w6, [x13, x17] + cmp w6, #0 // =0 + cneg w17, w6, mi + cbz w6, .LBB6_11 // %bb.9: // %if.then88 cmp w17, w12 b.eq .LBB6_11 @@ -17060,10 +17060,10 @@ csel w14, wzr, w14, eq sub w14, w17, w14 add w14, w14, w15 - ldrsb w13, [x13, w14, sxtw] - cmp w13, #0 // =0 - cneg w13, w13, mi - cbz w13, .LBB6_17 + ldrsb w14, [x13, w14, sxtw] + cmp w14, #0 // =0 + cneg w13, w14, mi + cbz w14, .LBB6_17 // %bb.15: // %if.then140 cmp w13, w12 b.eq .LBB6_17 @@ -17086,7 +17086,7 @@ subs w10, w10, w9 cneg w10, w10, mi cmp w11, w10 - b.gt .LBB6_23 + b.hi .LBB6_23 .LBB6_20: mov w10, #1 mov w0, w9 @@ -17699,7 +17699,7 @@ ldrsh w12, [x10, #2] ldrsh w10, [x10] tbz w12, #31, .LBB8_9 -// %bb.5: // %if.then.i51 +// %bb.5: // %if.then.i50 add w13, w13, #9 // =9 cmp w9, w13 csel w13, w13, w9, hi @@ -17720,7 +17720,7 @@ // kill: def $w13 killed $w13 killed $x13 def $x13 mov w12, w14 b .LBB8_9 -.LBB8_7: // %if.then39.i69 +.LBB8_7: // %if.then39.i68 sub w12, w13, w12 cmp w9, w12 csel w13, w12, w9, hi @@ -17730,7 +17730,7 @@ //APP rev w12, w12 //NO_APP -.LBB8_8: // %get_vlc2.exit76 +.LBB8_8: // %get_vlc2.exit75 lsl w12, w12, w15 lsr w12, w12, w14 add w10, w12, w10 @@ -17738,7 +17738,7 @@ ldrsh w10, [x11] ldrsh w12, [x11, #2] // kill: def $w13 killed $w13 killed $x13 def $x13 -.LBB8_9: // %get_vlc2.exit76 +.LBB8_9: // %get_vlc2.exit75 add w11, w12, w13 cmp w9, w11 csel w12, w11, w9, hi @@ -18040,7 +18040,7 @@ .LBB8_61: // %cond.end228 add w8, w9, w8 subs w9, w8, #2 // =2 - b.lt .LBB8_105 + b.lo .LBB8_105 // %bb.62: // %if.end235 ldr w15, [sp, #56] // 4-byte Folded Reload ldr w11, [sp, #20] // 4-byte Folded Reload @@ -18059,7 +18059,7 @@ ldr x15, [sp, #40] // 8-byte Folded Reload add w9, w9, w8 subs w8, w9, #2 // =2 - b.lt .LBB8_105 + b.lo .LBB8_105 // %bb.67: // %if.end441 cbz w11, .LBB8_88 // %bb.68: // %if.then443 @@ -18076,7 +18076,7 @@ ldr x15, [sp, #40] // 8-byte Folded Reload add w9, w9, w8 subs w8, w9, #2 // =2 - b.lt .LBB8_105 + b.lo .LBB8_105 // %bb.72: // %if.end516 cbz w11, .LBB8_102 // %bb.73: // %if.then518 @@ -18750,9 +18750,10 @@ .seh_endprologue ldr x9, [x0, #2760] mov w8, #13176 + cmp w3, #0 // =0 mov x22, x0 add x27, x0, x8 - cmp w3, #0 // =0 + cneg w8, w3, mi mov x0, x1 mov w23, w7 mov x19, x6 @@ -18761,7 +18762,7 @@ mov w24, w3 mov w28, w2 mov x20, x1 - cneg w21, w3, mi + str w8, [sp, #64] // 4-byte Folded Spill blr x9 cmn w26, #1 // =1 b.eq .LBB9_4 @@ -18913,24 +18914,24 @@ mov w24, wzr b.hi .LBB9_111 // %bb.20: // %cond.end56 + str w8, [sp, #56] // 4-byte Folded Spill + ldr w8, [sp, #64] // 4-byte Folded Reload adrp x9, .LJTI9_0 add x9, x9, .LJTI9_0 - str w8, [sp, #64] // 4-byte Folded Spill - lsl w8, w21, #1 adr x11, .LBB9_21 ldrh w12, [x9, x25, lsl #1] add x11, x11, x12, lsl #2 + lsl w8, w8, #1 add w29, w10, w8 br x11 .LBB9_21: // %sw.bb - stp x25, x19, [sp, #32] // 16-byte Folded Spill - str w28, [sp, #48] // 4-byte Folded Spill - mov w26, wzr + str w28, [sp, #28] // 4-byte Folded Spill + mov x28, x25 + mov x26, x19 + mov w25, wzr str wzr, [sp, #76] - mov w28, w21 - neg w19, w21 - mov w21, #16612 - mov w25, #12874 + mov w19, #16612 + mov w21, #12874 b .LBB9_23 .LBB9_22: // %while.condthread-pre-split // in Loop: Header=BB9_23 Depth=1 @@ -18948,53 +18949,50 @@ // %bb.24: // %if.end84 // in Loop: Header=BB9_23 Depth=1 ldrsw x8, [sp, #72] - add x8, x8, w26, sxtw + add x8, x8, w25, sxtw cmp w8, #63 // =63 b.gt .LBB9_49 // %bb.25: // %if.end89 // in Loop: Header=BB9_23 Depth=1 ldr w9, [x27, #3276] add x10, x22, x8 - add x11, x10, x21 - add x10, x10, x25 + add x11, x10, x19 + add x10, x10, x21 cmp w9, #0 // =0 csel x9, x10, x11, eq ldr w10, [sp, #68] ldrb w9, [x9] - add w26, w8, #1 // =1 + add w25, w8, #1 // =1 mul w10, w10, w29 strh w10, [x20, x9, lsl #1] ldrb w11, [x27, #93] cbnz w11, .LBB9_22 // %bb.26: // %if.then106 // in Loop: Header=BB9_23 Depth=1 - tst w10, #0x8000 - csel w11, w28, w19, eq - add w10, w11, w10 + ldr w11, [sp, #64] // 4-byte Folded Reload + sub w10, w10, w11 strh w10, [x20, x9, lsl #1] b .LBB9_22 .LBB9_27: // %sw.bb243 - ldr w9, [sp, #64] // 4-byte Folded Reload - mov w8, w21 - str w28, [sp, #48] // 4-byte Folded Spill - stp x25, x19, [sp, #32] // 16-byte Folded Spill - mov w28, w8 - neg w19, w8 + ldr w9, [sp, #56] // 4-byte Folded Reload mov w8, w23 - and w21, w9, #0x2 - sxtw x8, w8 - str x8, [sp, #56] // 8-byte Folded Spill - str w21, [sp, #76] + str x19, [sp, #32] // 8-byte Folded Spill + sxtw x26, w8 + and w19, w9, #0x2 + str w19, [sp, #76] tbnz w9, #1, .LBB9_75 // %bb.28: // %while.body262.preheader + str x26, [sp, #48] // 8-byte Folded Spill + mov w26, w28 + mov x28, x25 adrp x25, .refptr.ff_vc1_adv_interlaced_8x4_zz ldr x25, [x25, .refptr.ff_vc1_adv_interlaced_8x4_zz] - mov w26, wzr + mov w21, wzr b .LBB9_30 .LBB9_29: // %while.cond259thread-pre-split // in Loop: Header=BB9_30 Depth=1 ldr w8, [sp, #76] - cbnz w8, .LBB9_52 + cbnz w8, .LBB9_51 .LBB9_30: // %while.body262 // =>This Inner Loop Header: Depth=1 ldr w4, [x22, #13204] @@ -19007,59 +19005,56 @@ // %bb.31: // %if.end269 // in Loop: Header=BB9_30 Depth=1 ldrsw x8, [sp, #72] - add x26, x8, w26, sxtw - cmp w26, #31 // =31 - b.gt .LBB9_52 + add x21, x8, w21, sxtw + cmp w21, #31 // =31 + b.gt .LBB9_51 // %bb.32: // %if.end274 // in Loop: Header=BB9_30 Depth=1 ldr w8, [x27, #3276] cbz w8, .LBB9_34 // %bb.33: // %if.else283 // in Loop: Header=BB9_30 Depth=1 - add x8, x25, w26, sxtw + add x8, x25, w21, sxtw b .LBB9_35 .LBB9_34: // %if.then277 // in Loop: Header=BB9_30 Depth=1 ldr x8, [x22, #13144] - add x8, x8, x26 + add x8, x8, x21 .LBB9_35: // %if.end289 // in Loop: Header=BB9_30 Depth=1 ldr w9, [sp, #68] ldrb w8, [x8] - add w26, w26, #1 // =1 + add w21, w21, #1 // =1 mul w9, w9, w29 strh w9, [x20, x8, lsl #1] ldrb w10, [x27, #93] cbnz w10, .LBB9_29 // %bb.36: // %if.then296 // in Loop: Header=BB9_30 Depth=1 - tst w9, #0x8000 - csel w10, w28, w19, eq - add w9, w10, w9 + ldr w10, [sp, #64] // 4-byte Folded Reload + sub w9, w9, w10 strh w9, [x20, x8, lsl #1] b .LBB9_29 .LBB9_37: // %sw.bb349 - ldr w9, [sp, #64] // 4-byte Folded Reload + ldr w9, [sp, #56] // 4-byte Folded Reload + mov x26, x19 // kill: def $w23 killed $w23 killed $x23 def $x23 - str w28, [sp, #48] // 4-byte Folded Spill - str x19, [sp, #40] // 8-byte Folded Spill - mov w28, w21 - neg w19, w21 - sxtw x23, w23 - and w21, w9, #0x2 - str w21, [sp, #76] + sxtw x8, w23 + str x8, [sp, #48] // 8-byte Folded Spill + and w19, w9, #0x2 + str w19, [sp, #76] tbnz w9, #1, .LBB9_91 // %bb.38: // %while.body364.preheader - str x23, [sp, #56] // 8-byte Folded Spill - mov x23, x25 + mov w23, w28 + mov x28, x25 adrp x25, .refptr.ff_vc1_adv_interlaced_4x8_zz ldr x25, [x25, .refptr.ff_vc1_adv_interlaced_4x8_zz] - mov w26, wzr + mov w21, wzr b .LBB9_40 .LBB9_39: // %while.cond361thread-pre-split // in Loop: Header=BB9_40 Depth=1 ldr w8, [sp, #76] - cbnz w8, .LBB9_55 + cbnz w8, .LBB9_54 .LBB9_40: // %while.body364 // =>This Inner Loop Header: Depth=1 ldr w4, [x22, #13204] @@ -19072,58 +19067,54 @@ // %bb.41: // %if.end371 // in Loop: Header=BB9_40 Depth=1 ldrsw x8, [sp, #72] - add x26, x8, w26, sxtw - cmp w26, #31 // =31 - b.gt .LBB9_55 + add x21, x8, w21, sxtw + cmp w21, #31 // =31 + b.gt .LBB9_54 // %bb.42: // %if.end376 // in Loop: Header=BB9_40 Depth=1 ldr w8, [x27, #3276] cbz w8, .LBB9_44 // %bb.43: // %if.else385 // in Loop: Header=BB9_40 Depth=1 - add x8, x25, w26, sxtw + add x8, x25, w21, sxtw b .LBB9_45 .LBB9_44: // %if.then379 // in Loop: Header=BB9_40 Depth=1 ldr x8, [x22, #13152] - add x8, x8, x26 + add x8, x8, x21 .LBB9_45: // %if.end391 // in Loop: Header=BB9_40 Depth=1 ldr w9, [sp, #68] ldrb w8, [x8] - add w26, w26, #1 // =1 + add w21, w21, #1 // =1 mul w9, w9, w29 strh w9, [x20, x8, lsl #1] ldrb w10, [x27, #93] cbnz w10, .LBB9_39 // %bb.46: // %if.then398 // in Loop: Header=BB9_40 Depth=1 - tst w9, #0x8000 - csel w10, w28, w19, eq - add w9, w10, w9 + ldr w10, [sp, #64] // 4-byte Folded Reload + sub w9, w9, w10 strh w9, [x20, x8, lsl #1] b .LBB9_39 .LBB9_47: // %sw.bb134 - neg w10, w21 - stp w28, w10, [sp, #48] // 8-byte Folded Spill lsl w10, w23, #1 - ldr w9, [sp, #64] // 4-byte Folded Reload - str w10, [sp, #12] // 4-byte Folded Spill + ldr w9, [sp, #56] // 4-byte Folded Reload + stp w10, w28, [sp, #24] // 8-byte Folded Spill // kill: def $w23 killed $w23 killed $x23 def $x23 sxtw x10, w23 - stp x25, x19, [sp, #32] // 16-byte Folded Spill - str w21, [sp, #56] // 4-byte Folded Spill - str x10, [sp] // 8-byte Folded Spill - adrp x23, .refptr.ff_vc1_adv_interlaced_4x4_zz - adrp x26, .refptr.ff_vc1_simple_progressive_4x4_zz - ldr x23, [x23, .refptr.ff_vc1_adv_interlaced_4x4_zz] - ldr x26, [x26, .refptr.ff_vc1_simple_progressive_4x4_zz] + str x19, [sp, #32] // 8-byte Folded Spill + stp x25, x10, [sp, #8] // 16-byte Folded Spill + adrp x26, .refptr.ff_vc1_adv_interlaced_4x4_zz + adrp x21, .refptr.ff_vc1_simple_progressive_4x4_zz + ldr x26, [x26, .refptr.ff_vc1_adv_interlaced_4x4_zz] + ldr x21, [x21, .refptr.ff_vc1_simple_progressive_4x4_zz] mvn w8, w9 mov w19, wzr mov w10, #3 mov w11, #1 and w24, w8, #0xf - b .LBB9_61 + b .LBB9_60 .LBB9_48: // %if.then124 cbz w8, .LBB9_72 .LBB9_49: // %if.else129 @@ -19131,84 +19122,86 @@ mov x0, x20 blr x8 ldr x8, [x22, #3304] - ldr x1, [sp, #40] // 8-byte Folded Reload sxtw x2, w23 mov x0, x20 + mov x1, x26 .LBB9_50: // %sw.epilog blr x8 + mov x25, x28 + ldr w28, [sp, #28] // 4-byte Folded Reload mov w24, #15 -.LBB9_51: - ldr w28, [sp, #48] // 4-byte Folded Reload - ldr x25, [sp, #32] // 8-byte Folded Reload b .LBB9_111 -.LBB9_52: // %while.end316 - ldr w9, [sp, #64] // 4-byte Folded Reload - cbnz w21, .LBB9_75 -// %bb.53: // %if.then323 - cmp w26, #1 // =1 +.LBB9_51: // %while.end316 + mov x25, x28 + mov w28, w26 + ldr w9, [sp, #56] // 4-byte Folded Reload + ldr x26, [sp, #48] // 8-byte Folded Reload + cbnz w19, .LBB9_75 +// %bb.52: // %if.then323 + cmp w21, #1 // =1 b.ne .LBB9_73 -// %bb.54: // %if.then326 +// %bb.53: // %if.then326 ldr x8, [x22, #12000] b .LBB9_74 -.LBB9_55: // %while.end418 - mov x25, x23 - ldr w9, [sp, #64] // 4-byte Folded Reload - ldr x23, [sp, #56] // 8-byte Folded Reload - cbnz w21, .LBB9_91 -// %bb.56: // %if.then425 - cmp w26, #1 // =1 +.LBB9_54: // %while.end418 + ldr w9, [sp, #56] // 4-byte Folded Reload + mov x25, x28 + mov w28, w23 + cbnz w19, .LBB9_91 +// %bb.55: // %if.then425 + cmp w21, #1 // =1 b.ne .LBB9_89 -// %bb.57: // %if.then428 +// %bb.56: // %if.then428 ldr x8, [x22, #12008] b .LBB9_90 -.LBB9_58: // %if.else226 - // in Loop: Header=BB9_61 Depth=1 +.LBB9_57: // %if.else226 + // in Loop: Header=BB9_60 Depth=1 ldr x8, [x22, #11984] -.LBB9_59: // %for.inc - // in Loop: Header=BB9_61 Depth=1 - ldr x9, [sp, #40] // 8-byte Folded Reload - ldp x10, x11, [sp, #16] // 16-byte Folded Reload - ldr x1, [sp] // 8-byte Folded Reload - add x2, x20, w28, uxtw #1 +.LBB9_58: // %for.inc + // in Loop: Header=BB9_60 Depth=1 + ldp x9, x10, [sp, #32] // 16-byte Folded Reload + ldr x11, [sp, #48] // 8-byte Folded Reload + ldr x1, [sp, #16] // 8-byte Folded Reload + add x2, x20, w23, uxtw #1 add x9, x9, x10 - ldr w10, [sp, #12] // 4-byte Folded Reload + ldr w10, [sp, #24] // 4-byte Folded Reload mul w10, w10, w11 add x0, x9, w10, sxtw blr x8 - ldr w9, [sp, #64] // 4-byte Folded Reload + ldr w9, [sp, #56] // 4-byte Folded Reload mov w11, #1 mov w10, #3 -.LBB9_60: // %for.inc - // in Loop: Header=BB9_61 Depth=1 +.LBB9_59: // %for.inc + // in Loop: Header=BB9_60 Depth=1 add w19, w19, #1 // =1 cmp w19, #4 // =4 - b.eq .LBB9_51 -.LBB9_61: // %for.body + b.eq .LBB9_71 +.LBB9_60: // %for.body // =>This Loop Header: Depth=1 - // Child Loop BB9_64 Depth 2 + // Child Loop BB9_63 Depth 2 sub w8, w10, w19 lsl w8, w11, w8 ands w25, w8, w9 str w25, [sp, #76] - b.ne .LBB9_60 -// %bb.62: // %while.body149.preheader - // in Loop: Header=BB9_61 Depth=1 + b.ne .LBB9_59 +// %bb.61: // %while.body149.preheader + // in Loop: Header=BB9_60 Depth=1 lsl w8, w19, #2 and x9, x19, #0x2 - and x28, x8, #0x4 + and x23, x8, #0x4 lsr x8, x9, #1 - mov w21, wzr - str x9, [sp, #24] // 8-byte Folded Spill - mov w9, w28 - bfi x28, x8, #5, #1 - str x9, [sp, #16] // 8-byte Folded Spill - b .LBB9_64 -.LBB9_63: // %cleanup201 - // in Loop: Header=BB9_64 Depth=2 + mov w28, wzr + str x9, [sp, #48] // 8-byte Folded Spill + mov w9, w23 + bfi x23, x8, #5, #1 + str x9, [sp, #40] // 8-byte Folded Spill + b .LBB9_63 +.LBB9_62: // %cleanup201 + // in Loop: Header=BB9_63 Depth=2 ldr w8, [sp, #76] - cbnz w8, .LBB9_69 -.LBB9_64: // %while.body149 - // Parent Loop BB9_61 Depth=1 + cbnz w8, .LBB9_68 +.LBB9_63: // %while.body149 + // Parent Loop BB9_60 Depth=1 // => This Inner Loop Header: Depth=2 ldr w4, [x22, #13204] add x1, sp, #76 // =76 @@ -19217,86 +19210,91 @@ mov x0, x22 bl vc1_decode_ac_coeff tbnz w0, #31, .LBB9_101 -// %bb.65: // %if.end156 - // in Loop: Header=BB9_64 Depth=2 +// %bb.64: // %if.end156 + // in Loop: Header=BB9_63 Depth=2 ldrsw x8, [sp, #72] - add x8, x8, w21, sxtw + add x8, x8, w28, sxtw cmp w8, #15 // =15 - b.gt .LBB9_68 -// %bb.66: // %if.end161 - // in Loop: Header=BB9_64 Depth=2 + b.gt .LBB9_67 +// %bb.65: // %if.end161 + // in Loop: Header=BB9_63 Depth=2 ldr w9, [x27, #3276] ldr w10, [sp, #68] - add w21, w8, #1 // =1 + add w28, w8, #1 // =1 cmp w9, #0 // =0 - csel x9, x26, x23, eq + csel x9, x21, x26, eq ldrb w9, [x9, x8] mul w10, w10, w29 - add x9, x28, x9 + add x9, x23, x9 strh w10, [x20, x9, lsl #1] ldrb w11, [x27, #93] - cbnz w11, .LBB9_63 -// %bb.67: // %if.then182 - // in Loop: Header=BB9_64 Depth=2 - ldp w11, w8, [sp, #52] // 8-byte Folded Reload - tst w10, #0x8000 - csel w8, w8, w11, eq - add w8, w8, w10 + cbnz w11, .LBB9_62 +// %bb.66: // %if.then182 + // in Loop: Header=BB9_63 Depth=2 + ldr w8, [sp, #64] // 4-byte Folded Reload + sub w8, w10, w8 strh w8, [x20, x9, lsl #1] - b .LBB9_63 -.LBB9_68: // in Loop: Header=BB9_61 Depth=1 - mov w21, w8 -.LBB9_69: // %while.end204 - // in Loop: Header=BB9_61 Depth=1 - ldr w9, [sp, #64] // 4-byte Folded Reload + b .LBB9_62 +.LBB9_67: // in Loop: Header=BB9_60 Depth=1 + mov w28, w8 +.LBB9_68: // %while.end204 + // in Loop: Header=BB9_60 Depth=1 + ldr w9, [sp, #56] // 4-byte Folded Reload mov w10, #3 mov w11, #1 - cbnz w25, .LBB9_60 -// %bb.70: // %if.then211 - // in Loop: Header=BB9_61 Depth=1 - cmp w21, #1 // =1 - b.ne .LBB9_58 -// %bb.71: // %if.then214 - // in Loop: Header=BB9_61 Depth=1 + cbnz w25, .LBB9_59 +// %bb.69: // %if.then211 + // in Loop: Header=BB9_60 Depth=1 + cmp w28, #1 // =1 + b.ne .LBB9_57 +// %bb.70: // %if.then214 + // in Loop: Header=BB9_60 Depth=1 ldr x8, [x22, #12016] - b .LBB9_59 + b .LBB9_58 +.LBB9_71: + ldr w28, [sp, #28] // 4-byte Folded Reload + ldr x25, [sp, #8] // 8-byte Folded Reload + b .LBB9_111 .LBB9_72: // %if.then127 ldr x8, [x22, #11992] - ldr x0, [sp, #40] // 8-byte Folded Reload // kill: def $w23 killed $w23 killed $x23 def $x23 sxtw x1, w23 + mov x0, x26 mov x2, x20 b .LBB9_50 .LBB9_73: // %if.else335 ldr x8, [x22, #11968] .LBB9_74: // %for.inc346 - ldr x0, [sp, #40] // 8-byte Folded Reload - ldr x1, [sp, #56] // 8-byte Folded Reload + ldr x0, [sp, #32] // 8-byte Folded Reload + mov x1, x26 mov x2, x20 blr x8 - ldr w9, [sp, #64] // 4-byte Folded Reload + ldr w9, [sp, #56] // 4-byte Folded Reload .LBB9_75: // %for.inc346 - lsr w8, w21, #1 - and w25, w9, #0x1 - bfi w21, w8, #2, #1 - mov w8, w25 - lsr w9, w21, #1 - bfi w8, w25, #1, #1 + lsr w8, w19, #1 + and w21, w9, #0x1 + bfi w19, w8, #2, #1 + mov w8, w21 + lsr w9, w19, #1 + bfi w8, w21, #1, #1 bfi w8, w9, #2, #2 eor w24, w8, #0xf - str w25, [sp, #76] - cbnz w25, .LBB9_85 + str w21, [sp, #76] + cbnz w21, .LBB9_111 // %bb.76: // %while.body262.1.preheader lsl w8, w23, #2 + sxtw x8, w8 + str x8, [sp, #56] // 8-byte Folded Spill adrp x23, .refptr.ff_vc1_adv_interlaced_8x4_zz ldr x23, [x23, .refptr.ff_vc1_adv_interlaced_8x4_zz] - mov w26, wzr - sxtw x21, w8 + mov w19, w28 + mov x28, x25 + mov w25, wzr b .LBB9_78 .LBB9_77: // %while.cond259thread-pre-split.1 // in Loop: Header=BB9_78 Depth=1 ldr w8, [sp, #76] - cbnz w8, .LBB9_86 + cbnz w8, .LBB9_85 .LBB9_78: // %while.body262.1 // =>This Inner Loop Header: Depth=1 ldr w4, [x22, #13204] @@ -19309,26 +19307,26 @@ // %bb.79: // %if.end269.1 // in Loop: Header=BB9_78 Depth=1 ldrsw x8, [sp, #72] - add x26, x8, w26, sxtw - cmp w26, #31 // =31 - b.gt .LBB9_86 + add x25, x8, w25, sxtw + cmp w25, #31 // =31 + b.gt .LBB9_85 // %bb.80: // %if.end274.1 // in Loop: Header=BB9_78 Depth=1 ldr w8, [x27, #3276] cbz w8, .LBB9_82 // %bb.81: // %if.else283.1 // in Loop: Header=BB9_78 Depth=1 - add x8, x23, x26 + add x8, x23, x25 b .LBB9_83 .LBB9_82: // %if.then277.1 // in Loop: Header=BB9_78 Depth=1 ldr x8, [x22, #13144] - add x8, x8, w26, sxtw + add x8, x8, w25, sxtw .LBB9_83: // %if.end289.1 // in Loop: Header=BB9_78 Depth=1 ldrb w8, [x8] ldr w9, [sp, #68] - add w26, w26, #1 // =1 + add w25, w25, #1 // =1 add x8, x8, #32 // =32 mul w9, w9, w29 strh w9, [x20, x8, lsl #1] @@ -19336,49 +19334,53 @@ cbnz w10, .LBB9_77 // %bb.84: // %if.then296.1 // in Loop: Header=BB9_78 Depth=1 - tst w9, #0x8000 - csel w10, w28, w19, eq - add w9, w10, w9 + ldr w10, [sp, #64] // 4-byte Folded Reload + sub w9, w9, w10 strh w9, [x20, x8, lsl #1] b .LBB9_77 -.LBB9_85: - ldr x25, [sp, #32] // 8-byte Folded Reload - b .LBB9_102 -.LBB9_86: // %while.end316.1 - ldr w28, [sp, #48] // 4-byte Folded Reload - cbnz w25, .LBB9_106 -// %bb.87: // %if.then323.1 - ldp x25, x9, [sp, #32] // 16-byte Folded Reload - cmp w26, #1 // =1 - b.ne .LBB9_107 -// %bb.88: // %if.then326.1 +.LBB9_85: // %while.end316.1 + cbnz w21, .LBB9_88 +// %bb.86: // %if.then323.1 + mov x1, x26 + ldr x26, [sp, #32] // 8-byte Folded Reload + ldr x9, [sp, #56] // 8-byte Folded Reload + cmp w25, #1 // =1 + mov x25, x28 + mov w28, w19 + b.ne .LBB9_106 +// %bb.87: // %if.then326.1 ldr x8, [x22, #12000] - b .LBB9_108 + b .LBB9_107 +.LBB9_88: + mov x25, x28 + mov w28, w19 + b .LBB9_111 .LBB9_89: // %if.else436 ldr x8, [x22, #11976] .LBB9_90: // %for.inc446 - ldr x0, [sp, #40] // 8-byte Folded Reload - mov x1, x23 + ldr x1, [sp, #48] // 8-byte Folded Reload + mov x0, x26 mov x2, x20 blr x8 - ldr w9, [sp, #64] // 4-byte Folded Reload + ldr w9, [sp, #56] // 4-byte Folded Reload .LBB9_91: // %for.inc446 add w8, w9, w9, lsl #2 mvn w8, w8 - and w21, w9, #0x1 + and w19, w9, #0x1 and w24, w8, #0xf - str w21, [sp, #76] - tbnz w9, #0, .LBB9_102 + str w19, [sp, #76] + tbnz w9, #0, .LBB9_111 // %bb.92: // %while.body364.1.preheader - str x25, [sp, #32] // 8-byte Folded Spill - adrp x26, .refptr.ff_vc1_adv_interlaced_4x8_zz - ldr x26, [x26, .refptr.ff_vc1_adv_interlaced_4x8_zz] - mov w25, wzr + mov w23, w28 + mov x28, x25 + adrp x25, .refptr.ff_vc1_adv_interlaced_4x8_zz + ldr x25, [x25, .refptr.ff_vc1_adv_interlaced_4x8_zz] + mov w21, wzr b .LBB9_94 .LBB9_93: // %while.cond361thread-pre-split.1 // in Loop: Header=BB9_94 Depth=1 ldr w8, [sp, #76] - cbnz w8, .LBB9_103 + cbnz w8, .LBB9_102 .LBB9_94: // %while.body364.1 // =>This Inner Loop Header: Depth=1 ldr w4, [x22, #13204] @@ -19391,26 +19393,26 @@ // %bb.95: // %if.end371.1 // in Loop: Header=BB9_94 Depth=1 ldrsw x8, [sp, #72] - add x25, x8, w25, sxtw - cmp w25, #31 // =31 - b.gt .LBB9_103 + add x21, x8, w21, sxtw + cmp w21, #31 // =31 + b.gt .LBB9_102 // %bb.96: // %if.end376.1 // in Loop: Header=BB9_94 Depth=1 ldr w8, [x27, #3276] cbz w8, .LBB9_98 // %bb.97: // %if.else385.1 // in Loop: Header=BB9_94 Depth=1 - add x8, x26, x25 + add x8, x25, x21 b .LBB9_99 .LBB9_98: // %if.then379.1 // in Loop: Header=BB9_94 Depth=1 ldr x8, [x22, #13152] - add x8, x8, w25, sxtw + add x8, x8, w21, sxtw .LBB9_99: // %if.end391.1 // in Loop: Header=BB9_94 Depth=1 ldrb w8, [x8] ldr w9, [sp, #68] - add w25, w25, #1 // =1 + add w21, w21, #1 // =1 add x8, x8, #4 // =4 mul w9, w9, w29 strh w9, [x20, x8, lsl #1] @@ -19418,46 +19420,40 @@ cbnz w10, .LBB9_93 // %bb.100: // %if.then398.1 // in Loop: Header=BB9_94 Depth=1 - tst w9, #0x8000 - csel w10, w28, w19, eq - add w9, w10, w9 + ldr w10, [sp, #64] // 4-byte Folded Reload + sub w9, w9, w10 strh w9, [x20, x8, lsl #1] b .LBB9_93 .LBB9_101: mov w24, w0 b .LBB9_113 -.LBB9_102: - ldr w28, [sp, #48] // 4-byte Folded Reload - b .LBB9_111 -.LBB9_103: // %while.end418.1 - ldr w28, [sp, #48] // 4-byte Folded Reload - cbnz w21, .LBB9_106 -// %bb.104: // %if.then425.1 - cmp w25, #1 // =1 - b.ne .LBB9_109 -// %bb.105: // %if.then428.1 +.LBB9_102: // %while.end418.1 + mov x25, x28 + cbnz w19, .LBB9_105 +// %bb.103: // %if.then425.1 + cmp w21, #1 // =1 + mov w28, w23 + b.ne .LBB9_108 +// %bb.104: // %if.then428.1 ldr x8, [x22, #12008] - b .LBB9_110 -.LBB9_106: - ldr x25, [sp, #32] // 8-byte Folded Reload + b .LBB9_109 +.LBB9_105: + mov w28, w23 b .LBB9_111 -.LBB9_107: // %if.else335.1 +.LBB9_106: // %if.else335.1 ldr x8, [x22, #11968] -.LBB9_108: // %sw.epilog - ldr x1, [sp, #56] // 8-byte Folded Reload - add x0, x9, x21 +.LBB9_107: // %sw.epilog + add x0, x26, x9 add x2, x20, #64 // =64 - blr x8 - b .LBB9_111 -.LBB9_109: // %if.else436.1 + b .LBB9_110 +.LBB9_108: // %if.else436.1 ldr x8, [x22, #11976] -.LBB9_110: // %sw.epilog - ldr x9, [sp, #40] // 8-byte Folded Reload +.LBB9_109: // %sw.epilog + ldr x1, [sp, #48] // 8-byte Folded Reload + add x0, x26, #4 // =4 add x2, x20, #8 // =8 - mov x1, x23 - add x0, x9, #4 // =4 +.LBB9_110: // %sw.epilog blr x8 - ldr x25, [sp, #32] // 8-byte Folded Reload .LBB9_111: // %sw.epilog ldr x8, [sp, #176] cbz x8, .LBB9_113