3 "// Generated by LLVM NVPTX Back-End\n"
// Subsample_Nearest_uchar: each thread takes one texture sample and stores
// one byte to the output buffer.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the output index y * param_4 + x
//       (presumably the output pitch in elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
10 " // .globl Subsample_Nearest_uchar\n"
12 ".visible .entry Subsample_Nearest_uchar(\n"
13 " .param .u64 Subsample_Nearest_uchar_param_0,\n"
14 " .param .u64 Subsample_Nearest_uchar_param_1,\n"
15 " .param .u32 Subsample_Nearest_uchar_param_2,\n"
16 " .param .u32 Subsample_Nearest_uchar_param_3,\n"
17 " .param .u32 Subsample_Nearest_uchar_param_4,\n"
18 " .param .u32 Subsample_Nearest_uchar_param_5,\n"
19 " .param .u32 Subsample_Nearest_uchar_param_6,\n"
20 " .param .u32 Subsample_Nearest_uchar_param_7\n"
23 " .reg .pred %p<4>;\n"
24 " .reg .f32 %f<13>;\n"
25 " .reg .b32 %r<19>;\n"
26 " .reg .b64 %rd<7>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
28 " ld.param.u32 %r4, [Subsample_Nearest_uchar_param_3];\n"
29 " ld.param.u32 %r3, [Subsample_Nearest_uchar_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
30 " // begin inline asm\n"
31 " mov.u32 %r8, %ctaid.x;\n"
32 " // end inline asm\n"
33 " // begin inline asm\n"
34 " mov.u32 %r9, %ctaid.y;\n"
35 " // end inline asm\n"
36 " // begin inline asm\n"
37 " mov.u32 %r10, %ntid.x;\n"
38 " // end inline asm\n"
39 " // begin inline asm\n"
40 " mov.u32 %r11, %ntid.y;\n"
41 " // end inline asm\n"
42 " // begin inline asm\n"
43 " mov.u32 %r12, %tid.x;\n"
44 " // end inline asm\n"
45 " // begin inline asm\n"
46 " mov.u32 %r13, %tid.y;\n"
47 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
48 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
49 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
50 " setp.ge.s32 %p1, %r2, %r4;\n"
51 " setp.ge.s32 %p2, %r1, %r3;\n"
52 " or.pred %p3, %p2, %p1;\n"
// NOTE(review): no instruction consuming %p3 is visible in this listing for
// this kernel (the embedded numbering jumps from 52 to 54), whereas the
// sibling kernels in this file have "@%p3 bra LBBn_2;" at this point.
// Confirm against the complete PTX that the out-of-range branch is present.
54 " ld.param.u32 %r7, [Subsample_Nearest_uchar_param_6];\n"
55 " ld.param.u32 %r6, [Subsample_Nearest_uchar_param_5];\n"
56 " ld.param.u32 %r5, [Subsample_Nearest_uchar_param_4];\n"
57 " ld.param.u64 %rd4, [Subsample_Nearest_uchar_param_0];\n"
58 " ld.param.u64 %rd3, [Subsample_Nearest_uchar_param_1];\n"
59 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f5 = (float)param_5 / (float)param_2,
//                %f8 = (float)param_6 / (float)param_3.
60 " cvt.rn.f32.s32 %f3, %r6;\n"
61 " cvt.rn.f32.s32 %f4, %r3;\n"
62 " div.rn.f32 %f5, %f3, %f4;\n"
63 " cvt.rn.f32.s32 %f6, %r7;\n"
64 " cvt.rn.f32.s32 %f7, %r4;\n"
65 " div.rn.f32 %f8, %f6, %f7;\n"
// Sample position: %f1 = %f5 * (x + 0.5), %f2 = %f8 * (y + 0.5).
// 0f3F000000 is the IEEE-754 single-precision bit pattern of 0.5.
66 " cvt.rn.f32.s32 %f9, %r1;\n"
67 " add.f32 %f10, %f9, 0f3F000000;\n"
68 " mul.f32 %f1, %f5, %f10;\n"
69 " cvt.rn.f32.s32 %f11, %r2;\n"
70 " add.f32 %f12, %f11, 0f3F000000;\n"
71 " mul.f32 %f2, %f8, %f12;\n"
// Fetch four u32 components at (%f1, %f2); only the first (%r14) is used.
72 " // begin inline asm\n"
73 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
74 " // end inline asm\n"
// Byte offset = y * param_4 + x (32-bit multiply-add, then sign-extended to
// 64 bits); store the low 8 bits of %r14 at output base + offset.
75 " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
76 " cvt.s64.s32 %rd5, %r18;\n"
77 " add.s64 %rd6, %rd1, %rd5;\n"
78 " st.global.u8 [%rd6], %r14;\n"
// Subsample_Nearest_uchar2: each thread takes one texture sample and stores
// its first two components as a pair of bytes.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 2-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
83 " // .globl Subsample_Nearest_uchar2\n"
84 ".visible .entry Subsample_Nearest_uchar2(\n"
85 " .param .u64 Subsample_Nearest_uchar2_param_0,\n"
86 " .param .u64 Subsample_Nearest_uchar2_param_1,\n"
87 " .param .u32 Subsample_Nearest_uchar2_param_2,\n"
88 " .param .u32 Subsample_Nearest_uchar2_param_3,\n"
89 " .param .u32 Subsample_Nearest_uchar2_param_4,\n"
90 " .param .u32 Subsample_Nearest_uchar2_param_5,\n"
91 " .param .u32 Subsample_Nearest_uchar2_param_6,\n"
92 " .param .u32 Subsample_Nearest_uchar2_param_7\n"
95 " .reg .pred %p<4>;\n"
96 " .reg .b16 %rs<3>;\n"
97 " .reg .f32 %f<13>;\n"
98 " .reg .b32 %r<19>;\n"
99 " .reg .b64 %rd<7>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
101 " ld.param.u32 %r4, [Subsample_Nearest_uchar2_param_3];\n"
102 " ld.param.u32 %r3, [Subsample_Nearest_uchar2_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
103 " // begin inline asm\n"
104 " mov.u32 %r8, %ctaid.x;\n"
105 " // end inline asm\n"
106 " // begin inline asm\n"
107 " mov.u32 %r9, %ctaid.y;\n"
108 " // end inline asm\n"
109 " // begin inline asm\n"
110 " mov.u32 %r10, %ntid.x;\n"
111 " // end inline asm\n"
112 " // begin inline asm\n"
113 " mov.u32 %r11, %ntid.y;\n"
114 " // end inline asm\n"
115 " // begin inline asm\n"
116 " mov.u32 %r12, %tid.x;\n"
117 " // end inline asm\n"
118 " // begin inline asm\n"
119 " mov.u32 %r13, %tid.y;\n"
120 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
121 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
122 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
123 " setp.ge.s32 %p1, %r2, %r4;\n"
124 " setp.ge.s32 %p2, %r1, %r3;\n"
125 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB1_2 is not shown in this listing (presumably the kernel exit - confirm).
126 " @%p3 bra LBB1_2;\n"
127 " ld.param.u32 %r7, [Subsample_Nearest_uchar2_param_6];\n"
128 " ld.param.u32 %r6, [Subsample_Nearest_uchar2_param_5];\n"
129 " ld.param.u32 %r5, [Subsample_Nearest_uchar2_param_4];\n"
130 " ld.param.u64 %rd4, [Subsample_Nearest_uchar2_param_0];\n"
131 " ld.param.u64 %rd3, [Subsample_Nearest_uchar2_param_1];\n"
132 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f5 = (float)param_5 / (float)param_2,
//                %f8 = (float)param_6 / (float)param_3.
133 " cvt.rn.f32.s32 %f3, %r6;\n"
134 " cvt.rn.f32.s32 %f4, %r3;\n"
135 " div.rn.f32 %f5, %f3, %f4;\n"
136 " cvt.rn.f32.s32 %f6, %r7;\n"
137 " cvt.rn.f32.s32 %f7, %r4;\n"
138 " div.rn.f32 %f8, %f6, %f7;\n"
// Sample position: %f1 = %f5 * (x + 0.5), %f2 = %f8 * (y + 0.5).
// 0f3F000000 is the IEEE-754 single-precision bit pattern of 0.5.
139 " cvt.rn.f32.s32 %f9, %r1;\n"
140 " add.f32 %f10, %f9, 0f3F000000;\n"
141 " mul.f32 %f1, %f5, %f10;\n"
142 " cvt.rn.f32.s32 %f11, %r2;\n"
143 " add.f32 %f12, %f11, 0f3F000000;\n"
144 " mul.f32 %f2, %f8, %f12;\n"
// Fetch four u32 components at (%f1, %f2); the first two are used.
145 " // begin inline asm\n"
146 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
147 " // end inline asm\n"
// Narrow components 0 and 1 to 16-bit registers for the vector store.
148 " cvt.u16.u32 %rs1, %r14;\n"
149 " cvt.u16.u32 %rs2, %r15;\n"
// Element index = y * param_4 + x (32-bit); byte offset = index * 2 as a
// 64-bit product. Store the two values as a 2 x u8 vector.
150 " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
151 " mul.wide.s32 %rd5, %r18, 2;\n"
152 " add.s64 %rd6, %rd1, %rd5;\n"
153 " st.global.v2.u8 [%rd6], {%rs1, %rs2};\n"
// Subsample_Nearest_uchar4: each thread takes one texture sample and stores
// all four components as four bytes.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 4-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
158 " // .globl Subsample_Nearest_uchar4\n"
159 ".visible .entry Subsample_Nearest_uchar4(\n"
160 " .param .u64 Subsample_Nearest_uchar4_param_0,\n"
161 " .param .u64 Subsample_Nearest_uchar4_param_1,\n"
162 " .param .u32 Subsample_Nearest_uchar4_param_2,\n"
163 " .param .u32 Subsample_Nearest_uchar4_param_3,\n"
164 " .param .u32 Subsample_Nearest_uchar4_param_4,\n"
165 " .param .u32 Subsample_Nearest_uchar4_param_5,\n"
166 " .param .u32 Subsample_Nearest_uchar4_param_6,\n"
167 " .param .u32 Subsample_Nearest_uchar4_param_7\n"
170 " .reg .pred %p<4>;\n"
171 " .reg .b16 %rs<5>;\n"
172 " .reg .f32 %f<13>;\n"
173 " .reg .b32 %r<19>;\n"
174 " .reg .b64 %rd<7>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
176 " ld.param.u32 %r4, [Subsample_Nearest_uchar4_param_3];\n"
177 " ld.param.u32 %r3, [Subsample_Nearest_uchar4_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
178 " // begin inline asm\n"
179 " mov.u32 %r8, %ctaid.x;\n"
180 " // end inline asm\n"
181 " // begin inline asm\n"
182 " mov.u32 %r9, %ctaid.y;\n"
183 " // end inline asm\n"
184 " // begin inline asm\n"
185 " mov.u32 %r10, %ntid.x;\n"
186 " // end inline asm\n"
187 " // begin inline asm\n"
188 " mov.u32 %r11, %ntid.y;\n"
189 " // end inline asm\n"
190 " // begin inline asm\n"
191 " mov.u32 %r12, %tid.x;\n"
192 " // end inline asm\n"
193 " // begin inline asm\n"
194 " mov.u32 %r13, %tid.y;\n"
195 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
196 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
197 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
198 " setp.ge.s32 %p1, %r2, %r4;\n"
199 " setp.ge.s32 %p2, %r1, %r3;\n"
200 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB2_2 is not shown in this listing (presumably the kernel exit - confirm).
201 " @%p3 bra LBB2_2;\n"
202 " ld.param.u32 %r7, [Subsample_Nearest_uchar4_param_6];\n"
203 " ld.param.u32 %r6, [Subsample_Nearest_uchar4_param_5];\n"
204 " ld.param.u32 %r5, [Subsample_Nearest_uchar4_param_4];\n"
205 " ld.param.u64 %rd4, [Subsample_Nearest_uchar4_param_0];\n"
206 " ld.param.u64 %rd3, [Subsample_Nearest_uchar4_param_1];\n"
207 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f5 = (float)param_5 / (float)param_2,
//                %f8 = (float)param_6 / (float)param_3.
208 " cvt.rn.f32.s32 %f3, %r6;\n"
209 " cvt.rn.f32.s32 %f4, %r3;\n"
210 " div.rn.f32 %f5, %f3, %f4;\n"
211 " cvt.rn.f32.s32 %f6, %r7;\n"
212 " cvt.rn.f32.s32 %f7, %r4;\n"
213 " div.rn.f32 %f8, %f6, %f7;\n"
// Sample position: %f1 = %f5 * (x + 0.5), %f2 = %f8 * (y + 0.5).
// 0f3F000000 is the IEEE-754 single-precision bit pattern of 0.5.
214 " cvt.rn.f32.s32 %f9, %r1;\n"
215 " add.f32 %f10, %f9, 0f3F000000;\n"
216 " mul.f32 %f1, %f5, %f10;\n"
217 " cvt.rn.f32.s32 %f11, %r2;\n"
218 " add.f32 %f12, %f11, 0f3F000000;\n"
219 " mul.f32 %f2, %f8, %f12;\n"
// Fetch four u32 components at (%f1, %f2); all four are used.
220 " // begin inline asm\n"
221 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
222 " // end inline asm\n"
// Narrow each component to a 16-bit register for the vector store.
223 " cvt.u16.u32 %rs1, %r14;\n"
224 " cvt.u16.u32 %rs2, %r15;\n"
225 " cvt.u16.u32 %rs3, %r16;\n"
226 " cvt.u16.u32 %rs4, %r17;\n"
// Element index = y * param_4 + x (32-bit); byte offset = index * 4 as a
// 64-bit product. Store the four values as a 4 x u8 vector.
227 " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
228 " mul.wide.s32 %rd5, %r18, 4;\n"
229 " add.s64 %rd6, %rd1, %rd5;\n"
230 " st.global.v4.u8 [%rd6], {%rs1, %rs2, %rs3, %rs4};\n"
// Subsample_Nearest_ushort: each thread takes one texture sample and stores
// its first component as one 16-bit value.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 2-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
235 " // .globl Subsample_Nearest_ushort\n"
236 ".visible .entry Subsample_Nearest_ushort(\n"
237 " .param .u64 Subsample_Nearest_ushort_param_0,\n"
238 " .param .u64 Subsample_Nearest_ushort_param_1,\n"
239 " .param .u32 Subsample_Nearest_ushort_param_2,\n"
240 " .param .u32 Subsample_Nearest_ushort_param_3,\n"
241 " .param .u32 Subsample_Nearest_ushort_param_4,\n"
242 " .param .u32 Subsample_Nearest_ushort_param_5,\n"
243 " .param .u32 Subsample_Nearest_ushort_param_6,\n"
244 " .param .u32 Subsample_Nearest_ushort_param_7\n"
247 " .reg .pred %p<4>;\n"
248 " .reg .f32 %f<13>;\n"
249 " .reg .b32 %r<19>;\n"
250 " .reg .b64 %rd<7>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
252 " ld.param.u32 %r4, [Subsample_Nearest_ushort_param_3];\n"
253 " ld.param.u32 %r3, [Subsample_Nearest_ushort_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
254 " // begin inline asm\n"
255 " mov.u32 %r8, %ctaid.x;\n"
256 " // end inline asm\n"
257 " // begin inline asm\n"
258 " mov.u32 %r9, %ctaid.y;\n"
259 " // end inline asm\n"
260 " // begin inline asm\n"
261 " mov.u32 %r10, %ntid.x;\n"
262 " // end inline asm\n"
263 " // begin inline asm\n"
264 " mov.u32 %r11, %ntid.y;\n"
265 " // end inline asm\n"
266 " // begin inline asm\n"
267 " mov.u32 %r12, %tid.x;\n"
268 " // end inline asm\n"
269 " // begin inline asm\n"
270 " mov.u32 %r13, %tid.y;\n"
271 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
272 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
273 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
274 " setp.ge.s32 %p1, %r2, %r4;\n"
275 " setp.ge.s32 %p2, %r1, %r3;\n"
276 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB3_2 is not shown in this listing (presumably the kernel exit - confirm).
277 " @%p3 bra LBB3_2;\n"
278 " ld.param.u32 %r7, [Subsample_Nearest_ushort_param_6];\n"
279 " ld.param.u32 %r6, [Subsample_Nearest_ushort_param_5];\n"
280 " ld.param.u32 %r5, [Subsample_Nearest_ushort_param_4];\n"
281 " ld.param.u64 %rd4, [Subsample_Nearest_ushort_param_0];\n"
282 " ld.param.u64 %rd3, [Subsample_Nearest_ushort_param_1];\n"
283 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f5 = (float)param_5 / (float)param_2,
//                %f8 = (float)param_6 / (float)param_3.
284 " cvt.rn.f32.s32 %f3, %r6;\n"
285 " cvt.rn.f32.s32 %f4, %r3;\n"
286 " div.rn.f32 %f5, %f3, %f4;\n"
287 " cvt.rn.f32.s32 %f6, %r7;\n"
288 " cvt.rn.f32.s32 %f7, %r4;\n"
289 " div.rn.f32 %f8, %f6, %f7;\n"
// Sample position: %f1 = %f5 * (x + 0.5), %f2 = %f8 * (y + 0.5).
// 0f3F000000 is the IEEE-754 single-precision bit pattern of 0.5.
290 " cvt.rn.f32.s32 %f9, %r1;\n"
291 " add.f32 %f10, %f9, 0f3F000000;\n"
292 " mul.f32 %f1, %f5, %f10;\n"
293 " cvt.rn.f32.s32 %f11, %r2;\n"
294 " add.f32 %f12, %f11, 0f3F000000;\n"
295 " mul.f32 %f2, %f8, %f12;\n"
// Fetch four u32 components at (%f1, %f2); only the first (%r14) is used.
296 " // begin inline asm\n"
297 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
298 " // end inline asm\n"
// Element index = y * param_4 + x (32-bit); byte offset = index * 2 as a
// 64-bit product. Store the low 16 bits of %r14.
299 " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
300 " mul.wide.s32 %rd5, %r18, 2;\n"
301 " add.s64 %rd6, %rd1, %rd5;\n"
302 " st.global.u16 [%rd6], %r14;\n"
// Subsample_Nearest_ushort2: each thread takes one texture sample and stores
// its first two components as a pair of 16-bit values.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 4-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
307 " // .globl Subsample_Nearest_ushort2\n"
308 ".visible .entry Subsample_Nearest_ushort2(\n"
309 " .param .u64 Subsample_Nearest_ushort2_param_0,\n"
310 " .param .u64 Subsample_Nearest_ushort2_param_1,\n"
311 " .param .u32 Subsample_Nearest_ushort2_param_2,\n"
312 " .param .u32 Subsample_Nearest_ushort2_param_3,\n"
313 " .param .u32 Subsample_Nearest_ushort2_param_4,\n"
314 " .param .u32 Subsample_Nearest_ushort2_param_5,\n"
315 " .param .u32 Subsample_Nearest_ushort2_param_6,\n"
316 " .param .u32 Subsample_Nearest_ushort2_param_7\n"
319 " .reg .pred %p<4>;\n"
320 " .reg .b16 %rs<3>;\n"
321 " .reg .f32 %f<13>;\n"
322 " .reg .b32 %r<19>;\n"
323 " .reg .b64 %rd<7>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
325 " ld.param.u32 %r4, [Subsample_Nearest_ushort2_param_3];\n"
326 " ld.param.u32 %r3, [Subsample_Nearest_ushort2_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
327 " // begin inline asm\n"
328 " mov.u32 %r8, %ctaid.x;\n"
329 " // end inline asm\n"
330 " // begin inline asm\n"
331 " mov.u32 %r9, %ctaid.y;\n"
332 " // end inline asm\n"
333 " // begin inline asm\n"
334 " mov.u32 %r10, %ntid.x;\n"
335 " // end inline asm\n"
336 " // begin inline asm\n"
337 " mov.u32 %r11, %ntid.y;\n"
338 " // end inline asm\n"
339 " // begin inline asm\n"
340 " mov.u32 %r12, %tid.x;\n"
341 " // end inline asm\n"
342 " // begin inline asm\n"
343 " mov.u32 %r13, %tid.y;\n"
344 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
345 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
346 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
347 " setp.ge.s32 %p1, %r2, %r4;\n"
348 " setp.ge.s32 %p2, %r1, %r3;\n"
349 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB4_2 is not shown in this listing (presumably the kernel exit - confirm).
350 " @%p3 bra LBB4_2;\n"
351 " ld.param.u32 %r7, [Subsample_Nearest_ushort2_param_6];\n"
352 " ld.param.u32 %r6, [Subsample_Nearest_ushort2_param_5];\n"
353 " ld.param.u32 %r5, [Subsample_Nearest_ushort2_param_4];\n"
354 " ld.param.u64 %rd4, [Subsample_Nearest_ushort2_param_0];\n"
355 " ld.param.u64 %rd3, [Subsample_Nearest_ushort2_param_1];\n"
356 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f5 = (float)param_5 / (float)param_2,
//                %f8 = (float)param_6 / (float)param_3.
357 " cvt.rn.f32.s32 %f3, %r6;\n"
358 " cvt.rn.f32.s32 %f4, %r3;\n"
359 " div.rn.f32 %f5, %f3, %f4;\n"
360 " cvt.rn.f32.s32 %f6, %r7;\n"
361 " cvt.rn.f32.s32 %f7, %r4;\n"
362 " div.rn.f32 %f8, %f6, %f7;\n"
// Sample position: %f1 = %f5 * (x + 0.5), %f2 = %f8 * (y + 0.5).
// 0f3F000000 is the IEEE-754 single-precision bit pattern of 0.5.
363 " cvt.rn.f32.s32 %f9, %r1;\n"
364 " add.f32 %f10, %f9, 0f3F000000;\n"
365 " mul.f32 %f1, %f5, %f10;\n"
366 " cvt.rn.f32.s32 %f11, %r2;\n"
367 " add.f32 %f12, %f11, 0f3F000000;\n"
368 " mul.f32 %f2, %f8, %f12;\n"
// Fetch four u32 components at (%f1, %f2); the first two are used.
369 " // begin inline asm\n"
370 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
371 " // end inline asm\n"
// Narrow components 0 and 1 to 16 bits.
372 " cvt.u16.u32 %rs1, %r14;\n"
373 " cvt.u16.u32 %rs2, %r15;\n"
// Element index = y * param_4 + x (32-bit); byte offset = index * 4 as a
// 64-bit product. Store the two values as a 2 x u16 vector.
374 " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
375 " mul.wide.s32 %rd5, %r18, 4;\n"
376 " add.s64 %rd6, %rd1, %rd5;\n"
377 " st.global.v2.u16 [%rd6], {%rs1, %rs2};\n"
// Subsample_Nearest_ushort4: each thread takes one texture sample and stores
// all four components as four 16-bit values.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 8-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
382 " // .globl Subsample_Nearest_ushort4\n"
383 ".visible .entry Subsample_Nearest_ushort4(\n"
384 " .param .u64 Subsample_Nearest_ushort4_param_0,\n"
385 " .param .u64 Subsample_Nearest_ushort4_param_1,\n"
386 " .param .u32 Subsample_Nearest_ushort4_param_2,\n"
387 " .param .u32 Subsample_Nearest_ushort4_param_3,\n"
388 " .param .u32 Subsample_Nearest_ushort4_param_4,\n"
389 " .param .u32 Subsample_Nearest_ushort4_param_5,\n"
390 " .param .u32 Subsample_Nearest_ushort4_param_6,\n"
391 " .param .u32 Subsample_Nearest_ushort4_param_7\n"
394 " .reg .pred %p<4>;\n"
395 " .reg .b16 %rs<5>;\n"
396 " .reg .f32 %f<13>;\n"
397 " .reg .b32 %r<19>;\n"
398 " .reg .b64 %rd<7>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
400 " ld.param.u32 %r4, [Subsample_Nearest_ushort4_param_3];\n"
401 " ld.param.u32 %r3, [Subsample_Nearest_ushort4_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
402 " // begin inline asm\n"
403 " mov.u32 %r8, %ctaid.x;\n"
404 " // end inline asm\n"
405 " // begin inline asm\n"
406 " mov.u32 %r9, %ctaid.y;\n"
407 " // end inline asm\n"
408 " // begin inline asm\n"
409 " mov.u32 %r10, %ntid.x;\n"
410 " // end inline asm\n"
411 " // begin inline asm\n"
412 " mov.u32 %r11, %ntid.y;\n"
413 " // end inline asm\n"
414 " // begin inline asm\n"
415 " mov.u32 %r12, %tid.x;\n"
416 " // end inline asm\n"
417 " // begin inline asm\n"
418 " mov.u32 %r13, %tid.y;\n"
419 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
420 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
421 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
422 " setp.ge.s32 %p1, %r2, %r4;\n"
423 " setp.ge.s32 %p2, %r1, %r3;\n"
424 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB5_2 is not shown in this listing (presumably the kernel exit - confirm).
425 " @%p3 bra LBB5_2;\n"
426 " ld.param.u32 %r7, [Subsample_Nearest_ushort4_param_6];\n"
427 " ld.param.u32 %r6, [Subsample_Nearest_ushort4_param_5];\n"
428 " ld.param.u32 %r5, [Subsample_Nearest_ushort4_param_4];\n"
429 " ld.param.u64 %rd4, [Subsample_Nearest_ushort4_param_0];\n"
430 " ld.param.u64 %rd3, [Subsample_Nearest_ushort4_param_1];\n"
431 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f5 = (float)param_5 / (float)param_2,
//                %f8 = (float)param_6 / (float)param_3.
432 " cvt.rn.f32.s32 %f3, %r6;\n"
433 " cvt.rn.f32.s32 %f4, %r3;\n"
434 " div.rn.f32 %f5, %f3, %f4;\n"
435 " cvt.rn.f32.s32 %f6, %r7;\n"
436 " cvt.rn.f32.s32 %f7, %r4;\n"
437 " div.rn.f32 %f8, %f6, %f7;\n"
// Sample position: %f1 = %f5 * (x + 0.5), %f2 = %f8 * (y + 0.5).
// 0f3F000000 is the IEEE-754 single-precision bit pattern of 0.5.
438 " cvt.rn.f32.s32 %f9, %r1;\n"
439 " add.f32 %f10, %f9, 0f3F000000;\n"
440 " mul.f32 %f1, %f5, %f10;\n"
441 " cvt.rn.f32.s32 %f11, %r2;\n"
442 " add.f32 %f12, %f11, 0f3F000000;\n"
443 " mul.f32 %f2, %f8, %f12;\n"
// Fetch four u32 components at (%f1, %f2); all four are used.
444 " // begin inline asm\n"
445 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
446 " // end inline asm\n"
// Narrow each component to 16 bits.
447 " cvt.u16.u32 %rs1, %r14;\n"
448 " cvt.u16.u32 %rs2, %r15;\n"
449 " cvt.u16.u32 %rs3, %r16;\n"
450 " cvt.u16.u32 %rs4, %r17;\n"
// Element index = y * param_4 + x (32-bit); byte offset = index * 8 as a
// 64-bit product. Store the four values as a 4 x u16 vector.
451 " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
452 " mul.wide.s32 %rd5, %r18, 8;\n"
453 " add.s64 %rd6, %rd1, %rd5;\n"
454 " st.global.v4.u16 [%rd6], {%rs1, %rs2, %rs3, %rs4};\n"
// Subsample_Bilinear_uchar: each thread fetches four texture samples placed
// symmetrically around one position, averages the low 8 bits of their first
// component with rounding, and stores one byte.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the output index y * param_4 + x
//       (presumably the output pitch in elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
// Float literals used below (IEEE-754 single precision bit patterns):
//   0f3F000000 = 0.5, 0fBF800000 = -1.0, 0f3F800000 = 1.0, 0f00000000 = 0.0
459 " // .globl Subsample_Bilinear_uchar\n"
460 ".visible .entry Subsample_Bilinear_uchar(\n"
461 " .param .u64 Subsample_Bilinear_uchar_param_0,\n"
462 " .param .u64 Subsample_Bilinear_uchar_param_1,\n"
463 " .param .u32 Subsample_Bilinear_uchar_param_2,\n"
464 " .param .u32 Subsample_Bilinear_uchar_param_3,\n"
465 " .param .u32 Subsample_Bilinear_uchar_param_4,\n"
466 " .param .u32 Subsample_Bilinear_uchar_param_5,\n"
467 " .param .u32 Subsample_Bilinear_uchar_param_6,\n"
468 " .param .u32 Subsample_Bilinear_uchar_param_7\n"
471 " .reg .pred %p<4>;\n"
472 " .reg .f32 %f<33>;\n"
473 " .reg .b32 %r<40>;\n"
474 " .reg .b64 %rd<10>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
476 " ld.param.u32 %r4, [Subsample_Bilinear_uchar_param_3];\n"
477 " ld.param.u32 %r3, [Subsample_Bilinear_uchar_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
478 " // begin inline asm\n"
479 " mov.u32 %r8, %ctaid.x;\n"
480 " // end inline asm\n"
481 " // begin inline asm\n"
482 " mov.u32 %r9, %ctaid.y;\n"
483 " // end inline asm\n"
484 " // begin inline asm\n"
485 " mov.u32 %r10, %ntid.x;\n"
486 " // end inline asm\n"
487 " // begin inline asm\n"
488 " mov.u32 %r11, %ntid.y;\n"
489 " // end inline asm\n"
490 " // begin inline asm\n"
491 " mov.u32 %r12, %tid.x;\n"
492 " // end inline asm\n"
493 " // begin inline asm\n"
494 " mov.u32 %r13, %tid.y;\n"
495 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
496 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
497 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
498 " setp.ge.s32 %p1, %r2, %r4;\n"
499 " setp.ge.s32 %p2, %r1, %r3;\n"
500 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB6_2 is not shown in this listing (presumably the kernel exit - confirm).
501 " @%p3 bra LBB6_2;\n"
502 " ld.param.u32 %r7, [Subsample_Bilinear_uchar_param_6];\n"
503 " ld.param.u32 %r6, [Subsample_Bilinear_uchar_param_5];\n"
504 " ld.param.u32 %r5, [Subsample_Bilinear_uchar_param_4];\n"
505 " ld.param.u64 %rd4, [Subsample_Bilinear_uchar_param_0];\n"
506 " ld.param.u64 %rd3, [Subsample_Bilinear_uchar_param_1];\n"
507 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f11 = (float)param_5 / (float)param_2,
//                %f14 = (float)param_6 / (float)param_3.
508 " cvt.rn.f32.s32 %f9, %r6;\n"
509 " cvt.rn.f32.s32 %f10, %r3;\n"
510 " div.rn.f32 %f11, %f9, %f10;\n"
511 " cvt.rn.f32.s32 %f12, %r7;\n"
512 " cvt.rn.f32.s32 %f13, %r4;\n"
513 " div.rn.f32 %f14, %f12, %f13;\n"
// %f16 = x + 0.5, %f18 = y + 0.5.
514 " cvt.rn.f32.s32 %f15, %r1;\n"
515 " add.f32 %f16, %f15, 0f3F000000;\n"
516 " cvt.rn.f32.s32 %f17, %r2;\n"
517 " add.f32 %f18, %f17, 0f3F000000;\n"
// %f22 = min(max((%f11 - 1) * 0.5, 0), 1); %f26 is the same from %f14.
518 " add.f32 %f19, %f11, 0fBF800000;\n"
519 " mul.f32 %f20, %f19, 0f3F000000;\n"
520 " max.f32 %f21, %f20, 0f00000000;\n"
521 " min.f32 %f22, %f21, 0f3F800000;\n"
522 " add.f32 %f23, %f14, 0fBF800000;\n"
523 " mul.f32 %f24, %f23, 0f3F000000;\n"
524 " max.f32 %f25, %f24, 0f00000000;\n"
525 " min.f32 %f26, %f25, 0f3F800000;\n"
// Tap offsets: %f28 = %f22 / (%f22 + 0.5), %f30 = %f26 / (%f26 + 0.5).
526 " add.f32 %f27, %f22, 0f3F000000;\n"
527 " div.rn.f32 %f28, %f22, %f27;\n"
528 " add.f32 %f29, %f26, 0f3F000000;\n"
529 " div.rn.f32 %f30, %f26, %f29;\n"
// Lower coordinates: %f5 = %f11 * %f16 - %f28, %f4 = %f14 * %f18 - %f30.
530 " neg.f32 %f31, %f28;\n"
531 " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
532 " neg.f32 %f32, %f30;\n"
533 " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
// Tap 1 at (%f5, %f4); keep the low 8 bits of the first component.
534 " // begin inline asm\n"
535 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
536 " // end inline asm\n"
537 " and.b32 %r30, %r14, 255;\n"
// Upper x coordinate: %f7 = %f11 * %f16 + %f28. Tap 2 at (%f7, %f4).
538 " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
539 " // begin inline asm\n"
540 " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
541 " // end inline asm\n"
542 " and.b32 %r31, %r18, 255;\n"
543 " add.s32 %r32, %r30, %r31;\n"
// Upper y coordinate: %f8 = %f14 * %f18 + %f30. Tap 3 at (%f5, %f8).
544 " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
545 " // begin inline asm\n"
546 " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
547 " // end inline asm\n"
548 " and.b32 %r33, %r22, 255;\n"
549 " add.s32 %r34, %r32, %r33;\n"
// Tap 4 at (%f7, %f8).
550 " // begin inline asm\n"
551 " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
552 " // end inline asm\n"
553 " and.b32 %r35, %r26, 255;\n"
554 " add.s32 %r36, %r34, %r35;\n"
// (sum + 2) >> 2: average of the four taps, rounded to nearest.
// Byte offset = y * param_4 + x (32-bit multiply-add, sign-extended).
555 " add.s32 %r37, %r36, 2;\n"
556 " mad.lo.s32 %r38, %r2, %r5, %r1;\n"
557 " cvt.s64.s32 %rd8, %r38;\n"
558 " add.s64 %rd9, %rd1, %rd8;\n"
559 " shr.u32 %r39, %r37, 2;\n"
560 " st.global.u8 [%rd9], %r39;\n"
// Subsample_Bilinear_uchar2: each thread fetches four texture samples placed
// symmetrically around one position, averages the low 8 bits of their first
// two components (per component, with rounding), and stores two bytes.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 2-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
// Float literals used below (IEEE-754 single precision bit patterns):
//   0f3F000000 = 0.5, 0fBF800000 = -1.0, 0f3F800000 = 1.0, 0f00000000 = 0.0
565 " // .globl Subsample_Bilinear_uchar2\n"
566 ".visible .entry Subsample_Bilinear_uchar2(\n"
567 " .param .u64 Subsample_Bilinear_uchar2_param_0,\n"
568 " .param .u64 Subsample_Bilinear_uchar2_param_1,\n"
569 " .param .u32 Subsample_Bilinear_uchar2_param_2,\n"
570 " .param .u32 Subsample_Bilinear_uchar2_param_3,\n"
571 " .param .u32 Subsample_Bilinear_uchar2_param_4,\n"
572 " .param .u32 Subsample_Bilinear_uchar2_param_5,\n"
573 " .param .u32 Subsample_Bilinear_uchar2_param_6,\n"
574 " .param .u32 Subsample_Bilinear_uchar2_param_7\n"
577 " .reg .pred %p<4>;\n"
578 " .reg .b16 %rs<3>;\n"
579 " .reg .f32 %f<33>;\n"
580 " .reg .b32 %r<49>;\n"
581 " .reg .b64 %rd<10>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
583 " ld.param.u32 %r4, [Subsample_Bilinear_uchar2_param_3];\n"
584 " ld.param.u32 %r3, [Subsample_Bilinear_uchar2_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
585 " // begin inline asm\n"
586 " mov.u32 %r8, %ctaid.x;\n"
587 " // end inline asm\n"
588 " // begin inline asm\n"
589 " mov.u32 %r9, %ctaid.y;\n"
590 " // end inline asm\n"
591 " // begin inline asm\n"
592 " mov.u32 %r10, %ntid.x;\n"
593 " // end inline asm\n"
594 " // begin inline asm\n"
595 " mov.u32 %r11, %ntid.y;\n"
596 " // end inline asm\n"
597 " // begin inline asm\n"
598 " mov.u32 %r12, %tid.x;\n"
599 " // end inline asm\n"
600 " // begin inline asm\n"
601 " mov.u32 %r13, %tid.y;\n"
602 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
603 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
604 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
605 " setp.ge.s32 %p1, %r2, %r4;\n"
606 " setp.ge.s32 %p2, %r1, %r3;\n"
607 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB7_2 is not shown in this listing (presumably the kernel exit - confirm).
608 " @%p3 bra LBB7_2;\n"
609 " ld.param.u32 %r7, [Subsample_Bilinear_uchar2_param_6];\n"
610 " ld.param.u32 %r6, [Subsample_Bilinear_uchar2_param_5];\n"
611 " ld.param.u32 %r5, [Subsample_Bilinear_uchar2_param_4];\n"
612 " ld.param.u64 %rd4, [Subsample_Bilinear_uchar2_param_0];\n"
613 " ld.param.u64 %rd3, [Subsample_Bilinear_uchar2_param_1];\n"
614 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f11 = (float)param_5 / (float)param_2,
//                %f14 = (float)param_6 / (float)param_3.
615 " cvt.rn.f32.s32 %f9, %r6;\n"
616 " cvt.rn.f32.s32 %f10, %r3;\n"
617 " div.rn.f32 %f11, %f9, %f10;\n"
618 " cvt.rn.f32.s32 %f12, %r7;\n"
619 " cvt.rn.f32.s32 %f13, %r4;\n"
620 " div.rn.f32 %f14, %f12, %f13;\n"
// %f16 = x + 0.5, %f18 = y + 0.5.
621 " cvt.rn.f32.s32 %f15, %r1;\n"
622 " add.f32 %f16, %f15, 0f3F000000;\n"
623 " cvt.rn.f32.s32 %f17, %r2;\n"
624 " add.f32 %f18, %f17, 0f3F000000;\n"
// %f22 = min(max((%f11 - 1) * 0.5, 0), 1); %f26 is the same from %f14.
625 " add.f32 %f19, %f11, 0fBF800000;\n"
626 " mul.f32 %f20, %f19, 0f3F000000;\n"
627 " max.f32 %f21, %f20, 0f00000000;\n"
628 " min.f32 %f22, %f21, 0f3F800000;\n"
629 " add.f32 %f23, %f14, 0fBF800000;\n"
630 " mul.f32 %f24, %f23, 0f3F000000;\n"
631 " max.f32 %f25, %f24, 0f00000000;\n"
632 " min.f32 %f26, %f25, 0f3F800000;\n"
// Tap offsets: %f28 = %f22 / (%f22 + 0.5), %f30 = %f26 / (%f26 + 0.5).
633 " add.f32 %f27, %f22, 0f3F000000;\n"
634 " div.rn.f32 %f28, %f22, %f27;\n"
635 " add.f32 %f29, %f26, 0f3F000000;\n"
636 " div.rn.f32 %f30, %f26, %f29;\n"
// Lower coordinates: %f5 = %f11 * %f16 - %f28, %f4 = %f14 * %f18 - %f30.
637 " neg.f32 %f31, %f28;\n"
638 " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
639 " neg.f32 %f32, %f30;\n"
640 " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
// Tap 1 at (%f5, %f4).
641 " // begin inline asm\n"
642 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
643 " // end inline asm\n"
644 " and.b32 %r30, %r14, 255;\n"
// Upper x coordinate: %f7 = %f11 * %f16 + %f28. Tap 2 at (%f7, %f4).
645 " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
646 " // begin inline asm\n"
647 " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
648 " // end inline asm\n"
// Running sums of the low 8 bits: %r32 for component 0, %r35 for component 1.
649 " and.b32 %r31, %r18, 255;\n"
650 " add.s32 %r32, %r30, %r31;\n"
651 " and.b32 %r33, %r15, 255;\n"
652 " and.b32 %r34, %r19, 255;\n"
653 " add.s32 %r35, %r33, %r34;\n"
// Upper y coordinate: %f8 = %f14 * %f18 + %f30. Tap 3 at (%f5, %f8).
654 " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
655 " // begin inline asm\n"
656 " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
657 " // end inline asm\n"
658 " and.b32 %r36, %r22, 255;\n"
659 " add.s32 %r37, %r32, %r36;\n"
660 " and.b32 %r38, %r23, 255;\n"
661 " add.s32 %r39, %r35, %r38;\n"
// Tap 4 at (%f7, %f8).
662 " // begin inline asm\n"
663 " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
664 " // end inline asm\n"
665 " and.b32 %r40, %r26, 255;\n"
666 " add.s32 %r41, %r37, %r40;\n"
667 " and.b32 %r42, %r27, 255;\n"
668 " add.s32 %r43, %r39, %r42;\n"
// Per component: (sum + 2) >> 2, the four-tap average rounded to nearest.
// Element index = y * param_4 + x (32-bit); byte offset = index * 2 as a
// 64-bit product. Store the two averages as a 2 x u8 vector.
669 " add.s32 %r44, %r41, 2;\n"
670 " add.s32 %r45, %r43, 2;\n"
671 " mad.lo.s32 %r46, %r2, %r5, %r1;\n"
672 " shr.u32 %r47, %r44, 2;\n"
673 " shr.u32 %r48, %r45, 2;\n"
674 " cvt.u16.u32 %rs1, %r47;\n"
675 " mul.wide.s32 %rd8, %r46, 2;\n"
676 " add.s64 %rd9, %rd1, %rd8;\n"
677 " cvt.u16.u32 %rs2, %r48;\n"
678 " st.global.v2.u8 [%rd9], {%rs1, %rs2};\n"
// Subsample_Bilinear_uchar4: each thread fetches four texture samples placed
// symmetrically around one position, averages the low 8 bits of all four
// components (per component, with rounding), and stores four bytes.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 4-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
// Float literals used below (IEEE-754 single precision bit patterns):
//   0f3F000000 = 0.5, 0fBF800000 = -1.0, 0f3F800000 = 1.0, 0f00000000 = 0.0
683 " // .globl Subsample_Bilinear_uchar4\n"
684 ".visible .entry Subsample_Bilinear_uchar4(\n"
685 " .param .u64 Subsample_Bilinear_uchar4_param_0,\n"
686 " .param .u64 Subsample_Bilinear_uchar4_param_1,\n"
687 " .param .u32 Subsample_Bilinear_uchar4_param_2,\n"
688 " .param .u32 Subsample_Bilinear_uchar4_param_3,\n"
689 " .param .u32 Subsample_Bilinear_uchar4_param_4,\n"
690 " .param .u32 Subsample_Bilinear_uchar4_param_5,\n"
691 " .param .u32 Subsample_Bilinear_uchar4_param_6,\n"
692 " .param .u32 Subsample_Bilinear_uchar4_param_7\n"
695 " .reg .pred %p<4>;\n"
696 " .reg .b16 %rs<5>;\n"
697 " .reg .f32 %f<33>;\n"
698 " .reg .b32 %r<67>;\n"
699 " .reg .b64 %rd<10>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
701 " ld.param.u32 %r4, [Subsample_Bilinear_uchar4_param_3];\n"
702 " ld.param.u32 %r3, [Subsample_Bilinear_uchar4_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
703 " // begin inline asm\n"
704 " mov.u32 %r8, %ctaid.x;\n"
705 " // end inline asm\n"
706 " // begin inline asm\n"
707 " mov.u32 %r9, %ctaid.y;\n"
708 " // end inline asm\n"
709 " // begin inline asm\n"
710 " mov.u32 %r10, %ntid.x;\n"
711 " // end inline asm\n"
712 " // begin inline asm\n"
713 " mov.u32 %r11, %ntid.y;\n"
714 " // end inline asm\n"
715 " // begin inline asm\n"
716 " mov.u32 %r12, %tid.x;\n"
717 " // end inline asm\n"
718 " // begin inline asm\n"
719 " mov.u32 %r13, %tid.y;\n"
720 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
721 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
722 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
723 " setp.ge.s32 %p1, %r2, %r4;\n"
724 " setp.ge.s32 %p2, %r1, %r3;\n"
725 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB8_2 is not shown in this listing (presumably the kernel exit - confirm).
726 " @%p3 bra LBB8_2;\n"
727 " ld.param.u32 %r7, [Subsample_Bilinear_uchar4_param_6];\n"
728 " ld.param.u32 %r6, [Subsample_Bilinear_uchar4_param_5];\n"
729 " ld.param.u32 %r5, [Subsample_Bilinear_uchar4_param_4];\n"
730 " ld.param.u64 %rd4, [Subsample_Bilinear_uchar4_param_0];\n"
731 " ld.param.u64 %rd3, [Subsample_Bilinear_uchar4_param_1];\n"
732 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f11 = (float)param_5 / (float)param_2,
//                %f14 = (float)param_6 / (float)param_3.
733 " cvt.rn.f32.s32 %f9, %r6;\n"
734 " cvt.rn.f32.s32 %f10, %r3;\n"
735 " div.rn.f32 %f11, %f9, %f10;\n"
736 " cvt.rn.f32.s32 %f12, %r7;\n"
737 " cvt.rn.f32.s32 %f13, %r4;\n"
738 " div.rn.f32 %f14, %f12, %f13;\n"
// %f16 = x + 0.5, %f18 = y + 0.5.
739 " cvt.rn.f32.s32 %f15, %r1;\n"
740 " add.f32 %f16, %f15, 0f3F000000;\n"
741 " cvt.rn.f32.s32 %f17, %r2;\n"
742 " add.f32 %f18, %f17, 0f3F000000;\n"
// %f22 = min(max((%f11 - 1) * 0.5, 0), 1); %f26 is the same from %f14.
743 " add.f32 %f19, %f11, 0fBF800000;\n"
744 " mul.f32 %f20, %f19, 0f3F000000;\n"
745 " max.f32 %f21, %f20, 0f00000000;\n"
746 " min.f32 %f22, %f21, 0f3F800000;\n"
747 " add.f32 %f23, %f14, 0fBF800000;\n"
748 " mul.f32 %f24, %f23, 0f3F000000;\n"
749 " max.f32 %f25, %f24, 0f00000000;\n"
750 " min.f32 %f26, %f25, 0f3F800000;\n"
// Tap offsets: %f28 = %f22 / (%f22 + 0.5), %f30 = %f26 / (%f26 + 0.5).
751 " add.f32 %f27, %f22, 0f3F000000;\n"
752 " div.rn.f32 %f28, %f22, %f27;\n"
753 " add.f32 %f29, %f26, 0f3F000000;\n"
754 " div.rn.f32 %f30, %f26, %f29;\n"
// Lower coordinates: %f5 = %f11 * %f16 - %f28, %f4 = %f14 * %f18 - %f30.
755 " neg.f32 %f31, %f28;\n"
756 " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
757 " neg.f32 %f32, %f30;\n"
758 " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
// Tap 1 at (%f5, %f4).
759 " // begin inline asm\n"
760 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
761 " // end inline asm\n"
762 " and.b32 %r30, %r14, 255;\n"
// Upper x coordinate: %f7 = %f11 * %f16 + %f28. Tap 2 at (%f7, %f4).
763 " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
764 " // begin inline asm\n"
765 " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
766 " // end inline asm\n"
// Running sums of the low 8 bits, one per component:
// %r32 (component 0), %r35 (1), %r38 (2), %r41 (3).
767 " and.b32 %r31, %r18, 255;\n"
768 " add.s32 %r32, %r30, %r31;\n"
769 " and.b32 %r33, %r15, 255;\n"
770 " and.b32 %r34, %r19, 255;\n"
771 " add.s32 %r35, %r33, %r34;\n"
772 " and.b32 %r36, %r16, 255;\n"
773 " and.b32 %r37, %r20, 255;\n"
774 " add.s32 %r38, %r36, %r37;\n"
775 " and.b32 %r39, %r17, 255;\n"
776 " and.b32 %r40, %r21, 255;\n"
777 " add.s32 %r41, %r39, %r40;\n"
// Upper y coordinate: %f8 = %f14 * %f18 + %f30. Tap 3 at (%f5, %f8).
778 " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
779 " // begin inline asm\n"
780 " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
781 " // end inline asm\n"
782 " and.b32 %r42, %r22, 255;\n"
783 " add.s32 %r43, %r32, %r42;\n"
784 " and.b32 %r44, %r23, 255;\n"
785 " add.s32 %r45, %r35, %r44;\n"
786 " and.b32 %r46, %r24, 255;\n"
787 " add.s32 %r47, %r38, %r46;\n"
788 " and.b32 %r48, %r25, 255;\n"
789 " add.s32 %r49, %r41, %r48;\n"
// Tap 4 at (%f7, %f8).
790 " // begin inline asm\n"
791 " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
792 " // end inline asm\n"
793 " and.b32 %r50, %r26, 255;\n"
794 " add.s32 %r51, %r43, %r50;\n"
795 " and.b32 %r52, %r27, 255;\n"
796 " add.s32 %r53, %r45, %r52;\n"
797 " and.b32 %r54, %r28, 255;\n"
798 " add.s32 %r55, %r47, %r54;\n"
799 " and.b32 %r56, %r29, 255;\n"
800 " add.s32 %r57, %r49, %r56;\n"
// Per component: (sum + 2) >> 2, the four-tap average rounded to nearest.
// Element index = y * param_4 + x (32-bit); byte offset = index * 4 as a
// 64-bit product. Store the four averages as a 4 x u8 vector.
801 " add.s32 %r58, %r51, 2;\n"
802 " add.s32 %r59, %r53, 2;\n"
803 " add.s32 %r60, %r55, 2;\n"
804 " add.s32 %r61, %r57, 2;\n"
805 " mad.lo.s32 %r62, %r2, %r5, %r1;\n"
806 " shr.u32 %r63, %r58, 2;\n"
807 " shr.u32 %r64, %r59, 2;\n"
808 " shr.u32 %r65, %r60, 2;\n"
809 " shr.u32 %r66, %r61, 2;\n"
810 " cvt.u16.u32 %rs1, %r63;\n"
811 " mul.wide.s32 %rd8, %r62, 4;\n"
812 " add.s64 %rd9, %rd1, %rd8;\n"
813 " cvt.u16.u32 %rs2, %r64;\n"
814 " cvt.u16.u32 %rs3, %r65;\n"
815 " cvt.u16.u32 %rs4, %r66;\n"
816 " st.global.v4.u8 [%rd9], {%rs1, %rs2, %rs3, %rs4};\n"
// Subsample_Bilinear_ushort: each thread fetches four texture samples placed
// symmetrically around one position, averages the low 16 bits of their first
// component with rounding, and stores one 16-bit value.
//   param_0 (u64): handle used as the texture operand of tex.2d
//   param_1 (u64): output base address (converted to a global-space address)
//   param_2, param_3 (u32): bounds compared against the thread's x / y, and
//       divisors of the two scale factors
//       (presumably destination width / height - confirm against the caller)
//   param_4 (u32): multiplier for y in the element index y * param_4 + x
//       (presumably the output pitch in 2-byte elements - confirm)
//   param_5, param_6 (u32): numerators of the x / y scale factors
//       (presumably source width / height - confirm)
//   param_7 (u32): declared but not read by any visible instruction
// Float literals used below (IEEE-754 single precision bit patterns):
//   0f3F000000 = 0.5, 0fBF800000 = -1.0, 0f3F800000 = 1.0, 0f00000000 = 0.0
821 " // .globl Subsample_Bilinear_ushort\n"
822 ".visible .entry Subsample_Bilinear_ushort(\n"
823 " .param .u64 Subsample_Bilinear_ushort_param_0,\n"
824 " .param .u64 Subsample_Bilinear_ushort_param_1,\n"
825 " .param .u32 Subsample_Bilinear_ushort_param_2,\n"
826 " .param .u32 Subsample_Bilinear_ushort_param_3,\n"
827 " .param .u32 Subsample_Bilinear_ushort_param_4,\n"
828 " .param .u32 Subsample_Bilinear_ushort_param_5,\n"
829 " .param .u32 Subsample_Bilinear_ushort_param_6,\n"
830 " .param .u32 Subsample_Bilinear_ushort_param_7\n"
833 " .reg .pred %p<4>;\n"
834 " .reg .f32 %f<33>;\n"
835 " .reg .b32 %r<40>;\n"
836 " .reg .b64 %rd<10>;\n"
// %r4 = param_3 (y bound), %r3 = param_2 (x bound).
838 " ld.param.u32 %r4, [Subsample_Bilinear_ushort_param_3];\n"
839 " ld.param.u32 %r3, [Subsample_Bilinear_ushort_param_2];\n"
// Read block index (%ctaid), block size (%ntid) and thread index (%tid)
// for the x and y dimensions.
840 " // begin inline asm\n"
841 " mov.u32 %r8, %ctaid.x;\n"
842 " // end inline asm\n"
843 " // begin inline asm\n"
844 " mov.u32 %r9, %ctaid.y;\n"
845 " // end inline asm\n"
846 " // begin inline asm\n"
847 " mov.u32 %r10, %ntid.x;\n"
848 " // end inline asm\n"
849 " // begin inline asm\n"
850 " mov.u32 %r11, %ntid.y;\n"
851 " // end inline asm\n"
852 " // begin inline asm\n"
853 " mov.u32 %r12, %tid.x;\n"
854 " // end inline asm\n"
855 " // begin inline asm\n"
856 " mov.u32 %r13, %tid.y;\n"
857 " // end inline asm\n"
// x = %r1 = ntid.x * ctaid.x + tid.x ; y = %r2 = ntid.y * ctaid.y + tid.y.
858 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
859 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
// %p3 = (x >= param_2) || (y >= param_3), using signed comparisons.
860 " setp.ge.s32 %p1, %r2, %r4;\n"
861 " setp.ge.s32 %p2, %r1, %r3;\n"
862 " or.pred %p3, %p2, %p1;\n"
// Threads with %p3 set skip the rest of the visible body. The target label
// LBB9_2 is not shown in this listing (presumably the kernel exit - confirm).
863 " @%p3 bra LBB9_2;\n"
864 " ld.param.u32 %r7, [Subsample_Bilinear_ushort_param_6];\n"
865 " ld.param.u32 %r6, [Subsample_Bilinear_ushort_param_5];\n"
866 " ld.param.u32 %r5, [Subsample_Bilinear_ushort_param_4];\n"
867 " ld.param.u64 %rd4, [Subsample_Bilinear_ushort_param_0];\n"
868 " ld.param.u64 %rd3, [Subsample_Bilinear_ushort_param_1];\n"
869 " cvta.to.global.u64 %rd1, %rd3;\n"
// Scale factors: %f11 = (float)param_5 / (float)param_2,
//                %f14 = (float)param_6 / (float)param_3.
870 " cvt.rn.f32.s32 %f9, %r6;\n"
871 " cvt.rn.f32.s32 %f10, %r3;\n"
872 " div.rn.f32 %f11, %f9, %f10;\n"
873 " cvt.rn.f32.s32 %f12, %r7;\n"
874 " cvt.rn.f32.s32 %f13, %r4;\n"
875 " div.rn.f32 %f14, %f12, %f13;\n"
// %f16 = x + 0.5, %f18 = y + 0.5.
876 " cvt.rn.f32.s32 %f15, %r1;\n"
877 " add.f32 %f16, %f15, 0f3F000000;\n"
878 " cvt.rn.f32.s32 %f17, %r2;\n"
879 " add.f32 %f18, %f17, 0f3F000000;\n"
// %f22 = min(max((%f11 - 1) * 0.5, 0), 1); %f26 is the same from %f14.
880 " add.f32 %f19, %f11, 0fBF800000;\n"
881 " mul.f32 %f20, %f19, 0f3F000000;\n"
882 " max.f32 %f21, %f20, 0f00000000;\n"
883 " min.f32 %f22, %f21, 0f3F800000;\n"
884 " add.f32 %f23, %f14, 0fBF800000;\n"
885 " mul.f32 %f24, %f23, 0f3F000000;\n"
886 " max.f32 %f25, %f24, 0f00000000;\n"
887 " min.f32 %f26, %f25, 0f3F800000;\n"
// Tap offsets: %f28 = %f22 / (%f22 + 0.5), %f30 = %f26 / (%f26 + 0.5).
888 " add.f32 %f27, %f22, 0f3F000000;\n"
889 " div.rn.f32 %f28, %f22, %f27;\n"
890 " add.f32 %f29, %f26, 0f3F000000;\n"
891 " div.rn.f32 %f30, %f26, %f29;\n"
// Lower coordinates: %f5 = %f11 * %f16 - %f28, %f4 = %f14 * %f18 - %f30.
892 " neg.f32 %f31, %f28;\n"
893 " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
894 " neg.f32 %f32, %f30;\n"
895 " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
// Tap 1 at (%f5, %f4); keep the low 16 bits of the first component.
896 " // begin inline asm\n"
897 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
898 " // end inline asm\n"
899 " and.b32 %r30, %r14, 65535;\n"
// Upper x coordinate: %f7 = %f11 * %f16 + %f28. Tap 2 at (%f7, %f4).
900 " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
901 " // begin inline asm\n"
902 " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
903 " // end inline asm\n"
904 " and.b32 %r31, %r18, 65535;\n"
905 " add.s32 %r32, %r30, %r31;\n"
// Upper y coordinate: %f8 = %f14 * %f18 + %f30. Tap 3 at (%f5, %f8).
906 " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
907 " // begin inline asm\n"
908 " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
909 " // end inline asm\n"
910 " and.b32 %r33, %r22, 65535;\n"
911 " add.s32 %r34, %r32, %r33;\n"
// Tap 4 at (%f7, %f8).
912 " // begin inline asm\n"
913 " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
914 " // end inline asm\n"
915 " and.b32 %r35, %r26, 65535;\n"
916 " add.s32 %r36, %r34, %r35;\n"
// (sum + 2) >> 2: average of the four taps, rounded to nearest.
// Element index = y * param_4 + x (32-bit); byte offset = index * 2 as a
// 64-bit product. Store the low 16 bits of the average.
917 " add.s32 %r37, %r36, 2;\n"
918 " mad.lo.s32 %r38, %r2, %r5, %r1;\n"
919 " mul.wide.s32 %rd8, %r38, 2;\n"
920 " add.s64 %rd9, %rd1, %rd8;\n"
921 " shr.u32 %r39, %r37, 2;\n"
922 " st.global.u16 [%rd9], %r39;\n"
/*
 * Subsample_Bilinear_ushort2
 *
 * For the thread's (x, y) position this kernel fetches four texels placed
 * around a scaled sample position, sums components 0 and 1 of each fetch
 * (masked to 16 bits), forms the rounded average of the four samples and
 * stores the two results as a pair of u16 values.
 *
 * Parameters, as used by the instructions below:
 *   param_0 (u64)  texture operand of every tex.2d fetch
 *   param_1 (u64)  output base address (converted with cvta.to.global)
 *   param_2 (u32)  exclusive upper bound for x; divisor of the horizontal ratio
 *   param_3 (u32)  exclusive upper bound for y; divisor of the vertical ratio
 *   param_4 (u32)  row multiplier of the output index (y * param_4 + x),
 *                  counted in 4-byte elements
 *   param_5 (u32)  numerator of the horizontal ratio
 *   param_6 (u32)  numerator of the vertical ratio
 *   param_7 (u32)  declared, but not loaded by any instruction shown here
 *
 * NOTE(review): params 2-6 look like destination width/height, destination
 * pitch and source width/height -- confirm against the CUDA source this PTX
 * was generated from.
 */
927 " // .globl Subsample_Bilinear_ushort2\n"
928 ".visible .entry Subsample_Bilinear_ushort2(\n"
929 " .param .u64 Subsample_Bilinear_ushort2_param_0,\n"
930 " .param .u64 Subsample_Bilinear_ushort2_param_1,\n"
931 " .param .u32 Subsample_Bilinear_ushort2_param_2,\n"
932 " .param .u32 Subsample_Bilinear_ushort2_param_3,\n"
933 " .param .u32 Subsample_Bilinear_ushort2_param_4,\n"
934 " .param .u32 Subsample_Bilinear_ushort2_param_5,\n"
935 " .param .u32 Subsample_Bilinear_ushort2_param_6,\n"
936 " .param .u32 Subsample_Bilinear_ushort2_param_7\n"
/* Virtual register banks: predicates, 16-bit, f32, 32-bit and 64-bit. */
939 " .reg .pred %p<4>;\n"
940 " .reg .b16 %rs<3>;\n"
941 " .reg .f32 %f<33>;\n"
942 " .reg .b32 %r<49>;\n"
943 " .reg .b64 %rd<10>;\n"
/* %r4 = param_3 (y bound), %r3 = param_2 (x bound). */
945 " ld.param.u32 %r4, [Subsample_Bilinear_ushort2_param_3];\n"
946 " ld.param.u32 %r3, [Subsample_Bilinear_ushort2_param_2];\n"
/* Read block index (%ctaid), block size (%ntid) and thread index (%tid) for x and y. */
947 " // begin inline asm\n"
948 " mov.u32 %r8, %ctaid.x;\n"
949 " // end inline asm\n"
950 " // begin inline asm\n"
951 " mov.u32 %r9, %ctaid.y;\n"
952 " // end inline asm\n"
953 " // begin inline asm\n"
954 " mov.u32 %r10, %ntid.x;\n"
955 " // end inline asm\n"
956 " // begin inline asm\n"
957 " mov.u32 %r11, %ntid.y;\n"
958 " // end inline asm\n"
959 " // begin inline asm\n"
960 " mov.u32 %r12, %tid.x;\n"
961 " // end inline asm\n"
962 " // begin inline asm\n"
963 " mov.u32 %r13, %tid.y;\n"
964 " // end inline asm\n"
/* %r1 = x = ntid.x * ctaid.x + tid.x;  %r2 = y = ntid.y * ctaid.y + tid.y. */
965 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
966 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
/* Threads with y >= param_3 or x >= param_2 (signed compares) branch away
 * to LBB10_2 instead of executing the code below. */
967 " setp.ge.s32 %p1, %r2, %r4;\n"
968 " setp.ge.s32 %p2, %r1, %r3;\n"
969 " or.pred %p3, %p2, %p1;\n"
970 " @%p3 bra LBB10_2;\n"
/* The remaining parameters are loaded only on the in-range path. */
971 " ld.param.u32 %r7, [Subsample_Bilinear_ushort2_param_6];\n"
972 " ld.param.u32 %r6, [Subsample_Bilinear_ushort2_param_5];\n"
973 " ld.param.u32 %r5, [Subsample_Bilinear_ushort2_param_4];\n"
974 " ld.param.u64 %rd4, [Subsample_Bilinear_ushort2_param_0];\n"
975 " ld.param.u64 %rd3, [Subsample_Bilinear_ushort2_param_1];\n"
/* %rd1 = param_1 as a global-space address. */
976 " cvta.to.global.u64 %rd1, %rd3;\n"
/* %f11 = (float)param_5 / (float)param_2  (horizontal ratio). */
977 " cvt.rn.f32.s32 %f9, %r6;\n"
978 " cvt.rn.f32.s32 %f10, %r3;\n"
979 " div.rn.f32 %f11, %f9, %f10;\n"
/* %f14 = (float)param_6 / (float)param_3  (vertical ratio). */
980 " cvt.rn.f32.s32 %f12, %r7;\n"
981 " cvt.rn.f32.s32 %f13, %r4;\n"
982 " div.rn.f32 %f14, %f12, %f13;\n"
/* %f16 = x + 0.5, %f18 = y + 0.5  (0f3F000000 is 0.5f). */
983 " cvt.rn.f32.s32 %f15, %r1;\n"
984 " add.f32 %f16, %f15, 0f3F000000;\n"
985 " cvt.rn.f32.s32 %f17, %r2;\n"
986 " add.f32 %f18, %f17, 0f3F000000;\n"
/* %f22 = min(max((%f11 - 1.0) * 0.5, 0.0), 1.0);
 * 0fBF800000 is -1.0f and 0f3F800000 is 1.0f. */
987 " add.f32 %f19, %f11, 0fBF800000;\n"
988 " mul.f32 %f20, %f19, 0f3F000000;\n"
989 " max.f32 %f21, %f20, 0f00000000;\n"
990 " min.f32 %f22, %f21, 0f3F800000;\n"
/* %f26 = the same clamp applied to the vertical ratio %f14. */
991 " add.f32 %f23, %f14, 0fBF800000;\n"
992 " mul.f32 %f24, %f23, 0f3F000000;\n"
993 " max.f32 %f25, %f24, 0f00000000;\n"
994 " min.f32 %f26, %f25, 0f3F800000;\n"
/* Sample offsets: %f28 = %f22 / (%f22 + 0.5), %f30 = %f26 / (%f26 + 0.5). */
995 " add.f32 %f27, %f22, 0f3F000000;\n"
996 " div.rn.f32 %f28, %f22, %f27;\n"
997 " add.f32 %f29, %f26, 0f3F000000;\n"
998 " div.rn.f32 %f30, %f26, %f29;\n"
/* %f5 = %f11 * %f16 - %f28 (smaller x), %f4 = %f14 * %f18 - %f30 (smaller y). */
999 " neg.f32 %f31, %f28;\n"
1000 " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
1001 " neg.f32 %f32, %f30;\n"
1002 " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
/* Fetch 1 at (%f5, %f4); %r30 = low 16 bits of component 0. */
1003 " // begin inline asm\n"
1004 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
1005 " // end inline asm\n"
1006 " and.b32 %r30, %r14, 65535;\n"
/* %f7 = %f11 * %f16 + %f28 (larger x). */
1007 " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
/* Fetch 2 at (%f7, %f4). */
1008 " // begin inline asm\n"
1009 " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
1010 " // end inline asm\n"
/* Start the per-channel sums from fetches 1 and 2:
 * %r32 = component 0, %r35 = component 1, each masked to 16 bits. */
1011 " and.b32 %r31, %r18, 65535;\n"
1012 " add.s32 %r32, %r30, %r31;\n"
1013 " and.b32 %r33, %r15, 65535;\n"
1014 " and.b32 %r34, %r19, 65535;\n"
1015 " add.s32 %r35, %r33, %r34;\n"
/* %f8 = %f14 * %f18 + %f30 (larger y). */
1016 " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
/* Fetch 3 at (%f5, %f8), added into both running sums. */
1017 " // begin inline asm\n"
1018 " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
1019 " // end inline asm\n"
1020 " and.b32 %r36, %r22, 65535;\n"
1021 " add.s32 %r37, %r32, %r36;\n"
1022 " and.b32 %r38, %r23, 65535;\n"
1023 " add.s32 %r39, %r35, %r38;\n"
/* Fetch 4 at (%f7, %f8), added into both running sums. */
1024 " // begin inline asm\n"
1025 " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
1026 " // end inline asm\n"
1027 " and.b32 %r40, %r26, 65535;\n"
1028 " add.s32 %r41, %r37, %r40;\n"
1029 " and.b32 %r42, %r27, 65535;\n"
1030 " add.s32 %r43, %r39, %r42;\n"
/* Rounding bias: add 2 before the shift-by-2 (divide by 4) below. */
1031 " add.s32 %r44, %r41, 2;\n"
1032 " add.s32 %r45, %r43, 2;\n"
/* %r46 = y * param_4 + x: output element index. */
1033 " mad.lo.s32 %r46, %r2, %r5, %r1;\n"
/* (sum + 2) >> 2: rounded average of the four samples, per channel. */
1034 " shr.u32 %r47, %r44, 2;\n"
1035 " shr.u32 %r48, %r45, 2;\n"
/* Narrow both averages to 16 bits, form the byte address
 * param_1 + index * 4, and write the pair with one vector store. */
1036 " cvt.u16.u32 %rs1, %r47;\n"
1037 " mul.wide.s32 %rd8, %r46, 4;\n"
1038 " add.s64 %rd9, %rd1, %rd8;\n"
1039 " cvt.u16.u32 %rs2, %r48;\n"
1040 " st.global.v2.u16 [%rd9], {%rs1, %rs2};\n"
/*
 * Subsample_Bilinear_ushort4
 *
 * For the thread's (x, y) position this kernel fetches four texels placed
 * around a scaled sample position, sums all four components of each fetch
 * (masked to 16 bits), forms the rounded average of the four samples per
 * component and stores the results as four u16 values.
 *
 * Parameters, as used by the instructions below:
 *   param_0 (u64)  texture operand of every tex.2d fetch
 *   param_1 (u64)  output base address (converted with cvta.to.global)
 *   param_2 (u32)  exclusive upper bound for x; divisor of the horizontal ratio
 *   param_3 (u32)  exclusive upper bound for y; divisor of the vertical ratio
 *   param_4 (u32)  row multiplier of the output index (y * param_4 + x),
 *                  counted in 8-byte elements
 *   param_5 (u32)  numerator of the horizontal ratio
 *   param_6 (u32)  numerator of the vertical ratio
 *   param_7 (u32)  declared, but not loaded by any instruction shown here
 *
 * NOTE(review): params 2-6 look like destination width/height, destination
 * pitch and source width/height -- confirm against the CUDA source this PTX
 * was generated from.
 */
1045 " // .globl Subsample_Bilinear_ushort4\n"
1046 ".visible .entry Subsample_Bilinear_ushort4(\n"
1047 " .param .u64 Subsample_Bilinear_ushort4_param_0,\n"
1048 " .param .u64 Subsample_Bilinear_ushort4_param_1,\n"
1049 " .param .u32 Subsample_Bilinear_ushort4_param_2,\n"
1050 " .param .u32 Subsample_Bilinear_ushort4_param_3,\n"
1051 " .param .u32 Subsample_Bilinear_ushort4_param_4,\n"
1052 " .param .u32 Subsample_Bilinear_ushort4_param_5,\n"
1053 " .param .u32 Subsample_Bilinear_ushort4_param_6,\n"
1054 " .param .u32 Subsample_Bilinear_ushort4_param_7\n"
/* Virtual register banks: predicates, 16-bit, f32, 32-bit and 64-bit. */
1057 " .reg .pred %p<4>;\n"
1058 " .reg .b16 %rs<5>;\n"
1059 " .reg .f32 %f<33>;\n"
1060 " .reg .b32 %r<67>;\n"
1061 " .reg .b64 %rd<10>;\n"
/* %r4 = param_3 (y bound), %r3 = param_2 (x bound). */
1063 " ld.param.u32 %r4, [Subsample_Bilinear_ushort4_param_3];\n"
1064 " ld.param.u32 %r3, [Subsample_Bilinear_ushort4_param_2];\n"
/* Read block index (%ctaid), block size (%ntid) and thread index (%tid) for x and y. */
1065 " // begin inline asm\n"
1066 " mov.u32 %r8, %ctaid.x;\n"
1067 " // end inline asm\n"
1068 " // begin inline asm\n"
1069 " mov.u32 %r9, %ctaid.y;\n"
1070 " // end inline asm\n"
1071 " // begin inline asm\n"
1072 " mov.u32 %r10, %ntid.x;\n"
1073 " // end inline asm\n"
1074 " // begin inline asm\n"
1075 " mov.u32 %r11, %ntid.y;\n"
1076 " // end inline asm\n"
1077 " // begin inline asm\n"
1078 " mov.u32 %r12, %tid.x;\n"
1079 " // end inline asm\n"
1080 " // begin inline asm\n"
1081 " mov.u32 %r13, %tid.y;\n"
1082 " // end inline asm\n"
/* %r1 = x = ntid.x * ctaid.x + tid.x;  %r2 = y = ntid.y * ctaid.y + tid.y. */
1083 " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
1084 " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
/* Threads with y >= param_3 or x >= param_2 (signed compares) branch away
 * to LBB11_2 instead of executing the code below. */
1085 " setp.ge.s32 %p1, %r2, %r4;\n"
1086 " setp.ge.s32 %p2, %r1, %r3;\n"
1087 " or.pred %p3, %p2, %p1;\n"
1088 " @%p3 bra LBB11_2;\n"
/* The remaining parameters are loaded only on the in-range path. */
1089 " ld.param.u32 %r7, [Subsample_Bilinear_ushort4_param_6];\n"
1090 " ld.param.u32 %r6, [Subsample_Bilinear_ushort4_param_5];\n"
1091 " ld.param.u32 %r5, [Subsample_Bilinear_ushort4_param_4];\n"
1092 " ld.param.u64 %rd4, [Subsample_Bilinear_ushort4_param_0];\n"
1093 " ld.param.u64 %rd3, [Subsample_Bilinear_ushort4_param_1];\n"
/* %rd1 = param_1 as a global-space address. */
1094 " cvta.to.global.u64 %rd1, %rd3;\n"
/* %f11 = (float)param_5 / (float)param_2  (horizontal ratio). */
1095 " cvt.rn.f32.s32 %f9, %r6;\n"
1096 " cvt.rn.f32.s32 %f10, %r3;\n"
1097 " div.rn.f32 %f11, %f9, %f10;\n"
/* %f14 = (float)param_6 / (float)param_3  (vertical ratio). */
1098 " cvt.rn.f32.s32 %f12, %r7;\n"
1099 " cvt.rn.f32.s32 %f13, %r4;\n"
1100 " div.rn.f32 %f14, %f12, %f13;\n"
/* %f16 = x + 0.5, %f18 = y + 0.5  (0f3F000000 is 0.5f). */
1101 " cvt.rn.f32.s32 %f15, %r1;\n"
1102 " add.f32 %f16, %f15, 0f3F000000;\n"
1103 " cvt.rn.f32.s32 %f17, %r2;\n"
1104 " add.f32 %f18, %f17, 0f3F000000;\n"
/* %f22 = min(max((%f11 - 1.0) * 0.5, 0.0), 1.0);
 * 0fBF800000 is -1.0f and 0f3F800000 is 1.0f. */
1105 " add.f32 %f19, %f11, 0fBF800000;\n"
1106 " mul.f32 %f20, %f19, 0f3F000000;\n"
1107 " max.f32 %f21, %f20, 0f00000000;\n"
1108 " min.f32 %f22, %f21, 0f3F800000;\n"
/* %f26 = the same clamp applied to the vertical ratio %f14. */
1109 " add.f32 %f23, %f14, 0fBF800000;\n"
1110 " mul.f32 %f24, %f23, 0f3F000000;\n"
1111 " max.f32 %f25, %f24, 0f00000000;\n"
1112 " min.f32 %f26, %f25, 0f3F800000;\n"
/* Sample offsets: %f28 = %f22 / (%f22 + 0.5), %f30 = %f26 / (%f26 + 0.5). */
1113 " add.f32 %f27, %f22, 0f3F000000;\n"
1114 " div.rn.f32 %f28, %f22, %f27;\n"
1115 " add.f32 %f29, %f26, 0f3F000000;\n"
1116 " div.rn.f32 %f30, %f26, %f29;\n"
/* %f5 = %f11 * %f16 - %f28 (smaller x), %f4 = %f14 * %f18 - %f30 (smaller y). */
1117 " neg.f32 %f31, %f28;\n"
1118 " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
1119 " neg.f32 %f32, %f30;\n"
1120 " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
/* Fetch 1 at (%f5, %f4); %r30 = low 16 bits of component 0. */
1121 " // begin inline asm\n"
1122 " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
1123 " // end inline asm\n"
1124 " and.b32 %r30, %r14, 65535;\n"
/* %f7 = %f11 * %f16 + %f28 (larger x). */
1125 " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
/* Fetch 2 at (%f7, %f4). */
1126 " // begin inline asm\n"
1127 " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
1128 " // end inline asm\n"
/* Start the per-channel sums from fetches 1 and 2, each value masked to
 * 16 bits: %r32 = component 0, %r35 = component 1, %r38 = component 2,
 * %r41 = component 3. */
1129 " and.b32 %r31, %r18, 65535;\n"
1130 " add.s32 %r32, %r30, %r31;\n"
1131 " and.b32 %r33, %r15, 65535;\n"
1132 " and.b32 %r34, %r19, 65535;\n"
1133 " add.s32 %r35, %r33, %r34;\n"
1134 " and.b32 %r36, %r16, 65535;\n"
1135 " and.b32 %r37, %r20, 65535;\n"
1136 " add.s32 %r38, %r36, %r37;\n"
1137 " and.b32 %r39, %r17, 65535;\n"
1138 " and.b32 %r40, %r21, 65535;\n"
1139 " add.s32 %r41, %r39, %r40;\n"
/* %f8 = %f14 * %f18 + %f30 (larger y). */
1140 " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
/* Fetch 3 at (%f5, %f8), added into all four running sums. */
1141 " // begin inline asm\n"
1142 " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
1143 " // end inline asm\n"
1144 " and.b32 %r42, %r22, 65535;\n"
1145 " add.s32 %r43, %r32, %r42;\n"
1146 " and.b32 %r44, %r23, 65535;\n"
1147 " add.s32 %r45, %r35, %r44;\n"
1148 " and.b32 %r46, %r24, 65535;\n"
1149 " add.s32 %r47, %r38, %r46;\n"
1150 " and.b32 %r48, %r25, 65535;\n"
1151 " add.s32 %r49, %r41, %r48;\n"
/* Fetch 4 at (%f7, %f8), added into all four running sums. */
1152 " // begin inline asm\n"
1153 " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
1154 " // end inline asm\n"
1155 " and.b32 %r50, %r26, 65535;\n"
1156 " add.s32 %r51, %r43, %r50;\n"
1157 " and.b32 %r52, %r27, 65535;\n"
1158 " add.s32 %r53, %r45, %r52;\n"
1159 " and.b32 %r54, %r28, 65535;\n"
1160 " add.s32 %r55, %r47, %r54;\n"
1161 " and.b32 %r56, %r29, 65535;\n"
1162 " add.s32 %r57, %r49, %r56;\n"
/* Rounding bias: add 2 before the shift-by-2 (divide by 4) below. */
1163 " add.s32 %r58, %r51, 2;\n"
1164 " add.s32 %r59, %r53, 2;\n"
1165 " add.s32 %r60, %r55, 2;\n"
1166 " add.s32 %r61, %r57, 2;\n"
/* %r62 = y * param_4 + x: output element index. */
1167 " mad.lo.s32 %r62, %r2, %r5, %r1;\n"
/* (sum + 2) >> 2: rounded average of the four samples, per channel. */
1168 " shr.u32 %r63, %r58, 2;\n"
1169 " shr.u32 %r64, %r59, 2;\n"
1170 " shr.u32 %r65, %r60, 2;\n"
1171 " shr.u32 %r66, %r61, 2;\n"
/* Narrow the four averages to 16 bits, form the byte address
 * param_1 + index * 8, and write all four with one vector store. */
1172 " cvt.u16.u32 %rs1, %r63;\n"
1173 " mul.wide.s32 %rd8, %r62, 8;\n"
1174 " add.s64 %rd9, %rd1, %rd8;\n"
1175 " cvt.u16.u32 %rs2, %r64;\n"
1176 " cvt.u16.u32 %rs3, %r65;\n"
1177 " cvt.u16.u32 %rs4, %r66;\n"
1178 " st.global.v4.u16 [%rd9], {%rs1, %rs2, %rs3, %rs4};\n"
const char vf_scale_cuda_ptx[]