I0624 11:55:56.979715 139899452486720 xla_bridge.py:568] Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: "rocm". Available platform names are: Interpreter CUDA I0624 11:55:56.979871 139899452486720 xla_bridge.py:568] Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client' (4, 8) Traceback (most recent call last): File "/media/adam/shared_drive/PycharmProjects/triton_test/pallas_test.py", line 231, in app.run(main) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/absl/app.py", line 308, in run _run_main(main, args) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/absl/app.py", line 254, in _run_main sys.exit(main(argv)) File "/media/adam/shared_drive/PycharmProjects/triton_test/pallas_test.py", line 226, in main print(pl.pallas_call(kernel1, out_shape=out_shape, grid=grid)(x, y)) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax_triton/pallas/pallas_call.py", line 352, in wrapped out_flat = pallas_call_p.bind( jax._src.source_info_util.JaxStackTraceBeforeTransformation: TypeError: __init__(): incompatible constructor arguments. The following argument types are supported: 1. jaxlib.cuda._triton.TritonKernel(arg0: str, arg1: str, arg2: int, arg3: int) Invoked with: 'add', 4, 0, '//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 8.0\n.target sm_89\n.address_size 64\n\n\t// .globl\tadd\n\n.visible .entry add(\n\t.param .u64 add_param_0,\n\t.param .u64 add_param_1,\n\t.param .u64 add_param_2\n)\n.maxntid 128, 1, 1\n{\n\t.reg .pred \t%p<97>;\n\t.reg .b32 \t%r<389>;\n\t.reg .f32 \t%f<513>;\n\t.reg .b64 \t%rd<105>;\n\n\tld.param.u64 \t%rd97, [add_param_0];\n\tld.param.u64 \t%rd98, [add_param_1];\n\tmov.u32 \t%r385, %tid.x;\n\tshl.b32 \t%r386, %r385, 2;\n\tld.param.u64 \t%rd99, [add_param_2];\n\tand.b32 \t%r387, %r386, 124;\n\tand.b32 \t%r388, %r386, 16256;\n\tmul.wide.u32 \t%rd100, %r388, 4;\n\tadd.s64 \t%rd101, %rd97, %rd100;\n\tmul.wide.u32 \t%rd102, %r387, 4;\n\tadd.s64 \t%rd1, %rd101, %rd102;\n\tadd.s64 \t%rd2, %rd1, 2048;\n\tadd.s64 \t%rd3, %rd1, 4096;\n\tadd.s64 \t%rd4, %rd1, 6144;\n\tadd.s64 \t%rd5, %rd1, 8192;\n\tadd.s64 \t%rd6, %rd1, 10240;\n\tadd.s64 \t%rd7, %rd1, 12288;\n\tadd.s64 \t%rd8, %rd1, 14336;\n\tadd.s64 \t%rd9, %rd1, 16384;\n\tadd.s64 \t%rd10, %rd1, 18432;\n\tadd.s64 \t%rd11, %rd1, 20480;\n\tadd.s64 \t%rd12, %rd1, 22528;\n\tadd.s64 \t%rd13, %rd1, 24576;\n\tadd.s64 \t%rd14, %rd1, 26624;\n\tadd.s64 \t%rd15, %rd1, 28672;\n\tadd.s64 \t%rd16, %rd1, 30720;\n\tadd.s64 \t%rd17, %rd1, 32768;\n\tadd.s64 \t%rd18, %rd1, 34816;\n\tadd.s64 \t%rd19, %rd1, 36864;\n\tadd.s64 \t%rd20, %rd1, 38912;\n\tadd.s64 \t%rd21, %rd1, 40960;\n\tadd.s64 \t%rd22, %rd1, 43008;\n\tadd.s64 \t%rd23, %rd1, 45056;\n\tadd.s64 \t%rd24, %rd1, 47104;\n\tadd.s64 \t%rd25, %rd1, 49152;\n\tadd.s64 \t%rd26, %rd1, 51200;\n\tadd.s64 \t%rd27, %rd1, 53248;\n\tadd.s64 \t%rd28, %rd1, 55296;\n\tadd.s64 \t%rd29, %rd1, 57344;\n\tadd.s64 \t%rd30, %rd1, 59392;\n\tadd.s64 \t%rd31, %rd1, 61440;\n\tadd.s64 \t%rd32, %rd1, 63488;\n\tmov.pred \t%p1, -1;\n\tmov.u32 %r1, 0x0;\n\tmov.u32 %r2, 0x0;\n\tmov.u32 %r3, 0x0;\n\tmov.u32 %r4, 0x0;\n\t@%p1 ld.global.v4.b32 { %r1, %r2, %r3, %r4 }, [ %rd1 + 0 ];\n\tmov.b32 \t%f1, %r1;\n\tmov.b32 \t%f2, %r2;\n\tmov.b32 \t%f3, %r3;\n\tmov.b32 \t%f4, %r4;\n\tmov.u32 %r5, 0x0;\n\tmov.u32 %r6, 0x0;\n\tmov.u32 %r7, 0x0;\n\tmov.u32 %r8, 0x0;\n\t@%p1 ld.global.v4.b32 { %r5, %r6, %r7, %r8 }, [ %rd2 + 0 ];\n\tmov.b32 \t%f5, %r5;\n\tmov.b32 \t%f6, %r6;\n\tmov.b32 \t%f7, %r7;\n\tmov.b32 \t%f8, %r8;\n\tmov.u32 %r9, 0x0;\n\tmov.u32 %r10, 0x0;\n\tmov.u32 %r11, 0x0;\n\tmov.u32 %r12, 0x0;\n\t@%p1 ld.global.v4.b32 { %r9, %r10, %r11, %r12 }, [ %rd3 + 0 ];\n\tmov.b32 \t%f9, %r9;\n\tmov.b32 \t%f10, %r10;\n\tmov.b32 \t%f11, %r11;\n\tmov.b32 \t%f12, %r12;\n\tmov.u32 %r13, 0x0;\n\tmov.u32 %r14, 0x0;\n\tmov.u32 %r15, 0x0;\n\tmov.u32 %r16, 0x0;\n\t@%p1 ld.global.v4.b32 { %r13, %r14, %r15, %r16 }, [ %rd4 + 0 ];\n\tmov.b32 \t%f13, %r13;\n\tmov.b32 \t%f14, %r14;\n\tmov.b32 \t%f15, %r15;\n\tmov.b32 \t%f16, %r16;\n\tmov.u32 %r17, 0x0;\n\tmov.u32 %r18, 0x0;\n\tmov.u32 %r19, 0x0;\n\tmov.u32 %r20, 0x0;\n\t@%p1 ld.global.v4.b32 { %r17, %r18, %r19, %r20 }, [ %rd5 + 0 ];\n\tmov.b32 \t%f17, %r17;\n\tmov.b32 \t%f18, %r18;\n\tmov.b32 \t%f19, %r19;\n\tmov.b32 \t%f20, %r20;\n\tmov.u32 %r21, 0x0;\n\tmov.u32 %r22, 0x0;\n\tmov.u32 %r23, 0x0;\n\tmov.u32 %r24, 0x0;\n\t@%p1 ld.global.v4.b32 { %r21, %r22, %r23, %r24 }, [ %rd6 + 0 ];\n\tmov.b32 \t%f21, %r21;\n\tmov.b32 \t%f22, %r22;\n\tmov.b32 \t%f23, %r23;\n\tmov.b32 \t%f24, %r24;\n\tmov.u32 %r25, 0x0;\n\tmov.u32 %r26, 0x0;\n\tmov.u32 %r27, 0x0;\n\tmov.u32 %r28, 0x0;\n\t@%p1 ld.global.v4.b32 { %r25, %r26, %r27, %r28 }, [ %rd7 + 0 ];\n\tmov.b32 \t%f25, %r25;\n\tmov.b32 \t%f26, %r26;\n\tmov.b32 \t%f27, %r27;\n\tmov.b32 \t%f28, %r28;\n\tmov.u32 %r29, 0x0;\n\tmov.u32 %r30, 0x0;\n\tmov.u32 %r31, 0x0;\n\tmov.u32 %r32, 0x0;\n\t@%p1 ld.global.v4.b32 { %r29, %r30, %r31, %r32 }, [ %rd8 + 0 ];\n\tmov.b32 \t%f29, %r29;\n\tmov.b32 \t%f30, %r30;\n\tmov.b32 \t%f31, %r31;\n\tmov.b32 \t%f32, %r32;\n\tmov.u32 %r33, 0x0;\n\tmov.u32 %r34, 0x0;\n\tmov.u32 %r35, 0x0;\n\tmov.u32 %r36, 0x0;\n\t@%p1 ld.global.v4.b32 { %r33, %r34, %r35, %r36 }, [ %rd9 + 0 ];\n\tmov.b32 \t%f33, %r33;\n\tmov.b32 \t%f34, %r34;\n\tmov.b32 \t%f35, %r35;\n\tmov.b32 \t%f36, %r36;\n\tmov.u32 %r37, 0x0;\n\tmov.u32 %r38, 0x0;\n\tmov.u32 %r39, 0x0;\n\tmov.u32 %r40, 0x0;\n\t@%p1 ld.global.v4.b32 { %r37, %r38, %r39, %r40 }, [ %rd10 + 0 ];\n\tmov.b32 \t%f37, %r37;\n\tmov.b32 \t%f38, %r38;\n\tmov.b32 \t%f39, %r39;\n\tmov.b32 \t%f40, %r40;\n\tmov.u32 %r41, 0x0;\n\tmov.u32 %r42, 0x0;\n\tmov.u32 %r43, 0x0;\n\tmov.u32 %r44, 0x0;\n\t@%p1 ld.global.v4.b32 { %r41, %r42, %r43, %r44 }, [ %rd11 + 0 ];\n\tmov.b32 \t%f41, %r41;\n\tmov.b32 \t%f42, %r42;\n\tmov.b32 \t%f43, %r43;\n\tmov.b32 \t%f44, %r44;\n\tmov.u32 %r45, 0x0;\n\tmov.u32 %r46, 0x0;\n\tmov.u32 %r47, 0x0;\n\tmov.u32 %r48, 0x0;\n\t@%p1 ld.global.v4.b32 { %r45, %r46, %r47, %r48 }, [ %rd12 + 0 ];\n\tmov.b32 \t%f45, %r45;\n\tmov.b32 \t%f46, %r46;\n\tmov.b32 \t%f47, %r47;\n\tmov.b32 \t%f48, %r48;\n\tmov.u32 %r49, 0x0;\n\tmov.u32 %r50, 0x0;\n\tmov.u32 %r51, 0x0;\n\tmov.u32 %r52, 0x0;\n\t@%p1 ld.global.v4.b32 { %r49, %r50, %r51, %r52 }, [ %rd13 + 0 ];\n\tmov.b32 \t%f49, %r49;\n\tmov.b32 \t%f50, %r50;\n\tmov.b32 \t%f51, %r51;\n\tmov.b32 \t%f52, %r52;\n\tmov.u32 %r53, 0x0;\n\tmov.u32 %r54, 0x0;\n\tmov.u32 %r55, 0x0;\n\tmov.u32 %r56, 0x0;\n\t@%p1 ld.global.v4.b32 { %r53, %r54, %r55, %r56 }, [ %rd14 + 0 ];\n\tmov.b32 \t%f53, %r53;\n\tmov.b32 \t%f54, %r54;\n\tmov.b32 \t%f55, %r55;\n\tmov.b32 \t%f56, %r56;\n\tmov.u32 %r57, 0x0;\n\tmov.u32 %r58, 0x0;\n\tmov.u32 %r59, 0x0;\n\tmov.u32 %r60, 0x0;\n\t@%p1 ld.global.v4.b32 { %r57, %r58, %r59, %r60 }, [ %rd15 + 0 ];\n\tmov.b32 \t%f57, %r57;\n\tmov.b32 \t%f58, %r58;\n\tmov.b32 \t%f59, %r59;\n\tmov.b32 \t%f60, %r60;\n\tmov.u32 %r61, 0x0;\n\tmov.u32 %r62, 0x0;\n\tmov.u32 %r63, 0x0;\n\tmov.u32 %r64, 0x0;\n\t@%p1 ld.global.v4.b32 { %r61, %r62, %r63, %r64 }, [ %rd16 + 0 ];\n\tmov.b32 \t%f61, %r61;\n\tmov.b32 \t%f62, %r62;\n\tmov.b32 \t%f63, %r63;\n\tmov.b32 \t%f64, %r64;\n\tmov.u32 %r65, 0x0;\n\tmov.u32 %r66, 0x0;\n\tmov.u32 %r67, 0x0;\n\tmov.u32 %r68, 0x0;\n\t@%p1 ld.global.v4.b32 { %r65, %r66, %r67, %r68 }, [ %rd17 + 0 ];\n\tmov.b32 \t%f65, %r65;\n\tmov.b32 \t%f66, %r66;\n\tmov.b32 \t%f67, %r67;\n\tmov.b32 \t%f68, %r68;\n\tmov.u32 %r69, 0x0;\n\tmov.u32 %r70, 0x0;\n\tmov.u32 %r71, 0x0;\n\tmov.u32 %r72, 0x0;\n\t@%p1 ld.global.v4.b32 { %r69, %r70, %r71, %r72 }, [ %rd18 + 0 ];\n\tmov.b32 \t%f69, %r69;\n\tmov.b32 \t%f70, %r70;\n\tmov.b32 \t%f71, %r71;\n\tmov.b32 \t%f72, %r72;\n\tmov.u32 %r73, 0x0;\n\tmov.u32 %r74, 0x0;\n\tmov.u32 %r75, 0x0;\n\tmov.u32 %r76, 0x0;\n\t@%p1 ld.global.v4.b32 { %r73, %r74, %r75, %r76 }, [ %rd19 + 0 ];\n\tmov.b32 \t%f73, %r73;\n\tmov.b32 \t%f74, %r74;\n\tmov.b32 \t%f75, %r75;\n\tmov.b32 \t%f76, %r76;\n\tmov.u32 %r77, 0x0;\n\tmov.u32 %r78, 0x0;\n\tmov.u32 %r79, 0x0;\n\tmov.u32 %r80, 0x0;\n\t@%p1 ld.global.v4.b32 { %r77, %r78, %r79, %r80 }, [ %rd20 + 0 ];\n\tmov.b32 \t%f77, %r77;\n\tmov.b32 \t%f78, %r78;\n\tmov.b32 \t%f79, %r79;\n\tmov.b32 \t%f80, %r80;\n\tmov.u32 %r81, 0x0;\n\tmov.u32 %r82, 0x0;\n\tmov.u32 %r83, 0x0;\n\tmov.u32 %r84, 0x0;\n\t@%p1 ld.global.v4.b32 { %r81, %r82, %r83, %r84 }, [ %rd21 + 0 ];\n\tmov.b32 \t%f81, %r81;\n\tmov.b32 \t%f82, %r82;\n\tmov.b32 \t%f83, %r83;\n\tmov.b32 \t%f84, %r84;\n\tmov.u32 %r85, 0x0;\n\tmov.u32 %r86, 0x0;\n\tmov.u32 %r87, 0x0;\n\tmov.u32 %r88, 0x0;\n\t@%p1 ld.global.v4.b32 { %r85, %r86, %r87, %r88 }, [ %rd22 + 0 ];\n\tmov.b32 \t%f85, %r85;\n\tmov.b32 \t%f86, %r86;\n\tmov.b32 \t%f87, %r87;\n\tmov.b32 \t%f88, %r88;\n\tmov.u32 %r89, 0x0;\n\tmov.u32 %r90, 0x0;\n\tmov.u32 %r91, 0x0;\n\tmov.u32 %r92, 0x0;\n\t@%p1 ld.global.v4.b32 { %r89, %r90, %r91, %r92 }, [ %rd23 + 0 ];\n\tmov.b32 \t%f89, %r89;\n\tmov.b32 \t%f90, %r90;\n\tmov.b32 \t%f91, %r91;\n\tmov.b32 \t%f92, %r92;\n\tmov.u32 %r93, 0x0;\n\tmov.u32 %r94, 0x0;\n\tmov.u32 %r95, 0x0;\n\tmov.u32 %r96, 0x0;\n\t@%p1 ld.global.v4.b32 { %r93, %r94, %r95, %r96 }, [ %rd24 + 0 ];\n\tmov.b32 \t%f93, %r93;\n\tmov.b32 \t%f94, %r94;\n\tmov.b32 \t%f95, %r95;\n\tmov.b32 \t%f96, %r96;\n\tmov.u32 %r97, 0x0;\n\tmov.u32 %r98, 0x0;\n\tmov.u32 %r99, 0x0;\n\tmov.u32 %r100, 0x0;\n\t@%p1 ld.global.v4.b32 { %r97, %r98, %r99, %r100 }, [ %rd25 + 0 ];\n\tmov.b32 \t%f97, %r97;\n\tmov.b32 \t%f98, %r98;\n\tmov.b32 \t%f99, %r99;\n\tmov.b32 \t%f100, %r100;\n\tmov.u32 %r101, 0x0;\n\tmov.u32 %r102, 0x0;\n\tmov.u32 %r103, 0x0;\n\tmov.u32 %r104, 0x0;\n\t@%p1 ld.global.v4.b32 { %r101, %r102, %r103, %r104 }, [ %rd26 + 0 ];\n\tmov.b32 \t%f101, %r101;\n\tmov.b32 \t%f102, %r102;\n\tmov.b32 \t%f103, %r103;\n\tmov.b32 \t%f104, %r104;\n\tmov.u32 %r105, 0x0;\n\tmov.u32 %r106, 0x0;\n\tmov.u32 %r107, 0x0;\n\tmov.u32 %r108, 0x0;\n\t@%p1 ld.global.v4.b32 { %r105, %r106, %r107, %r108 }, [ %rd27 + 0 ];\n\tmov.b32 \t%f105, %r105;\n\tmov.b32 \t%f106, %r106;\n\tmov.b32 \t%f107, %r107;\n\tmov.b32 \t%f108, %r108;\n\tmov.u32 %r109, 0x0;\n\tmov.u32 %r110, 0x0;\n\tmov.u32 %r111, 0x0;\n\tmov.u32 %r112, 0x0;\n\t@%p1 ld.global.v4.b32 { %r109, %r110, %r111, %r112 }, [ %rd28 + 0 ];\n\tmov.b32 \t%f109, %r109;\n\tmov.b32 \t%f110, %r110;\n\tmov.b32 \t%f111, %r111;\n\tmov.b32 \t%f112, %r112;\n\tmov.u32 %r113, 0x0;\n\tmov.u32 %r114, 0x0;\n\tmov.u32 %r115, 0x0;\n\tmov.u32 %r116, 0x0;\n\t@%p1 ld.global.v4.b32 { %r113, %r114, %r115, %r116 }, [ %rd29 + 0 ];\n\tmov.b32 \t%f113, %r113;\n\tmov.b32 \t%f114, %r114;\n\tmov.b32 \t%f115, %r115;\n\tmov.b32 \t%f116, %r116;\n\tmov.u32 %r117, 0x0;\n\tmov.u32 %r118, 0x0;\n\tmov.u32 %r119, 0x0;\n\tmov.u32 %r120, 0x0;\n\t@%p1 ld.global.v4.b32 { %r117, %r118, %r119, %r120 }, [ %rd30 + 0 ];\n\tmov.b32 \t%f117, %r117;\n\tmov.b32 \t%f118, %r118;\n\tmov.b32 \t%f119, %r119;\n\tmov.b32 \t%f120, %r120;\n\tmov.u32 %r121, 0x0;\n\tmov.u32 %r122, 0x0;\n\tmov.u32 %r123, 0x0;\n\tmov.u32 %r124, 0x0;\n\t@%p1 ld.global.v4.b32 { %r121, %r122, %r123, %r124 }, [ %rd31 + 0 ];\n\tmov.b32 \t%f121, %r121;\n\tmov.b32 \t%f122, %r122;\n\tmov.b32 \t%f123, %r123;\n\tmov.b32 \t%f124, %r124;\n\tmov.u32 %r125, 0x0;\n\tmov.u32 %r126, 0x0;\n\tmov.u32 %r127, 0x0;\n\tmov.u32 %r128, 0x0;\n\t@%p1 ld.global.v4.b32 { %r125, %r126, %r127, %r128 }, [ %rd32 + 0 ];\n\tmov.b32 \t%f125, %r125;\n\tmov.b32 \t%f126, %r126;\n\tmov.b32 \t%f127, %r127;\n\tmov.b32 \t%f128, %r128;\n\tadd.s64 \t%rd103, %rd98, %rd100;\n\tadd.s64 \t%rd33, %rd103, %rd102;\n\tadd.s64 \t%rd34, %rd33, 2048;\n\tadd.s64 \t%rd35, %rd33, 4096;\n\tadd.s64 \t%rd36, %rd33, 6144;\n\tadd.s64 \t%rd37, %rd33, 8192;\n\tadd.s64 \t%rd38, %rd33, 10240;\n\tadd.s64 \t%rd39, %rd33, 12288;\n\tadd.s64 \t%rd40, %rd33, 14336;\n\tadd.s64 \t%rd41, %rd33, 16384;\n\tadd.s64 \t%rd42, %rd33, 18432;\n\tadd.s64 \t%rd43, %rd33, 20480;\n\tadd.s64 \t%rd44, %rd33, 22528;\n\tadd.s64 \t%rd45, %rd33, 24576;\n\tadd.s64 \t%rd46, %rd33, 26624;\n\tadd.s64 \t%rd47, %rd33, 28672;\n\tadd.s64 \t%rd48, %rd33, 30720;\n\tadd.s64 \t%rd49, %rd33, 32768;\n\tadd.s64 \t%rd50, %rd33, 34816;\n\tadd.s64 \t%rd51, %rd33, 36864;\n\tadd.s64 \t%rd52, %rd33, 38912;\n\tadd.s64 \t%rd53, %rd33, 40960;\n\tadd.s64 \t%rd54, %rd33, 43008;\n\tadd.s64 \t%rd55, %rd33, 45056;\n\tadd.s64 \t%rd56, %rd33, 47104;\n\tadd.s64 \t%rd57, %rd33, 49152;\n\tadd.s64 \t%rd58, %rd33, 51200;\n\tadd.s64 \t%rd59, %rd33, 53248;\n\tadd.s64 \t%rd60, %rd33, 55296;\n\tadd.s64 \t%rd61, %rd33, 57344;\n\tadd.s64 \t%rd62, %rd33, 59392;\n\tadd.s64 \t%rd63, %rd33, 61440;\n\tadd.s64 \t%rd64, %rd33, 63488;\n\tmov.u32 %r129, 0x0;\n\tmov.u32 %r130, 0x0;\n\tmov.u32 %r131, 0x0;\n\tmov.u32 %r132, 0x0;\n\t@%p1 ld.global.v4.b32 { %r129, %r130, %r131, %r132 }, [ %rd33 + 0 ];\n\tmov.b32 \t%f129, %r129;\n\tmov.b32 \t%f130, %r130;\n\tmov.b32 \t%f131, %r131;\n\tmov.b32 \t%f132, %r132;\n\tmov.u32 %r133, 0x0;\n\tmov.u32 %r134, 0x0;\n\tmov.u32 %r135, 0x0;\n\tmov.u32 %r136, 0x0;\n\t@%p1 ld.global.v4.b32 { %r133, %r134, %r135, %r136 }, [ %rd34 + 0 ];\n\tmov.b32 \t%f133, %r133;\n\tmov.b32 \t%f134, %r134;\n\tmov.b32 \t%f135, %r135;\n\tmov.b32 \t%f136, %r136;\n\tmov.u32 %r137, 0x0;\n\tmov.u32 %r138, 0x0;\n\tmov.u32 %r139, 0x0;\n\tmov.u32 %r140, 0x0;\n\t@%p1 ld.global.v4.b32 { %r137, %r138, %r139, %r140 }, [ %rd35 + 0 ];\n\tmov.b32 \t%f137, %r137;\n\tmov.b32 \t%f138, %r138;\n\tmov.b32 \t%f139, %r139;\n\tmov.b32 \t%f140, %r140;\n\tmov.u32 %r141, 0x0;\n\tmov.u32 %r142, 0x0;\n\tmov.u32 %r143, 0x0;\n\tmov.u32 %r144, 0x0;\n\t@%p1 ld.global.v4.b32 { %r141, %r142, %r143, %r144 }, [ %rd36 + 0 ];\n\tmov.b32 \t%f141, %r141;\n\tmov.b32 \t%f142, %r142;\n\tmov.b32 \t%f143, %r143;\n\tmov.b32 \t%f144, %r144;\n\tmov.u32 %r145, 0x0;\n\tmov.u32 %r146, 0x0;\n\tmov.u32 %r147, 0x0;\n\tmov.u32 %r148, 0x0;\n\t@%p1 ld.global.v4.b32 { %r145, %r146, %r147, %r148 }, [ %rd37 + 0 ];\n\tmov.b32 \t%f145, %r145;\n\tmov.b32 \t%f146, %r146;\n\tmov.b32 \t%f147, %r147;\n\tmov.b32 \t%f148, %r148;\n\tmov.u32 %r149, 0x0;\n\tmov.u32 %r150, 0x0;\n\tmov.u32 %r151, 0x0;\n\tmov.u32 %r152, 0x0;\n\t@%p1 ld.global.v4.b32 { %r149, %r150, %r151, %r152 }, [ %rd38 + 0 ];\n\tmov.b32 \t%f149, %r149;\n\tmov.b32 \t%f150, %r150;\n\tmov.b32 \t%f151, %r151;\n\tmov.b32 \t%f152, %r152;\n\tmov.u32 %r153, 0x0;\n\tmov.u32 %r154, 0x0;\n\tmov.u32 %r155, 0x0;\n\tmov.u32 %r156, 0x0;\n\t@%p1 ld.global.v4.b32 { %r153, %r154, %r155, %r156 }, [ %rd39 + 0 ];\n\tmov.b32 \t%f153, %r153;\n\tmov.b32 \t%f154, %r154;\n\tmov.b32 \t%f155, %r155;\n\tmov.b32 \t%f156, %r156;\n\tmov.u32 %r157, 0x0;\n\tmov.u32 %r158, 0x0;\n\tmov.u32 %r159, 0x0;\n\tmov.u32 %r160, 0x0;\n\t@%p1 ld.global.v4.b32 { %r157, %r158, %r159, %r160 }, [ %rd40 + 0 ];\n\tmov.b32 \t%f157, %r157;\n\tmov.b32 \t%f158, %r158;\n\tmov.b32 \t%f159, %r159;\n\tmov.b32 \t%f160, %r160;\n\tmov.u32 %r161, 0x0;\n\tmov.u32 %r162, 0x0;\n\tmov.u32 %r163, 0x0;\n\tmov.u32 %r164, 0x0;\n\t@%p1 ld.global.v4.b32 { %r161, %r162, %r163, %r164 }, [ %rd41 + 0 ];\n\tmov.b32 \t%f161, %r161;\n\tmov.b32 \t%f162, %r162;\n\tmov.b32 \t%f163, %r163;\n\tmov.b32 \t%f164, %r164;\n\tmov.u32 %r165, 0x0;\n\tmov.u32 %r166, 0x0;\n\tmov.u32 %r167, 0x0;\n\tmov.u32 %r168, 0x0;\n\t@%p1 ld.global.v4.b32 { %r165, %r166, %r167, %r168 }, [ %rd42 + 0 ];\n\tmov.b32 \t%f165, %r165;\n\tmov.b32 \t%f166, %r166;\n\tmov.b32 \t%f167, %r167;\n\tmov.b32 \t%f168, %r168;\n\tmov.u32 %r169, 0x0;\n\tmov.u32 %r170, 0x0;\n\tmov.u32 %r171, 0x0;\n\tmov.u32 %r172, 0x0;\n\t@%p1 ld.global.v4.b32 { %r169, %r170, %r171, %r172 }, [ %rd43 + 0 ];\n\tmov.b32 \t%f169, %r169;\n\tmov.b32 \t%f170, %r170;\n\tmov.b32 \t%f171, %r171;\n\tmov.b32 \t%f172, %r172;\n\tmov.u32 %r173, 0x0;\n\tmov.u32 %r174, 0x0;\n\tmov.u32 %r175, 0x0;\n\tmov.u32 %r176, 0x0;\n\t@%p1 ld.global.v4.b32 { %r173, %r174, %r175, %r176 }, [ %rd44 + 0 ];\n\tmov.b32 \t%f173, %r173;\n\tmov.b32 \t%f174, %r174;\n\tmov.b32 \t%f175, %r175;\n\tmov.b32 \t%f176, %r176;\n\tmov.u32 %r177, 0x0;\n\tmov.u32 %r178, 0x0;\n\tmov.u32 %r179, 0x0;\n\tmov.u32 %r180, 0x0;\n\t@%p1 ld.global.v4.b32 { %r177, %r178, %r179, %r180 }, [ %rd45 + 0 ];\n\tmov.b32 \t%f177, %r177;\n\tmov.b32 \t%f178, %r178;\n\tmov.b32 \t%f179, %r179;\n\tmov.b32 \t%f180, %r180;\n\tmov.u32 %r181, 0x0;\n\tmov.u32 %r182, 0x0;\n\tmov.u32 %r183, 0x0;\n\tmov.u32 %r184, 0x0;\n\t@%p1 ld.global.v4.b32 { %r181, %r182, %r183, %r184 }, [ %rd46 + 0 ];\n\tmov.b32 \t%f181, %r181;\n\tmov.b32 \t%f182, %r182;\n\tmov.b32 \t%f183, %r183;\n\tmov.b32 \t%f184, %r184;\n\tmov.u32 %r185, 0x0;\n\tmov.u32 %r186, 0x0;\n\tmov.u32 %r187, 0x0;\n\tmov.u32 %r188, 0x0;\n\t@%p1 ld.global.v4.b32 { %r185, %r186, %r187, %r188 }, [ %rd47 + 0 ];\n\tmov.b32 \t%f185, %r185;\n\tmov.b32 \t%f186, %r186;\n\tmov.b32 \t%f187, %r187;\n\tmov.b32 \t%f188, %r188;\n\tmov.u32 %r189, 0x0;\n\tmov.u32 %r190, 0x0;\n\tmov.u32 %r191, 0x0;\n\tmov.u32 %r192, 0x0;\n\t@%p1 ld.global.v4.b32 { %r189, %r190, %r191, %r192 }, [ %rd48 + 0 ];\n\tmov.b32 \t%f189, %r189;\n\tmov.b32 \t%f190, %r190;\n\tmov.b32 \t%f191, %r191;\n\tmov.b32 \t%f192, %r192;\n\tmov.u32 %r193, 0x0;\n\tmov.u32 %r194, 0x0;\n\tmov.u32 %r195, 0x0;\n\tmov.u32 %r196, 0x0;\n\t@%p1 ld.global.v4.b32 { %r193, %r194, %r195, %r196 }, [ %rd49 + 0 ];\n\tmov.b32 \t%f193, %r193;\n\tmov.b32 \t%f194, %r194;\n\tmov.b32 \t%f195, %r195;\n\tmov.b32 \t%f196, %r196;\n\tmov.u32 %r197, 0x0;\n\tmov.u32 %r198, 0x0;\n\tmov.u32 %r199, 0x0;\n\tmov.u32 %r200, 0x0;\n\t@%p1 ld.global.v4.b32 { %r197, %r198, %r199, %r200 }, [ %rd50 + 0 ];\n\tmov.b32 \t%f197, %r197;\n\tmov.b32 \t%f198, %r198;\n\tmov.b32 \t%f199, %r199;\n\tmov.b32 \t%f200, %r200;\n\tmov.u32 %r201, 0x0;\n\tmov.u32 %r202, 0x0;\n\tmov.u32 %r203, 0x0;\n\tmov.u32 %r204, 0x0;\n\t@%p1 ld.global.v4.b32 { %r201, %r202, %r203, %r204 }, [ %rd51 + 0 ];\n\tmov.b32 \t%f201, %r201;\n\tmov.b32 \t%f202, %r202;\n\tmov.b32 \t%f203, %r203;\n\tmov.b32 \t%f204, %r204;\n\tmov.u32 %r205, 0x0;\n\tmov.u32 %r206, 0x0;\n\tmov.u32 %r207, 0x0;\n\tmov.u32 %r208, 0x0;\n\t@%p1 ld.global.v4.b32 { %r205, %r206, %r207, %r208 }, [ %rd52 + 0 ];\n\tmov.b32 \t%f205, %r205;\n\tmov.b32 \t%f206, %r206;\n\tmov.b32 \t%f207, %r207;\n\tmov.b32 \t%f208, %r208;\n\tmov.u32 %r209, 0x0;\n\tmov.u32 %r210, 0x0;\n\tmov.u32 %r211, 0x0;\n\tmov.u32 %r212, 0x0;\n\t@%p1 ld.global.v4.b32 { %r209, %r210, %r211, %r212 }, [ %rd53 + 0 ];\n\tmov.b32 \t%f209, %r209;\n\tmov.b32 \t%f210, %r210;\n\tmov.b32 \t%f211, %r211;\n\tmov.b32 \t%f212, %r212;\n\tmov.u32 %r213, 0x0;\n\tmov.u32 %r214, 0x0;\n\tmov.u32 %r215, 0x0;\n\tmov.u32 %r216, 0x0;\n\t@%p1 ld.global.v4.b32 { %r213, %r214, %r215, %r216 }, [ %rd54 + 0 ];\n\tmov.b32 \t%f213, %r213;\n\tmov.b32 \t%f214, %r214;\n\tmov.b32 \t%f215, %r215;\n\tmov.b32 \t%f216, %r216;\n\tmov.u32 %r217, 0x0;\n\tmov.u32 %r218, 0x0;\n\tmov.u32 %r219, 0x0;\n\tmov.u32 %r220, 0x0;\n\t@%p1 ld.global.v4.b32 { %r217, %r218, %r219, %r220 }, [ %rd55 + 0 ];\n\tmov.b32 \t%f217, %r217;\n\tmov.b32 \t%f218, %r218;\n\tmov.b32 \t%f219, %r219;\n\tmov.b32 \t%f220, %r220;\n\tmov.u32 %r221, 0x0;\n\tmov.u32 %r222, 0x0;\n\tmov.u32 %r223, 0x0;\n\tmov.u32 %r224, 0x0;\n\t@%p1 ld.global.v4.b32 { %r221, %r222, %r223, %r224 }, [ %rd56 + 0 ];\n\tmov.b32 \t%f221, %r221;\n\tmov.b32 \t%f222, %r222;\n\tmov.b32 \t%f223, %r223;\n\tmov.b32 \t%f224, %r224;\n\tmov.u32 %r225, 0x0;\n\tmov.u32 %r226, 0x0;\n\tmov.u32 %r227, 0x0;\n\tmov.u32 %r228, 0x0;\n\t@%p1 ld.global.v4.b32 { %r225, %r226, %r227, %r228 }, [ %rd57 + 0 ];\n\tmov.b32 \t%f225, %r225;\n\tmov.b32 \t%f226, %r226;\n\tmov.b32 \t%f227, %r227;\n\tmov.b32 \t%f228, %r228;\n\tmov.u32 %r229, 0x0;\n\tmov.u32 %r230, 0x0;\n\tmov.u32 %r231, 0x0;\n\tmov.u32 %r232, 0x0;\n\t@%p1 ld.global.v4.b32 { %r229, %r230, %r231, %r232 }, [ %rd58 + 0 ];\n\tmov.b32 \t%f229, %r229;\n\tmov.b32 \t%f230, %r230;\n\tmov.b32 \t%f231, %r231;\n\tmov.b32 \t%f232, %r232;\n\tmov.u32 %r233, 0x0;\n\tmov.u32 %r234, 0x0;\n\tmov.u32 %r235, 0x0;\n\tmov.u32 %r236, 0x0;\n\t@%p1 ld.global.v4.b32 { %r233, %r234, %r235, %r236 }, [ %rd59 + 0 ];\n\tmov.b32 \t%f233, %r233;\n\tmov.b32 \t%f234, %r234;\n\tmov.b32 \t%f235, %r235;\n\tmov.b32 \t%f236, %r236;\n\tmov.u32 %r237, 0x0;\n\tmov.u32 %r238, 0x0;\n\tmov.u32 %r239, 0x0;\n\tmov.u32 %r240, 0x0;\n\t@%p1 ld.global.v4.b32 { %r237, %r238, %r239, %r240 }, [ %rd60 + 0 ];\n\tmov.b32 \t%f237, %r237;\n\tmov.b32 \t%f238, %r238;\n\tmov.b32 \t%f239, %r239;\n\tmov.b32 \t%f240, %r240;\n\tmov.u32 %r241, 0x0;\n\tmov.u32 %r242, 0x0;\n\tmov.u32 %r243, 0x0;\n\tmov.u32 %r244, 0x0;\n\t@%p1 ld.global.v4.b32 { %r241, %r242, %r243, %r244 }, [ %rd61 + 0 ];\n\tmov.b32 \t%f241, %r241;\n\tmov.b32 \t%f242, %r242;\n\tmov.b32 \t%f243, %r243;\n\tmov.b32 \t%f244, %r244;\n\tmov.u32 %r245, 0x0;\n\tmov.u32 %r246, 0x0;\n\tmov.u32 %r247, 0x0;\n\tmov.u32 %r248, 0x0;\n\t@%p1 ld.global.v4.b32 { %r245, %r246, %r247, %r248 }, [ %rd62 + 0 ];\n\tmov.b32 \t%f245, %r245;\n\tmov.b32 \t%f246, %r246;\n\tmov.b32 \t%f247, %r247;\n\tmov.b32 \t%f248, %r248;\n\tmov.u32 %r249, 0x0;\n\tmov.u32 %r250, 0x0;\n\tmov.u32 %r251, 0x0;\n\tmov.u32 %r252, 0x0;\n\t@%p1 ld.global.v4.b32 { %r249, %r250, %r251, %r252 }, [ %rd63 + 0 ];\n\tmov.b32 \t%f249, %r249;\n\tmov.b32 \t%f250, %r250;\n\tmov.b32 \t%f251, %r251;\n\tmov.b32 \t%f252, %r252;\n\tmov.u32 %r253, 0x0;\n\tmov.u32 %r254, 0x0;\n\tmov.u32 %r255, 0x0;\n\tmov.u32 %r256, 0x0;\n\t@%p1 ld.global.v4.b32 { %r253, %r254, %r255, %r256 }, [ %rd64 + 0 ];\n\tmov.b32 \t%f253, %r253;\n\tmov.b32 \t%f254, %r254;\n\tmov.b32 \t%f255, %r255;\n\tmov.b32 \t%f256, %r256;\n\tadd.f32 \t%f257, %f1, %f129;\n\tadd.f32 \t%f258, %f2, %f130;\n\tadd.f32 \t%f259, %f3, %f131;\n\tadd.f32 \t%f260, %f4, %f132;\n\tadd.f32 \t%f261, %f5, %f133;\n\tadd.f32 \t%f262, %f6, %f134;\n\tadd.f32 \t%f263, %f7, %f135;\n\tadd.f32 \t%f264, %f8, %f136;\n\tadd.f32 \t%f265, %f9, %f137;\n\tadd.f32 \t%f266, %f10, %f138;\n\tadd.f32 \t%f267, %f11, %f139;\n\tadd.f32 \t%f268, %f12, %f140;\n\tadd.f32 \t%f269, %f13, %f141;\n\tadd.f32 \t%f270, %f14, %f142;\n\tadd.f32 \t%f271, %f15, %f143;\n\tadd.f32 \t%f272, %f16, %f144;\n\tadd.f32 \t%f273, %f17, %f145;\n\tadd.f32 \t%f274, %f18, %f146;\n\tadd.f32 \t%f275, %f19, %f147;\n\tadd.f32 \t%f276, %f20, %f148;\n\tadd.f32 \t%f277, %f21, %f149;\n\tadd.f32 \t%f278, %f22, %f150;\n\tadd.f32 \t%f279, %f23, %f151;\n\tadd.f32 \t%f280, %f24, %f152;\n\tadd.f32 \t%f281, %f25, %f153;\n\tadd.f32 \t%f282, %f26, %f154;\n\tadd.f32 \t%f283, %f27, %f155;\n\tadd.f32 \t%f284, %f28, %f156;\n\tadd.f32 \t%f285, %f29, %f157;\n\tadd.f32 \t%f286, %f30, %f158;\n\tadd.f32 \t%f287, %f31, %f159;\n\tadd.f32 \t%f288, %f32, %f160;\n\tadd.f32 \t%f289, %f33, %f161;\n\tadd.f32 \t%f290, %f34, %f162;\n\tadd.f32 \t%f291, %f35, %f163;\n\tadd.f32 \t%f292, %f36, %f164;\n\tadd.f32 \t%f293, %f37, %f165;\n\tadd.f32 \t%f294, %f38, %f166;\n\tadd.f32 \t%f295, %f39, %f167;\n\tadd.f32 \t%f296, %f40, %f168;\n\tadd.f32 \t%f297, %f41, %f169;\n\tadd.f32 \t%f298, %f42, %f170;\n\tadd.f32 \t%f299, %f43, %f171;\n\tadd.f32 \t%f300, %f44, %f172;\n\tadd.f32 \t%f301, %f45, %f173;\n\tadd.f32 \t%f302, %f46, %f174;\n\tadd.f32 \t%f303, %f47, %f175;\n\tadd.f32 \t%f304, %f48, %f176;\n\tadd.f32 \t%f305, %f49, %f177;\n\tadd.f32 \t%f306, %f50, %f178;\n\tadd.f32 \t%f307, %f51, %f179;\n\tadd.f32 \t%f308, %f52, %f180;\n\tadd.f32 \t%f309, %f53, %f181;\n\tadd.f32 \t%f310, %f54, %f182;\n\tadd.f32 \t%f311, %f55, %f183;\n\tadd.f32 \t%f312, %f56, %f184;\n\tadd.f32 \t%f313, %f57, %f185;\n\tadd.f32 \t%f314, %f58, %f186;\n\tadd.f32 \t%f315, %f59, %f187;\n\tadd.f32 \t%f316, %f60, %f188;\n\tadd.f32 \t%f317, %f61, %f189;\n\tadd.f32 \t%f318, %f62, %f190;\n\tadd.f32 \t%f319, %f63, %f191;\n\tadd.f32 \t%f320, %f64, %f192;\n\tadd.f32 \t%f321, %f65, %f193;\n\tadd.f32 \t%f322, %f66, %f194;\n\tadd.f32 \t%f323, %f67, %f195;\n\tadd.f32 \t%f324, %f68, %f196;\n\tadd.f32 \t%f325, %f69, %f197;\n\tadd.f32 \t%f326, %f70, %f198;\n\tadd.f32 \t%f327, %f71, %f199;\n\tadd.f32 \t%f328, %f72, %f200;\n\tadd.f32 \t%f329, %f73, %f201;\n\tadd.f32 \t%f330, %f74, %f202;\n\tadd.f32 \t%f331, %f75, %f203;\n\tadd.f32 \t%f332, %f76, %f204;\n\tadd.f32 \t%f333, %f77, %f205;\n\tadd.f32 \t%f334, %f78, %f206;\n\tadd.f32 \t%f335, %f79, %f207;\n\tadd.f32 \t%f336, %f80, %f208;\n\tadd.f32 \t%f337, %f81, %f209;\n\tadd.f32 \t%f338, %f82, %f210;\n\tadd.f32 \t%f339, %f83, %f211;\n\tadd.f32 \t%f340, %f84, %f212;\n\tadd.f32 \t%f341, %f85, %f213;\n\tadd.f32 \t%f342, %f86, %f214;\n\tadd.f32 \t%f343, %f87, %f215;\n\tadd.f32 \t%f344, %f88, %f216;\n\tadd.f32 \t%f345, %f89, %f217;\n\tadd.f32 \t%f346, %f90, %f218;\n\tadd.f32 \t%f347, %f91, %f219;\n\tadd.f32 \t%f348, %f92, %f220;\n\tadd.f32 \t%f349, %f93, %f221;\n\tadd.f32 \t%f350, %f94, %f222;\n\tadd.f32 \t%f351, %f95, %f223;\n\tadd.f32 \t%f352, %f96, %f224;\n\tadd.f32 \t%f353, %f97, %f225;\n\tadd.f32 \t%f354, %f98, %f226;\n\tadd.f32 \t%f355, %f99, %f227;\n\tadd.f32 \t%f356, %f100, %f228;\n\tadd.f32 \t%f357, %f101, %f229;\n\tadd.f32 \t%f358, %f102, %f230;\n\tadd.f32 \t%f359, %f103, %f231;\n\tadd.f32 \t%f360, %f104, %f232;\n\tadd.f32 \t%f361, %f105, %f233;\n\tadd.f32 \t%f362, %f106, %f234;\n\tadd.f32 \t%f363, %f107, %f235;\n\tadd.f32 \t%f364, %f108, %f236;\n\tadd.f32 \t%f365, %f109, %f237;\n\tadd.f32 \t%f366, %f110, %f238;\n\tadd.f32 \t%f367, %f111, %f239;\n\tadd.f32 \t%f368, %f112, %f240;\n\tadd.f32 \t%f369, %f113, %f241;\n\tadd.f32 \t%f370, %f114, %f242;\n\tadd.f32 \t%f371, %f115, %f243;\n\tadd.f32 \t%f372, %f116, %f244;\n\tadd.f32 \t%f373, %f117, %f245;\n\tadd.f32 \t%f374, %f118, %f246;\n\tadd.f32 \t%f375, %f119, %f247;\n\tadd.f32 \t%f376, %f120, %f248;\n\tadd.f32 \t%f377, %f121, %f249;\n\tadd.f32 \t%f378, %f122, %f250;\n\tadd.f32 \t%f379, %f123, %f251;\n\tadd.f32 \t%f380, %f124, %f252;\n\tadd.f32 \t%f381, %f125, %f253;\n\tadd.f32 \t%f382, %f126, %f254;\n\tadd.f32 \t%f383, %f127, %f255;\n\tadd.f32 \t%f384, %f128, %f256;\n\tadd.f32 \t%f385, %f257, %f257;\n\tadd.f32 \t%f386, %f258, %f258;\n\tadd.f32 \t%f387, %f259, %f259;\n\tadd.f32 \t%f388, %f260, %f260;\n\tadd.f32 \t%f389, %f261, %f261;\n\tadd.f32 \t%f390, %f262, %f262;\n\tadd.f32 \t%f391, %f263, %f263;\n\tadd.f32 \t%f392, %f264, %f264;\n\tadd.f32 \t%f393, %f265, %f265;\n\tadd.f32 \t%f394, %f266, %f266;\n\tadd.f32 \t%f395, %f267, %f267;\n\tadd.f32 \t%f396, %f268, %f268;\n\tadd.f32 \t%f397, %f269, %f269;\n\tadd.f32 \t%f398, %f270, %f270;\n\tadd.f32 \t%f399, %f271, %f271;\n\tadd.f32 \t%f400, %f272, %f272;\n\tadd.f32 \t%f401, %f273, %f273;\n\tadd.f32 \t%f402, %f274, %f274;\n\tadd.f32 \t%f403, %f275, %f275;\n\tadd.f32 \t%f404, %f276, %f276;\n\tadd.f32 \t%f405, %f277, %f277;\n\tadd.f32 \t%f406, %f278, %f278;\n\tadd.f32 \t%f407, %f279, %f279;\n\tadd.f32 \t%f408, %f280, %f280;\n\tadd.f32 \t%f409, %f281, %f281;\n\tadd.f32 \t%f410, %f282, %f282;\n\tadd.f32 \t%f411, %f283, %f283;\n\tadd.f32 \t%f412, %f284, %f284;\n\tadd.f32 \t%f413, %f285, %f285;\n\tadd.f32 \t%f414, %f286, %f286;\n\tadd.f32 \t%f415, %f287, %f287;\n\tadd.f32 \t%f416, %f288, %f288;\n\tadd.f32 \t%f417, %f289, %f289;\n\tadd.f32 \t%f418, %f290, %f290;\n\tadd.f32 \t%f419, %f291, %f291;\n\tadd.f32 \t%f420, %f292, %f292;\n\tadd.f32 \t%f421, %f293, %f293;\n\tadd.f32 \t%f422, %f294, %f294;\n\tadd.f32 \t%f423, %f295, %f295;\n\tadd.f32 \t%f424, %f296, %f296;\n\tadd.f32 \t%f425, %f297, %f297;\n\tadd.f32 \t%f426, %f298, %f298;\n\tadd.f32 \t%f427, %f299, %f299;\n\tadd.f32 \t%f428, %f300, %f300;\n\tadd.f32 \t%f429, %f301, %f301;\n\tadd.f32 \t%f430, %f302, %f302;\n\tadd.f32 \t%f431, %f303, %f303;\n\tadd.f32 \t%f432, %f304, %f304;\n\tadd.f32 \t%f433, %f305, %f305;\n\tadd.f32 \t%f434, %f306, %f306;\n\tadd.f32 \t%f435, %f307, %f307;\n\tadd.f32 \t%f436, %f308, %f308;\n\tadd.f32 \t%f437, %f309, %f309;\n\tadd.f32 \t%f438, %f310, %f310;\n\tadd.f32 \t%f439, %f311, %f311;\n\tadd.f32 \t%f440, %f312, %f312;\n\tadd.f32 \t%f441, %f313, %f313;\n\tadd.f32 \t%f442, %f314, %f314;\n\tadd.f32 \t%f443, %f315, %f315;\n\tadd.f32 \t%f444, %f316, %f316;\n\tadd.f32 \t%f445, %f317, %f317;\n\tadd.f32 \t%f446, %f318, %f318;\n\tadd.f32 \t%f447, %f319, %f319;\n\tadd.f32 \t%f448, %f320, %f320;\n\tadd.f32 \t%f449, %f321, %f321;\n\tadd.f32 \t%f450, %f322, %f322;\n\tadd.f32 \t%f451, %f323, %f323;\n\tadd.f32 \t%f452, %f324, %f324;\n\tadd.f32 \t%f453, %f325, %f325;\n\tadd.f32 \t%f454, %f326, %f326;\n\tadd.f32 \t%f455, %f327, %f327;\n\tadd.f32 \t%f456, %f328, %f328;\n\tadd.f32 \t%f457, %f329, %f329;\n\tadd.f32 \t%f458, %f330, %f330;\n\tadd.f32 \t%f459, %f331, %f331;\n\tadd.f32 \t%f460, %f332, %f332;\n\tadd.f32 \t%f461, %f333, %f333;\n\tadd.f32 \t%f462, %f334, %f334;\n\tadd.f32 \t%f463, %f335, %f335;\n\tadd.f32 \t%f464, %f336, %f336;\n\tadd.f32 \t%f465, %f337, %f337;\n\tadd.f32 \t%f466, %f338, %f338;\n\tadd.f32 \t%f467, %f339, %f339;\n\tadd.f32 \t%f468, %f340, %f340;\n\tadd.f32 \t%f469, %f341, %f341;\n\tadd.f32 \t%f470, %f342, %f342;\n\tadd.f32 \t%f471, %f343, %f343;\n\tadd.f32 \t%f472, %f344, %f344;\n\tadd.f32 \t%f473, %f345, %f345;\n\tadd.f32 \t%f474, %f346, %f346;\n\tadd.f32 \t%f475, %f347, %f347;\n\tadd.f32 \t%f476, %f348, %f348;\n\tadd.f32 \t%f477, %f349, %f349;\n\tadd.f32 \t%f478, %f350, %f350;\n\tadd.f32 \t%f479, %f351, %f351;\n\tadd.f32 \t%f480, %f352, %f352;\n\tadd.f32 \t%f481, %f353, %f353;\n\tadd.f32 \t%f482, %f354, %f354;\n\tadd.f32 \t%f483, %f355, %f355;\n\tadd.f32 \t%f484, %f356, %f356;\n\tadd.f32 \t%f485, %f357, %f357;\n\tadd.f32 \t%f486, %f358, %f358;\n\tadd.f32 \t%f487, %f359, %f359;\n\tadd.f32 \t%f488, %f360, %f360;\n\tadd.f32 \t%f489, %f361, %f361;\n\tadd.f32 \t%f490, %f362, %f362;\n\tadd.f32 \t%f491, %f363, %f363;\n\tadd.f32 \t%f492, %f364, %f364;\n\tadd.f32 \t%f493, %f365, %f365;\n\tadd.f32 \t%f494, %f366, %f366;\n\tadd.f32 \t%f495, %f367, %f367;\n\tadd.f32 \t%f496, %f368, %f368;\n\tadd.f32 \t%f497, %f369, %f369;\n\tadd.f32 \t%f498, %f370, %f370;\n\tadd.f32 \t%f499, %f371, %f371;\n\tadd.f32 \t%f500, %f372, %f372;\n\tadd.f32 \t%f501, %f373, %f373;\n\tadd.f32 \t%f502, %f374, %f374;\n\tadd.f32 \t%f503, %f375, %f375;\n\tadd.f32 \t%f504, %f376, %f376;\n\tadd.f32 \t%f505, %f377, %f377;\n\tadd.f32 \t%f506, %f378, %f378;\n\tadd.f32 \t%f507, %f379, %f379;\n\tadd.f32 \t%f508, %f380, %f380;\n\tadd.f32 \t%f509, %f381, %f381;\n\tadd.f32 \t%f510, %f382, %f382;\n\tadd.f32 \t%f511, %f383, %f383;\n\tadd.f32 \t%f512, %f384, %f384;\n\tadd.s64 \t%rd104, %rd99, %rd100;\n\tadd.s64 \t%rd65, %rd104, %rd102;\n\tadd.s64 \t%rd66, %rd65, 2048;\n\tadd.s64 \t%rd67, %rd65, 4096;\n\tadd.s64 \t%rd68, %rd65, 6144;\n\tadd.s64 \t%rd69, %rd65, 8192;\n\tadd.s64 \t%rd70, %rd65, 10240;\n\tadd.s64 \t%rd71, %rd65, 12288;\n\tadd.s64 \t%rd72, %rd65, 14336;\n\tadd.s64 \t%rd73, %rd65, 16384;\n\tadd.s64 \t%rd74, %rd65, 18432;\n\tadd.s64 \t%rd75, %rd65, 20480;\n\tadd.s64 \t%rd76, %rd65, 22528;\n\tadd.s64 \t%rd77, %rd65, 24576;\n\tadd.s64 \t%rd78, %rd65, 26624;\n\tadd.s64 \t%rd79, %rd65, 28672;\n\tadd.s64 \t%rd80, %rd65, 30720;\n\tadd.s64 \t%rd81, %rd65, 32768;\n\tadd.s64 \t%rd82, %rd65, 34816;\n\tadd.s64 \t%rd83, %rd65, 36864;\n\tadd.s64 \t%rd84, %rd65, 38912;\n\tadd.s64 \t%rd85, %rd65, 40960;\n\tadd.s64 \t%rd86, %rd65, 43008;\n\tadd.s64 \t%rd87, %rd65, 45056;\n\tadd.s64 \t%rd88, %rd65, 47104;\n\tadd.s64 \t%rd89, %rd65, 49152;\n\tadd.s64 \t%rd90, %rd65, 51200;\n\tadd.s64 \t%rd91, %rd65, 53248;\n\tadd.s64 \t%rd92, %rd65, 55296;\n\tadd.s64 \t%rd93, %rd65, 57344;\n\tadd.s64 \t%rd94, %rd65, 59392;\n\tadd.s64 \t%rd95, %rd65, 61440;\n\tadd.s64 \t%rd96, %rd65, 63488;\n\tmov.b32 \t%r257, %f385;\n\tmov.b32 \t%r258, %f386;\n\tmov.b32 \t%r259, %f387;\n\tmov.b32 \t%r260, %f388;\n\t@%p1 st.global.v4.b32 [ %rd65 + 0 ], { %r257, %r258, %r259, %r260 };\n\tmov.b32 \t%r261, %f389;\n\tmov.b32 \t%r262, %f390;\n\tmov.b32 \t%r263, %f391;\n\tmov.b32 \t%r264, %f392;\n\t@%p1 st.global.v4.b32 [ %rd66 + 0 ], { %r261, %r262, %r263, %r264 };\n\tmov.b32 \t%r265, %f393;\n\tmov.b32 \t%r266, %f394;\n\tmov.b32 \t%r267, %f395;\n\tmov.b32 \t%r268, %f396;\n\t@%p1 st.global.v4.b32 [ %rd67 + 0 ], { %r265, %r266, %r267, %r268 };\n\tmov.b32 \t%r269, %f397;\n\tmov.b32 \t%r270, %f398;\n\tmov.b32 \t%r271, %f399;\n\tmov.b32 \t%r272, %f400;\n\t@%p1 st.global.v4.b32 [ %rd68 + 0 ], { %r269, %r270, %r271, %r272 };\n\tmov.b32 \t%r273, %f401;\n\tmov.b32 \t%r274, %f402;\n\tmov.b32 \t%r275, %f403;\n\tmov.b32 \t%r276, %f404;\n\t@%p1 st.global.v4.b32 [ %rd69 + 0 ], { %r273, %r274, %r275, %r276 };\n\tmov.b32 \t%r277, %f405;\n\tmov.b32 \t%r278, %f406;\n\tmov.b32 \t%r279, %f407;\n\tmov.b32 \t%r280, %f408;\n\t@%p1 st.global.v4.b32 [ %rd70 + 0 ], { %r277, %r278, %r279, %r280 };\n\tmov.b32 \t%r281, %f409;\n\tmov.b32 \t%r282, %f410;\n\tmov.b32 \t%r283, %f411;\n\tmov.b32 \t%r284, %f412;\n\t@%p1 st.global.v4.b32 [ %rd71 + 0 ], { %r281, %r282, %r283, %r284 };\n\tmov.b32 \t%r285, %f413;\n\tmov.b32 \t%r286, %f414;\n\tmov.b32 \t%r287, %f415;\n\tmov.b32 \t%r288, %f416;\n\t@%p1 st.global.v4.b32 [ %rd72 + 0 ], { %r285, %r286, %r287, %r288 };\n\tmov.b32 \t%r289, %f417;\n\tmov.b32 \t%r290, %f418;\n\tmov.b32 \t%r291, %f419;\n\tmov.b32 \t%r292, %f420;\n\t@%p1 st.global.v4.b32 [ %rd73 + 0 ], { %r289, %r290, %r291, %r292 };\n\tmov.b32 \t%r293, %f421;\n\tmov.b32 \t%r294, %f422;\n\tmov.b32 \t%r295, %f423;\n\tmov.b32 \t%r296, %f424;\n\t@%p1 st.global.v4.b32 [ %rd74 + 0 ], { %r293, %r294, %r295, %r296 };\n\tmov.b32 \t%r297, %f425;\n\tmov.b32 \t%r298, %f426;\n\tmov.b32 \t%r299, %f427;\n\tmov.b32 \t%r300, %f428;\n\t@%p1 st.global.v4.b32 [ %rd75 + 0 ], { %r297, %r298, %r299, %r300 };\n\tmov.b32 \t%r301, %f429;\n\tmov.b32 \t%r302, %f430;\n\tmov.b32 \t%r303, %f431;\n\tmov.b32 \t%r304, %f432;\n\t@%p1 st.global.v4.b32 [ %rd76 + 0 ], { %r301, %r302, %r303, %r304 };\n\tmov.b32 \t%r305, %f433;\n\tmov.b32 \t%r306, %f434;\n\tmov.b32 \t%r307, %f435;\n\tmov.b32 \t%r308, %f436;\n\t@%p1 st.global.v4.b32 [ %rd77 + 0 ], { %r305, %r306, %r307, %r308 };\n\tmov.b32 \t%r309, %f437;\n\tmov.b32 \t%r310, %f438;\n\tmov.b32 \t%r311, %f439;\n\tmov.b32 \t%r312, %f440;\n\t@%p1 st.global.v4.b32 [ %rd78 + 0 ], { %r309, %r310, %r311, %r312 };\n\tmov.b32 \t%r313, %f441;\n\tmov.b32 \t%r314, %f442;\n\tmov.b32 \t%r315, %f443;\n\tmov.b32 \t%r316, %f444;\n\t@%p1 st.global.v4.b32 [ %rd79 + 0 ], { %r313, %r314, %r315, %r316 };\n\tmov.b32 \t%r317, %f445;\n\tmov.b32 \t%r318, %f446;\n\tmov.b32 \t%r319, %f447;\n\tmov.b32 \t%r320, %f448;\n\t@%p1 st.global.v4.b32 [ %rd80 + 0 ], { %r317, %r318, %r319, %r320 };\n\tmov.b32 \t%r321, %f449;\n\tmov.b32 \t%r322, %f450;\n\tmov.b32 \t%r323, %f451;\n\tmov.b32 \t%r324, %f452;\n\t@%p1 st.global.v4.b32 [ %rd81 + 0 ], { %r321, %r322, %r323, %r324 };\n\tmov.b32 \t%r325, %f453;\n\tmov.b32 \t%r326, %f454;\n\tmov.b32 \t%r327, %f455;\n\tmov.b32 \t%r328, %f456;\n\t@%p1 st.global.v4.b32 [ %rd82 + 0 ], { %r325, %r326, %r327, %r328 };\n\tmov.b32 \t%r329, %f457;\n\tmov.b32 \t%r330, %f458;\n\tmov.b32 \t%r331, %f459;\n\tmov.b32 \t%r332, %f460;\n\t@%p1 st.global.v4.b32 [ %rd83 + 0 ], { %r329, %r330, %r331, %r332 };\n\tmov.b32 \t%r333, %f461;\n\tmov.b32 \t%r334, %f462;\n\tmov.b32 \t%r335, %f463;\n\tmov.b32 \t%r336, %f464;\n\t@%p1 st.global.v4.b32 [ %rd84 + 0 ], { %r333, %r334, %r335, %r336 };\n\tmov.b32 \t%r337, %f465;\n\tmov.b32 \t%r338, %f466;\n\tmov.b32 \t%r339, %f467;\n\tmov.b32 \t%r340, %f468;\n\t@%p1 st.global.v4.b32 [ %rd85 + 0 ], { %r337, %r338, %r339, %r340 };\n\tmov.b32 \t%r341, %f469;\n\tmov.b32 \t%r342, %f470;\n\tmov.b32 \t%r343, %f471;\n\tmov.b32 \t%r344, %f472;\n\t@%p1 st.global.v4.b32 [ %rd86 + 0 ], { %r341, %r342, %r343, %r344 };\n\tmov.b32 \t%r345, %f473;\n\tmov.b32 \t%r346, %f474;\n\tmov.b32 \t%r347, %f475;\n\tmov.b32 \t%r348, %f476;\n\t@%p1 st.global.v4.b32 [ %rd87 + 0 ], { %r345, %r346, %r347, %r348 };\n\tmov.b32 \t%r349, %f477;\n\tmov.b32 \t%r350, %f478;\n\tmov.b32 \t%r351, %f479;\n\tmov.b32 \t%r352, %f480;\n\t@%p1 st.global.v4.b32 [ %rd88 + 0 ], { %r349, %r350, %r351, %r352 };\n\tmov.b32 \t%r353, %f481;\n\tmov.b32 \t%r354, %f482;\n\tmov.b32 \t%r355, %f483;\n\tmov.b32 \t%r356, %f484;\n\t@%p1 st.global.v4.b32 [ %rd89 + 0 ], { %r353, %r354, %r355, %r356 };\n\tmov.b32 \t%r357, %f485;\n\tmov.b32 \t%r358, %f486;\n\tmov.b32 \t%r359, %f487;\n\tmov.b32 \t%r360, %f488;\n\t@%p1 st.global.v4.b32 [ %rd90 + 0 ], { %r357, %r358, %r359, %r360 };\n\tmov.b32 \t%r361, %f489;\n\tmov.b32 \t%r362, %f490;\n\tmov.b32 \t%r363, %f491;\n\tmov.b32 \t%r364, %f492;\n\t@%p1 st.global.v4.b32 [ %rd91 + 0 ], { %r361, %r362, %r363, %r364 };\n\tmov.b32 \t%r365, %f493;\n\tmov.b32 \t%r366, %f494;\n\tmov.b32 \t%r367, %f495;\n\tmov.b32 \t%r368, %f496;\n\t@%p1 st.global.v4.b32 [ %rd92 + 0 ], { %r365, %r366, %r367, %r368 };\n\tmov.b32 \t%r369, %f497;\n\tmov.b32 \t%r370, %f498;\n\tmov.b32 \t%r371, %f499;\n\tmov.b32 \t%r372, %f500;\n\t@%p1 st.global.v4.b32 [ %rd93 + 0 ], { %r369, %r370, %r371, %r372 };\n\tmov.b32 \t%r373, %f501;\n\tmov.b32 \t%r374, %f502;\n\tmov.b32 \t%r375, %f503;\n\tmov.b32 \t%r376, %f504;\n\t@%p1 st.global.v4.b32 [ %rd94 + 0 ], { %r373, %r374, %r375, %r376 };\n\tmov.b32 \t%r377, %f505;\n\tmov.b32 \t%r378, %f506;\n\tmov.b32 \t%r379, %f507;\n\tmov.b32 \t%r380, %f508;\n\t@%p1 st.global.v4.b32 [ %rd95 + 0 ], { %r377, %r378, %r379, %r380 };\n\tmov.b32 \t%r381, %f509;\n\tmov.b32 \t%r382, %f510;\n\tmov.b32 \t%r383, %f511;\n\tmov.b32 \t%r384, %f512;\n\t@%p1 st.global.v4.b32 [ %rd96 + 0 ], { %r381, %r382, %r383, %r384 };\n\tret;\n\n}\n', 'module {\n tt.func public @add(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}) attributes {noinline = false} {\n %0 = tt.get_program_id x : i32\n %1 = tt.get_program_id y : i32\n %2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32 = arith.constant 128 : i32\n %cst = arith.constant dense<128> : tensor<128x1xi32>\n %4 = arith.muli %3, %cst : tensor<128x1xi32>\n %5 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %6 = tt.expand_dims %5 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32 = arith.constant 1 : i32\n %cst_0 = arith.constant dense<1> : tensor<1x128xi32>\n %7 = arith.muli %6, %cst_0 : tensor<1x128xi32>\n %8 = tt.broadcast %4 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %9 = tt.broadcast %7 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %10 = tt.splat %arg0 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %11 = tt.addptr %10, %8 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %12 = tt.addptr %11, %9 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %13 = tt.load %12 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>\n %14 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %15 = tt.expand_dims %14 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32_1 = arith.constant 128 : i32\n %cst_2 = arith.constant dense<128> : tensor<128x1xi32>\n %16 = arith.muli %15, %cst_2 : tensor<128x1xi32>\n %17 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %18 = tt.expand_dims %17 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32_3 = arith.constant 1 : i32\n %cst_4 = arith.constant dense<1> : tensor<1x128xi32>\n %19 = arith.muli %18, %cst_4 : tensor<1x128xi32>\n %20 = tt.broadcast %16 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %21 = tt.broadcast %19 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %22 = tt.splat %arg1 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %23 = tt.addptr %22, %20 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %24 = tt.addptr %23, %21 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %25 = tt.load %24 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>\n %26 = arith.addf %13, %25 : tensor<128x128xf32>\n %cst_5 = arith.constant 2.000000e+00 : f32\n %cst_6 = arith.constant dense<2.000000e+00> : tensor<128x128xf32>\n %27 = arith.mulf %26, %cst_6 : tensor<128x128xf32>\n %28 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %29 = tt.expand_dims %28 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32_7 = arith.constant 128 : i32\n %cst_8 = arith.constant dense<128> : tensor<128x1xi32>\n %30 = arith.muli %29, %cst_8 : tensor<128x1xi32>\n %31 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %32 = tt.expand_dims %31 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32_9 = arith.constant 1 : i32\n %cst_10 = arith.constant dense<1> : tensor<1x128xi32>\n %33 = arith.muli %32, %cst_10 : tensor<1x128xi32>\n %34 = tt.broadcast %30 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %35 = tt.broadcast %33 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %36 = tt.splat %arg2 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %37 = tt.addptr %36, %34 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %38 = tt.addptr %37, %35 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n tt.store %38, %27 {cache = 1 : i32, evict = 1 : i32} : tensor<128x128xf32>\n tt.return\n }\n}\n', 89 The preceding stack trace is the source of the JAX operation that, once transformed by JAX, triggered the following exception. -------------------- The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/media/adam/shared_drive/PycharmProjects/triton_test/pallas_test.py", line 231, in app.run(main) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/absl/app.py", line 308, in run _run_main(main, args) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/absl/app.py", line 254, in _run_main sys.exit(main(argv)) File "/media/adam/shared_drive/PycharmProjects/triton_test/pallas_test.py", line 226, in main print(pl.pallas_call(kernel1, out_shape=out_shape, grid=grid)(x, y)) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/traceback_util.py", line 166, in reraise_with_filtered_traceback return fun(*args, **kwargs) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 250, in cache_miss outs, out_flat, out_tree, args_flat, jaxpr = _python_pjit_helper( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 163, in _python_pjit_helper out_flat = pjit_p.bind(*args_flat, **params) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/core.py", line 2652, in bind return self.bind_with_trace(top_trace, args, params) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/core.py", line 383, in bind_with_trace out = trace.process_primitive(self, map(trace.full_raise, args), params) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/core.py", line 790, in process_primitive return primitive.impl(*tracers, **params) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 1193, in _pjit_call_impl return xc._xla.pjit(name, f, call_impl_cache_miss, [], [], donated_argnums, File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 1177, in call_impl_cache_miss out_flat, compiled = _pjit_call_impl_python( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 1110, in _pjit_call_impl_python compiled = _pjit_lower( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 1239, in _pjit_lower return _pjit_lower_cached(jaxpr, in_shardings, out_shardings, *args, **kwargs) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/pjit.py", line 1279, in _pjit_lower_cached return pxla.lower_sharding_computation( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/profiler.py", line 314, in wrapper return func(*args, **kwargs) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/interpreters/pxla.py", line 2085, in lower_sharding_computation nreps, tuple_args, shape_poly_state) = _cached_lowering_to_hlo( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/interpreters/pxla.py", line 1928, in _cached_lowering_to_hlo lowering_result = mlir.lower_jaxpr_to_module( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/interpreters/mlir.py", line 666, in lower_jaxpr_to_module lower_jaxpr_to_fun( File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/interpreters/mlir.py", line 997, in lower_jaxpr_to_fun out_vals, tokens_out = jaxpr_subcomp(ctx.replace(name_stack=callee_name_stack), File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax/_src/interpreters/mlir.py", line 1143, in jaxpr_subcomp ans = rule(rule_ctx, *map(_unwrap_singleton_ir_values, in_nodes), File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax_triton/pallas/triton_lowering.py", line 1642, in pallas_call_lowering kernel = triton_kernel_call_lib.TritonKernel( jax._src.traceback_util.UnfilteredStackTrace: TypeError: __init__(): incompatible constructor arguments. The following argument types are supported: 1. jaxlib.cuda._triton.TritonKernel(arg0: str, arg1: str, arg2: int, arg3: int) Invoked with: 'add', 4, 0, '//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 8.0\n.target sm_89\n.address_size 64\n\n\t// .globl\tadd\n\n.visible .entry add(\n\t.param .u64 add_param_0,\n\t.param .u64 add_param_1,\n\t.param .u64 add_param_2\n)\n.maxntid 128, 1, 1\n{\n\t.reg .pred \t%p<97>;\n\t.reg .b32 \t%r<389>;\n\t.reg .f32 \t%f<513>;\n\t.reg .b64 \t%rd<105>;\n\n\tld.param.u64 \t%rd97, [add_param_0];\n\tld.param.u64 \t%rd98, [add_param_1];\n\tmov.u32 \t%r385, %tid.x;\n\tshl.b32 \t%r386, %r385, 2;\n\tld.param.u64 \t%rd99, [add_param_2];\n\tand.b32 \t%r387, %r386, 124;\n\tand.b32 \t%r388, %r386, 16256;\n\tmul.wide.u32 \t%rd100, %r388, 4;\n\tadd.s64 \t%rd101, %rd97, %rd100;\n\tmul.wide.u32 \t%rd102, %r387, 4;\n\tadd.s64 \t%rd1, %rd101, %rd102;\n\tadd.s64 \t%rd2, %rd1, 2048;\n\tadd.s64 \t%rd3, %rd1, 4096;\n\tadd.s64 \t%rd4, %rd1, 6144;\n\tadd.s64 \t%rd5, %rd1, 8192;\n\tadd.s64 \t%rd6, %rd1, 10240;\n\tadd.s64 \t%rd7, %rd1, 12288;\n\tadd.s64 \t%rd8, %rd1, 14336;\n\tadd.s64 \t%rd9, %rd1, 16384;\n\tadd.s64 \t%rd10, %rd1, 18432;\n\tadd.s64 \t%rd11, %rd1, 20480;\n\tadd.s64 \t%rd12, %rd1, 22528;\n\tadd.s64 \t%rd13, %rd1, 24576;\n\tadd.s64 \t%rd14, %rd1, 26624;\n\tadd.s64 \t%rd15, %rd1, 28672;\n\tadd.s64 \t%rd16, %rd1, 30720;\n\tadd.s64 \t%rd17, %rd1, 32768;\n\tadd.s64 \t%rd18, %rd1, 34816;\n\tadd.s64 \t%rd19, %rd1, 36864;\n\tadd.s64 \t%rd20, %rd1, 38912;\n\tadd.s64 \t%rd21, %rd1, 40960;\n\tadd.s64 \t%rd22, %rd1, 43008;\n\tadd.s64 \t%rd23, %rd1, 45056;\n\tadd.s64 \t%rd24, %rd1, 47104;\n\tadd.s64 \t%rd25, %rd1, 49152;\n\tadd.s64 \t%rd26, %rd1, 51200;\n\tadd.s64 \t%rd27, %rd1, 53248;\n\tadd.s64 \t%rd28, %rd1, 55296;\n\tadd.s64 \t%rd29, %rd1, 57344;\n\tadd.s64 \t%rd30, %rd1, 59392;\n\tadd.s64 \t%rd31, %rd1, 61440;\n\tadd.s64 \t%rd32, %rd1, 63488;\n\tmov.pred \t%p1, -1;\n\tmov.u32 %r1, 0x0;\n\tmov.u32 %r2, 0x0;\n\tmov.u32 %r3, 0x0;\n\tmov.u32 %r4, 0x0;\n\t@%p1 ld.global.v4.b32 { %r1, %r2, %r3, %r4 }, [ %rd1 + 0 ];\n\tmov.b32 \t%f1, %r1;\n\tmov.b32 \t%f2, %r2;\n\tmov.b32 \t%f3, %r3;\n\tmov.b32 \t%f4, %r4;\n\tmov.u32 %r5, 0x0;\n\tmov.u32 %r6, 0x0;\n\tmov.u32 %r7, 0x0;\n\tmov.u32 %r8, 0x0;\n\t@%p1 ld.global.v4.b32 { %r5, %r6, %r7, %r8 }, [ %rd2 + 0 ];\n\tmov.b32 \t%f5, %r5;\n\tmov.b32 \t%f6, %r6;\n\tmov.b32 \t%f7, %r7;\n\tmov.b32 \t%f8, %r8;\n\tmov.u32 %r9, 0x0;\n\tmov.u32 %r10, 0x0;\n\tmov.u32 %r11, 0x0;\n\tmov.u32 %r12, 0x0;\n\t@%p1 ld.global.v4.b32 { %r9, %r10, %r11, %r12 }, [ %rd3 + 0 ];\n\tmov.b32 \t%f9, %r9;\n\tmov.b32 \t%f10, %r10;\n\tmov.b32 \t%f11, %r11;\n\tmov.b32 \t%f12, %r12;\n\tmov.u32 %r13, 0x0;\n\tmov.u32 %r14, 0x0;\n\tmov.u32 %r15, 0x0;\n\tmov.u32 %r16, 0x0;\n\t@%p1 ld.global.v4.b32 { %r13, %r14, %r15, %r16 }, [ %rd4 + 0 ];\n\tmov.b32 \t%f13, %r13;\n\tmov.b32 \t%f14, %r14;\n\tmov.b32 \t%f15, %r15;\n\tmov.b32 \t%f16, %r16;\n\tmov.u32 %r17, 0x0;\n\tmov.u32 %r18, 0x0;\n\tmov.u32 %r19, 0x0;\n\tmov.u32 %r20, 0x0;\n\t@%p1 ld.global.v4.b32 { %r17, %r18, %r19, %r20 }, [ %rd5 + 0 ];\n\tmov.b32 \t%f17, %r17;\n\tmov.b32 \t%f18, %r18;\n\tmov.b32 \t%f19, %r19;\n\tmov.b32 \t%f20, %r20;\n\tmov.u32 %r21, 0x0;\n\tmov.u32 %r22, 0x0;\n\tmov.u32 %r23, 0x0;\n\tmov.u32 %r24, 0x0;\n\t@%p1 ld.global.v4.b32 { %r21, %r22, %r23, %r24 }, [ %rd6 + 0 ];\n\tmov.b32 \t%f21, %r21;\n\tmov.b32 \t%f22, %r22;\n\tmov.b32 \t%f23, %r23;\n\tmov.b32 \t%f24, %r24;\n\tmov.u32 %r25, 0x0;\n\tmov.u32 %r26, 0x0;\n\tmov.u32 %r27, 0x0;\n\tmov.u32 %r28, 0x0;\n\t@%p1 ld.global.v4.b32 { %r25, %r26, %r27, %r28 }, [ %rd7 + 0 ];\n\tmov.b32 \t%f25, %r25;\n\tmov.b32 \t%f26, %r26;\n\tmov.b32 \t%f27, %r27;\n\tmov.b32 \t%f28, %r28;\n\tmov.u32 %r29, 0x0;\n\tmov.u32 %r30, 0x0;\n\tmov.u32 %r31, 0x0;\n\tmov.u32 %r32, 0x0;\n\t@%p1 ld.global.v4.b32 { %r29, %r30, %r31, %r32 }, [ %rd8 + 0 ];\n\tmov.b32 \t%f29, %r29;\n\tmov.b32 \t%f30, %r30;\n\tmov.b32 \t%f31, %r31;\n\tmov.b32 \t%f32, %r32;\n\tmov.u32 %r33, 0x0;\n\tmov.u32 %r34, 0x0;\n\tmov.u32 %r35, 0x0;\n\tmov.u32 %r36, 0x0;\n\t@%p1 ld.global.v4.b32 { %r33, %r34, %r35, %r36 }, [ %rd9 + 0 ];\n\tmov.b32 \t%f33, %r33;\n\tmov.b32 \t%f34, %r34;\n\tmov.b32 \t%f35, %r35;\n\tmov.b32 \t%f36, %r36;\n\tmov.u32 %r37, 0x0;\n\tmov.u32 %r38, 0x0;\n\tmov.u32 %r39, 0x0;\n\tmov.u32 %r40, 0x0;\n\t@%p1 ld.global.v4.b32 { %r37, %r38, %r39, %r40 }, [ %rd10 + 0 ];\n\tmov.b32 \t%f37, %r37;\n\tmov.b32 \t%f38, %r38;\n\tmov.b32 \t%f39, %r39;\n\tmov.b32 \t%f40, %r40;\n\tmov.u32 %r41, 0x0;\n\tmov.u32 %r42, 0x0;\n\tmov.u32 %r43, 0x0;\n\tmov.u32 %r44, 0x0;\n\t@%p1 ld.global.v4.b32 { %r41, %r42, %r43, %r44 }, [ %rd11 + 0 ];\n\tmov.b32 \t%f41, %r41;\n\tmov.b32 \t%f42, %r42;\n\tmov.b32 \t%f43, %r43;\n\tmov.b32 \t%f44, %r44;\n\tmov.u32 %r45, 0x0;\n\tmov.u32 %r46, 0x0;\n\tmov.u32 %r47, 0x0;\n\tmov.u32 %r48, 0x0;\n\t@%p1 ld.global.v4.b32 { %r45, %r46, %r47, %r48 }, [ %rd12 + 0 ];\n\tmov.b32 \t%f45, %r45;\n\tmov.b32 \t%f46, %r46;\n\tmov.b32 \t%f47, %r47;\n\tmov.b32 \t%f48, %r48;\n\tmov.u32 %r49, 0x0;\n\tmov.u32 %r50, 0x0;\n\tmov.u32 %r51, 0x0;\n\tmov.u32 %r52, 0x0;\n\t@%p1 ld.global.v4.b32 { %r49, %r50, %r51, %r52 }, [ %rd13 + 0 ];\n\tmov.b32 \t%f49, %r49;\n\tmov.b32 \t%f50, %r50;\n\tmov.b32 \t%f51, %r51;\n\tmov.b32 \t%f52, %r52;\n\tmov.u32 %r53, 0x0;\n\tmov.u32 %r54, 0x0;\n\tmov.u32 %r55, 0x0;\n\tmov.u32 %r56, 0x0;\n\t@%p1 ld.global.v4.b32 { %r53, %r54, %r55, %r56 }, [ %rd14 + 0 ];\n\tmov.b32 \t%f53, %r53;\n\tmov.b32 \t%f54, %r54;\n\tmov.b32 \t%f55, %r55;\n\tmov.b32 \t%f56, %r56;\n\tmov.u32 %r57, 0x0;\n\tmov.u32 %r58, 0x0;\n\tmov.u32 %r59, 0x0;\n\tmov.u32 %r60, 0x0;\n\t@%p1 ld.global.v4.b32 { %r57, %r58, %r59, %r60 }, [ %rd15 + 0 ];\n\tmov.b32 \t%f57, %r57;\n\tmov.b32 \t%f58, %r58;\n\tmov.b32 \t%f59, %r59;\n\tmov.b32 \t%f60, %r60;\n\tmov.u32 %r61, 0x0;\n\tmov.u32 %r62, 0x0;\n\tmov.u32 %r63, 0x0;\n\tmov.u32 %r64, 0x0;\n\t@%p1 ld.global.v4.b32 { %r61, %r62, %r63, %r64 }, [ %rd16 + 0 ];\n\tmov.b32 \t%f61, %r61;\n\tmov.b32 \t%f62, %r62;\n\tmov.b32 \t%f63, %r63;\n\tmov.b32 \t%f64, %r64;\n\tmov.u32 %r65, 0x0;\n\tmov.u32 %r66, 0x0;\n\tmov.u32 %r67, 0x0;\n\tmov.u32 %r68, 0x0;\n\t@%p1 ld.global.v4.b32 { %r65, %r66, %r67, %r68 }, [ %rd17 + 0 ];\n\tmov.b32 \t%f65, %r65;\n\tmov.b32 \t%f66, %r66;\n\tmov.b32 \t%f67, %r67;\n\tmov.b32 \t%f68, %r68;\n\tmov.u32 %r69, 0x0;\n\tmov.u32 %r70, 0x0;\n\tmov.u32 %r71, 0x0;\n\tmov.u32 %r72, 0x0;\n\t@%p1 ld.global.v4.b32 { %r69, %r70, %r71, %r72 }, [ %rd18 + 0 ];\n\tmov.b32 \t%f69, %r69;\n\tmov.b32 \t%f70, %r70;\n\tmov.b32 \t%f71, %r71;\n\tmov.b32 \t%f72, %r72;\n\tmov.u32 %r73, 0x0;\n\tmov.u32 %r74, 0x0;\n\tmov.u32 %r75, 0x0;\n\tmov.u32 %r76, 0x0;\n\t@%p1 ld.global.v4.b32 { %r73, %r74, %r75, %r76 }, [ %rd19 + 0 ];\n\tmov.b32 \t%f73, %r73;\n\tmov.b32 \t%f74, %r74;\n\tmov.b32 \t%f75, %r75;\n\tmov.b32 \t%f76, %r76;\n\tmov.u32 %r77, 0x0;\n\tmov.u32 %r78, 0x0;\n\tmov.u32 %r79, 0x0;\n\tmov.u32 %r80, 0x0;\n\t@%p1 ld.global.v4.b32 { %r77, %r78, %r79, %r80 }, [ %rd20 + 0 ];\n\tmov.b32 \t%f77, %r77;\n\tmov.b32 \t%f78, %r78;\n\tmov.b32 \t%f79, %r79;\n\tmov.b32 \t%f80, %r80;\n\tmov.u32 %r81, 0x0;\n\tmov.u32 %r82, 0x0;\n\tmov.u32 %r83, 0x0;\n\tmov.u32 %r84, 0x0;\n\t@%p1 ld.global.v4.b32 { %r81, %r82, %r83, %r84 }, [ %rd21 + 0 ];\n\tmov.b32 \t%f81, %r81;\n\tmov.b32 \t%f82, %r82;\n\tmov.b32 \t%f83, %r83;\n\tmov.b32 \t%f84, %r84;\n\tmov.u32 %r85, 0x0;\n\tmov.u32 %r86, 0x0;\n\tmov.u32 %r87, 0x0;\n\tmov.u32 %r88, 0x0;\n\t@%p1 ld.global.v4.b32 { %r85, %r86, %r87, %r88 }, [ %rd22 + 0 ];\n\tmov.b32 \t%f85, %r85;\n\tmov.b32 \t%f86, %r86;\n\tmov.b32 \t%f87, %r87;\n\tmov.b32 \t%f88, %r88;\n\tmov.u32 %r89, 0x0;\n\tmov.u32 %r90, 0x0;\n\tmov.u32 %r91, 0x0;\n\tmov.u32 %r92, 0x0;\n\t@%p1 ld.global.v4.b32 { %r89, %r90, %r91, %r92 }, [ %rd23 + 0 ];\n\tmov.b32 \t%f89, %r89;\n\tmov.b32 \t%f90, %r90;\n\tmov.b32 \t%f91, %r91;\n\tmov.b32 \t%f92, %r92;\n\tmov.u32 %r93, 0x0;\n\tmov.u32 %r94, 0x0;\n\tmov.u32 %r95, 0x0;\n\tmov.u32 %r96, 0x0;\n\t@%p1 ld.global.v4.b32 { %r93, %r94, %r95, %r96 }, [ %rd24 + 0 ];\n\tmov.b32 \t%f93, %r93;\n\tmov.b32 \t%f94, %r94;\n\tmov.b32 \t%f95, %r95;\n\tmov.b32 \t%f96, %r96;\n\tmov.u32 %r97, 0x0;\n\tmov.u32 %r98, 0x0;\n\tmov.u32 %r99, 0x0;\n\tmov.u32 %r100, 0x0;\n\t@%p1 ld.global.v4.b32 { %r97, %r98, %r99, %r100 }, [ %rd25 + 0 ];\n\tmov.b32 \t%f97, %r97;\n\tmov.b32 \t%f98, %r98;\n\tmov.b32 \t%f99, %r99;\n\tmov.b32 \t%f100, %r100;\n\tmov.u32 %r101, 0x0;\n\tmov.u32 %r102, 0x0;\n\tmov.u32 %r103, 0x0;\n\tmov.u32 %r104, 0x0;\n\t@%p1 ld.global.v4.b32 { %r101, %r102, %r103, %r104 }, [ %rd26 + 0 ];\n\tmov.b32 \t%f101, %r101;\n\tmov.b32 \t%f102, %r102;\n\tmov.b32 \t%f103, %r103;\n\tmov.b32 \t%f104, %r104;\n\tmov.u32 %r105, 0x0;\n\tmov.u32 %r106, 0x0;\n\tmov.u32 %r107, 0x0;\n\tmov.u32 %r108, 0x0;\n\t@%p1 ld.global.v4.b32 { %r105, %r106, %r107, %r108 }, [ %rd27 + 0 ];\n\tmov.b32 \t%f105, %r105;\n\tmov.b32 \t%f106, %r106;\n\tmov.b32 \t%f107, %r107;\n\tmov.b32 \t%f108, %r108;\n\tmov.u32 %r109, 0x0;\n\tmov.u32 %r110, 0x0;\n\tmov.u32 %r111, 0x0;\n\tmov.u32 %r112, 0x0;\n\t@%p1 ld.global.v4.b32 { %r109, %r110, %r111, %r112 }, [ %rd28 + 0 ];\n\tmov.b32 \t%f109, %r109;\n\tmov.b32 \t%f110, %r110;\n\tmov.b32 \t%f111, %r111;\n\tmov.b32 \t%f112, %r112;\n\tmov.u32 %r113, 0x0;\n\tmov.u32 %r114, 0x0;\n\tmov.u32 %r115, 0x0;\n\tmov.u32 %r116, 0x0;\n\t@%p1 ld.global.v4.b32 { %r113, %r114, %r115, %r116 }, [ %rd29 + 0 ];\n\tmov.b32 \t%f113, %r113;\n\tmov.b32 \t%f114, %r114;\n\tmov.b32 \t%f115, %r115;\n\tmov.b32 \t%f116, %r116;\n\tmov.u32 %r117, 0x0;\n\tmov.u32 %r118, 0x0;\n\tmov.u32 %r119, 0x0;\n\tmov.u32 %r120, 0x0;\n\t@%p1 ld.global.v4.b32 { %r117, %r118, %r119, %r120 }, [ %rd30 + 0 ];\n\tmov.b32 \t%f117, %r117;\n\tmov.b32 \t%f118, %r118;\n\tmov.b32 \t%f119, %r119;\n\tmov.b32 \t%f120, %r120;\n\tmov.u32 %r121, 0x0;\n\tmov.u32 %r122, 0x0;\n\tmov.u32 %r123, 0x0;\n\tmov.u32 %r124, 0x0;\n\t@%p1 ld.global.v4.b32 { %r121, %r122, %r123, %r124 }, [ %rd31 + 0 ];\n\tmov.b32 \t%f121, %r121;\n\tmov.b32 \t%f122, %r122;\n\tmov.b32 \t%f123, %r123;\n\tmov.b32 \t%f124, %r124;\n\tmov.u32 %r125, 0x0;\n\tmov.u32 %r126, 0x0;\n\tmov.u32 %r127, 0x0;\n\tmov.u32 %r128, 0x0;\n\t@%p1 ld.global.v4.b32 { %r125, %r126, %r127, %r128 }, [ %rd32 + 0 ];\n\tmov.b32 \t%f125, %r125;\n\tmov.b32 \t%f126, %r126;\n\tmov.b32 \t%f127, %r127;\n\tmov.b32 \t%f128, %r128;\n\tadd.s64 \t%rd103, %rd98, %rd100;\n\tadd.s64 \t%rd33, %rd103, %rd102;\n\tadd.s64 \t%rd34, %rd33, 2048;\n\tadd.s64 \t%rd35, %rd33, 4096;\n\tadd.s64 \t%rd36, %rd33, 6144;\n\tadd.s64 \t%rd37, %rd33, 8192;\n\tadd.s64 \t%rd38, %rd33, 10240;\n\tadd.s64 \t%rd39, %rd33, 12288;\n\tadd.s64 \t%rd40, %rd33, 14336;\n\tadd.s64 \t%rd41, %rd33, 16384;\n\tadd.s64 \t%rd42, %rd33, 18432;\n\tadd.s64 \t%rd43, %rd33, 20480;\n\tadd.s64 \t%rd44, %rd33, 22528;\n\tadd.s64 \t%rd45, %rd33, 24576;\n\tadd.s64 \t%rd46, %rd33, 26624;\n\tadd.s64 \t%rd47, %rd33, 28672;\n\tadd.s64 \t%rd48, %rd33, 30720;\n\tadd.s64 \t%rd49, %rd33, 32768;\n\tadd.s64 \t%rd50, %rd33, 34816;\n\tadd.s64 \t%rd51, %rd33, 36864;\n\tadd.s64 \t%rd52, %rd33, 38912;\n\tadd.s64 \t%rd53, %rd33, 40960;\n\tadd.s64 \t%rd54, %rd33, 43008;\n\tadd.s64 \t%rd55, %rd33, 45056;\n\tadd.s64 \t%rd56, %rd33, 47104;\n\tadd.s64 \t%rd57, %rd33, 49152;\n\tadd.s64 \t%rd58, %rd33, 51200;\n\tadd.s64 \t%rd59, %rd33, 53248;\n\tadd.s64 \t%rd60, %rd33, 55296;\n\tadd.s64 \t%rd61, %rd33, 57344;\n\tadd.s64 \t%rd62, %rd33, 59392;\n\tadd.s64 \t%rd63, %rd33, 61440;\n\tadd.s64 \t%rd64, %rd33, 63488;\n\tmov.u32 %r129, 0x0;\n\tmov.u32 %r130, 0x0;\n\tmov.u32 %r131, 0x0;\n\tmov.u32 %r132, 0x0;\n\t@%p1 ld.global.v4.b32 { %r129, %r130, %r131, %r132 }, [ %rd33 + 0 ];\n\tmov.b32 \t%f129, %r129;\n\tmov.b32 \t%f130, %r130;\n\tmov.b32 \t%f131, %r131;\n\tmov.b32 \t%f132, %r132;\n\tmov.u32 %r133, 0x0;\n\tmov.u32 %r134, 0x0;\n\tmov.u32 %r135, 0x0;\n\tmov.u32 %r136, 0x0;\n\t@%p1 ld.global.v4.b32 { %r133, %r134, %r135, %r136 }, [ %rd34 + 0 ];\n\tmov.b32 \t%f133, %r133;\n\tmov.b32 \t%f134, %r134;\n\tmov.b32 \t%f135, %r135;\n\tmov.b32 \t%f136, %r136;\n\tmov.u32 %r137, 0x0;\n\tmov.u32 %r138, 0x0;\n\tmov.u32 %r139, 0x0;\n\tmov.u32 %r140, 0x0;\n\t@%p1 ld.global.v4.b32 { %r137, %r138, %r139, %r140 }, [ %rd35 + 0 ];\n\tmov.b32 \t%f137, %r137;\n\tmov.b32 \t%f138, %r138;\n\tmov.b32 \t%f139, %r139;\n\tmov.b32 \t%f140, %r140;\n\tmov.u32 %r141, 0x0;\n\tmov.u32 %r142, 0x0;\n\tmov.u32 %r143, 0x0;\n\tmov.u32 %r144, 0x0;\n\t@%p1 ld.global.v4.b32 { %r141, %r142, %r143, %r144 }, [ %rd36 + 0 ];\n\tmov.b32 \t%f141, %r141;\n\tmov.b32 \t%f142, %r142;\n\tmov.b32 \t%f143, %r143;\n\tmov.b32 \t%f144, %r144;\n\tmov.u32 %r145, 0x0;\n\tmov.u32 %r146, 0x0;\n\tmov.u32 %r147, 0x0;\n\tmov.u32 %r148, 0x0;\n\t@%p1 ld.global.v4.b32 { %r145, %r146, %r147, %r148 }, [ %rd37 + 0 ];\n\tmov.b32 \t%f145, %r145;\n\tmov.b32 \t%f146, %r146;\n\tmov.b32 \t%f147, %r147;\n\tmov.b32 \t%f148, %r148;\n\tmov.u32 %r149, 0x0;\n\tmov.u32 %r150, 0x0;\n\tmov.u32 %r151, 0x0;\n\tmov.u32 %r152, 0x0;\n\t@%p1 ld.global.v4.b32 { %r149, %r150, %r151, %r152 }, [ %rd38 + 0 ];\n\tmov.b32 \t%f149, %r149;\n\tmov.b32 \t%f150, %r150;\n\tmov.b32 \t%f151, %r151;\n\tmov.b32 \t%f152, %r152;\n\tmov.u32 %r153, 0x0;\n\tmov.u32 %r154, 0x0;\n\tmov.u32 %r155, 0x0;\n\tmov.u32 %r156, 0x0;\n\t@%p1 ld.global.v4.b32 { %r153, %r154, %r155, %r156 }, [ %rd39 + 0 ];\n\tmov.b32 \t%f153, %r153;\n\tmov.b32 \t%f154, %r154;\n\tmov.b32 \t%f155, %r155;\n\tmov.b32 \t%f156, %r156;\n\tmov.u32 %r157, 0x0;\n\tmov.u32 %r158, 0x0;\n\tmov.u32 %r159, 0x0;\n\tmov.u32 %r160, 0x0;\n\t@%p1 ld.global.v4.b32 { %r157, %r158, %r159, %r160 }, [ %rd40 + 0 ];\n\tmov.b32 \t%f157, %r157;\n\tmov.b32 \t%f158, %r158;\n\tmov.b32 \t%f159, %r159;\n\tmov.b32 \t%f160, %r160;\n\tmov.u32 %r161, 0x0;\n\tmov.u32 %r162, 0x0;\n\tmov.u32 %r163, 0x0;\n\tmov.u32 %r164, 0x0;\n\t@%p1 ld.global.v4.b32 { %r161, %r162, %r163, %r164 }, [ %rd41 + 0 ];\n\tmov.b32 \t%f161, %r161;\n\tmov.b32 \t%f162, %r162;\n\tmov.b32 \t%f163, %r163;\n\tmov.b32 \t%f164, %r164;\n\tmov.u32 %r165, 0x0;\n\tmov.u32 %r166, 0x0;\n\tmov.u32 %r167, 0x0;\n\tmov.u32 %r168, 0x0;\n\t@%p1 ld.global.v4.b32 { %r165, %r166, %r167, %r168 }, [ %rd42 + 0 ];\n\tmov.b32 \t%f165, %r165;\n\tmov.b32 \t%f166, %r166;\n\tmov.b32 \t%f167, %r167;\n\tmov.b32 \t%f168, %r168;\n\tmov.u32 %r169, 0x0;\n\tmov.u32 %r170, 0x0;\n\tmov.u32 %r171, 0x0;\n\tmov.u32 %r172, 0x0;\n\t@%p1 ld.global.v4.b32 { %r169, %r170, %r171, %r172 }, [ %rd43 + 0 ];\n\tmov.b32 \t%f169, %r169;\n\tmov.b32 \t%f170, %r170;\n\tmov.b32 \t%f171, %r171;\n\tmov.b32 \t%f172, %r172;\n\tmov.u32 %r173, 0x0;\n\tmov.u32 %r174, 0x0;\n\tmov.u32 %r175, 0x0;\n\tmov.u32 %r176, 0x0;\n\t@%p1 ld.global.v4.b32 { %r173, %r174, %r175, %r176 }, [ %rd44 + 0 ];\n\tmov.b32 \t%f173, %r173;\n\tmov.b32 \t%f174, %r174;\n\tmov.b32 \t%f175, %r175;\n\tmov.b32 \t%f176, %r176;\n\tmov.u32 %r177, 0x0;\n\tmov.u32 %r178, 0x0;\n\tmov.u32 %r179, 0x0;\n\tmov.u32 %r180, 0x0;\n\t@%p1 ld.global.v4.b32 { %r177, %r178, %r179, %r180 }, [ %rd45 + 0 ];\n\tmov.b32 \t%f177, %r177;\n\tmov.b32 \t%f178, %r178;\n\tmov.b32 \t%f179, %r179;\n\tmov.b32 \t%f180, %r180;\n\tmov.u32 %r181, 0x0;\n\tmov.u32 %r182, 0x0;\n\tmov.u32 %r183, 0x0;\n\tmov.u32 %r184, 0x0;\n\t@%p1 ld.global.v4.b32 { %r181, %r182, %r183, %r184 }, [ %rd46 + 0 ];\n\tmov.b32 \t%f181, %r181;\n\tmov.b32 \t%f182, %r182;\n\tmov.b32 \t%f183, %r183;\n\tmov.b32 \t%f184, %r184;\n\tmov.u32 %r185, 0x0;\n\tmov.u32 %r186, 0x0;\n\tmov.u32 %r187, 0x0;\n\tmov.u32 %r188, 0x0;\n\t@%p1 ld.global.v4.b32 { %r185, %r186, %r187, %r188 }, [ %rd47 + 0 ];\n\tmov.b32 \t%f185, %r185;\n\tmov.b32 \t%f186, %r186;\n\tmov.b32 \t%f187, %r187;\n\tmov.b32 \t%f188, %r188;\n\tmov.u32 %r189, 0x0;\n\tmov.u32 %r190, 0x0;\n\tmov.u32 %r191, 0x0;\n\tmov.u32 %r192, 0x0;\n\t@%p1 ld.global.v4.b32 { %r189, %r190, %r191, %r192 }, [ %rd48 + 0 ];\n\tmov.b32 \t%f189, %r189;\n\tmov.b32 \t%f190, %r190;\n\tmov.b32 \t%f191, %r191;\n\tmov.b32 \t%f192, %r192;\n\tmov.u32 %r193, 0x0;\n\tmov.u32 %r194, 0x0;\n\tmov.u32 %r195, 0x0;\n\tmov.u32 %r196, 0x0;\n\t@%p1 ld.global.v4.b32 { %r193, %r194, %r195, %r196 }, [ %rd49 + 0 ];\n\tmov.b32 \t%f193, %r193;\n\tmov.b32 \t%f194, %r194;\n\tmov.b32 \t%f195, %r195;\n\tmov.b32 \t%f196, %r196;\n\tmov.u32 %r197, 0x0;\n\tmov.u32 %r198, 0x0;\n\tmov.u32 %r199, 0x0;\n\tmov.u32 %r200, 0x0;\n\t@%p1 ld.global.v4.b32 { %r197, %r198, %r199, %r200 }, [ %rd50 + 0 ];\n\tmov.b32 \t%f197, %r197;\n\tmov.b32 \t%f198, %r198;\n\tmov.b32 \t%f199, %r199;\n\tmov.b32 \t%f200, %r200;\n\tmov.u32 %r201, 0x0;\n\tmov.u32 %r202, 0x0;\n\tmov.u32 %r203, 0x0;\n\tmov.u32 %r204, 0x0;\n\t@%p1 ld.global.v4.b32 { %r201, %r202, %r203, %r204 }, [ %rd51 + 0 ];\n\tmov.b32 \t%f201, %r201;\n\tmov.b32 \t%f202, %r202;\n\tmov.b32 \t%f203, %r203;\n\tmov.b32 \t%f204, %r204;\n\tmov.u32 %r205, 0x0;\n\tmov.u32 %r206, 0x0;\n\tmov.u32 %r207, 0x0;\n\tmov.u32 %r208, 0x0;\n\t@%p1 ld.global.v4.b32 { %r205, %r206, %r207, %r208 }, [ %rd52 + 0 ];\n\tmov.b32 \t%f205, %r205;\n\tmov.b32 \t%f206, %r206;\n\tmov.b32 \t%f207, %r207;\n\tmov.b32 \t%f208, %r208;\n\tmov.u32 %r209, 0x0;\n\tmov.u32 %r210, 0x0;\n\tmov.u32 %r211, 0x0;\n\tmov.u32 %r212, 0x0;\n\t@%p1 ld.global.v4.b32 { %r209, %r210, %r211, %r212 }, [ %rd53 + 0 ];\n\tmov.b32 \t%f209, %r209;\n\tmov.b32 \t%f210, %r210;\n\tmov.b32 \t%f211, %r211;\n\tmov.b32 \t%f212, %r212;\n\tmov.u32 %r213, 0x0;\n\tmov.u32 %r214, 0x0;\n\tmov.u32 %r215, 0x0;\n\tmov.u32 %r216, 0x0;\n\t@%p1 ld.global.v4.b32 { %r213, %r214, %r215, %r216 }, [ %rd54 + 0 ];\n\tmov.b32 \t%f213, %r213;\n\tmov.b32 \t%f214, %r214;\n\tmov.b32 \t%f215, %r215;\n\tmov.b32 \t%f216, %r216;\n\tmov.u32 %r217, 0x0;\n\tmov.u32 %r218, 0x0;\n\tmov.u32 %r219, 0x0;\n\tmov.u32 %r220, 0x0;\n\t@%p1 ld.global.v4.b32 { %r217, %r218, %r219, %r220 }, [ %rd55 + 0 ];\n\tmov.b32 \t%f217, %r217;\n\tmov.b32 \t%f218, %r218;\n\tmov.b32 \t%f219, %r219;\n\tmov.b32 \t%f220, %r220;\n\tmov.u32 %r221, 0x0;\n\tmov.u32 %r222, 0x0;\n\tmov.u32 %r223, 0x0;\n\tmov.u32 %r224, 0x0;\n\t@%p1 ld.global.v4.b32 { %r221, %r222, %r223, %r224 }, [ %rd56 + 0 ];\n\tmov.b32 \t%f221, %r221;\n\tmov.b32 \t%f222, %r222;\n\tmov.b32 \t%f223, %r223;\n\tmov.b32 \t%f224, %r224;\n\tmov.u32 %r225, 0x0;\n\tmov.u32 %r226, 0x0;\n\tmov.u32 %r227, 0x0;\n\tmov.u32 %r228, 0x0;\n\t@%p1 ld.global.v4.b32 { %r225, %r226, %r227, %r228 }, [ %rd57 + 0 ];\n\tmov.b32 \t%f225, %r225;\n\tmov.b32 \t%f226, %r226;\n\tmov.b32 \t%f227, %r227;\n\tmov.b32 \t%f228, %r228;\n\tmov.u32 %r229, 0x0;\n\tmov.u32 %r230, 0x0;\n\tmov.u32 %r231, 0x0;\n\tmov.u32 %r232, 0x0;\n\t@%p1 ld.global.v4.b32 { %r229, %r230, %r231, %r232 }, [ %rd58 + 0 ];\n\tmov.b32 \t%f229, %r229;\n\tmov.b32 \t%f230, %r230;\n\tmov.b32 \t%f231, %r231;\n\tmov.b32 \t%f232, %r232;\n\tmov.u32 %r233, 0x0;\n\tmov.u32 %r234, 0x0;\n\tmov.u32 %r235, 0x0;\n\tmov.u32 %r236, 0x0;\n\t@%p1 ld.global.v4.b32 { %r233, %r234, %r235, %r236 }, [ %rd59 + 0 ];\n\tmov.b32 \t%f233, %r233;\n\tmov.b32 \t%f234, %r234;\n\tmov.b32 \t%f235, %r235;\n\tmov.b32 \t%f236, %r236;\n\tmov.u32 %r237, 0x0;\n\tmov.u32 %r238, 0x0;\n\tmov.u32 %r239, 0x0;\n\tmov.u32 %r240, 0x0;\n\t@%p1 ld.global.v4.b32 { %r237, %r238, %r239, %r240 }, [ %rd60 + 0 ];\n\tmov.b32 \t%f237, %r237;\n\tmov.b32 \t%f238, %r238;\n\tmov.b32 \t%f239, %r239;\n\tmov.b32 \t%f240, %r240;\n\tmov.u32 %r241, 0x0;\n\tmov.u32 %r242, 0x0;\n\tmov.u32 %r243, 0x0;\n\tmov.u32 %r244, 0x0;\n\t@%p1 ld.global.v4.b32 { %r241, %r242, %r243, %r244 }, [ %rd61 + 0 ];\n\tmov.b32 \t%f241, %r241;\n\tmov.b32 \t%f242, %r242;\n\tmov.b32 \t%f243, %r243;\n\tmov.b32 \t%f244, %r244;\n\tmov.u32 %r245, 0x0;\n\tmov.u32 %r246, 0x0;\n\tmov.u32 %r247, 0x0;\n\tmov.u32 %r248, 0x0;\n\t@%p1 ld.global.v4.b32 { %r245, %r246, %r247, %r248 }, [ %rd62 + 0 ];\n\tmov.b32 \t%f245, %r245;\n\tmov.b32 \t%f246, %r246;\n\tmov.b32 \t%f247, %r247;\n\tmov.b32 \t%f248, %r248;\n\tmov.u32 %r249, 0x0;\n\tmov.u32 %r250, 0x0;\n\tmov.u32 %r251, 0x0;\n\tmov.u32 %r252, 0x0;\n\t@%p1 ld.global.v4.b32 { %r249, %r250, %r251, %r252 }, [ %rd63 + 0 ];\n\tmov.b32 \t%f249, %r249;\n\tmov.b32 \t%f250, %r250;\n\tmov.b32 \t%f251, %r251;\n\tmov.b32 \t%f252, %r252;\n\tmov.u32 %r253, 0x0;\n\tmov.u32 %r254, 0x0;\n\tmov.u32 %r255, 0x0;\n\tmov.u32 %r256, 0x0;\n\t@%p1 ld.global.v4.b32 { %r253, %r254, %r255, %r256 }, [ %rd64 + 0 ];\n\tmov.b32 \t%f253, %r253;\n\tmov.b32 \t%f254, %r254;\n\tmov.b32 \t%f255, %r255;\n\tmov.b32 \t%f256, %r256;\n\tadd.f32 \t%f257, %f1, %f129;\n\tadd.f32 \t%f258, %f2, %f130;\n\tadd.f32 \t%f259, %f3, %f131;\n\tadd.f32 \t%f260, %f4, %f132;\n\tadd.f32 \t%f261, %f5, %f133;\n\tadd.f32 \t%f262, %f6, %f134;\n\tadd.f32 \t%f263, %f7, %f135;\n\tadd.f32 \t%f264, %f8, %f136;\n\tadd.f32 \t%f265, %f9, %f137;\n\tadd.f32 \t%f266, %f10, %f138;\n\tadd.f32 \t%f267, %f11, %f139;\n\tadd.f32 \t%f268, %f12, %f140;\n\tadd.f32 \t%f269, %f13, %f141;\n\tadd.f32 \t%f270, %f14, %f142;\n\tadd.f32 \t%f271, %f15, %f143;\n\tadd.f32 \t%f272, %f16, %f144;\n\tadd.f32 \t%f273, %f17, %f145;\n\tadd.f32 \t%f274, %f18, %f146;\n\tadd.f32 \t%f275, %f19, %f147;\n\tadd.f32 \t%f276, %f20, %f148;\n\tadd.f32 \t%f277, %f21, %f149;\n\tadd.f32 \t%f278, %f22, %f150;\n\tadd.f32 \t%f279, %f23, %f151;\n\tadd.f32 \t%f280, %f24, %f152;\n\tadd.f32 \t%f281, %f25, %f153;\n\tadd.f32 \t%f282, %f26, %f154;\n\tadd.f32 \t%f283, %f27, %f155;\n\tadd.f32 \t%f284, %f28, %f156;\n\tadd.f32 \t%f285, %f29, %f157;\n\tadd.f32 \t%f286, %f30, %f158;\n\tadd.f32 \t%f287, %f31, %f159;\n\tadd.f32 \t%f288, %f32, %f160;\n\tadd.f32 \t%f289, %f33, %f161;\n\tadd.f32 \t%f290, %f34, %f162;\n\tadd.f32 \t%f291, %f35, %f163;\n\tadd.f32 \t%f292, %f36, %f164;\n\tadd.f32 \t%f293, %f37, %f165;\n\tadd.f32 \t%f294, %f38, %f166;\n\tadd.f32 \t%f295, %f39, %f167;\n\tadd.f32 \t%f296, %f40, %f168;\n\tadd.f32 \t%f297, %f41, %f169;\n\tadd.f32 \t%f298, %f42, %f170;\n\tadd.f32 \t%f299, %f43, %f171;\n\tadd.f32 \t%f300, %f44, %f172;\n\tadd.f32 \t%f301, %f45, %f173;\n\tadd.f32 \t%f302, %f46, %f174;\n\tadd.f32 \t%f303, %f47, %f175;\n\tadd.f32 \t%f304, %f48, %f176;\n\tadd.f32 \t%f305, %f49, %f177;\n\tadd.f32 \t%f306, %f50, %f178;\n\tadd.f32 \t%f307, %f51, %f179;\n\tadd.f32 \t%f308, %f52, %f180;\n\tadd.f32 \t%f309, %f53, %f181;\n\tadd.f32 \t%f310, %f54, %f182;\n\tadd.f32 \t%f311, %f55, %f183;\n\tadd.f32 \t%f312, %f56, %f184;\n\tadd.f32 \t%f313, %f57, %f185;\n\tadd.f32 \t%f314, %f58, %f186;\n\tadd.f32 \t%f315, %f59, %f187;\n\tadd.f32 \t%f316, %f60, %f188;\n\tadd.f32 \t%f317, %f61, %f189;\n\tadd.f32 \t%f318, %f62, %f190;\n\tadd.f32 \t%f319, %f63, %f191;\n\tadd.f32 \t%f320, %f64, %f192;\n\tadd.f32 \t%f321, %f65, %f193;\n\tadd.f32 \t%f322, %f66, %f194;\n\tadd.f32 \t%f323, %f67, %f195;\n\tadd.f32 \t%f324, %f68, %f196;\n\tadd.f32 \t%f325, %f69, %f197;\n\tadd.f32 \t%f326, %f70, %f198;\n\tadd.f32 \t%f327, %f71, %f199;\n\tadd.f32 \t%f328, %f72, %f200;\n\tadd.f32 \t%f329, %f73, %f201;\n\tadd.f32 \t%f330, %f74, %f202;\n\tadd.f32 \t%f331, %f75, %f203;\n\tadd.f32 \t%f332, %f76, %f204;\n\tadd.f32 \t%f333, %f77, %f205;\n\tadd.f32 \t%f334, %f78, %f206;\n\tadd.f32 \t%f335, %f79, %f207;\n\tadd.f32 \t%f336, %f80, %f208;\n\tadd.f32 \t%f337, %f81, %f209;\n\tadd.f32 \t%f338, %f82, %f210;\n\tadd.f32 \t%f339, %f83, %f211;\n\tadd.f32 \t%f340, %f84, %f212;\n\tadd.f32 \t%f341, %f85, %f213;\n\tadd.f32 \t%f342, %f86, %f214;\n\tadd.f32 \t%f343, %f87, %f215;\n\tadd.f32 \t%f344, %f88, %f216;\n\tadd.f32 \t%f345, %f89, %f217;\n\tadd.f32 \t%f346, %f90, %f218;\n\tadd.f32 \t%f347, %f91, %f219;\n\tadd.f32 \t%f348, %f92, %f220;\n\tadd.f32 \t%f349, %f93, %f221;\n\tadd.f32 \t%f350, %f94, %f222;\n\tadd.f32 \t%f351, %f95, %f223;\n\tadd.f32 \t%f352, %f96, %f224;\n\tadd.f32 \t%f353, %f97, %f225;\n\tadd.f32 \t%f354, %f98, %f226;\n\tadd.f32 \t%f355, %f99, %f227;\n\tadd.f32 \t%f356, %f100, %f228;\n\tadd.f32 \t%f357, %f101, %f229;\n\tadd.f32 \t%f358, %f102, %f230;\n\tadd.f32 \t%f359, %f103, %f231;\n\tadd.f32 \t%f360, %f104, %f232;\n\tadd.f32 \t%f361, %f105, %f233;\n\tadd.f32 \t%f362, %f106, %f234;\n\tadd.f32 \t%f363, %f107, %f235;\n\tadd.f32 \t%f364, %f108, %f236;\n\tadd.f32 \t%f365, %f109, %f237;\n\tadd.f32 \t%f366, %f110, %f238;\n\tadd.f32 \t%f367, %f111, %f239;\n\tadd.f32 \t%f368, %f112, %f240;\n\tadd.f32 \t%f369, %f113, %f241;\n\tadd.f32 \t%f370, %f114, %f242;\n\tadd.f32 \t%f371, %f115, %f243;\n\tadd.f32 \t%f372, %f116, %f244;\n\tadd.f32 \t%f373, %f117, %f245;\n\tadd.f32 \t%f374, %f118, %f246;\n\tadd.f32 \t%f375, %f119, %f247;\n\tadd.f32 \t%f376, %f120, %f248;\n\tadd.f32 \t%f377, %f121, %f249;\n\tadd.f32 \t%f378, %f122, %f250;\n\tadd.f32 \t%f379, %f123, %f251;\n\tadd.f32 \t%f380, %f124, %f252;\n\tadd.f32 \t%f381, %f125, %f253;\n\tadd.f32 \t%f382, %f126, %f254;\n\tadd.f32 \t%f383, %f127, %f255;\n\tadd.f32 \t%f384, %f128, %f256;\n\tadd.f32 \t%f385, %f257, %f257;\n\tadd.f32 \t%f386, %f258, %f258;\n\tadd.f32 \t%f387, %f259, %f259;\n\tadd.f32 \t%f388, %f260, %f260;\n\tadd.f32 \t%f389, %f261, %f261;\n\tadd.f32 \t%f390, %f262, %f262;\n\tadd.f32 \t%f391, %f263, %f263;\n\tadd.f32 \t%f392, %f264, %f264;\n\tadd.f32 \t%f393, %f265, %f265;\n\tadd.f32 \t%f394, %f266, %f266;\n\tadd.f32 \t%f395, %f267, %f267;\n\tadd.f32 \t%f396, %f268, %f268;\n\tadd.f32 \t%f397, %f269, %f269;\n\tadd.f32 \t%f398, %f270, %f270;\n\tadd.f32 \t%f399, %f271, %f271;\n\tadd.f32 \t%f400, %f272, %f272;\n\tadd.f32 \t%f401, %f273, %f273;\n\tadd.f32 \t%f402, %f274, %f274;\n\tadd.f32 \t%f403, %f275, %f275;\n\tadd.f32 \t%f404, %f276, %f276;\n\tadd.f32 \t%f405, %f277, %f277;\n\tadd.f32 \t%f406, %f278, %f278;\n\tadd.f32 \t%f407, %f279, %f279;\n\tadd.f32 \t%f408, %f280, %f280;\n\tadd.f32 \t%f409, %f281, %f281;\n\tadd.f32 \t%f410, %f282, %f282;\n\tadd.f32 \t%f411, %f283, %f283;\n\tadd.f32 \t%f412, %f284, %f284;\n\tadd.f32 \t%f413, %f285, %f285;\n\tadd.f32 \t%f414, %f286, %f286;\n\tadd.f32 \t%f415, %f287, %f287;\n\tadd.f32 \t%f416, %f288, %f288;\n\tadd.f32 \t%f417, %f289, %f289;\n\tadd.f32 \t%f418, %f290, %f290;\n\tadd.f32 \t%f419, %f291, %f291;\n\tadd.f32 \t%f420, %f292, %f292;\n\tadd.f32 \t%f421, %f293, %f293;\n\tadd.f32 \t%f422, %f294, %f294;\n\tadd.f32 \t%f423, %f295, %f295;\n\tadd.f32 \t%f424, %f296, %f296;\n\tadd.f32 \t%f425, %f297, %f297;\n\tadd.f32 \t%f426, %f298, %f298;\n\tadd.f32 \t%f427, %f299, %f299;\n\tadd.f32 \t%f428, %f300, %f300;\n\tadd.f32 \t%f429, %f301, %f301;\n\tadd.f32 \t%f430, %f302, %f302;\n\tadd.f32 \t%f431, %f303, %f303;\n\tadd.f32 \t%f432, %f304, %f304;\n\tadd.f32 \t%f433, %f305, %f305;\n\tadd.f32 \t%f434, %f306, %f306;\n\tadd.f32 \t%f435, %f307, %f307;\n\tadd.f32 \t%f436, %f308, %f308;\n\tadd.f32 \t%f437, %f309, %f309;\n\tadd.f32 \t%f438, %f310, %f310;\n\tadd.f32 \t%f439, %f311, %f311;\n\tadd.f32 \t%f440, %f312, %f312;\n\tadd.f32 \t%f441, %f313, %f313;\n\tadd.f32 \t%f442, %f314, %f314;\n\tadd.f32 \t%f443, %f315, %f315;\n\tadd.f32 \t%f444, %f316, %f316;\n\tadd.f32 \t%f445, %f317, %f317;\n\tadd.f32 \t%f446, %f318, %f318;\n\tadd.f32 \t%f447, %f319, %f319;\n\tadd.f32 \t%f448, %f320, %f320;\n\tadd.f32 \t%f449, %f321, %f321;\n\tadd.f32 \t%f450, %f322, %f322;\n\tadd.f32 \t%f451, %f323, %f323;\n\tadd.f32 \t%f452, %f324, %f324;\n\tadd.f32 \t%f453, %f325, %f325;\n\tadd.f32 \t%f454, %f326, %f326;\n\tadd.f32 \t%f455, %f327, %f327;\n\tadd.f32 \t%f456, %f328, %f328;\n\tadd.f32 \t%f457, %f329, %f329;\n\tadd.f32 \t%f458, %f330, %f330;\n\tadd.f32 \t%f459, %f331, %f331;\n\tadd.f32 \t%f460, %f332, %f332;\n\tadd.f32 \t%f461, %f333, %f333;\n\tadd.f32 \t%f462, %f334, %f334;\n\tadd.f32 \t%f463, %f335, %f335;\n\tadd.f32 \t%f464, %f336, %f336;\n\tadd.f32 \t%f465, %f337, %f337;\n\tadd.f32 \t%f466, %f338, %f338;\n\tadd.f32 \t%f467, %f339, %f339;\n\tadd.f32 \t%f468, %f340, %f340;\n\tadd.f32 \t%f469, %f341, %f341;\n\tadd.f32 \t%f470, %f342, %f342;\n\tadd.f32 \t%f471, %f343, %f343;\n\tadd.f32 \t%f472, %f344, %f344;\n\tadd.f32 \t%f473, %f345, %f345;\n\tadd.f32 \t%f474, %f346, %f346;\n\tadd.f32 \t%f475, %f347, %f347;\n\tadd.f32 \t%f476, %f348, %f348;\n\tadd.f32 \t%f477, %f349, %f349;\n\tadd.f32 \t%f478, %f350, %f350;\n\tadd.f32 \t%f479, %f351, %f351;\n\tadd.f32 \t%f480, %f352, %f352;\n\tadd.f32 \t%f481, %f353, %f353;\n\tadd.f32 \t%f482, %f354, %f354;\n\tadd.f32 \t%f483, %f355, %f355;\n\tadd.f32 \t%f484, %f356, %f356;\n\tadd.f32 \t%f485, %f357, %f357;\n\tadd.f32 \t%f486, %f358, %f358;\n\tadd.f32 \t%f487, %f359, %f359;\n\tadd.f32 \t%f488, %f360, %f360;\n\tadd.f32 \t%f489, %f361, %f361;\n\tadd.f32 \t%f490, %f362, %f362;\n\tadd.f32 \t%f491, %f363, %f363;\n\tadd.f32 \t%f492, %f364, %f364;\n\tadd.f32 \t%f493, %f365, %f365;\n\tadd.f32 \t%f494, %f366, %f366;\n\tadd.f32 \t%f495, %f367, %f367;\n\tadd.f32 \t%f496, %f368, %f368;\n\tadd.f32 \t%f497, %f369, %f369;\n\tadd.f32 \t%f498, %f370, %f370;\n\tadd.f32 \t%f499, %f371, %f371;\n\tadd.f32 \t%f500, %f372, %f372;\n\tadd.f32 \t%f501, %f373, %f373;\n\tadd.f32 \t%f502, %f374, %f374;\n\tadd.f32 \t%f503, %f375, %f375;\n\tadd.f32 \t%f504, %f376, %f376;\n\tadd.f32 \t%f505, %f377, %f377;\n\tadd.f32 \t%f506, %f378, %f378;\n\tadd.f32 \t%f507, %f379, %f379;\n\tadd.f32 \t%f508, %f380, %f380;\n\tadd.f32 \t%f509, %f381, %f381;\n\tadd.f32 \t%f510, %f382, %f382;\n\tadd.f32 \t%f511, %f383, %f383;\n\tadd.f32 \t%f512, %f384, %f384;\n\tadd.s64 \t%rd104, %rd99, %rd100;\n\tadd.s64 \t%rd65, %rd104, %rd102;\n\tadd.s64 \t%rd66, %rd65, 2048;\n\tadd.s64 \t%rd67, %rd65, 4096;\n\tadd.s64 \t%rd68, %rd65, 6144;\n\tadd.s64 \t%rd69, %rd65, 8192;\n\tadd.s64 \t%rd70, %rd65, 10240;\n\tadd.s64 \t%rd71, %rd65, 12288;\n\tadd.s64 \t%rd72, %rd65, 14336;\n\tadd.s64 \t%rd73, %rd65, 16384;\n\tadd.s64 \t%rd74, %rd65, 18432;\n\tadd.s64 \t%rd75, %rd65, 20480;\n\tadd.s64 \t%rd76, %rd65, 22528;\n\tadd.s64 \t%rd77, %rd65, 24576;\n\tadd.s64 \t%rd78, %rd65, 26624;\n\tadd.s64 \t%rd79, %rd65, 28672;\n\tadd.s64 \t%rd80, %rd65, 30720;\n\tadd.s64 \t%rd81, %rd65, 32768;\n\tadd.s64 \t%rd82, %rd65, 34816;\n\tadd.s64 \t%rd83, %rd65, 36864;\n\tadd.s64 \t%rd84, %rd65, 38912;\n\tadd.s64 \t%rd85, %rd65, 40960;\n\tadd.s64 \t%rd86, %rd65, 43008;\n\tadd.s64 \t%rd87, %rd65, 45056;\n\tadd.s64 \t%rd88, %rd65, 47104;\n\tadd.s64 \t%rd89, %rd65, 49152;\n\tadd.s64 \t%rd90, %rd65, 51200;\n\tadd.s64 \t%rd91, %rd65, 53248;\n\tadd.s64 \t%rd92, %rd65, 55296;\n\tadd.s64 \t%rd93, %rd65, 57344;\n\tadd.s64 \t%rd94, %rd65, 59392;\n\tadd.s64 \t%rd95, %rd65, 61440;\n\tadd.s64 \t%rd96, %rd65, 63488;\n\tmov.b32 \t%r257, %f385;\n\tmov.b32 \t%r258, %f386;\n\tmov.b32 \t%r259, %f387;\n\tmov.b32 \t%r260, %f388;\n\t@%p1 st.global.v4.b32 [ %rd65 + 0 ], { %r257, %r258, %r259, %r260 };\n\tmov.b32 \t%r261, %f389;\n\tmov.b32 \t%r262, %f390;\n\tmov.b32 \t%r263, %f391;\n\tmov.b32 \t%r264, %f392;\n\t@%p1 st.global.v4.b32 [ %rd66 + 0 ], { %r261, %r262, %r263, %r264 };\n\tmov.b32 \t%r265, %f393;\n\tmov.b32 \t%r266, %f394;\n\tmov.b32 \t%r267, %f395;\n\tmov.b32 \t%r268, %f396;\n\t@%p1 st.global.v4.b32 [ %rd67 + 0 ], { %r265, %r266, %r267, %r268 };\n\tmov.b32 \t%r269, %f397;\n\tmov.b32 \t%r270, %f398;\n\tmov.b32 \t%r271, %f399;\n\tmov.b32 \t%r272, %f400;\n\t@%p1 st.global.v4.b32 [ %rd68 + 0 ], { %r269, %r270, %r271, %r272 };\n\tmov.b32 \t%r273, %f401;\n\tmov.b32 \t%r274, %f402;\n\tmov.b32 \t%r275, %f403;\n\tmov.b32 \t%r276, %f404;\n\t@%p1 st.global.v4.b32 [ %rd69 + 0 ], { %r273, %r274, %r275, %r276 };\n\tmov.b32 \t%r277, %f405;\n\tmov.b32 \t%r278, %f406;\n\tmov.b32 \t%r279, %f407;\n\tmov.b32 \t%r280, %f408;\n\t@%p1 st.global.v4.b32 [ %rd70 + 0 ], { %r277, %r278, %r279, %r280 };\n\tmov.b32 \t%r281, %f409;\n\tmov.b32 \t%r282, %f410;\n\tmov.b32 \t%r283, %f411;\n\tmov.b32 \t%r284, %f412;\n\t@%p1 st.global.v4.b32 [ %rd71 + 0 ], { %r281, %r282, %r283, %r284 };\n\tmov.b32 \t%r285, %f413;\n\tmov.b32 \t%r286, %f414;\n\tmov.b32 \t%r287, %f415;\n\tmov.b32 \t%r288, %f416;\n\t@%p1 st.global.v4.b32 [ %rd72 + 0 ], { %r285, %r286, %r287, %r288 };\n\tmov.b32 \t%r289, %f417;\n\tmov.b32 \t%r290, %f418;\n\tmov.b32 \t%r291, %f419;\n\tmov.b32 \t%r292, %f420;\n\t@%p1 st.global.v4.b32 [ %rd73 + 0 ], { %r289, %r290, %r291, %r292 };\n\tmov.b32 \t%r293, %f421;\n\tmov.b32 \t%r294, %f422;\n\tmov.b32 \t%r295, %f423;\n\tmov.b32 \t%r296, %f424;\n\t@%p1 st.global.v4.b32 [ %rd74 + 0 ], { %r293, %r294, %r295, %r296 };\n\tmov.b32 \t%r297, %f425;\n\tmov.b32 \t%r298, %f426;\n\tmov.b32 \t%r299, %f427;\n\tmov.b32 \t%r300, %f428;\n\t@%p1 st.global.v4.b32 [ %rd75 + 0 ], { %r297, %r298, %r299, %r300 };\n\tmov.b32 \t%r301, %f429;\n\tmov.b32 \t%r302, %f430;\n\tmov.b32 \t%r303, %f431;\n\tmov.b32 \t%r304, %f432;\n\t@%p1 st.global.v4.b32 [ %rd76 + 0 ], { %r301, %r302, %r303, %r304 };\n\tmov.b32 \t%r305, %f433;\n\tmov.b32 \t%r306, %f434;\n\tmov.b32 \t%r307, %f435;\n\tmov.b32 \t%r308, %f436;\n\t@%p1 st.global.v4.b32 [ %rd77 + 0 ], { %r305, %r306, %r307, %r308 };\n\tmov.b32 \t%r309, %f437;\n\tmov.b32 \t%r310, %f438;\n\tmov.b32 \t%r311, %f439;\n\tmov.b32 \t%r312, %f440;\n\t@%p1 st.global.v4.b32 [ %rd78 + 0 ], { %r309, %r310, %r311, %r312 };\n\tmov.b32 \t%r313, %f441;\n\tmov.b32 \t%r314, %f442;\n\tmov.b32 \t%r315, %f443;\n\tmov.b32 \t%r316, %f444;\n\t@%p1 st.global.v4.b32 [ %rd79 + 0 ], { %r313, %r314, %r315, %r316 };\n\tmov.b32 \t%r317, %f445;\n\tmov.b32 \t%r318, %f446;\n\tmov.b32 \t%r319, %f447;\n\tmov.b32 \t%r320, %f448;\n\t@%p1 st.global.v4.b32 [ %rd80 + 0 ], { %r317, %r318, %r319, %r320 };\n\tmov.b32 \t%r321, %f449;\n\tmov.b32 \t%r322, %f450;\n\tmov.b32 \t%r323, %f451;\n\tmov.b32 \t%r324, %f452;\n\t@%p1 st.global.v4.b32 [ %rd81 + 0 ], { %r321, %r322, %r323, %r324 };\n\tmov.b32 \t%r325, %f453;\n\tmov.b32 \t%r326, %f454;\n\tmov.b32 \t%r327, %f455;\n\tmov.b32 \t%r328, %f456;\n\t@%p1 st.global.v4.b32 [ %rd82 + 0 ], { %r325, %r326, %r327, %r328 };\n\tmov.b32 \t%r329, %f457;\n\tmov.b32 \t%r330, %f458;\n\tmov.b32 \t%r331, %f459;\n\tmov.b32 \t%r332, %f460;\n\t@%p1 st.global.v4.b32 [ %rd83 + 0 ], { %r329, %r330, %r331, %r332 };\n\tmov.b32 \t%r333, %f461;\n\tmov.b32 \t%r334, %f462;\n\tmov.b32 \t%r335, %f463;\n\tmov.b32 \t%r336, %f464;\n\t@%p1 st.global.v4.b32 [ %rd84 + 0 ], { %r333, %r334, %r335, %r336 };\n\tmov.b32 \t%r337, %f465;\n\tmov.b32 \t%r338, %f466;\n\tmov.b32 \t%r339, %f467;\n\tmov.b32 \t%r340, %f468;\n\t@%p1 st.global.v4.b32 [ %rd85 + 0 ], { %r337, %r338, %r339, %r340 };\n\tmov.b32 \t%r341, %f469;\n\tmov.b32 \t%r342, %f470;\n\tmov.b32 \t%r343, %f471;\n\tmov.b32 \t%r344, %f472;\n\t@%p1 st.global.v4.b32 [ %rd86 + 0 ], { %r341, %r342, %r343, %r344 };\n\tmov.b32 \t%r345, %f473;\n\tmov.b32 \t%r346, %f474;\n\tmov.b32 \t%r347, %f475;\n\tmov.b32 \t%r348, %f476;\n\t@%p1 st.global.v4.b32 [ %rd87 + 0 ], { %r345, %r346, %r347, %r348 };\n\tmov.b32 \t%r349, %f477;\n\tmov.b32 \t%r350, %f478;\n\tmov.b32 \t%r351, %f479;\n\tmov.b32 \t%r352, %f480;\n\t@%p1 st.global.v4.b32 [ %rd88 + 0 ], { %r349, %r350, %r351, %r352 };\n\tmov.b32 \t%r353, %f481;\n\tmov.b32 \t%r354, %f482;\n\tmov.b32 \t%r355, %f483;\n\tmov.b32 \t%r356, %f484;\n\t@%p1 st.global.v4.b32 [ %rd89 + 0 ], { %r353, %r354, %r355, %r356 };\n\tmov.b32 \t%r357, %f485;\n\tmov.b32 \t%r358, %f486;\n\tmov.b32 \t%r359, %f487;\n\tmov.b32 \t%r360, %f488;\n\t@%p1 st.global.v4.b32 [ %rd90 + 0 ], { %r357, %r358, %r359, %r360 };\n\tmov.b32 \t%r361, %f489;\n\tmov.b32 \t%r362, %f490;\n\tmov.b32 \t%r363, %f491;\n\tmov.b32 \t%r364, %f492;\n\t@%p1 st.global.v4.b32 [ %rd91 + 0 ], { %r361, %r362, %r363, %r364 };\n\tmov.b32 \t%r365, %f493;\n\tmov.b32 \t%r366, %f494;\n\tmov.b32 \t%r367, %f495;\n\tmov.b32 \t%r368, %f496;\n\t@%p1 st.global.v4.b32 [ %rd92 + 0 ], { %r365, %r366, %r367, %r368 };\n\tmov.b32 \t%r369, %f497;\n\tmov.b32 \t%r370, %f498;\n\tmov.b32 \t%r371, %f499;\n\tmov.b32 \t%r372, %f500;\n\t@%p1 st.global.v4.b32 [ %rd93 + 0 ], { %r369, %r370, %r371, %r372 };\n\tmov.b32 \t%r373, %f501;\n\tmov.b32 \t%r374, %f502;\n\tmov.b32 \t%r375, %f503;\n\tmov.b32 \t%r376, %f504;\n\t@%p1 st.global.v4.b32 [ %rd94 + 0 ], { %r373, %r374, %r375, %r376 };\n\tmov.b32 \t%r377, %f505;\n\tmov.b32 \t%r378, %f506;\n\tmov.b32 \t%r379, %f507;\n\tmov.b32 \t%r380, %f508;\n\t@%p1 st.global.v4.b32 [ %rd95 + 0 ], { %r377, %r378, %r379, %r380 };\n\tmov.b32 \t%r381, %f509;\n\tmov.b32 \t%r382, %f510;\n\tmov.b32 \t%r383, %f511;\n\tmov.b32 \t%r384, %f512;\n\t@%p1 st.global.v4.b32 [ %rd96 + 0 ], { %r381, %r382, %r383, %r384 };\n\tret;\n\n}\n', 'module {\n tt.func public @add(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}) attributes {noinline = false} {\n %0 = tt.get_program_id x : i32\n %1 = tt.get_program_id y : i32\n %2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32 = arith.constant 128 : i32\n %cst = arith.constant dense<128> : tensor<128x1xi32>\n %4 = arith.muli %3, %cst : tensor<128x1xi32>\n %5 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %6 = tt.expand_dims %5 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32 = arith.constant 1 : i32\n %cst_0 = arith.constant dense<1> : tensor<1x128xi32>\n %7 = arith.muli %6, %cst_0 : tensor<1x128xi32>\n %8 = tt.broadcast %4 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %9 = tt.broadcast %7 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %10 = tt.splat %arg0 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %11 = tt.addptr %10, %8 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %12 = tt.addptr %11, %9 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %13 = tt.load %12 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>\n %14 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %15 = tt.expand_dims %14 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32_1 = arith.constant 128 : i32\n %cst_2 = arith.constant dense<128> : tensor<128x1xi32>\n %16 = arith.muli %15, %cst_2 : tensor<128x1xi32>\n %17 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %18 = tt.expand_dims %17 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32_3 = arith.constant 1 : i32\n %cst_4 = arith.constant dense<1> : tensor<1x128xi32>\n %19 = arith.muli %18, %cst_4 : tensor<1x128xi32>\n %20 = tt.broadcast %16 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %21 = tt.broadcast %19 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %22 = tt.splat %arg1 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %23 = tt.addptr %22, %20 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %24 = tt.addptr %23, %21 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %25 = tt.load %24 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>\n %26 = arith.addf %13, %25 : tensor<128x128xf32>\n %cst_5 = arith.constant 2.000000e+00 : f32\n %cst_6 = arith.constant dense<2.000000e+00> : tensor<128x128xf32>\n %27 = arith.mulf %26, %cst_6 : tensor<128x128xf32>\n %28 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %29 = tt.expand_dims %28 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32_7 = arith.constant 128 : i32\n %cst_8 = arith.constant dense<128> : tensor<128x1xi32>\n %30 = arith.muli %29, %cst_8 : tensor<128x1xi32>\n %31 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %32 = tt.expand_dims %31 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32_9 = arith.constant 1 : i32\n %cst_10 = arith.constant dense<1> : tensor<1x128xi32>\n %33 = arith.muli %32, %cst_10 : tensor<1x128xi32>\n %34 = tt.broadcast %30 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %35 = tt.broadcast %33 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %36 = tt.splat %arg2 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %37 = tt.addptr %36, %34 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %38 = tt.addptr %37, %35 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n tt.store %38, %27 {cache = 1 : i32, evict = 1 : i32} : tensor<128x128xf32>\n tt.return\n }\n}\n', 89 The stack trace below excludes JAX-internal frames. The preceding is the original exception that occurred, unmodified. -------------------- The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/media/adam/shared_drive/PycharmProjects/triton_test/pallas_test.py", line 231, in app.run(main) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/absl/app.py", line 308, in run _run_main(main, args) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/absl/app.py", line 254, in _run_main sys.exit(main(argv)) File "/media/adam/shared_drive/PycharmProjects/triton_test/pallas_test.py", line 226, in main print(pl.pallas_call(kernel1, out_shape=out_shape, grid=grid)(x, y)) File "/home/adam/anaconda3/envs/triton_test/lib/python3.10/site-packages/jax_triton/pallas/triton_lowering.py", line 1642, in pallas_call_lowering kernel = triton_kernel_call_lib.TritonKernel( TypeError: __init__(): incompatible constructor arguments. The following argument types are supported: 1. jaxlib.cuda._triton.TritonKernel(arg0: str, arg1: str, arg2: int, arg3: int) Invoked with: 'add', 4, 0, '//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 8.0\n.target sm_89\n.address_size 64\n\n\t// .globl\tadd\n\n.visible .entry add(\n\t.param .u64 add_param_0,\n\t.param .u64 add_param_1,\n\t.param .u64 add_param_2\n)\n.maxntid 128, 1, 1\n{\n\t.reg .pred \t%p<97>;\n\t.reg .b32 \t%r<389>;\n\t.reg .f32 \t%f<513>;\n\t.reg .b64 \t%rd<105>;\n\n\tld.param.u64 \t%rd97, [add_param_0];\n\tld.param.u64 \t%rd98, [add_param_1];\n\tmov.u32 \t%r385, %tid.x;\n\tshl.b32 \t%r386, %r385, 2;\n\tld.param.u64 \t%rd99, [add_param_2];\n\tand.b32 \t%r387, %r386, 124;\n\tand.b32 \t%r388, %r386, 16256;\n\tmul.wide.u32 \t%rd100, %r388, 4;\n\tadd.s64 \t%rd101, %rd97, %rd100;\n\tmul.wide.u32 \t%rd102, %r387, 4;\n\tadd.s64 \t%rd1, %rd101, %rd102;\n\tadd.s64 \t%rd2, %rd1, 2048;\n\tadd.s64 \t%rd3, %rd1, 4096;\n\tadd.s64 \t%rd4, %rd1, 6144;\n\tadd.s64 \t%rd5, %rd1, 8192;\n\tadd.s64 \t%rd6, %rd1, 10240;\n\tadd.s64 \t%rd7, %rd1, 12288;\n\tadd.s64 \t%rd8, %rd1, 14336;\n\tadd.s64 \t%rd9, %rd1, 16384;\n\tadd.s64 \t%rd10, %rd1, 18432;\n\tadd.s64 \t%rd11, %rd1, 20480;\n\tadd.s64 \t%rd12, %rd1, 22528;\n\tadd.s64 \t%rd13, %rd1, 24576;\n\tadd.s64 \t%rd14, %rd1, 26624;\n\tadd.s64 \t%rd15, %rd1, 28672;\n\tadd.s64 \t%rd16, %rd1, 30720;\n\tadd.s64 \t%rd17, %rd1, 32768;\n\tadd.s64 \t%rd18, %rd1, 34816;\n\tadd.s64 \t%rd19, %rd1, 36864;\n\tadd.s64 \t%rd20, %rd1, 38912;\n\tadd.s64 \t%rd21, %rd1, 40960;\n\tadd.s64 \t%rd22, %rd1, 43008;\n\tadd.s64 \t%rd23, %rd1, 45056;\n\tadd.s64 \t%rd24, %rd1, 47104;\n\tadd.s64 \t%rd25, %rd1, 49152;\n\tadd.s64 \t%rd26, %rd1, 51200;\n\tadd.s64 \t%rd27, %rd1, 53248;\n\tadd.s64 \t%rd28, %rd1, 55296;\n\tadd.s64 \t%rd29, %rd1, 57344;\n\tadd.s64 \t%rd30, %rd1, 59392;\n\tadd.s64 \t%rd31, %rd1, 61440;\n\tadd.s64 \t%rd32, %rd1, 63488;\n\tmov.pred \t%p1, -1;\n\tmov.u32 %r1, 0x0;\n\tmov.u32 %r2, 0x0;\n\tmov.u32 %r3, 0x0;\n\tmov.u32 %r4, 0x0;\n\t@%p1 ld.global.v4.b32 { %r1, %r2, %r3, %r4 }, [ %rd1 + 0 ];\n\tmov.b32 \t%f1, %r1;\n\tmov.b32 \t%f2, %r2;\n\tmov.b32 \t%f3, %r3;\n\tmov.b32 \t%f4, %r4;\n\tmov.u32 %r5, 0x0;\n\tmov.u32 %r6, 0x0;\n\tmov.u32 %r7, 0x0;\n\tmov.u32 %r8, 0x0;\n\t@%p1 ld.global.v4.b32 { %r5, %r6, %r7, %r8 }, [ %rd2 + 0 ];\n\tmov.b32 \t%f5, %r5;\n\tmov.b32 \t%f6, %r6;\n\tmov.b32 \t%f7, %r7;\n\tmov.b32 \t%f8, %r8;\n\tmov.u32 %r9, 0x0;\n\tmov.u32 %r10, 0x0;\n\tmov.u32 %r11, 0x0;\n\tmov.u32 %r12, 0x0;\n\t@%p1 ld.global.v4.b32 { %r9, %r10, %r11, %r12 }, [ %rd3 + 0 ];\n\tmov.b32 \t%f9, %r9;\n\tmov.b32 \t%f10, %r10;\n\tmov.b32 \t%f11, %r11;\n\tmov.b32 \t%f12, %r12;\n\tmov.u32 %r13, 0x0;\n\tmov.u32 %r14, 0x0;\n\tmov.u32 %r15, 0x0;\n\tmov.u32 %r16, 0x0;\n\t@%p1 ld.global.v4.b32 { %r13, %r14, %r15, %r16 }, [ %rd4 + 0 ];\n\tmov.b32 \t%f13, %r13;\n\tmov.b32 \t%f14, %r14;\n\tmov.b32 \t%f15, %r15;\n\tmov.b32 \t%f16, %r16;\n\tmov.u32 %r17, 0x0;\n\tmov.u32 %r18, 0x0;\n\tmov.u32 %r19, 0x0;\n\tmov.u32 %r20, 0x0;\n\t@%p1 ld.global.v4.b32 { %r17, %r18, %r19, %r20 }, [ %rd5 + 0 ];\n\tmov.b32 \t%f17, %r17;\n\tmov.b32 \t%f18, %r18;\n\tmov.b32 \t%f19, %r19;\n\tmov.b32 \t%f20, %r20;\n\tmov.u32 %r21, 0x0;\n\tmov.u32 %r22, 0x0;\n\tmov.u32 %r23, 0x0;\n\tmov.u32 %r24, 0x0;\n\t@%p1 ld.global.v4.b32 { %r21, %r22, %r23, %r24 }, [ %rd6 + 0 ];\n\tmov.b32 \t%f21, %r21;\n\tmov.b32 \t%f22, %r22;\n\tmov.b32 \t%f23, %r23;\n\tmov.b32 \t%f24, %r24;\n\tmov.u32 %r25, 0x0;\n\tmov.u32 %r26, 0x0;\n\tmov.u32 %r27, 0x0;\n\tmov.u32 %r28, 0x0;\n\t@%p1 ld.global.v4.b32 { %r25, %r26, %r27, %r28 }, [ %rd7 + 0 ];\n\tmov.b32 \t%f25, %r25;\n\tmov.b32 \t%f26, %r26;\n\tmov.b32 \t%f27, %r27;\n\tmov.b32 \t%f28, %r28;\n\tmov.u32 %r29, 0x0;\n\tmov.u32 %r30, 0x0;\n\tmov.u32 %r31, 0x0;\n\tmov.u32 %r32, 0x0;\n\t@%p1 ld.global.v4.b32 { %r29, %r30, %r31, %r32 }, [ %rd8 + 0 ];\n\tmov.b32 \t%f29, %r29;\n\tmov.b32 \t%f30, %r30;\n\tmov.b32 \t%f31, %r31;\n\tmov.b32 \t%f32, %r32;\n\tmov.u32 %r33, 0x0;\n\tmov.u32 %r34, 0x0;\n\tmov.u32 %r35, 0x0;\n\tmov.u32 %r36, 0x0;\n\t@%p1 ld.global.v4.b32 { %r33, %r34, %r35, %r36 }, [ %rd9 + 0 ];\n\tmov.b32 \t%f33, %r33;\n\tmov.b32 \t%f34, %r34;\n\tmov.b32 \t%f35, %r35;\n\tmov.b32 \t%f36, %r36;\n\tmov.u32 %r37, 0x0;\n\tmov.u32 %r38, 0x0;\n\tmov.u32 %r39, 0x0;\n\tmov.u32 %r40, 0x0;\n\t@%p1 ld.global.v4.b32 { %r37, %r38, %r39, %r40 }, [ %rd10 + 0 ];\n\tmov.b32 \t%f37, %r37;\n\tmov.b32 \t%f38, %r38;\n\tmov.b32 \t%f39, %r39;\n\tmov.b32 \t%f40, %r40;\n\tmov.u32 %r41, 0x0;\n\tmov.u32 %r42, 0x0;\n\tmov.u32 %r43, 0x0;\n\tmov.u32 %r44, 0x0;\n\t@%p1 ld.global.v4.b32 { %r41, %r42, %r43, %r44 }, [ %rd11 + 0 ];\n\tmov.b32 \t%f41, %r41;\n\tmov.b32 \t%f42, %r42;\n\tmov.b32 \t%f43, %r43;\n\tmov.b32 \t%f44, %r44;\n\tmov.u32 %r45, 0x0;\n\tmov.u32 %r46, 0x0;\n\tmov.u32 %r47, 0x0;\n\tmov.u32 %r48, 0x0;\n\t@%p1 ld.global.v4.b32 { %r45, %r46, %r47, %r48 }, [ %rd12 + 0 ];\n\tmov.b32 \t%f45, %r45;\n\tmov.b32 \t%f46, %r46;\n\tmov.b32 \t%f47, %r47;\n\tmov.b32 \t%f48, %r48;\n\tmov.u32 %r49, 0x0;\n\tmov.u32 %r50, 0x0;\n\tmov.u32 %r51, 0x0;\n\tmov.u32 %r52, 0x0;\n\t@%p1 ld.global.v4.b32 { %r49, %r50, %r51, %r52 }, [ %rd13 + 0 ];\n\tmov.b32 \t%f49, %r49;\n\tmov.b32 \t%f50, %r50;\n\tmov.b32 \t%f51, %r51;\n\tmov.b32 \t%f52, %r52;\n\tmov.u32 %r53, 0x0;\n\tmov.u32 %r54, 0x0;\n\tmov.u32 %r55, 0x0;\n\tmov.u32 %r56, 0x0;\n\t@%p1 ld.global.v4.b32 { %r53, %r54, %r55, %r56 }, [ %rd14 + 0 ];\n\tmov.b32 \t%f53, %r53;\n\tmov.b32 \t%f54, %r54;\n\tmov.b32 \t%f55, %r55;\n\tmov.b32 \t%f56, %r56;\n\tmov.u32 %r57, 0x0;\n\tmov.u32 %r58, 0x0;\n\tmov.u32 %r59, 0x0;\n\tmov.u32 %r60, 0x0;\n\t@%p1 ld.global.v4.b32 { %r57, %r58, %r59, %r60 }, [ %rd15 + 0 ];\n\tmov.b32 \t%f57, %r57;\n\tmov.b32 \t%f58, %r58;\n\tmov.b32 \t%f59, %r59;\n\tmov.b32 \t%f60, %r60;\n\tmov.u32 %r61, 0x0;\n\tmov.u32 %r62, 0x0;\n\tmov.u32 %r63, 0x0;\n\tmov.u32 %r64, 0x0;\n\t@%p1 ld.global.v4.b32 { %r61, %r62, %r63, %r64 }, [ %rd16 + 0 ];\n\tmov.b32 \t%f61, %r61;\n\tmov.b32 \t%f62, %r62;\n\tmov.b32 \t%f63, %r63;\n\tmov.b32 \t%f64, %r64;\n\tmov.u32 %r65, 0x0;\n\tmov.u32 %r66, 0x0;\n\tmov.u32 %r67, 0x0;\n\tmov.u32 %r68, 0x0;\n\t@%p1 ld.global.v4.b32 { %r65, %r66, %r67, %r68 }, [ %rd17 + 0 ];\n\tmov.b32 \t%f65, %r65;\n\tmov.b32 \t%f66, %r66;\n\tmov.b32 \t%f67, %r67;\n\tmov.b32 \t%f68, %r68;\n\tmov.u32 %r69, 0x0;\n\tmov.u32 %r70, 0x0;\n\tmov.u32 %r71, 0x0;\n\tmov.u32 %r72, 0x0;\n\t@%p1 ld.global.v4.b32 { %r69, %r70, %r71, %r72 }, [ %rd18 + 0 ];\n\tmov.b32 \t%f69, %r69;\n\tmov.b32 \t%f70, %r70;\n\tmov.b32 \t%f71, %r71;\n\tmov.b32 \t%f72, %r72;\n\tmov.u32 %r73, 0x0;\n\tmov.u32 %r74, 0x0;\n\tmov.u32 %r75, 0x0;\n\tmov.u32 %r76, 0x0;\n\t@%p1 ld.global.v4.b32 { %r73, %r74, %r75, %r76 }, [ %rd19 + 0 ];\n\tmov.b32 \t%f73, %r73;\n\tmov.b32 \t%f74, %r74;\n\tmov.b32 \t%f75, %r75;\n\tmov.b32 \t%f76, %r76;\n\tmov.u32 %r77, 0x0;\n\tmov.u32 %r78, 0x0;\n\tmov.u32 %r79, 0x0;\n\tmov.u32 %r80, 0x0;\n\t@%p1 ld.global.v4.b32 { %r77, %r78, %r79, %r80 }, [ %rd20 + 0 ];\n\tmov.b32 \t%f77, %r77;\n\tmov.b32 \t%f78, %r78;\n\tmov.b32 \t%f79, %r79;\n\tmov.b32 \t%f80, %r80;\n\tmov.u32 %r81, 0x0;\n\tmov.u32 %r82, 0x0;\n\tmov.u32 %r83, 0x0;\n\tmov.u32 %r84, 0x0;\n\t@%p1 ld.global.v4.b32 { %r81, %r82, %r83, %r84 }, [ %rd21 + 0 ];\n\tmov.b32 \t%f81, %r81;\n\tmov.b32 \t%f82, %r82;\n\tmov.b32 \t%f83, %r83;\n\tmov.b32 \t%f84, %r84;\n\tmov.u32 %r85, 0x0;\n\tmov.u32 %r86, 0x0;\n\tmov.u32 %r87, 0x0;\n\tmov.u32 %r88, 0x0;\n\t@%p1 ld.global.v4.b32 { %r85, %r86, %r87, %r88 }, [ %rd22 + 0 ];\n\tmov.b32 \t%f85, %r85;\n\tmov.b32 \t%f86, %r86;\n\tmov.b32 \t%f87, %r87;\n\tmov.b32 \t%f88, %r88;\n\tmov.u32 %r89, 0x0;\n\tmov.u32 %r90, 0x0;\n\tmov.u32 %r91, 0x0;\n\tmov.u32 %r92, 0x0;\n\t@%p1 ld.global.v4.b32 { %r89, %r90, %r91, %r92 }, [ %rd23 + 0 ];\n\tmov.b32 \t%f89, %r89;\n\tmov.b32 \t%f90, %r90;\n\tmov.b32 \t%f91, %r91;\n\tmov.b32 \t%f92, %r92;\n\tmov.u32 %r93, 0x0;\n\tmov.u32 %r94, 0x0;\n\tmov.u32 %r95, 0x0;\n\tmov.u32 %r96, 0x0;\n\t@%p1 ld.global.v4.b32 { %r93, %r94, %r95, %r96 }, [ %rd24 + 0 ];\n\tmov.b32 \t%f93, %r93;\n\tmov.b32 \t%f94, %r94;\n\tmov.b32 \t%f95, %r95;\n\tmov.b32 \t%f96, %r96;\n\tmov.u32 %r97, 0x0;\n\tmov.u32 %r98, 0x0;\n\tmov.u32 %r99, 0x0;\n\tmov.u32 %r100, 0x0;\n\t@%p1 ld.global.v4.b32 { %r97, %r98, %r99, %r100 }, [ %rd25 + 0 ];\n\tmov.b32 \t%f97, %r97;\n\tmov.b32 \t%f98, %r98;\n\tmov.b32 \t%f99, %r99;\n\tmov.b32 \t%f100, %r100;\n\tmov.u32 %r101, 0x0;\n\tmov.u32 %r102, 0x0;\n\tmov.u32 %r103, 0x0;\n\tmov.u32 %r104, 0x0;\n\t@%p1 ld.global.v4.b32 { %r101, %r102, %r103, %r104 }, [ %rd26 + 0 ];\n\tmov.b32 \t%f101, %r101;\n\tmov.b32 \t%f102, %r102;\n\tmov.b32 \t%f103, %r103;\n\tmov.b32 \t%f104, %r104;\n\tmov.u32 %r105, 0x0;\n\tmov.u32 %r106, 0x0;\n\tmov.u32 %r107, 0x0;\n\tmov.u32 %r108, 0x0;\n\t@%p1 ld.global.v4.b32 { %r105, %r106, %r107, %r108 }, [ %rd27 + 0 ];\n\tmov.b32 \t%f105, %r105;\n\tmov.b32 \t%f106, %r106;\n\tmov.b32 \t%f107, %r107;\n\tmov.b32 \t%f108, %r108;\n\tmov.u32 %r109, 0x0;\n\tmov.u32 %r110, 0x0;\n\tmov.u32 %r111, 0x0;\n\tmov.u32 %r112, 0x0;\n\t@%p1 ld.global.v4.b32 { %r109, %r110, %r111, %r112 }, [ %rd28 + 0 ];\n\tmov.b32 \t%f109, %r109;\n\tmov.b32 \t%f110, %r110;\n\tmov.b32 \t%f111, %r111;\n\tmov.b32 \t%f112, %r112;\n\tmov.u32 %r113, 0x0;\n\tmov.u32 %r114, 0x0;\n\tmov.u32 %r115, 0x0;\n\tmov.u32 %r116, 0x0;\n\t@%p1 ld.global.v4.b32 { %r113, %r114, %r115, %r116 }, [ %rd29 + 0 ];\n\tmov.b32 \t%f113, %r113;\n\tmov.b32 \t%f114, %r114;\n\tmov.b32 \t%f115, %r115;\n\tmov.b32 \t%f116, %r116;\n\tmov.u32 %r117, 0x0;\n\tmov.u32 %r118, 0x0;\n\tmov.u32 %r119, 0x0;\n\tmov.u32 %r120, 0x0;\n\t@%p1 ld.global.v4.b32 { %r117, %r118, %r119, %r120 }, [ %rd30 + 0 ];\n\tmov.b32 \t%f117, %r117;\n\tmov.b32 \t%f118, %r118;\n\tmov.b32 \t%f119, %r119;\n\tmov.b32 \t%f120, %r120;\n\tmov.u32 %r121, 0x0;\n\tmov.u32 %r122, 0x0;\n\tmov.u32 %r123, 0x0;\n\tmov.u32 %r124, 0x0;\n\t@%p1 ld.global.v4.b32 { %r121, %r122, %r123, %r124 }, [ %rd31 + 0 ];\n\tmov.b32 \t%f121, %r121;\n\tmov.b32 \t%f122, %r122;\n\tmov.b32 \t%f123, %r123;\n\tmov.b32 \t%f124, %r124;\n\tmov.u32 %r125, 0x0;\n\tmov.u32 %r126, 0x0;\n\tmov.u32 %r127, 0x0;\n\tmov.u32 %r128, 0x0;\n\t@%p1 ld.global.v4.b32 { %r125, %r126, %r127, %r128 }, [ %rd32 + 0 ];\n\tmov.b32 \t%f125, %r125;\n\tmov.b32 \t%f126, %r126;\n\tmov.b32 \t%f127, %r127;\n\tmov.b32 \t%f128, %r128;\n\tadd.s64 \t%rd103, %rd98, %rd100;\n\tadd.s64 \t%rd33, %rd103, %rd102;\n\tadd.s64 \t%rd34, %rd33, 2048;\n\tadd.s64 \t%rd35, %rd33, 4096;\n\tadd.s64 \t%rd36, %rd33, 6144;\n\tadd.s64 \t%rd37, %rd33, 8192;\n\tadd.s64 \t%rd38, %rd33, 10240;\n\tadd.s64 \t%rd39, %rd33, 12288;\n\tadd.s64 \t%rd40, %rd33, 14336;\n\tadd.s64 \t%rd41, %rd33, 16384;\n\tadd.s64 \t%rd42, %rd33, 18432;\n\tadd.s64 \t%rd43, %rd33, 20480;\n\tadd.s64 \t%rd44, %rd33, 22528;\n\tadd.s64 \t%rd45, %rd33, 24576;\n\tadd.s64 \t%rd46, %rd33, 26624;\n\tadd.s64 \t%rd47, %rd33, 28672;\n\tadd.s64 \t%rd48, %rd33, 30720;\n\tadd.s64 \t%rd49, %rd33, 32768;\n\tadd.s64 \t%rd50, %rd33, 34816;\n\tadd.s64 \t%rd51, %rd33, 36864;\n\tadd.s64 \t%rd52, %rd33, 38912;\n\tadd.s64 \t%rd53, %rd33, 40960;\n\tadd.s64 \t%rd54, %rd33, 43008;\n\tadd.s64 \t%rd55, %rd33, 45056;\n\tadd.s64 \t%rd56, %rd33, 47104;\n\tadd.s64 \t%rd57, %rd33, 49152;\n\tadd.s64 \t%rd58, %rd33, 51200;\n\tadd.s64 \t%rd59, %rd33, 53248;\n\tadd.s64 \t%rd60, %rd33, 55296;\n\tadd.s64 \t%rd61, %rd33, 57344;\n\tadd.s64 \t%rd62, %rd33, 59392;\n\tadd.s64 \t%rd63, %rd33, 61440;\n\tadd.s64 \t%rd64, %rd33, 63488;\n\tmov.u32 %r129, 0x0;\n\tmov.u32 %r130, 0x0;\n\tmov.u32 %r131, 0x0;\n\tmov.u32 %r132, 0x0;\n\t@%p1 ld.global.v4.b32 { %r129, %r130, %r131, %r132 }, [ %rd33 + 0 ];\n\tmov.b32 \t%f129, %r129;\n\tmov.b32 \t%f130, %r130;\n\tmov.b32 \t%f131, %r131;\n\tmov.b32 \t%f132, %r132;\n\tmov.u32 %r133, 0x0;\n\tmov.u32 %r134, 0x0;\n\tmov.u32 %r135, 0x0;\n\tmov.u32 %r136, 0x0;\n\t@%p1 ld.global.v4.b32 { %r133, %r134, %r135, %r136 }, [ %rd34 + 0 ];\n\tmov.b32 \t%f133, %r133;\n\tmov.b32 \t%f134, %r134;\n\tmov.b32 \t%f135, %r135;\n\tmov.b32 \t%f136, %r136;\n\tmov.u32 %r137, 0x0;\n\tmov.u32 %r138, 0x0;\n\tmov.u32 %r139, 0x0;\n\tmov.u32 %r140, 0x0;\n\t@%p1 ld.global.v4.b32 { %r137, %r138, %r139, %r140 }, [ %rd35 + 0 ];\n\tmov.b32 \t%f137, %r137;\n\tmov.b32 \t%f138, %r138;\n\tmov.b32 \t%f139, %r139;\n\tmov.b32 \t%f140, %r140;\n\tmov.u32 %r141, 0x0;\n\tmov.u32 %r142, 0x0;\n\tmov.u32 %r143, 0x0;\n\tmov.u32 %r144, 0x0;\n\t@%p1 ld.global.v4.b32 { %r141, %r142, %r143, %r144 }, [ %rd36 + 0 ];\n\tmov.b32 \t%f141, %r141;\n\tmov.b32 \t%f142, %r142;\n\tmov.b32 \t%f143, %r143;\n\tmov.b32 \t%f144, %r144;\n\tmov.u32 %r145, 0x0;\n\tmov.u32 %r146, 0x0;\n\tmov.u32 %r147, 0x0;\n\tmov.u32 %r148, 0x0;\n\t@%p1 ld.global.v4.b32 { %r145, %r146, %r147, %r148 }, [ %rd37 + 0 ];\n\tmov.b32 \t%f145, %r145;\n\tmov.b32 \t%f146, %r146;\n\tmov.b32 \t%f147, %r147;\n\tmov.b32 \t%f148, %r148;\n\tmov.u32 %r149, 0x0;\n\tmov.u32 %r150, 0x0;\n\tmov.u32 %r151, 0x0;\n\tmov.u32 %r152, 0x0;\n\t@%p1 ld.global.v4.b32 { %r149, %r150, %r151, %r152 }, [ %rd38 + 0 ];\n\tmov.b32 \t%f149, %r149;\n\tmov.b32 \t%f150, %r150;\n\tmov.b32 \t%f151, %r151;\n\tmov.b32 \t%f152, %r152;\n\tmov.u32 %r153, 0x0;\n\tmov.u32 %r154, 0x0;\n\tmov.u32 %r155, 0x0;\n\tmov.u32 %r156, 0x0;\n\t@%p1 ld.global.v4.b32 { %r153, %r154, %r155, %r156 }, [ %rd39 + 0 ];\n\tmov.b32 \t%f153, %r153;\n\tmov.b32 \t%f154, %r154;\n\tmov.b32 \t%f155, %r155;\n\tmov.b32 \t%f156, %r156;\n\tmov.u32 %r157, 0x0;\n\tmov.u32 %r158, 0x0;\n\tmov.u32 %r159, 0x0;\n\tmov.u32 %r160, 0x0;\n\t@%p1 ld.global.v4.b32 { %r157, %r158, %r159, %r160 }, [ %rd40 + 0 ];\n\tmov.b32 \t%f157, %r157;\n\tmov.b32 \t%f158, %r158;\n\tmov.b32 \t%f159, %r159;\n\tmov.b32 \t%f160, %r160;\n\tmov.u32 %r161, 0x0;\n\tmov.u32 %r162, 0x0;\n\tmov.u32 %r163, 0x0;\n\tmov.u32 %r164, 0x0;\n\t@%p1 ld.global.v4.b32 { %r161, %r162, %r163, %r164 }, [ %rd41 + 0 ];\n\tmov.b32 \t%f161, %r161;\n\tmov.b32 \t%f162, %r162;\n\tmov.b32 \t%f163, %r163;\n\tmov.b32 \t%f164, %r164;\n\tmov.u32 %r165, 0x0;\n\tmov.u32 %r166, 0x0;\n\tmov.u32 %r167, 0x0;\n\tmov.u32 %r168, 0x0;\n\t@%p1 ld.global.v4.b32 { %r165, %r166, %r167, %r168 }, [ %rd42 + 0 ];\n\tmov.b32 \t%f165, %r165;\n\tmov.b32 \t%f166, %r166;\n\tmov.b32 \t%f167, %r167;\n\tmov.b32 \t%f168, %r168;\n\tmov.u32 %r169, 0x0;\n\tmov.u32 %r170, 0x0;\n\tmov.u32 %r171, 0x0;\n\tmov.u32 %r172, 0x0;\n\t@%p1 ld.global.v4.b32 { %r169, %r170, %r171, %r172 }, [ %rd43 + 0 ];\n\tmov.b32 \t%f169, %r169;\n\tmov.b32 \t%f170, %r170;\n\tmov.b32 \t%f171, %r171;\n\tmov.b32 \t%f172, %r172;\n\tmov.u32 %r173, 0x0;\n\tmov.u32 %r174, 0x0;\n\tmov.u32 %r175, 0x0;\n\tmov.u32 %r176, 0x0;\n\t@%p1 ld.global.v4.b32 { %r173, %r174, %r175, %r176 }, [ %rd44 + 0 ];\n\tmov.b32 \t%f173, %r173;\n\tmov.b32 \t%f174, %r174;\n\tmov.b32 \t%f175, %r175;\n\tmov.b32 \t%f176, %r176;\n\tmov.u32 %r177, 0x0;\n\tmov.u32 %r178, 0x0;\n\tmov.u32 %r179, 0x0;\n\tmov.u32 %r180, 0x0;\n\t@%p1 ld.global.v4.b32 { %r177, %r178, %r179, %r180 }, [ %rd45 + 0 ];\n\tmov.b32 \t%f177, %r177;\n\tmov.b32 \t%f178, %r178;\n\tmov.b32 \t%f179, %r179;\n\tmov.b32 \t%f180, %r180;\n\tmov.u32 %r181, 0x0;\n\tmov.u32 %r182, 0x0;\n\tmov.u32 %r183, 0x0;\n\tmov.u32 %r184, 0x0;\n\t@%p1 ld.global.v4.b32 { %r181, %r182, %r183, %r184 }, [ %rd46 + 0 ];\n\tmov.b32 \t%f181, %r181;\n\tmov.b32 \t%f182, %r182;\n\tmov.b32 \t%f183, %r183;\n\tmov.b32 \t%f184, %r184;\n\tmov.u32 %r185, 0x0;\n\tmov.u32 %r186, 0x0;\n\tmov.u32 %r187, 0x0;\n\tmov.u32 %r188, 0x0;\n\t@%p1 ld.global.v4.b32 { %r185, %r186, %r187, %r188 }, [ %rd47 + 0 ];\n\tmov.b32 \t%f185, %r185;\n\tmov.b32 \t%f186, %r186;\n\tmov.b32 \t%f187, %r187;\n\tmov.b32 \t%f188, %r188;\n\tmov.u32 %r189, 0x0;\n\tmov.u32 %r190, 0x0;\n\tmov.u32 %r191, 0x0;\n\tmov.u32 %r192, 0x0;\n\t@%p1 ld.global.v4.b32 { %r189, %r190, %r191, %r192 }, [ %rd48 + 0 ];\n\tmov.b32 \t%f189, %r189;\n\tmov.b32 \t%f190, %r190;\n\tmov.b32 \t%f191, %r191;\n\tmov.b32 \t%f192, %r192;\n\tmov.u32 %r193, 0x0;\n\tmov.u32 %r194, 0x0;\n\tmov.u32 %r195, 0x0;\n\tmov.u32 %r196, 0x0;\n\t@%p1 ld.global.v4.b32 { %r193, %r194, %r195, %r196 }, [ %rd49 + 0 ];\n\tmov.b32 \t%f193, %r193;\n\tmov.b32 \t%f194, %r194;\n\tmov.b32 \t%f195, %r195;\n\tmov.b32 \t%f196, %r196;\n\tmov.u32 %r197, 0x0;\n\tmov.u32 %r198, 0x0;\n\tmov.u32 %r199, 0x0;\n\tmov.u32 %r200, 0x0;\n\t@%p1 ld.global.v4.b32 { %r197, %r198, %r199, %r200 }, [ %rd50 + 0 ];\n\tmov.b32 \t%f197, %r197;\n\tmov.b32 \t%f198, %r198;\n\tmov.b32 \t%f199, %r199;\n\tmov.b32 \t%f200, %r200;\n\tmov.u32 %r201, 0x0;\n\tmov.u32 %r202, 0x0;\n\tmov.u32 %r203, 0x0;\n\tmov.u32 %r204, 0x0;\n\t@%p1 ld.global.v4.b32 { %r201, %r202, %r203, %r204 }, [ %rd51 + 0 ];\n\tmov.b32 \t%f201, %r201;\n\tmov.b32 \t%f202, %r202;\n\tmov.b32 \t%f203, %r203;\n\tmov.b32 \t%f204, %r204;\n\tmov.u32 %r205, 0x0;\n\tmov.u32 %r206, 0x0;\n\tmov.u32 %r207, 0x0;\n\tmov.u32 %r208, 0x0;\n\t@%p1 ld.global.v4.b32 { %r205, %r206, %r207, %r208 }, [ %rd52 + 0 ];\n\tmov.b32 \t%f205, %r205;\n\tmov.b32 \t%f206, %r206;\n\tmov.b32 \t%f207, %r207;\n\tmov.b32 \t%f208, %r208;\n\tmov.u32 %r209, 0x0;\n\tmov.u32 %r210, 0x0;\n\tmov.u32 %r211, 0x0;\n\tmov.u32 %r212, 0x0;\n\t@%p1 ld.global.v4.b32 { %r209, %r210, %r211, %r212 }, [ %rd53 + 0 ];\n\tmov.b32 \t%f209, %r209;\n\tmov.b32 \t%f210, %r210;\n\tmov.b32 \t%f211, %r211;\n\tmov.b32 \t%f212, %r212;\n\tmov.u32 %r213, 0x0;\n\tmov.u32 %r214, 0x0;\n\tmov.u32 %r215, 0x0;\n\tmov.u32 %r216, 0x0;\n\t@%p1 ld.global.v4.b32 { %r213, %r214, %r215, %r216 }, [ %rd54 + 0 ];\n\tmov.b32 \t%f213, %r213;\n\tmov.b32 \t%f214, %r214;\n\tmov.b32 \t%f215, %r215;\n\tmov.b32 \t%f216, %r216;\n\tmov.u32 %r217, 0x0;\n\tmov.u32 %r218, 0x0;\n\tmov.u32 %r219, 0x0;\n\tmov.u32 %r220, 0x0;\n\t@%p1 ld.global.v4.b32 { %r217, %r218, %r219, %r220 }, [ %rd55 + 0 ];\n\tmov.b32 \t%f217, %r217;\n\tmov.b32 \t%f218, %r218;\n\tmov.b32 \t%f219, %r219;\n\tmov.b32 \t%f220, %r220;\n\tmov.u32 %r221, 0x0;\n\tmov.u32 %r222, 0x0;\n\tmov.u32 %r223, 0x0;\n\tmov.u32 %r224, 0x0;\n\t@%p1 ld.global.v4.b32 { %r221, %r222, %r223, %r224 }, [ %rd56 + 0 ];\n\tmov.b32 \t%f221, %r221;\n\tmov.b32 \t%f222, %r222;\n\tmov.b32 \t%f223, %r223;\n\tmov.b32 \t%f224, %r224;\n\tmov.u32 %r225, 0x0;\n\tmov.u32 %r226, 0x0;\n\tmov.u32 %r227, 0x0;\n\tmov.u32 %r228, 0x0;\n\t@%p1 ld.global.v4.b32 { %r225, %r226, %r227, %r228 }, [ %rd57 + 0 ];\n\tmov.b32 \t%f225, %r225;\n\tmov.b32 \t%f226, %r226;\n\tmov.b32 \t%f227, %r227;\n\tmov.b32 \t%f228, %r228;\n\tmov.u32 %r229, 0x0;\n\tmov.u32 %r230, 0x0;\n\tmov.u32 %r231, 0x0;\n\tmov.u32 %r232, 0x0;\n\t@%p1 ld.global.v4.b32 { %r229, %r230, %r231, %r232 }, [ %rd58 + 0 ];\n\tmov.b32 \t%f229, %r229;\n\tmov.b32 \t%f230, %r230;\n\tmov.b32 \t%f231, %r231;\n\tmov.b32 \t%f232, %r232;\n\tmov.u32 %r233, 0x0;\n\tmov.u32 %r234, 0x0;\n\tmov.u32 %r235, 0x0;\n\tmov.u32 %r236, 0x0;\n\t@%p1 ld.global.v4.b32 { %r233, %r234, %r235, %r236 }, [ %rd59 + 0 ];\n\tmov.b32 \t%f233, %r233;\n\tmov.b32 \t%f234, %r234;\n\tmov.b32 \t%f235, %r235;\n\tmov.b32 \t%f236, %r236;\n\tmov.u32 %r237, 0x0;\n\tmov.u32 %r238, 0x0;\n\tmov.u32 %r239, 0x0;\n\tmov.u32 %r240, 0x0;\n\t@%p1 ld.global.v4.b32 { %r237, %r238, %r239, %r240 }, [ %rd60 + 0 ];\n\tmov.b32 \t%f237, %r237;\n\tmov.b32 \t%f238, %r238;\n\tmov.b32 \t%f239, %r239;\n\tmov.b32 \t%f240, %r240;\n\tmov.u32 %r241, 0x0;\n\tmov.u32 %r242, 0x0;\n\tmov.u32 %r243, 0x0;\n\tmov.u32 %r244, 0x0;\n\t@%p1 ld.global.v4.b32 { %r241, %r242, %r243, %r244 }, [ %rd61 + 0 ];\n\tmov.b32 \t%f241, %r241;\n\tmov.b32 \t%f242, %r242;\n\tmov.b32 \t%f243, %r243;\n\tmov.b32 \t%f244, %r244;\n\tmov.u32 %r245, 0x0;\n\tmov.u32 %r246, 0x0;\n\tmov.u32 %r247, 0x0;\n\tmov.u32 %r248, 0x0;\n\t@%p1 ld.global.v4.b32 { %r245, %r246, %r247, %r248 }, [ %rd62 + 0 ];\n\tmov.b32 \t%f245, %r245;\n\tmov.b32 \t%f246, %r246;\n\tmov.b32 \t%f247, %r247;\n\tmov.b32 \t%f248, %r248;\n\tmov.u32 %r249, 0x0;\n\tmov.u32 %r250, 0x0;\n\tmov.u32 %r251, 0x0;\n\tmov.u32 %r252, 0x0;\n\t@%p1 ld.global.v4.b32 { %r249, %r250, %r251, %r252 }, [ %rd63 + 0 ];\n\tmov.b32 \t%f249, %r249;\n\tmov.b32 \t%f250, %r250;\n\tmov.b32 \t%f251, %r251;\n\tmov.b32 \t%f252, %r252;\n\tmov.u32 %r253, 0x0;\n\tmov.u32 %r254, 0x0;\n\tmov.u32 %r255, 0x0;\n\tmov.u32 %r256, 0x0;\n\t@%p1 ld.global.v4.b32 { %r253, %r254, %r255, %r256 }, [ %rd64 + 0 ];\n\tmov.b32 \t%f253, %r253;\n\tmov.b32 \t%f254, %r254;\n\tmov.b32 \t%f255, %r255;\n\tmov.b32 \t%f256, %r256;\n\tadd.f32 \t%f257, %f1, %f129;\n\tadd.f32 \t%f258, %f2, %f130;\n\tadd.f32 \t%f259, %f3, %f131;\n\tadd.f32 \t%f260, %f4, %f132;\n\tadd.f32 \t%f261, %f5, %f133;\n\tadd.f32 \t%f262, %f6, %f134;\n\tadd.f32 \t%f263, %f7, %f135;\n\tadd.f32 \t%f264, %f8, %f136;\n\tadd.f32 \t%f265, %f9, %f137;\n\tadd.f32 \t%f266, %f10, %f138;\n\tadd.f32 \t%f267, %f11, %f139;\n\tadd.f32 \t%f268, %f12, %f140;\n\tadd.f32 \t%f269, %f13, %f141;\n\tadd.f32 \t%f270, %f14, %f142;\n\tadd.f32 \t%f271, %f15, %f143;\n\tadd.f32 \t%f272, %f16, %f144;\n\tadd.f32 \t%f273, %f17, %f145;\n\tadd.f32 \t%f274, %f18, %f146;\n\tadd.f32 \t%f275, %f19, %f147;\n\tadd.f32 \t%f276, %f20, %f148;\n\tadd.f32 \t%f277, %f21, %f149;\n\tadd.f32 \t%f278, %f22, %f150;\n\tadd.f32 \t%f279, %f23, %f151;\n\tadd.f32 \t%f280, %f24, %f152;\n\tadd.f32 \t%f281, %f25, %f153;\n\tadd.f32 \t%f282, %f26, %f154;\n\tadd.f32 \t%f283, %f27, %f155;\n\tadd.f32 \t%f284, %f28, %f156;\n\tadd.f32 \t%f285, %f29, %f157;\n\tadd.f32 \t%f286, %f30, %f158;\n\tadd.f32 \t%f287, %f31, %f159;\n\tadd.f32 \t%f288, %f32, %f160;\n\tadd.f32 \t%f289, %f33, %f161;\n\tadd.f32 \t%f290, %f34, %f162;\n\tadd.f32 \t%f291, %f35, %f163;\n\tadd.f32 \t%f292, %f36, %f164;\n\tadd.f32 \t%f293, %f37, %f165;\n\tadd.f32 \t%f294, %f38, %f166;\n\tadd.f32 \t%f295, %f39, %f167;\n\tadd.f32 \t%f296, %f40, %f168;\n\tadd.f32 \t%f297, %f41, %f169;\n\tadd.f32 \t%f298, %f42, %f170;\n\tadd.f32 \t%f299, %f43, %f171;\n\tadd.f32 \t%f300, %f44, %f172;\n\tadd.f32 \t%f301, %f45, %f173;\n\tadd.f32 \t%f302, %f46, %f174;\n\tadd.f32 \t%f303, %f47, %f175;\n\tadd.f32 \t%f304, %f48, %f176;\n\tadd.f32 \t%f305, %f49, %f177;\n\tadd.f32 \t%f306, %f50, %f178;\n\tadd.f32 \t%f307, %f51, %f179;\n\tadd.f32 \t%f308, %f52, %f180;\n\tadd.f32 \t%f309, %f53, %f181;\n\tadd.f32 \t%f310, %f54, %f182;\n\tadd.f32 \t%f311, %f55, %f183;\n\tadd.f32 \t%f312, %f56, %f184;\n\tadd.f32 \t%f313, %f57, %f185;\n\tadd.f32 \t%f314, %f58, %f186;\n\tadd.f32 \t%f315, %f59, %f187;\n\tadd.f32 \t%f316, %f60, %f188;\n\tadd.f32 \t%f317, %f61, %f189;\n\tadd.f32 \t%f318, %f62, %f190;\n\tadd.f32 \t%f319, %f63, %f191;\n\tadd.f32 \t%f320, %f64, %f192;\n\tadd.f32 \t%f321, %f65, %f193;\n\tadd.f32 \t%f322, %f66, %f194;\n\tadd.f32 \t%f323, %f67, %f195;\n\tadd.f32 \t%f324, %f68, %f196;\n\tadd.f32 \t%f325, %f69, %f197;\n\tadd.f32 \t%f326, %f70, %f198;\n\tadd.f32 \t%f327, %f71, %f199;\n\tadd.f32 \t%f328, %f72, %f200;\n\tadd.f32 \t%f329, %f73, %f201;\n\tadd.f32 \t%f330, %f74, %f202;\n\tadd.f32 \t%f331, %f75, %f203;\n\tadd.f32 \t%f332, %f76, %f204;\n\tadd.f32 \t%f333, %f77, %f205;\n\tadd.f32 \t%f334, %f78, %f206;\n\tadd.f32 \t%f335, %f79, %f207;\n\tadd.f32 \t%f336, %f80, %f208;\n\tadd.f32 \t%f337, %f81, %f209;\n\tadd.f32 \t%f338, %f82, %f210;\n\tadd.f32 \t%f339, %f83, %f211;\n\tadd.f32 \t%f340, %f84, %f212;\n\tadd.f32 \t%f341, %f85, %f213;\n\tadd.f32 \t%f342, %f86, %f214;\n\tadd.f32 \t%f343, %f87, %f215;\n\tadd.f32 \t%f344, %f88, %f216;\n\tadd.f32 \t%f345, %f89, %f217;\n\tadd.f32 \t%f346, %f90, %f218;\n\tadd.f32 \t%f347, %f91, %f219;\n\tadd.f32 \t%f348, %f92, %f220;\n\tadd.f32 \t%f349, %f93, %f221;\n\tadd.f32 \t%f350, %f94, %f222;\n\tadd.f32 \t%f351, %f95, %f223;\n\tadd.f32 \t%f352, %f96, %f224;\n\tadd.f32 \t%f353, %f97, %f225;\n\tadd.f32 \t%f354, %f98, %f226;\n\tadd.f32 \t%f355, %f99, %f227;\n\tadd.f32 \t%f356, %f100, %f228;\n\tadd.f32 \t%f357, %f101, %f229;\n\tadd.f32 \t%f358, %f102, %f230;\n\tadd.f32 \t%f359, %f103, %f231;\n\tadd.f32 \t%f360, %f104, %f232;\n\tadd.f32 \t%f361, %f105, %f233;\n\tadd.f32 \t%f362, %f106, %f234;\n\tadd.f32 \t%f363, %f107, %f235;\n\tadd.f32 \t%f364, %f108, %f236;\n\tadd.f32 \t%f365, %f109, %f237;\n\tadd.f32 \t%f366, %f110, %f238;\n\tadd.f32 \t%f367, %f111, %f239;\n\tadd.f32 \t%f368, %f112, %f240;\n\tadd.f32 \t%f369, %f113, %f241;\n\tadd.f32 \t%f370, %f114, %f242;\n\tadd.f32 \t%f371, %f115, %f243;\n\tadd.f32 \t%f372, %f116, %f244;\n\tadd.f32 \t%f373, %f117, %f245;\n\tadd.f32 \t%f374, %f118, %f246;\n\tadd.f32 \t%f375, %f119, %f247;\n\tadd.f32 \t%f376, %f120, %f248;\n\tadd.f32 \t%f377, %f121, %f249;\n\tadd.f32 \t%f378, %f122, %f250;\n\tadd.f32 \t%f379, %f123, %f251;\n\tadd.f32 \t%f380, %f124, %f252;\n\tadd.f32 \t%f381, %f125, %f253;\n\tadd.f32 \t%f382, %f126, %f254;\n\tadd.f32 \t%f383, %f127, %f255;\n\tadd.f32 \t%f384, %f128, %f256;\n\tadd.f32 \t%f385, %f257, %f257;\n\tadd.f32 \t%f386, %f258, %f258;\n\tadd.f32 \t%f387, %f259, %f259;\n\tadd.f32 \t%f388, %f260, %f260;\n\tadd.f32 \t%f389, %f261, %f261;\n\tadd.f32 \t%f390, %f262, %f262;\n\tadd.f32 \t%f391, %f263, %f263;\n\tadd.f32 \t%f392, %f264, %f264;\n\tadd.f32 \t%f393, %f265, %f265;\n\tadd.f32 \t%f394, %f266, %f266;\n\tadd.f32 \t%f395, %f267, %f267;\n\tadd.f32 \t%f396, %f268, %f268;\n\tadd.f32 \t%f397, %f269, %f269;\n\tadd.f32 \t%f398, %f270, %f270;\n\tadd.f32 \t%f399, %f271, %f271;\n\tadd.f32 \t%f400, %f272, %f272;\n\tadd.f32 \t%f401, %f273, %f273;\n\tadd.f32 \t%f402, %f274, %f274;\n\tadd.f32 \t%f403, %f275, %f275;\n\tadd.f32 \t%f404, %f276, %f276;\n\tadd.f32 \t%f405, %f277, %f277;\n\tadd.f32 \t%f406, %f278, %f278;\n\tadd.f32 \t%f407, %f279, %f279;\n\tadd.f32 \t%f408, %f280, %f280;\n\tadd.f32 \t%f409, %f281, %f281;\n\tadd.f32 \t%f410, %f282, %f282;\n\tadd.f32 \t%f411, %f283, %f283;\n\tadd.f32 \t%f412, %f284, %f284;\n\tadd.f32 \t%f413, %f285, %f285;\n\tadd.f32 \t%f414, %f286, %f286;\n\tadd.f32 \t%f415, %f287, %f287;\n\tadd.f32 \t%f416, %f288, %f288;\n\tadd.f32 \t%f417, %f289, %f289;\n\tadd.f32 \t%f418, %f290, %f290;\n\tadd.f32 \t%f419, %f291, %f291;\n\tadd.f32 \t%f420, %f292, %f292;\n\tadd.f32 \t%f421, %f293, %f293;\n\tadd.f32 \t%f422, %f294, %f294;\n\tadd.f32 \t%f423, %f295, %f295;\n\tadd.f32 \t%f424, %f296, %f296;\n\tadd.f32 \t%f425, %f297, %f297;\n\tadd.f32 \t%f426, %f298, %f298;\n\tadd.f32 \t%f427, %f299, %f299;\n\tadd.f32 \t%f428, %f300, %f300;\n\tadd.f32 \t%f429, %f301, %f301;\n\tadd.f32 \t%f430, %f302, %f302;\n\tadd.f32 \t%f431, %f303, %f303;\n\tadd.f32 \t%f432, %f304, %f304;\n\tadd.f32 \t%f433, %f305, %f305;\n\tadd.f32 \t%f434, %f306, %f306;\n\tadd.f32 \t%f435, %f307, %f307;\n\tadd.f32 \t%f436, %f308, %f308;\n\tadd.f32 \t%f437, %f309, %f309;\n\tadd.f32 \t%f438, %f310, %f310;\n\tadd.f32 \t%f439, %f311, %f311;\n\tadd.f32 \t%f440, %f312, %f312;\n\tadd.f32 \t%f441, %f313, %f313;\n\tadd.f32 \t%f442, %f314, %f314;\n\tadd.f32 \t%f443, %f315, %f315;\n\tadd.f32 \t%f444, %f316, %f316;\n\tadd.f32 \t%f445, %f317, %f317;\n\tadd.f32 \t%f446, %f318, %f318;\n\tadd.f32 \t%f447, %f319, %f319;\n\tadd.f32 \t%f448, %f320, %f320;\n\tadd.f32 \t%f449, %f321, %f321;\n\tadd.f32 \t%f450, %f322, %f322;\n\tadd.f32 \t%f451, %f323, %f323;\n\tadd.f32 \t%f452, %f324, %f324;\n\tadd.f32 \t%f453, %f325, %f325;\n\tadd.f32 \t%f454, %f326, %f326;\n\tadd.f32 \t%f455, %f327, %f327;\n\tadd.f32 \t%f456, %f328, %f328;\n\tadd.f32 \t%f457, %f329, %f329;\n\tadd.f32 \t%f458, %f330, %f330;\n\tadd.f32 \t%f459, %f331, %f331;\n\tadd.f32 \t%f460, %f332, %f332;\n\tadd.f32 \t%f461, %f333, %f333;\n\tadd.f32 \t%f462, %f334, %f334;\n\tadd.f32 \t%f463, %f335, %f335;\n\tadd.f32 \t%f464, %f336, %f336;\n\tadd.f32 \t%f465, %f337, %f337;\n\tadd.f32 \t%f466, %f338, %f338;\n\tadd.f32 \t%f467, %f339, %f339;\n\tadd.f32 \t%f468, %f340, %f340;\n\tadd.f32 \t%f469, %f341, %f341;\n\tadd.f32 \t%f470, %f342, %f342;\n\tadd.f32 \t%f471, %f343, %f343;\n\tadd.f32 \t%f472, %f344, %f344;\n\tadd.f32 \t%f473, %f345, %f345;\n\tadd.f32 \t%f474, %f346, %f346;\n\tadd.f32 \t%f475, %f347, %f347;\n\tadd.f32 \t%f476, %f348, %f348;\n\tadd.f32 \t%f477, %f349, %f349;\n\tadd.f32 \t%f478, %f350, %f350;\n\tadd.f32 \t%f479, %f351, %f351;\n\tadd.f32 \t%f480, %f352, %f352;\n\tadd.f32 \t%f481, %f353, %f353;\n\tadd.f32 \t%f482, %f354, %f354;\n\tadd.f32 \t%f483, %f355, %f355;\n\tadd.f32 \t%f484, %f356, %f356;\n\tadd.f32 \t%f485, %f357, %f357;\n\tadd.f32 \t%f486, %f358, %f358;\n\tadd.f32 \t%f487, %f359, %f359;\n\tadd.f32 \t%f488, %f360, %f360;\n\tadd.f32 \t%f489, %f361, %f361;\n\tadd.f32 \t%f490, %f362, %f362;\n\tadd.f32 \t%f491, %f363, %f363;\n\tadd.f32 \t%f492, %f364, %f364;\n\tadd.f32 \t%f493, %f365, %f365;\n\tadd.f32 \t%f494, %f366, %f366;\n\tadd.f32 \t%f495, %f367, %f367;\n\tadd.f32 \t%f496, %f368, %f368;\n\tadd.f32 \t%f497, %f369, %f369;\n\tadd.f32 \t%f498, %f370, %f370;\n\tadd.f32 \t%f499, %f371, %f371;\n\tadd.f32 \t%f500, %f372, %f372;\n\tadd.f32 \t%f501, %f373, %f373;\n\tadd.f32 \t%f502, %f374, %f374;\n\tadd.f32 \t%f503, %f375, %f375;\n\tadd.f32 \t%f504, %f376, %f376;\n\tadd.f32 \t%f505, %f377, %f377;\n\tadd.f32 \t%f506, %f378, %f378;\n\tadd.f32 \t%f507, %f379, %f379;\n\tadd.f32 \t%f508, %f380, %f380;\n\tadd.f32 \t%f509, %f381, %f381;\n\tadd.f32 \t%f510, %f382, %f382;\n\tadd.f32 \t%f511, %f383, %f383;\n\tadd.f32 \t%f512, %f384, %f384;\n\tadd.s64 \t%rd104, %rd99, %rd100;\n\tadd.s64 \t%rd65, %rd104, %rd102;\n\tadd.s64 \t%rd66, %rd65, 2048;\n\tadd.s64 \t%rd67, %rd65, 4096;\n\tadd.s64 \t%rd68, %rd65, 6144;\n\tadd.s64 \t%rd69, %rd65, 8192;\n\tadd.s64 \t%rd70, %rd65, 10240;\n\tadd.s64 \t%rd71, %rd65, 12288;\n\tadd.s64 \t%rd72, %rd65, 14336;\n\tadd.s64 \t%rd73, %rd65, 16384;\n\tadd.s64 \t%rd74, %rd65, 18432;\n\tadd.s64 \t%rd75, %rd65, 20480;\n\tadd.s64 \t%rd76, %rd65, 22528;\n\tadd.s64 \t%rd77, %rd65, 24576;\n\tadd.s64 \t%rd78, %rd65, 26624;\n\tadd.s64 \t%rd79, %rd65, 28672;\n\tadd.s64 \t%rd80, %rd65, 30720;\n\tadd.s64 \t%rd81, %rd65, 32768;\n\tadd.s64 \t%rd82, %rd65, 34816;\n\tadd.s64 \t%rd83, %rd65, 36864;\n\tadd.s64 \t%rd84, %rd65, 38912;\n\tadd.s64 \t%rd85, %rd65, 40960;\n\tadd.s64 \t%rd86, %rd65, 43008;\n\tadd.s64 \t%rd87, %rd65, 45056;\n\tadd.s64 \t%rd88, %rd65, 47104;\n\tadd.s64 \t%rd89, %rd65, 49152;\n\tadd.s64 \t%rd90, %rd65, 51200;\n\tadd.s64 \t%rd91, %rd65, 53248;\n\tadd.s64 \t%rd92, %rd65, 55296;\n\tadd.s64 \t%rd93, %rd65, 57344;\n\tadd.s64 \t%rd94, %rd65, 59392;\n\tadd.s64 \t%rd95, %rd65, 61440;\n\tadd.s64 \t%rd96, %rd65, 63488;\n\tmov.b32 \t%r257, %f385;\n\tmov.b32 \t%r258, %f386;\n\tmov.b32 \t%r259, %f387;\n\tmov.b32 \t%r260, %f388;\n\t@%p1 st.global.v4.b32 [ %rd65 + 0 ], { %r257, %r258, %r259, %r260 };\n\tmov.b32 \t%r261, %f389;\n\tmov.b32 \t%r262, %f390;\n\tmov.b32 \t%r263, %f391;\n\tmov.b32 \t%r264, %f392;\n\t@%p1 st.global.v4.b32 [ %rd66 + 0 ], { %r261, %r262, %r263, %r264 };\n\tmov.b32 \t%r265, %f393;\n\tmov.b32 \t%r266, %f394;\n\tmov.b32 \t%r267, %f395;\n\tmov.b32 \t%r268, %f396;\n\t@%p1 st.global.v4.b32 [ %rd67 + 0 ], { %r265, %r266, %r267, %r268 };\n\tmov.b32 \t%r269, %f397;\n\tmov.b32 \t%r270, %f398;\n\tmov.b32 \t%r271, %f399;\n\tmov.b32 \t%r272, %f400;\n\t@%p1 st.global.v4.b32 [ %rd68 + 0 ], { %r269, %r270, %r271, %r272 };\n\tmov.b32 \t%r273, %f401;\n\tmov.b32 \t%r274, %f402;\n\tmov.b32 \t%r275, %f403;\n\tmov.b32 \t%r276, %f404;\n\t@%p1 st.global.v4.b32 [ %rd69 + 0 ], { %r273, %r274, %r275, %r276 };\n\tmov.b32 \t%r277, %f405;\n\tmov.b32 \t%r278, %f406;\n\tmov.b32 \t%r279, %f407;\n\tmov.b32 \t%r280, %f408;\n\t@%p1 st.global.v4.b32 [ %rd70 + 0 ], { %r277, %r278, %r279, %r280 };\n\tmov.b32 \t%r281, %f409;\n\tmov.b32 \t%r282, %f410;\n\tmov.b32 \t%r283, %f411;\n\tmov.b32 \t%r284, %f412;\n\t@%p1 st.global.v4.b32 [ %rd71 + 0 ], { %r281, %r282, %r283, %r284 };\n\tmov.b32 \t%r285, %f413;\n\tmov.b32 \t%r286, %f414;\n\tmov.b32 \t%r287, %f415;\n\tmov.b32 \t%r288, %f416;\n\t@%p1 st.global.v4.b32 [ %rd72 + 0 ], { %r285, %r286, %r287, %r288 };\n\tmov.b32 \t%r289, %f417;\n\tmov.b32 \t%r290, %f418;\n\tmov.b32 \t%r291, %f419;\n\tmov.b32 \t%r292, %f420;\n\t@%p1 st.global.v4.b32 [ %rd73 + 0 ], { %r289, %r290, %r291, %r292 };\n\tmov.b32 \t%r293, %f421;\n\tmov.b32 \t%r294, %f422;\n\tmov.b32 \t%r295, %f423;\n\tmov.b32 \t%r296, %f424;\n\t@%p1 st.global.v4.b32 [ %rd74 + 0 ], { %r293, %r294, %r295, %r296 };\n\tmov.b32 \t%r297, %f425;\n\tmov.b32 \t%r298, %f426;\n\tmov.b32 \t%r299, %f427;\n\tmov.b32 \t%r300, %f428;\n\t@%p1 st.global.v4.b32 [ %rd75 + 0 ], { %r297, %r298, %r299, %r300 };\n\tmov.b32 \t%r301, %f429;\n\tmov.b32 \t%r302, %f430;\n\tmov.b32 \t%r303, %f431;\n\tmov.b32 \t%r304, %f432;\n\t@%p1 st.global.v4.b32 [ %rd76 + 0 ], { %r301, %r302, %r303, %r304 };\n\tmov.b32 \t%r305, %f433;\n\tmov.b32 \t%r306, %f434;\n\tmov.b32 \t%r307, %f435;\n\tmov.b32 \t%r308, %f436;\n\t@%p1 st.global.v4.b32 [ %rd77 + 0 ], { %r305, %r306, %r307, %r308 };\n\tmov.b32 \t%r309, %f437;\n\tmov.b32 \t%r310, %f438;\n\tmov.b32 \t%r311, %f439;\n\tmov.b32 \t%r312, %f440;\n\t@%p1 st.global.v4.b32 [ %rd78 + 0 ], { %r309, %r310, %r311, %r312 };\n\tmov.b32 \t%r313, %f441;\n\tmov.b32 \t%r314, %f442;\n\tmov.b32 \t%r315, %f443;\n\tmov.b32 \t%r316, %f444;\n\t@%p1 st.global.v4.b32 [ %rd79 + 0 ], { %r313, %r314, %r315, %r316 };\n\tmov.b32 \t%r317, %f445;\n\tmov.b32 \t%r318, %f446;\n\tmov.b32 \t%r319, %f447;\n\tmov.b32 \t%r320, %f448;\n\t@%p1 st.global.v4.b32 [ %rd80 + 0 ], { %r317, %r318, %r319, %r320 };\n\tmov.b32 \t%r321, %f449;\n\tmov.b32 \t%r322, %f450;\n\tmov.b32 \t%r323, %f451;\n\tmov.b32 \t%r324, %f452;\n\t@%p1 st.global.v4.b32 [ %rd81 + 0 ], { %r321, %r322, %r323, %r324 };\n\tmov.b32 \t%r325, %f453;\n\tmov.b32 \t%r326, %f454;\n\tmov.b32 \t%r327, %f455;\n\tmov.b32 \t%r328, %f456;\n\t@%p1 st.global.v4.b32 [ %rd82 + 0 ], { %r325, %r326, %r327, %r328 };\n\tmov.b32 \t%r329, %f457;\n\tmov.b32 \t%r330, %f458;\n\tmov.b32 \t%r331, %f459;\n\tmov.b32 \t%r332, %f460;\n\t@%p1 st.global.v4.b32 [ %rd83 + 0 ], { %r329, %r330, %r331, %r332 };\n\tmov.b32 \t%r333, %f461;\n\tmov.b32 \t%r334, %f462;\n\tmov.b32 \t%r335, %f463;\n\tmov.b32 \t%r336, %f464;\n\t@%p1 st.global.v4.b32 [ %rd84 + 0 ], { %r333, %r334, %r335, %r336 };\n\tmov.b32 \t%r337, %f465;\n\tmov.b32 \t%r338, %f466;\n\tmov.b32 \t%r339, %f467;\n\tmov.b32 \t%r340, %f468;\n\t@%p1 st.global.v4.b32 [ %rd85 + 0 ], { %r337, %r338, %r339, %r340 };\n\tmov.b32 \t%r341, %f469;\n\tmov.b32 \t%r342, %f470;\n\tmov.b32 \t%r343, %f471;\n\tmov.b32 \t%r344, %f472;\n\t@%p1 st.global.v4.b32 [ %rd86 + 0 ], { %r341, %r342, %r343, %r344 };\n\tmov.b32 \t%r345, %f473;\n\tmov.b32 \t%r346, %f474;\n\tmov.b32 \t%r347, %f475;\n\tmov.b32 \t%r348, %f476;\n\t@%p1 st.global.v4.b32 [ %rd87 + 0 ], { %r345, %r346, %r347, %r348 };\n\tmov.b32 \t%r349, %f477;\n\tmov.b32 \t%r350, %f478;\n\tmov.b32 \t%r351, %f479;\n\tmov.b32 \t%r352, %f480;\n\t@%p1 st.global.v4.b32 [ %rd88 + 0 ], { %r349, %r350, %r351, %r352 };\n\tmov.b32 \t%r353, %f481;\n\tmov.b32 \t%r354, %f482;\n\tmov.b32 \t%r355, %f483;\n\tmov.b32 \t%r356, %f484;\n\t@%p1 st.global.v4.b32 [ %rd89 + 0 ], { %r353, %r354, %r355, %r356 };\n\tmov.b32 \t%r357, %f485;\n\tmov.b32 \t%r358, %f486;\n\tmov.b32 \t%r359, %f487;\n\tmov.b32 \t%r360, %f488;\n\t@%p1 st.global.v4.b32 [ %rd90 + 0 ], { %r357, %r358, %r359, %r360 };\n\tmov.b32 \t%r361, %f489;\n\tmov.b32 \t%r362, %f490;\n\tmov.b32 \t%r363, %f491;\n\tmov.b32 \t%r364, %f492;\n\t@%p1 st.global.v4.b32 [ %rd91 + 0 ], { %r361, %r362, %r363, %r364 };\n\tmov.b32 \t%r365, %f493;\n\tmov.b32 \t%r366, %f494;\n\tmov.b32 \t%r367, %f495;\n\tmov.b32 \t%r368, %f496;\n\t@%p1 st.global.v4.b32 [ %rd92 + 0 ], { %r365, %r366, %r367, %r368 };\n\tmov.b32 \t%r369, %f497;\n\tmov.b32 \t%r370, %f498;\n\tmov.b32 \t%r371, %f499;\n\tmov.b32 \t%r372, %f500;\n\t@%p1 st.global.v4.b32 [ %rd93 + 0 ], { %r369, %r370, %r371, %r372 };\n\tmov.b32 \t%r373, %f501;\n\tmov.b32 \t%r374, %f502;\n\tmov.b32 \t%r375, %f503;\n\tmov.b32 \t%r376, %f504;\n\t@%p1 st.global.v4.b32 [ %rd94 + 0 ], { %r373, %r374, %r375, %r376 };\n\tmov.b32 \t%r377, %f505;\n\tmov.b32 \t%r378, %f506;\n\tmov.b32 \t%r379, %f507;\n\tmov.b32 \t%r380, %f508;\n\t@%p1 st.global.v4.b32 [ %rd95 + 0 ], { %r377, %r378, %r379, %r380 };\n\tmov.b32 \t%r381, %f509;\n\tmov.b32 \t%r382, %f510;\n\tmov.b32 \t%r383, %f511;\n\tmov.b32 \t%r384, %f512;\n\t@%p1 st.global.v4.b32 [ %rd96 + 0 ], { %r381, %r382, %r383, %r384 };\n\tret;\n\n}\n', 'module {\n tt.func public @add(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}) attributes {noinline = false} {\n %0 = tt.get_program_id x : i32\n %1 = tt.get_program_id y : i32\n %2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32 = arith.constant 128 : i32\n %cst = arith.constant dense<128> : tensor<128x1xi32>\n %4 = arith.muli %3, %cst : tensor<128x1xi32>\n %5 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %6 = tt.expand_dims %5 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32 = arith.constant 1 : i32\n %cst_0 = arith.constant dense<1> : tensor<1x128xi32>\n %7 = arith.muli %6, %cst_0 : tensor<1x128xi32>\n %8 = tt.broadcast %4 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %9 = tt.broadcast %7 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %10 = tt.splat %arg0 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %11 = tt.addptr %10, %8 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %12 = tt.addptr %11, %9 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %13 = tt.load %12 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>\n %14 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %15 = tt.expand_dims %14 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32_1 = arith.constant 128 : i32\n %cst_2 = arith.constant dense<128> : tensor<128x1xi32>\n %16 = arith.muli %15, %cst_2 : tensor<128x1xi32>\n %17 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %18 = tt.expand_dims %17 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32_3 = arith.constant 1 : i32\n %cst_4 = arith.constant dense<1> : tensor<1x128xi32>\n %19 = arith.muli %18, %cst_4 : tensor<1x128xi32>\n %20 = tt.broadcast %16 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %21 = tt.broadcast %19 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %22 = tt.splat %arg1 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %23 = tt.addptr %22, %20 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %24 = tt.addptr %23, %21 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %25 = tt.load %24 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>\n %26 = arith.addf %13, %25 : tensor<128x128xf32>\n %cst_5 = arith.constant 2.000000e+00 : f32\n %cst_6 = arith.constant dense<2.000000e+00> : tensor<128x128xf32>\n %27 = arith.mulf %26, %cst_6 : tensor<128x128xf32>\n %28 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %29 = tt.expand_dims %28 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>\n %c128_i32_7 = arith.constant 128 : i32\n %cst_8 = arith.constant dense<128> : tensor<128x1xi32>\n %30 = arith.muli %29, %cst_8 : tensor<128x1xi32>\n %31 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>\n %32 = tt.expand_dims %31 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>\n %c1_i32_9 = arith.constant 1 : i32\n %cst_10 = arith.constant dense<1> : tensor<1x128xi32>\n %33 = arith.muli %32, %cst_10 : tensor<1x128xi32>\n %34 = tt.broadcast %30 : (tensor<128x1xi32>) -> tensor<128x128xi32>\n %35 = tt.broadcast %33 : (tensor<1x128xi32>) -> tensor<128x128xi32>\n %36 = tt.splat %arg2 : (!tt.ptr) -> tensor<128x128x!tt.ptr>\n %37 = tt.addptr %36, %34 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n %38 = tt.addptr %37, %35 : tensor<128x128x!tt.ptr>, tensor<128x128xi32>\n tt.store %38, %27 {cache = 1 : i32, evict = 1 : i32} : tensor<128x128xf32>\n tt.return\n }\n}\n', 89 Process finished with exit code 1