Converting ONNX Models with ONNX MLIR
Using the ONNX MLIR binaries built in the previous article, let's convert a pretrained model distributed through the ONNX Model Zoo.
Setting the PATH environment variable
Assume the ONNX MLIR build directory is ~/workspace/onnx-mlir/build.
Under ~/workspace/onnx-mlir/build/bin there should be the following two executables:
onnx-mlir
- presumably corresponds to the compiler front end(?)
onnx-mlir-opt
- a tool for testing compiler passes
Add this directory to the PATH environment variable so that these executables can be run as commands.
export PATH=$PATH:~/workspace/onnx-mlir/build/bin
Fetching an ONNX model
As an example, download the pretrained MNIST model.
cd ~/workspace
wget https://github.com/onnx/models/raw/master/vision/classification/mnist/model/mnist-8.onnx
Running the conversion
Converting to a shared library
$ onnx-mlir --EmitLib mnist-8.onnx
%22 = "krnl.getref"(%21, %c0_i64) : (memref<4096xi8>, i64) -> memref<1x1x32x32xf32>
%20 = "krnl.getref"(%19, %c0_i64) : (memref<25088xi8>, i64) -> memref<1x8x28x28xf32>
%18 = "krnl.getref"(%17, %c0_i64) : (memref<25088xi8>, i64) -> memref<1x8x28x28xf32>
%16 = "krnl.getref"(%15, %c0_i64) : (memref<25088xi8>, i64) -> memref<1x8x28x28xf32>
%14 = "krnl.getref"(%13, %c0_i64) : (memref<6272xi8>, i64) -> memref<1x8x14x14xf32>
%12 = "krnl.getref"(%11, %c0_i64) : (memref<10368xi8>, i64) -> memref<1x8x18x18xf32>
%10 = "krnl.getref"(%9, %c0_i64) : (memref<12544xi8>, i64) -> memref<1x16x14x14xf32>
%8 = "krnl.getref"(%7, %c0_i64) : (memref<12544xi8>, i64) -> memref<1x16x14x14xf32>
%6 = "krnl.getref"(%5, %c0_i64) : (memref<12544xi8>, i64) -> memref<1x16x14x14xf32>
%4 = "krnl.getref"(%3, %c0_i64) : (memref<1024xi8>, i64) -> memref<1x16x4x4xf32>
%2 = "krnl.getref"(%1, %c0_i64) : (memref<1024xi8>, i64) -> memref<1x256xf32>
This generates the shared library mnist-8.so.
At the time of writing there is very little documentation, so you have to work out how to use the generated library by reading the test code and similar sources.
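As a rough guide, here is a minimal C sketch of how mnist-8.so might be driven. The extern signatures are copied from the llvm.func declarations that appear in the LLVM dialect output later in this article; the entry-point symbol _dyn_entry_point_main_graph and the idea of filling the buffer returned by getSizes are assumptions inferred from the krnl.entry_point op and onnx-mlir's test code, so verify them against your build (e.g. with nm).

/* run_mnist.c: hypothetical driver for the generated mnist-8.so.
 * The extern signatures mirror the llvm.func declarations shown in the
 * --convert-krnl-to-llvm output below; the entry-point name is an
 * assumption and should be checked against your build. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef void OrderedRtMemRefDict;  /* opaque runtime types */
typedef void RtMemRef;

extern OrderedRtMemRefDict *createOrderedRtMemRefDict(void);
extern RtMemRef *createRtMemRef(int rank);
extern void setRtMemRef(OrderedRtMemRefDict *dict, int idx, RtMemRef *rmr);
extern RtMemRef *getRtMemRef(OrderedRtMemRefDict *dict, int idx);
extern void setData(RtMemRef *rmr, void *data);
extern void *getData(RtMemRef *rmr);
extern int64_t *getSizes(RtMemRef *rmr);

/* Assumed symbol generated from "krnl.entry_point" (unverified). */
extern OrderedRtMemRefDict *_dyn_entry_point_main_graph(OrderedRtMemRefDict *);

int main(void) {
  static float image[1 * 1 * 28 * 28];            /* zero-filled dummy input */
  RtMemRef *input = createRtMemRef(4);            /* rank-4 tensor */
  int64_t shape[4] = {1, 1, 28, 28};
  memcpy(getSizes(input), shape, sizeof(shape));  /* fill the sizes buffer */
  setData(input, image);                          /* dtype handling omitted */

  OrderedRtMemRefDict *args = createOrderedRtMemRefDict();
  setRtMemRef(args, 0, input);

  OrderedRtMemRefDict *results = _dyn_entry_point_main_graph(args);
  float *scores = getData(getRtMemRef(results, 0)); /* main_graph returns 1x10 */
  for (int i = 0; i < 10; ++i)
    printf("class %d: %f\n", i, scores[i]);
  return 0;
}

If the symbols match, this should link directly against the generated library, e.g. cc run_mnist.c ./mnist-8.so -o run_mnist.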
Converting to MLIR's transformation dialect
$ onnx-mlir --EmitMLIR mnist-8.onnx
%22 = "krnl.getref"(%21, %c0_i64) : (memref<4096xi8>, i64) -> memref<1x1x32x32xf32>
%20 = "krnl.getref"(%19, %c0_i64) : (memref<25088xi8>, i64) -> memref<1x8x28x28xf32>
%18 = "krnl.getref"(%17, %c0_i64) : (memref<25088xi8>, i64) -> memref<1x8x28x28xf32>
%16 = "krnl.getref"(%15, %c0_i64) : (memref<25088xi8>, i64) -> memref<1x8x28x28xf32>
%14 = "krnl.getref"(%13, %c0_i64) : (memref<6272xi8>, i64) -> memref<1x8x14x14xf32>
%12 = "krnl.getref"(%11, %c0_i64) : (memref<10368xi8>, i64) -> memref<1x8x18x18xf32>
%10 = "krnl.getref"(%9, %c0_i64) : (memref<12544xi8>, i64) -> memref<1x16x14x14xf32>
%8 = "krnl.getref"(%7, %c0_i64) : (memref<12544xi8>, i64) -> memref<1x16x14x14xf32>
%6 = "krnl.getref"(%5, %c0_i64) : (memref<12544xi8>, i64) -> memref<1x16x14x14xf32>
%4 = "krnl.getref"(%3, %c0_i64) : (memref<1024xi8>, i64) -> memref<1x16x4x4xf32>
%2 = "krnl.getref"(%1, %c0_i64) : (memref<1024xi8>, i64) -> memref<1x256xf32>
Full MLIR code written to:
mnist-8.onnx.mlir
Constant-free MLIR Code written to:
mnist-8.tmp
Use:
mnist-8.onnx.mlir
to continue lowering the code to other dialects.
The transformation dialect code is written to mnist-8.onnx.mlir. Dumping it with cat gives the following.
$ cat mnist-8.onnx.mlir
#map0 = affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2, s3)>
#map1 = affine_map<() -> (0)>
#map2 = affine_map<() -> (32)>
#map3 = affine_map<() -> (1)>
#map4 = affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2 + 2, s3 + 2)>
#map5 = affine_map<() -> (28)>
#map6 = affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 + s3, s4 + s5)>
#map7 = affine_map<() -> (5)>
#map8 = affine_map<() -> (8)>
#map9 = affine_map<()[s0, s1, s2] -> (0, s0, s1, s2)>
#map10 = affine_map<()[s0] -> (s0, 0, 0)>
#map11 = affine_map<()[s0] -> (0, s0 * 2)>
#map12 = affine_map<(d0) -> (28, d0 * -2 + 28, d0 * 2 + 2, 2)>
#map13 = affine_map<() -> (14)>
#map14 = affine_map<() -> (18)>
#map15 = affine_map<() -> (16)>
#map16 = affine_map<()[s0] -> (0, s0 * 3)>
#map17 = affine_map<(d0) -> (14, d0 * -3 + 14, d0 * 3 + 3, 3)>
#map18 = affine_map<() -> (4)>
#map19 = affine_map<()[s0, s1] -> (s0, s1)>
#map20 = affine_map<() -> (256)>
#map21 = affine_map<()[s0] -> (0, s0)>
#map22 = affine_map<() -> (10)>
module {
%0 = "krnl.packed_const"() {file_name = "/tmp/packed_const-85df46.tmp", is_le = true, size_in_bytes = 23840 : i64} : () -> i64
func @main_graph(%arg0: memref<1x1x28x28xf32>) -> memref<1x10xf32> {
%c28 = constant 28 : index
%c2 = constant 2 : index
%cst = constant 0xFF800000 : f32
%c14 = constant 14 : index
%c3 = constant 3 : index
%c1 = constant 1 : index
%c1024_i64 = constant 1024 : i64
%cst_0 = constant 1.000000e+00 : f32
%cst_1 = constant 0.000000e+00 : f32
%c0 = constant 0 : index
%c0_i64 = constant 0 : i64
%c10240_i64 = constant 10240 : i64
%c14336_i64 = constant 14336 : i64
%c39424_i64 = constant 39424 : i64
%c64512_i64 = constant 64512 : i64
%c89600_i64 = constant 89600 : i64
%c95872_i64 = constant 95872 : i64
%c106240_i64 = constant 106240 : i64
%c118784_i64 = constant 118784 : i64
%c131328_i64 = constant 131328 : i64
%c143872_i64 = constant 143872 : i64
%c144896_i64 = constant 144896 : i64
%1 = alloc() : memref<1x10xf32>
%2 = alloc() : memref<145920xi8>
%3 = "krnl.getref"(%2, %c144896_i64) : (memref<145920xi8>, i64) -> memref<1x256xf32>
%4 = "krnl.getref"(%2, %c143872_i64) : (memref<145920xi8>, i64) -> memref<1x16x4x4xf32>
%5 = "krnl.getref"(%2, %c131328_i64) : (memref<145920xi8>, i64) -> memref<1x16x14x14xf32>
%6 = "krnl.getref"(%2, %c118784_i64) : (memref<145920xi8>, i64) -> memref<1x16x14x14xf32>
%7 = "krnl.getref"(%2, %c106240_i64) : (memref<145920xi8>, i64) -> memref<1x16x14x14xf32>
%8 = "krnl.getref"(%2, %c95872_i64) : (memref<145920xi8>, i64) -> memref<1x8x18x18xf32>
%9 = "krnl.getref"(%2, %c89600_i64) : (memref<145920xi8>, i64) -> memref<1x8x14x14xf32>
%10 = "krnl.getref"(%2, %c64512_i64) : (memref<145920xi8>, i64) -> memref<1x8x28x28xf32>
%11 = "krnl.getref"(%2, %c39424_i64) : (memref<145920xi8>, i64) -> memref<1x8x28x28xf32>
%12 = "krnl.getref"(%2, %c14336_i64) : (memref<145920xi8>, i64) -> memref<1x8x28x28xf32>
%13 = "krnl.getref"(%2, %c10240_i64) : (memref<145920xi8>, i64) -> memref<1x1x32x32xf32>
%14 = "krnl.getref"(%2, %c0_i64) : (memref<145920xi8>, i64) -> memref<256x10xf32>
%15 = "krnl.global"() {name = "constant_0", offset = 0 : i64, shape = [16, 4, 4, 10]} : () -> memref<16x4x4x10xf32>
"krnl.memcpy"(%14, %15, %c10240_i64) : (memref<256x10xf32>, memref<16x4x4x10xf32>, i64) -> ()
%16 = "krnl.global"() {name = "constant_1", offset = 10240 : i64, shape = [8, 1, 5, 5]} : () -> memref<8x1x5x5xf32>
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 1 {
affine.for %arg3 = 0 to 32 {
affine.for %arg4 = 0 to 32 {
affine.store %cst_1, %13[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x1x32x32xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 1 {
affine.for %arg3 = 0 to 28 {
affine.for %arg4 = 0 to 28 {
%21 = affine.load %arg0[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x1x28x28xf32>
affine.store %21, %13[symbol(%arg1), symbol(%arg2), symbol(%arg3) + 2, symbol(%arg4) + 2] : memref<1x1x32x32xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 8 {
affine.for %arg3 = 0 to 28 {
affine.for %arg4 = 0 to 28 {
affine.store %cst_1, %12[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
affine.for %arg5 = 0 to 1 {
affine.for %arg6 = 0 to 5 {
affine.for %arg7 = 0 to 5 {
%21 = affine.load %13[symbol(%arg1), symbol(%arg5), symbol(%arg3) + symbol(%arg6), symbol(%arg4) + symbol(%arg7)] : memref<1x1x32x32xf32>
%22 = affine.load %16[symbol(%arg2), symbol(%arg5), symbol(%arg6), symbol(%arg7)] : memref<8x1x5x5xf32>
%23 = affine.load %12[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
%24 = mulf %21, %22 : f32
%25 = addf %23, %24 : f32
affine.store %25, %12[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
}
}
}
}
}
}
}
%17 = "krnl.global"() {name = "constant_2", offset = 11040 : i64, shape = [8, 1, 1], value = dense<[[[-0.161539719]], [[-0.433835655]], [[0.091641359]], [[-0.0168522168]], [[-0.0650264397]], [[-0.131737873]], [[0.0204175506]], [[-0.121110231]]]> : tensor<8x1x1xf32>} : () -> memref<8x1x1xf32>
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 8 {
affine.for %arg3 = 0 to 28 {
affine.for %arg4 = 0 to 28 {
%21 = affine.load %12[0, symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
%22 = affine.load %17[symbol(%arg2), 0, 0] : memref<8x1x1xf32>
%23 = addf %21, %22 : f32
affine.store %23, %11[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 8 {
affine.for %arg3 = 0 to 28 {
affine.for %arg4 = 0 to 28 {
%21 = affine.load %11[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
%22 = cmpf "olt", %21, %cst_1 : f32
%23 = select %22, %cst_1, %21 : f32
affine.store %23, %10[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x28x28xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 8 {
affine.for %arg3 = 0 to 14 {
affine.for %arg4 = 0 to 14 {
affine.store %cst, %9[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x14x14xf32>
%21 = affine.max #map11()[%arg3]
%22 = affine.max #map11()[%arg4]
affine.for %arg5 = 0 to min #map12(%arg3) {
affine.for %arg6 = 0 to min #map12(%arg4) {
%23 = addi %arg5, %21 : index
%24 = addi %arg6, %22 : index
%25 = load %10[%arg1, %arg2, %23, %24] : memref<1x8x28x28xf32>
%26 = affine.load %9[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x14x14xf32>
%27 = cmpf "ogt", %26, %25 : f32
%28 = select %27, %26, %25 : f32
affine.store %28, %9[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x14x14xf32>
}
}
}
}
}
}
%18 = "krnl.global"() {name = "constant_3", offset = 11040 : i64, shape = [16, 8, 5, 5]} : () -> memref<16x8x5x5xf32>
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 8 {
affine.for %arg3 = 0 to 18 {
affine.for %arg4 = 0 to 18 {
affine.store %cst_1, %8[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x18x18xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 8 {
affine.for %arg3 = 0 to 14 {
affine.for %arg4 = 0 to 14 {
%21 = affine.load %9[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x8x14x14xf32>
affine.store %21, %8[symbol(%arg1), symbol(%arg2), symbol(%arg3) + 2, symbol(%arg4) + 2] : memref<1x8x18x18xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 16 {
affine.for %arg3 = 0 to 14 {
affine.for %arg4 = 0 to 14 {
affine.store %cst_1, %7[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
affine.for %arg5 = 0 to 8 {
affine.for %arg6 = 0 to 5 {
affine.for %arg7 = 0 to 5 {
%21 = affine.load %8[symbol(%arg1), symbol(%arg5), symbol(%arg3) + symbol(%arg6), symbol(%arg4) + symbol(%arg7)] : memref<1x8x18x18xf32>
%22 = affine.load %18[symbol(%arg2), symbol(%arg5), symbol(%arg6), symbol(%arg7)] : memref<16x8x5x5xf32>
%23 = affine.load %7[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
%24 = mulf %21, %22 : f32
%25 = addf %23, %24 : f32
affine.store %25, %7[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
}
}
}
}
}
}
}
%19 = "krnl.global"() {name = "constant_4", offset = 23840 : i64, shape = [16, 1, 1], value = dense<[[[-0.0822488219]], [[-0.108868778]], [[-0.141039595]], [[-0.204869166]], [[-0.17913565]], [[-0.215438381]], [[-0.133805066]], [[-0.195724562]], [[-0.268250644]], [[-0.258212209]], [[-0.0761560649]], [[0.0132841459]], [[-0.00444464432]], [[-0.414740831]], [[-0.17879115]], [[-0.0386558883]]]> : tensor<16x1x1xf32>} : () -> memref<16x1x1xf32>
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 16 {
affine.for %arg3 = 0 to 14 {
affine.for %arg4 = 0 to 14 {
%21 = affine.load %7[0, symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
%22 = affine.load %19[symbol(%arg2), 0, 0] : memref<16x1x1xf32>
%23 = addf %21, %22 : f32
affine.store %23, %6[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 16 {
affine.for %arg3 = 0 to 14 {
affine.for %arg4 = 0 to 14 {
%21 = affine.load %6[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
%22 = cmpf "olt", %21, %cst_1 : f32
%23 = select %22, %cst_1, %21 : f32
affine.store %23, %5[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x14x14xf32>
}
}
}
}
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 16 {
affine.for %arg3 = 0 to 4 {
affine.for %arg4 = 0 to 4 {
affine.store %cst, %4[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x4x4xf32>
%21 = affine.max #map16()[%arg3]
%22 = affine.max #map16()[%arg4]
affine.for %arg5 = 0 to min #map17(%arg3) {
affine.for %arg6 = 0 to min #map17(%arg4) {
%23 = addi %arg5, %21 : index
%24 = addi %arg6, %22 : index
%25 = load %5[%arg1, %arg2, %23, %24] : memref<1x16x14x14xf32>
%26 = affine.load %4[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x4x4xf32>
%27 = cmpf "ogt", %26, %25 : f32
%28 = select %27, %26, %25 : f32
affine.store %28, %4[symbol(%arg1), symbol(%arg2), symbol(%arg3), symbol(%arg4)] : memref<1x16x4x4xf32>
}
}
}
}
}
}
"krnl.memcpy"(%3, %4, %c1024_i64) : (memref<1x256xf32>, memref<1x16x4x4xf32>, i64) -> ()
%20 = "krnl.global"() {name = "constant_5", offset = 23840 : i64, shape = [1, 10], value = dense<[[-0.0448560268, 0.00779166119, 0.0681008175, 0.0299937408, -0.126409635, 0.14021875, -0.0552849025, -0.0493838154, 0.0843220502, -0.0545404144]]> : tensor<1x10xf32>} : () -> memref<1x10xf32>
affine.for %arg1 = 0 to 1 {
affine.for %arg2 = 0 to 10 {
affine.store %cst_1, %1[symbol(%arg1), symbol(%arg2)] : memref<1x10xf32>
affine.for %arg3 = 0 to 256 {
%26 = affine.load %3[symbol(%arg1), symbol(%arg3)] : memref<1x256xf32>
%27 = affine.load %14[symbol(%arg3), symbol(%arg2)] : memref<256x10xf32>
%28 = affine.load %1[symbol(%arg1), symbol(%arg2)] : memref<1x10xf32>
%29 = mulf %26, %27 : f32
%30 = addf %28, %29 : f32
affine.store %30, %1[symbol(%arg1), symbol(%arg2)] : memref<1x10xf32>
}
%21 = affine.load %1[symbol(%arg1), symbol(%arg2)] : memref<1x10xf32>
%22 = mulf %cst_0, %21 : f32
%23 = affine.load %20[0, symbol(%arg2)] : memref<1x10xf32>
%24 = mulf %cst_0, %23 : f32
%25 = addf %22, %24 : f32
affine.store %25, %1[symbol(%arg1), symbol(%arg2)] : memref<1x10xf32>
}
}
dealloc %2 : memref<145920xi8>
return %1 : memref<1x10xf32>
}
"krnl.entry_point"() {func = @main_graph, numInputs = 1 : i32, numOutputs = 1 : i32} : () -> ()
}
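Before lowering further, it may help to restate what the affine code above computes. The following is a plain-C rendering of the first convolution block (the loop nests writing %12): zero-pad the 1x1x28x28 input into a 1x1x32x32 buffer, then apply the 8x1x5x5 weights (constant_1) as a 5x5 convolution. Function and variable names are mine; the arithmetic mirrors the affine.load / mulf / addf / affine.store sequence.

/* Plain-C rendering of the first Conv block in the affine output above. */
void conv1(const float input[1][1][28][28],
           const float weight[8][1][5][5],   /* constant_1 */
           float out[1][8][28][28]) {
  static float padded[1][1][32][32];
  /* First nest: zero-fill the padded buffer (%13). */
  for (int h = 0; h < 32; ++h)
    for (int w = 0; w < 32; ++w)
      padded[0][0][h][w] = 0.0f;
  /* Second nest: copy the input with a +2 offset on both spatial axes. */
  for (int h = 0; h < 28; ++h)
    for (int w = 0; w < 28; ++w)
      padded[0][0][h + 2][w + 2] = input[0][0][h][w];
  /* Third nest: 5x5 convolution into %12; the in-channel loop
   * (affine.for %arg5 = 0 to 1) collapses to a single iteration. */
  for (int oc = 0; oc < 8; ++oc)
    for (int h = 0; h < 28; ++h)
      for (int w = 0; w < 28; ++w) {
        out[0][oc][h][w] = 0.0f;
        for (int kh = 0; kh < 5; ++kh)
          for (int kw = 0; kw < 5; ++kw)
            out[0][oc][h][w] +=
                padded[0][0][h + kh][w + kw] * weight[oc][0][kh][kw];
      }
}

The remaining blocks follow the same pattern: a bias add (constant_2), ReLU via cmpf/select, 2x2 max pooling down to 14x14, a second Conv/bias/ReLU plus 3x3 pooling down to 4x4, and finally the 256x10 matrix multiply with bias (constant_5) for the Gemm.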
Converting the transformation dialect to the LLVM dialect
Lower the transformation dialect emitted above to the LLVM dialect.
$ onnx-mlir-opt --convert-krnl-to-llvm mnist-8.onnx.mlir
The output is shown below. It runs to more than 2,000 lines.
module {
llvm.func @setDType(!llvm<"i8*">, !llvm.i32)
llvm.func @getDType(!llvm<"i8*">) -> !llvm.i32
llvm.func @getStrides(!llvm<"i8*">) -> !llvm<"i64*">
llvm.func @getSizes(!llvm<"i8*">) -> !llvm<"i64*">
llvm.func @setRtMemRef(!llvm<"i8*">, !llvm.i32, !llvm<"i8*">)
llvm.func @getRtMemRef(!llvm<"i8*">, !llvm.i32) -> !llvm<"i8*">
llvm.func @setData(!llvm<"i8*">, !llvm<"i8*">)
llvm.func @getData(!llvm<"i8*">) -> !llvm<"i8*">
llvm.func @createRtMemRef(!llvm.i32) -> !llvm<"i8*">
llvm.func @createOrderedRtMemRefDict() -> !llvm<"i8*">
llvm.func @free(!llvm<"i8*">)
llvm.mlir.global internal constant @constant_5(dense<[[-0.0448560268, 0.00779166119, 0.0681008175, 0.0299937408, -0.126409635, 0.14021875, -0.0552849025, -0.0493838154, 0.0843220502, -0.0545404144]]> : tensor<1x10xf32>) : !llvm<"[1 x [10 x float]]">
llvm.mlir.global internal constant @constant_4(dense<[[[-0.0822488219]], [[-0.108868778]], [[-0.141039595]], [[-0.204869166]], [[-0.17913565]], [[-0.215438381]], [[-0.133805066]], [[-0.195724562]], [[-0.268250644]], [[-0.258212209]], [[-0.0761560649]], [[0.0132841459]], [[-0.00444464432]], [[-0.414740831]], [[-0.17879115]], [[-0.0386558883]]]> : tensor<16x1x1xf32>) : !llvm<"[16 x [1 x [1 x float]]]">
llvm.mlir.global internal constant @constant_2(dense<[[[-0.161539719]], [[-0.433835655]], [[0.091641359]], [[-0.0168522168]], [[-0.0650264397]], [[-0.131737873]], [[0.0204175506]], [[-0.121110231]]]> : tensor<8x1x1xf32>) : !llvm<"[8 x [1 x [1 x float]]]">
llvm.func @llvm.memcpy.p0i8.p0i8.i64(!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1)
llvm.func @malloc(!llvm.i64) -> !llvm<"i8*">
llvm.mlir.global external constant @constPackFilePath("/tmp/packed_const-85df46.tmp")
llvm.mlir.global external constant @constPackFilePathStrLen(28 : i64) : !llvm.i64
llvm.mlir.global external constant @constPackFileName("packed_const-85df46.tmp")
llvm.mlir.global external constant @constPackFileNameStrLen(23 : i64) : !llvm.i64
llvm.mlir.global external constant @constPackIsLE(1 : i8) : !llvm.i8
llvm.func @getEmbeddedConstPool(!llvm.i64) -> !llvm<"i8*">
llvm.mlir.global internal @packedConst() : !llvm<"i8*">
llvm.func @main_graph(%arg0: !llvm<"float*">, %arg1: !llvm<"float*">, %arg2: !llvm.i64, %arg3: !llvm.i64, %arg4: !llvm.i64, %arg5: !llvm.i64, %arg6: !llvm.i64, %arg7: !llvm.i64, %arg8: !llvm.i64, %arg9: !llvm.i64, %arg10: !llvm.i64) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> {
%0 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1 = llvm.insertvalue %arg0, %0[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%2 = llvm.insertvalue %arg1, %1[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%3 = llvm.insertvalue %arg2, %2[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%5 = llvm.insertvalue %arg7, %4[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%6 = llvm.insertvalue %arg4, %5[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%7 = llvm.insertvalue %arg8, %6[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%8 = llvm.insertvalue %arg5, %7[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%9 = llvm.insertvalue %arg9, %8[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%10 = llvm.insertvalue %arg6, %9[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%11 = llvm.insertvalue %arg10, %10[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%12 = llvm.mlir.addressof @packedConst : !llvm<"i8**">
%13 = llvm.mlir.constant(23840 : i64) : !llvm.i64
%14 = llvm.call @getEmbeddedConstPool(%13) : (!llvm.i64) -> !llvm<"i8*">
llvm.store %14, %12 : !llvm<"i8**">
%15 = llvm.mlir.constant(28 : index) : !llvm.i64
%16 = llvm.mlir.constant(2 : index) : !llvm.i64
%17 = llvm.mlir.constant(0xFF800000 : f32) : !llvm.float
%18 = llvm.mlir.constant(14 : index) : !llvm.i64
%19 = llvm.mlir.constant(3 : index) : !llvm.i64
%20 = llvm.mlir.constant(1 : index) : !llvm.i64
%21 = llvm.mlir.constant(1024 : i64) : !llvm.i64
%22 = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float
%23 = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
%24 = llvm.mlir.constant(0 : index) : !llvm.i64
%25 = llvm.mlir.constant(0 : i64) : !llvm.i64
%26 = llvm.mlir.constant(10240 : i64) : !llvm.i64
%27 = llvm.mlir.constant(14336 : i64) : !llvm.i64
%28 = llvm.mlir.constant(39424 : i64) : !llvm.i64
%29 = llvm.mlir.constant(64512 : i64) : !llvm.i64
%30 = llvm.mlir.constant(89600 : i64) : !llvm.i64
%31 = llvm.mlir.constant(95872 : i64) : !llvm.i64
%32 = llvm.mlir.constant(106240 : i64) : !llvm.i64
%33 = llvm.mlir.constant(118784 : i64) : !llvm.i64
%34 = llvm.mlir.constant(131328 : i64) : !llvm.i64
%35 = llvm.mlir.constant(143872 : i64) : !llvm.i64
%36 = llvm.mlir.constant(144896 : i64) : !llvm.i64
%37 = llvm.mlir.constant(1 : index) : !llvm.i64
%38 = llvm.mlir.constant(10 : index) : !llvm.i64
%39 = llvm.mul %37, %38 : !llvm.i64
%40 = llvm.mlir.null : !llvm<"float*">
%41 = llvm.mlir.constant(1 : index) : !llvm.i64
%42 = llvm.getelementptr %40[%41] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%43 = llvm.ptrtoint %42 : !llvm<"float*"> to !llvm.i64
%44 = llvm.mul %39, %43 : !llvm.i64
%45 = llvm.call @malloc(%44) : (!llvm.i64) -> !llvm<"i8*">
%46 = llvm.bitcast %45 : !llvm<"i8*"> to !llvm<"float*">
%47 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%48 = llvm.insertvalue %46, %47[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%49 = llvm.insertvalue %46, %48[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%50 = llvm.mlir.constant(0 : index) : !llvm.i64
%51 = llvm.insertvalue %50, %49[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%52 = llvm.mlir.constant(1 : index) : !llvm.i64
%53 = llvm.mlir.constant(10 : index) : !llvm.i64
%54 = llvm.insertvalue %37, %51[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%55 = llvm.insertvalue %53, %54[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%56 = llvm.insertvalue %38, %55[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%57 = llvm.insertvalue %52, %56[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%58 = llvm.mlir.constant(145920 : index) : !llvm.i64
%59 = llvm.mlir.null : !llvm<"i8*">
%60 = llvm.mlir.constant(1 : index) : !llvm.i64
%61 = llvm.getelementptr %59[%60] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%62 = llvm.ptrtoint %61 : !llvm<"i8*"> to !llvm.i64
%63 = llvm.mul %58, %62 : !llvm.i64
%64 = llvm.call @malloc(%63) : (!llvm.i64) -> !llvm<"i8*">
%65 = llvm.bitcast %64 : !llvm<"i8*"> to !llvm<"i8*">
%66 = llvm.mlir.undef : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%67 = llvm.insertvalue %65, %66[0] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%68 = llvm.insertvalue %65, %67[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%69 = llvm.mlir.constant(0 : index) : !llvm.i64
%70 = llvm.insertvalue %69, %68[2] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%71 = llvm.mlir.constant(1 : index) : !llvm.i64
%72 = llvm.insertvalue %58, %70[3, 0] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%73 = llvm.insertvalue %71, %72[4, 0] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%74 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%75 = llvm.getelementptr %74[%36] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%76 = llvm.bitcast %75 : !llvm<"i8*"> to !llvm<"float*">
%77 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%78 = llvm.insertvalue %76, %77[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%79 = llvm.insertvalue %76, %78[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%80 = llvm.mlir.constant(0 : index) : !llvm.i64
%81 = llvm.insertvalue %80, %79[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%82 = llvm.mlir.constant(1 : index) : !llvm.i64
%83 = llvm.insertvalue %82, %81[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%84 = llvm.mlir.constant(256 : index) : !llvm.i64
%85 = llvm.insertvalue %84, %83[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%86 = llvm.mlir.constant(256 : index) : !llvm.i64
%87 = llvm.insertvalue %86, %85[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%88 = llvm.mlir.constant(1 : index) : !llvm.i64
%89 = llvm.insertvalue %88, %87[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%90 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%91 = llvm.getelementptr %90[%35] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%92 = llvm.bitcast %91 : !llvm<"i8*"> to !llvm<"float*">
%93 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%94 = llvm.insertvalue %92, %93[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%95 = llvm.insertvalue %92, %94[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%96 = llvm.mlir.constant(0 : index) : !llvm.i64
%97 = llvm.insertvalue %96, %95[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%98 = llvm.mlir.constant(1 : index) : !llvm.i64
%99 = llvm.insertvalue %98, %97[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%100 = llvm.mlir.constant(256 : index) : !llvm.i64
%101 = llvm.insertvalue %100, %99[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%102 = llvm.mlir.constant(16 : index) : !llvm.i64
%103 = llvm.insertvalue %102, %101[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%104 = llvm.mlir.constant(16 : index) : !llvm.i64
%105 = llvm.insertvalue %104, %103[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%106 = llvm.mlir.constant(4 : index) : !llvm.i64
%107 = llvm.insertvalue %106, %105[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%108 = llvm.mlir.constant(4 : index) : !llvm.i64
%109 = llvm.insertvalue %108, %107[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%110 = llvm.mlir.constant(4 : index) : !llvm.i64
%111 = llvm.insertvalue %110, %109[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%112 = llvm.mlir.constant(1 : index) : !llvm.i64
%113 = llvm.insertvalue %112, %111[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%114 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%115 = llvm.getelementptr %114[%34] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%116 = llvm.bitcast %115 : !llvm<"i8*"> to !llvm<"float*">
%117 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%118 = llvm.insertvalue %116, %117[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%119 = llvm.insertvalue %116, %118[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%120 = llvm.mlir.constant(0 : index) : !llvm.i64
%121 = llvm.insertvalue %120, %119[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%122 = llvm.mlir.constant(1 : index) : !llvm.i64
%123 = llvm.insertvalue %122, %121[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%124 = llvm.mlir.constant(3136 : index) : !llvm.i64
%125 = llvm.insertvalue %124, %123[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%126 = llvm.mlir.constant(16 : index) : !llvm.i64
%127 = llvm.insertvalue %126, %125[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%128 = llvm.mlir.constant(196 : index) : !llvm.i64
%129 = llvm.insertvalue %128, %127[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%130 = llvm.mlir.constant(14 : index) : !llvm.i64
%131 = llvm.insertvalue %130, %129[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%132 = llvm.mlir.constant(14 : index) : !llvm.i64
%133 = llvm.insertvalue %132, %131[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%134 = llvm.mlir.constant(14 : index) : !llvm.i64
%135 = llvm.insertvalue %134, %133[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%136 = llvm.mlir.constant(1 : index) : !llvm.i64
%137 = llvm.insertvalue %136, %135[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%138 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%139 = llvm.getelementptr %138[%33] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%140 = llvm.bitcast %139 : !llvm<"i8*"> to !llvm<"float*">
%141 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%142 = llvm.insertvalue %140, %141[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%143 = llvm.insertvalue %140, %142[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%144 = llvm.mlir.constant(0 : index) : !llvm.i64
%145 = llvm.insertvalue %144, %143[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%146 = llvm.mlir.constant(1 : index) : !llvm.i64
%147 = llvm.insertvalue %146, %145[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%148 = llvm.mlir.constant(3136 : index) : !llvm.i64
%149 = llvm.insertvalue %148, %147[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%150 = llvm.mlir.constant(16 : index) : !llvm.i64
%151 = llvm.insertvalue %150, %149[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%152 = llvm.mlir.constant(196 : index) : !llvm.i64
%153 = llvm.insertvalue %152, %151[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%154 = llvm.mlir.constant(14 : index) : !llvm.i64
%155 = llvm.insertvalue %154, %153[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%156 = llvm.mlir.constant(14 : index) : !llvm.i64
%157 = llvm.insertvalue %156, %155[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%158 = llvm.mlir.constant(14 : index) : !llvm.i64
%159 = llvm.insertvalue %158, %157[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%160 = llvm.mlir.constant(1 : index) : !llvm.i64
%161 = llvm.insertvalue %160, %159[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%162 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%163 = llvm.getelementptr %162[%32] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%164 = llvm.bitcast %163 : !llvm<"i8*"> to !llvm<"float*">
%165 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%166 = llvm.insertvalue %164, %165[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%167 = llvm.insertvalue %164, %166[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%168 = llvm.mlir.constant(0 : index) : !llvm.i64
%169 = llvm.insertvalue %168, %167[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%170 = llvm.mlir.constant(1 : index) : !llvm.i64
%171 = llvm.insertvalue %170, %169[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%172 = llvm.mlir.constant(3136 : index) : !llvm.i64
%173 = llvm.insertvalue %172, %171[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%174 = llvm.mlir.constant(16 : index) : !llvm.i64
%175 = llvm.insertvalue %174, %173[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%176 = llvm.mlir.constant(196 : index) : !llvm.i64
%177 = llvm.insertvalue %176, %175[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%178 = llvm.mlir.constant(14 : index) : !llvm.i64
%179 = llvm.insertvalue %178, %177[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%180 = llvm.mlir.constant(14 : index) : !llvm.i64
%181 = llvm.insertvalue %180, %179[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%182 = llvm.mlir.constant(14 : index) : !llvm.i64
%183 = llvm.insertvalue %182, %181[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%184 = llvm.mlir.constant(1 : index) : !llvm.i64
%185 = llvm.insertvalue %184, %183[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%186 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%187 = llvm.getelementptr %186[%31] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%188 = llvm.bitcast %187 : !llvm<"i8*"> to !llvm<"float*">
%189 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%190 = llvm.insertvalue %188, %189[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%191 = llvm.insertvalue %188, %190[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%192 = llvm.mlir.constant(0 : index) : !llvm.i64
%193 = llvm.insertvalue %192, %191[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%194 = llvm.mlir.constant(1 : index) : !llvm.i64
%195 = llvm.insertvalue %194, %193[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%196 = llvm.mlir.constant(2592 : index) : !llvm.i64
%197 = llvm.insertvalue %196, %195[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%198 = llvm.mlir.constant(8 : index) : !llvm.i64
%199 = llvm.insertvalue %198, %197[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%200 = llvm.mlir.constant(324 : index) : !llvm.i64
%201 = llvm.insertvalue %200, %199[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%202 = llvm.mlir.constant(18 : index) : !llvm.i64
%203 = llvm.insertvalue %202, %201[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%204 = llvm.mlir.constant(18 : index) : !llvm.i64
%205 = llvm.insertvalue %204, %203[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%206 = llvm.mlir.constant(18 : index) : !llvm.i64
%207 = llvm.insertvalue %206, %205[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%208 = llvm.mlir.constant(1 : index) : !llvm.i64
%209 = llvm.insertvalue %208, %207[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%210 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%211 = llvm.getelementptr %210[%30] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%212 = llvm.bitcast %211 : !llvm<"i8*"> to !llvm<"float*">
%213 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%214 = llvm.insertvalue %212, %213[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%215 = llvm.insertvalue %212, %214[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%216 = llvm.mlir.constant(0 : index) : !llvm.i64
%217 = llvm.insertvalue %216, %215[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%218 = llvm.mlir.constant(1 : index) : !llvm.i64
%219 = llvm.insertvalue %218, %217[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%220 = llvm.mlir.constant(1568 : index) : !llvm.i64
%221 = llvm.insertvalue %220, %219[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%222 = llvm.mlir.constant(8 : index) : !llvm.i64
%223 = llvm.insertvalue %222, %221[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%224 = llvm.mlir.constant(196 : index) : !llvm.i64
%225 = llvm.insertvalue %224, %223[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%226 = llvm.mlir.constant(14 : index) : !llvm.i64
%227 = llvm.insertvalue %226, %225[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%228 = llvm.mlir.constant(14 : index) : !llvm.i64
%229 = llvm.insertvalue %228, %227[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%230 = llvm.mlir.constant(14 : index) : !llvm.i64
%231 = llvm.insertvalue %230, %229[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%232 = llvm.mlir.constant(1 : index) : !llvm.i64
%233 = llvm.insertvalue %232, %231[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%234 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%235 = llvm.getelementptr %234[%29] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%236 = llvm.bitcast %235 : !llvm<"i8*"> to !llvm<"float*">
%237 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%238 = llvm.insertvalue %236, %237[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%239 = llvm.insertvalue %236, %238[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%240 = llvm.mlir.constant(0 : index) : !llvm.i64
%241 = llvm.insertvalue %240, %239[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%242 = llvm.mlir.constant(1 : index) : !llvm.i64
%243 = llvm.insertvalue %242, %241[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%244 = llvm.mlir.constant(6272 : index) : !llvm.i64
%245 = llvm.insertvalue %244, %243[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%246 = llvm.mlir.constant(8 : index) : !llvm.i64
%247 = llvm.insertvalue %246, %245[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%248 = llvm.mlir.constant(784 : index) : !llvm.i64
%249 = llvm.insertvalue %248, %247[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%250 = llvm.mlir.constant(28 : index) : !llvm.i64
%251 = llvm.insertvalue %250, %249[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%252 = llvm.mlir.constant(28 : index) : !llvm.i64
%253 = llvm.insertvalue %252, %251[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%254 = llvm.mlir.constant(28 : index) : !llvm.i64
%255 = llvm.insertvalue %254, %253[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%256 = llvm.mlir.constant(1 : index) : !llvm.i64
%257 = llvm.insertvalue %256, %255[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%258 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%259 = llvm.getelementptr %258[%28] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%260 = llvm.bitcast %259 : !llvm<"i8*"> to !llvm<"float*">
%261 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%262 = llvm.insertvalue %260, %261[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%263 = llvm.insertvalue %260, %262[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%264 = llvm.mlir.constant(0 : index) : !llvm.i64
%265 = llvm.insertvalue %264, %263[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%266 = llvm.mlir.constant(1 : index) : !llvm.i64
%267 = llvm.insertvalue %266, %265[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%268 = llvm.mlir.constant(6272 : index) : !llvm.i64
%269 = llvm.insertvalue %268, %267[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%270 = llvm.mlir.constant(8 : index) : !llvm.i64
%271 = llvm.insertvalue %270, %269[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%272 = llvm.mlir.constant(784 : index) : !llvm.i64
%273 = llvm.insertvalue %272, %271[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%274 = llvm.mlir.constant(28 : index) : !llvm.i64
%275 = llvm.insertvalue %274, %273[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%276 = llvm.mlir.constant(28 : index) : !llvm.i64
%277 = llvm.insertvalue %276, %275[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%278 = llvm.mlir.constant(28 : index) : !llvm.i64
%279 = llvm.insertvalue %278, %277[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%280 = llvm.mlir.constant(1 : index) : !llvm.i64
%281 = llvm.insertvalue %280, %279[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%282 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%283 = llvm.getelementptr %282[%27] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%284 = llvm.bitcast %283 : !llvm<"i8*"> to !llvm<"float*">
%285 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%286 = llvm.insertvalue %284, %285[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%287 = llvm.insertvalue %284, %286[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%288 = llvm.mlir.constant(0 : index) : !llvm.i64
%289 = llvm.insertvalue %288, %287[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%290 = llvm.mlir.constant(1 : index) : !llvm.i64
%291 = llvm.insertvalue %290, %289[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%292 = llvm.mlir.constant(6272 : index) : !llvm.i64
%293 = llvm.insertvalue %292, %291[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%294 = llvm.mlir.constant(8 : index) : !llvm.i64
%295 = llvm.insertvalue %294, %293[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%296 = llvm.mlir.constant(784 : index) : !llvm.i64
%297 = llvm.insertvalue %296, %295[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%298 = llvm.mlir.constant(28 : index) : !llvm.i64
%299 = llvm.insertvalue %298, %297[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%300 = llvm.mlir.constant(28 : index) : !llvm.i64
%301 = llvm.insertvalue %300, %299[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%302 = llvm.mlir.constant(28 : index) : !llvm.i64
%303 = llvm.insertvalue %302, %301[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%304 = llvm.mlir.constant(1 : index) : !llvm.i64
%305 = llvm.insertvalue %304, %303[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%306 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%307 = llvm.getelementptr %306[%26] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%308 = llvm.bitcast %307 : !llvm<"i8*"> to !llvm<"float*">
%309 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%310 = llvm.insertvalue %308, %309[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%311 = llvm.insertvalue %308, %310[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%312 = llvm.mlir.constant(0 : index) : !llvm.i64
%313 = llvm.insertvalue %312, %311[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%314 = llvm.mlir.constant(1 : index) : !llvm.i64
%315 = llvm.insertvalue %314, %313[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%316 = llvm.mlir.constant(1024 : index) : !llvm.i64
%317 = llvm.insertvalue %316, %315[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%318 = llvm.mlir.constant(1 : index) : !llvm.i64
%319 = llvm.insertvalue %318, %317[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%320 = llvm.mlir.constant(1024 : index) : !llvm.i64
%321 = llvm.insertvalue %320, %319[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%322 = llvm.mlir.constant(32 : index) : !llvm.i64
%323 = llvm.insertvalue %322, %321[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%324 = llvm.mlir.constant(32 : index) : !llvm.i64
%325 = llvm.insertvalue %324, %323[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%326 = llvm.mlir.constant(32 : index) : !llvm.i64
%327 = llvm.insertvalue %326, %325[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%328 = llvm.mlir.constant(1 : index) : !llvm.i64
%329 = llvm.insertvalue %328, %327[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%330 = llvm.extractvalue %73[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%331 = llvm.getelementptr %330[%25] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%332 = llvm.bitcast %331 : !llvm<"i8*"> to !llvm<"float*">
%333 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%334 = llvm.insertvalue %332, %333[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%335 = llvm.insertvalue %332, %334[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%336 = llvm.mlir.constant(0 : index) : !llvm.i64
%337 = llvm.insertvalue %336, %335[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%338 = llvm.mlir.constant(256 : index) : !llvm.i64
%339 = llvm.insertvalue %338, %337[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%340 = llvm.mlir.constant(10 : index) : !llvm.i64
%341 = llvm.insertvalue %340, %339[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%342 = llvm.mlir.constant(10 : index) : !llvm.i64
%343 = llvm.insertvalue %342, %341[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%344 = llvm.mlir.constant(1 : index) : !llvm.i64
%345 = llvm.insertvalue %344, %343[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%346 = llvm.mlir.constant(1 : i64) : !llvm.i64
%347 = llvm.mlir.addressof @packedConst : !llvm<"i8**">
%348 = llvm.load %347 : !llvm<"i8**">
%349 = llvm.mlir.constant(0 : i64) : !llvm.i64
%350 = llvm.getelementptr %348[%349] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%351 = llvm.bitcast %350 : !llvm<"i8*"> to !llvm<"float*">
%352 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%353 = llvm.insertvalue %351, %352[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%354 = llvm.insertvalue %351, %353[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%355 = llvm.mlir.constant(0 : index) : !llvm.i64
%356 = llvm.insertvalue %355, %354[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%357 = llvm.mlir.constant(16 : index) : !llvm.i64
%358 = llvm.insertvalue %357, %356[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%359 = llvm.mlir.constant(160 : index) : !llvm.i64
%360 = llvm.insertvalue %359, %358[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%361 = llvm.mlir.constant(4 : index) : !llvm.i64
%362 = llvm.insertvalue %361, %360[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%363 = llvm.mlir.constant(40 : index) : !llvm.i64
%364 = llvm.insertvalue %363, %362[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%365 = llvm.mlir.constant(4 : index) : !llvm.i64
%366 = llvm.insertvalue %365, %364[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%367 = llvm.mlir.constant(10 : index) : !llvm.i64
%368 = llvm.insertvalue %367, %366[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%369 = llvm.mlir.constant(10 : index) : !llvm.i64
%370 = llvm.insertvalue %369, %368[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%371 = llvm.mlir.constant(1 : index) : !llvm.i64
%372 = llvm.insertvalue %371, %370[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%373 = llvm.extractvalue %345[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%374 = llvm.bitcast %373 : !llvm<"float*"> to !llvm<"i8*">
%375 = llvm.extractvalue %372[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%376 = llvm.bitcast %375 : !llvm<"float*"> to !llvm<"i8*">
%377 = llvm.sext %26 : !llvm.i64 to !llvm.i64
%378 = llvm.mlir.constant(false) : !llvm.i1
%379 = llvm.call @llvm.memcpy.p0i8.p0i8.i64(%374, %376, %377, %378) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> !llvm.void
%380 = llvm.mlir.constant(1 : i64) : !llvm.i64
%381 = llvm.mlir.addressof @packedConst : !llvm<"i8**">
%382 = llvm.load %381 : !llvm<"i8**">
%383 = llvm.mlir.constant(10240 : i64) : !llvm.i64
%384 = llvm.getelementptr %382[%383] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%385 = llvm.bitcast %384 : !llvm<"i8*"> to !llvm<"float*">
%386 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%387 = llvm.insertvalue %385, %386[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%388 = llvm.insertvalue %385, %387[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%389 = llvm.mlir.constant(0 : index) : !llvm.i64
%390 = llvm.insertvalue %389, %388[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%391 = llvm.mlir.constant(8 : index) : !llvm.i64
%392 = llvm.insertvalue %391, %390[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%393 = llvm.mlir.constant(25 : index) : !llvm.i64
%394 = llvm.insertvalue %393, %392[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%395 = llvm.mlir.constant(1 : index) : !llvm.i64
%396 = llvm.insertvalue %395, %394[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%397 = llvm.mlir.constant(25 : index) : !llvm.i64
%398 = llvm.insertvalue %397, %396[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%399 = llvm.mlir.constant(5 : index) : !llvm.i64
%400 = llvm.insertvalue %399, %398[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%401 = llvm.mlir.constant(5 : index) : !llvm.i64
%402 = llvm.insertvalue %401, %400[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%403 = llvm.mlir.constant(5 : index) : !llvm.i64
%404 = llvm.insertvalue %403, %402[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%405 = llvm.mlir.constant(1 : index) : !llvm.i64
%406 = llvm.insertvalue %405, %404[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%407 = llvm.mlir.constant(0 : index) : !llvm.i64
%408 = llvm.mlir.constant(1 : index) : !llvm.i64
%409 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb1(%407 : !llvm.i64)
^bb1(%410: !llvm.i64): // 2 preds: ^bb0, ^bb11
%411 = llvm.icmp "slt" %410, %408 : !llvm.i64
llvm.cond_br %411, ^bb2, ^bb12
^bb2: // pred: ^bb1
%412 = llvm.mlir.constant(0 : index) : !llvm.i64
%413 = llvm.mlir.constant(1 : index) : !llvm.i64
%414 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb3(%412 : !llvm.i64)
^bb3(%415: !llvm.i64): // 2 preds: ^bb2, ^bb10
%416 = llvm.icmp "slt" %415, %413 : !llvm.i64
llvm.cond_br %416, ^bb4, ^bb11
^bb4: // pred: ^bb3
%417 = llvm.mlir.constant(0 : index) : !llvm.i64
%418 = llvm.mlir.constant(32 : index) : !llvm.i64
%419 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb5(%417 : !llvm.i64)
^bb5(%420: !llvm.i64): // 2 preds: ^bb4, ^bb9
%421 = llvm.icmp "slt" %420, %418 : !llvm.i64
llvm.cond_br %421, ^bb6, ^bb10
^bb6: // pred: ^bb5
%422 = llvm.mlir.constant(0 : index) : !llvm.i64
%423 = llvm.mlir.constant(32 : index) : !llvm.i64
%424 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb7(%422 : !llvm.i64)
^bb7(%425: !llvm.i64): // 2 preds: ^bb6, ^bb8
%426 = llvm.icmp "slt" %425, %423 : !llvm.i64
llvm.cond_br %426, ^bb8, ^bb9
^bb8: // pred: ^bb7
%427 = llvm.extractvalue %329[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%428 = llvm.mlir.constant(0 : index) : !llvm.i64
%429 = llvm.mlir.constant(1024 : index) : !llvm.i64
%430 = llvm.mul %410, %429 : !llvm.i64
%431 = llvm.add %428, %430 : !llvm.i64
%432 = llvm.mlir.constant(1024 : index) : !llvm.i64
%433 = llvm.mul %415, %432 : !llvm.i64
%434 = llvm.add %431, %433 : !llvm.i64
%435 = llvm.mlir.constant(32 : index) : !llvm.i64
%436 = llvm.mul %420, %435 : !llvm.i64
%437 = llvm.add %434, %436 : !llvm.i64
%438 = llvm.mlir.constant(1 : index) : !llvm.i64
%439 = llvm.mul %425, %438 : !llvm.i64
%440 = llvm.add %437, %439 : !llvm.i64
%441 = llvm.getelementptr %427[%440] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %23, %441 : !llvm<"float*">
%442 = llvm.add %425, %424 : !llvm.i64
llvm.br ^bb7(%442 : !llvm.i64)
^bb9: // pred: ^bb7
%443 = llvm.add %420, %419 : !llvm.i64
llvm.br ^bb5(%443 : !llvm.i64)
^bb10: // pred: ^bb5
%444 = llvm.add %415, %414 : !llvm.i64
llvm.br ^bb3(%444 : !llvm.i64)
^bb11: // pred: ^bb3
%445 = llvm.add %410, %409 : !llvm.i64
llvm.br ^bb1(%445 : !llvm.i64)
^bb12: // pred: ^bb1
%446 = llvm.mlir.constant(0 : index) : !llvm.i64
%447 = llvm.mlir.constant(1 : index) : !llvm.i64
%448 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb13(%446 : !llvm.i64)
^bb13(%449: !llvm.i64): // 2 preds: ^bb12, ^bb23
%450 = llvm.icmp "slt" %449, %447 : !llvm.i64
llvm.cond_br %450, ^bb14, ^bb24
^bb14: // pred: ^bb13
%451 = llvm.mlir.constant(0 : index) : !llvm.i64
%452 = llvm.mlir.constant(1 : index) : !llvm.i64
%453 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb15(%451 : !llvm.i64)
^bb15(%454: !llvm.i64): // 2 preds: ^bb14, ^bb22
%455 = llvm.icmp "slt" %454, %452 : !llvm.i64
llvm.cond_br %455, ^bb16, ^bb23
^bb16: // pred: ^bb15
%456 = llvm.mlir.constant(0 : index) : !llvm.i64
%457 = llvm.mlir.constant(28 : index) : !llvm.i64
%458 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb17(%456 : !llvm.i64)
^bb17(%459: !llvm.i64): // 2 preds: ^bb16, ^bb21
%460 = llvm.icmp "slt" %459, %457 : !llvm.i64
llvm.cond_br %460, ^bb18, ^bb22
^bb18: // pred: ^bb17
%461 = llvm.mlir.constant(0 : index) : !llvm.i64
%462 = llvm.mlir.constant(28 : index) : !llvm.i64
%463 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb19(%461 : !llvm.i64)
^bb19(%464: !llvm.i64): // 2 preds: ^bb18, ^bb20
%465 = llvm.icmp "slt" %464, %462 : !llvm.i64
llvm.cond_br %465, ^bb20, ^bb21
^bb20: // pred: ^bb19
%466 = llvm.extractvalue %11[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%467 = llvm.mlir.constant(0 : index) : !llvm.i64
%468 = llvm.mlir.constant(784 : index) : !llvm.i64
%469 = llvm.mul %449, %468 : !llvm.i64
%470 = llvm.add %467, %469 : !llvm.i64
%471 = llvm.mlir.constant(784 : index) : !llvm.i64
%472 = llvm.mul %454, %471 : !llvm.i64
%473 = llvm.add %470, %472 : !llvm.i64
%474 = llvm.mlir.constant(28 : index) : !llvm.i64
%475 = llvm.mul %459, %474 : !llvm.i64
%476 = llvm.add %473, %475 : !llvm.i64
%477 = llvm.mlir.constant(1 : index) : !llvm.i64
%478 = llvm.mul %464, %477 : !llvm.i64
%479 = llvm.add %476, %478 : !llvm.i64
%480 = llvm.getelementptr %466[%479] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%481 = llvm.load %480 : !llvm<"float*">
%482 = llvm.mlir.constant(2 : index) : !llvm.i64
%483 = llvm.add %459, %482 : !llvm.i64
%484 = llvm.mlir.constant(2 : index) : !llvm.i64
%485 = llvm.add %464, %484 : !llvm.i64
%486 = llvm.extractvalue %329[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%487 = llvm.mlir.constant(0 : index) : !llvm.i64
%488 = llvm.mlir.constant(1024 : index) : !llvm.i64
%489 = llvm.mul %449, %488 : !llvm.i64
%490 = llvm.add %487, %489 : !llvm.i64
%491 = llvm.mlir.constant(1024 : index) : !llvm.i64
%492 = llvm.mul %454, %491 : !llvm.i64
%493 = llvm.add %490, %492 : !llvm.i64
%494 = llvm.mlir.constant(32 : index) : !llvm.i64
%495 = llvm.mul %483, %494 : !llvm.i64
%496 = llvm.add %493, %495 : !llvm.i64
%497 = llvm.mlir.constant(1 : index) : !llvm.i64
%498 = llvm.mul %485, %497 : !llvm.i64
%499 = llvm.add %496, %498 : !llvm.i64
%500 = llvm.getelementptr %486[%499] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %481, %500 : !llvm<"float*">
%501 = llvm.add %464, %463 : !llvm.i64
llvm.br ^bb19(%501 : !llvm.i64)
^bb21: // pred: ^bb19
%502 = llvm.add %459, %458 : !llvm.i64
llvm.br ^bb17(%502 : !llvm.i64)
^bb22: // pred: ^bb17
%503 = llvm.add %454, %453 : !llvm.i64
llvm.br ^bb15(%503 : !llvm.i64)
^bb23: // pred: ^bb15
%504 = llvm.add %449, %448 : !llvm.i64
llvm.br ^bb13(%504 : !llvm.i64)
^bb24: // pred: ^bb13
%505 = llvm.mlir.constant(0 : index) : !llvm.i64
%506 = llvm.mlir.constant(1 : index) : !llvm.i64
%507 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb25(%505 : !llvm.i64)
^bb25(%508: !llvm.i64): // 2 preds: ^bb24, ^bb44
%509 = llvm.icmp "slt" %508, %506 : !llvm.i64
llvm.cond_br %509, ^bb26, ^bb45
^bb26: // pred: ^bb25
%510 = llvm.mlir.constant(0 : index) : !llvm.i64
%511 = llvm.mlir.constant(8 : index) : !llvm.i64
%512 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb27(%510 : !llvm.i64)
^bb27(%513: !llvm.i64): // 2 preds: ^bb26, ^bb43
%514 = llvm.icmp "slt" %513, %511 : !llvm.i64
llvm.cond_br %514, ^bb28, ^bb44
^bb28: // pred: ^bb27
%515 = llvm.mlir.constant(0 : index) : !llvm.i64
%516 = llvm.mlir.constant(28 : index) : !llvm.i64
%517 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb29(%515 : !llvm.i64)
^bb29(%518: !llvm.i64): // 2 preds: ^bb28, ^bb42
%519 = llvm.icmp "slt" %518, %516 : !llvm.i64
llvm.cond_br %519, ^bb30, ^bb43
^bb30: // pred: ^bb29
%520 = llvm.mlir.constant(0 : index) : !llvm.i64
%521 = llvm.mlir.constant(28 : index) : !llvm.i64
%522 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb31(%520 : !llvm.i64)
^bb31(%523: !llvm.i64): // 2 preds: ^bb30, ^bb41
%524 = llvm.icmp "slt" %523, %521 : !llvm.i64
llvm.cond_br %524, ^bb32, ^bb42
^bb32: // pred: ^bb31
%525 = llvm.extractvalue %305[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%526 = llvm.mlir.constant(0 : index) : !llvm.i64
%527 = llvm.mlir.constant(6272 : index) : !llvm.i64
%528 = llvm.mul %508, %527 : !llvm.i64
%529 = llvm.add %526, %528 : !llvm.i64
%530 = llvm.mlir.constant(784 : index) : !llvm.i64
%531 = llvm.mul %513, %530 : !llvm.i64
%532 = llvm.add %529, %531 : !llvm.i64
%533 = llvm.mlir.constant(28 : index) : !llvm.i64
%534 = llvm.mul %518, %533 : !llvm.i64
%535 = llvm.add %532, %534 : !llvm.i64
%536 = llvm.mlir.constant(1 : index) : !llvm.i64
%537 = llvm.mul %523, %536 : !llvm.i64
%538 = llvm.add %535, %537 : !llvm.i64
%539 = llvm.getelementptr %525[%538] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %23, %539 : !llvm<"float*">
%540 = llvm.mlir.constant(0 : index) : !llvm.i64
%541 = llvm.mlir.constant(1 : index) : !llvm.i64
%542 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb33(%540 : !llvm.i64)
^bb33(%543: !llvm.i64): // 2 preds: ^bb32, ^bb40
%544 = llvm.icmp "slt" %543, %541 : !llvm.i64
llvm.cond_br %544, ^bb34, ^bb41
^bb34: // pred: ^bb33
%545 = llvm.mlir.constant(0 : index) : !llvm.i64
%546 = llvm.mlir.constant(5 : index) : !llvm.i64
%547 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb35(%545 : !llvm.i64)
^bb35(%548: !llvm.i64): // 2 preds: ^bb34, ^bb39
%549 = llvm.icmp "slt" %548, %546 : !llvm.i64
llvm.cond_br %549, ^bb36, ^bb40
^bb36: // pred: ^bb35
%550 = llvm.mlir.constant(0 : index) : !llvm.i64
%551 = llvm.mlir.constant(5 : index) : !llvm.i64
%552 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb37(%550 : !llvm.i64)
^bb37(%553: !llvm.i64): // 2 preds: ^bb36, ^bb38
%554 = llvm.icmp "slt" %553, %551 : !llvm.i64
llvm.cond_br %554, ^bb38, ^bb39
^bb38: // pred: ^bb37
%555 = llvm.add %518, %548 : !llvm.i64
%556 = llvm.add %523, %553 : !llvm.i64
%557 = llvm.extractvalue %329[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%558 = llvm.mlir.constant(0 : index) : !llvm.i64
%559 = llvm.mlir.constant(1024 : index) : !llvm.i64
%560 = llvm.mul %508, %559 : !llvm.i64
%561 = llvm.add %558, %560 : !llvm.i64
%562 = llvm.mlir.constant(1024 : index) : !llvm.i64
%563 = llvm.mul %543, %562 : !llvm.i64
%564 = llvm.add %561, %563 : !llvm.i64
%565 = llvm.mlir.constant(32 : index) : !llvm.i64
%566 = llvm.mul %555, %565 : !llvm.i64
%567 = llvm.add %564, %566 : !llvm.i64
%568 = llvm.mlir.constant(1 : index) : !llvm.i64
%569 = llvm.mul %556, %568 : !llvm.i64
%570 = llvm.add %567, %569 : !llvm.i64
%571 = llvm.getelementptr %557[%570] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%572 = llvm.load %571 : !llvm<"float*">
%573 = llvm.extractvalue %406[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%574 = llvm.mlir.constant(0 : index) : !llvm.i64
%575 = llvm.mlir.constant(25 : index) : !llvm.i64
%576 = llvm.mul %513, %575 : !llvm.i64
%577 = llvm.add %574, %576 : !llvm.i64
%578 = llvm.mlir.constant(25 : index) : !llvm.i64
%579 = llvm.mul %543, %578 : !llvm.i64
%580 = llvm.add %577, %579 : !llvm.i64
%581 = llvm.mlir.constant(5 : index) : !llvm.i64
%582 = llvm.mul %548, %581 : !llvm.i64
%583 = llvm.add %580, %582 : !llvm.i64
%584 = llvm.mlir.constant(1 : index) : !llvm.i64
%585 = llvm.mul %553, %584 : !llvm.i64
%586 = llvm.add %583, %585 : !llvm.i64
%587 = llvm.getelementptr %573[%586] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%588 = llvm.load %587 : !llvm<"float*">
%589 = llvm.extractvalue %305[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%590 = llvm.mlir.constant(0 : index) : !llvm.i64
%591 = llvm.mlir.constant(6272 : index) : !llvm.i64
%592 = llvm.mul %508, %591 : !llvm.i64
%593 = llvm.add %590, %592 : !llvm.i64
%594 = llvm.mlir.constant(784 : index) : !llvm.i64
%595 = llvm.mul %513, %594 : !llvm.i64
%596 = llvm.add %593, %595 : !llvm.i64
%597 = llvm.mlir.constant(28 : index) : !llvm.i64
%598 = llvm.mul %518, %597 : !llvm.i64
%599 = llvm.add %596, %598 : !llvm.i64
%600 = llvm.mlir.constant(1 : index) : !llvm.i64
%601 = llvm.mul %523, %600 : !llvm.i64
%602 = llvm.add %599, %601 : !llvm.i64
%603 = llvm.getelementptr %589[%602] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%604 = llvm.load %603 : !llvm<"float*">
%605 = llvm.fmul %572, %588 : !llvm.float
%606 = llvm.fadd %604, %605 : !llvm.float
%607 = llvm.extractvalue %305[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%608 = llvm.mlir.constant(0 : index) : !llvm.i64
%609 = llvm.mlir.constant(6272 : index) : !llvm.i64
%610 = llvm.mul %508, %609 : !llvm.i64
%611 = llvm.add %608, %610 : !llvm.i64
%612 = llvm.mlir.constant(784 : index) : !llvm.i64
%613 = llvm.mul %513, %612 : !llvm.i64
%614 = llvm.add %611, %613 : !llvm.i64
%615 = llvm.mlir.constant(28 : index) : !llvm.i64
%616 = llvm.mul %518, %615 : !llvm.i64
%617 = llvm.add %614, %616 : !llvm.i64
%618 = llvm.mlir.constant(1 : index) : !llvm.i64
%619 = llvm.mul %523, %618 : !llvm.i64
%620 = llvm.add %617, %619 : !llvm.i64
%621 = llvm.getelementptr %607[%620] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %606, %621 : !llvm<"float*">
%622 = llvm.add %553, %552 : !llvm.i64
llvm.br ^bb37(%622 : !llvm.i64)
^bb39: // pred: ^bb37
%623 = llvm.add %548, %547 : !llvm.i64
llvm.br ^bb35(%623 : !llvm.i64)
^bb40: // pred: ^bb35
%624 = llvm.add %543, %542 : !llvm.i64
llvm.br ^bb33(%624 : !llvm.i64)
^bb41: // pred: ^bb33
%625 = llvm.add %523, %522 : !llvm.i64
llvm.br ^bb31(%625 : !llvm.i64)
^bb42: // pred: ^bb31
%626 = llvm.add %518, %517 : !llvm.i64
llvm.br ^bb29(%626 : !llvm.i64)
^bb43: // pred: ^bb29
%627 = llvm.add %513, %512 : !llvm.i64
llvm.br ^bb27(%627 : !llvm.i64)
^bb44: // pred: ^bb27
%628 = llvm.add %508, %507 : !llvm.i64
llvm.br ^bb25(%628 : !llvm.i64)
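// The nest ending at ^bb44 appears to be the first Conv: an 8x1x5x5 kernel (%406)
// slid over the padded 1x1x32x32 input (%329), accumulating into the 1x8x28x28
// buffer %305. ^bb45 below then stages the 8-element bias @constant_2 on the stack
// and wraps it in a rank-3 memref descriptor.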
^bb45: // pred: ^bb25
%629 = llvm.mlir.constant(1 : i64) : !llvm.i64
%630 = llvm.alloca %629 x !llvm<"[8 x [1 x [1 x float]]]"> : (!llvm.i64) -> !llvm<"[8 x [1 x [1 x float]]]*">
%631 = llvm.bitcast %630 : !llvm<"[8 x [1 x [1 x float]]]*"> to !llvm<"i8*">
%632 = llvm.mlir.addressof @constant_2 : !llvm<"[8 x [1 x [1 x float]]]*">
%633 = llvm.bitcast %632 : !llvm<"[8 x [1 x [1 x float]]]*"> to !llvm<"i8*">
%634 = llvm.mlir.constant(4 : i64) : !llvm.i64
%635 = llvm.mlir.constant(8 : i64) : !llvm.i64
%636 = llvm.mul %634, %635 : !llvm.i64
%637 = llvm.sext %636 : !llvm.i64 to !llvm.i64
%638 = llvm.mlir.constant(false) : !llvm.i1
%639 = llvm.call @llvm.memcpy.p0i8.p0i8.i64(%631, %633, %637, %638) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> !llvm.void
%640 = llvm.bitcast %630 : !llvm<"[8 x [1 x [1 x float]]]*"> to !llvm<"float*">
%641 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%642 = llvm.insertvalue %640, %641[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%643 = llvm.insertvalue %640, %642[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%644 = llvm.mlir.constant(0 : index) : !llvm.i64
%645 = llvm.insertvalue %644, %643[2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%646 = llvm.mlir.constant(8 : index) : !llvm.i64
%647 = llvm.insertvalue %646, %645[3, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%648 = llvm.mlir.constant(1 : index) : !llvm.i64
%649 = llvm.insertvalue %648, %647[4, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%650 = llvm.mlir.constant(1 : index) : !llvm.i64
%651 = llvm.insertvalue %650, %649[3, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%652 = llvm.mlir.constant(1 : index) : !llvm.i64
%653 = llvm.insertvalue %652, %651[4, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%654 = llvm.mlir.constant(1 : index) : !llvm.i64
%655 = llvm.insertvalue %654, %653[3, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%656 = llvm.mlir.constant(1 : index) : !llvm.i64
%657 = llvm.insertvalue %656, %655[4, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
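// ^bb46..^bb56: presumably the bias add: the per-channel bias (%657) is loaded,
// fadd'ed to the conv result %305, and the 1x8x28x28 sum is written to %281.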
%658 = llvm.mlir.constant(0 : index) : !llvm.i64
%659 = llvm.mlir.constant(1 : index) : !llvm.i64
%660 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb46(%658 : !llvm.i64)
^bb46(%661: !llvm.i64): // 2 preds: ^bb45, ^bb56
%662 = llvm.icmp "slt" %661, %659 : !llvm.i64
llvm.cond_br %662, ^bb47, ^bb57
^bb47: // pred: ^bb46
%663 = llvm.mlir.constant(0 : index) : !llvm.i64
%664 = llvm.mlir.constant(8 : index) : !llvm.i64
%665 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb48(%663 : !llvm.i64)
^bb48(%666: !llvm.i64): // 2 preds: ^bb47, ^bb55
%667 = llvm.icmp "slt" %666, %664 : !llvm.i64
llvm.cond_br %667, ^bb49, ^bb56
^bb49: // pred: ^bb48
%668 = llvm.mlir.constant(0 : index) : !llvm.i64
%669 = llvm.mlir.constant(28 : index) : !llvm.i64
%670 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb50(%668 : !llvm.i64)
^bb50(%671: !llvm.i64): // 2 preds: ^bb49, ^bb54
%672 = llvm.icmp "slt" %671, %669 : !llvm.i64
llvm.cond_br %672, ^bb51, ^bb55
^bb51: // pred: ^bb50
%673 = llvm.mlir.constant(0 : index) : !llvm.i64
%674 = llvm.mlir.constant(28 : index) : !llvm.i64
%675 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb52(%673 : !llvm.i64)
^bb52(%676: !llvm.i64): // 2 preds: ^bb51, ^bb53
%677 = llvm.icmp "slt" %676, %674 : !llvm.i64
llvm.cond_br %677, ^bb53, ^bb54
^bb53: // pred: ^bb52
%678 = llvm.mlir.constant(0 : index) : !llvm.i64
%679 = llvm.extractvalue %305[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%680 = llvm.mlir.constant(0 : index) : !llvm.i64
%681 = llvm.mlir.constant(6272 : index) : !llvm.i64
%682 = llvm.mul %678, %681 : !llvm.i64
%683 = llvm.add %680, %682 : !llvm.i64
%684 = llvm.mlir.constant(784 : index) : !llvm.i64
%685 = llvm.mul %666, %684 : !llvm.i64
%686 = llvm.add %683, %685 : !llvm.i64
%687 = llvm.mlir.constant(28 : index) : !llvm.i64
%688 = llvm.mul %671, %687 : !llvm.i64
%689 = llvm.add %686, %688 : !llvm.i64
%690 = llvm.mlir.constant(1 : index) : !llvm.i64
%691 = llvm.mul %676, %690 : !llvm.i64
%692 = llvm.add %689, %691 : !llvm.i64
%693 = llvm.getelementptr %679[%692] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%694 = llvm.load %693 : !llvm<"float*">
%695 = llvm.mlir.constant(0 : index) : !llvm.i64
%696 = llvm.mlir.constant(0 : index) : !llvm.i64
%697 = llvm.extractvalue %657[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%698 = llvm.mlir.constant(0 : index) : !llvm.i64
%699 = llvm.mlir.constant(1 : index) : !llvm.i64
%700 = llvm.mul %666, %699 : !llvm.i64
%701 = llvm.add %698, %700 : !llvm.i64
%702 = llvm.mlir.constant(1 : index) : !llvm.i64
%703 = llvm.mul %695, %702 : !llvm.i64
%704 = llvm.add %701, %703 : !llvm.i64
%705 = llvm.mlir.constant(1 : index) : !llvm.i64
%706 = llvm.mul %696, %705 : !llvm.i64
%707 = llvm.add %704, %706 : !llvm.i64
%708 = llvm.getelementptr %697[%707] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%709 = llvm.load %708 : !llvm<"float*">
%710 = llvm.fadd %694, %709 : !llvm.float
%711 = llvm.extractvalue %281[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%712 = llvm.mlir.constant(0 : index) : !llvm.i64
%713 = llvm.mlir.constant(6272 : index) : !llvm.i64
%714 = llvm.mul %661, %713 : !llvm.i64
%715 = llvm.add %712, %714 : !llvm.i64
%716 = llvm.mlir.constant(784 : index) : !llvm.i64
%717 = llvm.mul %666, %716 : !llvm.i64
%718 = llvm.add %715, %717 : !llvm.i64
%719 = llvm.mlir.constant(28 : index) : !llvm.i64
%720 = llvm.mul %671, %719 : !llvm.i64
%721 = llvm.add %718, %720 : !llvm.i64
%722 = llvm.mlir.constant(1 : index) : !llvm.i64
%723 = llvm.mul %676, %722 : !llvm.i64
%724 = llvm.add %721, %723 : !llvm.i64
%725 = llvm.getelementptr %711[%724] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %710, %725 : !llvm<"float*">
%726 = llvm.add %676, %675 : !llvm.i64
llvm.br ^bb52(%726 : !llvm.i64)
^bb54: // pred: ^bb52
%727 = llvm.add %671, %670 : !llvm.i64
llvm.br ^bb50(%727 : !llvm.i64)
^bb55: // pred: ^bb50
%728 = llvm.add %666, %665 : !llvm.i64
llvm.br ^bb48(%728 : !llvm.i64)
^bb56: // pred: ^bb48
%729 = llvm.add %661, %660 : !llvm.i64
llvm.br ^bb46(%729 : !llvm.i64)
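// ^bb58..^bb68: the fcmp "olt"/select pair below looks like the ReLU:
// %257 = max(%281, %23), where %23 is presumably the f32 constant 0.0.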
^bb57: // pred: ^bb46
%730 = llvm.mlir.constant(0 : index) : !llvm.i64
%731 = llvm.mlir.constant(1 : index) : !llvm.i64
%732 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb58(%730 : !llvm.i64)
^bb58(%733: !llvm.i64): // 2 preds: ^bb57, ^bb68
%734 = llvm.icmp "slt" %733, %731 : !llvm.i64
llvm.cond_br %734, ^bb59, ^bb69
^bb59: // pred: ^bb58
%735 = llvm.mlir.constant(0 : index) : !llvm.i64
%736 = llvm.mlir.constant(8 : index) : !llvm.i64
%737 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb60(%735 : !llvm.i64)
^bb60(%738: !llvm.i64): // 2 preds: ^bb59, ^bb67
%739 = llvm.icmp "slt" %738, %736 : !llvm.i64
llvm.cond_br %739, ^bb61, ^bb68
^bb61: // pred: ^bb60
%740 = llvm.mlir.constant(0 : index) : !llvm.i64
%741 = llvm.mlir.constant(28 : index) : !llvm.i64
%742 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb62(%740 : !llvm.i64)
^bb62(%743: !llvm.i64): // 2 preds: ^bb61, ^bb66
%744 = llvm.icmp "slt" %743, %741 : !llvm.i64
llvm.cond_br %744, ^bb63, ^bb67
^bb63: // pred: ^bb62
%745 = llvm.mlir.constant(0 : index) : !llvm.i64
%746 = llvm.mlir.constant(28 : index) : !llvm.i64
%747 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb64(%745 : !llvm.i64)
^bb64(%748: !llvm.i64): // 2 preds: ^bb63, ^bb65
%749 = llvm.icmp "slt" %748, %746 : !llvm.i64
llvm.cond_br %749, ^bb65, ^bb66
^bb65: // pred: ^bb64
%750 = llvm.extractvalue %281[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%751 = llvm.mlir.constant(0 : index) : !llvm.i64
%752 = llvm.mlir.constant(6272 : index) : !llvm.i64
%753 = llvm.mul %733, %752 : !llvm.i64
%754 = llvm.add %751, %753 : !llvm.i64
%755 = llvm.mlir.constant(784 : index) : !llvm.i64
%756 = llvm.mul %738, %755 : !llvm.i64
%757 = llvm.add %754, %756 : !llvm.i64
%758 = llvm.mlir.constant(28 : index) : !llvm.i64
%759 = llvm.mul %743, %758 : !llvm.i64
%760 = llvm.add %757, %759 : !llvm.i64
%761 = llvm.mlir.constant(1 : index) : !llvm.i64
%762 = llvm.mul %748, %761 : !llvm.i64
%763 = llvm.add %760, %762 : !llvm.i64
%764 = llvm.getelementptr %750[%763] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%765 = llvm.load %764 : !llvm<"float*">
%766 = llvm.fcmp "olt" %765, %23 : !llvm.float
%767 = llvm.select %766, %23, %765 : !llvm.i1, !llvm.float
%768 = llvm.extractvalue %257[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%769 = llvm.mlir.constant(0 : index) : !llvm.i64
%770 = llvm.mlir.constant(6272 : index) : !llvm.i64
%771 = llvm.mul %733, %770 : !llvm.i64
%772 = llvm.add %769, %771 : !llvm.i64
%773 = llvm.mlir.constant(784 : index) : !llvm.i64
%774 = llvm.mul %738, %773 : !llvm.i64
%775 = llvm.add %772, %774 : !llvm.i64
%776 = llvm.mlir.constant(28 : index) : !llvm.i64
%777 = llvm.mul %743, %776 : !llvm.i64
%778 = llvm.add %775, %777 : !llvm.i64
%779 = llvm.mlir.constant(1 : index) : !llvm.i64
%780 = llvm.mul %748, %779 : !llvm.i64
%781 = llvm.add %778, %780 : !llvm.i64
%782 = llvm.getelementptr %768[%781] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %767, %782 : !llvm<"float*">
%783 = llvm.add %748, %747 : !llvm.i64
llvm.br ^bb64(%783 : !llvm.i64)
^bb66: // pred: ^bb64
%784 = llvm.add %743, %742 : !llvm.i64
llvm.br ^bb62(%784 : !llvm.i64)
^bb67: // pred: ^bb62
%785 = llvm.add %738, %737 : !llvm.i64
llvm.br ^bb60(%785 : !llvm.i64)
^bb68: // pred: ^bb60
%786 = llvm.add %733, %732 : !llvm.i64
llvm.br ^bb58(%786 : !llvm.i64)
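// ^bb70..^bb86: likely MaxPool 2x2 with stride 2 (28x28 -> 14x14). Each output
// cell of %233 is seeded with %17 (presumably the smallest f32), the icmp/select
// chains clamp the pooling window, and the fcmp "ogt" takes the running max over %257.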
^bb69: // pred: ^bb58
%787 = llvm.mlir.constant(0 : index) : !llvm.i64
%788 = llvm.mlir.constant(1 : index) : !llvm.i64
%789 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb70(%787 : !llvm.i64)
^bb70(%790: !llvm.i64): // 2 preds: ^bb69, ^bb86
%791 = llvm.icmp "slt" %790, %788 : !llvm.i64
llvm.cond_br %791, ^bb71, ^bb87
^bb71: // pred: ^bb70
%792 = llvm.mlir.constant(0 : index) : !llvm.i64
%793 = llvm.mlir.constant(8 : index) : !llvm.i64
%794 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb72(%792 : !llvm.i64)
^bb72(%795: !llvm.i64): // 2 preds: ^bb71, ^bb85
%796 = llvm.icmp "slt" %795, %793 : !llvm.i64
llvm.cond_br %796, ^bb73, ^bb86
^bb73: // pred: ^bb72
%797 = llvm.mlir.constant(0 : index) : !llvm.i64
%798 = llvm.mlir.constant(14 : index) : !llvm.i64
%799 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb74(%797 : !llvm.i64)
^bb74(%800: !llvm.i64): // 2 preds: ^bb73, ^bb84
%801 = llvm.icmp "slt" %800, %798 : !llvm.i64
llvm.cond_br %801, ^bb75, ^bb85
^bb75: // pred: ^bb74
%802 = llvm.mlir.constant(0 : index) : !llvm.i64
%803 = llvm.mlir.constant(14 : index) : !llvm.i64
%804 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb76(%802 : !llvm.i64)
^bb76(%805: !llvm.i64): // 2 preds: ^bb75, ^bb83
%806 = llvm.icmp "slt" %805, %803 : !llvm.i64
llvm.cond_br %806, ^bb77, ^bb84
^bb77: // pred: ^bb76
%807 = llvm.extractvalue %233[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%808 = llvm.mlir.constant(0 : index) : !llvm.i64
%809 = llvm.mlir.constant(1568 : index) : !llvm.i64
%810 = llvm.mul %790, %809 : !llvm.i64
%811 = llvm.add %808, %810 : !llvm.i64
%812 = llvm.mlir.constant(196 : index) : !llvm.i64
%813 = llvm.mul %795, %812 : !llvm.i64
%814 = llvm.add %811, %813 : !llvm.i64
%815 = llvm.mlir.constant(14 : index) : !llvm.i64
%816 = llvm.mul %800, %815 : !llvm.i64
%817 = llvm.add %814, %816 : !llvm.i64
%818 = llvm.mlir.constant(1 : index) : !llvm.i64
%819 = llvm.mul %805, %818 : !llvm.i64
%820 = llvm.add %817, %819 : !llvm.i64
%821 = llvm.getelementptr %807[%820] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %17, %821 : !llvm<"float*">
%822 = llvm.mlir.constant(0 : index) : !llvm.i64
%823 = llvm.mlir.constant(2 : index) : !llvm.i64
%824 = llvm.mul %800, %823 : !llvm.i64
%825 = llvm.icmp "sgt" %822, %824 : !llvm.i64
%826 = llvm.select %825, %822, %824 : !llvm.i1, !llvm.i64
%827 = llvm.mlir.constant(0 : index) : !llvm.i64
%828 = llvm.mlir.constant(2 : index) : !llvm.i64
%829 = llvm.mul %805, %828 : !llvm.i64
%830 = llvm.icmp "sgt" %827, %829 : !llvm.i64
%831 = llvm.select %830, %827, %829 : !llvm.i1, !llvm.i64
%832 = llvm.mlir.constant(0 : index) : !llvm.i64
%833 = llvm.mlir.constant(28 : index) : !llvm.i64
%834 = llvm.mlir.constant(-2 : index) : !llvm.i64
%835 = llvm.mul %800, %834 : !llvm.i64
%836 = llvm.mlir.constant(28 : index) : !llvm.i64
%837 = llvm.add %835, %836 : !llvm.i64
%838 = llvm.mlir.constant(2 : index) : !llvm.i64
%839 = llvm.mul %800, %838 : !llvm.i64
%840 = llvm.mlir.constant(2 : index) : !llvm.i64
%841 = llvm.add %839, %840 : !llvm.i64
%842 = llvm.mlir.constant(2 : index) : !llvm.i64
%843 = llvm.icmp "slt" %833, %837 : !llvm.i64
%844 = llvm.select %843, %833, %837 : !llvm.i1, !llvm.i64
%845 = llvm.icmp "slt" %844, %841 : !llvm.i64
%846 = llvm.select %845, %844, %841 : !llvm.i1, !llvm.i64
%847 = llvm.icmp "slt" %846, %842 : !llvm.i64
%848 = llvm.select %847, %846, %842 : !llvm.i1, !llvm.i64
%849 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb78(%832 : !llvm.i64)
^bb78(%850: !llvm.i64): // 2 preds: ^bb77, ^bb82
%851 = llvm.icmp "slt" %850, %848 : !llvm.i64
llvm.cond_br %851, ^bb79, ^bb83
^bb79: // pred: ^bb78
%852 = llvm.mlir.constant(0 : index) : !llvm.i64
%853 = llvm.mlir.constant(28 : index) : !llvm.i64
%854 = llvm.mlir.constant(-2 : index) : !llvm.i64
%855 = llvm.mul %805, %854 : !llvm.i64
%856 = llvm.mlir.constant(28 : index) : !llvm.i64
%857 = llvm.add %855, %856 : !llvm.i64
%858 = llvm.mlir.constant(2 : index) : !llvm.i64
%859 = llvm.mul %805, %858 : !llvm.i64
%860 = llvm.mlir.constant(2 : index) : !llvm.i64
%861 = llvm.add %859, %860 : !llvm.i64
%862 = llvm.mlir.constant(2 : index) : !llvm.i64
%863 = llvm.icmp "slt" %853, %857 : !llvm.i64
%864 = llvm.select %863, %853, %857 : !llvm.i1, !llvm.i64
%865 = llvm.icmp "slt" %864, %861 : !llvm.i64
%866 = llvm.select %865, %864, %861 : !llvm.i1, !llvm.i64
%867 = llvm.icmp "slt" %866, %862 : !llvm.i64
%868 = llvm.select %867, %866, %862 : !llvm.i1, !llvm.i64
%869 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb80(%852 : !llvm.i64)
^bb80(%870: !llvm.i64): // 2 preds: ^bb79, ^bb81
%871 = llvm.icmp "slt" %870, %868 : !llvm.i64
llvm.cond_br %871, ^bb81, ^bb82
^bb81: // pred: ^bb80
%872 = llvm.add %850, %826 : !llvm.i64
%873 = llvm.add %870, %831 : !llvm.i64
%874 = llvm.extractvalue %257[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%875 = llvm.mlir.constant(0 : index) : !llvm.i64
%876 = llvm.mlir.constant(6272 : index) : !llvm.i64
%877 = llvm.mul %790, %876 : !llvm.i64
%878 = llvm.add %875, %877 : !llvm.i64
%879 = llvm.mlir.constant(784 : index) : !llvm.i64
%880 = llvm.mul %795, %879 : !llvm.i64
%881 = llvm.add %878, %880 : !llvm.i64
%882 = llvm.mlir.constant(28 : index) : !llvm.i64
%883 = llvm.mul %872, %882 : !llvm.i64
%884 = llvm.add %881, %883 : !llvm.i64
%885 = llvm.mlir.constant(1 : index) : !llvm.i64
%886 = llvm.mul %873, %885 : !llvm.i64
%887 = llvm.add %884, %886 : !llvm.i64
%888 = llvm.getelementptr %874[%887] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%889 = llvm.load %888 : !llvm<"float*">
%890 = llvm.extractvalue %233[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%891 = llvm.mlir.constant(0 : index) : !llvm.i64
%892 = llvm.mlir.constant(1568 : index) : !llvm.i64
%893 = llvm.mul %790, %892 : !llvm.i64
%894 = llvm.add %891, %893 : !llvm.i64
%895 = llvm.mlir.constant(196 : index) : !llvm.i64
%896 = llvm.mul %795, %895 : !llvm.i64
%897 = llvm.add %894, %896 : !llvm.i64
%898 = llvm.mlir.constant(14 : index) : !llvm.i64
%899 = llvm.mul %800, %898 : !llvm.i64
%900 = llvm.add %897, %899 : !llvm.i64
%901 = llvm.mlir.constant(1 : index) : !llvm.i64
%902 = llvm.mul %805, %901 : !llvm.i64
%903 = llvm.add %900, %902 : !llvm.i64
%904 = llvm.getelementptr %890[%903] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%905 = llvm.load %904 : !llvm<"float*">
%906 = llvm.fcmp "ogt" %905, %889 : !llvm.float
%907 = llvm.select %906, %905, %889 : !llvm.i1, !llvm.float
%908 = llvm.extractvalue %233[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%909 = llvm.mlir.constant(0 : index) : !llvm.i64
%910 = llvm.mlir.constant(1568 : index) : !llvm.i64
%911 = llvm.mul %790, %910 : !llvm.i64
%912 = llvm.add %909, %911 : !llvm.i64
%913 = llvm.mlir.constant(196 : index) : !llvm.i64
%914 = llvm.mul %795, %913 : !llvm.i64
%915 = llvm.add %912, %914 : !llvm.i64
%916 = llvm.mlir.constant(14 : index) : !llvm.i64
%917 = llvm.mul %800, %916 : !llvm.i64
%918 = llvm.add %915, %917 : !llvm.i64
%919 = llvm.mlir.constant(1 : index) : !llvm.i64
%920 = llvm.mul %805, %919 : !llvm.i64
%921 = llvm.add %918, %920 : !llvm.i64
%922 = llvm.getelementptr %908[%921] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %907, %922 : !llvm<"float*">
%923 = llvm.add %870, %869 : !llvm.i64
llvm.br ^bb80(%923 : !llvm.i64)
^bb82: // pred: ^bb80
%924 = llvm.add %850, %849 : !llvm.i64
llvm.br ^bb78(%924 : !llvm.i64)
^bb83: // pred: ^bb78
%925 = llvm.add %805, %804 : !llvm.i64
llvm.br ^bb76(%925 : !llvm.i64)
^bb84: // pred: ^bb76
%926 = llvm.add %800, %799 : !llvm.i64
llvm.br ^bb74(%926 : !llvm.i64)
^bb85: // pred: ^bb74
%927 = llvm.add %795, %794 : !llvm.i64
llvm.br ^bb72(%927 : !llvm.i64)
^bb86: // pred: ^bb72
%928 = llvm.add %790, %789 : !llvm.i64
llvm.br ^bb70(%928 : !llvm.i64)
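// ^bb87: the second Conv's 16x8x5x5 weights appear to come straight from the packed
// weight pool: a float* view into @packedConst at byte offset 11040, wrapped in a
// descriptor with strides 200/25/5/1.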
^bb87: // pred: ^bb70
%929 = llvm.mlir.constant(1 : i64) : !llvm.i64
%930 = llvm.mlir.addressof @packedConst : !llvm<"i8**">
%931 = llvm.load %930 : !llvm<"i8**">
%932 = llvm.mlir.constant(11040 : i64) : !llvm.i64
%933 = llvm.getelementptr %931[%932] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
%934 = llvm.bitcast %933 : !llvm<"i8*"> to !llvm<"float*">
%935 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%936 = llvm.insertvalue %934, %935[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%937 = llvm.insertvalue %934, %936[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%938 = llvm.mlir.constant(0 : index) : !llvm.i64
%939 = llvm.insertvalue %938, %937[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%940 = llvm.mlir.constant(16 : index) : !llvm.i64
%941 = llvm.insertvalue %940, %939[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%942 = llvm.mlir.constant(200 : index) : !llvm.i64
%943 = llvm.insertvalue %942, %941[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%944 = llvm.mlir.constant(8 : index) : !llvm.i64
%945 = llvm.insertvalue %944, %943[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%946 = llvm.mlir.constant(25 : index) : !llvm.i64
%947 = llvm.insertvalue %946, %945[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%948 = llvm.mlir.constant(5 : index) : !llvm.i64
%949 = llvm.insertvalue %948, %947[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%950 = llvm.mlir.constant(5 : index) : !llvm.i64
%951 = llvm.insertvalue %950, %949[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%952 = llvm.mlir.constant(5 : index) : !llvm.i64
%953 = llvm.insertvalue %952, %951[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%954 = llvm.mlir.constant(1 : index) : !llvm.i64
%955 = llvm.insertvalue %954, %953[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
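// ^bb88..^bb98: seemingly zero-fills (%23) the 1x8x18x18 buffer %209, i.e. the
// pad-2 input of the second Conv.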
%956 = llvm.mlir.constant(0 : index) : !llvm.i64
%957 = llvm.mlir.constant(1 : index) : !llvm.i64
%958 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb88(%956 : !llvm.i64)
^bb88(%959: !llvm.i64): // 2 preds: ^bb87, ^bb98
%960 = llvm.icmp "slt" %959, %957 : !llvm.i64
llvm.cond_br %960, ^bb89, ^bb99
^bb89: // pred: ^bb88
%961 = llvm.mlir.constant(0 : index) : !llvm.i64
%962 = llvm.mlir.constant(8 : index) : !llvm.i64
%963 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb90(%961 : !llvm.i64)
^bb90(%964: !llvm.i64): // 2 preds: ^bb89, ^bb97
%965 = llvm.icmp "slt" %964, %962 : !llvm.i64
llvm.cond_br %965, ^bb91, ^bb98
^bb91: // pred: ^bb90
%966 = llvm.mlir.constant(0 : index) : !llvm.i64
%967 = llvm.mlir.constant(18 : index) : !llvm.i64
%968 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb92(%966 : !llvm.i64)
^bb92(%969: !llvm.i64): // 2 preds: ^bb91, ^bb96
%970 = llvm.icmp "slt" %969, %967 : !llvm.i64
llvm.cond_br %970, ^bb93, ^bb97
^bb93: // pred: ^bb92
%971 = llvm.mlir.constant(0 : index) : !llvm.i64
%972 = llvm.mlir.constant(18 : index) : !llvm.i64
%973 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb94(%971 : !llvm.i64)
^bb94(%974: !llvm.i64): // 2 preds: ^bb93, ^bb95
%975 = llvm.icmp "slt" %974, %972 : !llvm.i64
llvm.cond_br %975, ^bb95, ^bb96
^bb95: // pred: ^bb94
%976 = llvm.extractvalue %209[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%977 = llvm.mlir.constant(0 : index) : !llvm.i64
%978 = llvm.mlir.constant(2592 : index) : !llvm.i64
%979 = llvm.mul %959, %978 : !llvm.i64
%980 = llvm.add %977, %979 : !llvm.i64
%981 = llvm.mlir.constant(324 : index) : !llvm.i64
%982 = llvm.mul %964, %981 : !llvm.i64
%983 = llvm.add %980, %982 : !llvm.i64
%984 = llvm.mlir.constant(18 : index) : !llvm.i64
%985 = llvm.mul %969, %984 : !llvm.i64
%986 = llvm.add %983, %985 : !llvm.i64
%987 = llvm.mlir.constant(1 : index) : !llvm.i64
%988 = llvm.mul %974, %987 : !llvm.i64
%989 = llvm.add %986, %988 : !llvm.i64
%990 = llvm.getelementptr %976[%989] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %23, %990 : !llvm<"float*">
%991 = llvm.add %974, %973 : !llvm.i64
llvm.br ^bb94(%991 : !llvm.i64)
^bb96: // pred: ^bb94
%992 = llvm.add %969, %968 : !llvm.i64
llvm.br ^bb92(%992 : !llvm.i64)
^bb97: // pred: ^bb92
%993 = llvm.add %964, %963 : !llvm.i64
llvm.br ^bb90(%993 : !llvm.i64)
^bb98: // pred: ^bb90
%994 = llvm.add %959, %958 : !llvm.i64
llvm.br ^bb88(%994 : !llvm.i64)
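// ^bb100..^bb110: copies the pooled 1x8x14x14 map %233 into the centre of the
// padded buffer %209 (row/col offset +2).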
^bb99: // pred: ^bb88
%995 = llvm.mlir.constant(0 : index) : !llvm.i64
%996 = llvm.mlir.constant(1 : index) : !llvm.i64
%997 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb100(%995 : !llvm.i64)
^bb100(%998: !llvm.i64): // 2 preds: ^bb99, ^bb110
%999 = llvm.icmp "slt" %998, %996 : !llvm.i64
llvm.cond_br %999, ^bb101, ^bb111
^bb101: // pred: ^bb100
%1000 = llvm.mlir.constant(0 : index) : !llvm.i64
%1001 = llvm.mlir.constant(8 : index) : !llvm.i64
%1002 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb102(%1000 : !llvm.i64)
^bb102(%1003: !llvm.i64): // 2 preds: ^bb101, ^bb109
%1004 = llvm.icmp "slt" %1003, %1001 : !llvm.i64
llvm.cond_br %1004, ^bb103, ^bb110
^bb103: // pred: ^bb102
%1005 = llvm.mlir.constant(0 : index) : !llvm.i64
%1006 = llvm.mlir.constant(14 : index) : !llvm.i64
%1007 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb104(%1005 : !llvm.i64)
^bb104(%1008: !llvm.i64): // 2 preds: ^bb103, ^bb108
%1009 = llvm.icmp "slt" %1008, %1006 : !llvm.i64
llvm.cond_br %1009, ^bb105, ^bb109
^bb105: // pred: ^bb104
%1010 = llvm.mlir.constant(0 : index) : !llvm.i64
%1011 = llvm.mlir.constant(14 : index) : !llvm.i64
%1012 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb106(%1010 : !llvm.i64)
^bb106(%1013: !llvm.i64): // 2 preds: ^bb105, ^bb107
%1014 = llvm.icmp "slt" %1013, %1011 : !llvm.i64
llvm.cond_br %1014, ^bb107, ^bb108
^bb107: // pred: ^bb106
%1015 = llvm.extractvalue %233[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1016 = llvm.mlir.constant(0 : index) : !llvm.i64
%1017 = llvm.mlir.constant(1568 : index) : !llvm.i64
%1018 = llvm.mul %998, %1017 : !llvm.i64
%1019 = llvm.add %1016, %1018 : !llvm.i64
%1020 = llvm.mlir.constant(196 : index) : !llvm.i64
%1021 = llvm.mul %1003, %1020 : !llvm.i64
%1022 = llvm.add %1019, %1021 : !llvm.i64
%1023 = llvm.mlir.constant(14 : index) : !llvm.i64
%1024 = llvm.mul %1008, %1023 : !llvm.i64
%1025 = llvm.add %1022, %1024 : !llvm.i64
%1026 = llvm.mlir.constant(1 : index) : !llvm.i64
%1027 = llvm.mul %1013, %1026 : !llvm.i64
%1028 = llvm.add %1025, %1027 : !llvm.i64
%1029 = llvm.getelementptr %1015[%1028] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1030 = llvm.load %1029 : !llvm<"float*">
%1031 = llvm.mlir.constant(2 : index) : !llvm.i64
%1032 = llvm.add %1008, %1031 : !llvm.i64
%1033 = llvm.mlir.constant(2 : index) : !llvm.i64
%1034 = llvm.add %1013, %1033 : !llvm.i64
%1035 = llvm.extractvalue %209[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1036 = llvm.mlir.constant(0 : index) : !llvm.i64
%1037 = llvm.mlir.constant(2592 : index) : !llvm.i64
%1038 = llvm.mul %998, %1037 : !llvm.i64
%1039 = llvm.add %1036, %1038 : !llvm.i64
%1040 = llvm.mlir.constant(324 : index) : !llvm.i64
%1041 = llvm.mul %1003, %1040 : !llvm.i64
%1042 = llvm.add %1039, %1041 : !llvm.i64
%1043 = llvm.mlir.constant(18 : index) : !llvm.i64
%1044 = llvm.mul %1032, %1043 : !llvm.i64
%1045 = llvm.add %1042, %1044 : !llvm.i64
%1046 = llvm.mlir.constant(1 : index) : !llvm.i64
%1047 = llvm.mul %1034, %1046 : !llvm.i64
%1048 = llvm.add %1045, %1047 : !llvm.i64
%1049 = llvm.getelementptr %1035[%1048] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1030, %1049 : !llvm<"float*">
%1050 = llvm.add %1013, %1012 : !llvm.i64
llvm.br ^bb106(%1050 : !llvm.i64)
^bb108: // pred: ^bb106
%1051 = llvm.add %1008, %1007 : !llvm.i64
llvm.br ^bb104(%1051 : !llvm.i64)
^bb109: // pred: ^bb104
%1052 = llvm.add %1003, %1002 : !llvm.i64
llvm.br ^bb102(%1052 : !llvm.i64)
^bb110: // pred: ^bb102
%1053 = llvm.add %998, %997 : !llvm.i64
llvm.br ^bb100(%1053 : !llvm.i64)
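// ^bb112..^bb131: presumably the second Conv: 16 filters x 8 channels x 5x5 (%955)
// over the padded %209, accumulated into the 1x16x14x14 buffer %185.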
^bb111: // pred: ^bb100
%1054 = llvm.mlir.constant(0 : index) : !llvm.i64
%1055 = llvm.mlir.constant(1 : index) : !llvm.i64
%1056 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb112(%1054 : !llvm.i64)
^bb112(%1057: !llvm.i64): // 2 preds: ^bb111, ^bb131
%1058 = llvm.icmp "slt" %1057, %1055 : !llvm.i64
llvm.cond_br %1058, ^bb113, ^bb132
^bb113: // pred: ^bb112
%1059 = llvm.mlir.constant(0 : index) : !llvm.i64
%1060 = llvm.mlir.constant(16 : index) : !llvm.i64
%1061 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb114(%1059 : !llvm.i64)
^bb114(%1062: !llvm.i64): // 2 preds: ^bb113, ^bb130
%1063 = llvm.icmp "slt" %1062, %1060 : !llvm.i64
llvm.cond_br %1063, ^bb115, ^bb131
^bb115: // pred: ^bb114
%1064 = llvm.mlir.constant(0 : index) : !llvm.i64
%1065 = llvm.mlir.constant(14 : index) : !llvm.i64
%1066 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb116(%1064 : !llvm.i64)
^bb116(%1067: !llvm.i64): // 2 preds: ^bb115, ^bb129
%1068 = llvm.icmp "slt" %1067, %1065 : !llvm.i64
llvm.cond_br %1068, ^bb117, ^bb130
^bb117: // pred: ^bb116
%1069 = llvm.mlir.constant(0 : index) : !llvm.i64
%1070 = llvm.mlir.constant(14 : index) : !llvm.i64
%1071 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb118(%1069 : !llvm.i64)
^bb118(%1072: !llvm.i64): // 2 preds: ^bb117, ^bb128
%1073 = llvm.icmp "slt" %1072, %1070 : !llvm.i64
llvm.cond_br %1073, ^bb119, ^bb129
^bb119: // pred: ^bb118
%1074 = llvm.extractvalue %185[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1075 = llvm.mlir.constant(0 : index) : !llvm.i64
%1076 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1077 = llvm.mul %1057, %1076 : !llvm.i64
%1078 = llvm.add %1075, %1077 : !llvm.i64
%1079 = llvm.mlir.constant(196 : index) : !llvm.i64
%1080 = llvm.mul %1062, %1079 : !llvm.i64
%1081 = llvm.add %1078, %1080 : !llvm.i64
%1082 = llvm.mlir.constant(14 : index) : !llvm.i64
%1083 = llvm.mul %1067, %1082 : !llvm.i64
%1084 = llvm.add %1081, %1083 : !llvm.i64
%1085 = llvm.mlir.constant(1 : index) : !llvm.i64
%1086 = llvm.mul %1072, %1085 : !llvm.i64
%1087 = llvm.add %1084, %1086 : !llvm.i64
%1088 = llvm.getelementptr %1074[%1087] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %23, %1088 : !llvm<"float*">
%1089 = llvm.mlir.constant(0 : index) : !llvm.i64
%1090 = llvm.mlir.constant(8 : index) : !llvm.i64
%1091 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb120(%1089 : !llvm.i64)
^bb120(%1092: !llvm.i64): // 2 preds: ^bb119, ^bb127
%1093 = llvm.icmp "slt" %1092, %1090 : !llvm.i64
llvm.cond_br %1093, ^bb121, ^bb128
^bb121: // pred: ^bb120
%1094 = llvm.mlir.constant(0 : index) : !llvm.i64
%1095 = llvm.mlir.constant(5 : index) : !llvm.i64
%1096 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb122(%1094 : !llvm.i64)
^bb122(%1097: !llvm.i64): // 2 preds: ^bb121, ^bb126
%1098 = llvm.icmp "slt" %1097, %1095 : !llvm.i64
llvm.cond_br %1098, ^bb123, ^bb127
^bb123: // pred: ^bb122
%1099 = llvm.mlir.constant(0 : index) : !llvm.i64
%1100 = llvm.mlir.constant(5 : index) : !llvm.i64
%1101 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb124(%1099 : !llvm.i64)
^bb124(%1102: !llvm.i64): // 2 preds: ^bb123, ^bb125
%1103 = llvm.icmp "slt" %1102, %1100 : !llvm.i64
llvm.cond_br %1103, ^bb125, ^bb126
^bb125: // pred: ^bb124
%1104 = llvm.add %1067, %1097 : !llvm.i64
%1105 = llvm.add %1072, %1102 : !llvm.i64
%1106 = llvm.extractvalue %209[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1107 = llvm.mlir.constant(0 : index) : !llvm.i64
%1108 = llvm.mlir.constant(2592 : index) : !llvm.i64
%1109 = llvm.mul %1057, %1108 : !llvm.i64
%1110 = llvm.add %1107, %1109 : !llvm.i64
%1111 = llvm.mlir.constant(324 : index) : !llvm.i64
%1112 = llvm.mul %1092, %1111 : !llvm.i64
%1113 = llvm.add %1110, %1112 : !llvm.i64
%1114 = llvm.mlir.constant(18 : index) : !llvm.i64
%1115 = llvm.mul %1104, %1114 : !llvm.i64
%1116 = llvm.add %1113, %1115 : !llvm.i64
%1117 = llvm.mlir.constant(1 : index) : !llvm.i64
%1118 = llvm.mul %1105, %1117 : !llvm.i64
%1119 = llvm.add %1116, %1118 : !llvm.i64
%1120 = llvm.getelementptr %1106[%1119] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1121 = llvm.load %1120 : !llvm<"float*">
%1122 = llvm.extractvalue %955[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1123 = llvm.mlir.constant(0 : index) : !llvm.i64
%1124 = llvm.mlir.constant(200 : index) : !llvm.i64
%1125 = llvm.mul %1062, %1124 : !llvm.i64
%1126 = llvm.add %1123, %1125 : !llvm.i64
%1127 = llvm.mlir.constant(25 : index) : !llvm.i64
%1128 = llvm.mul %1092, %1127 : !llvm.i64
%1129 = llvm.add %1126, %1128 : !llvm.i64
%1130 = llvm.mlir.constant(5 : index) : !llvm.i64
%1131 = llvm.mul %1097, %1130 : !llvm.i64
%1132 = llvm.add %1129, %1131 : !llvm.i64
%1133 = llvm.mlir.constant(1 : index) : !llvm.i64
%1134 = llvm.mul %1102, %1133 : !llvm.i64
%1135 = llvm.add %1132, %1134 : !llvm.i64
%1136 = llvm.getelementptr %1122[%1135] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1137 = llvm.load %1136 : !llvm<"float*">
%1138 = llvm.extractvalue %185[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1139 = llvm.mlir.constant(0 : index) : !llvm.i64
%1140 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1141 = llvm.mul %1057, %1140 : !llvm.i64
%1142 = llvm.add %1139, %1141 : !llvm.i64
%1143 = llvm.mlir.constant(196 : index) : !llvm.i64
%1144 = llvm.mul %1062, %1143 : !llvm.i64
%1145 = llvm.add %1142, %1144 : !llvm.i64
%1146 = llvm.mlir.constant(14 : index) : !llvm.i64
%1147 = llvm.mul %1067, %1146 : !llvm.i64
%1148 = llvm.add %1145, %1147 : !llvm.i64
%1149 = llvm.mlir.constant(1 : index) : !llvm.i64
%1150 = llvm.mul %1072, %1149 : !llvm.i64
%1151 = llvm.add %1148, %1150 : !llvm.i64
%1152 = llvm.getelementptr %1138[%1151] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1153 = llvm.load %1152 : !llvm<"float*">
%1154 = llvm.fmul %1121, %1137 : !llvm.float
%1155 = llvm.fadd %1153, %1154 : !llvm.float
%1156 = llvm.extractvalue %185[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1157 = llvm.mlir.constant(0 : index) : !llvm.i64
%1158 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1159 = llvm.mul %1057, %1158 : !llvm.i64
%1160 = llvm.add %1157, %1159 : !llvm.i64
%1161 = llvm.mlir.constant(196 : index) : !llvm.i64
%1162 = llvm.mul %1062, %1161 : !llvm.i64
%1163 = llvm.add %1160, %1162 : !llvm.i64
%1164 = llvm.mlir.constant(14 : index) : !llvm.i64
%1165 = llvm.mul %1067, %1164 : !llvm.i64
%1166 = llvm.add %1163, %1165 : !llvm.i64
%1167 = llvm.mlir.constant(1 : index) : !llvm.i64
%1168 = llvm.mul %1072, %1167 : !llvm.i64
%1169 = llvm.add %1166, %1168 : !llvm.i64
%1170 = llvm.getelementptr %1156[%1169] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1155, %1170 : !llvm<"float*">
%1171 = llvm.add %1102, %1101 : !llvm.i64
llvm.br ^bb124(%1171 : !llvm.i64)
^bb126: // pred: ^bb124
%1172 = llvm.add %1097, %1096 : !llvm.i64
llvm.br ^bb122(%1172 : !llvm.i64)
^bb127: // pred: ^bb122
%1173 = llvm.add %1092, %1091 : !llvm.i64
llvm.br ^bb120(%1173 : !llvm.i64)
^bb128: // pred: ^bb120
%1174 = llvm.add %1072, %1071 : !llvm.i64
llvm.br ^bb118(%1174 : !llvm.i64)
^bb129: // pred: ^bb118
%1175 = llvm.add %1067, %1066 : !llvm.i64
llvm.br ^bb116(%1175 : !llvm.i64)
^bb130: // pred: ^bb116
%1176 = llvm.add %1062, %1061 : !llvm.i64
llvm.br ^bb114(%1176 : !llvm.i64)
^bb131: // pred: ^bb114
%1177 = llvm.add %1057, %1056 : !llvm.i64
llvm.br ^bb112(%1177 : !llvm.i64)
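// ^bb132: stages the second conv's 16-element bias @constant_4 on the stack,
// mirroring the @constant_2 staging above.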
^bb132: // pred: ^bb112
%1178 = llvm.mlir.constant(1 : i64) : !llvm.i64
%1179 = llvm.alloca %1178 x !llvm<"[16 x [1 x [1 x float]]]"> : (!llvm.i64) -> !llvm<"[16 x [1 x [1 x float]]]*">
%1180 = llvm.bitcast %1179 : !llvm<"[16 x [1 x [1 x float]]]*"> to !llvm<"i8*">
%1181 = llvm.mlir.addressof @constant_4 : !llvm<"[16 x [1 x [1 x float]]]*">
%1182 = llvm.bitcast %1181 : !llvm<"[16 x [1 x [1 x float]]]*"> to !llvm<"i8*">
%1183 = llvm.mlir.constant(4 : i64) : !llvm.i64
%1184 = llvm.mlir.constant(16 : i64) : !llvm.i64
%1185 = llvm.mul %1183, %1184 : !llvm.i64
%1186 = llvm.sext %1185 : !llvm.i64 to !llvm.i64
%1187 = llvm.mlir.constant(false) : !llvm.i1
%1188 = llvm.call @llvm.memcpy.p0i8.p0i8.i64(%1180, %1182, %1186, %1187) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> !llvm.void
%1189 = llvm.bitcast %1179 : !llvm<"[16 x [1 x [1 x float]]]*"> to !llvm<"float*">
%1190 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1191 = llvm.insertvalue %1189, %1190[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1192 = llvm.insertvalue %1189, %1191[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1193 = llvm.mlir.constant(0 : index) : !llvm.i64
%1194 = llvm.insertvalue %1193, %1192[2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1195 = llvm.mlir.constant(16 : index) : !llvm.i64
%1196 = llvm.insertvalue %1195, %1194[3, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1197 = llvm.mlir.constant(1 : index) : !llvm.i64
%1198 = llvm.insertvalue %1197, %1196[4, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1199 = llvm.mlir.constant(1 : index) : !llvm.i64
%1200 = llvm.insertvalue %1199, %1198[3, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1201 = llvm.mlir.constant(1 : index) : !llvm.i64
%1202 = llvm.insertvalue %1201, %1200[4, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1203 = llvm.mlir.constant(1 : index) : !llvm.i64
%1204 = llvm.insertvalue %1203, %1202[3, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1205 = llvm.mlir.constant(1 : index) : !llvm.i64
%1206 = llvm.insertvalue %1205, %1204[4, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
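// ^bb133..^bb143: appears to be the per-channel bias add over 1x16x14x14:
// %161 = %185 + bias (%1206).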
%1207 = llvm.mlir.constant(0 : index) : !llvm.i64
%1208 = llvm.mlir.constant(1 : index) : !llvm.i64
%1209 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb133(%1207 : !llvm.i64)
^bb133(%1210: !llvm.i64): // 2 preds: ^bb132, ^bb143
%1211 = llvm.icmp "slt" %1210, %1208 : !llvm.i64
llvm.cond_br %1211, ^bb134, ^bb144
^bb134: // pred: ^bb133
%1212 = llvm.mlir.constant(0 : index) : !llvm.i64
%1213 = llvm.mlir.constant(16 : index) : !llvm.i64
%1214 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb135(%1212 : !llvm.i64)
^bb135(%1215: !llvm.i64): // 2 preds: ^bb134, ^bb142
%1216 = llvm.icmp "slt" %1215, %1213 : !llvm.i64
llvm.cond_br %1216, ^bb136, ^bb143
^bb136: // pred: ^bb135
%1217 = llvm.mlir.constant(0 : index) : !llvm.i64
%1218 = llvm.mlir.constant(14 : index) : !llvm.i64
%1219 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb137(%1217 : !llvm.i64)
^bb137(%1220: !llvm.i64): // 2 preds: ^bb136, ^bb141
%1221 = llvm.icmp "slt" %1220, %1218 : !llvm.i64
llvm.cond_br %1221, ^bb138, ^bb142
^bb138: // pred: ^bb137
%1222 = llvm.mlir.constant(0 : index) : !llvm.i64
%1223 = llvm.mlir.constant(14 : index) : !llvm.i64
%1224 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb139(%1222 : !llvm.i64)
^bb139(%1225: !llvm.i64): // 2 preds: ^bb138, ^bb140
%1226 = llvm.icmp "slt" %1225, %1223 : !llvm.i64
llvm.cond_br %1226, ^bb140, ^bb141
^bb140: // pred: ^bb139
%1227 = llvm.mlir.constant(0 : index) : !llvm.i64
%1228 = llvm.extractvalue %185[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1229 = llvm.mlir.constant(0 : index) : !llvm.i64
%1230 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1231 = llvm.mul %1227, %1230 : !llvm.i64
%1232 = llvm.add %1229, %1231 : !llvm.i64
%1233 = llvm.mlir.constant(196 : index) : !llvm.i64
%1234 = llvm.mul %1215, %1233 : !llvm.i64
%1235 = llvm.add %1232, %1234 : !llvm.i64
%1236 = llvm.mlir.constant(14 : index) : !llvm.i64
%1237 = llvm.mul %1220, %1236 : !llvm.i64
%1238 = llvm.add %1235, %1237 : !llvm.i64
%1239 = llvm.mlir.constant(1 : index) : !llvm.i64
%1240 = llvm.mul %1225, %1239 : !llvm.i64
%1241 = llvm.add %1238, %1240 : !llvm.i64
%1242 = llvm.getelementptr %1228[%1241] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1243 = llvm.load %1242 : !llvm<"float*">
%1244 = llvm.mlir.constant(0 : index) : !llvm.i64
%1245 = llvm.mlir.constant(0 : index) : !llvm.i64
%1246 = llvm.extractvalue %1206[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
%1247 = llvm.mlir.constant(0 : index) : !llvm.i64
%1248 = llvm.mlir.constant(1 : index) : !llvm.i64
%1249 = llvm.mul %1215, %1248 : !llvm.i64
%1250 = llvm.add %1247, %1249 : !llvm.i64
%1251 = llvm.mlir.constant(1 : index) : !llvm.i64
%1252 = llvm.mul %1244, %1251 : !llvm.i64
%1253 = llvm.add %1250, %1252 : !llvm.i64
%1254 = llvm.mlir.constant(1 : index) : !llvm.i64
%1255 = llvm.mul %1245, %1254 : !llvm.i64
%1256 = llvm.add %1253, %1255 : !llvm.i64
%1257 = llvm.getelementptr %1246[%1256] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1258 = llvm.load %1257 : !llvm<"float*">
%1259 = llvm.fadd %1243, %1258 : !llvm.float
%1260 = llvm.extractvalue %161[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1261 = llvm.mlir.constant(0 : index) : !llvm.i64
%1262 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1263 = llvm.mul %1210, %1262 : !llvm.i64
%1264 = llvm.add %1261, %1263 : !llvm.i64
%1265 = llvm.mlir.constant(196 : index) : !llvm.i64
%1266 = llvm.mul %1215, %1265 : !llvm.i64
%1267 = llvm.add %1264, %1266 : !llvm.i64
%1268 = llvm.mlir.constant(14 : index) : !llvm.i64
%1269 = llvm.mul %1220, %1268 : !llvm.i64
%1270 = llvm.add %1267, %1269 : !llvm.i64
%1271 = llvm.mlir.constant(1 : index) : !llvm.i64
%1272 = llvm.mul %1225, %1271 : !llvm.i64
%1273 = llvm.add %1270, %1272 : !llvm.i64
%1274 = llvm.getelementptr %1260[%1273] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1259, %1274 : !llvm<"float*">
%1275 = llvm.add %1225, %1224 : !llvm.i64
llvm.br ^bb139(%1275 : !llvm.i64)
^bb141: // pred: ^bb139
%1276 = llvm.add %1220, %1219 : !llvm.i64
llvm.br ^bb137(%1276 : !llvm.i64)
^bb142: // pred: ^bb137
%1277 = llvm.add %1215, %1214 : !llvm.i64
llvm.br ^bb135(%1277 : !llvm.i64)
^bb143: // pred: ^bb135
%1278 = llvm.add %1210, %1209 : !llvm.i64
llvm.br ^bb133(%1278 : !llvm.i64)
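// ^bb145..^bb155: the second ReLU, by the same fcmp "olt"/select pattern:
// %137 = max(%161, %23).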
^bb144: // pred: ^bb133
%1279 = llvm.mlir.constant(0 : index) : !llvm.i64
%1280 = llvm.mlir.constant(1 : index) : !llvm.i64
%1281 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb145(%1279 : !llvm.i64)
^bb145(%1282: !llvm.i64): // 2 preds: ^bb144, ^bb155
%1283 = llvm.icmp "slt" %1282, %1280 : !llvm.i64
llvm.cond_br %1283, ^bb146, ^bb156
^bb146: // pred: ^bb145
%1284 = llvm.mlir.constant(0 : index) : !llvm.i64
%1285 = llvm.mlir.constant(16 : index) : !llvm.i64
%1286 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb147(%1284 : !llvm.i64)
^bb147(%1287: !llvm.i64): // 2 preds: ^bb146, ^bb154
%1288 = llvm.icmp "slt" %1287, %1285 : !llvm.i64
llvm.cond_br %1288, ^bb148, ^bb155
^bb148: // pred: ^bb147
%1289 = llvm.mlir.constant(0 : index) : !llvm.i64
%1290 = llvm.mlir.constant(14 : index) : !llvm.i64
%1291 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb149(%1289 : !llvm.i64)
^bb149(%1292: !llvm.i64): // 2 preds: ^bb148, ^bb153
%1293 = llvm.icmp "slt" %1292, %1290 : !llvm.i64
llvm.cond_br %1293, ^bb150, ^bb154
^bb150: // pred: ^bb149
%1294 = llvm.mlir.constant(0 : index) : !llvm.i64
%1295 = llvm.mlir.constant(14 : index) : !llvm.i64
%1296 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb151(%1294 : !llvm.i64)
^bb151(%1297: !llvm.i64): // 2 preds: ^bb150, ^bb152
%1298 = llvm.icmp "slt" %1297, %1295 : !llvm.i64
llvm.cond_br %1298, ^bb152, ^bb153
^bb152: // pred: ^bb151
%1299 = llvm.extractvalue %161[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1300 = llvm.mlir.constant(0 : index) : !llvm.i64
%1301 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1302 = llvm.mul %1282, %1301 : !llvm.i64
%1303 = llvm.add %1300, %1302 : !llvm.i64
%1304 = llvm.mlir.constant(196 : index) : !llvm.i64
%1305 = llvm.mul %1287, %1304 : !llvm.i64
%1306 = llvm.add %1303, %1305 : !llvm.i64
%1307 = llvm.mlir.constant(14 : index) : !llvm.i64
%1308 = llvm.mul %1292, %1307 : !llvm.i64
%1309 = llvm.add %1306, %1308 : !llvm.i64
%1310 = llvm.mlir.constant(1 : index) : !llvm.i64
%1311 = llvm.mul %1297, %1310 : !llvm.i64
%1312 = llvm.add %1309, %1311 : !llvm.i64
%1313 = llvm.getelementptr %1299[%1312] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1314 = llvm.load %1313 : !llvm<"float*">
%1315 = llvm.fcmp "olt" %1314, %23 : !llvm.float
%1316 = llvm.select %1315, %23, %1314 : !llvm.i1, !llvm.float
%1317 = llvm.extractvalue %137[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1318 = llvm.mlir.constant(0 : index) : !llvm.i64
%1319 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1320 = llvm.mul %1282, %1319 : !llvm.i64
%1321 = llvm.add %1318, %1320 : !llvm.i64
%1322 = llvm.mlir.constant(196 : index) : !llvm.i64
%1323 = llvm.mul %1287, %1322 : !llvm.i64
%1324 = llvm.add %1321, %1323 : !llvm.i64
%1325 = llvm.mlir.constant(14 : index) : !llvm.i64
%1326 = llvm.mul %1292, %1325 : !llvm.i64
%1327 = llvm.add %1324, %1326 : !llvm.i64
%1328 = llvm.mlir.constant(1 : index) : !llvm.i64
%1329 = llvm.mul %1297, %1328 : !llvm.i64
%1330 = llvm.add %1327, %1329 : !llvm.i64
%1331 = llvm.getelementptr %1317[%1330] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1316, %1331 : !llvm<"float*">
%1332 = llvm.add %1297, %1296 : !llvm.i64
llvm.br ^bb151(%1332 : !llvm.i64)
^bb153: // pred: ^bb151
%1333 = llvm.add %1292, %1291 : !llvm.i64
llvm.br ^bb149(%1333 : !llvm.i64)
^bb154: // pred: ^bb149
%1334 = llvm.add %1287, %1286 : !llvm.i64
llvm.br ^bb147(%1334 : !llvm.i64)
^bb155: // pred: ^bb147
%1335 = llvm.add %1282, %1281 : !llvm.i64
llvm.br ^bb145(%1335 : !llvm.i64)
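// ^bb157..^bb173: likely MaxPool 3x3 with stride 3 (14x14 -> 4x4), writing the
// running maxima of %137 into the 1x16x4x4 buffer %113.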
^bb156: // pred: ^bb145
%1336 = llvm.mlir.constant(0 : index) : !llvm.i64
%1337 = llvm.mlir.constant(1 : index) : !llvm.i64
%1338 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb157(%1336 : !llvm.i64)
^bb157(%1339: !llvm.i64): // 2 preds: ^bb156, ^bb173
%1340 = llvm.icmp "slt" %1339, %1337 : !llvm.i64
llvm.cond_br %1340, ^bb158, ^bb174
^bb158: // pred: ^bb157
%1341 = llvm.mlir.constant(0 : index) : !llvm.i64
%1342 = llvm.mlir.constant(16 : index) : !llvm.i64
%1343 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb159(%1341 : !llvm.i64)
^bb159(%1344: !llvm.i64): // 2 preds: ^bb158, ^bb172
%1345 = llvm.icmp "slt" %1344, %1342 : !llvm.i64
llvm.cond_br %1345, ^bb160, ^bb173
^bb160: // pred: ^bb159
%1346 = llvm.mlir.constant(0 : index) : !llvm.i64
%1347 = llvm.mlir.constant(4 : index) : !llvm.i64
%1348 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb161(%1346 : !llvm.i64)
^bb161(%1349: !llvm.i64): // 2 preds: ^bb160, ^bb171
%1350 = llvm.icmp "slt" %1349, %1347 : !llvm.i64
llvm.cond_br %1350, ^bb162, ^bb172
^bb162: // pred: ^bb161
%1351 = llvm.mlir.constant(0 : index) : !llvm.i64
%1352 = llvm.mlir.constant(4 : index) : !llvm.i64
%1353 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb163(%1351 : !llvm.i64)
^bb163(%1354: !llvm.i64): // 2 preds: ^bb162, ^bb170
%1355 = llvm.icmp "slt" %1354, %1352 : !llvm.i64
llvm.cond_br %1355, ^bb164, ^bb171
^bb164: // pred: ^bb163
%1356 = llvm.extractvalue %113[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1357 = llvm.mlir.constant(0 : index) : !llvm.i64
%1358 = llvm.mlir.constant(256 : index) : !llvm.i64
%1359 = llvm.mul %1339, %1358 : !llvm.i64
%1360 = llvm.add %1357, %1359 : !llvm.i64
%1361 = llvm.mlir.constant(16 : index) : !llvm.i64
%1362 = llvm.mul %1344, %1361 : !llvm.i64
%1363 = llvm.add %1360, %1362 : !llvm.i64
%1364 = llvm.mlir.constant(4 : index) : !llvm.i64
%1365 = llvm.mul %1349, %1364 : !llvm.i64
%1366 = llvm.add %1363, %1365 : !llvm.i64
%1367 = llvm.mlir.constant(1 : index) : !llvm.i64
%1368 = llvm.mul %1354, %1367 : !llvm.i64
%1369 = llvm.add %1366, %1368 : !llvm.i64
%1370 = llvm.getelementptr %1356[%1369] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %17, %1370 : !llvm<"float*">
%1371 = llvm.mlir.constant(0 : index) : !llvm.i64
%1372 = llvm.mlir.constant(3 : index) : !llvm.i64
%1373 = llvm.mul %1349, %1372 : !llvm.i64
%1374 = llvm.icmp "sgt" %1371, %1373 : !llvm.i64
%1375 = llvm.select %1374, %1371, %1373 : !llvm.i1, !llvm.i64
%1376 = llvm.mlir.constant(0 : index) : !llvm.i64
%1377 = llvm.mlir.constant(3 : index) : !llvm.i64
%1378 = llvm.mul %1354, %1377 : !llvm.i64
%1379 = llvm.icmp "sgt" %1376, %1378 : !llvm.i64
%1380 = llvm.select %1379, %1376, %1378 : !llvm.i1, !llvm.i64
%1381 = llvm.mlir.constant(0 : index) : !llvm.i64
%1382 = llvm.mlir.constant(14 : index) : !llvm.i64
%1383 = llvm.mlir.constant(-3 : index) : !llvm.i64
%1384 = llvm.mul %1349, %1383 : !llvm.i64
%1385 = llvm.mlir.constant(14 : index) : !llvm.i64
%1386 = llvm.add %1384, %1385 : !llvm.i64
%1387 = llvm.mlir.constant(3 : index) : !llvm.i64
%1388 = llvm.mul %1349, %1387 : !llvm.i64
%1389 = llvm.mlir.constant(3 : index) : !llvm.i64
%1390 = llvm.add %1388, %1389 : !llvm.i64
%1391 = llvm.mlir.constant(3 : index) : !llvm.i64
%1392 = llvm.icmp "slt" %1382, %1386 : !llvm.i64
%1393 = llvm.select %1392, %1382, %1386 : !llvm.i1, !llvm.i64
%1394 = llvm.icmp "slt" %1393, %1390 : !llvm.i64
%1395 = llvm.select %1394, %1393, %1390 : !llvm.i1, !llvm.i64
%1396 = llvm.icmp "slt" %1395, %1391 : !llvm.i64
%1397 = llvm.select %1396, %1395, %1391 : !llvm.i1, !llvm.i64
%1398 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb165(%1381 : !llvm.i64)
^bb165(%1399: !llvm.i64): // 2 preds: ^bb164, ^bb169
%1400 = llvm.icmp "slt" %1399, %1397 : !llvm.i64
llvm.cond_br %1400, ^bb166, ^bb170
^bb166: // pred: ^bb165
%1401 = llvm.mlir.constant(0 : index) : !llvm.i64
%1402 = llvm.mlir.constant(14 : index) : !llvm.i64
%1403 = llvm.mlir.constant(-3 : index) : !llvm.i64
%1404 = llvm.mul %1354, %1403 : !llvm.i64
%1405 = llvm.mlir.constant(14 : index) : !llvm.i64
%1406 = llvm.add %1404, %1405 : !llvm.i64
%1407 = llvm.mlir.constant(3 : index) : !llvm.i64
%1408 = llvm.mul %1354, %1407 : !llvm.i64
%1409 = llvm.mlir.constant(3 : index) : !llvm.i64
%1410 = llvm.add %1408, %1409 : !llvm.i64
%1411 = llvm.mlir.constant(3 : index) : !llvm.i64
%1412 = llvm.icmp "slt" %1402, %1406 : !llvm.i64
%1413 = llvm.select %1412, %1402, %1406 : !llvm.i1, !llvm.i64
%1414 = llvm.icmp "slt" %1413, %1410 : !llvm.i64
%1415 = llvm.select %1414, %1413, %1410 : !llvm.i1, !llvm.i64
%1416 = llvm.icmp "slt" %1415, %1411 : !llvm.i64
%1417 = llvm.select %1416, %1415, %1411 : !llvm.i1, !llvm.i64
%1418 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb167(%1401 : !llvm.i64)
^bb167(%1419: !llvm.i64): // 2 preds: ^bb166, ^bb168
%1420 = llvm.icmp "slt" %1419, %1417 : !llvm.i64
llvm.cond_br %1420, ^bb168, ^bb169
^bb168: // pred: ^bb167
%1421 = llvm.add %1399, %1375 : !llvm.i64
%1422 = llvm.add %1419, %1380 : !llvm.i64
%1423 = llvm.extractvalue %137[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1424 = llvm.mlir.constant(0 : index) : !llvm.i64
%1425 = llvm.mlir.constant(3136 : index) : !llvm.i64
%1426 = llvm.mul %1339, %1425 : !llvm.i64
%1427 = llvm.add %1424, %1426 : !llvm.i64
%1428 = llvm.mlir.constant(196 : index) : !llvm.i64
%1429 = llvm.mul %1344, %1428 : !llvm.i64
%1430 = llvm.add %1427, %1429 : !llvm.i64
%1431 = llvm.mlir.constant(14 : index) : !llvm.i64
%1432 = llvm.mul %1421, %1431 : !llvm.i64
%1433 = llvm.add %1430, %1432 : !llvm.i64
%1434 = llvm.mlir.constant(1 : index) : !llvm.i64
%1435 = llvm.mul %1422, %1434 : !llvm.i64
%1436 = llvm.add %1433, %1435 : !llvm.i64
%1437 = llvm.getelementptr %1423[%1436] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1438 = llvm.load %1437 : !llvm<"float*">
%1439 = llvm.extractvalue %113[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1440 = llvm.mlir.constant(0 : index) : !llvm.i64
%1441 = llvm.mlir.constant(256 : index) : !llvm.i64
%1442 = llvm.mul %1339, %1441 : !llvm.i64
%1443 = llvm.add %1440, %1442 : !llvm.i64
%1444 = llvm.mlir.constant(16 : index) : !llvm.i64
%1445 = llvm.mul %1344, %1444 : !llvm.i64
%1446 = llvm.add %1443, %1445 : !llvm.i64
%1447 = llvm.mlir.constant(4 : index) : !llvm.i64
%1448 = llvm.mul %1349, %1447 : !llvm.i64
%1449 = llvm.add %1446, %1448 : !llvm.i64
%1450 = llvm.mlir.constant(1 : index) : !llvm.i64
%1451 = llvm.mul %1354, %1450 : !llvm.i64
%1452 = llvm.add %1449, %1451 : !llvm.i64
%1453 = llvm.getelementptr %1439[%1452] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1454 = llvm.load %1453 : !llvm<"float*">
%1455 = llvm.fcmp "ogt" %1454, %1438 : !llvm.float
%1456 = llvm.select %1455, %1454, %1438 : !llvm.i1, !llvm.float
%1457 = llvm.extractvalue %113[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1458 = llvm.mlir.constant(0 : index) : !llvm.i64
%1459 = llvm.mlir.constant(256 : index) : !llvm.i64
%1460 = llvm.mul %1339, %1459 : !llvm.i64
%1461 = llvm.add %1458, %1460 : !llvm.i64
%1462 = llvm.mlir.constant(16 : index) : !llvm.i64
%1463 = llvm.mul %1344, %1462 : !llvm.i64
%1464 = llvm.add %1461, %1463 : !llvm.i64
%1465 = llvm.mlir.constant(4 : index) : !llvm.i64
%1466 = llvm.mul %1349, %1465 : !llvm.i64
%1467 = llvm.add %1464, %1466 : !llvm.i64
%1468 = llvm.mlir.constant(1 : index) : !llvm.i64
%1469 = llvm.mul %1354, %1468 : !llvm.i64
%1470 = llvm.add %1467, %1469 : !llvm.i64
%1471 = llvm.getelementptr %1457[%1470] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1456, %1471 : !llvm<"float*">
%1472 = llvm.add %1419, %1418 : !llvm.i64
llvm.br ^bb167(%1472 : !llvm.i64)
^bb169: // pred: ^bb167
%1473 = llvm.add %1399, %1398 : !llvm.i64
llvm.br ^bb165(%1473 : !llvm.i64)
^bb170: // pred: ^bb165
%1474 = llvm.add %1354, %1353 : !llvm.i64
llvm.br ^bb163(%1474 : !llvm.i64)
^bb171: // pred: ^bb163
%1475 = llvm.add %1349, %1348 : !llvm.i64
llvm.br ^bb161(%1475 : !llvm.i64)
^bb172: // pred: ^bb161
%1476 = llvm.add %1344, %1343 : !llvm.i64
llvm.br ^bb159(%1476 : !llvm.i64)
^bb173: // pred: ^bb159
%1477 = llvm.add %1339, %1338 : !llvm.i64
llvm.br ^bb157(%1477 : !llvm.i64)
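// ^bb174: presumably the Reshape: the 1x16x4x4 result is memcpy'd into the flat
// 1x256 memref %89 (%21 should hold the byte count, 1024), after which the
// 10-element Gemm bias @constant_5 is staged.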
^bb174: // pred: ^bb157
%1478 = llvm.extractvalue %89[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1479 = llvm.bitcast %1478 : !llvm<"float*"> to !llvm<"i8*">
%1480 = llvm.extractvalue %113[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%1481 = llvm.bitcast %1480 : !llvm<"float*"> to !llvm<"i8*">
%1482 = llvm.sext %21 : !llvm.i64 to !llvm.i64
%1483 = llvm.mlir.constant(false) : !llvm.i1
%1484 = llvm.call @llvm.memcpy.p0i8.p0i8.i64(%1479, %1481, %1482, %1483) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> !llvm.void
%1485 = llvm.mlir.constant(1 : i64) : !llvm.i64
%1486 = llvm.alloca %1485 x !llvm<"[1 x [10 x float]]"> : (!llvm.i64) -> !llvm<"[1 x [10 x float]]*">
%1487 = llvm.bitcast %1486 : !llvm<"[1 x [10 x float]]*"> to !llvm<"i8*">
%1488 = llvm.mlir.addressof @constant_5 : !llvm<"[1 x [10 x float]]*">
%1489 = llvm.bitcast %1488 : !llvm<"[1 x [10 x float]]*"> to !llvm<"i8*">
%1490 = llvm.mlir.constant(4 : i64) : !llvm.i64
%1491 = llvm.mlir.constant(10 : i64) : !llvm.i64
%1492 = llvm.mul %1490, %1491 : !llvm.i64
%1493 = llvm.sext %1492 : !llvm.i64 to !llvm.i64
%1494 = llvm.mlir.constant(false) : !llvm.i1
%1495 = llvm.call @llvm.memcpy.p0i8.p0i8.i64(%1487, %1489, %1493, %1494) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> !llvm.void
%1496 = llvm.bitcast %1486 : !llvm<"[1 x [10 x float]]*"> to !llvm<"float*">
%1497 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1498 = llvm.insertvalue %1496, %1497[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1499 = llvm.insertvalue %1496, %1498[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1500 = llvm.mlir.constant(0 : index) : !llvm.i64
%1501 = llvm.insertvalue %1500, %1499[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1502 = llvm.mlir.constant(1 : index) : !llvm.i64
%1503 = llvm.insertvalue %1502, %1501[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1504 = llvm.mlir.constant(10 : index) : !llvm.i64
%1505 = llvm.insertvalue %1504, %1503[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1506 = llvm.mlir.constant(10 : index) : !llvm.i64
%1507 = llvm.insertvalue %1506, %1505[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1508 = llvm.mlir.constant(1 : index) : !llvm.i64
%1509 = llvm.insertvalue %1508, %1507[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
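// ^bb175 onward appears to be the final Gemm: for each of the 10 outputs, %57 is
// seeded with %23 and the k-loop over 256 accumulates %89[0][k] * %345[k][j];
// ^bb181 then scales the sum by %22 (plausibly Gemm's alpha) before the bias add.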
%1510 = llvm.mlir.constant(0 : index) : !llvm.i64
%1511 = llvm.mlir.constant(1 : index) : !llvm.i64
%1512 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb175(%1510 : !llvm.i64)
^bb175(%1513: !llvm.i64): // 2 preds: ^bb174, ^bb182
%1514 = llvm.icmp "slt" %1513, %1511 : !llvm.i64
llvm.cond_br %1514, ^bb176, ^bb183
^bb176: // pred: ^bb175
%1515 = llvm.mlir.constant(0 : index) : !llvm.i64
%1516 = llvm.mlir.constant(10 : index) : !llvm.i64
%1517 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb177(%1515 : !llvm.i64)
^bb177(%1518: !llvm.i64): // 2 preds: ^bb176, ^bb181
%1519 = llvm.icmp "slt" %1518, %1516 : !llvm.i64
llvm.cond_br %1519, ^bb178, ^bb182
^bb178: // pred: ^bb177
%1520 = llvm.extractvalue %57[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1521 = llvm.mlir.constant(0 : index) : !llvm.i64
%1522 = llvm.mlir.constant(10 : index) : !llvm.i64
%1523 = llvm.mul %1513, %1522 : !llvm.i64
%1524 = llvm.add %1521, %1523 : !llvm.i64
%1525 = llvm.mlir.constant(1 : index) : !llvm.i64
%1526 = llvm.mul %1518, %1525 : !llvm.i64
%1527 = llvm.add %1524, %1526 : !llvm.i64
%1528 = llvm.getelementptr %1520[%1527] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %23, %1528 : !llvm<"float*">
%1529 = llvm.mlir.constant(0 : index) : !llvm.i64
%1530 = llvm.mlir.constant(256 : index) : !llvm.i64
%1531 = llvm.mlir.constant(1 : index) : !llvm.i64
llvm.br ^bb179(%1529 : !llvm.i64)
^bb179(%1532: !llvm.i64): // 2 preds: ^bb178, ^bb180
%1533 = llvm.icmp "slt" %1532, %1530 : !llvm.i64
llvm.cond_br %1533, ^bb180, ^bb181
^bb180: // pred: ^bb179
%1534 = llvm.extractvalue %89[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1535 = llvm.mlir.constant(0 : index) : !llvm.i64
%1536 = llvm.mlir.constant(256 : index) : !llvm.i64
%1537 = llvm.mul %1513, %1536 : !llvm.i64
%1538 = llvm.add %1535, %1537 : !llvm.i64
%1539 = llvm.mlir.constant(1 : index) : !llvm.i64
%1540 = llvm.mul %1532, %1539 : !llvm.i64
%1541 = llvm.add %1538, %1540 : !llvm.i64
%1542 = llvm.getelementptr %1534[%1541] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1543 = llvm.load %1542 : !llvm<"float*">
%1544 = llvm.extractvalue %345[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1545 = llvm.mlir.constant(0 : index) : !llvm.i64
%1546 = llvm.mlir.constant(10 : index) : !llvm.i64
%1547 = llvm.mul %1532, %1546 : !llvm.i64
%1548 = llvm.add %1545, %1547 : !llvm.i64
%1549 = llvm.mlir.constant(1 : index) : !llvm.i64
%1550 = llvm.mul %1518, %1549 : !llvm.i64
%1551 = llvm.add %1548, %1550 : !llvm.i64
%1552 = llvm.getelementptr %1544[%1551] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1553 = llvm.load %1552 : !llvm<"float*">
%1554 = llvm.extractvalue %57[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1555 = llvm.mlir.constant(0 : index) : !llvm.i64
%1556 = llvm.mlir.constant(10 : index) : !llvm.i64
%1557 = llvm.mul %1513, %1556 : !llvm.i64
%1558 = llvm.add %1555, %1557 : !llvm.i64
%1559 = llvm.mlir.constant(1 : index) : !llvm.i64
%1560 = llvm.mul %1518, %1559 : !llvm.i64
%1561 = llvm.add %1558, %1560 : !llvm.i64
%1562 = llvm.getelementptr %1554[%1561] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1563 = llvm.load %1562 : !llvm<"float*">
%1564 = llvm.fmul %1543, %1553 : !llvm.float
%1565 = llvm.fadd %1563, %1564 : !llvm.float
%1566 = llvm.extractvalue %57[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1567 = llvm.mlir.constant(0 : index) : !llvm.i64
%1568 = llvm.mlir.constant(10 : index) : !llvm.i64
%1569 = llvm.mul %1513, %1568 : !llvm.i64
%1570 = llvm.add %1567, %1569 : !llvm.i64
%1571 = llvm.mlir.constant(1 : index) : !llvm.i64
%1572 = llvm.mul %1518, %1571 : !llvm.i64
%1573 = llvm.add %1570, %1572 : !llvm.i64
%1574 = llvm.getelementptr %1566[%1573] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1565, %1574 : !llvm<"float*">
%1575 = llvm.add %1532, %1531 : !llvm.i64
llvm.br ^bb179(%1575 : !llvm.i64)
^bb181: // pred: ^bb179
%1576 = llvm.extractvalue %57[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1577 = llvm.mlir.constant(0 : index) : !llvm.i64
%1578 = llvm.mlir.constant(10 : index) : !llvm.i64
%1579 = llvm.mul %1513, %1578 : !llvm.i64
%1580 = llvm.add %1577, %1579 : !llvm.i64
%1581 = llvm.mlir.constant(1 : index) : !llvm.i64
%1582 = llvm.mul %1518, %1581 : !llvm.i64
%1583 = llvm.add %1580, %1582 : !llvm.i64
%1584 = llvm.getelementptr %1576[%1583] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1585 = llvm.load %1584 : !llvm<"float*">
%1586 = llvm.fmul %22, %1585 : !llvm.float
%1587 = llvm.mlir.constant(0 : index) : !llvm.i64
%1588 = llvm.extractvalue %1509[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1589 = llvm.mlir.constant(0 : index) : !llvm.i64
%1590 = llvm.mlir.constant(10 : index) : !llvm.i64
%1591 = llvm.mul %1587, %1590 : !llvm.i64
%1592 = llvm.add %1589, %1591 : !llvm.i64
%1593 = llvm.mlir.constant(1 : index) : !llvm.i64
%1594 = llvm.mul %1518, %1593 : !llvm.i64
%1595 = llvm.add %1592, %1594 : !llvm.i64
%1596 = llvm.getelementptr %1588[%1595] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
%1597 = llvm.load %1596 : !llvm<"float*">
%1598 = llvm.fmul %22, %1597 : !llvm.float
%1599 = llvm.fadd %1586, %1598 : !llvm.float
%1600 = llvm.extractvalue %57[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%1601 = llvm.mlir.constant(0 : index) : !llvm.i64
%1602 = llvm.mlir.constant(10 : index) : !llvm.i64
%1603 = llvm.mul %1513, %1602 : !llvm.i64
%1604 = llvm.add %1601, %1603 : !llvm.i64
%1605 = llvm.mlir.constant(1 : index) : !llvm.i64
%1606 = llvm.mul %1518, %1605 : !llvm.i64
%1607 = llvm.add %1604, %1606 : !llvm.i64
%1608 = llvm.getelementptr %1600[%1607] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
llvm.store %1599, %1608 : !llvm<"float*">
%1609 = llvm.add %1518, %1517 : !llvm.i64
llvm.br ^bb177(%1609 : !llvm.i64)
^bb182: // pred: ^bb177
%1610 = llvm.add %1513, %1512 : !llvm.i64
llvm.br ^bb175(%1610 : !llvm.i64)
^bb183: // pred: ^bb175
%1611 = llvm.extractvalue %73[0] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
%1612 = llvm.bitcast %1611 : !llvm<"i8*"> to !llvm<"i8*">
llvm.call @free(%1612) : (!llvm<"i8*">) -> ()
llvm.return %57 : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
}
llvm.func @_mlir_ciface_main_graph(%arg0: !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> {
%0 = llvm.load %arg0 : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">
%1 = llvm.extractvalue %0[0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%2 = llvm.extractvalue %0[1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%3 = llvm.extractvalue %0[2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%4 = llvm.extractvalue %0[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%5 = llvm.extractvalue %0[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%6 = llvm.extractvalue %0[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%7 = llvm.extractvalue %0[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%8 = llvm.extractvalue %0[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%9 = llvm.extractvalue %0[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%10 = llvm.extractvalue %0[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%11 = llvm.extractvalue %0[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%12 = llvm.call @main_graph(%1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11) : (!llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
llvm.return %12 : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
}
llvm.func @_dyn_entry_point_main_graph(%arg0: !llvm<"i8*">) -> !llvm<"i8*"> {
%0 = llvm.mlir.constant(0 : i32) : !llvm.i32
%1 = llvm.call @getRtMemRef(%arg0, %0) : (!llvm<"i8*">, !llvm.i32) -> !llvm<"i8*">
%2 = llvm.mlir.constant(1 : i32) : !llvm.i32
%3 = llvm.alloca %2 x !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }"> : (!llvm.i32) -> !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">
%4 = llvm.load %3 : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">
%5 = llvm.call @getData(%1) : (!llvm<"i8*">) -> !llvm<"i8*">
%6 = llvm.bitcast %5 : !llvm<"i8*"> to !llvm<"float*">
%7 = llvm.insertvalue %6, %4[0 : i32] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">
%8 = llvm.insertvalue %6, %7[1 : i32] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%9 = llvm.mlir.constant(0 : i64) : !llvm.i64
%10 = llvm.insertvalue %9, %8[2 : i32] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%11 = llvm.call @getSizes(%1) : (!llvm<"i8*">) -> !llvm<"i64*">
%12 = llvm.call @getStrides(%1) : (!llvm<"i8*">) -> !llvm<"i64*">
%13 = llvm.mlir.constant(0 : i64) : !llvm.i64
%14 = llvm.getelementptr %11[%13] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%15 = llvm.load %14 : !llvm<"i64*">
%16 = llvm.insertvalue %15, %10[3, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%17 = llvm.getelementptr %11[%13] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%18 = llvm.load %17 : !llvm<"i64*">
%19 = llvm.insertvalue %18, %16[4, 0] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%20 = llvm.mlir.constant(1 : i64) : !llvm.i64
%21 = llvm.getelementptr %11[%20] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%22 = llvm.load %21 : !llvm<"i64*">
%23 = llvm.insertvalue %22, %19[3, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%24 = llvm.getelementptr %11[%20] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%25 = llvm.load %24 : !llvm<"i64*">
%26 = llvm.insertvalue %25, %23[4, 1] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%27 = llvm.mlir.constant(2 : i64) : !llvm.i64
%28 = llvm.getelementptr %11[%27] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%29 = llvm.load %28 : !llvm<"i64*">
%30 = llvm.insertvalue %29, %26[3, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%31 = llvm.getelementptr %11[%27] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%32 = llvm.load %31 : !llvm<"i64*">
%33 = llvm.insertvalue %32, %30[4, 2] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%34 = llvm.mlir.constant(3 : i64) : !llvm.i64
%35 = llvm.getelementptr %11[%34] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%36 = llvm.load %35 : !llvm<"i64*">
%37 = llvm.insertvalue %36, %33[3, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
%38 = llvm.getelementptr %11[%34] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
%39 = llvm.load %38 : !llvm<"i64*">
%40 = llvm.insertvalue %39, %37[4, 3] : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
llvm.store %40, %3 : !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">
%41 = llvm.call @_mlir_ciface_main_graph(%3) : (!llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%42 = llvm.call @createOrderedRtMemRefDict() : () -> !llvm<"i8*">
%43 = llvm.mlir.constant(2 : i32) : !llvm.i32
%44 = llvm.call @createRtMemRef(%43) : (!llvm.i32) -> !llvm<"i8*">
%45 = llvm.extractvalue %41[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%46 = llvm.bitcast %45 : !llvm<"float*"> to !llvm<"i8*">
%47 = llvm.call @setData(%44, %46) : (!llvm<"i8*">, !llvm<"i8*">) -> !llvm.void
%48 = llvm.mlir.constant(1 : i32) : !llvm.i32
%49 = llvm.call @setDType(%44, %48) : (!llvm<"i8*">, !llvm.i32) -> !llvm.void
%50 = llvm.call @getSizes(%44) : (!llvm<"i8*">) -> !llvm<"i64*">
%51 = llvm.call @getStrides(%44) : (!llvm<"i8*">) -> !llvm<"i64*">
%52 = llvm.mlir.constant(0 : i64) : !llvm.i64
%53 = llvm.extractvalue %41[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%54 = llvm.getelementptr %50[%52] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
llvm.store %53, %54 : !llvm<"i64*">
%55 = llvm.extractvalue %41[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%56 = llvm.getelementptr %51[%52] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
llvm.store %55, %56 : !llvm<"i64*">
%57 = llvm.mlir.constant(1 : i64) : !llvm.i64
%58 = llvm.extractvalue %41[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%59 = llvm.getelementptr %50[%57] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
llvm.store %58, %59 : !llvm<"i64*">
%60 = llvm.extractvalue %41[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
%61 = llvm.getelementptr %51[%57] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
llvm.store %60, %61 : !llvm<"i64*">
%62 = llvm.mlir.constant(0 : i32) : !llvm.i32
%63 = llvm.call @setRtMemRef(%42, %62, %44) : (!llvm<"i8*">, !llvm.i32, !llvm<"i8*">) -> !llvm.void
llvm.return %42 : !llvm<"i8*">
}
}
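The tail of the dump is worth a closer look: the module exports main_graph itself, _mlir_ciface_main_graph (a C-interface wrapper that takes a pointer to a memref descriptor), and _dyn_entry_point_main_graph, which hides everything behind opaque i8* handles manipulated through the RtMemRef helpers visible above (getRtMemRef, getData, getSizes, getStrides, setData, setDType, createRtMemRef, createOrderedRtMemRefDict, setRtMemRef). Below is a minimal C sketch of driving mnist-8.so through this last entry point; the helper names are read off the IR, but their exact C prototypes and the input-population sequence are assumptions, not a documented API.
/* Sketch: call mnist-8.so via _dyn_entry_point_main_graph.
 * Helper names come from the lowered IR above; the C prototypes
 * and the way the input RtMemRef is filled in are assumptions. */
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  void *lib = dlopen("./mnist-8.so", RTLD_NOW);
  if (!lib) { fprintf(stderr, "dlopen: %s\n", dlerror()); return 1; }

  /* Runtime helpers referenced by the generated entry point. */
  void    *(*createOrderedRtMemRefDict)(void) = dlsym(lib, "createOrderedRtMemRefDict");
  void    *(*createRtMemRef)(int)              = dlsym(lib, "createRtMemRef");
  void     (*setData)(void *, void *)          = dlsym(lib, "setData");
  int64_t *(*getSizes)(void *)                 = dlsym(lib, "getSizes");
  void     (*setRtMemRef)(void *, int, void *) = dlsym(lib, "setRtMemRef");
  void    *(*getRtMemRef)(void *, int)         = dlsym(lib, "getRtMemRef");
  void    *(*getData)(void *)                  = dlsym(lib, "getData");
  void    *(*run)(void *)                      = dlsym(lib, "_dyn_entry_point_main_graph");

  /* Rank-4 input; mnist-8 takes a 1x1x28x28 float image (zeros here). */
  static float image[1 * 1 * 28 * 28];
  void *in = createRtMemRef(4);
  setData(in, image);
  int64_t *sizes = getSizes(in);
  sizes[0] = 1; sizes[1] = 1; sizes[2] = 28; sizes[3] = 28;

  void *inputs = createOrderedRtMemRefDict();
  setRtMemRef(inputs, 0, in);

  /* Returns another dict holding the 1x10 score tensor
   * (the [1 x [10 x float]] buffer built in ^bb174 above). */
  void *outputs = run(inputs);
  float *scores = getData(getRtMemRef(outputs, 0));
  for (int i = 0; i < 10; ++i)
    printf("score[%d] = %f\n", i, scores[i]);

  dlclose(lib);
  return 0;
}
Something like cc demo.c -ldl should build it; whether the helper symbols are exported by the .so itself rather than by a separate runtime library is another assumption worth checking with nm.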
Command help
onnx-mlir
The help output for the onnx-mlir command:
$ onnx-mlir -h
OVERVIEW: ONNX MLIR modular optimizer driver
USAGE: onnx-mlir [options] <input file>
OPTIONS:
Generic Options:
--help - Display available options (--help-hidden for more)
--help-list - Display list of available options (--help-list-hidden for more)
--version - Display the version of this program
ONNX MLIR Options:
These are frontend options.
Choose target to emit:
--EmitONNXBasic - Ingest ONNX and emit the basic ONNX operations without inferred shapes.
--EmitONNXIR - Ingest ONNX and emit corresponding ONNX dialect.
--EmitMLIR - Lower model to MLIR built-in transformation dialect.
--EmitLLVMIR - Lower model to LLVM IR (LLVM dialect).
--EmitLib - Lower model to LLVM IR, emit (to file) LLVM bitcode for model, compile and link it to a shared library.
--EmitJNI - Lower model to LLVM IR -> LLVM bitcode -> JNI shared library -> jar
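As a cross-check of these targets, the LLVM dialect module quoted above can be regenerated directly; this assumes --EmitLLVMIR follows the same mnist-8.onnx.mlir output-naming convention as the other targets:
$ onnx-mlir --EmitLLVMIR mnist-8.onnx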
onnx-mlir-opt
The help output for the onnx-mlir-opt command:
$ onnx-mlir-opt -h
OVERVIEW: ONNX MLIR modular optimizer driver
USAGE: onnx-mlir-opt [options] <input file>
OPTIONS:
Color Options:
--color - Use colors in output (default=autodetect)
General options:
--allow-unregistered-dialect - Allow operation with no registered dialects
--enable-name-compression - Enable name/filename string compression
--mlir-disable-threading - Disabling multi-threading within MLIR
--mlir-elide-elementsattrs-if-larger=<uint> - Elide ElementsAttrs with "..." that have more elements than the given upper limit
--mlir-pretty-debuginfo - Print pretty debug info in MLIR output
--mlir-print-debuginfo - Print debug info in MLIR output
--mlir-print-elementsattrs-with-hex-if-larger=<long> - Print DenseElementsAttrs with a hex string that have more elements than the given upper limit (use -1 to disable)
--mlir-print-op-on-diagnostic - When a diagnostic is emitted on an operation, also print the operation as an attached note
--mlir-print-stacktrace-on-diagnostic - When a diagnostic is emitted, also print the stack trace as an attached note
-o=<filename> - Output filename
Compiler passes to run
--pass-pipeline - A textual description of a pass pipeline to run
Passes:
--affine-data-copy-generate - Generate explicit copying for affine memory operations
--fast-mem-capacity=<ulong> - Set fast memory space capacity in KiB (default: unlimited)
--fast-mem-space=<uint> - Fast memory space identifier for copy generation (default: 1)
--generate-dma - Generate DMA instead of point-wise copy
--min-dma-transfer=<int> - Minimum DMA transfer size supported by the target in bytes
--skip-non-unit-stride-loops - Testing purposes: avoid non-unit stride loop choice depths for copy placement
--slow-mem-space=<uint> - Slow memory space identifier for copy generation (default: 0)
--tag-mem-space=<uint> - Tag memory space identifier for copy generation (default: 0)
--affine-loop-fusion - Fuse affine loop nests
--fusion-compute-tolerance=<number> - Fractional increase in additional computation tolerated while fusing
--fusion-fast-mem-space=<uint> - Faster memory space number to promote fusion buffers to
--fusion-local-buf-threshold=<ulong> - Threshold size (KiB) for promoting local buffers to fast memory space
--fusion-maximal - Enables maximal loop fusion
--affine-loop-invariant-code-motion - Hoist loop invariant instructions outside of affine loops
--affine-loop-tile - Tile affine loop nests
--cache-size=<ulong> - Set size of cache to tile for in KiB
--separate - Separate full and partial tiles
--tile-size=<uint> - Use this tile size for all loops
--tile-sizes=<uint> - List of tile sizes for each perfect nest (overridden by -tile-size)
--affine-loop-unroll - Unroll affine loops
--unroll-factor=<uint> - Use this unroll factor for all loops being unrolled
--unroll-full - Fully unroll loops
--unroll-full-threshold=<uint> - Unroll all loops with trip count less than or equal to this
--unroll-num-reps=<uint> - Unroll innermost loops repeatedly this many times
--affine-loop-unroll-jam - Unroll and jam affine loops
--unroll-jam-factor=<uint> - Use this unroll jam factor for all loops (default 4)
--affine-pipeline-data-transfer - Pipeline non-blocking data transfers between explicitly managed levels of the memory hierarchy
--affine-super-vectorize - Vectorize to a target independent n-D vector abstraction
--test-fastest-varying=<long> - Specify a 1-D, 2-D or 3-D pattern of fastest varying memory dimensions to match. See defaultPatterns in Vectorize.cpp for a description and examples. This is used for testing purposes
--virtual-vector-size=<long> - Specify an n-D virtual vector size for vectorization
--attribute-promotion - Promote constant operands to attributes.
--buffer-placement - Optimizes placement of alloc and dealloc operations
--bundle-memory-pools - Bundle memory pools of internal MemRefs into a single memory pool.
--canonicalize - Canonicalize operations
--constprop-onnx - ConstProp ONNX operations into composition of other ONNX operations.
--convert-krnl-to-affine - Lower Krnl dialect.
--convert-krnl-to-llvm - Lower the Krnl Affine and Std dialects to LLVM.
--convert-linalg-on-tensors-to-buffers - Convert the Linalg operations which work on tensor-type operands or results to use buffers instead
--convert-linalg-to-affine-loops - Lower the operations from the linalg dialect into affine loops
--convert-linalg-to-loops - Lower the operations from the linalg dialect into loops
--convert-linalg-to-parallel-loops - Lower the operations from the linalg dialect into parallel loops
--convert-onnx-to-krnl - Lower frontend ops to Krnl dialect.
--convert-scf-to-std - Lower SCF to Standard Dialect.
--convert-vector-to-llvm - Lower Vector Dialect to LLVM IR Dialect.
--reassociate-fp-reductions - Allows llvm to reassociate floating-point reductions for speed
--convert-vector-to-scf - Convert vector to SCF.
--full-unroll - Perform full unrolling when converting vector transfers to SCF
--cse - Eliminate common sub-expressions
--decompose-onnx - Decompose ONNX operations into composition of other ONNX operations.
--elide-constants - Elide values of constant operations.
--elide-krnl-constants - Elide the constant values of the Global Krnl operations.
--enable-memory-pool - Enable a memory pool for allocating internal MemRefs.
--for-loop-specialization - Specialize `for` loops for vectorization
--inline - Inline function calls
--disable-simplify - Disable running simplifications during inlining
--max-iterations=<uint> - Maximum number of iterations when inlining within an SCC
--linalg-fold-unit-extent-dims - Remove unit-extent dimension in Linalg ops on tensors
--fold-one-trip-loops-only - Only folds the one-trip loops from Linalg ops on tensors (for testing purposes only)
--linalg-fusion - Fuse operations in the linalg dialect
--linalg-fusion-for-tensor-ops - Fuse operations on RankedTensorType in linalg dialect
--linalg-promote-subviews - Promote subview ops to local buffers
--test-promote-dynamic - Test generation of dynamic promoted buffers
--linalg-tile - Tile operations in the linalg dialect
--linalg-tile-sizes=<long> - Test generation of dynamic promoted buffers
--linalg-tile-to-parallel-loops - Tile operations in the linalg dialect to parallel loops
--linalg-tile-sizes=<long> - Test generation of dynamic promoted buffers
--loop-coalescing - Coalesce nested loops with independent bounds into a single loop
--loop-invariant-code-motion - Hoist loop invariant instructions outside of the loop
--lower-affine - Lower Affine Dialect to Standard Dialect.
--memref-dataflow-opt - Perform store/load forwarding for memrefs
--pack-krnl-constants - Elide the constant values of the Global Krnl operations.
--elision-threshold=<long> - A threshold value specifying the maximum number of elements a constant operation can hold as an attribute. If the number exceeds this threshold, constants will be packed together and, in the case where `move-to-file` option is enabled, stored as a binary file on disk. This can help preserve readability of IR dump and improve compilation speed.
--filename=<string> - Specify a file in which the packed constant is to be stored.
--move-to-file - Whether to move the packed constant to a file.
--parallel-loop-collapsing - Collapse parallel loops to use less induction variables
--collapsed-indices-0=<uint> - Which loop indices to combine 0th loop index
--collapsed-indices-1=<uint> - Which loop indices to combine into the position 1 loop index
--collapsed-indices-2=<uint> - Which loop indices to combine into the position 2 loop index
--parallel-loop-fusion - Fuse adjacent parallel loops
--parallel-loop-specialization - Specialize parallel loops for vectorization
--parallel-loop-tiling - Tile parallel loops
--parallel-loop-tile-sizes=<long> - Factors to tile parallel loops by
--print-cfg-graph - Print CFG graph per-Region
--print-op-graph - Print op graph per-Region
--print-op-stats - Print statistics of operations
--sccp - Sparse Conditional Constant Propagation
--shape-inference - Shape inference for frontend dialects.
--simplify-affine-structures - Simplify affine expressions in maps/sets and normalize memrefs
--snapshot-op-locations - Generate new locations from the current IR
--filename=<string> - The filename to print the generated IR
--tag=<string> - A tag to use when fusing the new locations with the original. If unset, the locations are replaced.
--strip-debuginfo - Strip debug info from all operations
--symbol-dce - Eliminate dead symbols
--pass-pipeline-crash-reproducer=<string> - Generate a .mlir reproducer file at the given output path if the pass manager crashes or fails
--pass-pipeline-local-reproducer - When generating a crash reproducer, attempt to generate a reproducer with the smallest pipeline.
--pass-statistics - Display the statistics of each pass
--pass-statistics-display=<value> - Display method for pass statistics
=list - display the results in a merged list sorted by pass name
=pipeline - display the results with a nested pipeline view
--pass-timing - Display the execution times of each pass
--pass-timing-display=<value> - Display method for pass timing data
=list - display the results in a list sorted by total time
=pipeline - display the results with a nested pipeline view
--print-ir-after - Print IR after specified passes
(--pass-pipeline and the full pass list are printed again here, verbatim; omitted)
--print-ir-after-all - Print IR after each pass
--print-ir-after-change - When printing the IR after a pass, only print if the IR changed
--print-ir-before - Print IR before specified passes
(--pass-pipeline and the full pass list are printed a third time here, verbatim; omitted)
--print-ir-before-all - Print IR before each pass
--print-ir-module-scope - When printing IR for print-ir-[before|after]{-all} always print the top-level module operation
--split-input-file - Split the input file into pieces and process each chunk independently
--verify-diagnostics - Check that emitted diagnostics match expected-* lines on the corresponding line
--verify-each - Run the verifier after each transformation pass
Generic Options:
--help - Display available options (--help-hidden for more)
--help-list - Display list of available options (--help-list-hidden for more)
--version - Display the version of this program
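None of these passes are exercised in this article, but onnx-mlir-opt is the natural tool for trying them one at a time. A plausible workflow, assuming --EmitONNXIR writes the ONNX dialect module to mnist-8.onnx.mlir like the other targets, and with the pass selection purely illustrative:
$ onnx-mlir --EmitONNXIR mnist-8.onnx
$ onnx-mlir-opt --shape-inference --convert-onnx-to-krnl mnist-8.onnx.mlir
Both flags appear in the pass list above: --shape-inference fills in tensor shapes on the frontend dialect, after which --convert-onnx-to-krnl lowers the ONNX ops to the Krnl dialect.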