2023-06-22 10:58:07 +00:00
|
|
|
/**
 * Copyright (c) 2023 Nomic, Inc. All rights reserved.
 *
 * This software is licensed under the terms of the Software for Open Models License (SOM),
 * version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
 * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
 */
|
|
|
|
|
|
|
|
#version 450
|
|
|
|
|
2023-09-21 17:00:10 +00:00
|
|
|
#include "common.comp"
|
2023-06-22 10:58:07 +00:00
|
|
|
|
|
|
|
// Invocations per workgroup along x; also the stride of the per-row copy loop in main().
#define nth 32

// Element type of the source buffer, and the divisor that turns a source
// byte offset into an index into that buffer.
#define IN_TYPE      float16_t
#define IN_TYPE_SIZE 2

// Element type of the destination buffer, and the divisor that turns a
// destination byte offset into an index into that buffer.
#define OUT_TYPE      float
#define OUT_TYPE_SIZE 4
|
|
|
|
|
|
|
|
// Each workgroup is nth invocations wide along x.
layout(local_size_x = nth) in;

// Source elements; this shader only reads them.
layout(binding = 0) readonly buffer tensorIn {
    IN_TYPE in_[];
};

// Destination elements; this shader only writes them.
layout(binding = 1) writeonly buffer tensorOut {
    OUT_TYPE out_[];
};
|
|
|
|
|
|
|
|
// Per-dispatch parameters. Member order and types define the push-constant
// layout and must stay exactly as declared here.
layout(push_constant) uniform parameter {
    // Offsets added to the source / destination index after the byte offset
    // has been divided by the element size, i.e. expressed in elements.
    uint inOff, outOff;

    // Source extents used to flatten the (i01, i02, i03) row coordinate.
    int ne00, ne01, ne02;

    // Source strides; multiplied by the source coordinates and then divided
    // by IN_TYPE_SIZE, so presumably in bytes -- confirm against the host code.
    uint nb00, nb01, nb02, nb03;

    // Destination extents used to split the flat index back into coordinates.
    int ne0, ne1, ne2;

    // Destination strides; multiplied by the destination coordinates and then
    // divided by OUT_TYPE_SIZE, so presumably in bytes -- confirm against the host code.
    uint nb0, nb1, nb2, nb3;
} pcs;
|
|
|
|
|
|
|
|
// Copies one source row per workgroup, converting every element from IN_TYPE
// to OUT_TYPE. The workgroup ID picks the source row (x -> dim 1, y -> dim 2,
// z -> dim 3); the nth invocations of the workgroup then walk that row with a
// stride of nth.
void main() {
    const uint i01 = gl_WorkGroupID.x;
    const uint i02 = gl_WorkGroupID.y;
    const uint i03 = gl_WorkGroupID.z;

    // Flat index of the first element of this source row, using the source extents.
    const int n = ((int(i03)*pcs.ne02 + int(i02))*pcs.ne01 + int(i01))*pcs.ne00;

    // Split the flat index into destination coordinates using the destination extents.
    const int dst_plane  = pcs.ne1*pcs.ne0;
    const int dst_volume = pcs.ne2*dst_plane;

    const int i3   = n / dst_volume;
    const int rem3 = n - i3*dst_volume;
    const int i2   = rem3 / dst_plane;
    const int rem2 = rem3 - i2*dst_plane;
    const int i1   = rem2 / pcs.ne0;
    const int i0   = rem2 - i1*pcs.ne0;

    // Index into out_ where this row starts.
    const uint dst_bytes = i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0;
    const uint dst_base  = dst_bytes / OUT_TYPE_SIZE + pcs.outOff;

    // Part of the source byte offset that is the same for every element of the row.
    const uint src_row_bytes = i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01;

    for (uint col = gl_LocalInvocationID.x; col < pcs.ne00; col += nth) {
        // Index into in_ of the element at column col of this row.
        const uint src = (src_row_bytes + col*pcs.nb00) / IN_TYPE_SIZE + pcs.inOff;

        // NOTE(review): the destination is indexed as dst_base + col, so nb0 only
        // affects where the row starts; this presumably relies on destination
        // rows being densely packed along dim 0 -- confirm against the caller.
        out_[dst_base + col] = OUT_TYPE(in_[src]);
    }
}
|