Computing the Same Thing in Keras and MPS (2)

Computing the Same Thing in Keras and MPS (1)
↑ This post continues from the one above. This time: the Metal Performance Shaders side, and a comparison of the results.

(2) The MPS side
Load the coefficients saved from Keras on the macOS side and compute with Metal Performance Shaders using the same network configuration (the same small net as in part (1): 2 inputs → 4 hidden units → 3 outputs, as set up below).
// Input struct
typedef struct
{
    float angle0;
    float angle1;
} Inputs_t;

// Output struct
typedef struct
{
    float q0;
    float q1;
    float q2;
} MPSResult_t;

{
    // Instance variables
    id <MTLDevice> device;
    id <MTLCommandQueue> commandQueue;
    MPSCNNNeuronReLU *relu;
    MPSImage *srcImage;
    MPSImage *h1Image;
    MPSImage *finalImage;
    SlimMPSCNNFullyConnected *h1;
    SlimMPSCNNFullyConnected *h2;

    NSUInteger NUM_INPUT, NUM_HIDDEN1, NUM_OUTPUT;// layer sizes, set in setupNN

    float *angles;// input
    float *qResults;// output

    MTLRegion srcImageRegion;
    MTLRegion finalImageRegion;
}

-(void)setupNN
{
    NUM_INPUT = 2;
    NUM_HIDDEN1 = 4;
    NUM_OUTPUT = 3;
    
    angles = calloc(NUM_INPUT, sizeof(float));
    qResults = calloc(4, sizeof(float));// 4 floats, not NUM_OUTPUT: the 1x1 RGBA32Float pixel read back later holds 4 channels
    
    srcImageRegion = MTLRegionMake2D(0, 0, NUM_INPUT, 1);
    finalImageRegion = MTLRegionMake2D(0, 0, 1, 1);

    MPSImageDescriptor *sid = [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32 width:NUM_INPUT height:1 featureChannels:1];// input side
    
    MPSImageDescriptor *h1id = [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32 width:1 height:1 featureChannels:NUM_HIDDEN1];
    
    MPSImageDescriptor *did = [MPSImageDescriptor imageDescriptorWithChannelFormat:MPSImageFeatureChannelFormatFloat32 width:1 height:1 featureChannels:NUM_OUTPUT];// output side
    
    
    device = MTLCreateSystemDefaultDevice();
    commandQueue = [device newCommandQueue];
    
    
    // Initialize MPSImage from descriptors
    srcImage = [[MPSImage alloc] initWithDevice:device imageDescriptor:sid];
    h1Image = [[MPSImage alloc] initWithDevice:device imageDescriptor:h1id];
    finalImage = [[MPSImage alloc] initWithDevice:device  imageDescriptor:did];
    
    relu = [[MPSCNNNeuronReLU alloc] initWithDevice:device a:0];
}
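
One note on the images above: with 4 or fewer feature channels an MPSImage should be backed by a single texture slice (r32Float for 1 channel, rgba32Float for 3-4 channels), which is why the plain replaceRegion/getBytes calls on .texture later on are enough. A quick sanity check one could drop at the end of setupNN (not in the original code):

// (Not in the original post) Confirm the backing texture formats:
// srcImage has 1 feature channel, finalImage has 3 (padded to RGBA).
NSLog(@"src pixelFormat=%lu (R32Float=%lu), final pixelFormat=%lu (RGBA32Float=%lu)",
      (unsigned long)srcImage.texture.pixelFormat, (unsigned long)MTLPixelFormatR32Float,
      (unsigned long)finalImage.texture.pixelFormat, (unsigned long)MTLPixelFormatRGBA32Float);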

-(void)makeLayers
{
    // Hidden layer: consumes the whole 2x1 source image (kernel size = image size) and outputs a 1x1 image with NUM_HIDDEN1 feature channels
    h1 = [[SlimMPSCNNFullyConnected alloc]
          initWithKernelWidth:NUM_INPUT
          kernelHeight:1
          inputFeatureChannels:1
          outputFeatureChannels:NUM_HIDDEN1
          neuronFilter:relu
          device:device
          kernelParamsBinaryName:@"1"];
    
    
    // Output layer: 1x1 source with NUM_HIDDEN1 feature channels -> 1x1 with NUM_OUTPUT feature channels
    h2 = [[SlimMPSCNNFullyConnected alloc]
          initWithKernelWidth:1
          kernelHeight:1
          inputFeatureChannels:NUM_HIDDEN1
          outputFeatureChannels:NUM_OUTPUT
          neuronFilter:relu
          device:device
          kernelParamsBinaryName:@"2"];
}
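
The post doesn't show where these methods get called from; presumably something along these lines (a hypothetical call site, name is mine):

// Hypothetical call site (not shown in the post): create the Metal/MPS objects,
// build the two fully connected layers, then run the test inference.
-(void)runComparison
{
    [self setupNN];
    [self makeLayers];
    [self checkNN];
}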

-(void)checkNN
{
    Inputs_t inputs;
    inputs.angle0 = 0.1;
    inputs.angle1 = 0.2;
    //↑ Same input values as on the Keras side
    MPSResult_t r = [self inferenceForInputs:inputs];
    NSLog(@"q0=%f,q1=%f,q2=%f",r.q0,r.q1,r.q2);
}


-(MPSResult_t)inferenceForInputs:(Inputs_t)inputs
{
    __block MPSResult_t mpsResult;
    
    dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_BACKGROUND, 0), ^{
        
        self->angles[0] = inputs.angle0;
        self->angles[1] = inputs.angle1;
        
        // Copy the input values into the source MPSImage's texture
        [self->srcImage.texture replaceRegion:self->srcImageRegion
                                  mipmapLevel:0
                                        slice:0
                                    withBytes:self->angles
                                  bytesPerRow:sizeof(float)*4
                                bytesPerImage:0];
        
        @autoreleasepool{
            id <MTLCommandBuffer> commandBuffer = [self->commandQueue commandBuffer];
            
            // Encode the two fully connected layers: input -> hidden -> output
            [self->h1 encodeToCommandBuffer:commandBuffer sourceImage:self->srcImage destinationImage:self->h1Image];
            [self->h2 encodeToCommandBuffer:commandBuffer sourceImage:self->h1Image destinationImage:self->finalImage];
            
            [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
                
                // Read the 1x1 RGBA32Float result pixel back into qResults
                [self->finalImage.texture getBytes:&self->qResults[0]
                                       bytesPerRow:sizeof(float)*4
                                        fromRegion:self->finalImageRegion
                                       mipmapLevel:0];
                
                mpsResult.q0 = self->qResults[0];
                mpsResult.q1 = self->qResults[1];
                mpsResult.q2 = self->qResults[2];
                
                dispatch_semaphore_signal(semaphore);
                
            }];
            
            [commandBuffer commit];
        }
        
    });
    
    dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
    
    return mpsResult;
}
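
Since the caller blocks on the semaphore anyway, an equivalent (and arguably simpler) variant would be to encode on the calling thread and wait on the command buffer itself. A sketch, not the code used in the post:

// Alternative synchronous variant (not the post's code): same encoding, but
// using -waitUntilCompleted instead of a background queue plus a semaphore.
-(MPSResult_t)inferenceSyncForInputs:(Inputs_t)inputs
{
    MPSResult_t result;
    angles[0] = inputs.angle0;
    angles[1] = inputs.angle1;
    [srcImage.texture replaceRegion:srcImageRegion
                        mipmapLevel:0
                              slice:0
                          withBytes:angles
                        bytesPerRow:sizeof(float)*4
                      bytesPerImage:0];
    
    id <MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
    [h1 encodeToCommandBuffer:commandBuffer sourceImage:srcImage destinationImage:h1Image];
    [h2 encodeToCommandBuffer:commandBuffer sourceImage:h1Image destinationImage:finalImage];
    [commandBuffer commit];
    [commandBuffer waitUntilCompleted];// block until the GPU has finished
    
    [finalImage.texture getBytes:qResults
                     bytesPerRow:sizeof(float)*4
                      fromRegion:finalImageRegion
                     mipmapLevel:0];
    result.q0 = qResults[0];
    result.q1 = qResults[1];
    result.q2 = qResults[2];
    return result;
}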
The SlimMPSCNNFullyConnected class is based on Apple's sample code; in fact it is pretty much lifted as-is.
The implementation looks like this:
- (instancetype)initWithKernelWidth:(NSUInteger)kernelWidth
                       kernelHeight:(NSUInteger)kernelHeight
               inputFeatureChannels:(NSUInteger)inputFeatureChannels
              outputFeatureChannels:(NSUInteger)outputFeatureChannels
                       neuronFilter:(MPSCNNNeuron *)neuronFilter
                             device:(id<MTLDevice>)device
             kernelParamsBinaryName:(NSString *)kernelParamsBinaryName
{
    
    NSUInteger nofWeight = inputFeatureChannels*kernelHeight*kernelWidth*outputFeatureChannels;
    NSUInteger nofBias = outputFeatureChannels;
    float *weightP = calloc(nofWeight, sizeof(float));
    float *biasP = calloc(nofBias, sizeof(float));
    
    NSString *wbdataFolder = [NSString stringWithFormat:@"%@",BaseFolder];
    NSString *fileName_w = [NSString stringWithFormat:@"%@_%@.dat",STR_WEIGHTS , kernelParamsBinaryName];
    NSString *fileName_b = [NSString stringWithFormat:@"%@_%@.dat",STR_BIAS, kernelParamsBinaryName];
    NSString *filePath_w = [NSString stringWithFormat:@"%@%@",wbdataFolder,fileName_w];
    NSString *filePath_b = [NSString stringWithFormat:@"%@%@",wbdataFolder,fileName_b];
    
    
    NSData *wData = [[NSData alloc] initWithContentsOfFile:filePath_w];
    NSData *bData = [[NSData alloc] initWithContentsOfFile:filePath_b];
    
    [wData getBytes:weightP length:nofWeight*sizeof(float)];
    [bData getBytes:biasP length:nofBias*sizeof(float)];
    
    // Debug: check the loaded coefficients
    //for (int ite=0;ite<nofWeight;ite++) {
    //    NSLog(@"weight%@:%f",kernelParamsBinaryName,weightP[ite]);
    //}
    //for (int ite=0;ite<nofBias;ite++) {
    //    NSLog(@"bias%@:%f",kernelParamsBinaryName,biasP[ite]);
    //}
    
    
    MPSCNNConvolutionDescriptor *convDesc = [MPSCNNConvolutionDescriptor
                                             cnnConvolutionDescriptorWithKernelWidth:kernelWidth
                                             kernelHeight:kernelHeight
                                             inputFeatureChannels:inputFeatureChannels
                                             outputFeatureChannels:outputFeatureChannels
                                             neuronFilter:neuronFilter];
    
    
    self = [super initWithDevice:device
           convolutionDescriptor:convDesc
                   kernelWeights:weightP
                       biasTerms:biasP
                           flags:MPSCNNConvolutionFlagsNone];
    self.destinationFeatureChannelOffset = 0;
    
    free(weightP);
    free(biasP);
    
    return self;
}
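
Two notes on this. First, MPSCNNConvolution (and hence MPSCNNFullyConnected) expects the weight buffer laid out as weight[outputChannel][kernelHeight][kernelWidth][inputChannel], so the .dat files are presumably written in that order (i.e. the Keras Dense kernels transposed from their (in, out) layout) on the Keras side in part (1). Second, for reference, the interface this initializer implies would look roughly like this (a sketch reconstructed from the code above; the real class follows Apple's sample):

// Assumed interface for SlimMPSCNNFullyConnected, reconstructed from the
// initializer above (superclass assumed to be MPSCNNFullyConnected, as in
// Apple's sample code).
@interface SlimMPSCNNFullyConnected : MPSCNNFullyConnected

- (instancetype)initWithKernelWidth:(NSUInteger)kernelWidth
                       kernelHeight:(NSUInteger)kernelHeight
               inputFeatureChannels:(NSUInteger)inputFeatureChannels
              outputFeatureChannels:(NSUInteger)outputFeatureChannels
                       neuronFilter:(MPSCNNNeuron *)neuronFilter
                             device:(id <MTLDevice>)device
             kernelParamsBinaryName:(NSString *)kernelParamsBinaryName;

@end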


(3) Comparing the results
The inputs are of course the same.

resutl=[[0.69752 0.10237998 1.01858 ]]
↑ Output on the Keras side

q0=0.697416,q1=0.102600,q2=1.018707
↑ Output on the MPS side

As usual the two differ slightly (by about 1e-4, presumably down to floating point precision differences on the GPU path), but that is about what you would expect.
In any case, the coefficients obtained by training in Keras can now be used in MPS to run the same computation.



