Adding quant and dequant operators before and after an operator

Author: i_1312 | Published 2023-04-09 23:27
        void OnnxParserImpl::ParseActivation(const onnx::NodeProto &node, const armnn::ActivationFunction func)
        {
            CHECK_VALID_SIZE(static_cast<size_t>(node.input_size()), 1, 3);
            CHECK_VALID_SIZE(static_cast<size_t>(node.output_size()), 1);
    
            VALID_INPUTS(node, STR_LIST(onnx::TensorProto::FLOAT));
    
            ActivationDescriptor desc;
            desc.m_Function = func;
    
            if (func == ActivationFunction::BoundedReLu)
            {
                if (node.input_size() == 1 && node.attribute_size() > 0)
                {
                    desc.m_A = ReadOptionalNodeFloatAttribute(node, "max", std::numeric_limits<float>::max());
                    desc.m_B = ReadOptionalNodeFloatAttribute(node, "min", std::numeric_limits<float>::lowest());
                }
                else
                {
                    desc.m_A = node.input(2).empty() ? std::numeric_limits<float>::max() : std::stof(node.input(2));
                    desc.m_B = node.input(1).empty() ? std::numeric_limits<float>::lowest() : std::stof(node.input(1));
                }
            }
    
            ///   Insert a dequantize layer in front of the activation
            IConnectableLayer *const layer_dequant = m_Network->AddDequantizeLayer((node.name() + "dequant").c_str());
            ARMNN_ASSERT(layer_dequant != nullptr);
            auto outputInfo_dequant = ComputeOutputInfo({node.output(0)}, layer_dequant, {m_TensorsInfo[node.input(0)].m_info->GetShape()});
            layer_dequant->GetOutputSlot(0).SetTensorInfo(outputInfo_dequant[0]);
            // register the input connection slots for the layer, connections are made after all layers have been created
            // only the tensors for the inputs are relevant, exclude the const tensors
            RegisterInputSlots(layer_dequant, {node.input(0)});
            // register the output connection slots for the layer, connections are made after all layers have been created
            RegisterOutputSlots(layer_dequant, {node.input(0) + "dequant"});
    
    
            IConnectableLayer *const layer = m_Network->AddActivationLayer(desc, node.name().c_str());
            ARMNN_ASSERT(layer != nullptr);
    
            auto outputInfo = ComputeOutputInfo({node.output(0)}, layer, {m_TensorsInfo[node.input(0)].m_info->GetShape()});
            // Optionally override the activation output's type and quantization parameters from a JSON config:
            // if (m_jsonRoot.isMember(node.output(0)))
            // {
            //     Json::Value prop = m_jsonRoot[node.output(0)];
            //     std::string type = prop["type"].asString();
            //     float scale = prop["scale"].asFloat();
            //     int zeropoint = prop["zeropoint"].asInt();
            //     outputInfo[0].SetDataType(mapDataType(type));
            //     outputInfo[0].SetQuantizationScale(scale);
            //     outputInfo[0].SetQuantizationOffset(zeropoint);
            // }
    
       
    
            layer->GetOutputSlot(0).SetTensorInfo(outputInfo[0]);
    
            // register the input connection slots for the layer, connections are made after all layers have been created
            // only the tensors for the inputs are relevant, exclude the const tensors
            RegisterInputSlots(layer, {node.input(0) + "dequant"}); // connect the activation's input to the dequant layer's output tensor
    
            // register the output connection slots for the layer, connections are made after all layers have been created
            RegisterOutputSlots(layer, {node.output(0) + "quant"}); // expose the activation's output under an intermediate name for the quant layer
    
            IConnectableLayer *const quant_layer = m_Network->AddQuantizeLayer((node.name() + "quant").c_str());
            ARMNN_ASSERT(quant_layer != nullptr);
            auto outputInfo_quant = ComputeOutputInfo({node.output(0)}, quant_layer, {m_TensorsInfo[node.input(0)].m_info->GetShape()});
            outputInfo_quant[0].SetDataType(mapDataType("int8"));
            outputInfo_quant[0].SetQuantizationScale(0.0002f);
            outputInfo_quant[0].SetQuantizationOffset(23);
            quant_layer->GetOutputSlot(0).SetTensorInfo(outputInfo_quant[0]);
            RegisterInputSlots(quant_layer, {node.output(0) + "quant"});
            RegisterOutputSlots(quant_layer, {node.output(0)});
        }
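
    The quantize layer's data type ("int8"), scale (0.0002f), and zero point (23) are hardcoded placeholders; the commented-out block above shows how they would instead be read per-tensor from a JSON config. Note how the wiring works: the dequant layer registers its output under node.input(0) + "dequant", which the activation consumes as its input; the activation registers its output under node.output(0) + "quant", which the quant layer consumes before emitting the original name node.output(0), so downstream layers connect to the quantized output transparently.

    mapDataType is a helper from the author's code that is not shown in the post. A minimal sketch of what it presumably looks like, assuming the standard ArmNN quantized data types (the exact mapping is an assumption):

        // Presumed shape of the author's mapDataType helper (not shown in the
        // original post); the string-to-type mapping here is an assumption.
        armnn::DataType mapDataType(const std::string &type)
        {
            if (type == "int8")  return armnn::DataType::QAsymmS8;
            if (type == "uint8") return armnn::DataType::QAsymmU8;
            if (type == "int16") return armnn::DataType::QSymmS16;
            return armnn::DataType::Float32;
        }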
    
    
    

    The main idea is:

    Compare the input and output data types of each operator.
    If the input is float but the output is quantized, insert a quantize (quant) op before the op.
    If the input is quantized but the output is float, insert a dequantize (dequant) op before the op.
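
    A minimal sketch of that decision, assuming ArmNN's TensorInfo API (InsertOp and ChooseInsertedOp are hypothetical names, not part of ArmNN):

        // Illustrative helper for the insertion rule described above.
        // armnn::TensorInfo::IsQuantized() is real ArmNN API; the enum and
        // the helper itself are hypothetical.
        enum class InsertOp { None, Quantize, Dequantize };

        InsertOp ChooseInsertedOp(const armnn::TensorInfo &input,
                                  const armnn::TensorInfo &output)
        {
            if (!input.IsQuantized() && output.IsQuantized())
            {
                return InsertOp::Quantize;   // float in, quantized out
            }
            if (input.IsQuantized() && !output.IsQuantized())
            {
                return InsertOp::Dequantize; // quantized in, float out
            }
            return InsertOp::None;           // types already agree
        }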

    The operator's quantization parameters also need to be checked internally:
    1. All required quantization parameters must be present; for conv, the input, output, and weight quantization types should match, and the bias must carry quantization parameters as well.
    2. When incorrect quantization parameters are encountered, report them to the user.
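
    A hedged sketch of such a check for a convolution (ValidateConvQuantParams is an illustrative name, not ArmNN API; the rule bias_scale = input_scale * weight_scale is the usual convention for quantized conv):

        #include <cmath>

        // Illustrative validation sketch; the exact rules depend on the backend.
        void ValidateConvQuantParams(const armnn::TensorInfo &input,
                                     const armnn::TensorInfo &output,
                                     const armnn::TensorInfo &weights,
                                     const armnn::TensorInfo &bias)
        {
            // Input, output, and weights should carry the same quantized type.
            if (input.GetDataType() != output.GetDataType() ||
                input.GetDataType() != weights.GetDataType())
            {
                throw armnn::ParseException("conv: input/output/weight quantization types differ");
            }
            // The bias scale conventionally equals input_scale * weight_scale.
            const float expected = input.GetQuantizationScale() * weights.GetQuantizationScale();
            if (std::fabs(bias.GetQuantizationScale() - expected) > 1e-6f)
            {
                throw armnn::ParseException("conv: bias scale != input_scale * weight_scale");
            }
        }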
