美文网首页
Image Blur

Image Blur

作者: 不决书 | 来源:发表于2024-01-23 23:12 被阅读0次

    代码分析说明:
    使用计算管线

    import { makeSample, SampleInit } from '../../components/SampleLayout';
    
    import blurWGSL from './blur.wgsl';
    import fullscreenTexturedQuadWGSL from '../../shaders/fullscreenTexturedQuad.wgsl';
    
    // Constants from the blur.wgsl shader.
    // tileDim mirrors the 128-texel row length of the shader's workgroup `tile`
    // array; it is used below to derive blockDim for a given filter size.
    const tileDim = 128;
    // batch mirrors the 4 x 4 texel block each shader invocation loads; only
    // batch[1] (rows covered per workgroup) is read in this file.
    const batch = [4, 4];
    
    /**
     * Sample entry point: sets up WebGPU, builds a compute pipeline that blurs an
     * image and a render pipeline that draws the blurred result to the canvas,
     * then starts a requestAnimationFrame loop that re-runs both every frame.
     *
     * @param canvas    Canvas to render into (configured for 'webgpu').
     * @param pageState Its `active` flag is polled to stop work once the sample
     *                  is no longer the active page.
     * @param gui       Settings GUI; `filterSize` and `iterations` controls are
     *                  registered on it.
     */
    const init: SampleInit = async ({ canvas, pageState, gui }) => {
      // NOTE(review): requestAdapter() can resolve to null when WebGPU is
      // unavailable; the next line would then throw — confirm this is acceptable.
      const adapter = await navigator.gpu.requestAdapter();
      const device = await adapter.requestDevice();
    
      // The page may have been deactivated while the awaits above were pending.
      if (!pageState.active) return;
      const context = canvas.getContext('webgpu') as GPUCanvasContext;
    
      // Size the drawing buffer to the CSS size scaled by the device pixel ratio.
      const devicePixelRatio = window.devicePixelRatio;
      canvas.width = canvas.clientWidth * devicePixelRatio;
      canvas.height = canvas.clientHeight * devicePixelRatio;
      const presentationFormat = navigator.gpu.getPreferredCanvasFormat();
    
      context.configure({
        device,
        format: presentationFormat,
        alphaMode: 'premultiplied',
      });
    
      // Compute pipeline running the `main` entry point of blur.wgsl. With
      // layout 'auto' the bind group layouts are derived from the shader.
      const blurPipeline = device.createComputePipeline({
        layout: 'auto',
        compute: {
          module: device.createShaderModule({
            code: blurWGSL,
          }),
          entryPoint: 'main',
        },
      });
    
      // Render pipeline that draws a textured quad to the canvas, using the
      // `vert_main` / `frag_main` entry points of fullscreenTexturedQuad.wgsl.
      const fullscreenQuadPipeline = device.createRenderPipeline({
        layout: 'auto',
        vertex: {
          module: device.createShaderModule({
            code: fullscreenTexturedQuadWGSL,
          }),
          entryPoint: 'vert_main',
        },
        fragment: {
          module: device.createShaderModule({
            code: fullscreenTexturedQuadWGSL,
          }),
          entryPoint: 'frag_main',
          targets: [
            {
              format: presentationFormat,
            },
          ],
        },
        primitive: {
          topology: 'triangle-list',
        },
      });
    
      // One linear-filtering sampler shared by the compute and render bind groups.
      const sampler = device.createSampler({
        magFilter: 'linear',
        minFilter: 'linear',
      });
    
      // Load and decode the source image.
      // NOTE(review): response.ok is not checked before decoding — confirm.
      const response = await fetch('../assets/img/Di-3d.png');
      const imageBitmap = await createImageBitmap(await response.blob());
    
      // Despite its name, cubeTexture holds the decoded 2D source image.
      const [srcWidth, srcHeight] = [imageBitmap.width, imageBitmap.height];
      const cubeTexture = device.createTexture({
        size: [srcWidth, srcHeight, 1],
        format: 'rgba8unorm',
        usage:
          GPUTextureUsage.TEXTURE_BINDING |
          GPUTextureUsage.COPY_DST |
          GPUTextureUsage.RENDER_ATTACHMENT,
      });
      device.queue.copyExternalImageToTexture(
        { source: imageBitmap },
        { texture: cubeTexture },
        [imageBitmap.width, imageBitmap.height]
      );
    
      // Create two image-sized intermediate textures. Each is usable both as a
      // sampled input (TEXTURE_BINDING) and as a storage output (STORAGE_BINDING),
      // so the blur passes can alternate between them.
      const textures = [0, 1].map(() => {
        return device.createTexture({
          size: {
            width: srcWidth,
            height: srcHeight,
          },
          format: 'rgba8unorm',
          usage:
            GPUTextureUsage.COPY_DST |
            GPUTextureUsage.STORAGE_BINDING |
            GPUTextureUsage.TEXTURE_BINDING,
        });
      });
    
      // Uniform buffer holding a single u32 with value 0. It is bound at
      // group 1 / binding 3, which blur.wgsl declares as `flip`.
      const buffer0 = (() => {
        const buffer = device.createBuffer({
          size: 4,
          mappedAtCreation: true,
          usage: GPUBufferUsage.UNIFORM,
        });
        new Uint32Array(buffer.getMappedRange())[0] = 0;
        buffer.unmap();
        return buffer;
      })();
      // Same as buffer0, but holding the value 1.
      const buffer1 = (() => {
        const buffer = device.createBuffer({
          size: 4,
          mappedAtCreation: true,
          usage: GPUBufferUsage.UNIFORM,
        });
        new Uint32Array(buffer.getMappedRange())[0] = 1;
        buffer.unmap();
        return buffer;
      })();
      // Uniform buffer for the blur parameters: two 32-bit values
      // (filterSize, blockDim) written by updateSettings() below.
      const blurParamsBuffer = device.createBuffer({
        size: 8,
        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM,
      });
    
      // Bind group 0 of the compute pipeline: the sampler and the blur parameters,
      // shared by every blur pass.
      const computeConstants = device.createBindGroup({
        layout: blurPipeline.getBindGroupLayout(0),
        entries: [
          {
            binding: 0,
            resource: sampler,
          },
          {
            binding: 1,
            resource: {
              buffer: blurParamsBuffer,
            },
          },
        ],
      });
      // Compute bind group (group 1), variant 0:
      // source image -> textures[0], flip = 0.
      const computeBindGroup0 = device.createBindGroup({
        layout: blurPipeline.getBindGroupLayout(1),
        entries: [
          {
            binding: 1,
            resource: cubeTexture.createView(),
          },
          {
            binding: 2,
            resource: textures[0].createView(),
          },
          {
            binding: 3,
            resource: {
              buffer: buffer0,
            },
          },
        ],
      });
      // Compute bind group (group 1), variant 1:
      // textures[0] -> textures[1], flip = 1.
      const computeBindGroup1 = device.createBindGroup({
        layout: blurPipeline.getBindGroupLayout(1),
        entries: [
          {
            binding: 1,
            resource: textures[0].createView(),
          },
          {
            binding: 2,
            resource: textures[1].createView(),
          },
          {
            binding: 3,
            resource: {
              buffer: buffer1,
            },
          },
        ],
      });
      // Compute bind group (group 1), variant 2:
      // textures[1] -> textures[0], flip = 0. Used for the extra iterations.
      const computeBindGroup2 = device.createBindGroup({
        layout: blurPipeline.getBindGroupLayout(1),
        entries: [
          {
            binding: 1,
            resource: textures[1].createView(),
          },
          {
            binding: 2,
            resource: textures[0].createView(),
          },
          {
            binding: 3,
            resource: {
              buffer: buffer0,
            },
          },
        ],
      });
      // Bind group for the render pipeline that displays the result.
      const showResultBindGroup = device.createBindGroup({
        layout: fullscreenQuadPipeline.getBindGroupLayout(0),
        entries: [
          {
            binding: 0,
            resource: sampler,
          },
          {
            binding: 1,
            // The image produced by the compute passes (every flip = 1 pass
            // writes into textures[1]).
            resource: textures[1].createView(),
          },
        ],
      });
    
      // GUI-editable settings. `iterations` is read directly in frame(), so it
      // needs no change handler; `filterSize` must be re-uploaded to the GPU.
      const settings = {
        filterSize: 15,
        iterations: 2,
      };
    
      // blockDim is assigned by updateSettings(), which is called once below
      // before the first frame is requested.
      let blockDim: number;
      const updateSettings = () => {
        // Output texels each workgroup produces per tile row:
        // the tile width minus the filter's overlap.
        blockDim = tileDim - (settings.filterSize - 1);
        device.queue.writeBuffer(
          blurParamsBuffer,
          0,
          new Uint32Array([settings.filterSize, blockDim])
        );
      };
      // A step of 2 starting from 1 keeps filterSize odd.
      gui.add(settings, 'filterSize', 1, 33).step(2).onChange(updateSettings);
      gui.add(settings, 'iterations', 1, 10).step(1);
    
      updateSettings();
    
      // Per-frame work: encode all blur passes, then draw the result.
      function frame() {
        // Sample is no longer the active page.
        if (!pageState.active) return;
    
        const commandEncoder = device.createCommandEncoder();
        // Begin a compute pass.
        const computePass = commandEncoder.beginComputePass();
        computePass.setPipeline(blurPipeline);
        computePass.setBindGroup(0, computeConstants);
    
        computePass.setBindGroup(1, computeBindGroup0);
        /*
         * Reference: signature of the call used below.
         *
         *   dispatchWorkgroups(
         *     workgroupCountX: GPUSize32,
         *     workgroupCountY?: GPUSize32,
         *     workgroupCountZ?: GPUSize32
         *   ): void;
         *
         * Dispatches work to be performed with the current GPUComputePipeline.
         *   workgroupCountX - X dimension of the grid of workgroups to dispatch.
         *   workgroupCountY - Y dimension of the grid of workgroups to dispatch.
         *   workgroupCountZ - Z dimension of the grid of workgroups to dispatch.
         *
         * Note: the x, y and z values passed to dispatchWorkgroups() and
         * dispatchWorkgroupsIndirect() are the number of workgroups to dispatch
         * in each dimension, not the number of shader invocations to run in each
         * dimension. This matches modern native GPU APIs but differs from
         * OpenCL. For example, if a GPUShaderModule defines an entry point with
         * @workgroup_size(4, 4) and work is dispatched with
         * computePass.dispatchWorkgroups(8, 8), the entry point is invoked 1024
         * times: a 4x4 workgroup dispatched 8 times along both the X and Y axes.
         *
         * The blur is applied as a sequence of passes, encoded below.
         */
        // First pass (flip = 0): one workgroup per blockDim texels in X and per
        // batch[1] rows in Y.
        computePass.dispatchWorkgroups(
          Math.ceil(srcWidth / blockDim),
          Math.ceil(srcHeight / batch[1])
        );
    
        // Second pass (flip = 1): the shader swaps x and y, so width and height
        // trade places in the workgroup counts.
        computePass.setBindGroup(1, computeBindGroup1);
        computePass.dispatchWorkgroups(
          Math.ceil(srcHeight / blockDim),
          Math.ceil(srcWidth / batch[1])
        );
    
        // Each additional iteration repeats the pair of passes, alternating
        // between the two intermediate textures and ending in textures[1].
        for (let i = 0; i < settings.iterations - 1; ++i) {
          computePass.setBindGroup(1, computeBindGroup2);
          computePass.dispatchWorkgroups(
            Math.ceil(srcWidth / blockDim),
            Math.ceil(srcHeight / batch[1])
          );
    
          computePass.setBindGroup(1, computeBindGroup1);
          computePass.dispatchWorkgroups(
            Math.ceil(srcHeight / blockDim),
            Math.ceil(srcWidth / batch[1])
          );
        }
    
        computePass.end();
        // Begin a render pass targeting the canvas's current texture.
        const passEncoder = commandEncoder.beginRenderPass({
          colorAttachments: [
            {
              view: context.getCurrentTexture().createView(),
              clearValue: { r: 0.0, g: 0.0, b: 0.0, a: 1.0 },
              loadOp: 'clear',
              storeOp: 'store',
            },
          ],
        });
    
        passEncoder.setPipeline(fullscreenQuadPipeline);
        passEncoder.setBindGroup(0, showResultBindGroup);
        // Six vertices of a 'triangle-list': two triangles.
        passEncoder.draw(6);
        passEncoder.end();
        device.queue.submit([commandEncoder.finish()]);
    
        requestAnimationFrame(frame);
      }
      requestAnimationFrame(frame);
    };
    
    

    blur.wgsl

    // Blur parameters. The host writes both fields as 32-bit unsigned integers
    // (filterSize, blockDim).
    struct Params {
      // Filter width in texels.
      filterDim : i32,
      // Texels along the blur axis that each workgroup is responsible for.
      blockDim : u32,
    }
    
    // Group 0: resources shared by every blur pass.
    @group(0) @binding(0) var samp : sampler;
    @group(0) @binding(1) var<uniform> params : Params;
    // Group 1: per-pass input texture, output storage texture and flip flag.
    @group(1) @binding(1) var inputTex : texture_2d<f32>;
    @group(1) @binding(2) var outputTex : texture_storage_2d<rgba8unorm, write>;
    
    // When value is non-zero, main() swaps the x and y of every load and store
    // coordinate.
    struct Flip {
      value : u32,
    }
    @group(1) @binding(3) var<uniform> flip : Flip;
    
    // This shader blurs the input texture in one direction, depending on whether
    // |flip.value| is 0 or 1.
    // It does so by running (128 / 4) threads per workgroup to load 128
    // texels into 4 rows of shared memory. Each thread loads a
    // 4 x 4 block of texels to take advantage of the texture sampling
    // hardware.
    // Then, each thread computes the blur result by averaging the adjacent texel values
    // in shared memory.
    // Because we're operating on a subset of the texture, we cannot compute all of the
    // results since not all of the neighbors are available in shared memory.
    // Specifically, with tiles of 4 rows x 128 texels, each row can only produce
    // 128 - (filterSize - 1) results. We compute the number of blocks
    // needed in Javascript and dispatch that amount.
    
    // Workgroup-shared tile: 4 rows of 128 RGB texels.
    var<workgroup> tile : array<array<vec3<f32>, 128>, 4>;
    
    // Blurs one tile (4 rows x 128 texels) of the input texture along the tile's
    // row direction. Each of the 32 invocations first loads a 4 x 4 block of
    // texels into the shared tile, then, after a barrier, writes the box-filtered
    // value for every texel of its block that has a full filter window inside the
    // tile and lies inside the texture.
    @compute @workgroup_size(32, 1, 1)
    fn main(
      @builtin(workgroup_id) WorkGroupID : vec3<u32>,
      @builtin(local_invocation_id) LocalInvocationID : vec3<u32>
    ) {
      let halfKernel = (params.filterDim - 1) / 2;
      let texSize = vec2<i32>(textureDimensions(inputTex, 0));
      let isFlipped = flip.value != 0u;
      // First tile column owned by this invocation.
      let laneColumn = 4 * LocalInvocationID.x;
    
      // Coordinate of this invocation's first texel, shifted back by half the
      // filter width so the tile includes the leading neighbours.
      let groupOrigin = WorkGroupID.xy * vec2(params.blockDim, 4);
      let laneOrigin = groupOrigin + LocalInvocationID.xy * vec2(4, 1);
      let tileOrigin = vec2<i32>(laneOrigin) - vec2(halfKernel, 0);
    
      // Load phase: sample a 4 x 4 block into the shared tile.
      for (var row = 0; row < 4; row++) {
        for (var col = 0; col < 4; col++) {
          let texel = tileOrigin + vec2(col, row);
          let srcCoord = select(texel, texel.yx, isFlipped);
          let uv = (vec2<f32>(srcCoord) + vec2<f32>(0.25, 0.25)) / vec2<f32>(texSize);
          tile[row][laneColumn + u32(col)] =
            textureSampleLevel(inputTex, samp, uv, 0.0).rgb;
        }
      }
    
      // Every invocation's loads must be visible before any tile reads.
      workgroupBarrier();
    
      // Write phase: average filterDim neighbours for each texel of the block.
      let kernelWeight = 1.0 / f32(params.filterDim);
      for (var row = 0; row < 4; row++) {
        for (var col = 0; col < 4; col++) {
          let center = i32(laneColumn) + col;
          // Skip texels whose filter window would leave the tile.
          if (center < halfKernel || center >= 128 - halfKernel) {
            continue;
          }
    
          let texel = tileOrigin + vec2(col, row);
          let dstCoord = select(texel, texel.yx, isFlipped);
          // Skip texels that fall outside the texture.
          if (!all(dstCoord < texSize)) {
            continue;
          }
    
          let windowStart = center - halfKernel;
          var sum = vec3(0.0, 0.0, 0.0);
          for (var k = 0; k < params.filterDim; k++) {
            sum = sum + kernelWeight * tile[row][windowStart + k];
          }
          textureStore(outputTex, dstCoord, vec4(sum, 1.0));
        }
      }
    }
    
    

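    fullscreenTexturedQuad.wgsl(注:下面贴出的代码与上文 blur.wgsl 相同,是带中文注释的模糊计算着色器;渲染管线所引用的 vert_main / frag_main 入口并不在其中,即真正的 fullscreenTexturedQuad.wgsl 顶点/片元着色器在本文中未贴出)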

    // Blur parameters. The host writes both fields as 32-bit unsigned integers
    // (filterSize, blockDim).
    struct Params {
      // Filter width in texels.
      filterDim : i32,
      // Texels along the blur axis that each workgroup is responsible for.
      blockDim : u32,
    }
    
    // Group 0: resources shared by every blur pass.
    @group(0) @binding(0) var samp : sampler;
    @group(0) @binding(1) var<uniform> params : Params;
    // Group 1: per-pass input texture, output storage texture and flip flag.
    @group(1) @binding(1) var inputTex : texture_2d<f32>;
    @group(1) @binding(2) var outputTex : texture_storage_2d<rgba8unorm, write>;
    
    // When value is non-zero, main() swaps the x and y of every load and store
    // coordinate.
    struct Flip {
      value : u32,
    }
    @group(1) @binding(3) var<uniform> flip : Flip;
    
    // This shader blurs the input texture in one direction, depending on whether
    // |flip.value| is 0 or 1.
    // It does so by running (128 / 4) threads per workgroup to load 128
    // texels into 4 rows of shared memory. Each thread loads a
    // 4 x 4 block of texels to take advantage of the texture sampling
    // hardware.
    // Then, each thread computes the blur result by averaging the adjacent texel values
    // in shared memory.
    // Because we're operating on a subset of the texture, we cannot compute all of the
    // results since not all of the neighbors are available in shared memory.
    // Specifically, with tiles of 4 rows x 128 texels, each row can only produce
    // 128 - (filterSize - 1) results. We compute the number of blocks
    // needed in Javascript and dispatch that amount.
    
    // Workgroup-shared tile: 4 rows of 128 RGB texels.
    var<workgroup> tile : array<array<vec3<f32>, 128>, 4>;
    
    // Compute entry point: 32 invocations per workgroup. Each invocation loads a
    // 4 x 4 block of texels into the shared tile, waits at a barrier, then writes
    // the averaged value for each texel of its block that passes the bounds
    // checks below.
    @compute @workgroup_size(32, 1, 1)
    fn main(
      @builtin(workgroup_id) WorkGroupID : vec3<u32>,
      @builtin(local_invocation_id) LocalInvocationID : vec3<u32>
    ) {
      // Half the filter width (integer division).
      let filterOffset = (params.filterDim - 1) / 2;
      // Texture size in texels.
      let dims = vec2<i32>(textureDimensions(inputTex, 0));
      // Coordinate of this invocation's first texel, shifted back by filterOffset
      // so the tile also holds the leading neighbours.
      let baseIndex = vec2<i32>(WorkGroupID.xy * vec2(params.blockDim, 4) +
                                LocalInvocationID.xy * vec2(4, 1))
                      - vec2(filterOffset, 0);
    
      // Load phase: sample a 4 x 4 block into the shared tile.
      for (var r = 0; r < 4; r++) {
        for (var c = 0; c < 4; c++) {
          var loadIndex = baseIndex + vec2(c, r);
          // Swap x and y when the flip flag is set.
          if (flip.value != 0u) {
            loadIndex = loadIndex.yx;
          }
          
          tile[r][4 * LocalInvocationID.x + u32(c)] = textureSampleLevel(
            inputTex,
            samp,
            // Coordinate offset: add 0.25 texel, then divide by the texture size
            // to obtain normalized sampling coordinates.
            (vec2<f32>(loadIndex) + vec2<f32>(0.25, 0.25)) / vec2<f32>(dims),
            0.0
          ).rgb;
        }
      }
    
      // Every invocation's loads must be visible before any tile reads.
      workgroupBarrier();
      // Write phase: store the filtered texels to the output texture.
      for (var r = 0; r < 4; r++) {
        for (var c = 0; c < 4; c++) {
          var writeIndex = baseIndex + vec2(c, r);
          if (flip.value != 0) {
            writeIndex = writeIndex.yx;
          }
    
          // Column of this texel inside the tile row.
          let center = i32(4 * LocalInvocationID.x) + c;
          // Only texels with a full filter window inside the tile, and inside
          // the texture, are written.
          if (center >= filterOffset &&
              center < 128 - filterOffset &&
              all(writeIndex < dims)) {
            var acc = vec3(0.0, 0.0, 0.0);
            // Equal-weight average of filterDim neighbouring tile texels.
            for (var f = 0; f < params.filterDim; f++) {
              var i = center + f - filterOffset;
              acc = acc + (1.0 / f32(params.filterDim)) * tile[r][i];
            }
            textureStore(outputTex, writeIndex, vec4(acc, 1.0));
          }
        }
      }
    }
    
    

    总结步骤

    简单总结就是:通过计算管线对图片进行多轮采样计算,使图像模糊,再将计算管线写回的纹理传递给渲染管线进行显示。

    相关文章

      网友评论

          本文标题:Image Blur

          本文链接:https://www.haomeiwen.com/subject/qtspodtx.html