/*
 * Decompiled with CFR 0.152.
 */
package boofcv.alg.filter.convolve.normalized;

import boofcv.struct.convolve.Kernel1D_F32;
import boofcv.struct.convolve.Kernel1D_F64;
import boofcv.struct.convolve.Kernel1D_S32;
import boofcv.struct.convolve.Kernel2D_F32;
import boofcv.struct.convolve.Kernel2D_F64;
import boofcv.struct.convolve.Kernel2D_S32;
import boofcv.struct.image.GrayF32;
import boofcv.struct.image.GrayF64;
import boofcv.struct.image.GrayI16;
import boofcv.struct.image.GrayI8;
import boofcv.struct.image.GrayS16;
import boofcv.struct.image.GrayS32;
import boofcv.struct.image.GrayU16;
import boofcv.struct.image.GrayU8;

public class ConvolveNormalized_JustBorder_SB {
    /**
     * Normalized horizontal 1D convolution restricted to the left and right image borders.
     *
     * <p>In every row the first {@code offsetL} pixels and the last {@code offsetR} pixels are
     * computed from the part of the kernel that overlaps the row. The weighted sum is divided by
     * the sum of the kernel coefficients that were actually used. The pixels in between are not
     * written by this method.</p>
     *
     * @param kernel 1D kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void horizontal(Kernel1D_F32 kernel, GrayF32 input, GrayF32 output) {
        final float[] src = input.data;
        final float[] dst = output.data;
        final float[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();
        // Number of pixels jumped over between the left and the right border of a row.
        final int skipped = width - (offsetL + offsetR);

        for (int row = 0; row < height; row++) {
            final int rowStart = input.startIndex + row * input.stride;
            int out = output.startIndex + row * output.stride;
            int pos = rowStart;

            // Left border: the kernel sticks out past the left edge, so its first coefficients are dropped.
            final int leftEnd = rowStart + offsetL;
            for (; pos < leftEnd; pos++) {
                float sum = 0.0f;
                float norm = 0.0f;
                int read = rowStart;
                for (int k = offsetL - (pos - rowStart); k < kernelWidth; k++, read++) {
                    final float w = coef[k];
                    norm += w;
                    sum += src[read] * w;
                }
                dst[out] = sum / norm;
                out++;
            }

            // Jump over the interior of the row.
            pos += skipped;
            out += skipped;

            // Right border: the kernel sticks out past the right edge, so its last coefficients are dropped.
            final int rowEnd = rowStart + width;
            for (; pos < rowEnd; pos++) {
                float sum = 0.0f;
                float norm = 0.0f;
                int read = pos - offsetL;
                final int kEnd = rowEnd - read;
                for (int k = 0; k < kEnd; k++, read++) {
                    final float w = coef[k];
                    norm += w;
                    sum += src[read] * w;
                }
                dst[out] = sum / norm;
                out++;
            }
        }
    }

    /**
     * Normalized vertical 1D convolution restricted to the top and bottom image borders.
     *
     * <p>The first {@code offsetL} rows and the last {@code offsetR} rows are computed from the part
     * of the kernel that overlaps the image. Because the set of coefficients used is the same for a
     * whole row, their sum is computed once per row and every weighted sum in that row is divided by
     * it. Rows in between are not written. Image dimensions are taken from {@code output}.</p>
     *
     * @param kernel 1D kernel providing the coefficients, width and offset
     * @param input  image that is read
     * @param output image whose border rows are written
     */
    public static void vertical(Kernel1D_F32 kernel, GrayF32 input, GrayF32 output) {
        final float[] src = input.data;
        final float[] dst = output.data;
        final float[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int imgWidth = output.getWidth();
        final int imgHeight = output.getHeight();

        // Top border: kernel rows above the image are dropped.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kStart = offsetL - y;

            float norm = 0.0f;
            for (int k = kStart; k < kernelWidth; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                float sum = 0.0f;
                // Begin reading in the first image row of the same column.
                int read = pos - y * input.stride;
                for (int k = kStart; k < kernelWidth; k++, read += input.stride) {
                    sum += src[read] * coef[k];
                }
                dst[out] = sum / norm;
                out++;
            }
        }

        // Bottom border: kernel rows below the image are dropped.
        for (int y = imgHeight - offsetR; y < imgHeight; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kEnd = imgHeight - (y - offsetL);

            float norm = 0.0f;
            for (int k = 0; k < kEnd; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                float sum = 0.0f;
                int read = pos - offsetL * input.stride;
                for (int k = 0; k < kEnd; k++, read += input.stride) {
                    sum += src[read] * coef[k];
                }
                dst[out] = sum / norm;
                out++;
            }
        }
    }

    /**
     * Normalized 2D convolution restricted to the image border.
     *
     * <p>Three passes are made: (1) the left and right border columns of every row, (2) the top
     * border rows between those columns, (3) the bottom border rows between those columns. For each
     * border pixel only kernel elements that overlap the image are used, and the weighted sum is
     * divided by the sum of those elements. The kernel data is indexed as
     * {@code row * kernelWidth + column}.</p>
     *
     * @param kernel 2D kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void convolve(Kernel2D_F32 kernel, GrayF32 input, GrayF32 output) {
        final float[] src = input.data;
        final float[] dst = output.data;
        final float[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();

        // Pass 1: left and right border columns of every row.
        for (int y = 0; y < height; y++) {
            // Clip the vertical extent of the kernel to the image.
            final int dyMin;
            if (y >= offsetL) {
                dyMin = -offsetL;
            } else {
                dyMin = -y;
            }
            final int dyMax;
            if (y < height - offsetR) {
                dyMax = offsetR;
            } else {
                dyMax = height - y - 1;
            }

            int out = output.startIndex + y * output.stride;
            for (int x = 0; x < offsetL; x++) {
                float sum = 0.0f;
                float norm = 0.0f;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    // Kernel columns left of the image are dropped: dx starts at -x.
                    for (int dx = -x; dx <= offsetR; dx++) {
                        final float w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }

            out = output.startIndex + y * output.stride + width - offsetR;
            for (int x = width - offsetR; x < width; x++) {
                // Kernel columns right of the image are dropped.
                final int dxMax = width - x - 1;
                float sum = 0.0f;
                float norm = 0.0f;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= dxMax; dx++) {
                        final float w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }
        }

        // Pass 2: top border rows, columns between the left and right borders.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                float sum = 0.0f;
                float norm = 0.0f;
                for (int dy = -y; dy <= offsetR; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final float w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }
        }

        // Pass 3: bottom border rows, columns between the left and right borders.
        for (int y = height - offsetR; y < height; y++) {
            final int dyMax = height - y - 1;
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                float sum = 0.0f;
                float norm = 0.0f;
                for (int dy = -offsetL; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final float w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }
        }
    }

    /**
     * Normalized horizontal 1D convolution restricted to the left and right image borders
     * (double precision).
     *
     * <p>In every row the first {@code offsetL} pixels and the last {@code offsetR} pixels are
     * computed from the part of the kernel that overlaps the row. The weighted sum is divided by
     * the sum of the kernel coefficients that were actually used. The pixels in between are not
     * written by this method.</p>
     *
     * @param kernel 1D kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void horizontal(Kernel1D_F64 kernel, GrayF64 input, GrayF64 output) {
        final double[] src = input.data;
        final double[] dst = output.data;
        final double[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();
        // Number of pixels jumped over between the left and the right border of a row.
        final int skipped = width - (offsetL + offsetR);

        for (int row = 0; row < height; row++) {
            final int rowStart = input.startIndex + row * input.stride;
            int out = output.startIndex + row * output.stride;
            int pos = rowStart;

            // Left border: leading kernel coefficients fall outside the image and are dropped.
            final int leftEnd = rowStart + offsetL;
            for (; pos < leftEnd; pos++) {
                double sum = 0.0;
                double norm = 0.0;
                int read = rowStart;
                for (int k = offsetL - (pos - rowStart); k < kernelWidth; k++, read++) {
                    final double w = coef[k];
                    norm += w;
                    sum += src[read] * w;
                }
                dst[out] = sum / norm;
                out++;
            }

            // Jump over the interior of the row.
            pos += skipped;
            out += skipped;

            // Right border: trailing kernel coefficients fall outside the image and are dropped.
            final int rowEnd = rowStart + width;
            for (; pos < rowEnd; pos++) {
                double sum = 0.0;
                double norm = 0.0;
                int read = pos - offsetL;
                final int kEnd = rowEnd - read;
                for (int k = 0; k < kEnd; k++, read++) {
                    final double w = coef[k];
                    norm += w;
                    sum += src[read] * w;
                }
                dst[out] = sum / norm;
                out++;
            }
        }
    }

    /**
     * Normalized vertical 1D convolution restricted to the top and bottom image borders
     * (double precision).
     *
     * <p>The first {@code offsetL} rows and the last {@code offsetR} rows are computed from the part
     * of the kernel that overlaps the image. The sum of the coefficients used is computed once per
     * row and every weighted sum in that row is divided by it. Rows in between are not written.
     * Image dimensions are taken from {@code output}.</p>
     *
     * @param kernel 1D kernel providing the coefficients, width and offset
     * @param input  image that is read
     * @param output image whose border rows are written
     */
    public static void vertical(Kernel1D_F64 kernel, GrayF64 input, GrayF64 output) {
        final double[] src = input.data;
        final double[] dst = output.data;
        final double[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int imgWidth = output.getWidth();
        final int imgHeight = output.getHeight();

        // Top border: kernel rows above the image are dropped.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kStart = offsetL - y;

            double norm = 0.0;
            for (int k = kStart; k < kernelWidth; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                double sum = 0.0;
                // Begin reading in the first image row of the same column.
                int read = pos - y * input.stride;
                for (int k = kStart; k < kernelWidth; k++, read += input.stride) {
                    sum += src[read] * coef[k];
                }
                dst[out] = sum / norm;
                out++;
            }
        }

        // Bottom border: kernel rows below the image are dropped.
        for (int y = imgHeight - offsetR; y < imgHeight; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kEnd = imgHeight - (y - offsetL);

            double norm = 0.0;
            for (int k = 0; k < kEnd; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                double sum = 0.0;
                int read = pos - offsetL * input.stride;
                for (int k = 0; k < kEnd; k++, read += input.stride) {
                    sum += src[read] * coef[k];
                }
                dst[out] = sum / norm;
                out++;
            }
        }
    }

    /**
     * Normalized 2D convolution restricted to the image border (double precision).
     *
     * <p>Three passes are made: (1) the left and right border columns of every row, (2) the top
     * border rows between those columns, (3) the bottom border rows between those columns. For each
     * border pixel only kernel elements that overlap the image are used, and the weighted sum is
     * divided by the sum of those elements. The kernel data is indexed as
     * {@code row * kernelWidth + column}.</p>
     *
     * @param kernel 2D kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void convolve(Kernel2D_F64 kernel, GrayF64 input, GrayF64 output) {
        final double[] src = input.data;
        final double[] dst = output.data;
        final double[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();

        // Pass 1: left and right border columns of every row.
        for (int y = 0; y < height; y++) {
            // Clip the vertical extent of the kernel to the image.
            final int dyMin;
            if (y >= offsetL) {
                dyMin = -offsetL;
            } else {
                dyMin = -y;
            }
            final int dyMax;
            if (y < height - offsetR) {
                dyMax = offsetR;
            } else {
                dyMax = height - y - 1;
            }

            int out = output.startIndex + y * output.stride;
            for (int x = 0; x < offsetL; x++) {
                double sum = 0.0;
                double norm = 0.0;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    // Kernel columns left of the image are dropped: dx starts at -x.
                    for (int dx = -x; dx <= offsetR; dx++) {
                        final double w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }

            out = output.startIndex + y * output.stride + width - offsetR;
            for (int x = width - offsetR; x < width; x++) {
                // Kernel columns right of the image are dropped.
                final int dxMax = width - x - 1;
                double sum = 0.0;
                double norm = 0.0;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= dxMax; dx++) {
                        final double w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }
        }

        // Pass 2: top border rows, columns between the left and right borders.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                double sum = 0.0;
                double norm = 0.0;
                for (int dy = -y; dy <= offsetR; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final double w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }
        }

        // Pass 3: bottom border rows, columns between the left and right borders.
        for (int y = height - offsetR; y < height; y++) {
            final int dyMax = height - y - 1;
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                double sum = 0.0;
                double norm = 0.0;
                for (int dy = -offsetL; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final double w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = sum / norm;
                out++;
            }
        }
    }

    /**
     * Normalized horizontal 1D convolution restricted to the left and right image borders,
     * for 8-bit input read as unsigned.
     *
     * <p>In every row the first {@code offsetL} pixels and the last {@code offsetR} pixels are
     * computed from the part of the kernel that overlaps the row. Source bytes are masked with
     * {@code 0xFF} before being multiplied. Half of the sum of the used coefficients is added to
     * the weighted sum before the integer division by that sum, and the result is narrowed to
     * {@code byte}. The pixels in between are not written by this method.</p>
     *
     * @param kernel 1D integer kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void horizontal(Kernel1D_S32 kernel, GrayU8 input, GrayI8 output) {
        final byte[] src = input.data;
        final byte[] dst = output.data;
        final int[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();
        // Number of pixels jumped over between the left and the right border of a row.
        final int skipped = width - (offsetL + offsetR);

        for (int row = 0; row < height; row++) {
            final int rowStart = input.startIndex + row * input.stride;
            int out = output.startIndex + row * output.stride;
            int pos = rowStart;

            // Left border: leading kernel coefficients fall outside the image and are dropped.
            final int leftEnd = rowStart + offsetL;
            for (; pos < leftEnd; pos++) {
                int sum = 0;
                int norm = 0;
                int read = rowStart;
                for (int k = offsetL - (pos - rowStart); k < kernelWidth; k++, read++) {
                    final int w = coef[k];
                    norm += w;
                    sum += (src[read] & 0xFF) * w;
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }

            // Jump over the interior of the row.
            pos += skipped;
            out += skipped;

            // Right border: trailing kernel coefficients fall outside the image and are dropped.
            final int rowEnd = rowStart + width;
            for (; pos < rowEnd; pos++) {
                int sum = 0;
                int norm = 0;
                int read = pos - offsetL;
                final int kEnd = rowEnd - read;
                for (int k = 0; k < kEnd; k++, read++) {
                    final int w = coef[k];
                    norm += w;
                    sum += (src[read] & 0xFF) * w;
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }
        }
    }

    /**
     * Normalized vertical 1D convolution restricted to the top and bottom image borders,
     * for 8-bit input read as unsigned.
     *
     * <p>The first {@code offsetL} rows and the last {@code offsetR} rows are computed from the part
     * of the kernel that overlaps the image. The sum of the coefficients used is computed once per
     * row. Source bytes are masked with {@code 0xFF}; half of the coefficient sum is added to the
     * weighted sum before the integer division, and the result is narrowed to {@code byte}.
     * Rows in between are not written. Image dimensions are taken from {@code output}.</p>
     *
     * @param kernel 1D integer kernel providing the coefficients, width and offset
     * @param input  image that is read
     * @param output image whose border rows are written
     */
    public static void vertical(Kernel1D_S32 kernel, GrayU8 input, GrayI8 output) {
        final byte[] src = input.data;
        final byte[] dst = output.data;
        final int[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int imgWidth = output.getWidth();
        final int imgHeight = output.getHeight();

        // Top border: kernel rows above the image are dropped.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kStart = offsetL - y;

            int norm = 0;
            for (int k = kStart; k < kernelWidth; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                int sum = 0;
                // Begin reading in the first image row of the same column.
                int read = pos - y * input.stride;
                for (int k = kStart; k < kernelWidth; k++, read += input.stride) {
                    sum += (src[read] & 0xFF) * coef[k];
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }
        }

        // Bottom border: kernel rows below the image are dropped.
        for (int y = imgHeight - offsetR; y < imgHeight; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kEnd = imgHeight - (y - offsetL);

            int norm = 0;
            for (int k = 0; k < kEnd; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                int sum = 0;
                int read = pos - offsetL * input.stride;
                for (int k = 0; k < kEnd; k++, read += input.stride) {
                    sum += (src[read] & 0xFF) * coef[k];
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }
        }
    }

    /**
     * Normalized 2D convolution restricted to the image border, for 8-bit input read as unsigned.
     *
     * <p>Three passes are made: (1) the left and right border columns of every row, (2) the top
     * border rows between those columns, (3) the bottom border rows between those columns. Only
     * kernel elements that overlap the image are used. Source bytes are masked with {@code 0xFF};
     * half of the sum of the used elements is added to the weighted sum before the integer division
     * by that sum, and the result is narrowed to {@code byte}. The kernel data is indexed as
     * {@code row * kernelWidth + column}.</p>
     *
     * @param kernel 2D integer kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void convolve(Kernel2D_S32 kernel, GrayU8 input, GrayI8 output) {
        final byte[] src = input.data;
        final byte[] dst = output.data;
        final int[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();

        // Pass 1: left and right border columns of every row.
        for (int y = 0; y < height; y++) {
            // Clip the vertical extent of the kernel to the image.
            final int dyMin;
            if (y >= offsetL) {
                dyMin = -offsetL;
            } else {
                dyMin = -y;
            }
            final int dyMax;
            if (y < height - offsetR) {
                dyMax = offsetR;
            } else {
                dyMax = height - y - 1;
            }

            int out = output.startIndex + y * output.stride;
            for (int x = 0; x < offsetL; x++) {
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    // Kernel columns left of the image are dropped: dx starts at -x.
                    for (int dx = -x; dx <= offsetR; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += (src[srcCenter + dx] & 0xFF) * w;
                    }
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }

            out = output.startIndex + y * output.stride + width - offsetR;
            for (int x = width - offsetR; x < width; x++) {
                // Kernel columns right of the image are dropped.
                final int dxMax = width - x - 1;
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= dxMax; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += (src[srcCenter + dx] & 0xFF) * w;
                    }
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }
        }

        // Pass 2: top border rows, columns between the left and right borders.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                int sum = 0;
                int norm = 0;
                for (int dy = -y; dy <= offsetR; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += (src[srcCenter + dx] & 0xFF) * w;
                    }
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }
        }

        // Pass 3: bottom border rows, columns between the left and right borders.
        for (int y = height - offsetR; y < height; y++) {
            final int dyMax = height - y - 1;
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                int sum = 0;
                int norm = 0;
                for (int dy = -offsetL; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += (src[srcCenter + dx] & 0xFF) * w;
                    }
                }
                dst[out] = (byte)((sum + norm / 2) / norm);
                out++;
            }
        }
    }

    /**
     * Normalized horizontal 1D convolution restricted to the left and right image borders,
     * for 16-bit input read as signed.
     *
     * <p>In every row the first {@code offsetL} pixels and the last {@code offsetR} pixels are
     * computed from the part of the kernel that overlaps the row. Source values are used without
     * masking. Half of the sum of the used coefficients is added to the weighted sum before the
     * integer division by that sum, and the result is narrowed to {@code short}. The pixels in
     * between are not written by this method.</p>
     *
     * @param kernel 1D integer kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void horizontal(Kernel1D_S32 kernel, GrayS16 input, GrayI16 output) {
        final short[] src = input.data;
        final short[] dst = output.data;
        final int[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();
        // Number of pixels jumped over between the left and the right border of a row.
        final int skipped = width - (offsetL + offsetR);

        for (int row = 0; row < height; row++) {
            final int rowStart = input.startIndex + row * input.stride;
            int out = output.startIndex + row * output.stride;
            int pos = rowStart;

            // Left border: leading kernel coefficients fall outside the image and are dropped.
            final int leftEnd = rowStart + offsetL;
            for (; pos < leftEnd; pos++) {
                int sum = 0;
                int norm = 0;
                int read = rowStart;
                for (int k = offsetL - (pos - rowStart); k < kernelWidth; k++, read++) {
                    final int w = coef[k];
                    norm += w;
                    sum += src[read] * w;
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }

            // Jump over the interior of the row.
            pos += skipped;
            out += skipped;

            // Right border: trailing kernel coefficients fall outside the image and are dropped.
            final int rowEnd = rowStart + width;
            for (; pos < rowEnd; pos++) {
                int sum = 0;
                int norm = 0;
                int read = pos - offsetL;
                final int kEnd = rowEnd - read;
                for (int k = 0; k < kEnd; k++, read++) {
                    final int w = coef[k];
                    norm += w;
                    sum += src[read] * w;
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }
        }
    }

    /**
     * Normalized vertical 1D convolution restricted to the top and bottom image borders,
     * for 16-bit input read as signed.
     *
     * <p>The first {@code offsetL} rows and the last {@code offsetR} rows are computed from the part
     * of the kernel that overlaps the image. The sum of the coefficients used is computed once per
     * row. Half of that sum is added to the weighted sum before the integer division, and the
     * result is narrowed to {@code short}. Rows in between are not written. Image dimensions are
     * taken from {@code output}.</p>
     *
     * @param kernel 1D integer kernel providing the coefficients, width and offset
     * @param input  image that is read
     * @param output image whose border rows are written
     */
    public static void vertical(Kernel1D_S32 kernel, GrayS16 input, GrayI16 output) {
        final short[] src = input.data;
        final short[] dst = output.data;
        final int[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int imgWidth = output.getWidth();
        final int imgHeight = output.getHeight();

        // Top border: kernel rows above the image are dropped.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kStart = offsetL - y;

            int norm = 0;
            for (int k = kStart; k < kernelWidth; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                int sum = 0;
                // Begin reading in the first image row of the same column.
                int read = pos - y * input.stride;
                for (int k = kStart; k < kernelWidth; k++, read += input.stride) {
                    sum += src[read] * coef[k];
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }
        }

        // Bottom border: kernel rows below the image are dropped.
        for (int y = imgHeight - offsetR; y < imgHeight; y++) {
            int out = output.startIndex + y * output.stride;
            final int rowStart = input.startIndex + y * input.stride;
            final int rowEnd = rowStart + imgWidth;
            final int kEnd = imgHeight - (y - offsetL);

            int norm = 0;
            for (int k = 0; k < kEnd; k++) {
                norm += coef[k];
            }

            for (int pos = rowStart; pos < rowEnd; pos++) {
                int sum = 0;
                int read = pos - offsetL * input.stride;
                for (int k = 0; k < kEnd; k++, read += input.stride) {
                    sum += src[read] * coef[k];
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }
        }
    }

    /**
     * Normalized 2D convolution restricted to the image border, for 16-bit input read as signed.
     *
     * <p>Three passes are made: (1) the left and right border columns of every row, (2) the top
     * border rows between those columns, (3) the bottom border rows between those columns. Only
     * kernel elements that overlap the image are used. Half of the sum of the used elements is
     * added to the weighted sum before the integer division by that sum, and the result is narrowed
     * to {@code short}. The kernel data is indexed as {@code row * kernelWidth + column}.</p>
     *
     * @param kernel 2D integer kernel providing the coefficients, width and offset
     * @param input  image that is read; its width and height define the processed region
     * @param output image whose border pixels are written
     */
    public static void convolve(Kernel2D_S32 kernel, GrayS16 input, GrayI16 output) {
        final short[] src = input.data;
        final short[] dst = output.data;
        final int[] coef = kernel.data;

        final int kernelWidth = kernel.getWidth();
        final int offsetL = kernel.getOffset();
        final int offsetR = kernelWidth - offsetL - 1;
        final int width = input.getWidth();
        final int height = input.getHeight();

        // Pass 1: left and right border columns of every row.
        for (int y = 0; y < height; y++) {
            // Clip the vertical extent of the kernel to the image.
            final int dyMin;
            if (y >= offsetL) {
                dyMin = -offsetL;
            } else {
                dyMin = -y;
            }
            final int dyMax;
            if (y < height - offsetR) {
                dyMax = offsetR;
            } else {
                dyMax = height - y - 1;
            }

            int out = output.startIndex + y * output.stride;
            for (int x = 0; x < offsetL; x++) {
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    // Kernel columns left of the image are dropped: dx starts at -x.
                    for (int dx = -x; dx <= offsetR; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }

            out = output.startIndex + y * output.stride + width - offsetR;
            for (int x = width - offsetR; x < width; x++) {
                // Kernel columns right of the image are dropped.
                final int dxMax = width - x - 1;
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= dxMax; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }
        }

        // Pass 2: top border rows, columns between the left and right borders.
        for (int y = 0; y < offsetL; y++) {
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                int sum = 0;
                int norm = 0;
                for (int dy = -y; dy <= offsetR; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }
        }

        // Pass 3: bottom border rows, columns between the left and right borders.
        for (int y = height - offsetR; y < height; y++) {
            final int dyMax = height - y - 1;
            int out = output.startIndex + y * output.stride + offsetL;
            for (int x = offsetL; x < width - offsetR; x++) {
                int sum = 0;
                int norm = 0;
                for (int dy = -offsetL; dy <= dyMax; dy++) {
                    final int srcCenter = input.startIndex + (y + dy) * input.stride + x;
                    final int kerCenter = (dy + offsetL) * kernelWidth + offsetL;
                    for (int dx = -offsetL; dx <= offsetR; dx++) {
                        final int w = coef[kerCenter + dx];
                        norm += w;
                        sum += src[srcCenter + dx] * w;
                    }
                }
                dst[out] = (short)((sum + norm / 2) / norm);
                out++;
            }
        }
    }

    /**
     * Normalized horizontal convolution of the left and right image borders only.
     *
     * <p>For every row the first {@code kernel.getOffset()} pixels and the trailing pixels of the row
     * are written; the pixels in between are skipped and left untouched. At each border pixel only the
     * kernel coefficients that overlap the row are used, and the weighted sum is divided by the sum of
     * those coefficients after adding half of that sum.</p>
     *
     * @param kernel 1D integer kernel
     * @param input  source image; samples are read as unsigned 16-bit values
     * @param output destination image; results are narrowed to {@code short}
     */
    public static void horizontal(Kernel1D_S32 kernel, GrayU16 input, GrayI16 output) {
        final short[] src = input.data;
        final short[] dst = output.data;
        final int[] taps = kernel.data;
        final int numTaps = kernel.getWidth();
        final int radiusLeft = kernel.getOffset();
        final int radiusRight = numTaps - radiusLeft - 1;
        final int cols = input.getWidth();
        final int rows = input.getHeight();
        // Distance jumped over between the end of the left border and the start of the right border.
        final int skipped = cols - (radiusLeft + radiusRight);

        for (int row = 0; row < rows; ++row) {
            int writePos = output.startIndex + row * output.stride;
            final int rowBegin = input.startIndex + row * input.stride;
            final int leftEnd = rowBegin + radiusLeft;
            int pos = rowBegin;

            // Left border: the kernel hangs over the left edge, so start at a later coefficient
            // and always read from the first pixel of the row.
            for (; pos < leftEnd; ++pos) {
                int sum = 0;
                int norm = 0;
                int readPos = rowBegin;
                for (int tap = numTaps - (radiusRight + 1 + pos - rowBegin); tap < numTaps; ++tap) {
                    final int coef = taps[tap];
                    norm += coef;
                    sum += (src[readPos++] & 0xFFFF) * coef;
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }

            pos += skipped;
            writePos += skipped;

            // Right border: the kernel hangs over the right edge, so stop at an earlier coefficient.
            final int rowEnd = rowBegin + cols;
            for (; pos < rowEnd; ++pos) {
                int sum = 0;
                int norm = 0;
                int readPos = pos - radiusLeft;
                final int tapCount = rowEnd - readPos;
                for (int tap = 0; tap < tapCount; ++tap) {
                    final int coef = taps[tap];
                    norm += coef;
                    sum += (src[readPos++] & 0xFFFF) * coef;
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }
        }
    }

    /**
     * Normalized vertical convolution of the top and bottom image borders only.
     *
     * <p>Rows {@code [0, kernel.getOffset())} and the rows starting at
     * {@code height - (kernelWidth - offset - 1)} are written across the full image width. For each
     * such row the normalization weight is the sum of the kernel coefficients that overlap the image
     * and is computed once per row. Width and height are taken from {@code output}.</p>
     *
     * @param kernel 1D integer kernel
     * @param input  source image; samples are read as unsigned 16-bit values
     * @param output destination image; results are narrowed to {@code short}
     */
    public static void vertical(Kernel1D_S32 kernel, GrayU16 input, GrayI16 output) {
        final short[] src = input.data;
        final short[] dst = output.data;
        final int[] taps = kernel.data;
        final int numTaps = kernel.getWidth();
        final int radiusTop = kernel.getOffset();
        final int radiusBottom = numTaps - radiusTop - 1;
        final int cols = output.getWidth();
        final int rows = output.getHeight();
        final int firstBottomRow = rows - radiusBottom;

        // Top border: the kernel is clipped above the image, so its leading coefficients are dropped.
        for (int row = 0; row < radiusTop; ++row) {
            int writePos = output.startIndex + row * output.stride;
            final int rowBegin = input.startIndex + row * input.stride;
            final int rowEnd = rowBegin + cols;
            final int firstTap = radiusTop - row;

            int norm = 0;
            for (int tap = firstTap; tap < numTaps; ++tap) {
                norm += taps[tap];
            }

            for (int pos = rowBegin; pos < rowEnd; ++pos) {
                int sum = 0;
                // Step back to the same column in the first image row.
                int readPos = pos - row * input.stride;
                for (int tap = firstTap; tap < numTaps; ++tap) {
                    sum += (src[readPos] & 0xFFFF) * taps[tap];
                    readPos += input.stride;
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }
        }

        // Bottom border: the kernel is clipped below the image, so its trailing coefficients are dropped.
        for (int row = firstBottomRow; row < rows; ++row) {
            int writePos = output.startIndex + row * output.stride;
            final int rowBegin = input.startIndex + row * input.stride;
            final int rowEnd = rowBegin + cols;
            final int tapCount = rows - (row - radiusTop);

            int norm = 0;
            for (int tap = 0; tap < tapCount; ++tap) {
                norm += taps[tap];
            }

            for (int pos = rowBegin; pos < rowEnd; ++pos) {
                int sum = 0;
                int readPos = pos - radiusTop * input.stride;
                for (int tap = 0; tap < tapCount; ++tap) {
                    sum += (src[readPos] & 0xFFFF) * taps[tap];
                    readPos += input.stride;
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }
        }
    }

    /**
     * Normalized 2D convolution of the image border only.
     *
     * <p>The border is covered in three passes: (1) the left and right border columns of every row,
     * (2) the top rows between those columns, (3) the bottom rows between those columns. At each pixel
     * only the kernel coefficients that overlap the image contribute, and the weighted sum is divided
     * by the sum of those coefficients after adding half of that sum.</p>
     *
     * @param kernel 2D integer kernel, indexed row-major with row length {@code kernel.getWidth()}
     * @param input  source image; samples are read as unsigned 16-bit values
     * @param output destination image; results are narrowed to {@code short}
     */
    public static void convolve(Kernel2D_S32 kernel, GrayU16 input, GrayI16 output) {
        final short[] src = input.data;
        final short[] dst = output.data;
        final int[] taps = kernel.data;
        final int side = kernel.getWidth();
        final int radiusLo = kernel.getOffset();
        final int radiusHi = side - radiusLo - 1;
        final int cols = input.getWidth();
        final int rows = input.getHeight();
        final int rightBorderStart = cols - radiusHi;

        // Pass 1: left and right border columns, all rows.
        for (int row = 0; row < rows; ++row) {
            // Clip the vertical extent of the kernel to the image.
            final int dyMin;
            if (row >= radiusLo) {
                dyMin = -radiusLo;
            } else {
                dyMin = -row;
            }
            final int dyMax;
            if (row < rows - radiusHi) {
                dyMax = radiusHi;
            } else {
                dyMax = rows - row - 1;
            }

            int writePos = output.startIndex + row * output.stride;
            for (int col = 0; col < radiusLo; ++col) {
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -col; dx <= radiusHi; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += (src[srcBase + dx] & 0xFFFF) * coef;
                    }
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }

            writePos = output.startIndex + row * output.stride + cols - radiusHi;
            for (int col = rightBorderStart; col < cols; ++col) {
                final int dxMax = cols - col - 1;
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -radiusLo; dx <= dxMax; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += (src[srcBase + dx] & 0xFFFF) * coef;
                    }
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }
        }

        // Pass 2: top rows, columns between the left and right borders.
        for (int row = 0; row < radiusLo; ++row) {
            int writePos = output.startIndex + row * output.stride + radiusLo;
            for (int col = radiusLo; col < rightBorderStart; ++col) {
                int sum = 0;
                int norm = 0;
                for (int dy = -row; dy <= radiusHi; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -radiusLo; dx <= radiusHi; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += (src[srcBase + dx] & 0xFFFF) * coef;
                    }
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }
        }

        // Pass 3: bottom rows, columns between the left and right borders.
        for (int row = rows - radiusHi; row < rows; ++row) {
            final int dyMax = rows - row - 1;
            int writePos = output.startIndex + row * output.stride + radiusLo;
            for (int col = radiusLo; col < rightBorderStart; ++col) {
                int sum = 0;
                int norm = 0;
                for (int dy = -radiusLo; dy <= dyMax; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -radiusLo; dx <= radiusHi; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += (src[srcBase + dx] & 0xFFFF) * coef;
                    }
                }
                dst[writePos++] = (short)((sum + norm / 2) / norm);
            }
        }
    }

    /**
     * Normalized horizontal convolution of the left and right image borders only.
     *
     * <p>For every row the first {@code kernel.getOffset()} pixels and the trailing pixels of the row
     * are written; the pixels in between are skipped and left untouched. At each border pixel only the
     * kernel coefficients that overlap the row are used, and the weighted sum is divided by the sum of
     * those coefficients after adding half of that sum.</p>
     *
     * @param kernel 1D integer kernel
     * @param input  source image with signed 32-bit samples
     * @param output destination image with signed 32-bit samples
     */
    public static void horizontal(Kernel1D_S32 kernel, GrayS32 input, GrayS32 output) {
        final int[] src = input.data;
        final int[] dst = output.data;
        final int[] taps = kernel.data;
        final int numTaps = kernel.getWidth();
        final int radiusLeft = kernel.getOffset();
        final int radiusRight = numTaps - radiusLeft - 1;
        final int cols = input.getWidth();
        final int rows = input.getHeight();
        // Distance jumped over between the end of the left border and the start of the right border.
        final int skipped = cols - (radiusLeft + radiusRight);

        for (int row = 0; row < rows; ++row) {
            int writePos = output.startIndex + row * output.stride;
            final int rowBegin = input.startIndex + row * input.stride;
            final int leftEnd = rowBegin + radiusLeft;
            int pos = rowBegin;

            // Left border: drop the leading coefficients that fall outside the image.
            for (; pos < leftEnd; ++pos) {
                int sum = 0;
                int norm = 0;
                int readPos = rowBegin;
                for (int tap = numTaps - (radiusRight + 1 + pos - rowBegin); tap < numTaps; ++tap) {
                    final int coef = taps[tap];
                    norm += coef;
                    sum += src[readPos++] * coef;
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }

            pos += skipped;
            writePos += skipped;

            // Right border: drop the trailing coefficients that fall outside the image.
            final int rowEnd = rowBegin + cols;
            for (; pos < rowEnd; ++pos) {
                int sum = 0;
                int norm = 0;
                int readPos = pos - radiusLeft;
                final int tapCount = rowEnd - readPos;
                for (int tap = 0; tap < tapCount; ++tap) {
                    final int coef = taps[tap];
                    norm += coef;
                    sum += src[readPos++] * coef;
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }
        }
    }

    /**
     * Normalized vertical convolution of the top and bottom image borders only.
     *
     * <p>Rows {@code [0, kernel.getOffset())} and the rows starting at
     * {@code height - (kernelWidth - offset - 1)} are written across the full image width. For each
     * such row the normalization weight is the sum of the kernel coefficients that overlap the image
     * and is computed once per row. Width and height are taken from {@code output}.</p>
     *
     * @param kernel 1D integer kernel
     * @param input  source image with signed 32-bit samples
     * @param output destination image with signed 32-bit samples
     */
    public static void vertical(Kernel1D_S32 kernel, GrayS32 input, GrayS32 output) {
        final int[] src = input.data;
        final int[] dst = output.data;
        final int[] taps = kernel.data;
        final int numTaps = kernel.getWidth();
        final int radiusTop = kernel.getOffset();
        final int radiusBottom = numTaps - radiusTop - 1;
        final int cols = output.getWidth();
        final int rows = output.getHeight();
        final int firstBottomRow = rows - radiusBottom;

        // Top border: leading coefficients fall above the image and are dropped.
        for (int row = 0; row < radiusTop; ++row) {
            int writePos = output.startIndex + row * output.stride;
            final int rowBegin = input.startIndex + row * input.stride;
            final int rowEnd = rowBegin + cols;
            final int firstTap = radiusTop - row;

            int norm = 0;
            for (int tap = firstTap; tap < numTaps; ++tap) {
                norm += taps[tap];
            }

            for (int pos = rowBegin; pos < rowEnd; ++pos) {
                int sum = 0;
                // Step back to the same column in the first image row.
                int readPos = pos - row * input.stride;
                for (int tap = firstTap; tap < numTaps; ++tap) {
                    sum += src[readPos] * taps[tap];
                    readPos += input.stride;
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }
        }

        // Bottom border: trailing coefficients fall below the image and are dropped.
        for (int row = firstBottomRow; row < rows; ++row) {
            int writePos = output.startIndex + row * output.stride;
            final int rowBegin = input.startIndex + row * input.stride;
            final int rowEnd = rowBegin + cols;
            final int tapCount = rows - (row - radiusTop);

            int norm = 0;
            for (int tap = 0; tap < tapCount; ++tap) {
                norm += taps[tap];
            }

            for (int pos = rowBegin; pos < rowEnd; ++pos) {
                int sum = 0;
                int readPos = pos - radiusTop * input.stride;
                for (int tap = 0; tap < tapCount; ++tap) {
                    sum += src[readPos] * taps[tap];
                    readPos += input.stride;
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }
        }
    }

    /**
     * Normalized 2D convolution of the image border only.
     *
     * <p>The border is covered in three passes: (1) the left and right border columns of every row,
     * (2) the top rows between those columns, (3) the bottom rows between those columns. At each pixel
     * only the kernel coefficients that overlap the image contribute, and the weighted sum is divided
     * by the sum of those coefficients after adding half of that sum.</p>
     *
     * @param kernel 2D integer kernel, indexed row-major with row length {@code kernel.getWidth()}
     * @param input  source image with signed 32-bit samples
     * @param output destination image with signed 32-bit samples
     */
    public static void convolve(Kernel2D_S32 kernel, GrayS32 input, GrayS32 output) {
        final int[] src = input.data;
        final int[] dst = output.data;
        final int[] taps = kernel.data;
        final int side = kernel.getWidth();
        final int radiusLo = kernel.getOffset();
        final int radiusHi = side - radiusLo - 1;
        final int cols = input.getWidth();
        final int rows = input.getHeight();
        final int rightBorderStart = cols - radiusHi;

        // Pass 1: left and right border columns, all rows.
        for (int row = 0; row < rows; ++row) {
            // Clip the vertical extent of the kernel to the image.
            final int dyMin;
            if (row >= radiusLo) {
                dyMin = -radiusLo;
            } else {
                dyMin = -row;
            }
            final int dyMax;
            if (row < rows - radiusHi) {
                dyMax = radiusHi;
            } else {
                dyMax = rows - row - 1;
            }

            int writePos = output.startIndex + row * output.stride;
            for (int col = 0; col < radiusLo; ++col) {
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -col; dx <= radiusHi; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += src[srcBase + dx] * coef;
                    }
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }

            writePos = output.startIndex + row * output.stride + cols - radiusHi;
            for (int col = rightBorderStart; col < cols; ++col) {
                final int dxMax = cols - col - 1;
                int sum = 0;
                int norm = 0;
                for (int dy = dyMin; dy <= dyMax; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -radiusLo; dx <= dxMax; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += src[srcBase + dx] * coef;
                    }
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }
        }

        // Pass 2: top rows, columns between the left and right borders.
        for (int row = 0; row < radiusLo; ++row) {
            int writePos = output.startIndex + row * output.stride + radiusLo;
            for (int col = radiusLo; col < rightBorderStart; ++col) {
                int sum = 0;
                int norm = 0;
                for (int dy = -row; dy <= radiusHi; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -radiusLo; dx <= radiusHi; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += src[srcBase + dx] * coef;
                    }
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }
        }

        // Pass 3: bottom rows, columns between the left and right borders.
        for (int row = rows - radiusHi; row < rows; ++row) {
            final int dyMax = rows - row - 1;
            int writePos = output.startIndex + row * output.stride + radiusLo;
            for (int col = radiusLo; col < rightBorderStart; ++col) {
                int sum = 0;
                int norm = 0;
                for (int dy = -radiusLo; dy <= dyMax; ++dy) {
                    final int srcBase = input.startIndex + (row + dy) * input.stride + col;
                    final int tapBase = (dy + radiusLo) * side + radiusLo;
                    for (int dx = -radiusLo; dx <= radiusHi; ++dx) {
                        final int coef = taps[tapBase + dx];
                        norm += coef;
                        sum += src[srcBase + dx] * coef;
                    }
                }
                dst[writePos++] = (sum + norm / 2) / norm;
            }
        }
    }

    /**
     * Vertical pass of a two-kernel normalized convolution, restricted to the image border.
     *
     * <p>Each written pixel is the {@code kernelY}-weighted sum of the input pixels in its column,
     * divided by {@code weightX * weightY} after adding half of that product. {@code weightY} is the
     * sum of the {@code kernelY} coefficients that overlap the image at the pixel's row;
     * {@code weightX} is the corresponding sum of {@code kernelX} coefficients at the pixel's column
     * and is updated incrementally while walking along a row.</p>
     *
     * <p>Pixels written: every column of rows {@code [0, offsetY)} and {@code [yEnd, height)}; for the
     * rows in between, only columns {@code [0, offsetX)} and {@code [width - offsetX1, width)}.</p>
     *
     * <p>NOTE(review): dividing by the {@code kernelX} weight suggests {@code input} holds the
     * un-normalized result of a horizontal pass with {@code kernelX} - confirm against the caller.</p>
     *
     * @param kernelX kernel used only to derive the horizontal normalization weight
     * @param kernelY kernel applied along each column
     * @param input   source image; samples are read as unsigned 16-bit values
     * @param output  destination image; results are narrowed to {@code byte}
     */
    public static void vertical(Kernel1D_S32 kernelX, Kernel1D_S32 kernelY, GrayU16 input, GrayI8 output) {
        short[] dataSrc = input.data;
        byte[] dataDst = output.data;
        int[] dataKer = kernelY.data;
        int offsetY = kernelY.getOffset();
        int kernelWidthY = kernelY.getWidth();
        int offsetX = kernelX.getOffset();
        int kernelWidthX = kernelX.getWidth();
        int offsetX1 = kernelWidthX - offsetX - 1;
        int imgWidth = output.getWidth();
        int imgHeight = output.getHeight();
        int yEnd = imgHeight - (kernelWidthY - offsetY - 1);

        // Horizontal weight at column 0: coefficients from the kernel offset to its end.
        int startWeightX = 0;
        for (int k = offsetX; k < kernelWidthX; ++k) {
            startWeightX += kernelX.data[k];
        }

        // Top border rows: kernelY is clipped above the image.
        for (int y = 0; y < offsetY; ++y) {
            int indexDst = output.startIndex + y * output.stride;
            int i = input.startIndex + y * input.stride;
            int iEnd = i + imgWidth;
            int kStart = offsetY - y;
            int weightY = 0;
            for (int k = kStart; k < kernelWidthY; ++k) {
                weightY += dataKer[k];
            }
            int weightX = startWeightX;
            for (int x = 0; i < iEnd; ++i, ++x) {
                int weight = weightX * weightY;
                int total = 0;
                int indexSrc = i - y * input.stride;
                for (int k = kStart; k < kernelWidthY; ++k) {
                    total += (dataSrc[indexSrc] & 0xFFFF) * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (byte)((total + weight / 2) / weight);
                // Advance the horizontal weight to the next column: gain a coefficient near the
                // left edge, lose one near the right edge.
                if (x < offsetX) {
                    weightX += kernelX.data[offsetX - x - 1];
                } else if (x >= input.width - (kernelWidthX - offsetX)) {
                    weightX -= kernelX.data[input.width - x + offsetX - 1];
                }
            }
        }

        // Bottom border rows: kernelY is clipped below the image.
        for (int y = yEnd; y < imgHeight; ++y) {
            int indexDst = output.startIndex + y * output.stride;
            int i = input.startIndex + y * input.stride;
            int iEnd = i + imgWidth;
            int kEnd = imgHeight - (y - offsetY);
            int weightY = 0;
            for (int k = 0; k < kEnd; ++k) {
                weightY += dataKer[k];
            }
            int weightX = startWeightX;
            for (int x = 0; i < iEnd; ++i, ++x) {
                int weight = weightX * weightY;
                int total = 0;
                int indexSrc = i - offsetY * input.stride;
                for (int k = 0; k < kEnd; ++k) {
                    total += (dataSrc[indexSrc] & 0xFFFF) * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (byte)((total + weight / 2) / weight);
                if (x < offsetX) {
                    weightX += kernelX.data[offsetX - x - 1];
                } else if (x >= input.width - (kernelWidthX - offsetX)) {
                    weightX -= kernelX.data[input.width - x + offsetX - 1];
                }
            }
        }

        // Middle rows: kernelY fits entirely, only the left and right border columns are written.
        int fullWeightY = kernelY.computeSum();
        for (int y = offsetY; y < yEnd; ++y) {
            int rowSrc = input.startIndex + y * input.stride;
            int indexDst = output.startIndex + y * output.stride;
            int weightX = startWeightX;

            // Left border. It is offsetX columns wide: the loop indexes kernelX.data[offsetX - x - 1],
            // so bounding it by offsetY (as before) read index -1 when offsetY > offsetX, and left
            // columns unwritten with a wrong running weight when offsetY < offsetX.
            int i = rowSrc;
            int iEnd = rowSrc + offsetX;
            for (int x = 0; i < iEnd; ++i, ++x) {
                int weight = weightX * fullWeightY;
                int total = 0;
                int indexSrc = i - offsetY * input.stride;
                for (int k = 0; k < kernelWidthY; ++k) {
                    total += (dataSrc[indexSrc] & 0xFFFF) * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (byte)((total + weight / 2) / weight);
                weightX += kernelX.data[offsetX - x - 1];
            }

            // Right border. weightX continues from the value reached at the end of the left border
            // and loses one trailing coefficient per column.
            int startX = input.width - offsetX1;
            indexDst = output.startIndex + y * output.stride + startX;
            i = rowSrc + startX;
            iEnd = rowSrc + input.width;
            for (int x = startX; i < iEnd; ++i, ++x) {
                weightX -= kernelX.data[input.width - x + offsetX];
                int weight = weightX * fullWeightY;
                int total = 0;
                int indexSrc = i - offsetY * input.stride;
                for (int k = 0; k < kernelWidthY; ++k) {
                    total += (dataSrc[indexSrc] & 0xFFFF) * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (byte)((total + weight / 2) / weight);
            }
        }
    }

    /**
     * Vertical pass of a two-kernel normalized convolution, restricted to the image border.
     *
     * <p>Each written pixel is the {@code kernelY}-weighted sum of the input pixels in its column,
     * divided by {@code weightX * weightY} after adding half of that product. {@code weightY} is the
     * sum of the {@code kernelY} coefficients that overlap the image at the pixel's row;
     * {@code weightX} is the corresponding sum of {@code kernelX} coefficients at the pixel's column
     * and is updated incrementally while walking along a row.</p>
     *
     * <p>Pixels written: every column of rows {@code [0, offsetY)} and {@code [yEnd, height)}; for the
     * rows in between, only columns {@code [0, offsetX)} and {@code [width - offsetX1, width)}.</p>
     *
     * <p>NOTE(review): dividing by the {@code kernelX} weight suggests {@code input} holds the
     * un-normalized result of a horizontal pass with {@code kernelX} - confirm against the caller.</p>
     *
     * @param kernelX kernel used only to derive the horizontal normalization weight
     * @param kernelY kernel applied along each column
     * @param input   source image with signed 32-bit samples
     * @param output  destination image; results are narrowed to {@code short}
     */
    public static void vertical(Kernel1D_S32 kernelX, Kernel1D_S32 kernelY, GrayS32 input, GrayI16 output) {
        int[] dataSrc = input.data;
        short[] dataDst = output.data;
        int[] dataKer = kernelY.data;
        int offsetY = kernelY.getOffset();
        int kernelWidthY = kernelY.getWidth();
        int offsetX = kernelX.getOffset();
        int kernelWidthX = kernelX.getWidth();
        int offsetX1 = kernelWidthX - offsetX - 1;
        int imgWidth = output.getWidth();
        int imgHeight = output.getHeight();
        int yEnd = imgHeight - (kernelWidthY - offsetY - 1);

        // Horizontal weight at column 0: coefficients from the kernel offset to its end.
        int startWeightX = 0;
        for (int k = offsetX; k < kernelWidthX; ++k) {
            startWeightX += kernelX.data[k];
        }

        // Top border rows: kernelY is clipped above the image.
        for (int y = 0; y < offsetY; ++y) {
            int indexDst = output.startIndex + y * output.stride;
            int i = input.startIndex + y * input.stride;
            int iEnd = i + imgWidth;
            int kStart = offsetY - y;
            int weightY = 0;
            for (int k = kStart; k < kernelWidthY; ++k) {
                weightY += dataKer[k];
            }
            int weightX = startWeightX;
            for (int x = 0; i < iEnd; ++i, ++x) {
                int weight = weightX * weightY;
                int total = 0;
                int indexSrc = i - y * input.stride;
                for (int k = kStart; k < kernelWidthY; ++k) {
                    total += dataSrc[indexSrc] * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (short)((total + weight / 2) / weight);
                // Advance the horizontal weight to the next column: gain a coefficient near the
                // left edge, lose one near the right edge.
                if (x < offsetX) {
                    weightX += kernelX.data[offsetX - x - 1];
                } else if (x >= input.width - (kernelWidthX - offsetX)) {
                    weightX -= kernelX.data[input.width - x + offsetX - 1];
                }
            }
        }

        // Bottom border rows: kernelY is clipped below the image.
        for (int y = yEnd; y < imgHeight; ++y) {
            int indexDst = output.startIndex + y * output.stride;
            int i = input.startIndex + y * input.stride;
            int iEnd = i + imgWidth;
            int kEnd = imgHeight - (y - offsetY);
            int weightY = 0;
            for (int k = 0; k < kEnd; ++k) {
                weightY += dataKer[k];
            }
            int weightX = startWeightX;
            for (int x = 0; i < iEnd; ++i, ++x) {
                int weight = weightX * weightY;
                int total = 0;
                int indexSrc = i - offsetY * input.stride;
                for (int k = 0; k < kEnd; ++k) {
                    total += dataSrc[indexSrc] * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (short)((total + weight / 2) / weight);
                if (x < offsetX) {
                    weightX += kernelX.data[offsetX - x - 1];
                } else if (x >= input.width - (kernelWidthX - offsetX)) {
                    weightX -= kernelX.data[input.width - x + offsetX - 1];
                }
            }
        }

        // Middle rows: kernelY fits entirely, only the left and right border columns are written.
        int fullWeightY = kernelY.computeSum();
        for (int y = offsetY; y < yEnd; ++y) {
            int rowSrc = input.startIndex + y * input.stride;
            int indexDst = output.startIndex + y * output.stride;
            int weightX = startWeightX;

            // Left border. It is offsetX columns wide: the loop indexes kernelX.data[offsetX - x - 1],
            // so bounding it by offsetY (as before) read index -1 when offsetY > offsetX, and left
            // columns unwritten with a wrong running weight when offsetY < offsetX.
            int i = rowSrc;
            int iEnd = rowSrc + offsetX;
            for (int x = 0; i < iEnd; ++i, ++x) {
                int weight = weightX * fullWeightY;
                int total = 0;
                int indexSrc = i - offsetY * input.stride;
                for (int k = 0; k < kernelWidthY; ++k) {
                    total += dataSrc[indexSrc] * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (short)((total + weight / 2) / weight);
                weightX += kernelX.data[offsetX - x - 1];
            }

            // Right border. weightX continues from the value reached at the end of the left border
            // and loses one trailing coefficient per column.
            int startX = input.width - offsetX1;
            indexDst = output.startIndex + y * output.stride + startX;
            i = rowSrc + startX;
            iEnd = rowSrc + input.width;
            for (int x = startX; i < iEnd; ++i, ++x) {
                weightX -= kernelX.data[input.width - x + offsetX];
                int weight = weightX * fullWeightY;
                int total = 0;
                int indexSrc = i - offsetY * input.stride;
                for (int k = 0; k < kernelWidthY; ++k) {
                    total += dataSrc[indexSrc] * dataKer[k];
                    indexSrc += input.stride;
                }
                dataDst[indexDst++] = (short)((total + weight / 2) / weight);
            }
        }
    }
}

