矩阵乘法的优化及其在卷积中的应用
点击上方“小白学视觉”,选择加“星标”或“置顶”
重磅干货,第一时间送达
本文转自 | 视觉算法
基本概念
![](https://filescdn.proginn.com/224cf0887beca0393a0e863bd672570b/8ae8fb69880dc52d91321eecb8aa042e.webp)
![](https://filescdn.proginn.com/733e39871f6733b3f7b7b33f00ce2451/04eb70480cf5f69d2cc30f7f0c097b6d.webp)
// Baseline matrix multiply: C = A * B, computed one output element at a time.
// The indexing fixes the shapes: A is M x K, B is K x N, C is M x N.
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
// Clear the output element before accumulating over k.
C[m][n] = 0;
for (int k = 0; k < K; k++) {
// Accumulate the product of row m of A with column n of B.
C[m][n] += A[m][k] * B[k][n];
}
}
}
![](https://filescdn.proginn.com/e4d8a3c2b6841bfc58dd1a729bbc9af2/5bae5663eb806e0e3ec5cc86bd42c6fa.webp)
计算拆分展示
![](https://filescdn.proginn.com/27bad6b5089a39dd1e396ef1f9d2d953/45e13d52dd49f7912e239cc7d0d385e5.webp)
// Same product as the baseline loop, but the n loop advances by 4 and the body
// is unrolled so each pass produces four adjacent outputs C[m][n..n+3].
// NOTE(review): columns n+1..n+3 are accessed without a bounds check, so N is
// expected to be a multiple of 4 — confirm against the surrounding text.
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n += 4) {
// Clear the four output elements handled by this pass.
C[m][n + 0] = 0;
C[m][n + 1] = 0;
C[m][n + 2] = 0;
C[m][n + 3] = 0;
for (int k = 0; k < K; k++) {
// The same A[m][k] value appears in all four updates; only the B column differs.
C[m][n + 0] += A[m][k] * B[k][n + 0];
C[m][n + 1] += A[m][k] * B[k][n + 1];
C[m][n + 2] += A[m][k] * B[k][n + 2];
C[m][n + 3] += A[m][k] * B[k][n + 3];
}
}
}
![](https://filescdn.proginn.com/568e80111626162531ddc51830bf01f6/943ef2ab9c6074ad3f4c7389b2a10af4.webp)
// Pseudo-code: both m and n advance by 4, so each pass produces a 4 x 4 tile of C.
// NOTE(review): `n + 0..3` is not C syntax; it appears to be shorthand for the
// four consecutive indices n, n+1, n+2, n+3 — confirm with the article text.
// NOTE(review): rows m+1..m+3 and columns n+1..n+3 are accessed unchecked, so
// M and N are expected to be multiples of 4.
for (int m = 0; m < M; m += 4) {
for (int n = 0; n < N; n += 4) {
// Clear the tile, one row (four columns) per statement.
C[m + 0][n + 0..3] = 0;
C[m + 1][n + 0..3] = 0;
C[m + 2][n + 0..3] = 0;
C[m + 3][n + 0..3] = 0;
for (int k = 0; k < K; k++) {
// Each statement updates one row of the tile; all four rows use the same
// slice B[k][n + 0..3], each scaled by its own element of column k of A.
C[m + 0][n + 0..3] += A[m + 0][k] * B[k][n + 0..3];
C[m + 1][n + 0..3] += A[m + 1][k] * B[k][n + 0..3];
C[m + 2][n + 0..3] += A[m + 2][k] * B[k][n + 0..3];
C[m + 3][n + 0..3] += A[m + 3][k] * B[k][n + 0..3];
}
}
}
![](https://filescdn.proginn.com/e788814d4af1970b5d96279e8ac21172/77b0bebde9d5d18b40c1f378b859a5dd.webp)
![](https://filescdn.proginn.com/c0496b274576ecb30bd1ca229042b6ee/070cac46100978d44201d542997ca0d0.webp)
![](https://filescdn.proginn.com/2f78b9a8e9b58748834dbf2700e544e5/506e71aed37a987b268b81f4e74fabcf.webp)
// Pseudo-code: 4 x 4 output tile as before, with the k loop also advancing by 4
// and its body unrolled into four updates of the whole tile.
// NOTE(review): `0..3` is range shorthand, not C syntax; M, N and K are expected
// to be multiples of 4 because indices +1..+3 are accessed unchecked.
for (int m = 0; m < M; m += 4) {
for (int n = 0; n < N; n += 4) {
// NOTE(review): these four statements are identical and each already covers
// the whole 4 x 4 tile, so a single one would have the same effect.
C[m + 0..3][n + 0..3] = 0;
C[m + 0..3][n + 0..3] = 0;
C[m + 0..3][n + 0..3] = 0;
C[m + 0..3][n + 0..3] = 0;
for (int k = 0; k < K; k += 4) {
// One statement per k offset: a 4-element column slice of A (rows m..m+3)
// times a 4-element row slice of B (columns n..n+3), added into the tile.
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 0] * B[k + 0][n + 0..3];
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 1] * B[k + 1][n + 0..3];
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 2] * B[k + 2][n + 0..3];
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 3] * B[k + 3][n + 0..3];
}
}
}
![](https://filescdn.proginn.com/e47c218efe56f536429bac713dc3be39/6db3b3d0ee3aa87cac1425218afc9279.webp)
![](https://filescdn.proginn.com/f702c5fec4b22e55557e83ad78cede16/ff9105ef2648a6df4da72dc71989a61b.webp)
处理内存布局
![](https://filescdn.proginn.com/619116fef3e1c9519600e34f89e85f65/a49fcd6b51e174075e08abc762153e97.webp)
![](https://filescdn.proginn.com/5bb3b75031aba3f57d3e4ea93a65f335/cf5d515f25daaeab5beb088e3e210032.webp)
// Pseudo-code: two-level tiling. The outer loops (mo, no) walk C in 8 x 8 blocks;
// the inner loops (mi, ni) visit the 2 x 2 grid of 4 x 4 tiles inside each block.
// NOTE(review): `0..3` is range shorthand, not C syntax; M and N are expected to
// be multiples of 8 and K a multiple of 4, since no remainder handling is shown.
for (int mo = 0; mo < M; mo += 8) {
for (int no = 0; no < N; no += 8) {
for (int mi = 0; mi < 2;mi ++) {
for (int ni = 0; ni < 2; ni++) {
// Top-left corner of the current 4 x 4 tile within the 8 x 8 block.
int m = mo + mi * 4;
int n = no + ni * 4;
// NOTE(review): these four statements are identical and each already covers
// the whole 4 x 4 tile, so a single one would have the same effect.
C[m + 0..3][n + 0..3] = 0;
C[m + 0..3][n + 0..3] = 0;
C[m + 0..3][n + 0..3] = 0;
C[m + 0..3][n + 0..3] = 0;
for (int k = 0; k < K; k += 4) {
// One statement per k offset: a 4-element column slice of A (rows m..m+3)
// times a 4-element row slice of B (columns n..n+3), added into the tile.
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 0] * B[k + 0][n + 0..3];
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 1] * B[k + 1][n + 0..3];
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 2] * B[k + 2][n + 0..3];
C[m + 0..3][n + 0..3] += A[m + 0..3][k + 3] * B[k + 3][n + 0..3];
}
}
}
}
}
量化神经网络
![](https://filescdn.proginn.com/9348cae3b6d32554b16e1440e4a7d2a0/df780f6ecd916cc49029c382a2bb0f80.webp)
![](https://filescdn.proginn.com/93311792529d4646f2b2479849105791/5fa2deb67143ea9041cec99b2152e174.webp)
计算划分与削减维度
![](https://filescdn.proginn.com/263626592a3eb6714d98517945d0914d/f0b017752d7aac24100c8afdf1b3469a.webp)
![](https://filescdn.proginn.com/4f1c82aeb44eb26cceec7031d5692367/4af1d9efdc873873ac29150e17b0c1dc.webp)
内存组织的特点
![](https://filescdn.proginn.com/88e3970d1fa5de0192cf237540458add/1e1687124ec6c9e29bec9ea2ac6b3bef.webp)
im2col 计算方法
![](https://filescdn.proginn.com/942a14562e0f2251a65b1083f445b706/96e5cd2755b0473a4ec215e8365ece94.webp)
内存布局与卷积性能
![](https://filescdn.proginn.com/f514463e187994a5ed21a884e2a00558/9f93b1c690779bf97e77f06fcc31bb15.webp)
![](https://filescdn.proginn.com/522aa8292d5f76e5fdd5887da2395969/7c02c69afdc3c3bb87f8c732b0a1d4e0.webp)
参考
—完—
交流群
欢迎加入公众号读者群一起和同行交流,目前有SLAM、三维视觉、传感器、自动驾驶、计算摄影、检测、分割、识别、医学影像、GAN、算法竞赛等微信群(以后会逐渐细分),请扫描下面微信号加群,备注:“昵称+学校/公司+研究方向”,例如:“张三 + 上海交大 + 视觉SLAM”。请按照格式备注,否则不予通过。添加成功后会根据研究方向邀请进入相关微信群。请勿在群内发送广告,否则会请出群,谢谢理解~
评论