輸入圖片為 32*32*16,濾波器為 5*5*6*16,輸出為 28*28*16。
創新互聯建站服務項目包括道里網站建設、道里網站制作、道里網頁制作以及道里網絡營銷策劃等。多年來,我們專注于互聯網行業,利用自身積累的技術優勢、行業經驗、深度合作伙伴關系等,向廣大中小型企業、政府機構等提供互聯網行業的解決方案,道里網站推廣取得了明顯的社會效益與經濟效益。目前,我們服務的客戶以成都為中心已經輻射到道里省份的部分城市,未來相信會繼續擴大服務區域并繼續獲得客戶的支持與信任!

卷積部分

卷積部分無非就是數據的相乘和相加,設計好乘法器和加法器并保存得到的結果(16)。考慮到數據精度,采用半精度浮點數乘法器和半精度浮點數加法器。[半精度浮點數詳解](https://blog.csdn.net/Miracle_cx/article/details/89463503):定點數量化,減少存取數據和訪問存儲器的時間,并且不會過分減少精度。
舉一個平常的數為例,這次反過來(由十進制轉為半精度表示),如 -1.5625×10^(-1)。
即 -0.15625 = -0.00101(十進制轉二進制)= -1.01×2^(-3)。
所以符號位為 1,指數為 -3+15=12,所以指數位為 01100,尾數位為 0100000000。所以 -1.5625×10^(-1) 用半精度浮點數表示就為 1 01100 0100000000。注:小數部分轉二進制時,每次乘以 2 并取整數部分。
// Half-precision (1 sign / 5 exponent / 10 mantissa bits) floating-point
// adder, purely combinational.
//
// Ports:
//   floatA, floatB : operands, laid out as {sign, exponent[4:0], mantissa[9:0]}
//   sum            : floatA + floatB in the same layout
//
// Behaviour and limitations:
//   * Every operand whose low 15 bits are non-zero is treated as a normalised
//     number with a hidden leading 1; subnormal inputs are not supported.
//   * Bits shifted out while aligning exponents are truncated (no rounding).
//   * +0 and -0 are both recognised as zero; the other operand is returned.
//   * A result whose exponent falls below 0 is flushed to +0.
//   * A result whose exponent reaches 31 or more saturates to signed infinity
//     ({sign, 5'b11111, 10'b0}) instead of producing a NaN pattern or
//     wrapping around to zero.
//   * Inf/NaN operands get no dedicated handling beyond that saturation.
module floatAdd (
    input  wire [15:0] floatA,
    input  wire [15:0] floatB,
    output reg  [15:0] sum
);
    reg               sign;                          // sign of the result
    reg signed [6:0]  exponent;                      // result exponent; wide enough to tell overflow (>=31) from underflow (<0)
    reg        [9:0]  mantissa;                      // result mantissa (hidden 1 stripped)
    reg        [4:0]  exponentA, exponentB;          // biased exponents of the operands
    reg        [10:0] fractionA, fractionB, fraction; // significands including the hidden 1
    reg        [7:0]  shiftAmount;                   // alignment shift for the smaller operand
    reg               cout;                          // carry (addition) / borrow (subtraction)
    integer           n;

    always @(*) begin
        // Default every variable so that no path through the block infers a latch.
        exponentA   = floatA[14:10];
        exponentB   = floatB[14:10];
        fractionA   = {1'b1, floatA[9:0]};
        fractionB   = {1'b1, floatB[9:0]};
        exponent    = {2'b00, exponentA};
        sign        = 1'b0;
        cout        = 1'b0;
        fraction    = 11'd0;
        mantissa    = 10'd0;
        shiftAmount = 8'd0;
        sum         = 16'd0;

        if (floatA[14:0] == 15'd0) begin
            // A is +0 or -0: the sum is simply B.
            sum = floatB;
        end else if (floatB[14:0] == 15'd0) begin
            // B is +0 or -0: the sum is simply A.
            sum = floatA;
        end else if ((floatA[14:0] == floatB[14:0]) && (floatA[15] != floatB[15])) begin
            // Equal magnitude, opposite sign: exact cancellation.
            sum = 16'd0;
        end else begin
            // Align the operand with the smaller exponent to the larger one.
            if (exponentB > exponentA) begin
                shiftAmount = exponentB - exponentA;
                fractionA   = fractionA >> shiftAmount;
                exponent    = {2'b00, exponentB};
            end else if (exponentA > exponentB) begin
                shiftAmount = exponentA - exponentB;
                fractionB   = fractionB >> shiftAmount;
            end

            if (floatA[15] == floatB[15]) begin
                // Same sign: add magnitudes; a carry means the result is >= 2.0,
                // so shift right once and bump the exponent.
                {cout, fraction} = fractionA + fractionB;
                if (cout) begin
                    {cout, fraction} = {cout, fraction} >> 1;
                    exponent = exponent + 7'sd1;
                end
                sign = floatA[15];
            end else begin
                // Opposite signs: compute (positive operand) - (negative operand).
                if (floatA[15])
                    {cout, fraction} = fractionB - fractionA;
                else
                    {cout, fraction} = fractionA - fractionB;
                // A borrow means the difference is negative.
                sign = cout;
                if (cout)
                    fraction = -fraction; // two's complement gives the magnitude

                // Renormalise: shift left until the leading 1 is back in bit 10
                // (at most 10 positions; an all-zero fraction is left alone).
                for (n = 0; n < 10; n = n + 1) begin
                    if ((fraction != 11'd0) && !fraction[10]) begin
                        fraction = fraction << 1;
                        exponent = exponent - 7'sd1;
                    end
                end
            end

            mantissa = fraction[9:0];
            if (exponent < 0)
                sum = 16'd0;                         // too small: flush to zero
            else if (exponent >= 31)
                sum = {sign, 5'b11111, 10'd0};       // too large: signed infinity
            else
                sum = {sign, exponent[4:0], mantissa};
        end
    end
endmodule
測試代碼
`timescale 100 ns / 10 ps
// Stimulus-only testbench for floatAdd: applies two operand pairs 10 time
// units apart and then stops the simulation. Results are not checked here;
// inspect `sum` in the waveform viewer.
module floatAdd_TB ();
    reg  [15:0] floatA, floatB;
    wire [15:0] sum;

    // Device under test.
    floatAdd FADD (.floatA(floatA), .floatB(floatB), .sum(sum));

    initial begin
        #0;
        // 0.3 + 0.2
        floatA = 16'h34CD;
        floatB = 16'h3266;
        #10;
        // 0.3 + 0
        floatA = 16'h34CD;
        floatB = 16'h0000;
        #10;
        $stop;
    end
endmodule
這里選擇 0.3+0.2 與 0.3+0 兩組輸入,對應的二進制可以手動運算驗證,結果無誤。
// Half-precision (1 sign / 5 exponent / 10 mantissa bits) floating-point
// multiplier, purely combinational.
//
// Ports:
//   floatA, floatB : operands, laid out as {sign, exponent[4:0], mantissa[9:0]}
//   product        : floatA * floatB in the same layout
//
// Behaviour and limitations:
//   * Every operand whose low 15 bits are non-zero is treated as a normalised
//     number with a hidden leading 1; subnormal inputs are not supported.
//   * If either operand is +0 or -0 the product is +0.
//   * The low bits of the 22-bit significand product are truncated.
//   * A result whose exponent falls below 0 is flushed to +0.
//   * A result whose exponent reaches 31 or more saturates to signed infinity
//     ({sign, 5'b11111, 10'b0}) instead of wrapping around to zero.
//
// The module is named floatMult because that is the name every instantiation
// in this file uses; a thin alias with the earlier spelling follows below.
module floatMult
(
    input  wire [15:0] floatA,
    input  wire [15:0] floatB,
    output reg  [15:0] product
);
    reg               sign;                 // sign of the product
    reg signed [6:0]  exponent;             // product exponent; wide enough to tell overflow from underflow
    reg        [9:0]  mantissa;             // product mantissa (hidden 1 stripped)
    reg        [10:0] fractionA, fractionB; // significands {1, mantissa}
    reg        [21:0] fraction;             // full 11x11-bit significand product

    always @(*) begin
        // All intermediate values are computed unconditionally so that no path
        // through the block infers a latch.
        sign      = floatA[15] ^ floatB[15];
        fractionA = {1'b1, floatA[9:0]};
        fractionB = {1'b1, floatB[9:0]};
        fraction  = fractionA * fractionB;

        // Remove one bias (15) from the sum of the two biased exponents.
        exponent  = $signed({2'b00, floatA[14:10]})
                  + $signed({2'b00, floatB[14:10]})
                  - 7'sd15;

        // Both significands lie in [1, 2), so the product lies in [1, 4):
        // its leading 1 is always in bit 21 or bit 20.
        if (fraction[21]) begin
            mantissa = fraction[20:11];   // product in [2, 4): one extra power of two
            exponent = exponent + 7'sd1;
        end else begin
            mantissa = fraction[19:10];   // product in [1, 2)
        end

        if ((floatA[14:0] == 15'd0) || (floatB[14:0] == 15'd0))
            product = 16'd0;                       // anything times zero
        else if (exponent < 0)
            product = 16'd0;                       // too small: flush to zero
        else if (exponent >= 31)
            product = {sign, 5'b11111, 10'd0};     // too large: signed infinity
        else
            product = {sign, exponent[4:0], mantissa};
    end
endmodule

// Backward-compatible alias that keeps the original (misspelled) module name
// usable; it simply forwards to floatMult.
module floatMuilt
(
    input  wire [15:0] floatA,
    input  wire [15:0] floatB,
    output wire [15:0] product
);
    floatMult core (.floatA(floatA), .floatB(floatB), .product(product));
endmodule
測試代碼
`timescale 100 ns / 10 ps
// Stimulus-only testbench for floatMult: applies two operand pairs 10 time
// units apart and then stops the simulation. Results are not checked here;
// inspect `product` in the waveform viewer.
module floatMult_TB ();
    reg  [15:0] floatA, floatB;
    wire [15:0] product;

    // Device under test.
    floatMult FM (.floatA(floatA), .floatB(floatB), .product(product));

    initial begin
        #0;
        // 4 * 5
        floatA = 16'h4400;
        floatB = 16'h4500;
        #10;
        // ~0.0004125 * 0
        floatA = 16'h0EC2;
        floatB = 16'h0000;
        #10;
        $stop;
    end
endmodule
這里采用 4*5,以及一個很小的數與 0 相乘,結果無誤。
將計數一次得到的結果存儲下來,用于后面的卷積,這里不做演示。
`timescale 100 ns / 10 ps
// Multiply-accumulate element.
//
// On every rising clock edge (while reset is low) the register `result` is
// replaced by floatA * floatB + result, computed combinationally by the
// floatMult and floatAdd instances below. An asynchronous active-high reset
// clears the accumulator to 0.
//
// Parameters:
//   DATA_WIDTH : width of the operand and result buses.
//                NOTE(review): the floatMult/floatAdd modules visible in this
//                file have fixed 16-bit ports, so values other than 16 are
//                presumably unsupported - confirm before overriding.
// Ports:
//   clk, reset : clock and asynchronous active-high reset
//   floatA/B   : the two factors for this cycle
//   result     : running accumulated sum
module processingElement(clk,reset,floatA,floatB,result);
parameter DATA_WIDTH = 16;
input clk, reset;
input [DATA_WIDTH-1:0] floatA, floatB;
output reg [DATA_WIDTH-1:0] result;

wire [DATA_WIDTH-1:0] multResult; // floatA * floatB
wire [DATA_WIDTH-1:0] addResult;  // multResult + current accumulator value

// Named connections so the wiring does not depend on port declaration order.
floatMult FM   (.floatA(floatA),     .floatB(floatB), .product(multResult));
floatAdd  FADD (.floatA(multResult), .floatB(result), .sum(addResult));

// Non-blocking assignments: `result` is a flip-flop that feeds back into the
// adder and is driven from registers clocked on the same edge, so blocking
// assignments here would make the simulated value order-dependent.
always @ (posedge clk or posedge reset) begin
    if (reset == 1'b1) begin
        result <= 0;
    end else begin
        result <= addResult;
    end
end
endmodule
最后通過循環完成一整個卷積。
`timescale 100 ns / 10 ps
// Sequential convolution unit: computes the element-wise product-sum of one
// receptive field and one filter using a single multiply-accumulate element.
//
// Parameters:
//   DATA_WIDTH : width of one element
//   D          : depth of the filter
//   F          : size of the (F x F) filter
// Ports:
//   clk, reset : clock and asynchronous active-high reset
//   image      : D*F*F elements of the receptive field, element 0 first
//   filter     : D*F*F filter coefficients in the same order
//   result     : accumulated sum from the processing element
//
// After reset is released, one image/filter element pair is presented to the
// processing element per clock. Once all D*F*F pairs have been sent, zeros are
// fed so the accumulated value stays unchanged while other units finish.
module convUnit(clk,reset,image,filter,result);
parameter DATA_WIDTH = 16;
parameter D = 1; //depth of the filter
parameter F = 5; //size of the filter
input clk, reset;
input [0:D*F*F*DATA_WIDTH-1] image, filter;
output [0:DATA_WIDTH-1] result;

reg [DATA_WIDTH-1:0] selectedInput1, selectedInput2; // operand pair currently fed to the PE
integer i;                                           // index of the next element pair to send

// DATA_WIDTH is forwarded so the PE buses always match this unit's buses.
processingElement
#(
    .DATA_WIDTH(DATA_WIDTH)
) PE
(
    .clk(clk),
    .reset(reset),
    .floatA(selectedInput1),
    .floatB(selectedInput2),
    .result(result)
);

// The convolution is calculated in a sequential process to save hardware.
// The result of the element-wise matrix multiplication is finished after
// (F*F+2) cycles (2 cycles to reset the processing element and F*F cycles to
// accumulate the result of the F*F multiplications).
//
// Non-blocking assignments are used because selectedInput1/2 are registers
// read by the processing element on the same clock edge; blocking assignments
// would make the value it samples depend on simulator scheduling order.
always @ (posedge clk or posedge reset) begin
    if (reset == 1'b1) begin // reset
        i <= 0;
        selectedInput1 <= 0;
        selectedInput2 <= 0;
    end else if (i > D*F*F-1) begin
        // Convolution finished but other blocks may still be running:
        // send zeros so the accumulator holds its value (in case of pipelining).
        selectedInput1 <= 0;
        selectedInput2 <= 0;
    end else begin
        // Send one image element and the matching filter element to be
        // multiplied and accumulated.
        selectedInput1 <= image[DATA_WIDTH*i+:DATA_WIDTH];
        selectedInput2 <= filter[DATA_WIDTH*i+:DATA_WIDTH];
        i <= i + 1;
    end
end
endmodule
過濾器在進行卷積之前需要將處理的數據提取出來,比如 5*5 的矩陣,1-25 依次排列,過濾器選擇 2*2,就需要將 1、2、6、7 提取出來。
`timescale 100 ns / 10 ps
// Receptive-field selector (combinational).
//
// Given the flattened input image, an output row number and a half selector,
// this module copies out the F x F x D image windows that the convolution
// units need for one half of that output row.
//
// Parameters:
//   DATA_WIDTH : width of one pixel
//   D          : depth of the image / filter
//   H, W       : height and width of the image
//   F          : size of the (F x F) filter
// Ports:
//   image          : D*H*W pixels, flattened plane by plane, row by row
//   rowNumber      : output row whose windows are extracted
//   column         : 0 selects the first half of the output row; any non-zero
//                    value selects the second half
//   receptiveField : (W-F+1)/2 windows back to back; inside each window the
//                    data is ordered by depth plane, then window row, each
//                    row holding F consecutive pixels
module RFselector(image,rowNumber, column,receptiveField);
parameter DATA_WIDTH = 16;
parameter D = 1; //Depth of the filter
parameter H = 32; //Height of the image
parameter W = 32; //Width of the image
parameter F = 5; //Size of the filter
input [0:D*H*W*DATA_WIDTH-1] image;
input [5:0] rowNumber, column;
output reg [0:(((W-F+1)/2)*D*F*F*DATA_WIDTH)-1] receptiveField; //windows (parts of the image) to be sent to the conv units

// slot     : index of the next F-pixel row chunk written into receptiveField
// winIdx   : window counter within the selected half of the output row
// plane    : depth plane of the input image
// winRow   : row inside the current window
// firstCol : left-most image column of the first window in the selected half
// srcCol   : left-most image column of the current window
integer slot, winIdx, plane, winRow, firstCol, srcCol;

always @ (image or rowNumber or column) begin
    // First half starts at output column 0, second half at (W-F+1)/2.
    if (column == 0)
        firstCol = 0;
    else
        firstCol = (W-F+1)/2;

    slot = 0;
    for (winIdx = 0; winIdx < (W-F+1)/2; winIdx = winIdx + 1) begin
        srcCol = firstCol + winIdx;
        for (plane = 0; plane < D; plane = plane + 1) begin
            for (winRow = 0; winRow < F; winRow = winRow + 1) begin
                // Copy F consecutive pixels of image row (rowNumber + winRow)
                // in depth plane `plane`, starting at column srcCol.
                receptiveField[slot*F*DATA_WIDTH +: F*DATA_WIDTH] =
                    image[((plane*H + rowNumber + winRow)*W + srcCol)*DATA_WIDTH +: F*DATA_WIDTH];
                slot = slot + 1;
            end
        end
    end
end
endmodule
接下來就需要選擇過濾器的移動來完成一整個卷積層。
`timescale 100 ns / 10 ps
// Single-filter convolution layer.
//
// Slides one F x F x D filter over a D x H x W image and produces an
// (H-F+1) x (W-F+1) output map. Each output row is computed in two passes:
// (W-F+1)/2 convolution units work in parallel on the first half of the row,
// then on the second half.
//
// Parameters:
//   DATA_WIDTH : width of one element
//   D          : depth of the filter
//   H, W       : height and width of the image
//   F          : size of the filter
// Ports:
//   clk, reset : clock and asynchronous active-high reset
//   image      : flattened input image (D*H*W elements)
//   filter     : flattened filter (D*F*F elements)
//   outputConv : flattened output map, filled half a row at a time
module convLayerSingle(clk,reset,image,filter,outputConv);
parameter DATA_WIDTH = 16;
parameter D = 1; //Depth of the filter
parameter H = 32; //Height of the image
parameter W = 32; //Width of the image
parameter F = 5; //Size of the filter

// Number of convolution units = half the width of one output row.
// Every bus below is sized with this value, so the unit count and the
// second-half marker use it too (the width W, not the height H).
localparam HALF_ROW = (W-F+1)/2;

input clk, reset;
input [0:D*H*W*DATA_WIDTH-1] image;
input [0:D*F*F*DATA_WIDTH-1] filter;
output reg [0:(H-F+1)*(W-F+1)*DATA_WIDTH-1] outputConv; // output of the module

wire [0:HALF_ROW*DATA_WIDTH-1] outputConvUnits; // outputs of all conv units, side by side
reg internalReset;                               // reset pulse for the conv units between passes
wire [0:(HALF_ROW*D*F*F*DATA_WIDTH)-1] receptiveField; // image windows to be sent to the conv units

integer counter, outputCounter;
//counter: number of clock cycles elapsed in the current pass
//outputCounter: index of the half-row slot of outputConv currently being filled

reg [5:0] rowNumber, column;
//rowNumber: the output row that is calculated by the conv units
//column: 0 while the first half of the output row is calculated, non-zero for the second half

RFselector
#(
    .DATA_WIDTH(DATA_WIDTH),
    .D(D),
    .H(H),
    .W(W),
    .F(F)
) RF
(
    .image(image),
    .rowNumber(rowNumber),
    .column(column),
    .receptiveField(receptiveField)
);

genvar n;
generate //one convolution unit per pixel in half a row of the output image
    for (n = 0; n < HALF_ROW; n = n + 1)
    begin: convLayerSingle
        convUnit
        #(
            .DATA_WIDTH(DATA_WIDTH), // keep the unit's buses in step with this module
            .D(D),
            .F(F)
        ) CU
        (
            .clk(clk),
            .reset(internalReset),
            .image(receptiveField[n*D*F*F*DATA_WIDTH+:D*F*F*DATA_WIDTH]),
            .filter(filter),
            .result(outputConvUnits[n*DATA_WIDTH+:DATA_WIDTH])
        );
    end
endgenerate

// Pass sequencer. Non-blocking assignments are used because these registers
// (internalReset, rowNumber, column, outputCounter) are read by other logic
// clocked on the same edge.
always @ (posedge clk or posedge reset) begin
    if (reset == 1'b1) begin
        internalReset <= 1'b1;
        rowNumber <= 0;
        column <= 0;
        counter <= 0;
        outputCounter <= 0;
    end else if (rowNumber < H-F+1) begin
        if (counter == D*F*F+2) begin //The conv units finish after D*F*F+2 clock cycles
            // Move on to the next half-row slot and restart the conv units.
            outputCounter <= outputCounter + 1;
            counter <= 0;
            internalReset <= 1'b1;
            if (column == 0) begin
                column <= HALF_ROW;          // switch to the second half of this row
            end else begin
                rowNumber <= rowNumber + 1;  // row done: first half of the next row
                column <= 0;
            end
        end else begin
            internalReset <= 0;
            counter <= counter + 1;
        end
    end
end

// Connect the conv-unit outputs to the slot of outputConv selected by
// outputCounter.
// NOTE(review): only one slot is assigned per evaluation, so the remaining
// slots hold their previous values (latch behaviour); after the last pass
// outputCounter indexes one slot past the end of outputConv. Kept as-is to
// preserve the externally visible timing - consider a registered capture.
always @ (*) begin
    outputConv[outputCounter*HALF_ROW*DATA_WIDTH+:HALF_ROW*DATA_WIDTH] = outputConvUnits;
end
endmodule
你是否還在尋找穩定的海外服務器提供商?創新互聯 www.cdcxhl.cn 海外機房具備 T 級流量清洗系統配攻擊溯源,準確流量調度確保服務器高可用性,企業級服務器適合批量采購,新人活動首月 15 元起,快前往官網查看詳情吧。