FPGA完成CNN卷積層-創新互聯

1卷積層頂層設計

輸入為圖片（32*32*16）和濾波器（5*5*6*16），輸出為 28*28*16。

在這里插入圖片描述

創新互聯建站服務項目包括道里網站建設、道里網站製作、道里網頁製作以及道里網絡營銷策劃等。多年來，我們專注於互聯網行業，利用自身積累的技術優勢、行業經驗、深度合作夥伴關係等，向廣大中小型企業、政府機構等提供互聯網行業的解決方案，道里網站推廣取得了明顯的社會效益與經濟效益。目前，我們服務的客戶以成都為中心已經輻射到道里省份的部分城市，未來相信會繼續擴大服務區域並繼續獲得客戶的支持與信任！

卷積部分

卷積部分無非就是數據的相乘和相加，設計好乘法器和加法器並保存得到的結果（16）。考慮到數據精度，採用半精度浮點數乘法器和半精度浮點數加法器。[半精度浮點數詳解](https://blog.csdn.net/Miracle_cx/article/details/89463503)，定點數量化，減少存取數據和訪問存儲器的時間，並且不會過分減少精度。
舉例一個平常的數，這次反過來，如 -1.5625×10^(-1)，
即 -0.15625 = -0.00101（十進制轉二進制）= -1.01×2^(-3)。
所以符號位為 1，指數為 -3+15=12，所以指數位為 01100，尾數位為 0100000000。所以 -1.5625×10^(-1) 用半精度浮點數表示就為 1 01100 0100000000。注：小數部分轉二進制時每次乘以 2 取整。

// floatAdd: purely combinational adder for two 16-bit floating-point words laid
// out as {sign[15], exponent[14:10] (bias 15), fraction[9:0]}.
//
// Behaviour, as implemented below:
//   * an operand whose exponent and fraction bits are all zero (+0 or -0) is
//     treated as zero and the other operand is returned unchanged;
//   * operands of equal magnitude and opposite sign give +0;
//   * otherwise the operand with the smaller exponent is aligned by
//     right-shifting its significand (shifted-out bits are truncated, there is
//     no rounding), the significands are added or subtracted, and the result
//     is renormalised;
//   * a result whose exponent goes negative is flushed to +0.
// Every non-zero operand is given an implicit leading 1; subnormals,
// infinities and NaNs receive no special treatment.
// NOTE(review): an exponent carry past 31 in the same-sign path also sets
// exponent[5] and is therefore flushed to zero instead of saturating --
// confirm that overflow cannot occur for the intended data range.
module floatAdd (
	input 	wire [15:0] floatA,
	input 	wire [15:0] floatB,
	output	reg	 [15:0] sum
);

reg sign;                                   // sign bit of the result
reg signed [5:0] exponent;                  // result exponent; bit 5 flags "out of range"
reg [9:0] mantissa;                         // fraction field of the result
reg [4:0] exponentA, exponentB;             // biased exponents of the operands
reg [10:0] fractionA, fractionB, fraction;  // significands with the hidden 1 at bit 10
reg [7:0] shiftAmount;                      // alignment shift for the smaller operand
reg cout;                                   // carry (add) / borrow (subtract) out of bit 10

always @ (*)
begin
	exponentA = floatA[14:10];
	exponentB = floatB[14:10];
	fractionA = {1'b1,floatA[9:0]};
	fractionB = {1'b1,floatB[9:0]};

	// Default every temporary and the output so that no path through this
	// combinational block leaves a variable unassigned (which would infer latches).
	exponent    = {1'b0,exponentA};
	sign        = 1'b0;
	mantissa    = 10'd0;
	fraction    = 11'd0;
	shiftAmount = 8'd0;
	cout        = 1'b0;
	sum         = 16'd0;

	if (floatA[14:0] == 15'd0)          // A is +0 or -0: result is B
	begin
		sum = floatB;
	end
	else if (floatB[14:0] == 15'd0)     // B is +0 or -0: result is A
	begin
		sum = floatA;
	end
	else if ((floatA[14:0] == floatB[14:0]) && (floatA[15] != floatB[15])) // A == -B
	begin
		sum = 16'd0;
	end
	else
	begin
		// Align the operand with the smaller exponent to the larger one.
		if (exponentB > exponentA)
		begin
			shiftAmount = exponentB - exponentA;
			fractionA = fractionA >> shiftAmount;
			exponent = {1'b0,exponentB};
		end
		else if (exponentA > exponentB)
		begin
			shiftAmount = exponentA - exponentB;
			fractionB = fractionB >> shiftAmount;
			exponent = {1'b0,exponentA};
		end

		if (floatA[15] == floatB[15])   // same sign: add magnitudes
		begin
			{cout,fraction} = fractionA + fractionB;
			if (cout == 1'b1)
			begin
				// Sum reached 2.0 or more: shift right once and bump the exponent.
				{cout,fraction} = {cout,fraction} >> 1;
				exponent = exponent + 1;
			end
			sign = floatA[15];
		end
		else
		begin                           // opposite signs: positive magnitude minus negative magnitude
			if (floatA[15] == 1'b1)
			begin
				{cout,fraction} = fractionB - fractionA;	// B - |A|
			end
			else
			begin
				{cout,fraction} = fractionA - fractionB;	// A - |B|
			end
			// A borrow means the negative operand had the larger magnitude.
			sign = cout;
			if (cout == 1'b1)
				fraction = -fraction;   // two's-complement negate to recover the magnitude

			// Renormalise: shift left until the leading 1 is back at bit 10.
			if (fraction[10] == 1'b0) begin
				if (fraction[9] == 1'b1) begin
					fraction = fraction << 1;
					exponent = exponent - 1;
				end else if (fraction[8] == 1'b1) begin
					fraction = fraction << 2;
					exponent = exponent - 2;
				end else if (fraction[7] == 1'b1) begin
					fraction = fraction << 3;
					exponent = exponent - 3;
				end else if (fraction[6] == 1'b1) begin
					fraction = fraction << 4;
					exponent = exponent - 4;
				end else if (fraction[5] == 1'b1) begin
					fraction = fraction << 5;
					exponent = exponent - 5;
				end else if (fraction[4] == 1'b1) begin
					fraction = fraction << 6;
					exponent = exponent - 6;
				end else if (fraction[3] == 1'b1) begin
					fraction = fraction << 7;
					exponent = exponent - 7;
				end else if (fraction[2] == 1'b1) begin
					fraction = fraction << 8;
					exponent = exponent - 8;
				end else if (fraction[1] == 1'b1) begin
					fraction = fraction << 9;
					exponent = exponent - 9;
				end else if (fraction[0] == 1'b1) begin
					fraction = fraction << 10;
					exponent = exponent - 10;
				end
			end
		end

		mantissa = fraction[9:0];
		if (exponent[5] == 1'b1) begin
			// Exponent left the 0..31 range: flush the result to zero.
			sum = 16'b0000000000000000;
		end
		else begin
			sum = {sign,exponent[4:0],mantissa};
		end
	end
end

endmodule

測試代碼

`timescale 100 ns / 10 ps

// Stimulus-only testbench for floatAdd: drives two operand pairs ten time
// units apart and then halts the simulation. It performs no self-checking;
// `sum` is meant to be inspected in the waveform viewer.
module floatAdd_TB ();

reg [15:0] floatA;
reg [15:0] floatB;
wire [15:0] sum;

// Device under test.
floatAdd FADD
(
	.floatA(floatA),
	.floatB(floatB),
	.sum(sum)
);

// Drive both operands in the same time step.
task applyOperands;
	input [15:0] opA;
	input [15:0] opB;
	begin
		floatA = opA;
		floatB = opB;
	end
endtask

initial begin
	#0  applyOperands(16'h34CD, 16'h3266);	// 0.3 + 0.2
	#10 applyOperands(16'h34CD, 16'h0000);	// 0.3 + 0
	#10 $stop;
end

endmodule

在這里插入圖片描述
這裡選擇 0.3+0.2 與 0.3+0 作為測試，對照對應的二進制運算，結果無誤。

// floatMult: purely combinational multiplier for two 16-bit floating-point
// words laid out as {sign[15], exponent[14:10] (bias 15), fraction[9:0]}.
//
// The module is named floatMult because that is the name every instantiation
// in this design uses; a thin wrapper under the original misspelled name
// `floatMuilt` follows it so existing references keep elaborating.
//
// Behaviour, as implemented below:
//   * if either operand is +0 or -0 the product is +0;
//   * otherwise both operands get an implicit leading 1, the 11-bit
//     significands are multiplied into 22 bits, and the top ten bits below the
//     leading 1 become the result fraction (truncation, no rounding);
//   * a result whose 6-bit exponent has bit 5 set is flushed to +0.
// Subnormals, infinities and NaNs receive no special treatment.
// NOTE(review): exponent sums that overflow past 31 also set bit 5 and are
// flushed to zero instead of saturating -- confirm that this cannot occur for
// the intended data range.
module floatMult
(
	input wire [15:0] floatA,
	input wire [15:0] floatB,
	output reg [15:0] product
);

reg sign;                         // sign bit of the product
reg signed [5:0] exponent;        // result exponent; bit 5 flags "out of range"
reg [9:0] mantissa;               // fraction field of the product
reg [10:0] fractionA, fractionB;  // significands with the hidden 1 at bit 10
reg [21:0] fraction;              // raw 22-bit significand product

always @ (*)
begin
	// Default every temporary and the output so the zero-operand path does
	// not leave variables unassigned (which would infer latches).
	sign      = 1'b0;
	exponent  = 6'd0;
	mantissa  = 10'd0;
	fractionA = 11'd0;
	fractionB = 11'd0;
	fraction  = 22'd0;
	product   = 16'd0;

	if ((floatA[14:0] != 15'd0) && (floatB[14:0] != 15'd0))
	begin
		sign = floatA[15] ^ floatB[15];
		// eA + eB - bias(15) + 2, evaluated modulo 64. The +2 is taken back
		// during normalisation below (1 or 2 positions).
		exponent = {1'b0,floatA[14:10]} + {1'b0,floatB[14:10]} - 6'd13;

		fractionA = {1'b1,floatA[9:0]};
		fractionB = {1'b1,floatB[9:0]};
		fraction = fractionA * fractionB;

		// Both significands have bit 10 set, so the product lies in
		// [2^20, 2^22): its leading 1 is always at bit 21 or bit 20. Shift that
		// leading 1 out of the register so bits [21:12] hold the fraction.
		if (fraction[21] == 1'b1)
		begin
			fraction = fraction << 1;
			exponent = exponent - 1;
		end
		else
		begin
			fraction = fraction << 2;
			exponent = exponent - 2;
		end

		mantissa = fraction[21:12];
		if (exponent[5] == 1'b1) begin
			// Exponent left the 0..31 range: flush the result to zero.
			product = 16'b0000000000000000;
		end
		else begin
			product = {sign,exponent[4:0],mantissa};
		end
	end
end

endmodule

// Backward-compatibility wrapper that keeps the original (misspelled) module
// name available; it simply forwards to floatMult.
module floatMuilt
(
	input wire [15:0] floatA,
	input wire [15:0] floatB,
	output wire [15:0] product
);

floatMult impl
(
	.floatA(floatA),
	.floatB(floatB),
	.product(product)
);

endmodule

測試代碼

`timescale 100 ns / 10 ps

// Stimulus-only testbench for floatMult: drives two operand pairs ten time
// units apart and then halts the simulation. It performs no self-checking;
// `product` is meant to be inspected in the waveform viewer.
module floatMult_TB ();

reg [15:0] floatA;
reg [15:0] floatB;
wire [15:0] product;

// Device under test.
floatMult FM
(
	.floatA(floatA),
	.floatB(floatB),
	.product(product)
);

// Drive both operands in the same time step.
task applyOperands;
	input [15:0] opA;
	input [15:0] opB;
	begin
		floatA = opA;
		floatB = opB;
	end
endtask

initial begin
	#0  applyOperands(16'h4400, 16'h4500);	// 4 * 5
	#10 applyOperands(16'h0EC2, 16'h0000);	// 0.0004125 * 0
	#10 $stop;
end

endmodule

在這里插入圖片描述
這裡採用 4*5，以及一個很小的數與 0 相乘，結果無誤。

將計算一次得到的結果存儲下來，用於後面的卷積，這裡不做演示。

`timescale 100 ns / 10 ps

// processingElement: multiply-accumulate cell.
//
// On every rising clock edge `result` takes the value result + floatA*floatB,
// computed combinationally by floatMult feeding floatAdd. An asynchronous,
// active-high `reset` clears the accumulator to 0.
//
// Parameters:
//   DATA_WIDTH - width of the operand and result buses. floatMult and floatAdd
//                have fixed 16-bit ports, so only 16 connects without
//                width mismatch.
// Ports:
//   clk, reset - clock and asynchronous active-high reset
//   floatA/B   - operands to be multiplied
//   result     - running accumulated sum (registered)
module processingElement(clk,reset,floatA,floatB,result);

parameter DATA_WIDTH = 16;

input clk, reset;
input [DATA_WIDTH-1:0] floatA, floatB;
output reg [DATA_WIDTH-1:0] result;

wire [DATA_WIDTH-1:0] multResult;	// floatA * floatB
wire [DATA_WIDTH-1:0] addResult;	// multResult + current accumulator value

floatMult FM
(
	.floatA(floatA),
	.floatB(floatB),
	.product(multResult)
);

floatAdd FADD
(
	.floatA(multResult),
	.floatB(result),
	.sum(addResult)
);

// Nonblocking assignments: `result` is read by other processes (the adder
// above and whatever consumes this port), so the register update must not
// race with them inside the same simulation time step.
always @ (posedge clk or posedge reset) begin
	if (reset == 1'b1) begin
		result <= 0;
	end else begin
		result <= addResult;
	end
end

endmodule

最後通過循環完成一整個卷積。

`timescale 100 ns / 10 ps

// convUnit: sequential dot product of one receptive field with one filter.
//
// After `reset` is released, each rising clock edge presents the next pair of
// DATA_WIDTH-bit elements (image[i], filter[i]) to a single processingElement,
// which accumulates their product. Once all D*F*F pairs have been presented,
// zeros are fed instead, so the accumulated `result` no longer changes.
//
// Parameters:
//   DATA_WIDTH - element width in bits (forwarded to the processing element)
//   D          - depth of the filter
//   F          - side length of the square filter
// Ports:
//   clk, reset - clock and asynchronous active-high reset
//   image      - D*F*F packed elements of the receptive field, element 0 first
//   filter     - D*F*F packed filter coefficients, element 0 first
//   result     - accumulator output of the processing element
module convUnit(clk,reset,image,filter,result);

parameter DATA_WIDTH = 16;
parameter D = 1; //depth of the filter
parameter F = 5; //size of the filter

input clk, reset;
input [0:D*F*F*DATA_WIDTH-1] image, filter;
output [0:DATA_WIDTH-1] result;

reg [DATA_WIDTH-1:0] selectedInput1, selectedInput2;	// operand pair currently fed to the PE

integer i;	// index of the next element pair to present


processingElement
	#(
		.DATA_WIDTH(DATA_WIDTH)
	) PE
	(
		.clk(clk),
		.reset(reset),
		.floatA(selectedInput1),
		.floatB(selectedInput2),
		.result(result)
	);

// The convolution is calculated sequentially to save hardware: one
// multiply-accumulate per clock cycle.
// Nonblocking assignments are used because the processing element samples
// the value derived from selectedInput1/2 on the same clock edge; blocking
// assignments here would make the sampled value depend on simulator
// process ordering.
always @ (posedge clk, posedge reset) begin
	if (reset == 1'b1) begin // reset
		i <= 0;
		selectedInput1 <= 0;
		selectedInput2 <= 0;
	end else if (i > D*F*F-1) begin // all pairs consumed: feed zeros so the accumulator holds its value
		selectedInput1 <= 0;
		selectedInput2 <= 0;
	end else begin // present element i of the receptive field and of the filter
		selectedInput1 <= image[DATA_WIDTH*i+:DATA_WIDTH];
		selectedInput2 <= filter[DATA_WIDTH*i+:DATA_WIDTH];
		i <= i + 1;
	end
end

endmodule

過濾器

在進行卷積之前需要將處理的數據提取出來，比如 5×5 的矩陣，元素 1–25 依次排列，過濾器選擇 2×2，就需要將 1、2、6、7 提取出來。

`timescale 100 ns / 10 ps

// RFselector: combinational receptive-field extractor.
//
// For the output row `rowNumber`, copies the F-by-F-by-D image windows of one
// half of that row's output pixels into `receptiveField`, one window after
// another. `column` == 0 selects output columns 0 .. (W-F+1)/2 - 1; any
// non-zero `column` selects output columns (W-F+1)/2 .. W-F.
//
// Within one window the data is ordered depth plane by depth plane, and within
// a plane filter row by filter row; each filter row is a strip of F
// consecutive DATA_WIDTH-bit pixels.

module RFselector(image,rowNumber, column,receptiveField);

parameter DATA_WIDTH = 16;
parameter D = 1; //Depth of the filter
parameter H = 32; //Height of the image
parameter W = 32; //Width of the image
parameter F = 5; //Size of the filter

input [0:D*H*W*DATA_WIDTH-1] image;
input [5:0] rowNumber, column;
output reg [0:(((W-F+1)/2)*D*F*F*DATA_WIDTH)-1] receptiveField; //packed windows handed to the conv units

localparam HALF_COLS  = (W-F+1)/2;    // output columns handled per pass
localparam STRIP_BITS = F*DATA_WIDTH; // one filter row worth of pixels

//col: output column whose window is being copied
//ch:  depth plane inside the window
//r:   filter row inside the window
integer col, ch, r;

always @ (image or rowNumber or column) begin
	if (column != 0) begin
		// second half of the output row
		for (col = HALF_COLS; col < (W-F+1); col = col + 1)
			for (ch = 0; ch < D; ch = ch + 1)
				for (r = 0; r < F; r = r + 1)
					receptiveField[(((col-HALF_COLS)*D + ch)*F + r)*STRIP_BITS +: STRIP_BITS]
						= image[(rowNumber + r)*W*DATA_WIDTH + ch*H*W*DATA_WIDTH + col*DATA_WIDTH +: STRIP_BITS];
	end else begin
		// first half of the output row
		for (col = 0; col < HALF_COLS; col = col + 1)
			for (ch = 0; ch < D; ch = ch + 1)
				for (r = 0; r < F; r = r + 1)
					receptiveField[((col*D + ch)*F + r)*STRIP_BITS +: STRIP_BITS]
						= image[(rowNumber + r)*W*DATA_WIDTH + ch*H*W*DATA_WIDTH + col*DATA_WIDTH +: STRIP_BITS];
	end
end

endmodule

接下來就需要選擇過濾器的移動來完成一整個卷積層。

`timescale 100 ns / 10 ps

// convLayerSingle: convolves one D-deep H-by-W image with one F-by-F-by-D
// filter, producing an (H-F+1)-by-(W-F+1) output map.
//
// (W-F+1)/2 convUnit instances work in parallel, so each output row is
// produced in two passes (first half, then second half of the row). A pass
// runs for D*F*F+2 clock cycles, after which the conv units are reset and the
// next half-row is started. RFselector supplies the receptive fields for the
// half-row currently selected by (rowNumber, column).
//
// Parameters:
//   DATA_WIDTH - element width in bits (forwarded to all sub-modules)
//   D, H, W    - depth, height and width of the image
//   F          - side length of the square filter
// Ports:
//   clk, reset - clock and asynchronous active-high reset
//   image      - packed input image
//   filter     - packed filter coefficients
//   outputConv - packed output map, filled half-row by half-row
module convLayerSingle(clk,reset,image,filter,outputConv);

parameter DATA_WIDTH = 16;
parameter D = 1; //Depth of the filter
parameter H = 32; //Height of the image
parameter W = 32; //Width of the image
parameter F = 5; //Size of the filter

input clk, reset;
input [0:D*H*W*DATA_WIDTH-1] image;
input [0:D*F*F*DATA_WIDTH-1] filter;
output reg [0:(H-F+1)*(W-F+1)*DATA_WIDTH-1] outputConv; // output of the module

wire [0:((W-F+1)/2)*DATA_WIDTH-1] outputConvUnits; // one result per conv unit for the current half-row

reg internalReset; // resets the conv units between half-rows
wire [0:(((W-F+1)/2)*D*F*F*DATA_WIDTH)-1] receptiveField; // receptive fields sent to the conv units


integer counter, outputCounter;
//counter: clock cycles elapsed in the current pass
//outputCounter: index of the half-row slot of outputConv currently being written

reg [5:0] rowNumber, column;
//rowNumber: output row currently being calculated
//column: zero for the first half of the output row, non-zero for the second half

RFselector
#(
	.DATA_WIDTH(DATA_WIDTH),
	.D(D),
	.H(H),
	.W(W),
	.F(F)
) RF
(
	.image(image),
	.rowNumber(rowNumber),
	.column(column),
	.receptiveField(receptiveField)
);

genvar n;

// One conv unit per output pixel of a half-row. The count is derived from the
// image WIDTH so it matches the widths of receptiveField and outputConvUnits
// even when H != W.
generate
	for (n = 0; n< (W-F+1)/2; n = n + 1)
	begin: convLayerSingle
		convUnit
		#(
			.DATA_WIDTH(DATA_WIDTH),
			.D(D),
			.F(F)
		) CU
		(
			.clk(clk),
			.reset(internalReset),
			.image(receptiveField[n*D*F*F*DATA_WIDTH+:D*F*F*DATA_WIDTH]),
			.filter(filter),
			.result(outputConvUnits[n*DATA_WIDTH+:DATA_WIDTH])
		);
	end
endgenerate

// Pass sequencer. Nonblocking assignments keep the updates of internalReset,
// rowNumber, column and outputCounter from racing with the processes that
// read them in the same time step. outputCounter is assigned before
// internalReset so that the output slot has already advanced when the conv
// units are cleared.
always @ (posedge clk or posedge reset) begin
	if (reset == 1'b1) begin
		internalReset <= 1'b1;
		rowNumber <= 0;
		column <= 0;
		counter <= 0;
		outputCounter <= 0;
	end else if (rowNumber< H-F+1) begin
		if (counter == D*F*F+2) begin // a pass takes D*F*F+2 clock cycles
			outputCounter <= outputCounter + 1;
			counter <= 0;
			internalReset <= 1'b1;
			if (column == 0) begin
				column <= (W-F+1)/2;
			end else begin
				rowNumber <= rowNumber + 1;
				column <= 0;
			end
		end else begin
			internalReset <= 1'b0;
			counter <= counter + 1;
		end
	end
end

// Mirrors the conv-unit results into the half-row slot selected by
// outputCounter. Slots that are not selected keep their previous value, i.e.
// outputConv is storage written through a combinational block.
// NOTE(review): this infers latches; consider registering the slot on the
// clock edge that ends a pass if latches are undesirable on the target.
always @ (*) begin
	outputConv[outputCounter*((W-F+1)/2)*DATA_WIDTH+:((W-F+1)/2)*DATA_WIDTH] = outputConvUnits;
end

endmodule

你是否還在尋找穩定的海外服務器提供商？創新互聯 www.cdcxhl.cn 海外機房具備 T 級流量清洗系統，配攻擊溯源，準確流量調度確保服務器高可用性，企業級服務器適合批量採購，新人活動首月 15 元起，快前往官網查看詳情吧。

分享題目：FPGA完成CNN卷積層-創新互聯
瀏覽地址：http://weahome.cn/article/dcidgd.html

真实的国产乱ⅩXXX66竹夫人,五月香六月婷婷激情综合,亚洲日本VA一区二区三区,亚洲精品一区二区三区麻豆

FPGA完成CNN卷積層-創(chuàng)新互聯(lián)

其他資訊

網(wǎng)站制作

企業(yè)服務

網(wǎng)站建設

服務器托管