本篇内容主要讲解“NanoVG优化方法是什么”,感兴趣的朋友不妨来看看。本文介绍的方法操作简单快捷,实用性强。下面就让小编来带大家学习“NanoVG优化方法是什么”吧!
成都创新互联专注骨干网络服务器租用10多年,服务更有保障!服务器租用,四川电信科技城机房 成都服务器租用,成都服务器托管,骨干网络带宽,享受低延迟,高速访问。灵活、实现低成本的共享或公网数据中心高速带宽的专属高性能服务器。
nanovg正如其名称所示的那样,是一个非常小巧的矢量绘图函数库。相比cairo和skia的数十万行代码,nanovg不足5000行的C语言代码,称为nano也是名副其实了。nanovg的设计、接口和代码质量都堪称典范,唯一美中不足的就是性能不太理想。特别是在Android的低端机型和大屏幕的机型上,一个简单的界面每秒只能画十几帧。最近我把AWTK移植到Android上时,就碰到了这个尴尬的问题。
经过优化之后,AWTK在低端机型上,整体渲染性能有了3到5倍的提升。这里做个笔记,供有需要的朋友参考。
nanovg的性能瓶颈在于片段着色器(fragment shader),片段着色器可以认为是为GPU提供的一个回调函数,该回调函数在处理每个像素时被调用,在每一帧绘制时都会执行数百万次,可见该函数对性能的影响是很大的。
我们先看看nanovg的片段着色器(fragment shader)代码:
/*
 * GLSL source of the NanoVG fill fragment shader, kept as one C string.
 *
 * The shader is built from adjacent string literals, one GLSL line each.
 * The preprocessor sections select between the GL3 path (in/out variables,
 * optional uniform buffer) and the GL2/GLES2 path (varying + gl_FragColor).
 * main() branches on `type`:
 *   0 - gradient fill, 1 - image fill, 2 - stencil fill, 3 - textured triangles.
 *
 * Annotations that belong to the C side use block comments so that they can
 * never swallow the following literals if the lines are joined.
 */
static const char* fillFragShader =
    "#ifdef GL_ES\n"
    "#if defined(GL_FRAGMENT_PRECISION_HIGH) || defined(NANOVG_GL3)\n"
    " precision highp float;\n"
    "#else\n"
    " precision mediump float;\n"
    "#endif\n"
    "#endif\n"
    "#ifdef NANOVG_GL3\n"
    "#ifdef USE_UNIFORMBUFFER\n"
    " layout(std140) uniform frag {\n"
    " mat3 scissorMat;\n"
    " mat3 paintMat;\n"
    " vec4 innerCol;\n"
    " vec4 outerCol;\n"
    " vec2 scissorExt;\n"
    " vec2 scissorScale;\n"
    " vec2 extent;\n"
    " float radius;\n"
    " float feather;\n"
    " float strokeMult;\n"
    " float strokeThr;\n"
    " int texType;\n"
    " int type;\n"
    " };\n"
    "#else\n" /* NANOVG_GL3 && !USE_UNIFORMBUFFER */
    " uniform vec4 frag[UNIFORMARRAY_SIZE];\n"
    "#endif\n"
    " uniform sampler2D tex;\n"
    " in vec2 ftcoord;\n"
    " in vec2 fpos;\n"
    " out vec4 outColor;\n"
    "#else\n" /* !NANOVG_GL3 */
    " uniform vec4 frag[UNIFORMARRAY_SIZE];\n"
    " uniform sampler2D tex;\n"
    " varying vec2 ftcoord;\n"
    " varying vec2 fpos;\n"
    "#endif\n"
    /* Without a uniform buffer the named uniforms are views into frag[]. */
    "#ifndef USE_UNIFORMBUFFER\n"
    " #define scissorMat mat3(frag[0].xyz, frag[1].xyz, frag[2].xyz)\n"
    " #define paintMat mat3(frag[3].xyz, frag[4].xyz, frag[5].xyz)\n"
    " #define innerCol frag[6]\n"
    " #define outerCol frag[7]\n"
    " #define scissorExt frag[8].xy\n"
    " #define scissorScale frag[8].zw\n"
    " #define extent frag[9].xy\n"
    " #define radius frag[9].z\n"
    " #define feather frag[9].w\n"
    " #define strokeMult frag[10].x\n"
    " #define strokeThr frag[10].y\n"
    " #define texType int(frag[10].z)\n"
    " #define type int(frag[10].w)\n"
    "#endif\n"
    "\n"
    "float sdroundrect(vec2 pt, vec2 ext, float rad) {\n"
    " vec2 ext2 = ext - vec2(rad,rad);\n"
    " vec2 d = abs(pt) - ext2;\n"
    " return min(max(d.x,d.y),0.0) + length(max(d,0.0)) - rad;\n"
    "}\n"
    "\n"
    "// Scissoring\n"
    "float scissorMask(vec2 p) {\n"
    " vec2 sc = (abs((scissorMat * vec3(p,1.0)).xy) - scissorExt);\n"
    " sc = vec2(0.5,0.5) - sc * scissorScale;\n"
    " return clamp(sc.x,0.0,1.0) * clamp(sc.y,0.0,1.0);\n"
    "}\n"
    "#ifdef EDGE_AA\n"
    "// Stroke - from [0..1] to clipped pyramid, where the slope is 1px.\n"
    "float strokeMask() {\n"
    " return min(1.0, (1.0-abs(ftcoord.x*2.0-1.0))*strokeMult) * min(1.0, ftcoord.y);\n"
    "}\n"
    "#endif\n"
    "\n"
    "void main(void) {\n"
    " vec4 result;\n"
    " float scissor = scissorMask(fpos);\n"
    "#ifdef EDGE_AA\n"
    " float strokeAlpha = strokeMask();\n"
    " if (strokeAlpha < strokeThr) discard;\n"
    "#else\n"
    " float strokeAlpha = 1.0;\n"
    "#endif\n"
    " if (type == 0) { // Gradient\n"
    " // Calculate gradient color using box gradient\n"
    " vec2 pt = (paintMat * vec3(fpos,1.0)).xy;\n"
    " float d = clamp((sdroundrect(pt, extent, radius) + feather*0.5) / feather, 0.0, 1.0);\n"
    " vec4 color = mix(innerCol,outerCol,d);\n"
    " // Combine alpha\n"
    " color *= strokeAlpha * scissor;\n"
    " result = color;\n"
    " } else if (type == 1) { // Image\n"
    " // Calculate color fron texture\n"
    " vec2 pt = (paintMat * vec3(fpos,1.0)).xy / extent;\n"
    "#ifdef NANOVG_GL3\n"
    " vec4 color = texture(tex, pt);\n"
    "#else\n"
    " vec4 color = texture2D(tex, pt);\n"
    "#endif\n"
    /* The next two literals intentionally carry no "\n": they share a GLSL line. */
    " if (texType == 1) color = vec4(color.xyz*color.w,color.w);"
    " if (texType == 2) color = vec4(color.x);"
    " // Apply color tint and alpha.\n"
    " color *= innerCol;\n"
    " // Combine alpha\n"
    " color *= strokeAlpha * scissor;\n"
    " result = color;\n"
    " } else if (type == 2) { // Stencil fill\n"
    " result = vec4(1,1,1,1);\n"
    " } else if (type == 3) { // Textured tris\n"
    "#ifdef NANOVG_GL3\n"
    " vec4 color = texture(tex, ftcoord);\n"
    "#else\n"
    " vec4 color = texture2D(tex, ftcoord);\n"
    "#endif\n"
    " if (texType == 1) color = vec4(color.xyz*color.w,color.w);"
    " if (texType == 2) color = vec4(color.x);"
    " color *= scissor;\n"
    " result = color * innerCol;\n"
    " }\n"
    "#ifdef NANOVG_GL3\n"
    " outColor = result;\n"
    "#else\n"
    " gl_FragColor = result;\n"
    "#endif\n"
    "}\n";
它的功能很完整也很复杂,裁剪和反走样都做了处理。仔细分析之后,我发现了几个性能问题:
简单颜色填充和渐变颜色填充使用了相同的代码:
" if (type == 0) { // Gradient\n" " // Calculate gradient color using box gradient\n" " vec2 pt = (paintMat * vec3(fpos,1.0)).xy;\n" " float d = clamp((sdroundrect(pt, extent, radius) + feather*0.5) / feather, 0.0, 1.0);\n" " vec4 color = mix(innerCol,outerCol,d);\n" " // Combine alpha\n" " color *= strokeAlpha * scissor;\n" " result = color;\n"
简单颜色填充只需一条指令,而渐变颜色填充则需要数十条指令。这两种情况重用一段代码,会让简单颜色填充慢10倍以上。
把颜色填充分成以下几种情况,分别进行优化:
矩形简单颜色填充。
对于无需裁剪的矩形(这是最常见的情况),直接赋值即可,性能提高20倍以上。
" if (type == 5) { //fast fill color\n" " result = innerCol;\n"
通用多边形简单颜色填充。
去掉渐变的采样函数,性能会提高一倍以上:
" } else if(type == 7) { // fill color\n" " strokeAlpha = strokeMask();\n" " if (strokeAlpha < strokeThr) discard;\n" " float scissor = scissorMask(fpos);\n" " vec4 color = innerCol;\n" " color *= strokeAlpha * scissor;\n" " result = color;\n"
渐变颜色填充(只占极小的部分)。
这种情况非常少见,还是使用之前的代码。
平均情况,填充性能提高10倍以上!
对于文字而言,需要显示的像素和不显示的像素,平均算下来在1:1左右。
" } else if (type == 3) { // Textured tris\n" "#ifdef NANOVG_GL3\n" " vec4 color = texture(tex, ftcoord);\n" "#else\n" " vec4 color = texture2D(tex, ftcoord);\n" "#endif\n" " if (texType == 1) color = vec4(color.xyz*color.w,color.w);" " if (texType == 2) color = vec4(color.x);" " color *= scissor;\n" " result = color * innerCol;\n" " }\n"
如果显示的像素和不显示的像素都走完整的流程,会浪费掉一半的时间。
当color.x < 0.02时直接跳过。
裁剪和反走样放到判断语句之后。
" } else if (type == 3) { // Textured tris\n" "#ifdef NANOVG_GL3\n" " vec4 color = texture(tex, ftcoord);\n" "#else\n" " vec4 color = texture2D(tex, ftcoord);\n" "#endif\n" " if(color.x < 0.02) discard;\n" " strokeAlpha = strokeMask();\n" " if (strokeAlpha < strokeThr) discard;\n" " float scissor = scissorMask(fpos);\n" " color = vec4(color.x);" " color *= scissor;\n" " result = color * innerCol;\n" " }\n"
字体渲染性能提高一倍!
反走样的实现函数如下(其实我也不懂):
"float strokeMask() {\n" " return min(1.0, (1.0-abs(ftcoord.x*2.0-1.0))*strokeMult) * min(1.0, ftcoord.y);\n" "}\n"
与简单的赋值操作相比,加上反走样功能,性能会下降5-10倍。但是不加反走样功能,绘制多边形时边缘效果比较差。不加不好看,加了又太慢,看起来是个两难的选择。
矩形填充是可以不用反走样功能的。而90%以上的情况都是矩形填充。矩形填充单独处理,一条指令搞定,性能提高20倍以上:
" if (type == 5) { //fast fill color\n" " result = innerCol;\n"
配合裁剪和矩形的优化,性能提高10倍以上。
裁剪放到Shader中虽然合理,但是性能就要大打折扣了。
/*
 * Excerpt of the shader string: the GLSL scissor helper.
 * scissorMask() transforms the fragment position by scissorMat, measures how
 * far it lies beyond scissorExt, scales that by scissorScale and clamps each
 * axis to [0,1]; the product of both axes is returned as the mask.
 */
"// Scissoring\n"
"float scissorMask(vec2 p) {\n"
" vec2 sc = (abs((scissorMat * vec3(p,1.0)).xy) - scissorExt);\n"
" sc = vec2(0.5,0.5) - sc * scissorScale;\n"
" return clamp(sc.x,0.0,1.0) * clamp(sc.y,0.0,1.0);\n"
"}\n"
与简单的赋值操作相比,加上裁剪功能,性能会下降10倍以上。但是不加裁剪功能,像滚动视图这样的控件就没法实现,这看起来也是个两难的选择。
而90%以上的填充都是在裁剪区域的内部的,没有必要每个像素都去判断,放在Shader之外进行判断即可。
/*
 * Report whether every fill vertex of `path` lies inside the axis-aligned
 * rectangle derived from `scissor`, so the caller can decide on the CPU
 * whether per-pixel scissoring in the shader is needed.
 *
 * The rectangle is centred on (xform[4], xform[5]) with half sizes
 * extent[0] and extent[1]; its right/bottom edges are pulled in by one unit.
 *
 * Returns 1 when all path->nfill vertices are inside (also when nfill is 0),
 * 0 as soon as one vertex is outside.
 *
 * NOTE(review): only xform[4] and xform[5] are consulted. If the scissor
 * transform can carry rotation or scale, this test is not equivalent to the
 * shader's scissorMask() -- confirm against the callers.
 */
static int glnvg__pathInScissor(const NVGpath* path, NVGscissor* scissor) {
  const float cx = scissor->xform[4];
  const float cy = scissor->xform[5];
  const float hw = scissor->extent[0];
  const float hh = scissor->extent[1];

  const float l = cx - hw;
  const float t = cy - hh;
  const float r = l + 2 * hw - 1;
  const float b = t + 2 * hh - 1;

  const NVGvertex* verts = path->fill;

  for (int32_t i = 0; i < path->nfill; i++) {
    /*
     * Compare the coordinates as floats. Storing them in an int first would
     * truncate toward zero, so a vertex at e.g. x = -0.7 would pass a bound
     * of l = 0 and the path would wrongly be treated as unclipped.
     */
    const float x = verts[i].x;
    const float y = verts[i].y;

    if (x < l || x > r || y < t || y > b) {
      return 0;
    }
  }

  return 1;
}
配合裁剪和矩形的优化,性能提高10倍以上。
到此,相信大家对“NanoVG优化方法是什么”有了更深的了解,不妨来实际操作一番吧!这里是创新互联网站,更多相关内容可以进入相关频道进行查询,关注我们,继续学习!