I am testing out some different ways to write data into a VBO and I am sort of confused.
I started to compare glBufferSubData and glMapBufferRange using the UNSYNCRONIZED flag and I'm finding that glBufferSubData is doing significantly better. Where glBufferSubData is pulling in ~950 FPS compared to the ~150 FPS glMapBufferRange with the UNSYNCRONIZED flag is getting
I am not doing anything special here. My test is 10000 untextured 32x32 quads (colored red) with a prerandomized position at init time. Where the quads are spread out over a 800x600 window.
So I'm wondering what gives, I would think that numbers would be the other way around? I thought that the UNSYNCRONIZED flag was supposed to tell the GPU not to block, where as glBufferSubData does cause a block on the GPU?
glBufferSubdata
void SpriteBatcher::Render(Matrix4 &projection)
{
glUseProgram(shaderProgram.programID);
glUniformMatrix4fv(shaderProgram.uniforms[0].location, 1, GL_FALSE, projection.data);
for (int i = 0; i < 12 * MAX_SPRITE_BATCH_SIZE; i += 12)
{
verts1[i] = pos[i];
verts1[i + 1] = pos[i + 1];
verts1[i + 2] = 0.0f;
verts1[i + 3] = _verts1[i];
verts1[i + 4] = _verts1[i + 1] + 32.0f;
verts1[i + 5] = 0.0f;
verts1[i + 6] = _verts1[i] + 32.0f;
verts1[i + 7] = _verts1[i + 1] + 32.0f;
verts1[i + 8] = 0.0f;
verts1[i + 9] = _verts1[i] + 32.0f;
verts1[i + 10] = _verts1[i + 1];
verts1[i + 11] = 0.0f;
}
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferSubData(GL_ARRAY_BUFFER, 0, MAX_SPRITE_BATCH_SIZE * VERTEX_DATA_SIZE_PER_QUAD, _verts1);
glBindVertexArray(vao);
glDrawElements(GL_TRIANGLES, MAX_SPRITE_BATCH_SIZE * INDICES_COUNT_PER_QUAD, GL_UNSIGNED_SHORT, (const void*)0);
}
glMapBufferRange with UNSYNCRONIZED flag
void SpriteBatcher::Render(Matrix4 &projection)
{
glUseProgram(shaderProgram.programID);
glUniformMatrix4fv(shaderProgram.uniforms[0].location, 1, GL_FALSE, projection.data);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
GLfloat *pointer = (GLfloat*)glMapBufferRange(GL_ARRAY_BUFFER, 0, MAX_SPRITE_BATCH_SIZE * VERTEX_DATA_SIZE_PER_QUAD, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
if (pointer == NULL)
throw std::runtime_error("Null pointer on map");
for (int i = 0; i < 12 * MAX_SPRITE_BATCH_SIZE; i += 12)
{
pointer[i] = pos[i];
pointer[i + 1] = pos[i + 1];
pointer[i + 2] = 0.0f;
pointer[i + 3] = pointer[i];
pointer[i + 4] = pointer[i + 1] + 32.0f;
pointer[i + 5] = 0.0f;
pointer[i + 6] = pointer[i] + 32.0f;
pointer[i + 7] = pointer[i + 1] + 32.0f;
pointer[i + 8] = 0.0f;
pointer[i + 9] = pointer[i] + 32.0f;
pointer[i + 10] = pointer[i + 1];
pointer[i + 11] = 0.0f;
}
glUnmapBuffer(GL_ARRAY_BUFFER);
glBindVertexArray(vao);
glDrawElements(GL_TRIANGLES, MAX_SPRITE_BATCH_SIZE * INDICES_COUNT_PER_QUAD, GL_UNSIGNED_SHORT, (const void*)0);
}