The first page of the XPS spec uses 1000 1x1 transparent images to mask the drawing of 1000 1x1 transparent images. The performance of this is horrid. Please fix it :)
Created attachment 3620 [details] XPS spec page 1. The first page of the XPS spec as a separate document.
Reassigning per Henry's request.
This bug was mistakenly reassigned at the meeting; I thought we were discussing 689492.
This can't be fixed within my ownership. Passing to Henry.
repurpose this bug for collecting transparency performance examples which will be shared with rotateright. Here is a shark profile for XPS spec page 1 (make xps XCFLAGS=-g) and close up of pdf14_compose_group. # Report 0 - Session 1 - Time Profile of gxps SharkProfileViewer # Generated from the visible portion of the outline view - 67.0%, pdf14_compose_group, gxps - 5.3%, art_pdf_recomposite_group_8, gxps - 4.5%, __bzero, libSystem.B.dylib - 3.7%, blkclr, mach_kernel - 3.0%, __memcpy, libSystem.B.dylib 2.7%, lo_alltraps, mach_kernel - 1.8%, ml_set_interrupts_enabled, mach_kernel - 1.7%, pmap_remove_range, mach_kernel - 1.5%, pmap_enter, mach_kernel - 0.9%, OSAddAtomic, mach_kernel - 0.7%, vm_page_lookup, mach_kernel - 0.6%, pmap_get_mapwindow, mach_kernel - 0.4%, vm_page_free_prepare, mach_kernel - 0.4%, vm_page_grab, mach_kernel - 0.3%, vm_fault, mach_kernel - 0.3%, vm_map_lookup_locked, mach_kernel - 0.3%, hw_lock_unlock, mach_kernel - 0.2%, hw_lock_to, mach_kernel - 0.2%, vm_fault_enter, mach_kernel - 0.2%, vm_page_remove, mach_kernel - 0.2%, vm_fault_cleanup, mach_kernel - 0.2%, user_trap, mach_kernel - 0.2%, vm_page_insert_internal, mach_kernel - 0.1%, lck_rw_lock_shared, mach_kernel - 0.1%, usimple_lock, mach_kernel - 0.1%, thread_cancel_timer, mach_kernel - 0.1%, pmap64_pdpt, mach_kernel - 0.1%, mutex_lock_spin, mach_kernel - 0.1%, pmap_pte, mach_kernel - 0.1%, vm_page_free_list, mach_kernel - 0.1%, chudxnu_cpu_free, mach_kernel - 0.1%, mutex_unlock, mach_kernel - 0.1%, pmap64_pde, mach_kernel - 0.1%, vm_page_alloc, mach_kernel - 0.1%, lck_rw_lock_exclusive, mach_kernel - 0.1%, image_render_color, gxps - 0.1%, gx_build_blended_image_row, gxps - 0.1%, chunk_locate_ptr, gxps - 0.1%, floor$fenv_access_off, libSystem.B.dylib - 0.1%, kernel_preempt_check, mach_kernel - 0.1%, c_pdf14trans_read, gxps - 0.1%, bzero_phys, mach_kernel - 0.1%, _disable_preemption, mach_kernel - 0.1%, mem_true24_fill_rectangle, gxps - 0.1%, vm_object_lock, mach_kernel - 0.1%, 
vm_map_enter, mach_kernel - 0.1%, vm_page_activate, mach_kernel - 0.1%, pdf14_preserve_backdrop, gxps - 0.1%, pdf14_fill_rectangle, gxps - 0.1%, current_processor, mach_kernel - 0.1%, lck_rw_done, mach_kernel 215 nos_alpha_g_ptr = nos_ptr + n_chan * nos_planestride; 216 else 217 nos_alpha_g_ptr = NULL; 218 219 if (maskbuf != NULL) { 220 mask_ptr = maskbuf->data + x0 - maskbuf->rect.p.x + ! Int mult 221 (y0 - maskbuf->rect.p.y) * maskbuf->rowstride; 222 mask_planestride = maskbuf->planestride; 223 mask_bg_alpha = maskbuf->alpha; 224 mask_tr_fn = maskbuf->transfer_fn; 225 } 226 227 for (y = y0; y < y1; ++y) { 9.1% 8.5% 228 for (x = 0; x < width; ++x) { 229 byte pix_alpha = alpha; 230 231 /* Complement the components for subtractive color spaces */ 3.5% 3.3% 232 if (additive) { 10.4% 9.7% 233 for (i = 0; i < n_chan; ++i) { 14.6% 13.7% 234 tos_pixel[i] = tos_ptr[x + i * tos_planestride]; 10.4% 9.7% 235 nos_pixel[i] = nos_ptr[x + i * nos_planestride]; 236 } 237 } else { 238 for (i = 0; i < num_comp; ++i) { 239 tos_pixel[i] = 255 - tos_ptr[x + i * tos_planestride]; 240 nos_pixel[i] = 255 - nos_ptr[x + i * nos_planestride]; 241 } 242 tos_pixel[num_comp] = tos_ptr[x + num_comp * tos_planestride]; ! Int mult 243 nos_pixel[num_comp] = nos_ptr[x + num_comp * nos_planestride]; ! Int mult 244 } 245 1.2% 1.1% 246 if (mask_ptr != NULL) { 4.5% 4.2% 247 int mask_alpha = mask_ptr[x + num_comp * mask_planestride]; ! Int mult 248 int tmp; 249 byte mask; 250 251 /* 252 * The mask data is really monochrome. Thus for additive (RGB) 253 * we use the R channel for alpha since R = G = B. For 254 * subtractive (CMYK) we use the K channel. 255 */ 7.0% 6.6% 256 if (mask_alpha == 255) { 257 /* todo: rgba->mask */ 258 mask = additive ? mask_ptr[x] 259 : 255 - mask_ptr[x + 3 * mask_planestride]; 0.1% 0.1% 260 } else if (mask_alpha == 0) 261 mask = mask_bg_alpha; 262 else { 263 int t2 = additive ? 
mask_ptr[x] 264 : 255 - mask_ptr[x + 3 * mask_planestride]; 265 266 t2 = (t2 - mask_bg_alpha) * mask_alpha + 0x80; ! Int mult 267 mask = mask_bg_alpha + ((t2 + (t2 >> 8)) >> 8); 268 } 269 mask = mask_tr_fn[mask]; 2.9% 2.7% 270 tmp = pix_alpha * mask + 0x80; ! Int mult 6.1% 5.7% 271 pix_alpha = (tmp + (tmp >> 8)) >> 8; 272 # if VD_PAINT_MASK 273 vd_pixel(int2fixed(x), int2fixed(y), mask); 274 # endif 275 } 276 1.1% 1.0% 277 if (nos_knockout) { 278 byte *nos_shape_ptr = nos_has_shape ? 279 &nos_ptr[x + nos_shape_offset] : NULL; 280 byte tos_shape = tos_ptr[x + tos_shape_offset]; 281 282 art_pdf_composite_knockout_isolated_8(nos_pixel, 283 nos_shape_ptr, 284 tos_pixel, 285 n_chan - 1, 286 tos_shape, 287 pix_alpha, shape); 1.1% 1.1% 288 } else if (tos_isolated) { 289 art_pdf_composite_group_8(nos_pixel, nos_alpha_g_ptr, 290 tos_pixel, n_chan - 1, 291 pix_alpha, blend_mode, pblend_procs); 292 } else { 293 byte tos_alpha_g = tos_ptr[x + tos_alpha_g_offset]; 8.0% 13.9% 294 art_pdf_recomposite_group_8(nos_pixel, nos_alpha_g_ptr, 295 tos_pixel, tos_alpha_g, n_chan - 1, 296 pix_alpha, blend_mode, pblend_procs); 297 } 1.1% 1.0% 298 if (nos_has_shape) { 299 nos_ptr[x + nos_shape_offset] = 300 art_pdf_union_mul_8 (nos_ptr[x + nos_shape_offset], 301 tos_ptr[x + tos_shape_offset], 302 shape); 303 } 304 305 /* Complement the results for subtractive color spaces */ 306 if (additive) { 12.8% 12.0% 307 for (i = 0; i < n_chan; ++i) { 4.6% 4.3% 308 nos_ptr[x + i * nos_planestride] = nos_pixel[i]; 309 } 310 } else { 311 for (i = 0; i < num_comp; ++i) 312 nos_ptr[x + i * nos_planestride] = 255 - nos_pixel[i]; 313 nos_ptr[x + num_comp * nos_planestride] = nos_pixel[num_comp]; 314 } 315 # if VD_PAINT_COLORS 316 vd_pixel(int2fixed(x), int2fixed(y), n_chan == 1 ? 
317 (nos_pixel[0] << 16) + (nos_pixel[0] << 8) + nos_pixel[0] : 318 (nos_pixel[0] << 16) + (nos_pixel[1] << 8) + nos_pixel[2]); 319 # endif 320 # if VD_PAINT_ALPHA 321 vd_pixel(int2fixed(x), int2fixed(y), 322 (nos_pixel[n_chan - 1] << 16) + (nos_pixel[n_chan - 1] << 8) + 323 nos_pixel[n_chan - 1]); 324 # endif 325 if (nos_alpha_g_ptr != NULL) 1.4% 1.3% 326 ++nos_alpha_g_ptr; 327 } 0.0% 0.0% 328 tos_ptr += tos->rowstride; 0.0% 0.0% 329 nos_ptr += nos->rowstride; 330 if (nos_alpha_g_ptr != NULL) 331 nos_alpha_g_ptr += nos->rowstride - width; 332 if (mask_ptr != NULL) 333 mask_ptr += maskbuf->rowstride; 334 } 335 }
Created attachment 5944 [details] AnalysisBug689606.pdf The attached PDF file has an analysis that shows the performance problem is due to a large number of data cache misses.
Duplicate of 691114. We need to make some improvements in the transparency rendering performance. *** This bug has been marked as a duplicate of bug 691114 ***