Bug 689606

Summary: transparency performance
Product: Ghostscript Reporter: Tor Andersson <tor.andersson>
Component: Graphics Library Assignee: Michael Vrhel <michael.vrhel>
Status: RESOLVED DUPLICATE    
Severity: normal CC: christinedelight.top85, ray.johnston
Priority: P4    
Version: 0.00   
Hardware: All   
OS: MacOS X   
Customer: Word Size: ---
Attachments: XPS spec page 1.
AnalysisBug689606.pdf

Description Tor Andersson 2007-12-12 07:45:40 UTC
The first page of the XPS spec uses 1000 1x1 transparent images
to mask the drawing of 1000 1x1 transparent images.
The performance of this is horrid.
Please fix it :)
Comment 1 Tor Andersson 2007-12-12 07:47:32 UTC
Created attachment 3620 [details]
XPS spec page 1.

The first page of the XPS spec as a separate document.
Comment 2 Ray Johnston 2007-12-18 09:33:57 UTC
Reassigning per Henry's request.
Comment 3 Henry Stiles 2007-12-19 10:45:49 UTC
This bug was mistakenly reassigned at the meeting; I thought we were discussing
689492.
Comment 4 leonardo 2008-09-05 06:34:39 UTC
This can't be fixed within my ownership. Passing to Henry.
Comment 5 Henry Stiles 2009-03-27 09:08:44 UTC
Repurposing this bug to collect transparency performance examples, which will
be shared with rotateright.  Here is a Shark profile for XPS spec page 1 (make
xps XCFLAGS=-g) and a close-up of pdf14_compose_group.

# Report 0 - Session 1 - Time Profile of gxps
SharkProfileViewer
# Generated from the visible portion of the outline view
- 67.0%, pdf14_compose_group, gxps
- 5.3%, art_pdf_recomposite_group_8, gxps
- 4.5%, __bzero, libSystem.B.dylib
- 3.7%, blkclr, mach_kernel
- 3.0%, __memcpy, libSystem.B.dylib
  2.7%, lo_alltraps, mach_kernel
- 1.8%, ml_set_interrupts_enabled, mach_kernel
- 1.7%, pmap_remove_range, mach_kernel
- 1.5%, pmap_enter, mach_kernel
- 0.9%, OSAddAtomic, mach_kernel
- 0.7%, vm_page_lookup, mach_kernel
- 0.6%, pmap_get_mapwindow, mach_kernel
- 0.4%, vm_page_free_prepare, mach_kernel
- 0.4%, vm_page_grab, mach_kernel
- 0.3%, vm_fault, mach_kernel
- 0.3%, vm_map_lookup_locked, mach_kernel
- 0.3%, hw_lock_unlock, mach_kernel
- 0.2%, hw_lock_to, mach_kernel
- 0.2%, vm_fault_enter, mach_kernel
- 0.2%, vm_page_remove, mach_kernel
- 0.2%, vm_fault_cleanup, mach_kernel
- 0.2%, user_trap, mach_kernel
- 0.2%, vm_page_insert_internal, mach_kernel
- 0.1%, lck_rw_lock_shared, mach_kernel
- 0.1%, usimple_lock, mach_kernel
- 0.1%, thread_cancel_timer, mach_kernel
- 0.1%, pmap64_pdpt, mach_kernel
- 0.1%, mutex_lock_spin, mach_kernel
- 0.1%, pmap_pte, mach_kernel
- 0.1%, vm_page_free_list, mach_kernel
- 0.1%, chudxnu_cpu_free, mach_kernel
- 0.1%, mutex_unlock, mach_kernel
- 0.1%, pmap64_pde, mach_kernel
- 0.1%, vm_page_alloc, mach_kernel
- 0.1%, lck_rw_lock_exclusive, mach_kernel
- 0.1%, image_render_color, gxps
- 0.1%, gx_build_blended_image_row, gxps
- 0.1%, chunk_locate_ptr, gxps
- 0.1%, floor$fenv_access_off, libSystem.B.dylib
- 0.1%, kernel_preempt_check, mach_kernel
- 0.1%, c_pdf14trans_read, gxps
- 0.1%, bzero_phys, mach_kernel
- 0.1%, _disable_preemption, mach_kernel
- 0.1%, mem_true24_fill_rectangle, gxps
- 0.1%, vm_object_lock, mach_kernel
- 0.1%, vm_map_enter, mach_kernel
- 0.1%, vm_page_activate, mach_kernel
- 0.1%, pdf14_preserve_backdrop, gxps
- 0.1%, pdf14_fill_rectangle, gxps
- 0.1%, current_processor, mach_kernel
- 0.1%, lck_rw_done, mach_kernel


		215		nos_alpha_g_ptr = nos_ptr + n_chan * nos_planestride;			
		216	    else			
		217		nos_alpha_g_ptr = NULL;			
		218				
		219	    if (maskbuf != NULL) {			
		220		mask_ptr = maskbuf->data + x0 - maskbuf->rect.p.x +	!	Int mult	
		221			(y0 - maskbuf->rect.p.y) * maskbuf->rowstride;			
		222		mask_planestride = maskbuf->planestride;			
		223		mask_bg_alpha = maskbuf->alpha;			
		224		mask_tr_fn = maskbuf->transfer_fn;			
		225	    }			
		226				
		227	    for (y = y0; y < y1; ++y) {			
9.1%	8.5%	228		for (x = 0; x < width; ++x) {			
		229		    byte pix_alpha = alpha;			
		230				
		231		    /* Complement the components for subtractive color spaces */			
3.5%	3.3%	232		    if (additive) {			
10.4%	9.7%	233			for (i = 0; i < n_chan; ++i) {			
14.6%	13.7%	234			    tos_pixel[i] = tos_ptr[x + i * tos_planestride];			
10.4%	9.7%	235			    nos_pixel[i] = nos_ptr[x + i * nos_planestride];			
		236			}			
		237		    } else {			
		238			for (i = 0; i < num_comp; ++i) {			
		239			    tos_pixel[i] = 255 - tos_ptr[x + i * tos_planestride];			
		240			    nos_pixel[i] = 255 - nos_ptr[x + i * nos_planestride];			
		241			}			
		242			tos_pixel[num_comp] = tos_ptr[x + num_comp * tos_planestride];	!	Int mult	
		243			nos_pixel[num_comp] = nos_ptr[x + num_comp * nos_planestride];	!	Int mult	
		244		    }			
		245				
1.2%	1.1%	246		    if (mask_ptr != NULL) {			
4.5%	4.2%	247			int mask_alpha = mask_ptr[x + num_comp * mask_planestride];	!	Int mult	
		248			int tmp;			
		249			byte mask;			
		250				
		251			    /*			
		252			    * The mask data is really monochrome.  Thus for additive (RGB)			
		253			    * we use the R channel for alpha since R = G = B.  For			
		254			    * subtractive (CMYK) we use the K channel.			
		255			    */			
7.0%	6.6%	256			if (mask_alpha == 255) {			
		257			    /* todo: rgba->mask */			
		258			    mask = additive ? mask_ptr[x]			
		259					    : 255 - mask_ptr[x + 3 * mask_planestride];			
0.1%	0.1%	260			} else if (mask_alpha == 0)			
		261			    mask = mask_bg_alpha;			
		262			else {			
		263			    int t2 = additive ? mask_ptr[x]			
		264					    : 255 - mask_ptr[x + 3 * mask_planestride];			
		265				
		266			    t2 = (t2 - mask_bg_alpha) * mask_alpha + 0x80;	!	Int mult	
		267			    mask = mask_bg_alpha + ((t2 + (t2 >> 8)) >> 8);			
		268			}			
		269			mask = mask_tr_fn[mask];			
2.9%	2.7%	270			tmp = pix_alpha * mask + 0x80;	!	Int mult	
6.1%	5.7%	271			pix_alpha = (tmp + (tmp >> 8)) >> 8;			
		272	#		    if VD_PAINT_MASK			
		273			    vd_pixel(int2fixed(x), int2fixed(y), mask);			
		274	#		    endif			
		275		    }			
		276				
1.1%	1.0%	277		    if (nos_knockout) {			
		278			byte *nos_shape_ptr = nos_has_shape ?			
		279			    &nos_ptr[x + nos_shape_offset] : NULL;			
		280			byte tos_shape = tos_ptr[x + tos_shape_offset];			
		281				
		282			art_pdf_composite_knockout_isolated_8(nos_pixel,			
		283							    nos_shape_ptr,			
		284							    tos_pixel,			
		285							    n_chan - 1,			
		286							    tos_shape,			
		287							    pix_alpha, shape);			
1.1%	1.1%	288		    } else if (tos_isolated) {			
		289			art_pdf_composite_group_8(nos_pixel, nos_alpha_g_ptr,			
		290					    tos_pixel, n_chan - 1,			
		291					    pix_alpha, blend_mode, pblend_procs);			
		292		    } else {			
		293			byte tos_alpha_g = tos_ptr[x + tos_alpha_g_offset];			
8.0%	13.9%	294			art_pdf_recomposite_group_8(nos_pixel, nos_alpha_g_ptr,			
		295					    tos_pixel, tos_alpha_g, n_chan - 1,			
		296					    pix_alpha, blend_mode, pblend_procs);			
		297		    }			
1.1%	1.0%	298		    if (nos_has_shape) {			
		299			nos_ptr[x + nos_shape_offset] =			
		300			    art_pdf_union_mul_8 (nos_ptr[x + nos_shape_offset],			
		301						    tos_ptr[x + tos_shape_offset],			
		302						    shape);			
		303		    }			
		304	        			
		305		    /* Complement the results for subtractive color spaces */			
		306		    if (additive) {			
12.8%	12.0%	307			for (i = 0; i < n_chan; ++i) {			
4.6%	4.3%	308			    nos_ptr[x + i * nos_planestride] = nos_pixel[i];			
		309			}			
		310		    } else {			
		311			for (i = 0; i < num_comp; ++i)			
		312			    nos_ptr[x + i * nos_planestride] = 255 - nos_pixel[i];			
		313			nos_ptr[x + num_comp * nos_planestride] = nos_pixel[num_comp];			
		314		    }			
		315	#		if VD_PAINT_COLORS			
		316			vd_pixel(int2fixed(x), int2fixed(y), n_chan == 1 ? 			
		317			    (nos_pixel[0] << 16) + (nos_pixel[0] << 8) + nos_pixel[0] :			
		318			    (nos_pixel[0] << 16) + (nos_pixel[1] << 8) + nos_pixel[2]);			
		319	#		endif			
		320	#		if VD_PAINT_ALPHA			
		321			vd_pixel(int2fixed(x), int2fixed(y),			
		322			    (nos_pixel[n_chan - 1] << 16) + (nos_pixel[n_chan - 1] << 8) + 			
		323			     nos_pixel[n_chan - 1]);			
		324	#		endif			
		325		    if (nos_alpha_g_ptr != NULL)			
1.4%	1.3%	326			++nos_alpha_g_ptr;			
		327		}			
0.0%	0.0%	328		tos_ptr += tos->rowstride;			
0.0%	0.0%	329		nos_ptr += nos->rowstride;			
		330		if (nos_alpha_g_ptr != NULL)			
		331		    nos_alpha_g_ptr += nos->rowstride - width;			
		332		if (mask_ptr != NULL)			
		333		    mask_ptr += maskbuf->rowstride;			
		334	    }			
		335	}			
Comment 6 Dave Eberly 2010-02-04 14:29:36 UTC
Created attachment 5944 [details]
AnalysisBug689606.pdf

The attached PDF file has an analysis that shows the performance problem is due
to a large number of data cache misses.
Comment 7 Michael Vrhel 2011-08-17 22:36:19 UTC
Duplicate of 691114.  We need to make some improvements in the transparency rendering performance.

*** This bug has been marked as a duplicate of bug 691114 ***