Bug 689606 - transparency performance
Summary: transparency performance
Status: RESOLVED DUPLICATE of bug 691114
Alias: None
Product: Ghostscript
Classification: Unclassified
Component: Graphics Library (show other bugs)
Version: 0.00
Hardware: All MacOS X
Importance: P4 normal
Assignee: Michael Vrhel
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2007-12-12 07:45 UTC by Tor Andersson
Modified: 2011-08-17 22:36 UTC (History)
2 users (show)

See Also:
Customer:
Word Size: ---


Attachments
XPS spec page 1. (151.88 KB, application/octet-stream)
2007-12-12 07:47 UTC, Tor Andersson
Details
AnalysisBug689606.pdf (237.61 KB, application/pdf)
2010-02-04 14:29 UTC, Dave Eberly
Details

Note You need to log in before you can comment on or make changes to this bug.
Description Tor Andersson 2007-12-12 07:45:40 UTC
The first page of the XPS spec uses 1000 1x1 transparent images
to mask the drawing of 1000 1x1 transparent images.
The performance of this is horrid.
Please fix it :)
Comment 1 Tor Andersson 2007-12-12 07:47:32 UTC
Created attachment 3620 [details]
XPS spec page 1.

The first page of the XPS spec as a separate document.
Comment 2 Ray Johnston 2007-12-18 09:33:57 UTC
Reassigning per Henry's request.
Comment 3 Henry Stiles 2007-12-19 10:45:49 UTC
This bug was mistakenly reassigned at the meeting; I thought we were discussing
bug 689492.
Comment 4 leonardo 2008-09-05 06:34:39 UTC
This can't be fixed within my ownership. Passing to Henry.
Comment 5 Henry Stiles 2009-03-27 09:08:44 UTC
Repurposing this bug to collect transparency performance examples, which will
be shared with rotateright.  Here is a Shark profile for XPS spec page 1 (built
with make xps XCFLAGS=-g) and a close-up of pdf14_compose_group.

# Report 0 - Session 1 - Time Profile of gxps
SharkProfileViewer
# Generated from the visible portion of the outline view
- 67.0%, pdf14_compose_group, gxps
- 5.3%, art_pdf_recomposite_group_8, gxps
- 4.5%, __bzero, libSystem.B.dylib
- 3.7%, blkclr, mach_kernel
- 3.0%, __memcpy, libSystem.B.dylib
  2.7%, lo_alltraps, mach_kernel
- 1.8%, ml_set_interrupts_enabled, mach_kernel
- 1.7%, pmap_remove_range, mach_kernel
- 1.5%, pmap_enter, mach_kernel
- 0.9%, OSAddAtomic, mach_kernel
- 0.7%, vm_page_lookup, mach_kernel
- 0.6%, pmap_get_mapwindow, mach_kernel
- 0.4%, vm_page_free_prepare, mach_kernel
- 0.4%, vm_page_grab, mach_kernel
- 0.3%, vm_fault, mach_kernel
- 0.3%, vm_map_lookup_locked, mach_kernel
- 0.3%, hw_lock_unlock, mach_kernel
- 0.2%, hw_lock_to, mach_kernel
- 0.2%, vm_fault_enter, mach_kernel
- 0.2%, vm_page_remove, mach_kernel
- 0.2%, vm_fault_cleanup, mach_kernel
- 0.2%, user_trap, mach_kernel
- 0.2%, vm_page_insert_internal, mach_kernel
- 0.1%, lck_rw_lock_shared, mach_kernel
- 0.1%, usimple_lock, mach_kernel
- 0.1%, thread_cancel_timer, mach_kernel
- 0.1%, pmap64_pdpt, mach_kernel
- 0.1%, mutex_lock_spin, mach_kernel
- 0.1%, pmap_pte, mach_kernel
- 0.1%, vm_page_free_list, mach_kernel
- 0.1%, chudxnu_cpu_free, mach_kernel
- 0.1%, mutex_unlock, mach_kernel
- 0.1%, pmap64_pde, mach_kernel
- 0.1%, vm_page_alloc, mach_kernel
- 0.1%, lck_rw_lock_exclusive, mach_kernel
- 0.1%, image_render_color, gxps
- 0.1%, gx_build_blended_image_row, gxps
- 0.1%, chunk_locate_ptr, gxps
- 0.1%, floor$fenv_access_off, libSystem.B.dylib
- 0.1%, kernel_preempt_check, mach_kernel
- 0.1%, c_pdf14trans_read, gxps
- 0.1%, bzero_phys, mach_kernel
- 0.1%, _disable_preemption, mach_kernel
- 0.1%, mem_true24_fill_rectangle, gxps
- 0.1%, vm_object_lock, mach_kernel
- 0.1%, vm_map_enter, mach_kernel
- 0.1%, vm_page_activate, mach_kernel
- 0.1%, pdf14_preserve_backdrop, gxps
- 0.1%, pdf14_fill_rectangle, gxps
- 0.1%, current_processor, mach_kernel
- 0.1%, lck_rw_done, mach_kernel


		215		nos_alpha_g_ptr = nos_ptr + n_chan * nos_planestride;			
		216	    else			
		217		nos_alpha_g_ptr = NULL;			
		218				
		219	    if (maskbuf != NULL) {			
		220		mask_ptr = maskbuf->data + x0 - maskbuf->rect.p.x +	!	Int mult	
		221			(y0 - maskbuf->rect.p.y) * maskbuf->rowstride;			
		222		mask_planestride = maskbuf->planestride;			
		223		mask_bg_alpha = maskbuf->alpha;			
		224		mask_tr_fn = maskbuf->transfer_fn;			
		225	    }			
		226				
		227	    for (y = y0; y < y1; ++y) {			
9.1%	8.5%	228		for (x = 0; x < width; ++x) {			
		229		    byte pix_alpha = alpha;			
		230				
		231		    /* Complement the components for subtractive color spaces */			
3.5%	3.3%	232		    if (additive) {			
10.4%	9.7%	233			for (i = 0; i < n_chan; ++i) {			
14.6%	13.7%	234			    tos_pixel[i] = tos_ptr[x + i * tos_planestride];			
10.4%	9.7%	235			    nos_pixel[i] = nos_ptr[x + i * nos_planestride];			
		236			}			
		237		    } else {			
		238			for (i = 0; i < num_comp; ++i) {			
		239			    tos_pixel[i] = 255 - tos_ptr[x + i * tos_planestride];			
		240			    nos_pixel[i] = 255 - nos_ptr[x + i * nos_planestride];			
		241			}			
		242			tos_pixel[num_comp] = tos_ptr[x + num_comp * tos_planestride];	!	Int mult	
		243			nos_pixel[num_comp] = nos_ptr[x + num_comp * nos_planestride];	!	Int mult	
		244		    }			
		245				
1.2%	1.1%	246		    if (mask_ptr != NULL) {			
4.5%	4.2%	247			int mask_alpha = mask_ptr[x + num_comp * mask_planestride];	!	Int mult	
		248			int tmp;			
		249			byte mask;			
		250				
		251			    /*			
		252			    * The mask data is really monochrome.  Thus for additive (RGB)			
		253			    * we use the R channel for alpha since R = G = B.  For			
		254			    * subtractive (CMYK) we use the K channel.			
		255			    */			
7.0%	6.6%	256			if (mask_alpha == 255) {			
		257			    /* todo: rgba->mask */			
		258			    mask = additive ? mask_ptr[x]			
		259					    : 255 - mask_ptr[x + 3 * mask_planestride];			
0.1%	0.1%	260			} else if (mask_alpha == 0)			
		261			    mask = mask_bg_alpha;			
		262			else {			
		263			    int t2 = additive ? mask_ptr[x]			
		264					    : 255 - mask_ptr[x + 3 * mask_planestride];			
		265				
		266			    t2 = (t2 - mask_bg_alpha) * mask_alpha + 0x80;	!	Int mult	
		267			    mask = mask_bg_alpha + ((t2 + (t2 >> 8)) >> 8);			
		268			}			
		269			mask = mask_tr_fn[mask];			
2.9%	2.7%	270			tmp = pix_alpha * mask + 0x80;	!	Int mult	
6.1%	5.7%	271			pix_alpha = (tmp + (tmp >> 8)) >> 8;			
		272	#		    if VD_PAINT_MASK			
		273			    vd_pixel(int2fixed(x), int2fixed(y), mask);			
		274	#		    endif			
		275		    }			
		276				
1.1%	1.0%	277		    if (nos_knockout) {			
		278			byte *nos_shape_ptr = nos_has_shape ?			
		279			    &nos_ptr[x + nos_shape_offset] : NULL;			
		280			byte tos_shape = tos_ptr[x + tos_shape_offset];			
		281				
		282			art_pdf_composite_knockout_isolated_8(nos_pixel,			
		283							    nos_shape_ptr,			
		284							    tos_pixel,			
		285							    n_chan - 1,			
		286							    tos_shape,			
		287							    pix_alpha, shape);			
1.1%	1.1%	288		    } else if (tos_isolated) {			
		289			art_pdf_composite_group_8(nos_pixel, nos_alpha_g_ptr,			
		290					    tos_pixel, n_chan - 1,			
		291					    pix_alpha, blend_mode, pblend_procs);			
		292		    } else {			
		293			byte tos_alpha_g = tos_ptr[x + tos_alpha_g_offset];			
8.0%	13.9%	294			art_pdf_recomposite_group_8(nos_pixel, nos_alpha_g_ptr,			
		295					    tos_pixel, tos_alpha_g, n_chan - 1,			
		296					    pix_alpha, blend_mode, pblend_procs);			
		297		    }			
1.1%	1.0%	298		    if (nos_has_shape) {			
		299			nos_ptr[x + nos_shape_offset] =			
		300			    art_pdf_union_mul_8 (nos_ptr[x + nos_shape_offset],			
		301						    tos_ptr[x + tos_shape_offset],			
		302						    shape);			
		303		    }			
		304	        			
		305		    /* Complement the results for subtractive color spaces */			
		306		    if (additive) {			
12.8%	12.0%	307			for (i = 0; i < n_chan; ++i) {			
4.6%	4.3%	308			    nos_ptr[x + i * nos_planestride] = nos_pixel[i];			
		309			}			
		310		    } else {			
		311			for (i = 0; i < num_comp; ++i)			
		312			    nos_ptr[x + i * nos_planestride] = 255 - nos_pixel[i];			
		313			nos_ptr[x + num_comp * nos_planestride] = nos_pixel[num_comp];			
		314		    }			
		315	#		if VD_PAINT_COLORS			
		316			vd_pixel(int2fixed(x), int2fixed(y), n_chan == 1 ? 			
		317			    (nos_pixel[0] << 16) + (nos_pixel[0] << 8) + nos_pixel[0] :			
		318			    (nos_pixel[0] << 16) + (nos_pixel[1] << 8) + nos_pixel[2]);			
		319	#		endif			
		320	#		if VD_PAINT_ALPHA			
		321			vd_pixel(int2fixed(x), int2fixed(y),			
		322			    (nos_pixel[n_chan - 1] << 16) + (nos_pixel[n_chan - 1] << 8) + 			
		323			     nos_pixel[n_chan - 1]);			
		324	#		endif			
		325		    if (nos_alpha_g_ptr != NULL)			
1.4%	1.3%	326			++nos_alpha_g_ptr;			
		327		}			
0.0%	0.0%	328		tos_ptr += tos->rowstride;			
0.0%	0.0%	329		nos_ptr += nos->rowstride;			
		330		if (nos_alpha_g_ptr != NULL)			
		331		    nos_alpha_g_ptr += nos->rowstride - width;			
		332		if (mask_ptr != NULL)			
		333		    mask_ptr += maskbuf->rowstride;			
		334	    }			
		335	}			
Comment 6 Dave Eberly 2010-02-04 14:29:36 UTC
Created attachment 5944 [details]
AnalysisBug689606.pdf

The attached PDF file has an analysis that shows the performance problem is due
to a large number of data cache misses.
Comment 7 Michael Vrhel 2011-08-17 22:36:19 UTC
Duplicate of 691114.  We need to make some improvements in the transparency rendering performance.

*** This bug has been marked as a duplicate of bug 691114 ***