(original) (raw)

Andrew,

thanks so much for advertising oprofile: it works like a charm !

Apparently, I understood the documentation and the default CPU_CLK_UNHALTED setting is perfect.

Moreover, it is able to annotate java source code (dtrace like) and it is so easy:


opannotate --source -o src/ -t 0.1 --search-dirs=/home/bourgesl/libs/openjdk/pisces/src/sun/java2d/pisces/


PS: It would be so great if there were a NetBeans plugin (apparently an Eclipse one exists in the Fedora packages) ...


Here is a part of the annotated Java code (the 2 major hotspot methods):

/* int sun.java2d.pisces.Renderer$ScanlineIterator.next() total: 4380416 34.2926 */

: int next() {
: final float[] _edges = rdr.edges;

86967 0.6808 : final int[] _edgesInt = rdr.edgesInt; /* int sun.java2d.pisces.Renderer$ScanlineIterator.next() total: 4380416 34.2926 */

2653 0.0208 : final int cury = nextY++;
89373 0.6997 : final int bucket = cury - rdr.boundsMinY;

24478 0.1916 : int count = this.edgeCount;
1101 0.0086 : int[] ptrs = this.edgePtrs;

19457 0.1523 : final int bucketcount = rdr.edgeBucketCounts[bucket];
:

33547 0.2626 : if ((bucketcount & 0x1) != 0) {
16662 0.1304 : final int offYmax = YMAX;

: int newCount = 0;
72462 0.5673 : for (int i = 0, ecur; i < count; i++) {

81526 0.6382 : ecur = ptrs[i];
99950 0.7825 : if (_edgesInt[ecur + offYmax] > cury) {

209365 1.6390 : ptrs[newCount++] = ecur;
: }

: }
25606 0.2005 : count = newCount;

: }
:

: int ptrLen = bucketcount >> 1;
33571 0.2628 : if (ptrs.length < count + ptrLen) {

50928 0.3987 : boolean ptrInitial = (ptrs == edgePtrs_initial);
: this.edgePtrs = ptrs = Helpers.widenArray(rdrCtx, ptrs, count, ptrLen, arrayMaxUsed);

7024 0.0550 : if (ptrInitial && doCleanDirty) {
: IntArrayCache.fill(edgePtrs_initial, 0, arrayMaxUsed, 0);

: }
: }

:
: final int nul = NULL;

26781 0.2097 : for (int ecur = rdr.edgeBuckets[bucket]; ecur != nul; ecur = _edgesInt[ecur /* + NEXT */]) {

340270 2.6638 : ptrs[count++] = ecur;
: // REMIND: Adjust start Y if necessary

: }
:

21188 0.1659 : this.edgeCount = count;
:// if ((count & 0x1) != 0) {

:// System.out.println("ODD NUMBER OF EDGES!!!!");
:// }

:
15596 0.1221 : int[] xings = this.crossings;

31884 0.2496 : if (xings.length < count) {
10004 0.0783 : if (crossings == crossings_initial) {

: IntArrayCache.fill(crossings, 0, arrayMaxUsed, 0);
: } else {

: rdrCtx.putIntArray(crossings, arrayMaxUsed); // last known value for arrayMaxUsed
: }

: // Get larger array:
: this.crossings = xings = rdrCtx.getIntArray(count); // count or ptrs.length ?

: }
: // LBO: max used mark

: if (count > arrayMaxUsed) { arrayMaxUsed = count; }
:

5708 0.0447 : final int offSlope = SLOPE;
3331 0.0261 : final int offOr = OR;

:
: float curx;

: int cross, jcross;
:

135873 1.0637 : for (int i = 0, ecur, j; i < count; i++) {
21831 0.1709 : ecur = ptrs[i];

227570 1.7816 : curx = _edges[ecur /* + CURX */];
159331 1.2473 : _edges[ecur /* + CURX */] = curx + _edges[ecur + offSlope];

:
594904 4.6573 : cross = ((int) curx) << 1;

2985 0.0234 : if (_edgesInt[ecur + offOr] != 0 /* > 0 */) {
269643 2.1109 : cross |= 1;

: }
:

: // LBO: right shift crossings ...
: j = i;

128253 1.0040 : while (--j >= 0) {
102837 0.8051 : jcross = xings[j];

283135 2.2166 : if (jcross <= cross) {
92418 0.7235 : break;

: }
70142 0.5491 : xings[j + 1] = jcross;

257937 2.0193 : ptrs[j + 1] = ptrs[j];
: }

304554 2.3842 : xings[j + 1] = cross;
350968 2.7476 : ptrs[j + 1] = ecur;

: }
68603 0.5371 : return count;

: }
:

: boolean hasNext() {
: return nextY < maxY;

: }
:

: int curY() {
: return nextY - 1;

: }
: }



/* void sun.java2d.pisces.Renderer._endRendering(int, int, int, int) total: 2601080 20.3628 */

: private void _endRendering(final int bboxx0, final int bboxx1,
: int ymin, int ymax)

: {
: // Mask to determine the relevant bit of the crossing sum

: // 0x1 if EVEN_ODD, all bits if NON_ZERO
: final int mask = (windingRule == WIND_EVEN_ODD) ? 0x1 : ~0x0;

:
: // Useful when processing tile line by tile line

5500 0.0431 : final int[] alpha = alphaLine; /* void sun.java2d.pisces.Renderer._endRendering(int, int, int, int) total: 2601080 20.3628 */

:
39171 0.3067 : final PiscesCache _cache = this.cache;

:
: // Now we iterate through the scanlines. We must tell emitRow the coord

: // of the first non-transparent pixel, so we must keep accumulators for
: // the first and last pixels of the section of the current pixel row

: // that we will emit.
: // We also need to accumulate pix_bbox*, but the iterator does it

: // for us. We will just get the values from it once this loop is done
1199 0.0094 : int pix_maxX = Integer.MIN_VALUE;

: int pix_minX = Integer.MAX_VALUE;
:

: int y = boundsMinY; // needs to be declared here so we emit the last row properly.
:

354 0.0028 : for (final ScanlineIterator it = scanlineIterator.init(ymin, ymax);
1974 0.0155 : it.hasNext(); )

: {
525 0.0041 : final int numCrossings = it.next();

82494 0.6458 : y = it.curY();
:

6392 0.0500 : if (numCrossings > 0) {
8991 0.0704 : final int[] crossings = it.crossings; // array may change

:
: // LBO: TODO: explain crossing processing: Jim, please ? ...

48601 0.3805 : int lowx = crossings[0] >> 1;
33739 0.2641 : int highx = crossings[numCrossings - 1] >> 1;

94313 0.7383 : int x0 = Math.max(lowx, bboxx0);
: int x1 = Math.min(highx, bboxx1);

:
: pix_minX = Math.min(pix_minX, x0 >> SUBPIXEL_LG_POSITIONS_X);

44084 0.3451 : pix_maxX = Math.max(pix_maxX, x1 >> SUBPIXEL_LG_POSITIONS_X);
:

: // TODO: fix alpha last index = pix_xmax + 1
: // ie x1 >> SUBPIXEL_LG_POSITIONS_X (inclusive)

: // alpha[pix_xmax + 1] <=> alpha[x1 >> SUBPIXEL_LG_POSITIONS_X + 1]
: // in contrary to half-open pattern used by pix_maxX = max(x1 >> SUBPIXEL_LG_POSITIONS_X)

:
53976 0.4226 : int sum = 0;

: int prev = bboxx0;
: for (int i = 0; i < numCrossings; i++) {

61179 0.4789 : int curxo = crossings[i];
116872 0.9149 : int curx = curxo >> 1;

: // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
86834 0.6798 : int crorientation = ((curxo & 0x1) << 1) - 1;

:
: // LBO: TODO: explain alpha computation: Jim, please ? ...

56304 0.4408 : if ((sum & mask) != 0) {
145231 1.1370 : x0 = Math.max(prev, bboxx0);

88536 0.6931 : x1 = Math.min(curx, bboxx1);
70122 0.5490 : if (x0 < x1) {

140577 1.1005 : x0 -= bboxx0; // turn x0, x1 from coords to indeces
73769 0.5775 : x1 -= bboxx0; // in the alpha array.

:
10572 0.0828 : int pix_x = x0 >> SUBPIXEL_LG_POSITIONS_X;

42994 0.3366 : int pix_xmaxm1 = (x1 - 1) >> SUBPIXEL_LG_POSITIONS_X;
:

: if (pix_x == pix_xmaxm1) {
: // Start and end in same pixel

94868 0.7427 : int tmp = (x1 - x0);
6644 0.0520 : alpha[pix_x] += tmp;

62346 0.4881 : alpha[pix_x + 1] -= tmp;
126253 0.9884 : } else {

80656 0.6314 : int pix_xmax = x1 >> SUBPIXEL_LG_POSITIONS_X;
6179 0.0484 : int tmp = (x0 & SUBPIXEL_MASK_X);

25400 0.1988 : alpha[pix_x] += SUBPIXEL_POSITIONS_X - tmp;
224672 1.7589 : alpha[pix_x + 1] += tmp;

84426 0.6609 : tmp = (x1 & SUBPIXEL_MASK_X);
1039 0.0081 : alpha[pix_xmax] -= SUBPIXEL_POSITIONS_X - tmp;

184161 1.4417 : alpha[pix_xmax + 1] -= tmp;
: }

: }
: }

106838 0.8364 : sum += crorientation;
59094 0.4626 : prev = curx;

: }
:

: }
:

: // even if this last row had no crossings, alpha will be zeroed
: // from the last emitRow call. But this doesn't matter because

: // maxX < minX, so no row will be emitted to the piscesCache.
19036 0.1490 : if ((y & SUBPIXEL_MASK_Y) == SUBPIXEL_MASK_Y) {

13020 0.1019 : if (pix_maxX >= pix_minX) {
1464 0.0115 : emitRow(_cache, alpha, y >> SUBPIXEL_LG_POSITIONS_Y, pix_minX, pix_maxX);

: } else {
130734 1.0235 : _cache.clearAARow(y >> SUBPIXEL_LG_POSITIONS_Y);

: }
45 3.5e-04 : pix_minX = Integer.MAX_VALUE;

: pix_maxX = Integer.MIN_VALUE;
: }

: } // scan line iterator
:

: // Emit final row
56073 0.4390 : if (pix_maxX >= pix_minX) {

:// System.out.println("EmitFinalRow = " + (y >> SUBPIXEL_LG_POSITIONS_Y));
1345 0.0105 : emitRow(_cache, alpha, y >> SUBPIXEL_LG_POSITIONS_Y, pix_minX, pix_maxX);

: }
2484 0.0194 : }


Thanks again,
Laurent


2013/5/7 Andrew Haley <aph@redhat.com>

On 05/07/2013 09:44 AM, Laurent Bourgès wrote:

\> I confirm oprofile (0.96 on my fedora 14) works just fine (see below).
\>
\> Do you recommend me to use the latest (git) version ? 0.96 is quite old
\> (2011)

Only if the version you're using doesn't work.

\> Could you explain me a bit how to get sample counts corresponding to the
\> complete benchmark (few minutes long) ?

I don't understand. As far as I can see that is what you have.

\> should I use the event argument to set the highest count (reset) value ?
\>
\> By default, oprofile uses: CPU\_CLK\_UNHALTED:100000:0:1:1
\>
\> opcontrol --event=CPU\_CLK\_UNHALTED:400000
\>
\> What is the maximum value I can set ?

I always use the default. The more you increase sample frequency the
more overhead there is.

The real test is to experiment and see if it makes any difference.

Andrew.