C++ ABI for IA-64: Code and Implementation Examples (original) (raw)

The following test programs check for correct sharing of the virtual pointer with a virtual base class.


/*
Test case for sharing virtual bases.
In Derived_too,
the primary base class is NewShareme.
The bases Base and Shareme share vptrs
with Derived and are allocated at the
same offset as Derived.
Should get:
60% a.out
(long)(NewShareme *)dd - (long)dd = 0
(long)(Derived *)dd - (long)dd = 8
(long)(Base *)dd - (long)dd = 8
(long)(Shareme *)dd - (long)dd = 8
*/

// Chain of virtual inheritance: Derived -> (virtual) Base -> (virtual) Shareme.
// Each class declares exactly one virtual function and has no data members.
struct Shareme {
    virtual void foo();
};
struct Base : virtual Shareme {
        virtual void bar();
};
struct Derived : virtual Base {
        virtual void baz();
};

// Independent polymorphic class with no bases and no data members; it is the
// first virtual base named by Derived_too below.
struct NewShareme {
        virtual void foo();
};

// Class under test: main() prints the offset of every base subobject
// (NewShareme, Derived, Base, Shareme) relative to a Derived_too object.
struct Derived_too : virtual NewShareme, virtual Derived {
        virtual void bar();
};

// Out-of-line, intentionally empty bodies for every virtual function declared
// by the classes of this test, so that each declared virtual has a definition.
void Shareme::foo() {}
void Base::bar() {}
void Derived::baz() {}
void NewShareme::foo() {}
void Derived_too::bar() {}


extern "C" int printf(const char *,...);
#define EVAL(EXPR) printf( #EXPR " = %d\n", (EXPR) );
main()
{
  Derived_too *dd = new Derived_too;
  EVAL((long)(NewShareme *)dd - (long)dd);
  EVAL((long)(Derived *)dd - (long)dd);
  EVAL((long)(Base *)dd - (long)dd);
  EVAL((long)(Shareme *)dd - (long)dd);
}

/*
Test case for sharing virtual bases.
In Most_Derived,
the primary base class is Nonvirt1,
Nonvirt2 and Nonvirt3 share vptrs with
virtual base Shared_Virt.  Shared_Virt
should be at the same offset as Nonvirt2.
Should get:
67% a.out
(long)(Nonvirt1 *)dd - (long)dd = 0
(long)(Nonvirt2 *)dd - (long)dd = 8
(long)(Nonvirt3 *)dd - (long)dd = 16
(long)(Shared_Virt *)dd - (long)dd = 8
*/

// Virtual base shared by Nonvirt2 and Nonvirt3; one virtual function, no data.
struct Shared_Virt {
    virtual void foo();
};
// Two classes that each inherit Shared_Virt virtually and add one virtual
// function of their own.
struct Nonvirt2 : virtual Shared_Virt {
        virtual void bar();
};
struct Nonvirt3 : virtual Shared_Virt {
        virtual void baz();
};
// Polymorphic class with no bases; it is the first non-virtual base of
// Most_Derived below.
struct Nonvirt1 {
        virtual void foo();
};

// Class under test: inherits the three classes above non-virtually, in this
// order.  main() prints the offset of each base subobject, including the
// single Shared_Virt subobject reached through Nonvirt2 and Nonvirt3.
struct Most_Derived : Nonvirt1, Nonvirt2, Nonvirt3 {
        virtual void bar();
};

// Out-of-line, intentionally empty bodies for every virtual function declared
// by the classes of this test, so that each declared virtual has a definition.
void Shared_Virt::foo() {}
void Nonvirt2::bar() {}
void Nonvirt3::baz() {}
void Nonvirt1::foo() {}
void Most_Derived::bar() {}

extern "C" int printf(const char *,...);
#define EVAL(EXPR) printf( #EXPR " = %d\n", (EXPR) );
main()
{
  Most_Derived *dd = new Most_Derived;
  EVAL((long)(Nonvirt1 *)dd - (long)dd);
  EVAL((long)(Nonvirt2 *)dd - (long)dd);
  EVAL((long)(Nonvirt3 *)dd - (long)dd);
  EVAL((long)(Shared_Virt *)dd - (long)dd);
}

/*
Test case for sharing virtual bases.
In Most_Derived, share the vptr with
Interface1 but not Interface3, since
Interface3 is indirectly inherited.

Should get:
(long)(Interface1 *)dd - (long)dd = 0
(long)(Interface2 *)dd - (long)dd = 8
(long)(Interface3 *)dd - (long)dd = 8
(long)(Concrete1 *)dd - (long)dd = 8
*/

// Chain of virtual inheritance: Interface3 -> (virtual) Interface2 ->
// (virtual) Interface1.  Each class declares one virtual function and has no
// data members.
struct Interface1 {
    virtual void foo();
};
struct Interface2 : virtual Interface1 {
        virtual void bar();
};
struct Interface3 : virtual Interface2 {
        virtual void baz();
};

// Inherits Interface3 virtually, overrides foo(), and is the only class in
// this test that has a data member.
struct Concrete1 : virtual Interface3 {
        virtual void foo();
        // NOTE(review): presumably the data member is what keeps Concrete1
        // from being treated like the member-less interface classes during
        // layout -- confirm against the ABI's primary-base rules.
        int i; // important.
};

// Class under test: names Interface1 and Interface2 as direct virtual bases
// and reaches Interface3 only indirectly, through Concrete1.  main() prints
// the offset of each of these base subobjects.
struct Most_Derived : virtual Interface1, 
                      virtual Interface2,
                      virtual Concrete1 {
        virtual void bar();
};

// Out-of-line, intentionally empty bodies for every virtual function declared
// by the classes of this test, so that each declared virtual has a definition.
void Interface1::foo() {}
void Interface2::bar() {}
void Interface3::baz() {}
void Concrete1::foo() {}
void Most_Derived::bar() {}


extern "C" int printf(const char *,...);
#define EVAL(EXPR) printf( #EXPR " = %d\n", (EXPR) );
main()
{
  Most_Derived *dd = new Most_Derived;
  EVAL((long)(Interface1 *)dd - (long)dd);
  EVAL((long)(Interface2 *)dd - (long)dd);
  EVAL((long)(Interface3 *)dd - (long)dd);
  EVAL((long)(Concrete1 *)dd - (long)dd);
}

The following class definitions are intended to illustrate various cases relevant to the entries found in vtables to support virtual functions. We define a number of classes, many of which define a virtual function f. We are interested only in the vtable contents supporting calls of f; other contents of the classes are generally elided, except where necessary to emphasize some situation, and we do not attempt to describe other contents of the vtables, including precise layout.

The left column of this table contains the class definitions. The right column(s) describe the corresponding vtable contents as they are required to support calls to f. We use the notation &C::f to mean a vtable entry for the instance of f defined in class C, in the form defined by the ABI (currently a function pointer/GP pair). We use the notation &thunk(A*,C::f) to mean a vtable entry for a thunk which converts this from A* to C* and then calls C::f. We use the notation vcall(A*,C*,f) to mean a vcall offset that is added to an A* to produce a C* for use by an f thunk.

| Class definition | Vtable 1 contents | Vtable 2 contents |
|---|---|---|
| `struct X { virtual void u(); }` | Class X is an auxiliary class to be used only to prevent interesting classes from being primary bases later. | |
| `struct A { virtual void f(); }` | Vtable A<br>`&A::f` | |
| `struct B: public A { }` | Vtable A_in_B<br>`&A::f` | |
| `struct C: public A { virtual void f(); }` | Vtable A_in_C (primary)<br>`&C::f` | |
| `struct D: public X, public A { }` | Vtable A_in_D<br>`&A::f` | |
| `struct E: public X, public A { virtual void f(); }` | Vtable A_in_E<br>`&thunk(A*,E::f)` | Vtable E<br>`&E::f` |
| `struct G: public virtual A { }` | Vtable A_in_G<br>`&A::f` | |
| `struct H: public X, public virtual A { virtual void f(); }` | Vtable A_in_H<br>`&thunk(A*,H::f)`<br>`vcall(H::A*,H*,f)` | Vtable H<br>`&H::f` |
| `struct I: public H { int i; }` | Vtable A_in_I<br>`&thunk(A*,H::f)`<br>`vcall(I::A*,I::H*,f)` | Vtable H_in_I<br>`&H::f` |

There are several possible implementations of the thunks given the above information. Note in the following that we assume that prior to calling any vtable entry, the this pointer has been adjusted to point to the subobject corresponding to the vtable from which the vptr is fetched.

  1. Since the offsets are always known at compile time, even for virtual bases, each thunk could be distinct, adding the known offset to this and branching to the target function.
    This would result in a thunk for each overrider at a distinct offset. As a result, a branch mispredict and possibly an instruction cache miss would occur each time the actual type changed for a reference at any given point in the code.
  2. In the case of virtual inheritance, the offset, although known when the overrider is declared, may differ depending on derivations from the overrider's class. H and I above are the simplest example. H is a primary base for I, but the int member of I means that A is at a different offset from H in I than it was from a standalone H. Because of this, the ABI specifies that the secondary vtable for a virtual base A contain a vcall offset to H, so that a shared thunk can load the vcall offset, adding it to this, and branch to the target function H::f.
    This would result in fewer thunks, since for an inheritance hierarchy where A is a virtual base of H, and H::f overrides A::f, all instances of H in a larger hierarchy can use the same thunk. As a result, these thunks will cause fewer branch mispredictions and instruction cache misses. The tradeoff is that they must do a load before the offset add. Since the offset is smaller than the code for a thunk, the load should miss in cache less frequently, so better cache miss behavior should produce better results in spite of the 2 or more cycles required for the vcall offset load.
  3. In the case of non-virtual inheritance, when an overrider is declared, the entire set of associated overriding and overridden functions f is known along with their associated vcall offsets, and they are all constants. Therefore, it is possible to cascade the offset adds as follows. Suppose we have A::f, overridden by B::f at offset -16, and finally overridden by C::f at offset -32 from B, -48 from A. Assuming that this is in register out0, the IA-64 code can look like:

    thunk-A-to-C::f :
    add out0 = -16,out0 ;;
    thunk-B-to-C::f :
    add out0 = -32,out0 ;;
    C::f :

    normal entry code...

This code will leave us with about the same number of branch mispredictions for the thunk calls, but should eliminate a large proportion of the instruction cache misses, and costs one cycle per level in the adjustment sequence. Note that there is no particular required order of the thunk entries, so the compiler could optimize by putting the most frequent one closest to the target function and so on. Also, if the sequence gets too long, any entry can add its entire offset at once and branch directly to the target function.

In order to make these implementations practical, the ABI must specify:

Assuming that all of the above optimizations are implemented, we note a number of benefits of the design specified.

    m2: /* Thunk for morally virtual base M2.  */  
        this += offsetof (V, M1) - offsetof (V, M2)  
    m1: /* Thunk for morally virtual base M1.  */  
        this -= offsetof (V, M1)  
    v:  /* Thunk for virtual base V.  */  
        this += vcall offset stored in V vtable  
        goto f;  
    a2: /* Thunk for non-virtual base A2.  */  
        this += offsetof (B, A1) - offsetof (B, A2)  
        /* Fall through.  */  
    a1: /* Thunk for non-virtual base A1.  */  
        this += offsetof (B, C) - offsetof (B, A1)  
        /* Fall through.  */  
    f:  /* Non-adjusting entry point.  */  

(Here `offsetof' is a compile-time computable function that gives the offset of its second parameter in its first parameter.)
(Alternatively, the `v' entry point above could be of the form:

    v: /* Thunk for virtual base V.  */  
       this += vcall offset stored in V vtable  
               - offsetof (B, C) + offsetof (B, A2)  
       /* Fall through.  */  

Which alternative is better depends on how many adds follow at this point. In general, if many adds remain before the non-adjusting entry point, it may be better to suffer the consequences of the indirect branch.)
In this way, a virtual call through a base A1, A2, etc., that is not a virtual base of C (or a direct or indirect base of a virtual base of C), does not require an additional branch, and is therefore more likely to avoid icache misses. Even the thunk for V may avoid severe icache penalties since it is located near the non-adjusting entry point for f. Furthermore, if there are no non-virtual bases, then the sequence can become just:

    v: /* Thunk for virtual base V.  */  
       this += vcall offset stored in V vtable  
    f: /* Non-adjusting entry point.  */  

The following is a test program from Compaq that breaks on many compilers.

/* 
This test program should output: 

V1 called 
V2 called 
C called 
C::foo called 7 
PASSED this == cp 
D called 
~C called 
C::foo called 7 
PASSED this == cp 
~V2 called 
~V1 called 
Int caught 
*/ 

extern "C" int printf(const char *,...); 
// Root of the virtual-base hierarchy: one int member, a virtual foo(), and a
// user-declared constructor and (non-virtual) destructor, all defined out of
// line further down.
struct V1 {
        int v; 
        virtual int foo(); 
        V1(); 
        ~V1(); 
}; 
// Inherits V1 virtually and overrides foo(); same shape as V1 otherwise.
struct V2 : virtual V1 {
        int v2; 
        virtual int foo(); 
        V2(); 
        ~V2(); 
}; 
// Names both V1 and V2 as direct virtual bases and overrides foo().  Its
// constructor and destructor perform the checks this test is about.
struct C : virtual V1, virtual V2 {
        int c; 
        virtual int foo(); 
        C(); 
        ~C(); 
}; 

// Plain data-only class with no virtual functions; it precedes C in D's
// base list.
struct B {
   int b; }; 
// Most-derived class of the test: non-virtual bases B then C, one int member,
// and a new virtual function bar().  Its constructor throws (see D::D).
struct D : B, C {
        int d; 
        virtual int bar(); 
        D(); 
        ~D(); 
}; 
extern "C" int printf(const char *,...); 

// Test driver.  Constructing a D runs the V1, V2 and C constructors and then
// D::D, which throws an int.  The exception leaves the new-expression, so
// `delete d` is never reached and D::~D does not run; the already-constructed
// C, V2 and V1 subobjects are destroyed during unwinding before the handler
// prints its message.  The full expected trace is listed in the comment that
// opens this program.
int main()
{
  try {
        D *d = new D;
        delete d;
  } catch (int) {
        printf("Int caught\n");
  }
  return 0;
}

// Reports that V1's version of foo() ran; always returns 1.
int V1::foo()
{
    printf("V1::foo called\n");
    return 1;
}

// Sets v to 5 and reports the construction.
V1::V1()
    : v(5)
{
    printf("V1 called\n");
}

// Reports the destruction.
V1::~V1()
{
    printf("~V1 called\n");
}

// Reports that V2's version of foo() ran; always returns 1.
int V2::foo()
{
    printf("V2::foo called\n");
    return 1;
}

// Sets v2 to 6 and reports the construction.
V2::V2()
    : v2(6)
{
    printf("V2 called\n");
}

// Reports the destruction.
V2::~V2()
{
    printf("~V2 called\n");
}

// Reports that C's version of foo() ran, together with the current value of
// the member c; always returns 1.
int C::foo()
{
    printf("C::foo called %d\n", c);
    return 1;
}
// Sets c to 7, reports the construction, and then checks two things while
// the object is still under construction:
//   1. a virtual call to foo() made through a pointer to the virtual base V1
//      (the expected output shows "C::foo called 7" at this point);
//   2. a dynamic_cast from that V1 pointer back to C*, which must yield
//      `this`.
C::C()
    : c(7)
{
    printf("C called\n");

    V1 *as_v1 = this;
    as_v1->foo();

    C *round_trip = dynamic_cast<C *>(as_v1);
    if (round_trip != this) {
        printf("FAILED this != cp\n");
        return;
    }
    printf("PASSED this == cp\n");
}
// Reports the destruction and then repeats the two checks made by the
// constructor, this time while the object is being destroyed:
//   1. a virtual call to foo() made through a pointer to the virtual base V1
//      (the expected output shows "C::foo called 7" at this point);
//   2. a dynamic_cast from that V1 pointer back to C*, which must yield
//      `this`.
C::~C()
{
    printf("~C called\n");

    V1 *as_v1 = this;
    as_v1->foo();

    C *round_trip = dynamic_cast<C *>(as_v1);
    if (round_trip != this) {
        printf("FAILED this != cp\n");
        return;
    }
    printf("PASSED this == cp\n");
}

// Reports that D::bar() ran; always returns 1.
int D::bar()
{
    printf("D::bar called\n");
    return 1;
}

// Sets d to 8, reports the construction, and then throws the int 5 so that
// the construction of D fails after all of its bases have been built.
D::D()
    : d(8)
{
    printf("D called\n");
    throw 5;
}

// Reports the destruction.
D::~D()
{
    printf("~D called\n");
}