C++对象模型

本文主要是在看《Inside the C++ Object Model》的时候,想通过案例的方式加深一下理解。

首先,由于编译器有内存对齐的优化,比如:

#include <iostream>

// An int followed by a char, declared outside any #pragma pack region.
// The sample output in this article reports sizeof(WithAlign) as 8.
class WithAlign {
private:
    int a;
    char b;
};

// Same two members as WithAlign, but declared while the packing alignment is 1,
// so no padding is added after the char (sample output: sizeof == 5).
#pragma pack(push, 1)
class WithoutAlign {
private:
    int a;
    char b;
};
#pragma pack(pop)

// Prints the size of the default-aligned class and of the 1-byte-packed class.
int main() {
    std::cout << "WithAlign: ";
    std::cout << sizeof(WithAlign);
    std::cout << std::endl;

    std::cout << "WithoutAlign: ";
    std::cout << sizeof(WithoutAlign);
    std::cout << std::endl;

    return 0;
}

// Output:
//  WithAlign: 8
//  WithoutAlign: 5

内存对齐的类大小为8(按int 4字节对齐),未对齐的为5(int + char)。这里为了更容易理解,全部默认使用1字节对齐。

注意,本文中的内容均仅在自己的机器的Docker容器中做测试,环境为:64位 ubuntu 16.04, gcc 5.5。

# lsb_release -a
Distributor ID: Ubuntu
Description:    Ubuntu 16.04.7 LTS
Release:        16.04
Codename:       xenial

# uname -a
Linux 06f25c7abffd 5.15.49-linuxkit #1 SMP Tue Sep 13 07:51:46 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux

# gcc --version
gcc (Ubuntu 5.5.0-12ubuntu1~16.04) 5.5.0 20171010

何为C++对象模型?

引用《深度探索C++对象模型》这本书中的话:

有两个概念可以解释C++对象模型:

  1. 语言中直接支持面向对象程序设计的部分。包括构造函数、析构函数、多态、虚函数等。
  2. 对于各种支持的底层实现机制。这部分并没有标准化,因此不同的编译器可以有自己的实现。

C++数据存放

C++类的数据成员有两种:

  1. static:也称为类变量、类静态变量,由同一个类的所有实例共享。
  2. non-static:也称为成员变量,每个类的实例均持有自己的一份。

类成员函数有三种:

  1. static:静态函数,属于该类,不需要实例也可以调用。
  2. non-static:成员函数,一般需要实例才可以调用。
  3. virtual:虚函数,用于实现多态。

例子:

#include <iostream>
#pragma pack(push, 1)
// Demo class carrying one of each member kind discussed in the article:
// static and non-static data, plus static, non-static and virtual functions.
class Base {
public:
    int data_;                 // non-static data: one copy per instance
    static int instance_cnt_;  // static data: shared by all instances

    // Constructor (non-static function): stores d and counts the new instance.
    Base(int d) : data_(d) {
        instance_cnt_ += 1;
    }

    // Destructor (virtual function).
    virtual ~Base() {}

    // Static function: returns the number of Base objects constructed so far.
    static int getInstanceCnt() {
        return instance_cnt_;
    }

    // Non-static function: returns this instance's data_.
    int getData() {
        return data_;
    }

    // Virtual function: prints the object's address and its data_.
    virtual void print() {
        std::cout << "[Base] address: " << this;
        std::cout << " data: " << data_;
        std::cout << std::endl;
    }
};

// Definition of the shared counter; it starts at zero.
int Base::instance_cnt_ = 0;
#pragma pack(pop)

int main() {
    Base a(100);
    Base b(200);
    std::cout << "Size " << sizeof(Base) << " InstanceCnt " << Base::getInstanceCnt() << " " << a.getInstanceCnt()
              << " " << b.getInstanceCnt() << std::endl;
    a.print();
    b.print();
}

// Output:
// Size 12 InstanceCnt 2 2 2
// [Base] address: 0x7ffffe4bf670 data: 100
// [Base] address: 0x7ffffe4bf67c data: 200

存储方式:

C++ Object Model

  1. static data:单独存放,不计入class的size中。
  2. non-static data:在实例存放,计入class的size。
  3. static function:单独存放
  4. non-static function:单独存放
  5. virtual function:单独存放,一个具体类对应的虚函数会整合进一个表中,表中存放了虚函数的指针等信息,实例存放一到多个指向虚表的指针。

这里可以看到Base类的size为12,其实就是存放了vptr和int data_这两个数据导致的(8字节的vptr + 4字节的int)。

那么我们就通过编译器和代码,来具体看一下每一部分的数据的存放方式。

#include <iostream>
#pragma pack(push, 1)
// Same demo class as before, except that the virtual destructor now logs the
// address of the object being destroyed.
class Base {
public:
    int data_;                 // non-static data: one copy per instance
    static int instance_cnt_;  // static data: shared by all instances

    // Constructor (non-static function): stores d and counts the new instance.
    Base(int d) : data_(d) {
        instance_cnt_ += 1;
    }

    // Destructor (virtual function): prints the address of this object.
    virtual ~Base() {
        std::cout << "destructor: " << this;
        std::cout << std::endl;
    }

    // Static function: returns the number of Base objects constructed so far.
    static int getInstanceCnt() {
        return instance_cnt_;
    }

    // Non-static function: returns this instance's data_.
    int getData() {
        return data_;
    }

    // Virtual function: prints the object's address and its data_.
    virtual void print() {
        std::cout << "[Base] address: " << this;
        std::cout << " data: " << data_;
        std::cout << std::endl;
    }
};

// Definition of the shared counter; it starts at zero.
int Base::instance_cnt_ = 0;
#pragma pack(pop)

// Walks the raw memory of two heap-allocated Base objects: prints where the
// vptr and data_ live, dumps the vtable slots around the vptr target, calls
// print() directly through its vtable slot, and finally destroys each object
// by calling the second destructor slot instead of using `delete`.
// Pointers are kept in uint64_t to avoid pointer-type juggling; the fixed
// offsets used below (8 for data_, 8-byte vtable slots) match the 64-bit,
// 1-byte-packed layout shown in the dump that accompanies this program.
int main() {
    Base *a = new Base(100);
    Base *b = new Base(200);

    uint64_t ptr_list[2] = {(uint64_t)a, (uint64_t)b};

    for (int idx = 0; idx < 2; ++idx) {
        uint64_t ptr = ptr_list[idx];
        std::cout << "instance " << idx << " address " << (void *)ptr << std::endl;

        uint64_t pointer2vptr = ptr;                // address of the vptr: the first 8 bytes of the object hold it
        uint64_t pointer2data = ptr + 8;            // address of data_: stored right after the vptr
        uint64_t vptr = *(uint64_t *)pointer2vptr;  // read the vptr value itself
        int data = *(int *)pointer2data;            // read the value of data_

        std::cout << "  vptr address " << (void *)pointer2vptr << " vptr " << (void *)vptr << std::endl;
        std::cout << "  data address " << (void *)pointer2data << " data " << data << std::endl;

        // About the vtable:
        // 1. It stores a sequence of pointers, in the order: offset, type_info, virtual_func1, virtual_func2, ...
        // 2. Virtual functions appear in the vtable in declaration order.
        // 3. An instance's vptr points at the first virtual_func entry, not at the real start of the vtable.
        // 4. GCC emits two vtable entries for a virtual destructor.
        uint64_t *vtable = (uint64_t *)vptr;  // strictly speaking the table starts at vptr - 16
        uint64_t offset_ptr = vtable[-2];
        uint64_t type_info_ptr = vtable[-1];
        uint64_t destructor1_ptr = vtable[0];  // destructor that does NOT call delete()
        uint64_t destructor2_ptr = vtable[1];  // destructor that calls delete() after destroying the object
        uint64_t print_fn_ptr = vtable[2];

        std::cout << "vtable address " << vtable << std::endl;
        std::cout << "  offset address " << (void *)offset_ptr << std::endl;
        std::cout << "  type_info address " << (void *)type_info_ptr << std::endl;
        std::cout << "  destructor1 address " << (void *)destructor1_ptr << std::endl;
        std::cout << "  destructor2 address " << (void *)destructor2_ptr << std::endl;
        std::cout << "  print_fn address " << (void *)print_fn_ptr << std::endl;

        typedef void (*print_fn_type)(void *);  // type used to call print(): takes one pointer (the object), returns nothing

        // call print
        std::cout << "call from instance:" << std::endl;
        ((Base *)ptr)->print();
        std::cout << "call from vtable:" << std::endl;
        ((print_fn_type)print_fn_ptr)((void *)ptr);

        // The object is released here through the vtable; no `delete` expression is used in this program.
        typedef void (*del_fn)(void *);
        ((del_fn)(destructor2_ptr))((void *)ptr);  // call the second destructor entry (the one that also calls delete())

        std::cout << std::endl;
    }
}

// Output:
// instance 0 address 0x60200000eff0
//   vptr address 0x60200000eff0 vptr 0x401fd8
//   data address 0x60200000eff8 data 100
// vtable address 0x401fd8
//   offset address 0
//   type_info address 0x401ff0
//   destructor1 address 0x4017c6
//   destructor2 address 0x401890
//   print_fn address 0x4018b6
// call from instance:
// [Base] address: 0x60200000eff0 data: 100
// call from vtable:
// [Base] address: 0x60200000eff0 data: 100
// destructor: 0x60200000eff0

// instance 1 address 0x60200000efd0
//   vptr address 0x60200000efd0 vptr 0x401fd8
//   data address 0x60200000efd8 data 200
// vtable address 0x401fd8
//   offset address 0
//   type_info address 0x401ff0
//   destructor1 address 0x4017c6
//   destructor2 address 0x401890
//   print_fn address 0x4018b6
// call from instance:
// [Base] address: 0x60200000efd0 data: 200
// call from vtable:
// [Base] address: 0x60200000efd0 data: 200
// destructor: 0x60200000efd0

编译命令:

g++ -O0 -std=c++11 -fdump-class-hierarchy -fsanitize=address memory.cpp
  • -O0:表示不做编译器优化
  • -fdump-class-hierarchy: 会dump出内存布局
  • -std=c++11: 使用C++11标准
  • -fsanitize=address:开启内存检查

和网上的很多写法有一点点不同,这里为了避免各种复杂的指针转换,就直接用uint64_t(测试环境是64位的)来存指针和计算了,只在打印地址的时候转换成void *

具体的字段偏移量的分析,在注释中有详细的解释,这里不再重复了。仅做出整体的分析。

Dump出的内存布局:

Vtable for Base
Base::_ZTV4Base: 5u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI4Base)
16    (int (*)(...))Base::~Base
24    (int (*)(...))Base::~Base
32    (int (*)(...))Base::print

Class Base
   size=12 align=1
   base size=12 base align=1
Base (0x0x7f291b569360) 0
    vptr=((& Base::_ZTV4Base) + 16u)

可以清楚地看出,Base是12字节,按照1字节对齐。vptr指向了虚表首地址+16的位置。并且有两个Base::~Base的虚函数。

上面程序的输出也可以看出:

  1. 两个Instance本身的地址和vptr/data的地址均不同,说明这部分数据确实是存放在实例本身的。
  2. 虚表和虚函数的地址都不变,说明被所有实例共享。
  3. 类的成员函数本质上也是普通函数,只是默认有了个this指针,通过vtable的直接调用也可以证实。
  4. 虚析构函数会生成两个虚函数,前者是对象析构但不调用delete(),相当于手动调用析构函数obj->~Base(),后者是析构且调用delete(),相当于delete obj。将案例中的析构改为调用第一个的话,就会报内存泄漏的错误了。参考 CXX ABI

继承

单继承

#include <iostream>

#pragma pack(push, 1)
// Root of the single-inheritance chain: one int plus a virtual destructor and
// one virtual print function.
class A {
public:
    int data_;  // non-static data

    // Constructor (non-static function): stores d in data_.
    A(int d) : data_(d) {}

    // Destructor (virtual function): logs the address of the object.
    virtual ~A() {
        std::cout << "[A] destructor: " << this;
        std::cout << std::endl;
    }

    // Virtual function: prints the object's address, then the address and value of data_.
    virtual void print() {
        std::cout << "[A] address: " << this;
        std::cout << " a: " << &data_ << " " << data_;
        std::cout << std::endl;
    }
};

// Derives from A, adds its own data_ (a separate member from A::data_) and a
// new virtual function printB; A::print is inherited unchanged.
class B : public A {
public:
    int data_;

    // Forwards a to A and stores b in B::data_.
    B(int a, int b) : A(a), data_(b) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, then address/value of A::data_ and of B::data_.
    virtual void printB() {
        std::cout << "[B] address: " << this;                              // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_;  // A
        std::cout << " b: " << &data_ << " " << data_;                    // B
        std::cout << std::endl;
    }
};

// Derives from B, adds a third data_ member and overrides print().
class C : public B {
public:
    int data_;

    // Forwards a and b to B and stores c in C::data_.
    C(int a, int b, int c) : B(a, b), data_(c) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, A::data_ reached via both A:: and B::A::, then B::data_ and C::data_.
    virtual void print() {
        std::cout << "[C] address: " << this;  // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_;
        std::cout << " " << &this->B::A::data_ << " " << this->B::A::data_;  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_;     // B
        std::cout << " c: " << &data_ << " " << data_;                       // C
        std::cout << std::endl;
    }
};

#pragma pack(pop)

int main() {
    std::cout << "Sizeof: A " << sizeof(A) << " B " << sizeof(B) << " C " << sizeof(C) << std::endl;

    A *a = new A(100);
    A *b = new B(100, 200);
    A *c = new C(100, 200, 300);

    a->print();          // A::print
    b->print();          // A:print
    ((B *)b)->printB();  // B:printB
    c->print();          // C:print
    ((B *)c)->printB();  // B:printB

    delete a;
    delete b;
    delete c;
}

// Output:
// Sizeof: A 12 B 16 C 20
// [A] address: 0x142f030 a: 0x142f038 100
// [A] address: 0x142f050 a: 0x142f058 100
// [B] address: 0x142f050 a: 0x142f058 100 b: 0x142f05c 200
// [C] address: 0x142f070 a: 0x142f078 100 0x142f078 100 b: 0x142f07c 200 c: 0x142f080 300
// [B] address: 0x142f070 a: 0x142f078 100 b: 0x142f07c 200
// [A] destructor: 0x142f030
// [B] destructor: 0x142f050
// [A] destructor: 0x142f050
// [C] destructor: 0x142f070
// [B] destructor: 0x142f070
// [A] destructor: 0x142f070

编译命令:

g++ -fdump-class-hierarchy single_inheritance.cpp

首先查看内存布局:

Vtable for A
A::_ZTV1A: 5u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1A)
16    (int (*)(...))A::~A
24    (int (*)(...))A::~A
32    (int (*)(...))A::print

Class A
   size=12 align=1
   base size=12 base align=1
A (0x0x7f12dea0c7e0) 0
    vptr=((& A::_ZTV1A) + 16u)

Vtable for B
B::_ZTV1B: 6u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1B)
16    (int (*)(...))B::~B
24    (int (*)(...))B::~B
32    (int (*)(...))A::print
40    (int (*)(...))B::printB

Class B
   size=16 align=1
   base size=16 base align=1
B (0x0x7f12de71bb60) 0
    vptr=((& B::_ZTV1B) + 16u)
  A (0x0x7f12dea0c840) 0
      primary-for B (0x0x7f12de71bb60)

Vtable for C
C::_ZTV1C: 6u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1C)
16    (int (*)(...))C::~C
24    (int (*)(...))C::~C
32    (int (*)(...))C::print
40    (int (*)(...))B::printB

Class C
   size=20 align=1
   base size=20 base align=1
C (0x0x7f12de71bbc8) 0
    vptr=((& C::_ZTV1C) + 16u)
  B (0x0x7f12de71bc30) 0
      primary-for C (0x0x7f12de71bbc8)
    A (0x0x7f12dea0c8a0) 0
        primary-for B (0x0x7f12de71bc30)

存储:

  1. A和之前的Base一样,没有啥好说的。大小12 = vptr+int
  2. B继承A。同时B也定义了自己的成员变量(虽然和A的同名,但二者不是同一个变量,可以通过obj->A::data_来访问父类的成员)。因此大小是16 = vptr + A::int + B::int
  3. C继承B。也定义了自己的成员变量。因此大小是20 = vptr + A::int + B::int + C::int
  4. 通过每个print和printB的打印结果可以看出,派生类先存放了自己的基类的数据,之后才存放自己的数据。

虚表:

每个类都有且只有一个虚表对象。

  1. A和Base一样就不解释了。
  2. B继承了A的print方法,同时自己又定义了printB方法,因此B复制了A的虚表结构,除了改了析构函数的地址外,还新增了printB的指针。
  3. C继承了B,同时覆盖了print方法。因此C复制了B的虚表,修改了析构函数,并修改了print函数的指针。
  4. 可以总结个规律:单继承下,派生类有且只有一个虚表,相当于直接将基类的虚表复制一次,替换掉自己的覆盖的虚函数,并追加自己新增的虚函数。

多继承

多继承比单继承复杂了很多。而且多继承一直被很多人诟病,像Java就直接不支持多继承。这里我们不考虑基类重名等情况。

#include <iostream>

#pragma pack(push, 1)
// First base class of the multiple-inheritance demo: one int and two virtual
// print functions that differ only in their tag.
class A {
public:
    int data_;

    // Stores d in data_.
    A(int d) : data_(d) {}

    // Logs the address of the object being destroyed.
    virtual ~A() {
        std::cout << "[A] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, then the address and value of data_.
    virtual void printA1() {
        std::cout << "[A1] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Same output as printA1 but tagged "[A2]".
    virtual void printA2() {
        std::cout << "[A2] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }
};

// Second, independent base class of the multiple-inheritance demo: one int and
// two virtual print functions that differ only in their tag.
class B {
public:
    int data_;

    // Stores d in data_.
    B(int d) : data_(d) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, then the address and value of data_.
    virtual void printB1() {
        std::cout << "[B1] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Same output as printB1 but tagged "[B2]".
    virtual void printB2() {
        std::cout << "[B2] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }
};

// Inherits from A and then B, overrides B::printB2 and adds a new virtual printC.
class C : public A, public B {
public:
    int data_;

    // Forwards a to A, b to B and stores c in C::data_.
    C(int a, int b, int c) : A(a), B(b), data_(c) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this;
        std::cout << std::endl;
    }

    // Override of B::printB2: prints this plus address/value of all three data_ members.
    virtual void printB2() {
        std::cout << "[C B2] address: " << this;                                  // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_ << " ";  // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }

    // New virtual function: same fields as printB2, tagged "[C]".
    virtual void printC() {
        std::cout << "[C] address: " << this;                                     // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_ << " ";  // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }
};

#pragma pack(pop)

int main() {
    std::cout << "Sizeof: A " << sizeof(A) << " B " << sizeof(B) << " C " << sizeof(C) << std::endl;

    C *c = new C(100, 200, 300);

    std::cout << "\ncall from C*" << std::endl;
    c->printA1();  // A::printA1
    c->printA2();  // A::printA2
    c->printB1();  // B::printB1
    c->printB2();  // C::printB2
    c->printC();   // C::printC

    std::cout << "\ncall from dynamic_cast A* " << dynamic_cast<A *>(c) << std::endl;
    dynamic_cast<A *>(c)->printA1();
    dynamic_cast<A *>(c)->printA2();

    std::cout << "\ncall from static_cast A*" << static_cast<A *>(c) << std::endl;
    static_cast<A *>(c)->printA1();
    static_cast<A *>(c)->printA2();

    std::cout << "\ncall from reinterpret_cast A*" << reinterpret_cast<A *>(c) << std::endl;
    reinterpret_cast<A *>(c)->printA1();
    reinterpret_cast<A *>(c)->printA2();

    std::cout << "\ncall from dynamic_cast B*" << dynamic_cast<B *>(c) << std::endl;
    dynamic_cast<B *>(c)->printB1();
    dynamic_cast<B *>(c)->printB2();

    std::cout << "\ncall from static_cast B*" << static_cast<B *>(c) << std::endl;
    static_cast<B *>(c)->printB1();
    static_cast<B *>(c)->printB2();

    std::cout << "\ncall from reinterpret_cast B*" << reinterpret_cast<B *>(c) << std::endl;
    reinterpret_cast<B *>(c)->printB1();
    reinterpret_cast<B *>(c)->printB2();

    delete c;
}

// Output:
// Sizeof: A 12 B 12 C 28

// call from C*
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100
// [B1] address: 0x183a03c 0x183a044 200
// [C B2] address: 0x183a030 a: 0x183a038 100  b: 0x183a044 200  c: 0x183a048 300
// [C] address: 0x183a030 a: 0x183a038 100  b: 0x183a044 200  c: 0x183a048 300

// call from dynamic_cast A* 0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100

// call from static_cast A*0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100

// call from reinterpret_cast A*0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100

// call from dynamic_cast B*0x183a03c
// [B1] address: 0x183a03c 0x183a044 200
// [C B2] address: 0x183a030 a: 0x183a038 100  b: 0x183a044 200  c: 0x183a048 300

// call from static_cast B*0x183a03c
// [B1] address: 0x183a03c 0x183a044 200
// [C B2] address: 0x183a030 a: 0x183a038 100  b: 0x183a044 200  c: 0x183a048 300

// call from reinterpret_cast B*0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100

// [C] destructor: 0x183a030
// [B] destructor: 0x183a03c
// [A] destructor: 0x183a030

编译命令:

g++ -fdump-class-hierarchy single_inheritance.cpp

先查看内存布局:

Vtable for A
A::_ZTV1A: 6u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1A)
16    (int (*)(...))A::~A
24    (int (*)(...))A::~A
32    (int (*)(...))A::printA1
40    (int (*)(...))A::printA2

Class A
   size=12 align=1
   base size=12 base align=1
A (0x0x7f640b38b7e0) 0
    vptr=((& A::_ZTV1A) + 16u)

Vtable for B
B::_ZTV1B: 6u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1B)
16    (int (*)(...))B::~B
24    (int (*)(...))B::~B
32    (int (*)(...))B::printB1
40    (int (*)(...))B::printB2

Class B
   size=12 align=1
   base size=12 base align=1
B (0x0x7f640b38b840) 0
    vptr=((& B::_ZTV1B) + 16u)

Vtable for C
C::_ZTV1C: 14u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1C)
16    (int (*)(...))C::~C
24    (int (*)(...))C::~C
32    (int (*)(...))A::printA1
40    (int (*)(...))A::printA2
48    (int (*)(...))C::printB2
56    (int (*)(...))C::printC
64    (int (*)(...))-12
72    (int (*)(...))(& _ZTI1C)
80    (int (*)(...))C::_ZThn12_N1CD1Ev
88    (int (*)(...))C::_ZThn12_N1CD0Ev
96    (int (*)(...))B::printB1
104   (int (*)(...))C::_ZThn12_N1C7printB2Ev

Class C
   size=28 align=1
   base size=28 base align=1
C (0x0x7f640b0e25b0) 0
    vptr=((& C::_ZTV1C) + 16u)
  A (0x0x7f640b38b8a0) 0
      primary-for C (0x0x7f640b0e25b0)
  B (0x0x7f640b38b900) 12
      vptr=((& C::_ZTV1C) + 80u)
  1. A、B本身没啥好说的。占用空间都是12 = vptr + int
  2. C继承A和B,顺序是先A再B。C中存放了A、B的数据,并且有两个虚指针(后续解释),因此大小为28 = vptr + A::int + vptr + B::int + C::int
  3. 根据C打印的结果,可以看出
    1. A::data的地址偏移了8,前面存放了vptr
    2. B::data偏移20,前面存放了vptr, A::data, vptr的数据(基类B子对象本身从偏移12处开始)
    3. C::data偏移24,前面存放了vptr, A::data, vptr, B::data的数据
  4. 可以看出,派生类存放数据顺序为:虚指针1 + 基类1的数据 + 虚指针2 + 基类2的数据 + ... + 自己的数据。另外如果继承的多个类,有的有虚函数有的没有,则优先存有虚函数的。参考

虚表:

  1. A、B的虚表很简单。
  2. C的虚表有两个,存储上是连续的,每个虚表都是offset + type_info + func1 + func2 + ... 的顺序。
  3. C有两个虚指针,第一个的偏移量是0,第二个是12,正好是vptr + int的大小。
  4. 第一个虚指针指向第一个虚表的第一个虚函数,第二个虚指针指向了第二个虚表的第一个虚函数。
  5. 虚函数调用,对于C的实例,如果转换为其他类型指针,则分情况:
    1. A*指针,由于偏移量是0,所以直接复用C的第一个表即可。
    2. B*指针,通过static_cast或者dynamic_cast可以得到B*指针,此时指针指向了原先实例偏移12字节的位置,其实也就是C中基类B的地址。在调用时,严格对应自己的虚表来执行(此时自己的虚表其实是C的第二个虚表)。
      1. 对于printB1的调用,其实就是调用原生B的函数,因此直接调用即可。
      2. 对于printB2的调用,由于C进行了覆盖,这里的虚表对应的函数是_ZThn12_N1C7printB2Ev,通过c++filt工具解析:c++filt _ZThn12_N1C7printB2Ev --> non-virtual thunk to C::printB2()。这是编译器生成的新函数,调用它相当于先把当前this指针修正(向前偏移12字节,指回C对象的起始地址),再调用C::printB2()
    3. C*指针,对于A和C的函数,直接使用第一个虚表来调用。对于B的函数,相当于先自动转换为B*类型,再调用。

最后,让我们直接通过虚表来调用这里所有的函数,以及强制调用被覆盖的原始函数。

#include <iostream>

#pragma pack(push, 1)
// First base class for the vtable-call demo: one int and two virtual print
// functions that differ only in their tag.
class A {
public:
    int data_;

    // Stores d in data_.
    A(int d) : data_(d) {}

    // Logs the address of the object being destroyed.
    virtual ~A() {
        std::cout << "[A] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, then the address and value of data_.
    virtual void printA1() {
        std::cout << "[A1] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Same output as printA1 but tagged "[A2]".
    virtual void printA2() {
        std::cout << "[A2] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }
};

// Second base class for the vtable-call demo: one int and two virtual print
// functions that differ only in their tag.
class B {
public:
    int data_;

    // Stores d in data_.
    B(int d) : data_(d) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, then the address and value of data_.
    virtual void printB1() {
        std::cout << "[B1] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Same output as printB1 but tagged "[B2]".
    virtual void printB2() {
        std::cout << "[B2] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }
};

// Inherits from A and then B, overrides B::printB2 and adds a new virtual printC.
class C : public A, public B {
public:
    int data_;

    // Forwards a to A, b to B and stores c in C::data_.
    C(int a, int b, int c) : A(a), B(b), data_(c) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this;
        std::cout << std::endl;
    }

    // Override of B::printB2: prints this plus address/value of all three data_ members.
    virtual void printB2() {
        std::cout << "[C B2] address: " << this;                                  // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_ << " ";  // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }

    // New virtual function: same fields as printB2, tagged "[C]".
    virtual void printC() {
        std::cout << "[C] address: " << this;                                     // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_ << " ";  // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }
};

#pragma pack(pop)

// Signature used to invoke a virtual function fetched from a vtable slot:
// the single argument is the object pointer handed to the callee.
typedef void (*print_fn)(void *);

// Reads the 8-byte value stored at ptr (the object's vptr), treats it as the
// address of an array of 8-byte entries and returns entry idx as a print_fn.
print_fn get_func(void *ptr, int idx) {
    const uint64_t *vptr_location = static_cast<const uint64_t *>(ptr);
    const uint64_t *slots = reinterpret_cast<const uint64_t *>(*vptr_location);
    return reinterpret_cast<print_fn>(slots[idx]);
}

// Calls each virtual function of C in several ways: through a C*, through a
// base-class pointer, and by fetching the raw slot from a vtable via get_func().
//
// Slot indices are counted from the entry the vptr points at. Per the dumped
// layout of C's vtable, seen through the primary vptr (at offset 0 of C):
//   [2] A::printA1   [3] A::printA2   [4] C::printB2   [10] B::printB1
// and seen through the B sub-object's vptr (at offset 12 of C):
//   [2] B::printB1   [3] thunk to C::printB2
//
// Byte offsets are applied on char* rather than void*: arithmetic on void* is a
// GNU extension and is not valid ISO C++. The value 12 is the offset of the B
// sub-object inside C in this 1-byte-packed layout.
int main() {
    C *c = new C(100, 200, 300);

    // printA1
    {
        std::cout << "A::printA1" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printA1();

        A *a = dynamic_cast<A *>(c);
        std::cout << "call from A* " << a << std::endl;
        a->printA1();

        std::cout << "call from vtable" << std::endl;
        get_func(c, 2)(c);
    }

    // printA2
    {
        std::cout << "A::printA2" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printA2();

        A *a = dynamic_cast<A *>(c);
        std::cout << "call from A* " << a << std::endl;
        a->printA2();

        std::cout << "call from vtable" << std::endl;
        get_func(c, 3)(c);
    }

    // printB1
    {
        std::cout << "B::printB1" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printB1();
        std::cout << "call from B*" << std::endl;
        B *b = dynamic_cast<B *>(c);
        b->printB1();

        std::cout << "call from B vtable" << std::endl;
        get_func(b, 2)(b);

        // Same function reached from the primary vptr; B::printB1 expects a
        // pointer to the B sub-object, hence the manual +12 adjustment.
        std::cout << "call from C vtable" << std::endl;
        get_func(c, 10)((char *)c + 12);
    }

    // printB2
    {
        std::cout << "C::printB2" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printB2();

        B *b = dynamic_cast<B *>(c);
        std::cout << "call from B* " << b << std::endl;
        b->printB2();

        // Slot 3 of the B-side table is the thunk, so it is given the B* as-is.
        std::cout << "call from B vtable" << std::endl;
        get_func(b, 3)(b);

        // Slot 4 of the primary table is C::printB2 itself and expects the
        // address of the whole C object: either c, or b moved back by 12 bytes.
        std::cout << "call from C vtable" << std::endl;
        get_func(c, 4)(c);
        get_func(c, 4)((char *)b - 12);
    }

    // force call B::printB2
    {
        B *b = dynamic_cast<B *>(c);

        std::cout << "call from B vtable C::printB2" << std::endl;
        get_func(b, 3)(b);

        // A standalone B object supplies B's own vtable, whose slot 3 is the
        // non-overridden B::printB2; it is then invoked on c's B sub-object.
        std::cout << "force call B::printB2" << std::endl;
        B tmp(100);
        get_func(&tmp, 3)(b);
    }

    delete c;
}

// Output:

// A::printA1
// call from C* 0x15b7c20
// [A1] address: 0x15b7c20 0x15b7c28 100
// call from A* 0x15b7c20
// [A1] address: 0x15b7c20 0x15b7c28 100
// call from vtable
// [A1] address: 0x15b7c20 0x15b7c28 100

// A::printA2
// call from C* 0x15b7c20
// [A2] address: 0x15b7c20 0x15b7c28 100
// call from A* 0x15b7c20
// [A2] address: 0x15b7c20 0x15b7c28 100
// call from vtable
// [A2] address: 0x15b7c20 0x15b7c28 100

// B::printB1
// call from C* 0x15b7c20
// [B1] address: 0x15b7c2c 0x15b7c34 200
// call from B*
// [B1] address: 0x15b7c2c 0x15b7c34 200
// call from B vtable
// [B1] address: 0x15b7c2c 0x15b7c34 200
// call from C vtable
// [B1] address: 0x15b7c2c 0x15b7c34 200

// C::printB2
// call from C* 0x15b7c20
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100  b: 0x15b7c34 200  c: 0x15b7c38 300
// call from B* 0x15b7c2c
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100  b: 0x15b7c34 200  c: 0x15b7c38 300
// call from B vtable
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100  b: 0x15b7c34 200  c: 0x15b7c38 300
// call from C vtable
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100  b: 0x15b7c34 200  c: 0x15b7c38 300
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100  b: 0x15b7c34 200  c: 0x15b7c38 300

// call from B vtable C::printB2
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100  b: 0x15b7c34 200  c: 0x15b7c38 300
// force call B::printB2
// [B2] address: 0x15b7c2c 0x15b7c34 200

// [B] destructor: 0x7ffe9a25c23c
// [C] destructor: 0x15b7c20
// [B] destructor: 0x15b7c2c
// [A] destructor: 0x15b7c20

菱形继承

#include <iostream>

#pragma pack(push, 1)
// Common base class of the diamond demo: one int and two virtual print
// functions that differ only in their tag.
class A {
public:
    int data_;

    // Stores d in data_.
    A(int d) : data_(d) {}

    // Logs the address of the object being destroyed.
    virtual ~A() {
        std::cout << "[A] destructor: " << this;
        std::cout << std::endl;
    }

    // Prints this, then the address and value of data_.
    virtual void printA1() {
        std::cout << "[A1] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Same output as printA1 but tagged "[A2]".
    virtual void printA2() {
        std::cout << "[A2] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }
};

// Left side of the diamond: derives from A, overrides printA1 and adds
// printB1 / printB2 plus its own data_.
class B : public A {
public:
    int data_;

    // Forwards a to A and stores b in B::data_.
    B(int a, int b) : A(a), data_(b) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this;
        std::cout << std::endl;
    }

    // Override of A::printA1: prints this, then the address and value of B::data_.
    virtual void printA1() {
        std::cout << "[B A1] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Prints this, then address/value of A::data_ and of B::data_.
    virtual void printB1() {
        std::cout << "[B1] address: " << this;                                    // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &data_ << " " << data_;                           // B
        std::cout << std::endl;
    }

    // Same output as printB1 but tagged "[B2]".
    virtual void printB2() {
        std::cout << "[B2] address: " << this;                                    // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &data_ << " " << data_;                           // B
        std::cout << std::endl;
    }
};

// Right side of the diamond: derives from A, overrides printA2 and adds
// printC1 / printC2 plus its own data_.
class C : public A {
public:
    int data_;

    // Forwards a to A and stores b in C::data_.
    C(int a, int b) : A(a), data_(b) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this;
        std::cout << std::endl;
    }

    // Override of A::printA2: prints this, then the address and value of C::data_.
    virtual void printA2() {
        std::cout << "[C A2] address: " << this;
        std::cout << " " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    // Prints this, then address/value of A::data_ and of C::data_ (labelled " b: ").
    virtual void printC1() {
        std::cout << "[C1] address: " << this;                                    // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }

    // Same output as printC1 but tagged "[C2]".
    virtual void printC2() {
        std::cout << "[C2] address: " << this;                                    // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }
};

// Bottom of the diamond: inherits from B and C (each with its own A) and adds
// one more int. It declares no virtual functions of its own.
class D : public B, public C {
public:
    int data_;

    // a1/b initialize the B branch, a2/c initialize the C branch, d is stored in D::data_.
    D(int a1, int a2, int b, int c, int d)
        : B(a1, b),
          C(a2, c),
          data_(d) {}
};

#pragma pack(pop)

int main() {
    std::cout << "Sizeof: A " << sizeof(A) << " B " << sizeof(B) << " C " << sizeof(C) << " D " << sizeof(D)
              << std::endl;

    D* d = new D(100, 200, 300, 400, 500);

    // d->printA1();
    // dynamic_cast<A*>(d)->printA1();
    dynamic_cast<B*>(d)->printA1();
    dynamic_cast<C*>(d)->printA1();

    // d->printA2();
    // dynamic_cast<A*>(d)->printA2();
    dynamic_cast<B*>(d)->printA2();
    dynamic_cast<C*>(d)->printA2();
}

// Output:
// Sizeof: A 12 B 16 C 16 D 36
// [B A1] address: 0x1fc7030 0x1fc703c 300
// [A1] address: 0x1fc7040 0x1fc7048 200
// [A2] address: 0x1fc7030 0x1fc7038 100
// [C A2] address: 0x1fc7040 0x1fc704c 400

内存:


Vtable for A
A::_ZTV1A: 6u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1A)
16    (int (*)(...))A::~A
24    (int (*)(...))A::~A
32    (int (*)(...))A::printA1
40    (int (*)(...))A::printA2

Class A
   size=12 align=1
   base size=12 base align=1
A (0x0x7f0d6f469360) 0
    vptr=((& A::_ZTV1A) + 16u)

Vtable for B
B::_ZTV1B: 8u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1B)
16    (int (*)(...))B::~B
24    (int (*)(...))B::~B
32    (int (*)(...))B::printA1
40    (int (*)(...))A::printA2
48    (int (*)(...))B::printB1
56    (int (*)(...))B::printB2

Class B
   size=16 align=1
   base size=16 base align=1
B (0x0x7f0d6f4add68) 0
    vptr=((& B::_ZTV1B) + 16u)
  A (0x0x7f0d6f4694e0) 0
      primary-for B (0x0x7f0d6f4add68)

Vtable for C
C::_ZTV1C: 8u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1C)
16    (int (*)(...))C::~C
24    (int (*)(...))C::~C
32    (int (*)(...))A::printA1
40    (int (*)(...))C::printA2
48    (int (*)(...))C::printC1
56    (int (*)(...))C::printC2

Class C
   size=16 align=1
   base size=16 base align=1
C (0x0x7f0d6f4ade38) 0
    vptr=((& C::_ZTV1C) + 16u)
  A (0x0x7f0d6f4695a0) 0
      primary-for C (0x0x7f0d6f4ade38)

Vtable for D
D::_ZTV1D: 16u entries
0     (int (*)(...))0
8     (int (*)(...))(& _ZTI1D)
16    (int (*)(...))D::~D
24    (int (*)(...))D::~D
32    (int (*)(...))B::printA1
40    (int (*)(...))A::printA2
48    (int (*)(...))B::printB1
56    (int (*)(...))B::printB2
64    (int (*)(...))-16
72    (int (*)(...))(& _ZTI1D)
80    (int (*)(...))D::_ZThn16_N1DD1Ev
88    (int (*)(...))D::_ZThn16_N1DD0Ev
96    (int (*)(...))A::printA1
104   (int (*)(...))C::printA2
112   (int (*)(...))C::printC1
120   (int (*)(...))C::printC2

Class D
   size=36 align=1
   base size=36 base align=1
D (0x0x7f0d6f524a10) 0
    vptr=((& D::_ZTV1D) + 16u)
  B (0x0x7f0d6f4adf08) 0
      primary-for D (0x0x7f0d6f524a10)
    A (0x0x7f0d6f469660) 0
        primary-for B (0x0x7f0d6f4adf08)
  C (0x0x7f0d6f4adf70) 16
      vptr=((& D::_ZTV1D) + 80u)
    A (0x0x7f0d6f4696c0) 16
        primary-for C (0x0x7f0d6f4adf70)
  1. A, B, C三个的大小之前已经解释过了。
  2. D同时继承B和C,这里B和C中各有一份A的数据。D中认为B、C中的A数据是不同的,因此D中存在两份A的数据,可以称为B::A和C::A。同时D有两个虚表。所以内存为 36 = B:vptr + B:A:int + B:int + C:vptr + C:A:int + C:int + D:int

虚表:

  1. 这里也和多继承一样,有两个虚表。
  2. 其中由于D中有两个A的数据,对应的两个虚表中的A的函数的指针就不做偏移。也就是说,如果转换为B*指针,则控制B::A的数据,反之亦然。
  3. D不能直接访问A的变量和函数(例如 d->printA1() 或 dynamic_cast<A*>(d)),因为D中存在两份A子对象(B::A和C::A),编译器无法确定应该使用哪一份,会报错 ambiguous

具体使用虚表来调用的例子就不给出了,对照内存布局,很容易看出来。

虚继承

虚继承是为了解决菱形继承而存在的。由于C++本身就不推荐多继承,出现菱形继承的情况又更少了。我们只需要知道虚继承可以减少内存的占用即可。

#include <iostream>

#pragma pack(push, 1)
// Virtual base class of the virtual-inheritance demo: one int and a virtual
// destructor that logs the object's address.
class A {
public:
    // Zero-initializes a_ so a freshly constructed object never holds an
    // indeterminate value.
    A() : a_(0) {}

    // Logs the address of the sub-object being destroyed.
    virtual ~A() { std::cout << "[A] destructor: " << this << std::endl; }

public:
    int a_;
};

// Inherits A virtually and adds one int of its own.
class B : virtual public A {
public:
    // Zero-initializes b_ so it never holds an indeterminate value.
    B() : b_(0) {}

    // Logs the address of the sub-object being destroyed.
    virtual ~B() { std::cout << "[B] destructor: " << this << std::endl; }

public:
    int b_;
};

// Inherits A virtually and adds one int of its own.
class C : virtual public A {
public:
    // Zero-initializes c_ so it never holds an indeterminate value.
    C() : c_(0) {}

    // Logs the address of the sub-object being destroyed.
    virtual ~C() { std::cout << "[C] destructor: " << this << std::endl; }

public:
    int c_;
};

// Most-derived class of the virtual-inheritance demo: inherits from B and C,
// which both inherit A virtually, and adds one int of its own.
class D : public B, public C {
public:
    // Zero-initializes d_ so it never holds an indeterminate value.
    D() : d_(0) {}

    // Logs the address of the object being destroyed.
    ~D() {
        std::cout << "[D] destructor: " << this << std::endl;
    }

public:
    int d_;
};

#pragma pack(pop)

// Prints the four class sizes, then writes a_ through three different
// qualified names and reads it back through the same three names.
int main() {
    std::cout << "Sizeof: A " << sizeof(A);
    std::cout << " B " << sizeof(B);
    std::cout << " C " << sizeof(C);
    std::cout << " D " << sizeof(D) << std::endl;

    D d;
    d.a_ = 1;
    d.B::a_ = 2;
    d.C::a_ = 3;

    // All three names read back the same value (sample output: "3 3 3").
    std::cout << d.a_;
    std::cout << " " << d.B::a_;
    std::cout << " " << d.C::a_;
    std::cout << std::endl;
    return 0;
}

// Output:
// Sizeof: A 12 B 24 C 24 D 40
// 3 3 3
// [D] destructor: 0x7ffd41e292a0
// [C] destructor: 0x7ffd41e292ac
// [B] destructor: 0x7ffd41e292a0
// [A] destructor: 0x7ffd41e292bc

这种情况下,D中只有一份A的数据。D中有3个虚指针以及A,B,C,D自身的4个int,因此大小为40 = vptr x 3 + int x 4。但是dump出的内存结构很复杂,感兴趣的同学可以自行测试一下。

获取函数的地址

最后,补充一下如何在代码中直接获取函数的地址。

#include <iostream>

// Plain free function: returns the sum of its two arguments.
int add(int a, int b) {
    const int sum = a + b;
    return sum;
}

// Demo class offering the same addition as a non-virtual member, a virtual
// member and a static member, so their addresses can be compared.
// The virtual functions are declared in the order add2, ~Base.
class Base {
public:
    int data_;

    // Stores d in data_.
    Base(int d) : data_(d) {}

    // Non-virtual member: data_ + a + b.
    int add1(int a, int b) {
        return data_ + a + b;
    }

    // Virtual member: data_ + a + b.
    virtual int add2(int a, int b) {
        return data_ + a + b;
    }

    // Static member: a + b, independent of any instance.
    static int add(int a, int b) {
        return a + b;
    }

    virtual ~Base() {}
};

// Reads the 8-byte value stored at ptr (the object's vptr), treats it as the
// address of an array of 8-byte entries and returns entry idx as a raw integer.
uint64_t get_func(void *ptr, int idx) {
    const uint64_t *vptr_location = static_cast<const uint64_t *>(ptr);
    const uint64_t *slots = reinterpret_cast<const uint64_t *>(*vptr_location);
    return slots[idx];
}

// Obtains a callable address for each kind of function (free function, static
// member, non-virtual member, virtual member) and prints the address together
// with the result of a direct call and of a call through that address.
// NOTE(review): converting a pointer-to-member-function to void* or to a plain
// function pointer, as done below, is not standard C++ — presumably this
// relies on a GCC extension; confirm before building with another compiler.
int main() {
    // Plain C-style function
    {
        std::cout << "normal func" << std::endl;
        typedef int (*add_fn)(int, int);
        add_fn fn = (add_fn)add;
        std::cout << (void *)fn << " " << add(100, 200) << " " << fn(100, 200) << std::endl;
    }

    // Static member function
    {
        std::cout << "class static func" << std::endl;
        typedef int (*add_fn)(int, int);
        add_fn fn = (add_fn)Base::add;
        std::cout << (void *)fn << " " << Base::add(100, 200) << " " << fn(100, 200) << std::endl;
    }

    // Member function (non-virtual)
    {
        // Variant 1: keep it as a pointer-to-member and call it with .*
        std::cout << "class non-virtual func 1" << std::endl;
        typedef int (Base::*add_fn)(int, int);
        add_fn fn = (add_fn)&Base::add1;
        Base b(100);
        std::cout << (void *)fn << " " << b.add1(100, 200) << " " << (b.*fn)(100, 200) << std::endl;
    }
    {
        // Variant 2: treat it as an ordinary function whose first argument is the object pointer
        std::cout << "class non-virtual func 2" << std::endl;
        typedef int (*add_fn)(void *, int, int);
        add_fn fn = (add_fn)&Base::add1;
        Base b(100);
        std::cout << (void *)fn << " " << b.add1(100, 200) << " " << fn(&b, 100, 200) << std::endl;
    }

    // Member function (virtual), obtained by name
    {
        // In the sample output (void *)fn prints 0x1 here rather than a code address.
        std::cout << "class virtual func 1" << std::endl;
        typedef int (Base::*add_fn)(int, int);
        add_fn fn = (add_fn)&Base::add2;
        Base b(200);
        std::cout << (void *)(&Base::add2) << " " << (void *)fn << " " << b.add2(100, 200) << " " << (b.*fn)(100, 200)
                  << std::endl;
    }

    // Member function (virtual), obtained from the vtable
    {
        std::cout << "class virtual func 2" << std::endl;
        typedef int (*add_fn)(void *, int, int);
        Base b(200);
        add_fn fn = (add_fn)get_func(&b, 0);  // slot 0: add2 is the first virtual function declared in Base
        std::cout << (void *)fn << " " << b.add2(100, 200) << " " << fn(&b, 100, 200) << std::endl;
    }
}

// Output:
// normal func
// 0x400af6 300 300
// class static func
// 0x40116f 300 300
// class non-virtual func 1
// 0x40112c 400 400
// class non-virtual func 2
// 0x40112c 400 400
// class virtual func 1
// 0x40114e 0x1 500 500
// class virtual func 2
// 0x40114e 500 500