(Boolan) C++ STL与泛型编程

作者: 故事狗 | 来源:发表于2017-06-20 20:51 被阅读201次

关于STL的话题已经探讨了好几个篇了，今天来讨论一下剩下的一些内容吧

一个万用的hash function

使用以Hash Table为底层的容器，比如unordered_map（hash_map），在使用个过程中，需要有计算hash code的方法来计算出元素在hashtable中的具体位置。
那么对于自定义类型来说，需要指定对应的Hash Function，只有这样才能在使用的过程，系统找到对应元素应该插入的位置，以便系统自动管理我们需要插入到容器中的各个元素。关于这个函数该如何定义，对于不同个自定义类型来说并不相同，也没有办法找到一个通用的方式，毕竟石头类和动物类，他们各自的hash function多少会有些区别吧，具体的定义，还需要依靠开发这自己了。
那么，为什么后还会有关于个万用的hash function呢？关于这个问题，先放下不说，咱们先来看看关于指定hash function的方式吧。

指定自定义的hash function的方法一

#include<functional>
class Customer{
      //........
};
//这是一个自定义的类，如果使用他作为hashtable为底层的容器时
//系统需要知道如何把他插入到hashtable中的具体位置去


//通过一个类，重载了操作符()的方式，形成了一个仿函数（函数对象）
class CustomerHash
{
public:
      std::size_t operator()(const Customer& c) const{
            return /*........*/;
      }
};

//使用
unordered_set<Customer, CustomerHash> customers;
//声明容器变量时，指定的第二参数，就是对应的Hash Function
//实际在第二参数中指定的是一个函数对象

指定自定义的hash function的方法二

size_t customer_hash_func(const Customer& c)
{
        return /*......*/;
}

//使用
unorder_set<Customer, size(*) (const Cunstomer&)> customers(20， customer_hash_func);
//这个方法实际是通过指定了一个函数指针的方式来通知容器，对于这个自定义类到底应该使用哪个hash function

指定自定义的hash function的方法三

//以struct hash 偏特化形式实现hash function
class MyString
{
private:
      char* _data;
      size_t _len;
};

namespace std;
{
      template<>
      struct hash<MyStrinng>
      {
              size_t operatoe()(const MyString& s) const noexcept{
                      return hash<string>()(string(s.get()));
              }
      }
}

那么通过这三种方式可以指定需要的hash function，但是到底能不能有一个万能的hash function呢？不论自定义的的class，内涵的成员是由什么组成，总归都是有基本数据类型所组成（比如int、char、double等），而之前我们也提过，基本数据类型，STL是提供了一套Hash Function的实现代码的。那么，能否依靠之前我们提到的方法将hash code相加来获取该对象的hash code呢？（例如下方代码）

class CustomerHash
{
public:
      std::size_t operator()(const Customer& c) const{
            return std::hash<std::string>()(c.fname) 
                  + std::hash<std::string>()(c.Iname) 
                  + std::hash<long><c.no);
      }
}
//这个方法可以实现出计算Hash code的功能，但是，不得不提出的就是
//这个方法其实对于hash code的重复概率比较高，因为只是简单的相累加嘛。而重复的hash code 会导致“bucket”中的元素过多，影响查询的效率

其实在*** C++ TR1 ***的版本后，STL为我么提供了一套万用的Hash Function。那么应该如何使用呢？咱们就先来看看吧。

//使用万能的Hash Function
class CustomerHash
{
public:
      std::size_t operator()(const Cunstomer& c) const {
            return hash_val(c.fname, c,Iname, c.no);  
            //在此，只需要调用hash_val函数，并把每个成员的值传递给他就行了
      }
}

咱们看过了这个万用Hash Functon的使用，那么他其中的实现原理到底是什么呢？咱们来看看他的代码吧。

#include<functional>
template<typename T>
inline void hash_combine(size_t& seed, const T& val){
        seed = std::hash<T>(val) + 0x9e3779b9
              + (seed << 6) + (seed >> 2);
}

template<typename T>
inline void hash_val(size_t& seed, const T& val){
      hash_combine(seed, val);
}

template<typename T, typename... Types>
inline void hash_val(size_t& seed, const T& val, const Type&... args){
      hash_combine(seed, val);
      hash_val(seed, args...);
}

template<typename... Types>
inline size_t hash_val(const Types&... args){
      size_t seed = 0;
      hash_val(seed, args...);
      return seed;
}

上面的代码其实写的还是比较巧妙的，并且使用了一个C++的新特性，名字是variadic templates，也就是可以传入多个模版。他的作用有点类似与可变参数的传递，但是对于可变参数来说，改变的是参数的个数，他们的类型都是相同的。可以通过头文件<stdarg.h>中的va_list中的一些列函数来拿出传入的每个变量。

而对于variadic templates来说，很重要的是，传入函数中的每个参数都会具备一个模版，也就是说，可能每个参数的类型都会不同，那么应该处理每个不同的参数需要一套对应的解决方案

其实上面的代码就是这套方案的解法，其实对于函数

template<typename... Types>
inline size_t hash_val(const Types&... args)

来说可以看作为是一个泛化的版本，因为这里面的模版可以传入任意数量的任意模版类型。
而函数

template<typename T, typename... Types>
inline void hash_val(size_t& seed, const T& val, const Type&... args)

不难看出，他的模版变为了*** 1 + n ***的模式也正是由于这样的模式，他接收的参数的范围是比第一个函数要少的，那么，也就是范围其实先对减小，我们可以把它理解为偏特化的一个过程，也就是在泛化中，调用了偏特化的函数，每次调用其实都会减少模版的数量。
观察之前源码中的四个函数不难看出，有三个重载，而最后一个hash_val只接收一个模版，那么也就是最后的时候会被调用，也就是，通过函数重载和模版特化的过程来一层一层把模版参数的数量解开。
而最后四级都是调用hash_combine的函数来计算出hash code。

tuple

tuple是在C++ 2.0之后引进的一种存放元素的集合，通过这个集合，可以存放不同类型的数据。

用例

#include <iostream>     // std::cout
#include <tuple>        // std::tuple, std::get, std::tie, std::ignore

int main ()
{
  //通过构造函数来创建tuple对象
  std::tuple<int,char> foo (10,'x');

  //通过make_tuple来创建tuple对象，并且可以实现自动类型推到
  //实际的类型应该为: tuple<string, double, int, char>
  auto bar = std::make_tuple ("test", 3.1, 14, 'y');

  /从tuple中获取数据
  std::get<2>(bar) = 100;                                    // access element

  int myint; char mychar;
  //通过tie函数来从tuple中一次性获取所有的数据
  std::tie (myint, mychar) = foo;                            // unpack elements
  
  //也可以通过std::ignore跳过部分不需要的元素，只取出需要的元素
  std::tie (std::ignore, std::ignore, myint, mychar) = bar;  // unpack (with ignore)

  mychar = std::get<3>(bar);

  //直接修改其中元素的值
  std::get<0>(foo) = std::get<2>(bar);
  std::get<1>(foo) = mychar;

  std::cout << "foo contains: ";
  std::cout << std::get<0>(foo) << ' ';
  std::cout << std::get<1>(foo) << '\n';

  typedef tuple<int, float, string> TupleType;
  //获取tuple中的元素个数
  cout << tuple_size<TupleType>::value << endl;  //3
  //获取元素类型
  tuple_element<1, TupleType>::type f1 = 1.0;  //float

  return 0;
}

tuple的实现原理
GNU4.8节选并简化版本

//只有一个模版参数的版本的特化版本
template<typename Values> class tuple;
//没有模版参数的版本的特化版本
templat<> class tuple<>{};

//每次都会继承剔除掉第一个模版参数的class
template<typename Head, typename... Tail>
class tuple<Head, Tail...>: private tuple<Tail...>
{
    typedef tuple<Tail...> inherited;
public:
    tuple();
    tuple(Head v, Tail... vtail):m_head(v), inherited(vtail...){}

    typename Head::type head() {return m_head;}
protected:
    Head m_head;
};

`tuple<int, float, string> t(6, 6.3, "xxxxx")` UML图

tuple可以自动帮我们将多个参数实现继承，通过这样的继承方式，来处理每个元素的不同的类型。每次继承的都是模版的后半部分，逐渐将第一个模版参数解析出来。

tuple中的成员函数
head()可以返回第一个元素的值。
tail()可以返回父类成分起点，返回类型是父类的类型，但实际上是自己对象，而由于类型指定，实际返回的就是父类对象了。

TypeTraits

GNU 2.9版本
struct __true_type{};
struct __false_type{};
//泛化
template<class type>
struct __type_traits{
      typedef __true_type this_dummy_member_must_be_first;
      typedef __false_type has_trivial_default_constructor;
      typedef __false_type has_trivial_copy_constructor;
      typedef __false_type has_trivial_assignment_operator;
      typedef __false_type has_trivial_destructor;
      typedef __false_type is_POD_type;  //POD = Plain Old Data，代表旧式的class 也就是struct
}；

//int的特化
template<> 
struct __type_traits<int>{
      typedef __true_type has_trivial_default_constructor;
      typedef __true_type has_trivial_copy_constructor;
      typedef __true_type has_trivial_assignment_operator;
      typedef __true_type has_trivial_destructor;
      typedef __true_type is_POD_type;
}

//double的特化
template<> 
struct __type_traits<double>{
      typedef __true_type has_trivial_default_constructor;
      typedef __true_type has_trivial_copy_constructor;
      typedef __true_type has_trivial_assignment_operator;
      typedef __true_type has_trivial_destructor;
      typedef __true_type is_POD_type;
}

在GNU2.9版本中，其实typeTraits实际是依靠模版的泛化和特化的版本来让不同的类型进入不同的代码块中的。代码块中实际也就是一些typedef，这些typedef，实际就标注了，是否有默认的构造函数，拷贝构造，赋值构造，析构函数等具体类型的情况。这个版本的type traits 来说，需要使用者自己特化一个自己类型的type traits ，实际来说实用性不高。

从C++2.0 开始，type traits的复杂程度大幅上升。

C++11 之后type_traits所提供的查询的项目

对于C++11的改版后的最大的便利就是不需要在重新定义特化版本的type_traits，系统都能够自动得到具体的属性特征

{
   std::cout << "int: " << std::is_trivially_copy_constructible<int>::value << std::endl;
}

type_traits的实现
- is_void

//去除const
template<typename _Tp>
struct remove_const{
    typedef _Tp type;
};

template<typename _Tp>
struct remove_const<_Tp const>{
    typedef _Tp type;
};

//取出volatile
template<typename _Tp>
struct remove_volotile{
    typedef _Tp type;
};

template<typename _Tp>
struct remove_volotile<_Tp volatile>{
    typedef _Tp type;
};

//去除const和volatile
template<typename _Tp>
struct remove_cv{
    typedef remove_const<typename remove_volatile<_Tp>::type>::type type;
};


template<typename>
struct __is_void_helper: public false_type{};

template<>
struct __is_void_helper<void>:public true_type{};

//is_void
template<typename _Tp>
struct is_void
    :public __is_void_helper<typename remove_cv<_Tp>::type>::type {}

关于去除const和volatile都是通过模版的泛化和特化组合来实现的
同时，是否是void的类型，也是通过模版的泛化和特化的方式来区分出来不同的类型

is_integral

typename<typename>
struct __is_integral_helper
  :public false_type{};

typename<>
struct __is_integral_helper<bool>
  :public true_type{};

typename<>
struct __is_integral_helper<char>
  :public true_type{};

typename<>
struct __is_integral_helper<signed char>
  :public true_type{};

typename<>
struct __is_integral_helper<unsigned char>
  :public true_type{};

template<>
struct __is_integral_helper<int>
  :public true_type{};

template<>
struct __is_integral_helper<unsigned int>
  :public true_type{};

template<>
struct __is_integral_helper<long>
  :public true_type{};

template<>
struct __is_integral_helper<unsigned long>
  :public true_type{};

template<>
struct __is_integral_helper<long long>
  :public true_type{};

template<>
struct __is_integral_helper<unsigned long long>
  :public true_type{};

//is_integral
template<typename _Tp>
struct __is_integral
  :public __is_integral_helper<typename remove_cv<_Tp>::type>::type{};

is_class, is_union, is_enum, is_pod

//is_enum
template<typename _Tp>
struct is_enum
    :public integral_constant<bool, __is_enum(_Tp)>{};

//is_union
template<typename _Tp>
struct is_union
    :public integral_constant<bool, __is_union(_Tp)>{};

//is_class
template<typename _Tp>
struct is_class
    :public integral_constant<bool, __is_class(_Tp)>{};

//is_pod
//Could use is_standard_layout && is_trivial instead of the builtin.
template<typename _Tp>
struct is_pod
    :public integral_constant<bool, __is_pod(_Tp)>();

在标准库中，没有__is_xxx(_Tp)的内容，实际实现应该是调用了编译器的底层的函数

is_move_assignable

template<typename _Tp, bool = __is_referenceable<_Tp>::value>
struct __is_move_assignable_impl;

template<typename _Tp>
struct __is_move_assignable_impl<_Tp, false>:public false_type{};

template<typename _Tp>
struct __is_move_assignable_impl<_Tp, true>:public is_assignable<_Tp&, _Tp&&>{};

template<typename _Tp>
struct __is_referenceable:public __or_<is_object<_Tp>, is_reference<_Tp>>::type

template<typename _Res, typename... _Args>
struct __is_referenceable<_Res(_Args...)>:public true_type{};

template<typename _Res, typename... _Args>
struct __is_referenceable<_Res(_Args......)>:public true_type{};

//is_move_assignable
template<typename _Tp>
struct is_move_assignable:public __is_move_assignable_impl<_Tp>{};

cout

class ostream: virual public ios
{
public:
    ostream& operator << (char c);
    ostream& operator << (unsigned char c){return (*this) << (char)c; };
   ostream& operator << (signed char c){return (*this) << (char)c; };
//............................
//cout根据操作符重载的方式来使得cout可以接受许多的类型
} ;

class _IO_ostream_withassign: public ostream{
public:
    _IO_ostream_withassign& operator= (ostream&);
    _IO_ostream_withassign& operator=(_IO_ostream_withassign& rhs){
        return oprator=(static_cast<ostream&> (rhs); 
    }
}

extern _IO_ostream_withassign cout;

moveable元素对于容器影响

moveable是C++11之后推出的功能

对vector的影响

对于插入300万个数据，vector会牵扯到多次扩容，所以在搬动数据的过程中，需要调用copy Constructer或move Constructer。
有图上可以知道，有move Constructer的结果会更快。

对list的影响

对于没有太多将数据搬移的list来说，有没有move Constructer的效率差别不大

定义一个moveable class

class MyString{
public:
    static size_t DCtor;  //累计调用默认ctor的次数
    static size_t Ctor;  //累计ctor的调用次数
    static size_t CCtor;  //累计copy-ctor的调用次数
    static size_t CAsgn;  //累计copy-asgn的调用次数
    static size_t MCtor;  //累计move-ctor的调用次数
    static size_t MAsgn;  //累计move-asgn的调用次数
    static size_t Dtor; //累计dtor的调用次数

private:
    char* _data;
    size_t _len;
    void _init_data(const char* s){
        _data  = new char[_len + 1];
        memcpy(_data, s, _len);
        _data[_len] = '0';
    }
public:
    //default ctor;
    MyString():_data(NULL), _len(0){ ++DCtor; }
    //ctor
    MyString(const char* p): _len(strlen(p)){
        ++Ctor;
        _init_data(p);
    }

    //copy ctor
    MyString(const  MyString& str): _len(str._len){
        ++CCtor;
        _init_data(str._data);
    }

    //move ctor, with noexcept
    MyString(MyString&& str) noexcept
        :_data(str._data), _len(str._len){
        ++MCtor;
        str._len = 0;
        str._data = NULL;  //避免delete（in dtor）
    }

    //cope assignment 
    MyString& operator= (const MyString& str){
        ++CAsgn;
        if(this != &str){
            if(_data) delete _data;
            _len = str._len;
            _init_data(str._data);
        }
        reutrn *this;
    }
    
    //move assignment
    MyString& operator=(MyString&& str) noexcept{
        ++MAsgn;
        if(this != &str){
            if(_data) delete _data;
            _len = str._len;
            _data = str._data;  //move
            str._len = 0;
            str._data = NULL;
        }
        return *this;
    }

    //dtor
    vitrual ~MyString(){
        ++Dtor;
        if(_data) delete _data;
    }

    bool operator< (const MyString& rhs) const 
    {
        return std::string(this->_data) < std::string(rhs._data);
    }

    bool operator==(const MyString& rhs) const 
    {
        return std::string(this->_data) == std::string(rhs._data);
    }
    char* get() const {return _data;}
};

size_t MyString::DCtor = 0; 
size_t MyString::Ctor = 0;
size_t MyString::CCtor = 0;
size_t MyString::CAsgn = 0;
size_t MyString::MCtor = 0; 
size_t MyString::MAsgn = 0;
size_t MyString::Dtor = 0; 

namespace std{
    template<>
    struct hash<MyString>{
        size_t operator()(const MyString& s)const noexcept{ return hash<string>()(string(s.get()));}
    }
}

(Boolan) C++ STL与泛型编程

一个万用的hash function

tuple

TypeTraits

cout

moveable元素对于容器影响

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

C++

程序员