VPP路由可以看作是思科IOS CEF的翻版:它将路由分为路由表和转发表。路由表的实现基于hash,转发表的实现基于8-8-8-8 mtrie树。路由表存储了所有可能的路由表项(其中最优路由将安装到转发表中),转发表存储了实际使用的路由项(数据包转发时真正查找的是这个转发表)。
VPP转发表查找的最后结果将得到一个DPO(data path object),DPO将指示数据包的下一步动作(分类,ARP查找,丢弃等等,请查看DPO_TYPES枚举类型)。
以下皆以IPv4为例进行分析,并且只分析了我已经看懂的那一部分。
路由表:
/*
 * IPv4 FIB instance: holds the per-prefix-length hash tables of route
 * entries together with the mtrie used for lookups.
 */
typedef struct ip4_fib_t
{
/* Hash table for each prefix length mapping. */
/* Route entries are hashed per mask length; the 33 slots presumably
 * correspond to IPv4 prefix lengths 0..32. */
uword *fib_entry_by_dst_address[33];
/* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */
/* Author's note: this is the 8-8-8-8 mtrie forwarding table. */
ip4_fib_mtrie_t mtrie;
/* Table ID (hash key) for this FIB. */
u32 table_id;
/* Index into FIB vector. */
u32 index;
/* flow hash configuration */
flow_hash_config_t flow_hash_config;
/* N-tuple classifier indices */
u32 fwd_classify_table_index;
u32 rev_classify_table_index;
} ip4_fib_t;
路由表项通过hash链入fib_entry_by_dst_address中。
转发表项安装在mtrie中,这是实际被数据平面使用的查找数据库。
路由表的任何变化,将导致转发表的更新。
路由表项:
/**
 * A FIB (route table) entry: the prefix it represents, the table it lives
 * in, the load-balance DPO used for forwarding, and the per-source data.
 */
typedef struct fib_entry_t_ {
/**
 * Base class. The entry's node representation in the graph.
 */
/* Author's note: all of the key routing data structures start with this
 * member, so it can be regarded as their base class. It links the different
 * structures together and lets them be traversed through one common
 * interface. */
fib_node_t fe_node;
/**
 * The prefix of the route. this is const just to be sure.
 * It is the entry's key/identity and so should never change.
 */
/* Can be read as: route destination address + mask. */
const fib_prefix_t fe_prefix;
/**
 * The index of the FIB table this entry is in
 */
u32 fe_fib_index;
/**
 * The load-balance used for forwarding.
 *
 * We don't share the EOS and non-EOS even in case when they could be
 * because:
 * - complexity & reliability v. memory
 * determining the conditions where sharing is possible is non-trivial.
 * - separate LBs means we can get the EOS bit right in the MPLS label DPO
 * and so save a few clock cycles in the DP imposition node since we can
 * paint the header straight on without the need to check the packet
 * type to derive the EOS bit value.
 */
dpo_id_t fe_lb; // [FIB_FORW_CHAIN_MPLS_NUM];
/**
 * Vector of source infos.
 * Most entries will only have 1 source. So we optimise for memory usage,
 * which is preferable since we have many entries.
 */
/* Author's note: the same destination may be added by several different
 * sources (see the fib_source_t_ enum for the full list). A numerically
 * smaller source has higher priority; the route from the highest-priority
 * source is installed in the forwarding table, while lower-priority sources
 * are simply kept behind it. The vector is ordered - every add/remove
 * re-sorts it - and slot 0 holds the best route. */
fib_entry_src_t *fe_srcs;
/**
 * the path-list for which this entry is a child. This is also the path-list
 * that is contributing forwarding for this entry.
 */
fib_node_index_t fe_parent;
/**
 * index of this entry in the parent's child list.
 * This is set when this entry is added as a child, but can also
 * be changed by the parent as it manages its list.
 */
u32 fe_sibling;
/**
 * A vector of delegates.
 */
fib_entry_delegate_t *fe_delegates;
} fib_entry_t;
source:
表明该路由项的来源,每个source结构中包含了一个path_list,这个表包含了所有可能的下一跳。
/**
 * Per-source information attached to a FIB entry: the path-list the source
 * created, its flags and reference count, and a union of source-specific
 * data.
 */
typedef struct fib_entry_src_t_ {
/**
 * A vector of path extensions
 */
struct fib_path_ext_t_ *fes_path_exts;
/**
 * The path-list created by the source
 */
/* The source's path_list is reached through this index. */
fib_node_index_t fes_pl;
/**
 * Which source this info block is for
 */
fib_source_t fes_src;
/**
 * Flags on the source
 */
fib_entry_src_flag_t fes_flags;
/**
 * 1 byte ref count. This is not the number of users of the Entry
 * (which is itself not large, due to path-list sharing), but the number
 * of times a given source has been added. Which is even fewer
 */
u8 fes_ref_count;
/**
 * Flags the source contributes to the entry
 */
fib_entry_flag_t fes_entry_flags;
/**
 * Source specific info
 */
union {
struct {
/**
 * the index of the FIB entry that is the covering entry
 */
fib_node_index_t fesr_cover;
/**
 * This source's index in the cover's list
 */
u32 fesr_sibling;
} rr;
struct {
/**
 * the index of the FIB entry that is the covering entry
 */
fib_node_index_t fesa_cover;
/**
 * This source's index in the cover's list
 */
u32 fesa_sibling;
} adj;
struct {
/**
 * the index of the FIB entry that is the covering entry
 */
fib_node_index_t fesi_cover;
/**
 * This source's index in the cover's list
 */
u32 fesi_sibling;
} interface;
struct {
/**
 * This MPLS local label associated with the prefix.
 */
mpls_label_t fesm_label;
/**
 * the indices of the LFIB entries created
 */
fib_node_index_t fesm_lfes[2];
} mpls;
struct {
/**
 * The source FIB index.
 */
fib_node_index_t fesl_fib_index;
} lisp;
};
} fib_entry_src_t;
path和path_list:
到达路由目的地址所经过的下一跳称作path,所有可能的下一跳组合在一起就是path_list。
/**
 * A path-list: the set of paths (next hops) configured together, plus the
 * flags, next-hop protocol and uRPF list that go with them.
 */
typedef struct fib_path_list_t_ {
/**
 * A path-list is a node in the FIB graph.
 */
fib_node_t fpl_node;
/**
 * Flags on the path-list
 */
fib_path_list_flags_t fpl_flags;
/**
 * The next-hop protocol for the paths in this path list.
 * Note that fixing the proto here means we don't support a mix of
 * v4 and v6 paths. ho hum.
 */
fib_protocol_t fpl_nh_proto;
/**
 * Vector of path indices for all configured paths.
 * For shareable path-lists this list MUST not change.
 */
/* The path vector: records every next hop. */
fib_node_index_t *fpl_paths;
/**
 * the RPF list calculated for this path list
 */
fib_node_index_t fpl_urpf;
} fib_path_list_t;
关键函数
路由的添加:
从vnet_ip_route_cmd函数入手,这是通过CLI添加路由的入口函数。从中找到路由添加关键函数fib_table_entry_path_add2。
/**
 * Add the path(s) in rpath to the entry for prefix in the FIB table selected
 * by fib_index, creating the entry when no exact-match entry exists yet.
 *
 * @param fib_index  index handed to fib_table_get() to find the table
 * @param prefix     prefix identifying the entry; prefix->fp_proto selects
 *                   the protocol of the table
 * @param source     the source on whose behalf the paths are added
 * @param flags      entry flags forwarded to the create / path-add call
 * @param rpath      vector of route paths; every element is run through
 *                   fib_table_route_path_fixup() before being used
 * @return index of the entry that was created or updated
 */
fib_node_index_t
fib_table_entry_path_add2 (u32 fib_index,
                           const fib_prefix_t *prefix,
                           fib_source_t source,
                           fib_entry_flag_t flags,
                           fib_route_path_t *rpath)
{
    fib_node_index_t entry_index;
    fib_table_t *table;
    u32 path_pos;

    /* Locate the table, then look for an exact match on the prefix
     * (this is not a longest-prefix match). */
    table = fib_table_get(fib_index, prefix->fp_proto);
    entry_index = fib_table_lookup_exact_match_i(table, prefix);

    /* Fix up every supplied path against the prefix. */
    path_pos = 0;
    while (path_pos < vec_len(rpath))
    {
        fib_table_route_path_fixup(prefix, &rpath[path_pos]);
        path_pos++;
    }

    if (FIB_NODE_INDEX_INVALID != entry_index)
    {
        /*
         * The entry already exists: add the path(s) to it. Depending on the
         * entry's state this may add a new source or just extend the
         * path-list of an existing one.
         */
        int sourced_before = fib_entry_is_sourced(entry_index, source);

        fib_entry_path_add(entry_index, source, flags, rpath);

        /* Bump the per-source route count only when the "is sourced" state
         * of the entry changed across the call. */
        if (fib_entry_is_sourced(entry_index, source) != sourced_before)
        {
            table->ft_src_route_counts[source] += 1;
        }
        return (entry_index);
    }

    /* No such entry yet: create it (sources, path-list, ...), hook it into
     * the table and account for the new route of this source. */
    entry_index = fib_entry_create(fib_index, prefix, source, flags, rpath);
    fib_table_entry_insert(table, prefix, entry_index);
    table->ft_src_route_counts[source] += 1;

    return (entry_index);
}
/**
 * Create a new FIB entry for prefix in table fib_index, contributed by
 * source with the given paths.
 *
 * @param fib_index  index of the FIB table the entry belongs to
 * @param prefix     the prefix the new entry represents
 * @param source     the source creating the entry
 * @param flags      entry flags passed to the source add / path-swap actions
 * @param paths      vector of route paths; asserted to be non-empty
 * @return index of the newly created entry
 */
fib_node_index_t
fib_entry_create (u32 fib_index,
                  const fib_prefix_t *prefix,
                  fib_source_t source,
                  fib_entry_flag_t flags,
                  const fib_route_path_t *paths)
{
    fib_node_index_t new_index;
    fib_entry_t *entry;

    ASSERT(0 < vec_len(paths));

    /* Allocate the entry; its index is returned through new_index. */
    entry = fib_entry_alloc(fib_index, prefix, &new_index);

    /*
     * This is a brand-new entry, so there is exactly one source and no
     * need to work out which source wins. The source is added with the
     * drop DPO for the entry's protocol (author's note: this dispatches to
     * the source-specific fesv_add handler).
     */
    entry = fib_entry_src_action_add(
        entry, source, flags,
        drop_dpo_get(fib_proto_to_dpo(fib_entry_get_proto(entry))));

    /* Hand the paths to the source (author's note: dispatches to the
     * source-specific fesv_path_swap handler). */
    fib_entry_src_action_path_swap(entry, source, flags, paths);

    /* The calls above may have caused a realloc, so fetch the entry
     * pointer again from its index before using it further. */
    entry = fib_entry_get(new_index);

    /* Activate the source (author's note: calls the source's fesv_activate,
     * falling back to fib_entry_src_action_install when it has none), then
     * run the post-install actions (author's note: fesv_installed and
     * fesv_fwd_update). */
    fib_entry_src_action_activate(entry, source);
    fib_entry_post_install_actions(entry, source, FIB_ENTRY_FLAG_NONE);

    return (new_index);
}