pktgen--内核态发包工具

pktgen是一个位于linux内核层的高性能网络测试工具。
主要用来测试网络驱动与网卡设备,支持多线程,能够产生随机mac地址、IP地址、UDP端口号的数据包。
pktgen的配置与统计信息查看都使用/proc文件系统完成。/proc文件系统是一种特殊的、由软件创建的文件系统,内核使用/proc文件系统向外界导出信息,外界也可以通过它配置内核态的一些进程的参数,如ps、top、uptime等linux发行版中的很多工具就是通过/proc实现的。在大多数情况下,我们只用/proc读出数据(用于调试内核驱动等),而在pktgen中配置命令就用到了/proc的写入数据功能。

pktgen 相关文件

  • /proc/net/pktgen/pgctrl
    控制pktgen的启动和停止,读取时默认输出当前版本信息

    1
    2
    # cat /proc/net/pktgen/pgctrl 
    pktgen 2.72: Packet Generator for packet performance testing.
  • /proc/net/pktgen/kpktgend_X
    pktgen 线程配置文件,个数与CPU核心数一致

  • /proc/net/pktgen/ethX
    pktgen 关联的网卡配置文件
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    # cat /proc/net/pktgen/bond1.32\@0
    Params: count 100000000 min_pkt_size: 64 max_pkt_size: 64
    frags: 0 delay: 0 clone_skb: 0 ifname: bond1.32@0
    flows: 0 flowlen: 0
    queue_map_min: 1 queue_map_max: 1
    dst_min: 32.0.22.1 dst_max:
    src_min: 202.0.0.1 src_max: 202.0.0.32
    src_mac: 90:e2:ba:4d:01:d9 dst_mac: 90:e2:ba:4c:06:51
    udp_src_min: 9 udp_src_max: 9 udp_dst_min: 9 udp_dst_max: 9
    src_mac_count: 0 dst_mac_count: 0
    Flags: IPSRC_RND QUEUE_MAP_CPU
    Current:
    pkts-sofar: 44913913 errors: 0
    started: 414539499019us stopped: 414784402020us idle: 25946us
    seq_num: 44913914 cur_dst_mac_offset: 0 cur_src_mac_offset: 0
    cur_saddr: 0xd0000ca cur_daddr: 0x1160020
    cur_udp_dst: 9 cur_udp_src: 9
    cur_queue_map: 1
    flows: 0

pktgen 基本使用

  1. 加载模块

    1
    modprobe pktgen
  2. 为了方便配置,定义下面的工具函数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    # Send one pktgen command to the control file named by $PGDEV and report
    # the outcome.
    #
    # Globals:   PGDEV (read) - path of the pktgen /proc file to write to
    # Arguments: $1 - the command string, e.g. "add_device eth1"
    # Outputs:   nothing when the file reports "Result: OK:"; otherwise the
    #            "Result:" line(s) found in the file, on stdout
    # Returns:   1 if PGDEV is unset; otherwise 0 on "Result: OK:", else the
    #            exit status of the final grep
    pgset() {
      local result

      if [ -z "$PGDEV" ]; then
        echo "pgset: PGDEV is not set" >&2
        return 1
      fi

      # Quoted so the command reaches the kernel exactly as given
      # (no word splitting, no glob expansion).
      echo "$1" > "$PGDEV"

      # grep -F replaces the obsolescent fgrep; reading the file directly
      # avoids an extra cat process.
      result=$(grep -F "Result: OK:" "$PGDEV")
      if [ -z "$result" ]; then
        grep -F "Result:" "$PGDEV"
      fi
    }

    # Write the "inject" command to the pktgen file named by $PGDEV, then
    # print that file's contents.
    #
    # Globals: PGDEV (read) - path of the pktgen /proc file
    # Outputs: the contents of $PGDEV on stdout
    pg() {
      # Quoted so a path containing whitespace does not break the redirect.
      echo inject > "$PGDEV"
      cat "$PGDEV"
    }
  3. 绑定pktgen的0号线程到eth1网卡,添加少量配置并启动发包

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    # Step 1: thread 0 - drop whatever devices it had and attach eth1.
    PGDEV=/proc/net/pktgen/kpktgend_0
    for cmd in "rem_device_all" "add_device eth1"; do
      pgset "$cmd"
    done

    # Step 2: device eth1 - pick a random source IP from 202.0.0.1-202.0.0.32.
    PGDEV=/proc/net/pktgen/eth1
    for cmd in "flag IPSRC_RND" "src_min 202.0.0.1" "src_max 202.0.0.32"; do
      pgset "$cmd"
    done

    # Step 3: global control file - start sending.
    PGDEV=/proc/net/pktgen/pgctrl
    pgset "start"

性能优化

  • 为了最大化pktgen的发包性能,增大网卡的TX ring buffer

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # ethtool -G eth1 tx 4096
    # ethtool -g eth1
    Ring parameters for eth1:
    Pre-set maximums:
    RX: 4096
    RX Mini: 0
    RX Jumbo: 0
    TX: 4096
    Current hardware settings:
    RX: 4096
    RX Mini: 0
    RX Jumbo: 0
    TX: 4096
  • 网卡中断亲和性
    通过一对一绑定网卡中断到cpu可以提高cpu缓存的利用率,并利用到网卡的多队列特性(一个网卡队列对应一个中断号)

  1. 绑定原理

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # cat /proc/interrupts |grep eth1|awk '{print $1}'  # 获取中断号
    138:
    139:
    140:
    141:
    142:
    143:
    144:
    145:
    146:
    # echo 0x1 > /proc/irq/138/smp_affinity # 绑定0号cpu到irq 138
    # echo 0x2 > /proc/irq/139/smp_affinity # 绑定1号cpu到irq 139
    计算公式:单cpu的 mask 值 = 2 ** cpu号 的16进制值
  2. 绑定脚本 eth2cpu.sh

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    # Print the command-line synopsis and terminate the script.
    #
    # Arguments: $1 - program name to show (optional; defaults to $0 so the
    #                 message is complete even when called without arguments)
    # Outputs:   the usage line on stdout
    # Note:      does not return - it calls exit.
    usage()
    {
      echo "Usage: ${1:-$0} [-f file.conf ] [-h]"
      exit
    }


    # Print the hexadecimal smp_affinity mask that selects a single CPU
    # (mask = 2 ** cpu id).
    #
    # Arguments: $1 - CPU id, a non-negative decimal integer
    # Outputs:   the mask in hex on stdout, without a "0x" prefix. For CPU ids
    #            of 32 and above the mask is written as comma-separated 32-bit
    #            words (e.g. CPU 32 -> "1,00000000"), the form smp_affinity
    #            expects for wide masks.
    # Returns:   1 (with a message on stderr) if $1 is not a decimal number
    cpu_mask() {
      local id=$1
      local mask words

      case "$id" in
        '' | *[!0-9]*)
          echo "cpu_mask: invalid CPU id '$id'" >&2
          return 1
          ;;
      esac
      # Force base 10 so an id such as "08" is not parsed as octal.
      id=$((10#$id))

      # Shell arithmetic replaces the two bc invocations of the old version.
      mask=$(printf '%X' $((1 << (id % 32))))
      words=$((id / 32))
      while [ "$words" -gt 0 ]; do
        mask="${mask},00000000"
        words=$((words - 1))
      done

      echo "$mask"
    }

    # Pin network-interface interrupts to CPUs as described by a config file.
    #
    # The config file holds one "name=cpu" pair per line, where name is the
    # interrupt name shown in the last column of <proc_root>/interrupts
    # (e.g. "eth1-TxRx-0=0") and cpu is a decimal CPU id. Blank lines and
    # lines starting with '#' are ignored; lines whose cpu is not a decimal
    # number are skipped with a warning.
    #
    # Arguments: $1 - path of the config file
    #            $2 - root of the proc filesystem (optional, default /proc)
    # Outputs:   "<conf> set_cpu" once, then for every pinned interrupt
    #            "<name>=<cpu>:<irq>" followed by
    #            "<smp_affinity_list path>: <its contents>"; warnings on stderr
    # Calls:     cpu_mask (defined elsewhere in this script)
    set_eth_cpu()
    {
      local conf=$1
      local proc_root=${2:-/proc}
      local eth v interruptnum mask targetfile listfile

      echo "$conf set_cpu"

      # Read the file once. The "|| [ -n ... ]" part keeps a final line that
      # lacks a trailing newline.
      while IFS='=' read -r eth v || [ -n "$eth" ]; do
        # Tolerate stray blanks or a trailing CR around either field.
        eth=${eth//[[:space:]]/}
        v=${v//[[:space:]]/}

        case "$eth" in
          '' | '#'*) continue ;;
        esac
        case "$v" in
          '' | *[!0-9]*)
            echo "set_eth_cpu: skipping '$eth': CPU id '$v' is not a number" >&2
            continue
            ;;
        esac

        # First interrupt whose last column is exactly the configured name;
        # column 1 is "<irq>:" so the colon is stripped.
        interruptnum=$(awk -v e="$eth" \
          '$NF == e { sub(/:$/, "", $1); print $1; exit }' \
          "$proc_root/interrupts")
        if [ -z "$interruptnum" ]; then
          echo "set_eth_cpu: no interrupt named '$eth' in $proc_root/interrupts" >&2
          continue
        fi
        echo "$eth=$v:$interruptnum"

        targetfile=$proc_root/irq/$interruptnum/smp_affinity
        mask=$(cpu_mask "$v")
        if [ -e "$targetfile" ]; then
          echo "$mask" > "$targetfile" \
            || echo "set_eth_cpu: cannot write '$mask' to $targetfile" >&2
        fi

        # Printed with %s because the list may be a range such as "0-3".
        listfile=$proc_root/irq/$interruptnum/smp_affinity_list
        if [ -r "$listfile" ]; then
          printf '%s: %s\n' "$listfile" "$(cat "$listfile")"
        fi
      done < "$conf"
    }

    # Command-line handling: -f <file> names the config file; -h or any
    # unknown option prints the usage text (usage exits the script).
    conf=
    while getopts "f:h" flag
    do
      case "$flag" in
        f) conf=$OPTARG ;;
        *) usage "$0" ;;
      esac
    done

    # Without -f there is nothing to do. The old unquoted test `[ -s $conf ]`
    # was true for an empty $conf and went on to read from stdin.
    if [ -z "$conf" ]; then
      usage "$0"
    fi

    if [ ! -s "$conf" ]; then
      echo "$0: config file '$conf' is missing or empty" >&2
      exit 1
    fi

    set_eth_cpu "$conf"

    echo "End"

    exit
  3. 绑定配置文件 eth2cpu.conf

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    eth1-TxRx-0=0
    eth1-TxRx-1=1
    eth1-TxRx-2=2
    eth1-TxRx-3=3
    eth1-TxRx-4=4
    eth1-TxRx-5=5
    eth1-TxRx-6=6
    eth1-TxRx-7=7
    eth1-TxRx-8=8
    eth1-TxRx-9=9
    eth1-TxRx-10=10
    eth1-TxRx-11=11
    eth1-TxRx-12=12
    eth1-TxRx-13=13
    eth1-TxRx-14=14
    eth1-TxRx-15=15
  4. 使用方法

    1
    # ./eth2cpu.sh -f eth2cpu.conf

pktgen 发包脚本

一个支持多线程发包的脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
#
# Multi-threaded pktgen sender.
# Must run with enough privilege to load kernel modules and to write under
# /proc/net/pktgen. Interpreted by bash because the script relies on shell
# features that a plain POSIX /bin/sh does not guarantee (e.g. `local`).

# Nothing below can work without the pktgen module, so stop early if it
# cannot be loaded.
if ! modprobe pktgen; then
  echo "modprobe pktgen failed (are you root? is pktgen available for this kernel?)" >&2
  exit 1
fi

# Send one pktgen command to the control file named by $PGDEV and report the
# outcome.
#
# Globals:   PGDEV (read) - path of the pktgen /proc file to write to
# Arguments: $1 - the command string, e.g. "add_device eth1"
# Outputs:   nothing when the file reports "Result: OK:"; otherwise the
#            "Result:" line(s) found in the file, on stdout
# Returns:   1 if PGDEV is unset; otherwise 0 on "Result: OK:", else the exit
#            status of the final grep
pgset() {
  local result

  if [ -z "$PGDEV" ]; then
    echo "pgset: PGDEV is not set" >&2
    return 1
  fi

  # Quoted so the command reaches the kernel exactly as given
  # (no word splitting, no glob expansion).
  echo "$1" > "$PGDEV"

  # grep -F replaces the obsolescent fgrep; reading the file directly avoids
  # an extra cat process.
  result=$(grep -F "Result: OK:" "$PGDEV")
  if [ -z "$result" ]; then
    grep -F "Result:" "$PGDEV"
  fi
}

# Write the "inject" command to the pktgen file named by $PGDEV, then print
# that file's contents.
#
# Globals: PGDEV (read) - path of the pktgen /proc file
# Outputs: the contents of $PGDEV on stdout
pg() {
  # Quoted so a path containing whitespace does not break the redirect.
  echo inject > "$PGDEV"
  cat "$PGDEV"
}

# Give each pktgen kernel thread 1..15 its own bond1.32@N device.
# A counter loop is used because {1..15} brace expansion is a bash feature;
# a plain POSIX sh takes it literally and would run the body only once.
cpu=1
while [ "$cpu" -le 15 ]; do
  PGDEV=/proc/net/pktgen/kpktgend_${cpu}
  echo "Removing all devices"
  pgset "rem_device_all"
  echo "Adding bond1.32@${cpu}"
  pgset "add_device bond1.32@${cpu}"
  #echo "Setting max_before_softirq 10000"
  #pgset "max_before_softirq 10000"
  cpu=$((cpu + 1))
done

# Packet parameters shared by every device configured below.
CLONE_SKB="clone_skb 0"
PKT_SIZE="pkt_size 64"
# Alternative: "count 0" sends continuously until explicitly stopped.
#COUNT="count 0"
COUNT="count 100000000"
DELAY="delay 0"

# Configure the 15 devices, one per thread.
# A counter loop is used because {1..15} brace expansion is a bash feature;
# a plain POSIX sh takes it literally and would run the body only once.
cpu=1
while [ "$cpu" -le 15 ]; do
  # Every thread needs its own device entry; the "@N" suffix after the
  # interface name is what makes each one distinct.
  PGDEV="/proc/net/pktgen/bond1.32@${cpu}"
  echo "Configuring $PGDEV"

  # Pin the device to TX queue number $cpu.
  pgset "queue_map_min $cpu"
  pgset "queue_map_max $cpu"
  pgset "flag IPSRC_RND"
  pgset "flag QUEUE_MAP_CPU"
  pgset "src_min 202.0.0.1"
  pgset "src_max 202.0.0.32"

  # The delay comes from $DELAY only; the former extra hard-coded
  # `pgset "delay 0"` was redundant with it.
  pgset "$COUNT"
  pgset "$CLONE_SKB"
  pgset "$PKT_SIZE"
  pgset "$DELAY"
  pgset "dst 32.0.22.1"
  pgset "dst_mac 90:e2:ba:4c:06:51"

  cpu=$((cpu + 1))
done

# pgctrl is the global control file: the "start" command is written to it.
PGDEV=/proc/net/pktgen/pgctrl

printf '%s\n' "Running… ctrl^C to stop"
pgset "start"
printf '%s\n' "Done"

pktgen 配置参考

pgset "clone_skb 1"     sets the number of copies of the same packet
pgset "clone_skb 0"     use single SKB for all transmits
pgset "burst 8"         uses xmit_more API to queue 8 copies of the same
                     packet and update HW tx queue tail pointer once.
                     "burst 1" is the default
pgset "pkt_size 9014"   sets packet size to 9014
pgset "frags 5"         packet will consist of 5 fragments
pgset "count 200000"    sets number of packets to send, set to zero
                     for continuous sends until explicitly stopped.

pgset "delay 5000"      adds delay to hard_start_xmit(). nanoseconds

pgset "dst 10.0.0.1"    sets IP destination address
                     (BEWARE! This generator is very aggressive!)

pgset "dst_min 10.0.0.1"            Same as dst
pgset "dst_max 10.0.0.254"          Set the maximum destination IP.
pgset "src_min 10.0.0.1"            Set the minimum (or only) source IP.
pgset "src_max 10.0.0.254"          Set the maximum source IP.
pgset "dst6 fec0::1"     IPV6 destination address
pgset "src6 fec0::2"     IPV6 source address
pgset "dst_mac 00:00:00:00:00:00"   sets MAC destination address
pgset "src_mac 00:00:00:00:00:00"   sets MAC source address

pgset "queue_map_min 0" Sets the min value of tx queue interval
pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
                     To select queue 1 of a given device,
                     use queue_map_min=1 and queue_map_max=1

pgset "src_mac_count 1" Sets the number of MACs we'll range through.
                     The 'minimum' MAC is what you set with src_mac.

pgset "dst_mac_count 1" Sets the number of MACs we'll range through.
                     The 'minimum' MAC is what you set with dst_mac.

pgset "flag [name]"     Set a flag to determine behaviour.  Current flags
                     are: IPSRC_RND # IP source is random (between min/max)
                          IPDST_RND # IP destination is random
                          UDPSRC_RND, UDPDST_RND,
                          MACSRC_RND, MACDST_RND
                          TXSIZE_RND, IPV6,
                          MPLS_RND, VID_RND, SVID_RND
                          FLOW_SEQ,
                          QUEUE_MAP_RND # queue map random
                          QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
                          UDPCSUM,
                          IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
                          NODE_ALLOC # node specific memory allocation
                          NO_TIMESTAMP # disable timestamping

pgset spi SPI_VALUE     Set specific SA used to transform packet.

pgset "udp_src_min 9"   set UDP source port min, If < udp_src_max, then
                     cycle through the port range.

pgset "udp_src_max 9"   set UDP source port max.
pgset "udp_dst_min 9"   set UDP destination port min, If < udp_dst_max, then
                     cycle through the port range.
pgset "udp_dst_max 9"   set UDP destination port max.

pgset "mpls 0001000a,0002000a,0000000a" set MPLS labels (in this example
                                     outer label=16,middle label=32,
                 inner label=0 (IPv4 NULL)) Note that
                 there must be no spaces between the
                 arguments. Leading zeros are required.
                 Do not set the bottom of stack bit,
                 that's done automatically. If you do
                 set the bottom of stack bit, that
                 indicates that you want to randomly
                 generate that address and the flag
                 MPLS_RND will be turned on. You
                 can have any mix of random and fixed
                 labels in the label stack.

pgset "mpls 0"          turn off mpls (or any invalid argument works too!)

pgset "vlan_id 77"       set VLAN ID 0-4095
pgset "vlan_p 3"         set priority bit 0-7 (default 0)
pgset "vlan_cfi 0"       set canonical format identifier 0-1 (default 0)

pgset "svlan_id 22"      set SVLAN ID 0-4095
pgset "svlan_p 3"        set priority bit 0-7 (default 0)
pgset "svlan_cfi 0"      set canonical format identifier 0-1 (default 0)

pgset "vlan_id 9999"     > 4095 remove vlan and svlan tags
pgset "svlan_id 9999"    > 4095 remove svlan tag


pgset "tos XX"           set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)

pgset stop                  aborts injection. Also, ^C aborts generator.

pgset "rate 300M"        set rate to 300 Mb/s
pgset "ratep 1000000"    set rate to 1Mpps

pgset "xmit_mode netif_receive"  RX inject into stack netif_receive_skb()
              Works with "burst" but not with "clone_skb".
              Default xmit_mode is "start_xmit".

参考文档

  1. HOWTO for the linux packet generator
  2. Pktgen Getting Started