#!/usr/bin/perl
# * Copyright (c) 2009-2011 Mellanox Technologies Ltd. All rights reserved.
# * Copyright (c) 2009-2011 System Fabric Works, Inc. All rights reserved.
# *
# * This software is available to you under a choice of one of two
# * licenses. You may choose to be licensed under the terms of the GNU
# * General Public License (GPL) Version 2, available from the file
# * COPYING in the main directory of this source tree, or the
# * OpenIB.org BSD license below:
# *
# * Redistribution and use in source and binary forms, with or
# * without modification, are permitted provided that the following
# * conditions are met:
# *
# * - Redistributions of source code must retain the above
# * copyright notice, this list of conditions and the following
# * disclaimer.
# *
# * - Redistributions in binary form must reproduce the above
# * copyright notice, this list of conditions and the following
# * disclaimer in the documentation and/or other materials
# * provided with the distribution.
# *
# * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# * SOFTWARE.
#
use warnings;
use strict;
use File::Basename;
use File::Path qw(make_path);
use Getopt::Long;
# ---- command line switches (filled in by GetOptions in main) ----
# -h / --help: print usage
my $help = 0;
# -n: do not record add/remove actions in the persistence file
my $no_persist = 0;
# -v: verbosity level; rxe_remove prints a trace line when it is > 0
my $debug = 0;
# -f: accepted on the command line; NOTE(review): not read anywhere in the
# visible code
my $force = 0;
# -l: restrict the status listing to interfaces whose link is up
my $linkonly = 0;
# sysfs directory holding the rdma_rxe module parameters (add, remove, debug)
my $parms = "/sys/module/rdma_rxe/parameters";
# extra modprobe switch and a flag recording whether `modprobe -c` has already
# been inspected; both are maintained by modprobe()
my $modprobe_opt = "";
my $modprobe_checked = "0";
# location of the list of network devices to re-attach on "start"
my $persistence_path = "/var/lib/rxe";
my $persistence_file = "${persistence_path}/rxe";
# number of slots in use in @persistence_array
my $num_persistent = 0;
# same directory as $parms; its existence is used as the "module is loaded" test
my $sys = "/sys/module/rdma_rxe/parameters";
# network device name -> rxe device name
my %rxe_names;
# list of rxe device names found by get_names()
my @rxe_array;
# rxe device name -> network device name; get_dev_info() also stores
# "<mlx4 device>:<port>" -> network device name here
my %eth_names;
# network devices found by get_dev_info(), in discovery order
my @eth_list;
# per network device attributes, keyed by device name
my %eth_driver;
my %link_state;
my %link_speed;
my %eth_mtu;
my %ipv4_addr;
# rxe device name -> active MTU string reported by ibv_devinfo
my %rxe_mtu;
# persisted network device names, and name -> index into that array
my @persistence_array;
my %persistence_hash;
# NOTE(review): @mlx4_port and @mlx4_ether are not referenced in the visible
# code -- confirm before removing
my @mlx4_port;
my @mlx4_ether;
# "<mlx4 device>:<port>" entries for ports whose link layer is Ethernet
my @roce_list;
# Read the first line of a file and return it as a string.
#
# Only one line is read, which is all the single-value sysfs attributes
# consulted by this script need.
#
# Parameters:
#   $filename - path of the file to read
# Returns:
#   the first line, including its trailing newline if present, or the empty
#   string when the file cannot be opened or contains nothing.  The result is
#   always defined, so callers may chomp() it unconditionally.
sub read_file {
    my $filename = shift;
    my $result = "";

    # three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe
    if (open(my $fh, '<', $filename)) {
        my $first_line = <$fh>;
        # readline yields undef on an empty file; keep the "" default then
        $result = $first_line if (defined($first_line));
        close($fh);
    }
    return $result;
}
# Discover the rxe devices under /sys/class/infiniband and record, for each
# one whose "parent" attribute names a network device, the mapping in both
# directions (%rxe_names: netdev -> rxe, %eth_names: rxe -> netdev) and the
# rxe name itself in @rxe_array.
sub get_names {
    my $count = 0;

    for my $path (glob("/sys/class/infiniband/rxe*")) {
        my $rdev = basename($path);
        my $ndev = read_file("/sys/class/infiniband/$rdev/parent");
        chomp($ndev);

        # both names must look like "<word><digit>" / "rxe<digit>"
        next unless ($ndev =~ /[\w]+[\d]/);
        next unless ($rdev =~ /rxe[0123456789]/);

        $rxe_names{$ndev} = $rdev;
        $eth_names{$rdev} = $ndev;
        $rxe_array[$count++] = $rdev;
    }
}
# get list of Mellanox RoCE ports
#
# Walks /sys/class/infiniband/mlx4_*/ports/* and appends "<device>:<port>" to
# @roce_list for every port whose link_layer attribute contains "Ethernet".
sub get_mlx4_list {
    my $i = 0;

    foreach my $mlx4_dir (glob("/sys/class/infiniband/mlx4_*")) {
        my $mlx4 = basename($mlx4_dir);

        foreach my $port_dir (glob("/sys/class/infiniband/$mlx4/ports/*")) {
            my $port = basename($port_dir);

            # read the attribute through the full sysfs path; the bare port
            # number is not a usable path on its own
            my $link = read_file("$port_dir/link_layer");
            next if (!defined($link));
            chomp($link);

            if ($link =~ /Ethernet/) {
                $roce_list[$i++] = "$mlx4:$port";
            }
        }
    }
}
#collect per device information
#
# Fills the file-level tables for every Ethernet interface:
#   @eth_list   - interface names, in discovery order
#   %eth_driver - driver name reported by `ethtool -i`
#   %link_state - "Link detected" value reported by `ethtool`
#   %link_speed - "10GigE" when a 10000baseT mode line is seen, else ""
#   %ipv4_addr  - last IPv4 address listed by `ip addr show`
#   %eth_mtu    - MTU listed by `ip addr show`
# and %rxe_mtu (active_mtu from `ibv_devinfo`) for each entry of @rxe_array.
# Interfaces driven by mlx4_en are additionally paired, in order, with the
# entries of @roce_list through %eth_names.
sub get_dev_info {
    my @fields;
    my @lines;
    my $i = 0;
    my $j = 0;

    get_mlx4_list();

    # an interface is Ethernet when /sys/class/net/<dev>/type reads 1
    my @ether_devs;
    foreach my $dev (map { basename($_) } glob("/sys/class/net/*")) {
        # bonding_masters lives in the same directory but is not an interface
        next if ($dev eq "bonding_masters");

        my $dev_type = read_file("/sys/class/net/${dev}/type");
        next if (!defined($dev_type));
        chomp($dev_type);

        # compare numerically only when the attribute really holds a number
        next if ($dev_type !~ /^\d+$/);
        push(@ether_devs, $dev) if ($dev_type == 1);
    }

    foreach my $eth (@ether_devs) {
        $eth_list[$i++] = $eth;

        # get driver name
        @lines = `ethtool -i $eth`;
        foreach my $line (@lines) {
            chomp($line);
            @fields = split(/\s+/, $line);
            next if (!defined($fields[0]) || $fields[0] !~ /driver:/);

            my $drv = $fields[1];
            $eth_driver{$eth} = $drv;

            # pair with the next unclaimed RoCE port, never running off the
            # end of the list
            if (defined($drv) && $drv =~ /mlx4_en/
                && $j < scalar(@roce_list)) {
                $eth_names{$roce_list[$j++]} = $eth;
            }
        }

        # get link status
        $link_state{$eth} = "";
        $link_speed{$eth} = "";
        @lines = `ethtool $eth`;
        foreach my $line (@lines) {
            chomp($line);
            @fields = split(/:/, $line);
            if (defined($fields[1])) {
                $fields[1] =~ s/^\s+//g;
                if ($fields[0] =~ /Link detected/) {
                    $link_state{$eth} = $fields[1];
                }
            }
            # only lines without a "key: value" shape are checked for speed
            elsif ($line =~ /10000baseT/) {
                $link_speed{$eth} = "10GigE";
            }
        }

        $ipv4_addr{$eth} = " ";
        $eth_mtu{$eth} = "";
        @lines = `ip addr show $eth`;
        foreach my $line (@lines) {
            # get IP address
            if ($line =~ /inet /) {
                # turn "   inet a.b.c.d/len ..." into "a.b.c.d len ..."
                $line =~ s/^\s+inet ([0-9.]+)\//$1 /g;
                @fields = split(/\s+/, $line);
                $ipv4_addr{$eth} = $fields[0];
            }
            # get ethernet mtu
            if ($line =~ /mtu /) {
                $line =~ s/^.*mtu //g;
                @fields = split(/\s+/, $line);
                $eth_mtu{$eth} = $fields[0];
            }
        }
    }

    # get rxe mtu
    foreach my $rxe (@rxe_array) {
        @lines = `ibv_devinfo -d $rxe`;
        foreach my $line (@lines) {
            if ($line =~ /active_mtu/) {
                $line =~ s/^\s+active_mtu:\s+//g;
                chomp($line);
                $rxe_mtu{$rxe} = $line;
            }
        }
        # placeholder when ibv_devinfo reported nothing usable
        $rxe_mtu{$rxe} = "(?)" if (!$rxe_mtu{$rxe});
    }
}
# Pass a value through unchanged when it contains at least one non-blank
# character; otherwise (undefined, empty or all whitespace) return the
# placeholder "###".
sub set_field {
    my ($value) = @_;

    my $has_text = defined($value) && $value =~ /\S/;
    return $has_text ? $value : "###";
}
# format status output into fixed width columns
#
# Parameters:
#   @_ - one string per output row; cells are separated by whitespace and the
#        cell "###" stands for "no value"
# Each column is as wide as its longest cell plus two spaces.  Missing cells
# and "###" cells are printed as blanks of the full column width, so the
# columns that follow stay aligned.  Every row is prefixed by one space and
# written to the currently selected output handle.
sub status_print {
    my @rows = map { [ split(/\s+/, $_) ] } @_;
    my @width;

    # one pass to size the columns
    foreach my $row (@rows) {
        for my $i (0 .. $#{$row}) {
            my $len = length($row->[$i]);
            if (!defined($width[$i]) || $len > $width[$i]) {
                $width[$i] = $len;
            }
        }
    }

    # one pass to print
    foreach my $row (@rows) {
        print " ";
        for my $i (0 .. $#width) {
            my $cell = $row->[$i];
            # the placeholder is blanked out rather than shown
            $cell = "" if (!defined($cell) || $cell eq "###");
            # left-justify inside the column so padding always matches what
            # was actually printed
            printf("%-*s", $width[$i] + 2, $cell);
        }
        print "\n";
    }
}
# Report whether the rdma_rxe module is loaded, judged by the presence of its
# sysfs parameter directory: returns 0 when present, 1 when absent.
sub check_module_status {
    return (-e $sys) ? 0 : 1;
}
# Print a one-line notice when the rdma_rxe sysfs parameter directory is
# missing, i.e. the module is not loaded; print nothing otherwise.
sub show_module_status {
    unless (-e $sys) {
        print "rdma_rxe module not loaded\n";
    }
}
# print rxe status
#
# Parameters:
#   $instance - optional rxe device name; when given, only interfaces whose
#               rxe device name contains this text are listed.  When omitted,
#               every Ethernet interface is listed, or only those with link
#               up if -l was given.
# Refreshes the device tables, prints the module-not-loaded notice if
# applicable, then prints one table row per selected interface.
sub do_status {
    my $instance = $_[0];
    my @outp;

    get_names();
    get_dev_info();
    show_module_status();

    push(@outp, "Name\tLink\tDriver\t\tSpeed\tNMTU\tIPv4_addr\tRDEV\tRMTU");

    foreach my $eth (@eth_list) {
        # handle case where rxe_drivers are not loaded
        my $rxe = "";
        my $rmtu = "";
        if (defined($rxe_names{$eth})) {
            $rxe = $rxe_names{$eth};
            $rmtu = $rxe_mtu{$rxe};
        }

        my $wanted;
        if (defined($instance)) {
            # literal substring test: the argument comes from the command
            # line and must not be interpreted as a regular expression
            $wanted = (index($rxe, $instance) >= 0);
        }
        else {
            $wanted = ($linkonly == 0) || ($link_state{$eth} =~ /yes/);
        }
        next if (!$wanted);

        # set_field() copes with undefined values, so pass them through
        # unquoted instead of stringifying them first
        push(@outp, join("\t", map { set_field($_) } (
            $eth,
            $link_state{$eth},
            $eth_driver{$eth},
            $link_speed{$eth},
            $eth_mtu{$eth},
            $ipv4_addr{$eth},
            $rxe,
            $rmtu,
        )));
    }

    status_print(@outp);
}
# read file containing list of ethernet devices into a list
#
# Rebuilds @persistence_array, %persistence_hash (name -> array index) and
# $num_persistent from $persistence_file.  Only the first whitespace-separated
# word of each line is used; it must start with word characters followed by a
# digit.  A name listed more than once is recorded once.  A file that cannot
# be opened is treated as an empty list.
sub populate_persistence {
    my $i = 0;

    # start from a clean slate so the tables always mirror the file
    @persistence_array = ();
    %persistence_hash = ();

    if (open(my $fh, '<', $persistence_file)) {
        while (my $line = <$fh>) {
            chomp($line);
            $line =~ s/^\s+//;

            # in case we add fields later
            my ($eth) = split(/\s+/, $line, 2);
            next if (!defined($eth) || $eth !~ /^[\w]+[\d]/);

            # a duplicate would leave the hash pointing at only one of the
            # slots, so a later delete could not remove the device
            next if (defined($persistence_hash{$eth}));

            $persistence_array[$i] = $eth;
            $persistence_hash{$eth} = $i++;
        }
        close($fh);
    }

    $num_persistent = $i;
}
# print out list of ethernet devices to file
#
# Rewrites $persistence_file with the first $num_persistent entries of
# @persistence_array, one per line.  Slots blanked by delete_persistent() (or
# otherwise not shaped like a device name) are skipped.  Failure to open or
# flush the file is reported on STDERR; the script carries on.
sub commit_persistent {
    my $pf;

    if (!open($pf, '>', $persistence_file)) {
        print STDERR "Warning: unable to write $persistence_file: $!\n";
        return;
    }

    for my $i (0 .. $num_persistent - 1) {
        my $eth = $persistence_array[$i];
        next if (!defined($eth) || $eth !~ /[\w]+[\d]/);
        print {$pf} "$eth\n";
    }

    # buffered write errors only show up when the handle is closed
    if (!close($pf)) {
        print STDERR "Warning: error writing $persistence_file: $!\n";
    }
}
# Blank out the persistence slot of a network device, if it has one, so that
# the next commit_persistent() leaves it out of the file.
sub delete_persistent {
    my ($eth) = @_;

    my $slot = $persistence_hash{$eth};
    return if (!defined($slot));

    $persistence_array[$slot] = "";
}
# Append a network device to the in-memory persistence list unless
# %persistence_hash already knows it.
sub add_persistent {
    my ($eth) = @_;

    # nothing to do when the device already has a slot
    return if (defined($persistence_hash{$eth}));

    $persistence_hash{$eth} = $num_persistent;
    $persistence_array[$num_persistent++] = $eth;
}
# add new rxe device to eth if not already up
#
# Parameters:
#   $eth - network device name, e.g. eth3
# Writes the name to the module's "add" parameter unless %rxe_names already
# maps it to an rxe device, then (unless -n was given) records the name in the
# persistence file.
sub rxe_add {
    my $eth = $_[0];

    if (!defined($eth) || !($eth =~ /[\w]+[\d]/)) {
        my $shown = defined($eth) ? $eth : "";
        print "eth_name ($shown) looks bogus\n";
        return;
    }

    if (!defined($rxe_names{$eth})) {
        # write the sysfs parameter directly: the name comes from the command
        # line and must never be handed to a shell
        if (open(my $fh, '>', "$parms/add")) {
            print {$fh} "$eth\n";
            # the kernel's verdict on the write is reported at close time
            if (!close($fh)) {
                print STDERR "Warning: unable to add $eth: $!\n";
            }
        }
        else {
            print STDERR "Warning: unable to open $parms/add: $!\n";
        }
    }

    if (!$no_persist) {
        add_persistent($eth);
        commit_persistent();
    }
}
# remove one rxe device, or all of them
#
# Parameters:
#   $arg2 - "all", an rxe device name (rxe1) or a network device name (eth3)
# Resolves the argument to an rxe device through %eth_names / %rxe_names,
# writes it to the module's "remove" parameter and, unless -n was given,
# updates the persistence file to match.
sub rxe_remove {
    my $arg2 = defined($_[0]) ? $_[0] : "";
    my $rxe;
    my $eth;

    print "remove $arg2\n" if ($debug > 0);

    # The rxe test must come before the generic "<word><digit>" test: a name
    # such as rxe0 satisfies both, and it is an rdma device, not a netdev.
    if ($arg2 eq "all") {
        $rxe = "all";
    }
    elsif ($arg2 =~ /^rxe[0123456789]/) {
        $rxe = $arg2;
        $eth = $eth_names{$rxe};
    }
    elsif ($arg2 =~ /[\w]+[\d]/) {
        $eth = $arg2;
        $rxe = $rxe_names{$eth};
    }

    if (!defined($rxe)
        || !(($rxe eq "all") || ($rxe =~ /^rxe[0123456789]/))) {
        # report what the user typed; $rxe is usually undefined here
        print "rxe instance $arg2 not found\n";
        return;
    }

    # write the sysfs parameter directly instead of going through a shell
    if (open(my $fh, '>', "$parms/remove")) {
        print {$fh} "$rxe\n";
        if (!close($fh)) {
            print STDERR "Warning: unable to remove $rxe: $!\n";
        }
    }
    else {
        print STDERR "Warning: unable to open $parms/remove: $!\n";
    }

    return if ($no_persist);

    if ($rxe eq "all") {
        unlink($persistence_file);
    }
    elsif (defined($eth) && $eth =~ /[\w]+[\d]/) {
        delete_persistent($eth);
        commit_persistent();
    }
    else {
        print "Warning: Unable to resolve ethname; "
            . "instance may persist on restart\n";
    }
}
# Run ibv_devinfo for one rdma device and hand back whatever it printed.
sub get_devinfo {
    my ($rxe) = @_;

    return `ibv_devinfo -d $rxe`;
}
# Load a kernel module through modprobe(8).
#
# On the first call the modprobe configuration is scanned once; if it contains
# a line starting with "allow_unsupported_modules 0" (SLES11), the
# --allow-unsupported-modules switch is remembered and added to every
# modprobe invocation from then on.
#
# Parameters:
#   $module - module name
#   $opts   - optional module options, appended verbatim
sub modprobe {
    my ($module, $opts) = @_;
    $opts = "" unless (defined($opts));

    if ($modprobe_checked == 0) {
        for my $cfg_line (`modprobe -c`) {
            next unless ($cfg_line =~ /^allow_unsupported_modules *0/);
            $modprobe_opt = " --allow-unsupported-modules ";
            last;
        }
        $modprobe_checked = "1";
    }

    system("modprobe $modprobe_opt $module $opts");
}
# bring up rxe
#
# Makes sure the persistence directory and file exist, loads the kernel
# modules, re-attaches every persisted network device that does not already
# have an rxe device, and finally brings up the network device behind any rxe
# port that ibv_devinfo reports as PORT_DOWN.
sub do_start {
    # best effort: collect errors instead of letting make_path die
    make_path($persistence_path, { error => \my $mkdir_err });
    if ($mkdir_err && @{$mkdir_err}) {
        print STDERR "Warning: unable to create $persistence_path\n";
    }

    # make sure the file exists without disturbing its content
    if (open(my $fh, '>>', $persistence_file)) {
        close($fh);
    }
    else {
        print STDERR "Warning: unable to create $persistence_file: $!\n";
    }

    modprobe("ib_core");
    modprobe("ib_uverbs");
    modprobe("rdma_ucm");
    modprobe("rdma_rxe");

    populate_persistence();
    system("udevadm control --reload");

    # rxe_add() skips devices listed in %rxe_names, so fill that table before
    # re-adding; otherwise devices that already exist would be added again
    get_names();
    foreach my $eth (@persistence_array) {
        rxe_add($eth);
    }

    # pick up the devices created just now
    get_names();
    foreach my $rxe (@rxe_array) {
        my $stat = get_devinfo($rxe);
        next if (!defined($stat) || $stat !~ /PORT_DOWN/);
        next if (!defined($eth_names{$rxe}));
        system("ip", "link", "set", $eth_names{$rxe}, "up");
    }
}
# True when the argument is defined and consists of an optional sign followed
# by one or more digits.
sub is_integer {
    my ($candidate) = @_;

    return defined($candidate) && $candidate =~ /^[+-]?\d+$/;
}
# Detach every rxe device listed in @rxe_array, then unload rdma_rxe.  If the
# module's sysfs directory is still there afterwards, tell the user that a
# reboot is needed.
sub do_stop {
    for my $dev (@rxe_array) {
        system("echo '$dev' > $sys/remove");
    }

    system("rmmod rdma_rxe") if (-e $sys);

    # checked again on purpose: this is the result of the rmmod above
    print "unable to unload drivers, reboot required\n" if (-e $sys);
}
# set and/or show the driver's debug mask
#
# Parameters:
#   $arg2 - "on" (mask 31), "off" (mask 0), a decimal number from 0 to 31, or
#           nothing to only display the current setting
# Writes the new mask to the module's "debug" parameter when one was given,
# then reads the parameter back and prints whether debugging is on.
sub do_debug {
    my $arg2 = defined($_[0]) ? $_[0] : "";
    my $debugfile = "$parms/debug";

    chomp($arg2);

    if (!(-e $debugfile)) {
        print "Error: debug is compiled out of this rxe driver\n";
        return;
    }

    my $new_value;
    if ($arg2 eq "on") {
        $new_value = 31;
    }
    elsif ($arg2 eq "off") {
        $new_value = 0;
    }
    elsif ($arg2 eq "") {
        # display only
    }
    # numeric range check: string comparison would reject "4" and accept
    # arbitrary text that merely sorts between "0" and "31"
    elsif ($arg2 =~ /\A\d+\z/ && $arg2 <= 31) {
        $new_value = $arg2;
    }
    else {
        print "unrecognized debug cmd ($arg2)\n";
    }

    if (defined($new_value)) {
        # write the sysfs parameter directly instead of going through a shell
        if (open(my $fh, '>', $debugfile)) {
            print {$fh} "$new_value\n";
            if (!close($fh)) {
                print STDERR "Warning: unable to set debug: $!\n";
            }
        }
        else {
            print STDERR "Warning: unable to open $debugfile: $!\n";
        }
    }

    my $current = read_file($debugfile);
    $current = "" if (!defined($current));
    chomp($current);

    # only compare numerically once the value is known to be a number
    if ($current !~ /\A-?\d+\z/) {
        print "Unrecognized debug value\n";
    }
    elsif ($current > 0) {
        print "Debug is ON ($current)\n";
    }
    elsif ($current == 0) {
        print "Debug is OFF\n";
    }
    else {
        print "Unrecognized debug value\n";
    }
}
# Return the numerically larger of two values; the second one on a tie.
sub max {
    my ($first, $second) = @_;

    return ($first > $second) ? $first : $second;
}
# show usage for rxe_cfg
#
# Prints the command summary to the currently selected output handle.  Only
# options that the option parser in main() actually accepts are listed.
sub usage {
    my @text = (
        " Usage:",
        " rxe_cfg [options] start|stop|status|persistent",
        " rxe_cfg debug on|off|<num>",
        " rxe_cfg [-n] add <ndev>",
        " rxe_cfg [-n] remove <ndev>|<rdev>",
        "",
        " <ndev> = network device e.g. eth3",
        " <rdev> = rdma device e.g. rxe1",
        "",
        " Options:",
        " -h: print this usage information",
        " -n: do not make the configuration action persistent",
        " -v: print additional debug output",
        " -l: show status for interfaces with link up",
    );

    print map { "$_\n" } @text;
}
# Entry point: parse the command line and dispatch to the requested command.
#
# Order of evaluation:
#   1. option errors           -> usage, exit status 1
#   2. -h / --help             -> usage
#   3. no command or "status"  -> status table (optional <rdev> filter)
#   4. help, start, persistent -> work without the module being loaded
#   5. stop, debug, add, remove -> need rdma_rxe; a notice is printed and the
#      script exits when the module is not loaded
# An unknown command prints a diagnostic and the usage text, exit status 1.
sub main {
    my $options_ok = GetOptions(
        "-h" => \$help,
        "--help" => \$help,
        "-n" => \$no_persist,
        "-v:+" => \$debug,
        "-f" => \$force,
        "-l" => \$linkonly,
    );
    if (!$options_ok) {
        usage();
        exit 1;
    }

    my $arg1 = $ARGV[0];
    my $arg2 = $ARGV[1];

    # must come before the status default, otherwise "-h" on its own would
    # print the status table instead of the usage text
    if ($help) {
        usage();
        exit;
    }

    # status is the default
    if (!defined($arg1) || ($arg1 =~ /status/)) {
        do_status($arg2);
        exit;
    }

    # stuff that does not require modules to be loaded
    if ($arg1 eq "help") { usage(); exit; }
    elsif ($arg1 eq "start") { do_start(); do_status(); exit; }
    elsif ($arg1 eq "persistent") { system("cat $persistence_file"); exit; }

    # Stuff that requires the rdma_rxe module to be loaded
    my %module_command = (
        "stop"   => sub { do_stop(); },
        "debug"  => sub { do_debug($arg2); },
        "add"    => sub { rxe_add($arg2); },
        "remove" => sub { rxe_remove($arg2); },
    );
    my $handler = $module_command{$arg1};
    if (!defined($handler)) {
        print "unrecognized command ($arg1)\n";
        usage();
        exit 1;
    }

    # can't do much else, bail if modules aren't loaded
    if (check_module_status()) {
        show_module_status();
        exit;
    }

    # create persistence file if necessary (best effort)
    make_path($persistence_path, { error => \my $mkdir_err });
    if ($mkdir_err && @{$mkdir_err}) {
        print STDERR "Warning: unable to create $persistence_path\n";
    }
    if (!(-e $persistence_file)) {
        if (open(my $fh, '>>', $persistence_file)) {
            close($fh);
        }
        else {
            print STDERR "Warning: unable to create $persistence_file: $!\n";
        }
    }

    # Get full context of the configuration
    populate_persistence();
    get_names();
    get_dev_info();

    $handler->();
    exit;
}
main();
exit;