#! /usr/bin/env perl
#
# (C) 2014 by Argonne National Laboratory.
#     See COPYRIGHT in top-level directory.
#

use warnings;
use strict;
use Data::Dumper;

# For a choice buffer arugment, usually it has two companion arguments (count, datatype) after it.
# But this is not always true. So we have this table to store the info. An entry of the table is in
# form of "func => pos". pos is a flattened array of triplet <buffer_idx, count_idx, datatype_idx>.
# buffer_idx means the buffer_idx-th argument of func is a choice buffer;
# count_idx means the count_idx-th arugment of func is the count argument for this choice buffer;
# datatype_idx means the datatype_idx-th arugment of func is the datatype argument for this choice
# buffer; Note these indices are 0-based. It is easy to see pos's size is a multiple of 3.
#
# For some choice buffers, they don't have the companion (count, datatype). We put special values
# to their count_idx and datatype_idx. There are three cases here.
#
# 1. The buffer must be simply contiguous, such as that in MPI_Buffer_attach.
#    count_idx, datatype_idx are set to -1.
# 2. The buffer share the same (count, datatype) with another buffer, such as sendbuf in MPI_Reduce.
#    Strictly speaking, we need to check at runtime the two buffers have the same shape and stride.
#    Currently, we just set count_idx and datatype_idx of the first choice buffer to -2.
# 3. For some functions, such as MPI_Alltoallw, though MPI Standard doesn't say a choice buffer
#    arg must be simply contiguous, we don't support non-contiguous buffers there. Because generally
#    it is hard to implement it efficiently, and I don't see values of that. Please remember that
#    subarrays are meant to provide the programmer a convenient language construct with reasonable
#    overhead, not something that can hide the huge complexity of datatype creation but incurs a good
#    deal of overhead. If this complexity is needed, it shold be done by the programmer explicitly
#    but not implicitly done by the runtime. So we treat case 3 as case 1. We can checks to
#    enforce "simply contiguous".

my %bufpos = (
    "MPI_Accumulate" => [0,  1,  2],
    "MPI_Allgather" => [0,  1,  2, 3,  4,  5],
    "MPI_Allgatherv" => [0,  1,  2, 3, -1, -1],
    "MPI_Allreduce" => [0, -1, -1, 1,  2,  3],
    "MPI_Alltoall" => [0,  1,  2, 3,  4,  5],
    "MPI_Alltoallv" => [0, -1, -1, 4, -1, -1],
    "MPI_Alltoallw" => [0, -1, -1, 4, -1, -1],
    "MPI_Bcast" => [0, 1, 2],
    "MPI_Bsend" => [0, 1, 2],
    "MPI_Bsend_init" => [0, 1, 2],
    "MPI_Buffer_attach" => [0, -1, -1],
    "MPI_Compare_and_swap" =>[0, -1, -1, 1, -1, -1, 2, -1, -1],
    "MPI_Exscan" => [0, -1, -1, 1, 2, 3],
    "MPI_Fetch_and_op" => [0, -1, -1, 1, -1, -1],
    "MPI_File_iread_at" => [2, 3, 4],
    "MPI_File_iread" => [1, 2, 3],
    "MPI_File_iread_shared" => [1, 2, 3],
    "MPI_File_iwrite_at" => [2, 3, 4],
    "MPI_File_iwrite" => [1, 2, 3],
    "MPI_File_iwrite_shared" => [1, 2, 3],
    "MPI_File_read_all_begin" => [1, 2, 3],
    "MPI_File_read_all" => [1, 2, 3],
    "MPI_File_read_all_end" => [1, -1, -1],
    "MPI_File_read_at_all_begin" => [2, 3, 4],
    "MPI_File_read_at_all" => [2, 3, 4],
    "MPI_File_read_at_all_end" => [1, -1, -1],
    "MPI_File_read_at" => [2, 3, 4],
    "MPI_File_read" => [1, 2, 3],
    "MPI_File_read_ordered_begin" => [1, 2, 3],
    "MPI_File_read_ordered" => [1, 2, 3],
    "MPI_File_read_ordered_end" => [1, -1, -1],
    "MPI_File_read_shared" => [1, 2, 3],
    "MPI_File_write_all_begin" => [1, 2, 3],
    "MPI_File_write_all" => [1, 2, 3],
    "MPI_File_write_all_end" => [1, -1, -1],
    "MPI_File_write_at_all_begin" => [2, 3, 4],
    "MPI_File_write_at_all" => [2, 3, 4],
    "MPI_File_write_at_all_end" => [1, -1, -1],
    "MPI_File_write_at" => [2, 3, 4],
    "MPI_File_write" => [1, 2, 3],
    "MPI_File_write_ordered_begin" => [1, 2, 3],
    "MPI_File_write_ordered" => [1, 2, 3],
    "MPI_File_write_ordered_end" => [1, -1, -1],
    "MPI_File_write_shared" => [1, 2, 3],
    "MPI_Free_mem" => [0, -1, -1],
    "MPI_Gather" => [0, 1, 2, 3, 4, 5],
    "MPI_Gatherv" => [0, 1, 2, 3, -1, -1],
    "MPI_Get_accumulate" => [0, 1, 2, 3, -1, -1],
    "MPI_Get_address" => [0, -1, -1],
    "MPI_Get" => [0, 1, 2],
    "MPI_Iallgather" => [0, 1, 2, 3, 4, 5],
    "MPI_Iallgatherv" => [0, 1, 2, 3, -1, -1],
    "MPI_Iallreduce" => [0, -2, -2, 1, 2, 3],
    "MPI_Ialltoall" => [0, 1, 2, 3, 4, 5],
    "MPI_Ialltoallv" => [0, -1, -1, 4, -1, -1],
    "MPI_Ialltoallw" => [0, -1, -1, 4, -1, -1],
    "MPI_Ibcast" => [0, 1, 2],
    "MPI_Ibsend" => [0, 1, 2],
    "MPI_Iexscan" => [0, -2, -2, 1, 2, 3],
    "MPI_Igather" => [0, 1, 2, 3, 4, 5],
    "MPI_Igatherv" => [0, 1, 2, 3, -1, -1],
    "MPI_Imrecv" => [0, 1, 2],
    "MPI_Ineighbor_allgather" => [0, 1, 2, 3, 4, 5],
    "MPI_Ineighbor_allgatherv" => [0, 1, 2, 3, -1, -1],
    "MPI_Ineighbor_alltoall" => [0, 1, 2, 3, 4, 5],
    "MPI_Ineighbor_alltoallv" => [0, -1, -1, 4, -1, -1],
    "MPI_Ineighbor_alltoallw" => [0, -1, -1, 4, -1, -1],
    "MPI_Irecv" => [0, 1, 2],
    "MPI_Ireduce" => [0, -2, -2, 1, 2, 3],
    "MPI_Ireduce_scatter_block" => [0, -2, -2, 1, 2, 3],
    "MPI_Ireduce_scatter" => [0, -2, -2, 1, -2, -2],
    "MPI_Irsend" => [0, 1, 2],
    "MPI_Iscan" => [0, -2, -2, 1, 2, 3],
    "MPI_Iscatter" => [0, 1, 2, 3, 4, 5],
    "MPI_Iscatterv" => [0, -1, -1, 4, -1, -1],
    "MPI_Isend" => [0, 1, 2],
    "MPI_Issend" => [0, 1, 2],
    "MPI_Mrecv" => [0, 1, 2],
    "MPI_Neighbor_allgather" => [0, 1, 2, 3, 4, 5],
    "MPI_Neighbor_allgatherv" => [0, 1, 2, 3, -1, -1],
    "MPI_Neighbor_alltoall" => [0, 1, 2, 3, 4, 5],
    "MPI_Neighbor_alltoallv" => [0, -1, -1, 4, -1, -1],
    "MPI_Neighbor_alltoallw" => [0, -1, -1, 4, -1, -1],
    "MPI_Pack" => [0, 1, 2, 3, -1, -1],
    "MPI_Pack_external" => [1, 2, 3, 4, -1, -1],
    "MPI_Put" => [0, 1, 2],
    "MPI_Raccumulate" => [0, 1, 2],
    "MPI_Recv" => [0, 1, 2],
    "MPI_Recv_init" => [0, 1, 2],
    "MPI_Reduce" => [0, -2, -2, 1, 2, 3],
    "MPI_Reduce_local" => [0, -2, -2, 1, 2, 3],
    "MPI_Reduce_scatter_block" => [0, -2, -2, 1, 2, 3],
    "MPI_Reduce_scatter" => [0, -2, -2, 1, -2, -2],
    "MPI_Rget_accumulate" => [0, 1, 2, 3, 4, 5],
    "MPI_Rget" => [0, 1, 2],
    "MPI_Rput" => [0, 1, 2],
    "MPI_Rsend" => [0, 1, 2],
    "MPI_Rsend_init" => [0, 1, 2],
    "MPI_Scan" => [0, -2, -2, 1, 2, 3],
    "MPI_Scatter" => [0, 1, 2, 3, 4, 5],
    "MPI_Scatterv" => [0, -1, -1, 4, -1, -1],
    "MPI_Send" => [0, 1, 2],
    "MPI_Send_init" => [0, 1, 2],
    "MPI_Sendrecv" => [0, 1, 2, 5, 6, 7],
    "MPI_Sendrecv_replace" => [0, 1, 2],
    "MPI_Ssend" => [0, 1, 2],
    "MPI_Ssend_init" => [0, 1, 2],
    "MPI_Unpack" => [0, -1, -1, 3, 4, 5],
    "MPI_Unpack_external" => [1, -1, -1, 4, 5, 6],
    "MPI_Win_attach" => [1, -1, -1],
    "MPI_Win_create" => [0, -1, -1],
    "MPI_Win_detach" => [1, -1, -1]
);

# Choice buffers in some functions can be passed in MPI_IN_PLACE. We store such
# info in this table. "func => idx" means the idx-th argument of func is a choice
# buffer and can be passed in MPI_IN_PLACE. Here, idx starts from 0. Note that one
# function can have at most one such argument.
my %inplace = (
    "MPI_Allgather" => 0,
	"MPI_Allgatherv" => 0,
	"MPI_Allreduce" => 0,
	"MPI_Alltoall" => 0,
	"MPI_Alltoallv" => 0,
	"MPI_Alltoallw" => 0,
	"MPI_Exscan" => 0,
	"MPI_Gather" => 0,
	"MPI_Gatherv" => 0,
	"MPI_Iallgather" => 0,
	"MPI_Iallgatherv" => 0,
	"MPI_Iallreduce" => 0,
	"MPI_Ialltoall" => 0,
	"MPI_Ialltoallv" => 0,
	"MPI_Ialltoallw" => 0,
	"MPI_Igather" => 0,
	"MPI_Igatherv" => 0,
	"MPI_Ireduce_scatter_block" => 0,
	"MPI_Ireduce_scatter" => 0,
	"MPI_Ireduce" => 0,
	"MPI_Iscan" => 0,
	"MPI_Iscatter" => 3,
	"MPI_Iscatterv" => 4,
	"MPI_Reduce_scatter" => 0,
	"MPI_Reduce_scatter_block" => 0,
	"MPI_Reduce" => 0,
	"MPI_Scan" => 0,
	"MPI_Scatter" => 3,
	"MPI_Scatterv" => 4
);

# Some functions have a void* argument in C, but the argument is actually not
# a choice buffer (i.e., of type assumed-type, assumed-rank). So we just skip
# these functions in parsing.
my @skipped_funcs_tmp = (
    "MPI_Address",
    "MPI_Alloc_mem",
    "MPI_Attr_get",
    "MPI_Attr_put",
    "MPI_DUP_FN",
    "MPI_Grequest_start",
    "MPI_Comm_create_keyval",
    "MPI_Comm_set_attr",
    "MPI_Comm_get_attr",
    "MPI_Type_create_keyval",
    "MPI_Type_set_attr",
    "MPI_Type_get_attr",
    "MPI_Win_create_keyval",
    "MPI_Win_set_attr",
    "MPI_Win_get_attr",
    "MPI_Buffer_detach",
    "MPI_Keyval_create",
    "MPI_Register_datarep",
    "MPI_Win_allocate",
    "MPI_Win_allocate_shared",
    "MPI_Win_shared_query"
);

my %skipped_funcs = map { $_ => 1 } @skipped_funcs_tmp;
my $eol = 1;
my $fullline = "";
my $tab = "    ";
my $retarg;
my $routine;
my $args;
my @arglist;
my $fname;
my $cdesc_routine;
my $x;
my $y;
my @argbits;
my $num_dtypes;
my @dtype_bind;
my $io_header;
my $make_exists = 0;

# Check to make sure the file was passed in as a parameter
if ($#ARGV != 0) {
    print "Usage: buildiface <filename>\n";
    exit 1;
}

open(FD, $ARGV[0]) || die "Could not open file " . $ARGV[0];

while (<FD>) {
    if (/\/\*\s*Begin Prototypes/) { last; }
}

# Check to see if this is mpio.h.in. If so, we have some more to do later
if ($ARGV[0] =~ /mpio\.h\.in/) {
    $io_header = 1;
} else {
    $io_header = 0;
}

if (-e "cdesc.h") {
    open(CDESCH, ">>cdesc.h") || die "Could not open file cdesc.h";
} else {
    open(CDESCH, ">cdesc.h") || die "Could not open file cdesc.h";
    print CDESCH <<EOT;
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2014 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 *
 * This file is automatically generated by buildiface
 * DO NOT EDIT
 */

#include <stdio.h>
#include <stdlib.h>
#include <ISO_Fortran_binding.h>
#include <mpi.h>

extern int cdesc_create_datatype(CFI_cdesc_t *cdesc, int oldcount, MPI_Datatype oldtype, MPI_Datatype *newtype);
extern int MPIR_Fortran_array_of_string_f2c(const char* strs_f, char*** strs_c, int str_len, int know_size, int size);
extern int MPIR_Comm_spawn_c(const char *command, char *argv_f, int maxprocs, MPI_Info info, int root,
        MPI_Comm comm, MPI_Comm *intercomm, int* array_of_errcodes, int argv_elem_len);
extern int MPIR_Comm_spawn_multiple_c(int count, char *array_of_commands_f,
        char *array_of_argv_f, const int* array_of_maxprocs,
        const MPI_Info *array_of_info, int root, MPI_Comm comm,
        MPI_Comm *intercomm, int* array_of_errcodes,
        int commands_elem_len, int argv_elem_len);
extern int MPIR_F_sync_reg_cdesc(CFI_cdesc_t* buf);

EOT
}

open(OUTFD, ">cdesc.c") || die "Could not open file cdesc.c";
print OUTFD <<EOT;
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2014 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 *
 * This file is automatically generated by buildiface
 * DO NOT EDIT
 */

#include "cdesc.h"

int cdesc_create_datatype(CFI_cdesc_t *cdesc, int oldcount, MPI_Datatype oldtype, MPI_Datatype *newtype)
{
    const int MAX_RANK = 15; /* Fortran 2008 specifies a maximum rank of 15 */
    MPI_Datatype types[MAX_RANK + 1]; /* Use a fixed size array to avoid malloc. + 1 for oldtype */
    int mpi_errno = MPI_SUCCESS;
    int accum_elems = 1;
    int accum_sm = cdesc->elem_len;
    int done = 0; /* Have we created a datatype for oldcount of oldtype? */
    int last; /* Index of the last successfully created datatype in types[] */
    int extent;
    int i, j;

#ifdef HAVE_ERROR_CHECKING
    {
        int size;
        MPIU_Assert(cdesc->rank <= MAX_RANK);
        MPI_Type_size(oldtype, &size);
        /* When cdesc->elem_len != size, things suddenly become complicated. Generally, it is hard to create
         * a composite datatype based on two datatypes. Currently we don't support it and doubt it is usefull.
         */
        MPIU_Assert(cdesc->elem_len == size);
    }
#endif

    types[0] = oldtype;
    i = 0;
    done = 0;
    while (i < cdesc->rank && !done) {
        if (oldcount % accum_elems) {
            /* oldcount should be a multiple of accum_elems, otherwise we might need an
             * MPI indexed datatype to describle the irregular region, which is not supported yet.
             */
            mpi_errno = MPI_ERR_INTERN;
            goto fn_fail;
        }

        extent = oldcount / accum_elems;
        if (extent > cdesc->dim[i].extent) {
            extent = cdesc->dim[i].extent;
        } else {
            /* Up to now, we have accumlated enough elements */
            done = 1;
        }

        if (cdesc->dim[i].sm == accum_sm) {
            mpi_errno = MPI_Type_contiguous(extent, types[i], &types[i+1]);
        } else {
            mpi_errno = MPI_Type_create_hvector(extent, 1, cdesc->dim[i].sm, types[i], &types[i+1]);
        }
        if (mpi_errno != MPI_SUCCESS) {
            last = i; goto fn_fail;
        }

        mpi_errno = MPI_Type_commit(&types[i+1]);
        if (mpi_errno != MPI_SUCCESS) {
            last = i + 1; goto fn_fail;
        }

        accum_sm = cdesc->dim[i].sm * cdesc->dim[i].extent;
        accum_elems  *= cdesc->dim[i].extent;
        i++;
    }

    if (done) {
        *newtype = types[i];
        last = i - 1; /* To avoid freeing newtype */
    } else {
        /* If # of elements given by "oldcount oldtype" is bigger than
         * what cdesc describles, then we will reach here.
         */
        last = i;
        mpi_errno = MPI_ERR_ARG;
        goto fn_fail;
    }

fn_exit:
    for (j = 1; j <= last; j++)
        MPI_Type_free(&types[j]);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
EOT
close OUTFD;

unless (-e "Makefile.mk") {
    open(MAKEFD, ">Makefile.mk") || die "Could not open Makefile.mk\n";
    print MAKEFD <<EOT;
## DO NOT EDIT
## This file created by buildiface
##
## vim: set ft=automake :

# ensure that the buildiface script ends up in the release tarball
EXTRA_DIST += src/binding/fortran/use_mpi_f08/wrappers_c/buildiface

if BUILD_F08_BINDING
mpi_fc_sources += \\
EOT
} else {
    open(MAKEFD, ">>Makefile.mk") || die "Could not open Makefile.mk\n";
    $make_exists = 1;
}

while (<FD>) {
    if (/\/\*\s*End Prototypes/) { last; }

    if (/\/\*\s*Begin Skip Prototypes/) {
        while (<FD>) {
            if (/\/\*\s*End Skip Prototypes/) { last; }
        }
    }

    # Skip lines starting with # such as #ifdef or #endif
    if (/^\s*#/) { next; }

    # If we found a semi-colon at the end, that's the end of the line.
    # This is not perfect (e.g., does not work when a single line has
    # multiple semi-colon separated statements), but should be good
    # enough for the MPICH mpi.h file
    if (/.*;$/) { $eol = 1; }
    else { $eol = 0; }

    chomp($_);
    $fullline .= "$_";
    if ($eol == 0) { next; }

    # We got the entire prototype in a single line

    # parse out comments
    $fullline =~ s+/\*.*\*/++g;

    # parse out attributes
    $fullline =~ s/MPICH_ATTR_POINTER_WITH_TYPE_TAG\(.*\)//g;

    # parse out unnecessary spaces
    $fullline =~ s/^ *//g;
    $fullline =~ s/ *$//g;

    # split the line into the return type, routine name, and arguments
    $fullline =~ m/([^ ]*) ([^(]*)\((.*)\)/;
    $retarg = $1;
    $routine = $2;
    $args = $3;

    # cleanup args
    $args =~ s/\s\s*/ /g;
    $args =~ s/^\s*//g;
    $args =~ s/\s*$//g;

    # Skip routines with void* (but not choice buffer) arguments
    if (exists($skipped_funcs{$routine})) {
        $fullline = "";
        next;
    }

    @arglist = split(/,/, $args);

    if (grep/void\s*\*/, @arglist) {
        $fname = "$routine";
        $fname =~ s/MPI_//g;
        $fname =~ s/MPIX_//g;
        $fname =~ tr/A-Z/a-z/;
        $fname .= "_cdesc.c";

        print MAKEFD "\tsrc/binding/fortran/use_mpi_f08/wrappers_c/$fname \\\n";
        open(CFILE, ">$fname") || die "Could not open $fname\n";

        # replace MPI(X)_Foo with MPIR_Foo_cdesc
        $cdesc_routine = $routine;
        $cdesc_routine =~ s/MPI_/MPIR_/g;
        $cdesc_routine =~ s/MPIX_/MPIR_/g;
        $cdesc_routine .= "_cdesc";

        print CFILE <<EOT;
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2014 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 *
 * This file is automatically generated by buildiface
 * DO NOT EDIT
 */

#include "cdesc.h"
EOT
        print CFILE "\n$retarg $cdesc_routine(";
        print CDESCH "extern $retarg $cdesc_routine(";
        for ($x = 0; $x <= $#arglist; $x++) {
            $arglist[$x] =~ s/^\s*//g;
            $arglist[$x] =~ s/\s*$//g;
        }

        for ($x = 0; $x <= $#arglist; $x++) {
            # remove variable names in arguments
            @argbits = split(/ /, $arglist[$x]);
            $arglist[$x] = "";
            for ($y = 0; $y <= $#argbits; $y++) {
                $argbits[$y] =~ s/\*.*/*/g;
                $argbits[$y] =~ s/[^ ]*\[\]/[]/g;
                if ($y < $#argbits) {
                    $arglist[$x] .= "$argbits[$y] ";
                }
                else {
                    if ($argbits[$y] =~ /\[\]/ || $argbits[$y] =~ /\*/) {
                        $arglist[$x] .= "$argbits[$y] ";
                    }
                    else {
                        # reduce the array size by one to drop the last bit
                        $#argbits--;
                    }
                }
            }
             # replace void* with CFI_cdesc_t*
             if ($arglist[$x] =~ /.*void\s*\*/) {
                 $arglist[$x] = "CFI_cdesc_t*";
             }

            @argbits = split(/ /, $arglist[$x]);

            if ($x) {
                print CFILE  ", ";
                print CDESCH ", ";
            }

            # print out all but the last bit of the argument
            for ($y = 0; $y < $#argbits; $y++) {
                print CFILE "$argbits[$y] ";
                print CDESCH "$argbits[$y] ";
            }

            # deal with [] structures for the last bit
            if ($argbits[$#argbits] =~ /\[\]/) {
                print CFILE "x$x\[\]";
                print CDESCH "x$x\[\]";
            }
            else {
                print CFILE  "$argbits[$#argbits] x$x";
                print CDESCH "$argbits[$#argbits] x$x";
            }
        }
        print CFILE ")\n{\n";
        print CDESCH ");\n";

        #================================================
        #      Print body of the C wrapper function
        #================================================
        print CFILE "    int err = MPI_SUCCESS;\n";
        if ($io_header) {
            print CFILE "#ifdef MPI_MODE_RDONLY\n"
        }

        if (!exists($bufpos{$routine})) {
            die "Error: $routine has choice buffer(s) but is not defined in bufpos!\n";
        }

        my @vec = @{$bufpos{$routine}}; # directly copy since @vec is small

        # Temp variable declaration
        for (my $i = 0; $i < $#vec; $i += 3) {
            print CFILE "    void *buf$vec[$i] = x$vec[$i]"."->base_addr;\n";
            if ($vec[$i + 1] >= 0) {
                print CFILE "    int count$vec[$i] = x$vec[$i+1];\n";
                print CFILE "    MPI_Datatype dtype$vec[$i] = x$vec[$i+2];\n";
            }
        }
        print CFILE "\n";

        # Handle MPI_BOTTOM and MPI_IN_PLACE
        for (my $i = 0; $i < $#vec; $i += 3) {
            print CFILE "    if (buf$vec[$i] == &MPIR_F08_MPI_BOTTOM) {\n";
            print CFILE "        buf$vec[$i] = MPI_BOTTOM;\n";
            if (defined($inplace{$routine}) && $i == $inplace{$routine}) {
                print CFILE "    } else if (buf$vec[$i] == &MPIR_F08_MPI_IN_PLACE) {\n";
                print CFILE "        buf$vec[$i] = MPI_IN_PLACE;\n";
            }
            print CFILE "    }\n\n";
        }

        # Test if a subarray arg is contiguous. If it is, generate a new datatype for it.
        for (my $i = 0; $i < $#vec; $i += 3) {
            if ($vec[$i + 1] >= 0) {
                print CFILE "    if (x$vec[$i]"."->rank != 0 && !CFI_is_contiguous(x$vec[$i])) {\n";
                print CFILE "        err = cdesc_create_datatype(x$vec[$i], x$vec[$i+1], x$vec[$i+2], &dtype$vec[$i]);\n";
                print CFILE "        count$vec[$i] = 1;\n";
                print CFILE "    }\n\n";
            }
        }

        # Print the function call with proper argument substitution.
        print CFILE "    err = $routine(";
        for (my $i = 0; $i <= $#arglist; ) {
            if ($i) { print CFILE ", "; }
            if ($arglist[$i] =~ /CFI_cdesc_t\*/) {
                my $j = 0;
                while ($vec[$j] != $i) { $j++; }
                if ($vec[$j + 1] >= 0) {
                    print CFILE "buf$i, count$i, dtype$i";
                    $i += 3;
                } else {
                    print CFILE "buf$i";
                    $i++;
                }
            } else {
                print CFILE "x$i";
                $i++;
            }
        }
        print CFILE ");\n\n";

        # Free newly created datatypes if any
        for (my $i = 0; $i < $#vec; $i += 3) {
            if ($vec[$i + 1] >= 0) {
                print CFILE "    if (dtype$vec[$i] != x$vec[$i+2])  MPI_Type_free(&dtype$vec[$i]);\n";
            }
        }

        if ($io_header) { print CFILE "#else\n"; }
        if ($io_header) { print CFILE "    err = MPI_ERR_INTERN;\n"; }
        if ($io_header) { print CFILE "#endif\n"; }

        print CFILE "    return err;\n";
        print CFILE "}\n";
        close CFILE;
    }
    $fullline = "";
}

if ($make_exists) {
    print MAKEFD "\tsrc/binding/fortran/use_mpi_f08/wrappers_c/cdesc.c \\\n";
    print MAKEFD "\tsrc/binding/fortran/use_mpi_f08/wrappers_c/comm_spawn_c.c \\\n";
    print MAKEFD "\tsrc/binding/fortran/use_mpi_f08/wrappers_c/comm_spawn_multiple_c.c \\\n";
    print MAKEFD "\tsrc/binding/fortran/use_mpi_f08/wrappers_c/f_sync_reg_c.c \\\n";
    print MAKEFD "\tsrc/binding/fortran/use_mpi_f08/wrappers_c/utils.c\n\n";
    print MAKEFD <<EOT;
AM_CPPFLAGS += -I\${master_top_srcdir}/src/binding/fortran/use_mpi_f08/wrappers_c

noinst_HEADERS += src/binding/fortran/use_mpi_f08/wrappers_c/cdesc.h

endif BUILD_F08_BINDING
EOT
}

close MAKEFD;
close CDESCH;
