From 9e4b55b40f30280d45135aba8e09a8bcf4bd6959 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 31 Mar 2021 23:00:10 -0500 Subject: [PATCH 1/6] datatype: fix n_builtin_elements for struct Even though a struct may have heterogeneous basic datatypes, the builtin-elements count is still meaningful. For example, it is the number of entries in the typemap. This commit prepares for typemap utilities. Typemap is useful for debugging datatype issues and simplifying algorithms. --- src/mpi/datatype/typerep/src/typerep_dataloop_create.c | 5 ++++- src/mpi/datatype/typerep/src/typerep_yaksa_create.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mpi/datatype/typerep/src/typerep_dataloop_create.c b/src/mpi/datatype/typerep/src/typerep_dataloop_create.c index 725f92e284c..7bd171240eb 100644 --- a/src/mpi/datatype/typerep/src/typerep_dataloop_create.c +++ b/src/mpi/datatype/typerep/src/typerep_dataloop_create.c @@ -627,6 +627,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle MPI_Aint size = 0; MPI_Datatype el_type = MPI_DATATYPE_NULL; MPI_Aint true_lb_disp = 0, true_ub_disp = 0, lb_disp = 0, ub_disp = 0; + MPI_Aint el_count = 0; for (MPI_Aint i = 0; i < count; i++) { MPI_Aint tmp_lb, tmp_ub, tmp_true_lb, tmp_true_ub; @@ -652,6 +653,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle tmp_true_ub = tmp_ub; size += tmp_el_sz * array_of_blocklengths[i]; + el_count += array_of_blocklengths[i]; } else { MPIR_Datatype_get_ptr(array_of_types[i], old_dtp); @@ -668,6 +670,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle tmp_true_ub = tmp_ub + (old_dtp->true_ub - old_dtp->ub); size += old_dtp->size * array_of_blocklengths[i]; + el_count += array_of_blocklengths[i] * old_dtp->n_builtin_elements; } /* element size and type */ @@ -731,7 +734,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle old_are_contig = 0; } - 
newtype->n_builtin_elements = -1; /* TODO */ + newtype->n_builtin_elements = el_count; newtype->builtin_element_size = el_sz; newtype->basic_type = el_type; diff --git a/src/mpi/datatype/typerep/src/typerep_yaksa_create.c b/src/mpi/datatype/typerep/src/typerep_yaksa_create.c index 25fd913d674..1ee51f77c3a 100644 --- a/src/mpi/datatype/typerep/src/typerep_yaksa_create.c +++ b/src/mpi/datatype/typerep/src/typerep_yaksa_create.c @@ -322,6 +322,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle MPI_Aint el_sz = 0; MPI_Datatype el_type = MPI_DATATYPE_NULL; int found_el_type = 0; + MPI_Aint el_count = 0; for (int i = 0; i < count; i++) { MPI_Aint tmp_el_sz; MPI_Datatype tmp_el_type; @@ -333,10 +334,12 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle if (HANDLE_IS_BUILTIN(array_of_types[i])) { tmp_el_sz = MPIR_Datatype_get_basic_size(array_of_types[i]); tmp_el_type = array_of_types[i]; + el_count += array_of_blocklengths[i]; } else { MPIR_Datatype_get_ptr(array_of_types[i], old_dtp); tmp_el_sz = old_dtp->builtin_element_size; tmp_el_type = old_dtp->basic_type; + el_count += array_of_blocklengths[i] * old_dtp->n_builtin_elements; } if (found_el_type == 0) { @@ -351,7 +354,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle el_type = MPI_DATATYPE_NULL; } } - newtype->n_builtin_elements = -1; /* TODO */ + newtype->n_builtin_elements = el_count; newtype->builtin_element_size = el_sz; newtype->basic_type = el_type; From f9a443c7a406ce37df09c5b6fde9b71da12f5630 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 17 Apr 2021 15:49:45 -0500 Subject: [PATCH 2/6] typerep/yaksa: handle MPI_2INT in element stats Handle MPI_2INT correctly for n_builtin_elements, builtin_element_size, and basic_type. 
--- src/mpi/datatype/typerep/src/typerep_yaksa_create.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mpi/datatype/typerep/src/typerep_yaksa_create.c b/src/mpi/datatype/typerep/src/typerep_yaksa_create.c index 1ee51f77c3a..a31bf6c9291 100644 --- a/src/mpi/datatype/typerep/src/typerep_yaksa_create.c +++ b/src/mpi/datatype/typerep/src/typerep_yaksa_create.c @@ -46,6 +46,10 @@ static int update_yaksa_type(MPIR_Datatype * newtype, MPI_Datatype oldtype, MPI_ if (count == 0) { /* this is a struct, deal with it in MPIR_Typerep_create_struct */ + } else if (oldtype == MPI_2INT) { + newtype->n_builtin_elements = count * 2; + newtype->builtin_element_size = (MPI_Aint) MPIR_Datatype_get_basic_size(MPI_INT); + newtype->basic_type = MPI_INT; } else if (HANDLE_IS_BUILTIN(oldtype)) { MPI_Aint el_sz = (MPI_Aint) MPIR_Datatype_get_basic_size(oldtype); newtype->n_builtin_elements = count; From 63a84a0a5ff8be6ff54e9512944f48e78fb8c5ef Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 14 Apr 2021 21:11:28 -0500 Subject: [PATCH 3/6] datatype: add typemap.c Adds type map and type signature routines. These routines are useful for debugging and datatype functions such as type matching and external32 packing/unpacking. 
--- src/include/mpir_datatype.h | 19 + src/mpi/datatype/Makefile.mk | 1 + src/mpi/datatype/typemap.c | 1343 ++++++++++++++++++++++++++++++++++ 3 files changed, 1363 insertions(+) create mode 100644 src/mpi/datatype/typemap.c diff --git a/src/include/mpir_datatype.h b/src/include/mpir_datatype.h index a1619d20087..aab1f451e05 100644 --- a/src/include/mpir_datatype.h +++ b/src/include/mpir_datatype.h @@ -615,4 +615,23 @@ MPI_Aint MPII_Datatype_blockindexed_count_contig(MPI_Aint count, const MPI_Aint disp_array[], int dispinbytes, MPI_Aint old_extent); +struct typemap { + MPI_Aint n; + MPI_Datatype *types; + MPI_Aint *disps; +}; + +struct typesig { + MPI_Aint n; + MPI_Datatype *types; + MPI_Aint *counts; +}; + +int MPIR_type_dump_typemap(MPI_Datatype dt); +int MPIR_type_dump_typesig(MPI_Datatype dt); +struct typemap *MPIR_type_get_typemap(MPI_Datatype dt); +struct typesig *MPIR_type_get_typesig(MPI_Datatype dt); +void MPIR_type_free_typemap(struct typemap *map); +void MPIR_type_free_typesig(struct typesig *sig); + #endif /* MPIR_DATATYPE_H_INCLUDED */ diff --git a/src/mpi/datatype/Makefile.mk b/src/mpi/datatype/Makefile.mk index f5da7fccf95..59a054d06b4 100644 --- a/src/mpi/datatype/Makefile.mk +++ b/src/mpi/datatype/Makefile.mk @@ -22,4 +22,5 @@ mpi_core_sources += \ src/mpi/datatype/type_create_darray.c \ src/mpi/datatype/type_create_subarray.c \ src/mpi/datatype/type_create_pairtype.c \ + src/mpi/datatype/typemap.c \ src/mpi/datatype/type_debug.c diff --git a/src/mpi/datatype/typemap.c b/src/mpi/datatype/typemap.c new file mode 100644 index 00000000000..29ffdfcf0af --- /dev/null +++ b/src/mpi/datatype/typemap.c @@ -0,0 +1,1343 @@ +/* + * Copyright (C) by Argonne National Laboratory + * See COPYRIGHT in top-level directory + */ + +#include "mpiimpl.h" +#include "datatype.h" +#include +#include +#include + +int MPIR_type_dump_typemap(MPI_Datatype dt); +int MPIR_type_dump_typesig(MPI_Datatype dt); +struct typemap *MPIR_type_get_typemap(MPI_Datatype dt); +struct 
typesig *MPIR_type_get_typesig(MPI_Datatype dt); +void MPIR_type_free_typemap(struct typemap *map); +void MPIR_type_free_typesig(struct typesig *sig); + +static void typemap_set(struct typemap *map, MPI_Datatype dt, MPI_Aint idx, MPI_Aint offset); +static void typesig_set(struct typesig *sig, MPI_Datatype dt, MPI_Aint * p_idx, MPI_Aint count); +static void type_get_n_elem_extent(MPI_Datatype dt, MPI_Aint * p_n_elem, MPI_Aint * p_extent); +static void typesig_check_space(struct typesig *sig, MPI_Aint n); + +int MPIR_type_dump_typemap(MPI_Datatype dt) +{ + MPI_Aint lb; + MPI_Aint extent; + MPIR_Type_get_extent_impl(dt, &lb, &extent); + printf(" %20s: %10ld\n", "lb", (long) lb); + printf(" %20s: %10ld\n", "ub", (long) (lb + extent)); + + struct typemap *map; + map = MPIR_type_get_typemap(dt); + for (int i = 0; i < map->n; i++) { + printf(" %20s: %10ld\n", MPIR_Datatype_builtin_to_string(map->types[i]), + (long) map->disps[i]); + } + + MPIR_type_free_typemap(map); + return MPI_SUCCESS; +} + +int MPIR_type_dump_typesig(MPI_Datatype dt) +{ + struct typesig *sig; + sig = MPIR_type_get_typesig(dt); + for (int i = 0; i < sig->n; i++) { + if (i > 0) { + printf(","); + } + printf("%s:%ld", MPIR_Datatype_builtin_to_string(sig->types[i]), (long) sig->counts[i]); + } + puts(""); + + MPIR_type_free_typesig(sig); + return MPI_SUCCESS; +} + +struct typemap *MPIR_type_get_typemap(MPI_Datatype dt) +{ + struct typemap *map; + + map = (struct typemap *) malloc(sizeof(struct typemap)); + + if (HANDLE_IS_BUILTIN(dt)) { + if (dt == MPI_2INT) { + map->n = 2; + } else { + map->n = 1; + } + } else { + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + map->n = dt_ptr->n_builtin_elements; + } + + MPIR_Assert(map->n > 0); + map->types = MPL_malloc(map->n * sizeof(MPI_Datatype), MPL_MEM_OTHER); + map->disps = MPL_malloc(map->n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint n_elem; + MPI_Aint extent; + typemap_set(map, dt, 0, 0); + return map; +} + 
+struct typesig *MPIR_type_get_typesig(MPI_Datatype dt) +{ + struct typesig *sig; + + sig = (struct typesig *) malloc(sizeof(struct typesig)); + sig->n = 1; + sig->types = MPL_malloc(1 * sizeof(MPI_Datatype), MPL_MEM_OTHER); + sig->counts = MPL_malloc(1 * sizeof(MPI_Aint), MPL_MEM_OTHER); + + MPI_Aint idx = 0; + typesig_set(sig, dt, &idx, 1); + sig->n = idx; + return sig; +} + +void MPIR_type_free_typemap(struct typemap *map) +{ + MPL_free(map->types); + MPL_free(map->disps); + MPL_free(map); +} + +void MPIR_type_free_typesig(struct typesig *sig) +{ + MPL_free(sig->types); + MPL_free(sig->counts); + MPL_free(sig); +} + +void typemap_set(struct typemap *map, MPI_Datatype dt, MPI_Aint idx, MPI_Aint offset) +{ + int *p_ints; + MPI_Aint *p_aints; + MPI_Aint *p_counts; + MPI_Datatype *p_types; + MPI_Aint i; + MPI_Aint j; + + if (HANDLE_IS_BUILTIN(dt)) { + if (dt == MPI_2INT) { + map->types[idx] = MPI_INT; + map->disps[idx] = offset; + map->types[idx + 1] = MPI_INT; + map->disps[idx + 1] = offset + MPIR_Datatype_get_basic_size(MPI_INT); + } else { + map->types[idx] = dt; + map->disps[idx] = offset; + } + return; + } else if (MPIR_DATATYPE_IS_PREDEFINED(dt)) { + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + MPI_Aint disp = dt_ptr->true_ub - MPIR_Datatype_get_basic_size(MPI_INT); + if (dt == MPI_FLOAT_INT) { + map->types[idx] = MPI_FLOAT; + } + if (dt == MPI_DOUBLE_INT) { + map->types[idx] = MPI_DOUBLE; + } + if (dt == MPI_LONG_INT) { + map->types[idx] = MPI_LONG; + } + if (dt == MPI_SHORT_INT) { + map->types[idx] = MPI_SHORT; + } + map->disps[idx] = offset; + map->types[idx + 1] = MPI_INT; + map->disps[idx + 1] = offset + disp; + return; + } else { + MPI_Aint n_elem; + MPI_Aint extent; + + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + MPIR_Datatype_contents *cp = dt_ptr->contents; + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types); + if (cp->nr_counts == 
0) { + if (cp->combiner == MPI_COMBINER_DUP) { + typemap_set(map, p_types[0], idx, offset); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + typemap_set(map, p_types[0], idx, offset + p_aints[0]); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + MPI_Aint idx2 = idx + n_elem; + for (int i = 1; i < p_ints[0]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + extent * i; + idx2++; + } + } + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = p_ints[2] * k * extent; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = p_aints[0]; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_ints[2 + 0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = (p_ints[2 + k] - 
p_ints[2 + 0]) * extent; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_aints[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = p_aints[k] - p_aints[0]; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + int *p_blkl = p_ints + 1; + int *p_disp = p_ints + 1 + p_ints[0]; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_disp[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = (p_disp[k] - p_disp[0]) * extent; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_disp[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = p_disp[k] - p_disp[0]; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = 
map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx; + for (int k = 0; k < p_ints[0]; k++) { + type_get_n_elem_extent(p_types[k], &n_elem, &extent); + typemap_set(map, p_types[k], idx2, offset + p_disp[k]); + idx2 += n_elem; + for (int i = 1; i < p_blkl[k]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + extent * i; + idx2++; + } + } + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[1 + p_ints[0] * 3]; + int *p_sizes = p_ints + 1; + int *p_subsizes = p_ints + 1 + p_ints[0]; + int *p_starts = p_ints + 1 + p_ints[0] * 2; + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < p_ints[0]; i++) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } else { + for (int i = p_ints[0] - 1; i >= 0; i--) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(p_ints[0], sizeof(MPI_Aint), MPL_MEM_OTHER); + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = p_ints[0] - 1; + } else { + i = 0; + } + while (1) { + counters[i]++; + off2 += stride; + if (counters[i] < p_subsizes[i]) { + break; + } + off2 -= stride * p_subsizes[i]; + counters[i] = 0; + stride *= p_sizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == p_ints[0]) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + 
} + MPL_free(counters); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + int *p_gsizes = p_ints + 3; + int *p_distribs = p_ints + 3 + n; + int *p_dargs = p_ints + 3 + n * 2; + int *p_sizes = p_ints + 3 + n * 3; + int order = p_ints[3 + n * 4]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < n; i++) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } else { + for (int i = n - 1; i >= 0; i--) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(n, sizeof(MPI_Aint), 
MPL_MEM_OTHER); + if (p_isblk[0] == 0) { + counters[0] = p_prank[0] * p_parg[0]; + } + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = n - 1; + } else { + i = 0; + } + while (1) { + if (p_isblk[i] == 0) { + MPI_Aint old_counter = counters[i]; + counters[i]++; + if (counters[i] % p_parg[i] == 0) { + counters[i] += p_parg[i] * (p_sizes[i] - 1); + } + if (counters[i] < p_gsizes[i]) { + off2 += stride * (counters[i] - old_counter); + break; + } + counters[i] = p_starts[i]; + off2 += stride * (counters[i] - old_counter); + } else { + counters[i]++; + off2 += stride; + if (counters[i] < p_parg[i]) { + break; + } + off2 -= stride * p_parg[i]; + counters[i] = 0; + } + stride *= p_gsizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == n) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } else { + if (cp->combiner == MPI_COMBINER_DUP) { + typemap_set(map, p_types[0], idx, offset); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + typemap_set(map, p_types[0], idx, offset + p_counts[0]); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + MPI_Aint idx2 = idx + n_elem; + for (int i = 1; i < p_counts[0]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + extent * i; + idx2++; + } + } + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + 
MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_counts[2] * k * extent; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_counts[2] * k; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_counts[2 + 0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = (p_counts[2 + k] - p_counts[2 + 0]) * extent; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_counts[2 + 0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_counts[2 + k] - p_counts[2 + 0]; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + 
map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_disp[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = (p_disp[k] - p_disp[0]) * extent; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_disp[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_disp[k] - p_disp[0]; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx; + for (int k = 0; k < p_counts[0]; k++) { + type_get_n_elem_extent(p_types[k], &n_elem, &extent); + typemap_set(map, p_types[k], idx2, offset + p_disp[k]); + idx2 += n_elem; + for (int i = 1; i < p_blkl[k]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + extent * i; + idx2++; + } + } + } + } else if (cp->combiner == 
MPI_COMBINER_SUBARRAY) { + int order = p_ints[2]; + MPI_Aint *p_sizes = p_counts; + MPI_Aint *p_subsizes = p_counts + p_ints[0]; + MPI_Aint *p_starts = p_counts + p_ints[0] * 2; + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < p_ints[0]; i++) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } else { + for (int i = p_ints[0] - 1; i >= 0; i--) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(p_ints[0], sizeof(MPI_Aint), MPL_MEM_OTHER); + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = p_ints[0] - 1; + } else { + i = 0; + } + while (1) { + counters[i]++; + off2 += stride; + if (counters[i] < p_subsizes[i]) { + break; + } + off2 -= stride * p_subsizes[i]; + counters[i] = 0; + stride *= p_sizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == p_ints[0]) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + MPI_Aint *p_gsizes = p_counts; + int *p_distribs = p_ints + 3; + int *p_dargs = p_ints + 3 + n; + int *p_sizes = p_ints + 3 + n * 2; + int order = p_ints[3 + n * 3]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size 
/= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < n; i++) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } else { + for (int i = n - 1; i >= 0; i--) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(n, sizeof(MPI_Aint), MPL_MEM_OTHER); + if (p_isblk[0] == 0) { + counters[0] = p_prank[0] * p_parg[0]; + } + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = n - 1; + } else { + i = 0; + } + while (1) { + if (p_isblk[i] == 0) { + MPI_Aint old_counter = counters[i]; + counters[i]++; + if (counters[i] % p_parg[i] == 0) { + counters[i] += p_parg[i] * (p_sizes[i] - 1); + } + if (counters[i] < p_gsizes[i]) { + off2 += stride * (counters[i] - old_counter); + break; + } + counters[i] = p_starts[i]; + off2 += stride * (counters[i] - old_counter); + } else { + counters[i]++; + off2 += stride; + if (counters[i] < p_parg[i]) { + break; + } + 
off2 -= stride * p_parg[i]; + counters[i] = 0; + } + stride *= p_gsizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == n) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } + } + +} + +void typesig_set(struct typesig *sig, MPI_Datatype dt, MPI_Aint * p_idx, MPI_Aint count) +{ + MPI_Aint idx = *p_idx; + int *p_ints; + MPI_Aint *p_aints; + MPI_Aint *p_counts; + MPI_Datatype *p_types; + + if (count <= 0) { + return; + } + + if (HANDLE_IS_BUILTIN(dt)) { + typesig_check_space(sig, idx + 1); + if (dt == MPI_2INT) { + sig->types[idx] = MPI_INT; + sig->counts[idx] = 2 * count; + } else { + sig->types[idx] = dt; + sig->counts[idx] = count; + } + *p_idx = idx + 1; + return; + } else if (MPIR_DATATYPE_IS_PREDEFINED(dt)) { + typesig_check_space(sig, idx + 2 * count); + MPI_Datatype dt_a; + if (dt == MPI_FLOAT_INT) { + dt_a = MPI_FLOAT; + } + if (dt == MPI_DOUBLE_INT) { + dt_a = MPI_DOUBLE; + } + if (dt == MPI_LONG_INT) { + dt_a = MPI_LONG; + } + if (dt == MPI_SHORT_INT) { + dt_a = MPI_SHORT; + } + for (int i = 0; i < count; i++) { + sig->types[idx] = dt_a; + sig->types[idx + 1] = MPI_INT; + sig->counts[idx] = 1; + sig->counts[idx + 1] = 1; + idx += 2; + } + *p_idx = idx; + } else { + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + if (dt_ptr->basic_type != MPI_DATATYPE_NULL) { + if (HANDLE_IS_BUILTIN(dt_ptr->basic_type)) { + typesig_check_space(sig, idx + 1); + sig->types[idx] = dt_ptr->basic_type; + sig->counts[idx] = dt_ptr->n_builtin_elements * count; + *p_idx = idx + 1; + return; + } else { + typesig_set(sig, dt_ptr->basic_type, p_idx, count * 
dt_ptr->n_builtin_elements / 2); + return; + } + } else { + MPIR_Datatype_contents *cp = dt_ptr->contents; + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types); + if (cp->nr_counts == 0) { + if (cp->combiner == MPI_COMBINER_DUP) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + typesig_set(sig, p_types[0], p_idx, count * p_ints[0]); + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_ints[0] * p_ints[1]); + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_ints[0] * p_ints[1]); + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_ints[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_ints[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + int *p_blkl = p_ints + 1; + int *p_disp = p_ints + 1 + p_ints[0]; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + MPI_Aint i; + MPI_Aint j; + MPI_Aint idx_save = *p_idx; + MPI_Aint idx_last = *p_idx; + for (int i = 0; i < p_ints[0]; i++) { + typesig_set(sig, p_types[i], p_idx, 
p_blkl[i]); + if (idx_last > 0 && sig->types[idx_last - 1] == sig->types[idx_last]) { + sig->counts[idx_last - 1] += sig->counts[idx_last]; + for (int j = idx_last; j < (*p_idx - 1); j++) { + sig->types[j] = sig->types[j + 1]; + sig->counts[j] = sig->counts[j + 1]; + } + (*p_idx)--; + } + idx_last = *p_idx; + } + if (count > 1) { + MPI_Aint num = *p_idx - idx_save; + typesig_check_space(sig, idx_save + count * num); + idx = *p_idx; + for (int i = 1; i < count; i++) { + for (int j = 0; j < num; j++) { + sig->types[idx] = sig->types[idx_save + j]; + sig->counts[idx] = sig->counts[idx_save + j]; + idx++; + } + } + *p_idx = idx; + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[1 + p_ints[0] * 3]; + int *p_sizes = p_ints + 1; + int *p_subsizes = p_ints + 1 + p_ints[0]; + int *p_starts = p_ints + 1 + p_ints[0] * 2; + MPI_Aint num = 1; + for (int i = 0; i < p_ints[0]; i++) { + num *= p_subsizes[i]; + } + typesig_set(sig, p_types[0], p_idx, count * num); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + int *p_gsizes = p_ints + 3; + int *p_distribs = p_ints + 3 + n; + int *p_dargs = p_ints + 3 + n * 2; + int *p_sizes = p_ints + 3 + n * 3; + int order = p_ints[3 + n * 4]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = 
p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + MPI_Aint num = 1; + for (int i = 0; i < n; i++) { + if (p_isblk[i] == 0) { + MPI_Aint num_this; + MPI_Aint n_blks; + MPI_Aint n_groups; + n_blks = p_gsizes[i] / p_parg[i]; + n_groups = n_blks / p_sizes[i]; + num_this = n_groups * p_parg[i]; + if (p_prank[i] < n_blks % p_sizes[i]) { + num_this += p_prank[i]; + } + if (p_prank[i] == n_blks % p_sizes[i]) { + num_this += p_gsizes[i] % p_parg[i]; + } + num *= num_this; + } else if (p_isblk[i] == 1) { + num *= p_gsizes[i] / p_sizes[i]; + } else { + if (p_prank[i] < p_gsizes[i] % p_sizes[i]) { + num *= p_gsizes[i] / p_sizes[i] + 1; + } else { + num *= p_gsizes[i] / p_sizes[i]; + } + } + } + typesig_set(sig, p_types[0], p_idx, count * num); + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } else { + if (cp->combiner == MPI_COMBINER_DUP) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + typesig_set(sig, p_types[0], p_idx, count * p_counts[0]); + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_counts[0] * p_counts[1]); + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_counts[0] * p_counts[1]); + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; 
i++) { + blkl_sum += p_counts[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_counts[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint i; + MPI_Aint j; + MPI_Aint idx_save = *p_idx; + MPI_Aint idx_last = *p_idx; + for (int i = 0; i < p_counts[0]; i++) { + typesig_set(sig, p_types[i], p_idx, p_blkl[i]); + if (idx_last > 0 && sig->types[idx_last - 1] == sig->types[idx_last]) { + sig->counts[idx_last - 1] += sig->counts[idx_last]; + for (int j = idx_last; j < (*p_idx - 1); j++) { + sig->types[j] = sig->types[j + 1]; + sig->counts[j] = sig->counts[j + 1]; + } + (*p_idx)--; + } + idx_last = *p_idx; + } + if (count > 1) { + MPI_Aint num = *p_idx - idx_save; + typesig_check_space(sig, idx_save + count * num); + idx = *p_idx; + for (int i = 1; i < count; i++) { + for (int j = 0; j < num; j++) { + sig->types[idx] = sig->types[idx_save + j]; + sig->counts[idx] = sig->counts[idx_save + j]; + idx++; + } + } + *p_idx = idx; + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[2]; + MPI_Aint *p_sizes = p_counts; + MPI_Aint *p_subsizes = p_counts + p_ints[0]; + 
MPI_Aint *p_starts = p_counts + p_ints[0] * 2; + MPI_Aint num = 1; + for (int i = 0; i < p_ints[0]; i++) { + num *= p_subsizes[i]; + } + typesig_set(sig, p_types[0], p_idx, count * num); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + MPI_Aint *p_gsizes = p_counts; + int *p_distribs = p_ints + 3; + int *p_dargs = p_ints + 3 + n; + int *p_sizes = p_ints + 3 + n * 2; + int order = p_ints[3 + n * 3]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + MPI_Aint num = 1; + for (int i = 0; i < n; i++) { + if (p_isblk[i] == 0) { + MPI_Aint num_this; + MPI_Aint n_blks; + MPI_Aint n_groups; + n_blks = p_gsizes[i] / p_parg[i]; + n_groups = n_blks / p_sizes[i]; + num_this = n_groups * p_parg[i]; + if (p_prank[i] < n_blks % p_sizes[i]) { + num_this += p_prank[i]; 
+ } + if (p_prank[i] == n_blks % p_sizes[i]) { + num_this += p_gsizes[i] % p_parg[i]; + } + num *= num_this; + } else if (p_isblk[i] == 1) { + num *= p_gsizes[i] / p_sizes[i]; + } else { + if (p_prank[i] < p_gsizes[i] % p_sizes[i]) { + num *= p_gsizes[i] / p_sizes[i] + 1; + } else { + num *= p_gsizes[i] / p_sizes[i]; + } + } + } + typesig_set(sig, p_types[0], p_idx, count * num); + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } + } + } + +} + +void type_get_n_elem_extent(MPI_Datatype dt, MPI_Aint * p_n_elem, MPI_Aint * p_extent) +{ + if (HANDLE_IS_BUILTIN(dt)) { + if (dt == MPI_2INT) { + *p_n_elem = 2; + *p_extent = MPIR_Datatype_get_basic_size(MPI_INT); + } else { + *p_n_elem = 1; + *p_extent = MPIR_Datatype_get_basic_size(dt); + } + return; + } + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + *p_n_elem = dt_ptr->n_builtin_elements; + *p_extent = dt_ptr->extent; +} + +void typesig_check_space(struct typesig *sig, MPI_Aint n) +{ + if (sig->n < n) { + sig->n = n * 2; + sig->types = MPL_realloc(sig->types, sig->n * sizeof(MPI_Datatype), MPL_MEM_OTHER); + sig->counts = MPL_realloc(sig->counts, sig->n * sizeof(MPI_Aint), MPL_MEM_OTHER); + } +} From 20df436cc2e6cfde2f4ce3063db7c74d99cf1312 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sun, 18 Apr 2021 09:42:10 -0500 Subject: [PATCH 4/6] binding/c: add mpix_api.txt Add MPIX_Type_dump_typemap and MPIX_Type_dump_typesig. These functions allows datatype debugging and testing. 
--- src/binding/c/mpix_api.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/binding/c/mpix_api.txt diff --git a/src/binding/c/mpix_api.txt b/src/binding/c/mpix_api.txt new file mode 100644 index 00000000000..5f8a6801194 --- /dev/null +++ b/src/binding/c/mpix_api.txt @@ -0,0 +1,15 @@ +MPIX_Type_dump_typemap: + datatype: DATATYPE + .desc: Dump the type map of the given datatype + .impl: direct +{ + return MPIR_type_dump_typemap(datatype); +} + +MPIX_Type_dump_typesig: + datatype: DATATYPE + .desc: Dump the type signature of the given datatype + .impl: direct +{ + return MPIR_type_dump_typesig(datatype); +} From dac284f6552b927ade86e6af227448ff8cbd7389 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 7 Apr 2021 15:18:49 -0500 Subject: [PATCH 5/6] Temporary - add MyDef source for typemap.c The typemap routines are very complex to maintain. The MyDef sources allow higher level code organizations. --- typemap.def | 124 ++++++++++++++++++++++++ typemap_cases.def | 170 +++++++++++++++++++++++++++++++++ typemap_set.def | 234 ++++++++++++++++++++++++++++++++++++++++++++++ typesig_set.def | 133 ++++++++++++++++++++++++++ 4 files changed, 661 insertions(+) create mode 100644 typemap.def create mode 100644 typemap_cases.def create mode 100644 typemap_set.def create mode 100644 typesig_set.def diff --git a/typemap.def b/typemap.def new file mode 100644 index 00000000000..c271da225d7 --- /dev/null +++ b/typemap.def @@ -0,0 +1,124 @@ +include: typemap_cases.def +include: typemap_set.def +include: typesig_set.def + +page: typemap, - + output_dir: src/mpi/datatype + module: c + + $list MPIR_type_dump_typemap, MPIR_type_dump_typesig + $list MPIR_type_get_typemap, MPIR_type_get_typesig + $list MPIR_type_free_typemap, MPIR_type_free_typesig + +subcode: no-autoload + $struct typemap + MPI_Aint n + MPI_Datatype *types + MPI_Aint *disps + + $struct typesig + MPI_Aint n + MPI_Datatype *types + MPI_Aint *counts + +subcode: _autoload + $register_name(dt) 
MPI_Datatype + $register_name(dt_ptr) MPIR_Datatype * + $register_name(map) struct typemap * + $register_name(sig) struct typesig * + $register_fmt(MPI_Aint) %ld + +subcode: get_dt_ptr + $my MPIR_Datatype *dt_ptr + MPIR_Datatype_get_ptr(dt, dt_ptr) + MPIR_Assert(dt_ptr != NULL); + +fncode: MPIR_type_dump_typesig(dt): int + $my sig + sig = MPIR_type_get_typesig(dt) + $for i=0:sig->n + $if i>0 + $print ",-" + $print "%s:%ld-", MPIR_Datatype_builtin_to_string(sig->types[i]), (long) sig->counts[i] + $print + + MPIR_type_free_typesig(sig) + return MPI_SUCCESS + +fncode: MPIR_type_dump_typemap(dt): int + $my MPI_Aint lb, MPI_Aint extent + MPIR_Type_get_extent_impl(dt, &lb, &extent); + printf(" %20s: %10ld\n", "lb", (long) lb) + printf(" %20s: %10ld\n", "ub", (long) (lb + extent)) + + $my map + map = MPIR_type_get_typemap(dt) + $for i=0:map->n + $print " %20s: %10ld\n", MPIR_Datatype_builtin_to_string(map->types[i]), (long) map->disps[i] + + MPIR_type_free_typemap(map) + return MPI_SUCCESS + +fncode: MPIR_type_get_typesig(dt) + $allocate sig + sig->n = 1 + $map allocate(1) sig->types, sig->counts + + $my MPI_Aint idx = 0 + typesig_set(sig, dt, &idx, 1) + sig->n = idx + return sig + +fncode: MPIR_type_get_typemap(dt) + $allocate map + + $if HANDLE_IS_BUILTIN(dt) + $if dt == MPI_2INT + map->n = 2 + $else + map->n = 1 + $else + $call get_dt_ptr + map->n = dt_ptr->n_builtin_elements + + $call allocate_space + $my n_elem, extent: MPI_Aint + typemap_set(map, dt, 0, 0) + return map + + subcode: allocate_space + MPIR_Assert(map->n > 0); + $map allocate(map->n), map->types, map->disps + +#---------------------------------------- +fncode: MPIR_type_free_typemap(map) + MPL_free(map->types); + MPL_free(map->disps); + MPL_free(map); + +fncode: MPIR_type_free_typesig(sig) + MPL_free(sig->types); + MPL_free(sig->counts); + MPL_free(sig); + +#---------------------------------------- +subcode: _autoload + $(block:frame_init) + $: /* + $: * Copyright (C) by Argonne National Laboratory + $: 
* See COPYRIGHT in top-level directory + $: */ + NEWLINE + $include "mpiimpl.h" + $include "datatype.h" + +subcode: allocate(n, var) + $(if:var~.*types) + $(set:T=MPI_Datatype) + $(elif:var~.*(rank|isblk)) + $(set:T=int) + $(else) + $(set:T=MPI_Aint) + $(if:var!~.*->) + $my $(T) *$(var) + $(var) = MPL_malloc($(n) * sizeof($(T)), MPL_MEM_OTHER) diff --git a/typemap_cases.def b/typemap_cases.def new file mode 100644 index 00000000000..e7fbfa6b906 --- /dev/null +++ b/typemap_cases.def @@ -0,0 +1,170 @@ +subcode: switch_combiner + $call case, DUP, dup + $call case, RESIZED, resized + $call case, CONTIGUOUS, contig + $call case, VECTOR, vector + $call case, HVECTOR, vector + $call case, INDEXED_BLOCK, idxblk + $call case, HINDEXED_BLOCK, idxblk + $call case, INDEXED, indexed + $call case, HINDEXED, indexed + $call case, STRUCT, struct + $call case, SUBARRAY, subarray + $call case, DARRAY, darray + $else + MPIR_Assert(0) + + subcode: case(NAME, type) + $case cp->combiner == MPI_COMBINER_$(NAME) + $(if:NAME~H) + $(set:h=h) + $(set:ext=) + $(else) + $(set:h=-) + $(set:ext= * extent) + $call set_$(type) + + #---------------------------------------- + subcode: set_dup + $call _set_dup + + subcode: set_resized + $(if:c=-) + $(set:lb=p_aints[0]) + $(set:extent=p_aints[1]) + $(else) + $(set:lb=p_counts[0]) + $(set:extent=p_counts[1]) + $call _set_resized + + subcode: set_contig + $(if:c=-) + $(set:n=p_ints[0]) + $(else) + $(set:n=p_counts[0]) + $call _set_contig + + # --------------------------------------- + subcode: set_vector + $(if:c=-) + $(set:n=p_ints[0]) + $(set:blkl=p_ints[1]) + $(if:h=-) + $(set:disp=p_ints[2] * $1) + $(else) + $(set:disp=p_aints[0]) + $(else) + $(set:n=p_counts[0]) + $(set:blkl=p_counts[1]) + $(set:disp=p_counts[2] * $1) + $call _set_vector + + subcode: set_idxblk + $(if:c=-) + $(set:n=p_ints[0]) + $(set:blkl=p_ints[1]) + $(if:h=-) + $(set:disp=p_ints[2+$1]) + $(else) + $(set:disp=p_aints[$1]) + $(else) + $(set:n=p_counts[0]) + $(set:blkl=p_counts[1]) 
+ $(set:disp=p_counts[2+$1]) + $call _set_indexed + + subcode: set_indexed + $(if:c=-) + $(set:n=p_ints[0]) + $my int *p_blkl = p_ints + 1 + $(if:h=-) + $my int *p_disp = p_ints + 1 + $(n) + $(else) + $my MPI_Aint *p_disp = p_aints + $(else) + $(set:n=p_counts[0]) + $my MPI_Aint *p_blkl = p_counts + 1 + $my MPI_Aint *p_disp = p_counts + 1 + $(n) + $(set:blkl=p_blkl[$1]) + $(set:disp=p_disp[$1]) + $call _set_indexed + + subcode: set_struct + $(if:c=-) + $(set:n=p_ints[0]) + $my int *p_blkl = p_ints + 1 + $my MPI_Aint *p_disp = p_aints + $(else) + $(set:n=p_counts[0]) + $my MPI_Aint *p_blkl = p_counts + 1 + $my MPI_Aint *p_disp = p_counts + 1 + $(n) + $(set:blkl=p_blkl[$1]) + $(set:disp=p_disp[$1]) + $call _set_struct + + subcode: set_subarray + $(set:n=p_ints[0]) + $(if:c=-) + $my int order = p_ints[1 + $(n) * 3] + $my int *p_sizes = p_ints + 1 + $my int *p_subsizes = p_ints + 1 + $(n) + $my int *p_starts = p_ints + 1 + $(n) * 2 + $(else) + $my int order = p_ints[2] + $my MPI_Aint *p_sizes = p_counts + $my MPI_Aint *p_subsizes = p_counts + $(n) + $my MPI_Aint *p_starts = p_counts + $(n) * 2 + $call _set_subarray + + subcode: set_darray + $my int size = p_ints[0] + $my int rank = p_ints[1] + $my int n = p_ints[2] + $(set:n=n) + $(if:c=-) + $my int *p_gsizes = p_ints + 3 + $my int *p_distribs = p_ints + 3 + $(n) + $my int *p_dargs = p_ints + 3 + $(n) * 2 + $my int *p_sizes = p_ints + 3 + $(n) * 3 + $my int order = p_ints[3 + $(n) * 4] + $(else) + $my MPI_Aint *p_gsizes = p_counts + $my int *p_distribs = p_ints + 3 + $my int *p_dargs = p_ints + 3 + $(n) + $my int *p_sizes = p_ints + 3 + $(n) * 2 + $my int order = p_ints[3 + $(n) * 3] + + $map allocate($(n)) p_prank, p_isblk, p_parg, p_starts + $for i=0:$(n) + size /= p_sizes[i] + p_prank[i] = rank / size + rank = rank % size + $if p_sizes[i] == 1 + p_isblk[i] = 1 + p_parg[i] = p_gsizes[i] + p_starts[i] = 0 + $elif p_distribs[i] == MPI_DISTRIBUTE_BLOCK && p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG + p_parg[i] = p_gsizes[i] 
/ p_sizes[i] + $if p_gsizes[i] % p_sizes[i] == 0 + p_isblk[i] = 1 + p_starts[i] = p_parg[i] * p_prank[i] + $else + $my int r = p_gsizes[i] % p_sizes[i] + p_isblk[i] = 2 + $if p_prank[i] < r + p_parg[i] += 1 + p_starts[i] = p_parg[i] * p_prank[i] + $else + p_starts[i] = p_parg[i] * p_prank[i] + r + $elif p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG + p_isblk[i] = 0 + p_parg[i] = 1 + p_starts[i] = p_prank[i] + $else + p_isblk[i] = 0 + p_parg[i] = p_dargs[i] + p_starts[i] = p_parg[i] * p_prank[i] + $call _set_darray + $(for:p_prank, p_isblk, p_parg, p_starts) + MPL_free($1) + diff --git a/typemap_set.def b/typemap_set.def new file mode 100644 index 00000000000..ffaefc1a7ff --- /dev/null +++ b/typemap_set.def @@ -0,0 +1,234 @@ +fncode: type_get_n_elem_extent(dt, MPI_Aint *p_n_elem, MPI_Aint *p_extent) + $if HANDLE_IS_BUILTIN(dt) + $if dt == MPI_2INT + *p_n_elem = 2 + *p_extent = MPIR_Datatype_get_basic_size(MPI_INT) + $else + *p_n_elem = 1 + *p_extent = MPIR_Datatype_get_basic_size(dt) + return + $call get_dt_ptr + *p_n_elem = dt_ptr->n_builtin_elements + *p_extent = dt_ptr->extent + +fncode: typemap_set(map, dt, MPI_Aint idx, MPI_Aint offset) + $if HANDLE_IS_BUILTIN(dt) + $if dt == MPI_2INT + map->types[idx] = MPI_INT + map->disps[idx] = offset + map->types[idx+1] = MPI_INT + map->disps[idx+1] = offset + MPIR_Datatype_get_basic_size(MPI_INT) + $else + map->types[idx] = dt + map->disps[idx] = offset + return + $elif MPIR_DATATYPE_IS_PREDEFINED(dt) + $call get_dt_ptr + $my MPI_Aint disp = dt_ptr->true_ub - MPIR_Datatype_get_basic_size(MPI_INT) + $(for:FLOAT,DOUBLE,LONG,SHORT) + $if dt == MPI_$1_INT + map->types[idx] = MPI_$1 + map->disps[idx] = offset + map->types[idx+1] = MPI_INT + map->disps[idx+1] = offset + disp + return + $else + $my MPI_Aint n_elem, MPI_Aint extent + + $call get_dt_ptr + $my MPIR_Datatype_contents *cp = dt_ptr->contents + $local int *p_ints, MPI_Aint *p_aints, MPI_Aint *p_counts, MPI_Datatype *p_types + 
MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types) + $if cp->nr_counts == 0 + $(set:c=-) + $call switch_combiner + $else + $(set:c=c) + $call switch_combiner + + # -------------------------------- + subcode: _set_dup + typemap_set(map, p_types[0], idx, offset) + + subcode: _set_resized + typemap_set(map, p_types[0], idx, offset + $(lb)) + + subcode: _set_contig + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset) + $local i, j: MPI_Aint + $my MPI_Aint idx2 = idx + n_elem + $for i=1:$(n) + $for j=0:n_elem + map->types[idx2] = map->types[idx + j] + map->disps[idx2] = map->disps[idx + j] + extent * i + idx2++ + + # common for vector, indexed_block, indexed + subcode: _set_vector + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset) + + $my k, i, j: MPI_Aint + $my MPI_Aint off2 + $my MPI_Aint idx2 = idx + n_elem + $for k=0:$(n) + off2 = $(disp:k)$(ext) + $call _copy_inner_blk, off2 + extent * i + + subcode: _set_indexed + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset + $(disp:0)) + + $my k, i, j: MPI_Aint + $my MPI_Aint off2 + $my MPI_Aint idx2 = idx + n_elem + $for k=0:$(n) + off2 = ($(disp:k) - $(disp:0))$(ext) + $call _copy_inner_blk, off2 + extent * i + + subcode: _set_struct + $my k, i, j: MPI_Aint + $my MPI_Aint off2 + $my MPI_Aint idx2 = idx + $for k=0:$(n) + type_get_n_elem_extent(p_types[k], &n_elem, &extent) + typemap_set(map, p_types[k], idx2, offset + $(disp:k)) + idx2 += n_elem + $for i=1:$(blkl:k) + $call _copy_elements, extent * i + + # --------------------- + subcode: _copy_inner_blk(off) + $for i=0:$(blkl:k) + $if k || i + $call _copy_elements, $(off) + + subcode: _copy_elements(off) + $for j=0:n_elem + map->types[idx2] = map->types[idx + j] + map->disps[idx2] = map->disps[idx + j] + $(off) + idx2++ + + # --------------------- + subcode: _set_subarray + $(set:sizei=p_sizes[i]) + 
$(set:starti=p_starts[i]) + &call set_ndim, subarray + $call _copy_elements, off2 - off0 + + subcode: init_counters_subarray + NOOP + subcode: inc_counter_i_subarray + counters[i]++ + off2 += stride + $if counters[i] < p_subsizes[i] + break + off2 -= stride * p_subsizes[i] + counters[i] = 0 + + # --------------------- + subcode: _set_darray + $(set:sizei=p_gsizes[i]) + $(set:starti=p_starts[i]) + + &call set_ndim, darray + $call _copy_elements, off2 - off0 + + subcode: init_counters_darray + $if p_isblk[0] == 0 + $call init_counters_cyclic + subcode: inc_counter_i_darray + $if p_isblk[i] == 0 + $call inc_counter_cyclic + $else + $call inc_counter_block + + # -- cyclic - counters use actual offset + subcode: init_counters_cyclic + counters[0] = p_prank[0] * p_parg[0] + subcode: inc_counter_cyclic + $my MPI_Aint old_counter = counters[i] + counters[i]++ + $if counters[i] % p_parg[i] == 0 + counters[i] += p_parg[i] * (p_sizes[i] - 1) + $if counters[i] < p_gsizes[i] + off2 += stride * (counters[i] - old_counter) + break + # reset for next dimension + counters[i] = p_starts[i] + off2 += stride * (counters[i] - old_counter) + + # -- block + subcode: inc_counter_block + counters[i]++ + off2 += stride + $if counters[i] < p_parg[i] + break + # reset for next dimension + off2 -= stride * p_parg[i] + counters[i] = 0 + +#---------------------------------------- +subcode: set_ndim(type) + $call calc_off0 + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset + off0) + + $my MPI_Aint off2 = off0 + $my MPI_Aint idx2 = idx + n_elem + # -- iter_ndim -- + $my MPI_Aint *counters + counters = MPL_calloc($(n), sizeof(MPI_Aint), MPL_MEM_OTHER) + $call @init_counters_$(type) + $while 1 + $call inc_counters + BLOCK + MPL_free(counters) + + subcode: calc_off0 + # high to low dimension + $my MPI_Aint off0 = 0 + $if order == MPI_ORDER_C + $for i=0:$(n) + $call update_off0 + $else + $for i=$(n)-1 downto 0 + $call update_off0 + + subcode: update_off0 
+ off0 = (off0 * $(sizei)) + $(starti) + + subcode: inc_counters + # low to high dimension + $my alldone = 0 + $my MPI_Aint stride = 1 + $call @init_i + $while 1 + $call inc_counter_i_$(type) + # -- next dimension + stride *= $(sizei) + $call @next_i + $if alldone + break + + subcode: init_i + $my int i + $if order == MPI_ORDER_C + i = $(n) - 1 + $else + i = 0 + + subcode: next_i + $if order == MPI_ORDER_C + i-- + $call alldone, i < 0 + $else + i++ + $call alldone, i == $(n) + subcode: alldone(cond) + $if $(cond) + alldone = 1 + break + + diff --git a/typesig_set.def b/typesig_set.def new file mode 100644 index 00000000000..0de8450aa51 --- /dev/null +++ b/typesig_set.def @@ -0,0 +1,133 @@ +fncode: typesig_set(sig, dt, MPI_Aint *p_idx, MPI_Aint count) + $if count <= 0 + return + + $local MPI_Aint idx = *p_idx + + $if HANDLE_IS_BUILTIN(dt) + typesig_check_space(sig, idx + 1) + $if dt == MPI_2INT + sig->types[idx] = MPI_INT + sig->counts[idx] = 2 * count + $else + sig->types[idx] = dt + sig->counts[idx] = count + *p_idx = idx + 1 + return + $elif MPIR_DATATYPE_IS_PREDEFINED(dt) + typesig_check_space(sig, idx + 2 * count) + $my MPI_Datatype dt_a + $(for:FLOAT,DOUBLE,LONG,SHORT) + $if dt == MPI_$1_INT + dt_a = MPI_$1 + $for i=0:count + sig->types[idx] = dt_a + sig->types[idx + 1] = MPI_INT + sig->counts[idx] = 1 + sig->counts[idx + 1] = 1 + idx+=2 + *p_idx = idx + $else + $call get_dt_ptr + $if dt_ptr->basic_type != MPI_DATATYPE_NULL + $if HANDLE_IS_BUILTIN(dt_ptr->basic_type) + typesig_check_space(sig, idx + 1) + sig->types[idx] = dt_ptr->basic_type + sig->counts[idx] = dt_ptr->n_builtin_elements * count + *p_idx = idx + 1 + return + $else + typesig_set(sig, dt_ptr->basic_type, p_idx, count * dt_ptr->n_builtin_elements) + return + $else + $my MPIR_Datatype_contents *cp = dt_ptr->contents + $local int *p_ints, MPI_Aint *p_aints, MPI_Aint *p_counts, MPI_Datatype *p_types + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types) + $if cp->nr_counts == 0 
+ $(set:c=-) + $call switch_combiner + $else + $(set:c=c) + $call switch_combiner + + # -------------------------------- + subcode: _set_dup + # $print "typesig_set dup" + typesig_set(sig, p_types[0], p_idx, count) + + subcode: _set_resized + typesig_set(sig, p_types[0], p_idx, count) + + subcode: _set_contig + typesig_set(sig, p_types[0], p_idx, count * $(n)) + + # common for vector, indexed_block + subcode: _set_vector + # $print "typesig_set vector count=%ld", count * $(n) * $(blkl) + typesig_set(sig, p_types[0], p_idx, count * $(n) * $(blkl)) + + subcode: _set_indexed + $my MPI_Aint blkl_sum = 0 + $for i=0:$(n) + blkl_sum += $(blkl:i) + # $print "typesig_set indexed count=%ld", count * blkl_sum + typesig_set(sig, p_types[0], p_idx, count * blkl_sum) + + subcode: _set_struct + $my i, j: MPI_Aint + $my MPI_Aint idx_save = *p_idx + $my MPI_Aint idx_last = *p_idx + $for i=0:$(n) + typesig_set(sig, p_types[i], p_idx, $(blkl:i)) + $if idx_last > 0 && sig->types[idx_last-1] == sig->types[idx_last] + sig->counts[idx_last-1] += sig->counts[idx_last] + $for j=idx_last:(*p_idx - 1) + sig->types[j] = sig->types[j+1] + sig->counts[j] = sig->counts[j+1] + (*p_idx)-- + idx_last = *p_idx + $if count > 1 + $my MPI_Aint num = *p_idx - idx_save + typesig_check_space(sig, idx_save + count * num) + idx = *p_idx + $for i=1:count + $for j=0:num + sig->types[idx] = sig->types[idx_save + j] + sig->counts[idx] = sig->counts[idx_save + j] + idx++ + *p_idx = idx + + # --------------------- + subcode: _set_subarray + $my MPI_Aint num = 1 + $for i=0:$(n) + num *= p_subsizes[i] + typesig_set(sig, p_types[0], p_idx, count * num) + + subcode: _set_darray + $my MPI_Aint num = 1 + $for i=0:$(n) + $if p_isblk[i] == 0 + $my num_this, n_blks, n_groups: MPI_Aint + n_blks = p_gsizes[i] / p_parg[i] + n_groups = n_blks / p_sizes[i] + num_this = n_groups * p_parg[i] + $if p_prank[i] < n_blks % p_sizes[i] + num_this += p_prank[i] + $if p_prank[i] == n_blks % p_sizes[i] + num_this += p_gsizes[i] % 
p_parg[i] + num *= num_this + $elif p_isblk[i] == 1 + num *= p_gsizes[i] / p_sizes[i] + $else + $if p_prank[i] < p_gsizes[i] % p_sizes[i] + num *= p_gsizes[i] / p_sizes[i] + 1 + $else + num *= p_gsizes[i] / p_sizes[i] + typesig_set(sig, p_types[0], p_idx, count * num) + +fncode: typesig_check_space(sig, MPI_Aint n) + $if sig->n < n + sig->n = n * 2 + sig->types = MPL_realloc(sig->types, sig->n * sizeof(MPI_Datatype), MPL_MEM_OTHER) + sig->counts = MPL_realloc(sig->counts, sig->n * sizeof(MPI_Aint), MPL_MEM_OTHER) From 4e03f7ede1a818baf4fdb1c6624840c6adce2540 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sun, 18 Apr 2021 09:39:04 -0500 Subject: [PATCH 6/6] Temporary - add t.def This is a testing code. Requires https://github.com/hzhou/mpi-def.git --- t.def | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 t.def diff --git a/t.def b/t.def new file mode 100644 index 00000000000..2743579a7c0 --- /dev/null +++ b/t.def @@ -0,0 +1,49 @@ +include: c/mpi.def +page: t, mpi_frame + # run: gdb + $dump $(rank), $(size) + + $(if:1) + $call test + $(elif:0) + $call test_subarray + $(elif:0) + $call test_darray + $(else) + $call test_1 + +macros: + # dump_type: MPIX_Type_dump_typemap + dump_type: MPIX_Type_dump_typesig + +subcode: test + &call Type_commit, MPI_SHORT_INT + $call type_struct, MPI_INT, 4, 0, MPI_DOUBLE, 8, 16 + $call type_struct, $(dt), 1, 0, $(dt), 0, 100, $(dt), 1, 0, $(dt), 0, 100 + $call type_subarray, 4, [2, 1, 1, 1], [2, 1, 1, 1], [0, 0, 0, 0] + $(dump_type)($(dt)) + +subcode: test_a1 + &call Type_commit, MPI_SHORT_INT + $call type_struct, MPI_INT, 4, 0, MPI_DOUBLE, 8, 16 + $call type_subarray, 4, [1, 1, 1, 1], [1, 1, 1, 1], [0, 0, 0, 0] + $call type_blkhidx, 1, 2, 8 + $(dump_type)($(dt)) + +subcode: test_1 + &call Type_commit, MPI_SHORT_INT + $call type_contig, 2 + $call type_hvector, 2, 1, 10 + $call type_struct, MPI_INT, 1, 0, MPI_SHORT, 10, 8, $(dt), 1, 100 + $call type_contig, 2 + $(dump_type)($(dt)) 
+ +subcode: test_subarray + &call Type_commit, MPI_CHAR + $call type_subarray, 3, [4, 5, 10], [2, 2, 2], [0, 1, 1] + $(dump_type)($(dt)) + +subcode: test_darray + &call Type_commit, MPI_CHAR + $call type_darray, 10, 5, 3, [4, 4, 10], [2, -, 5], [1, -, 1] + $(dump_type)($(dt))