diff --git a/src/binding/c/mpix_api.txt b/src/binding/c/mpix_api.txt new file mode 100644 index 00000000000..5f8a6801194 --- /dev/null +++ b/src/binding/c/mpix_api.txt @@ -0,0 +1,15 @@ +MPIX_Type_dump_typemap: + datatype: DATATYPE + .desc: Dump the type map of the given datatype + .impl: direct +{ + return MPIR_type_dump_typemap(datatype); +} + +MPIX_Type_dump_typesig: + datatype: DATATYPE + .desc: Dump the type signature of the given datatype + .impl: direct +{ + return MPIR_type_dump_typesig(datatype); +} diff --git a/src/include/mpir_datatype.h b/src/include/mpir_datatype.h index a1619d20087..aab1f451e05 100644 --- a/src/include/mpir_datatype.h +++ b/src/include/mpir_datatype.h @@ -615,4 +615,23 @@ MPI_Aint MPII_Datatype_blockindexed_count_contig(MPI_Aint count, const MPI_Aint disp_array[], int dispinbytes, MPI_Aint old_extent); +struct typemap { + MPI_Aint n; + MPI_Datatype *types; + MPI_Aint *disps; +}; + +struct typesig { + MPI_Aint n; + MPI_Datatype *types; + MPI_Aint *counts; +}; + +int MPIR_type_dump_typemap(MPI_Datatype dt); +int MPIR_type_dump_typesig(MPI_Datatype dt); +struct typemap *MPIR_type_get_typemap(MPI_Datatype dt); +struct typesig *MPIR_type_get_typesig(MPI_Datatype dt); +void MPIR_type_free_typemap(struct typemap *map); +void MPIR_type_free_typesig(struct typesig *sig); + #endif /* MPIR_DATATYPE_H_INCLUDED */ diff --git a/src/mpi/datatype/Makefile.mk b/src/mpi/datatype/Makefile.mk index f5da7fccf95..59a054d06b4 100644 --- a/src/mpi/datatype/Makefile.mk +++ b/src/mpi/datatype/Makefile.mk @@ -22,4 +22,5 @@ mpi_core_sources += \ src/mpi/datatype/type_create_darray.c \ src/mpi/datatype/type_create_subarray.c \ src/mpi/datatype/type_create_pairtype.c \ + src/mpi/datatype/typemap.c \ src/mpi/datatype/type_debug.c diff --git a/src/mpi/datatype/typemap.c b/src/mpi/datatype/typemap.c new file mode 100644 index 00000000000..29ffdfcf0af --- /dev/null +++ b/src/mpi/datatype/typemap.c @@ -0,0 +1,1343 @@ +/* + * Copyright (C) by Argonne National Laboratory 
+ * See COPYRIGHT in top-level directory + */ + +#include "mpiimpl.h" +#include "datatype.h" +#include +#include +#include + +int MPIR_type_dump_typemap(MPI_Datatype dt); +int MPIR_type_dump_typesig(MPI_Datatype dt); +struct typemap *MPIR_type_get_typemap(MPI_Datatype dt); +struct typesig *MPIR_type_get_typesig(MPI_Datatype dt); +void MPIR_type_free_typemap(struct typemap *map); +void MPIR_type_free_typesig(struct typesig *sig); + +static void typemap_set(struct typemap *map, MPI_Datatype dt, MPI_Aint idx, MPI_Aint offset); +static void typesig_set(struct typesig *sig, MPI_Datatype dt, MPI_Aint * p_idx, MPI_Aint count); +static void type_get_n_elem_extent(MPI_Datatype dt, MPI_Aint * p_n_elem, MPI_Aint * p_extent); +static void typesig_check_space(struct typesig *sig, MPI_Aint n); + +/* Print dt's lb and ub (lb + extent) to stdout, followed by one line per type map entry + * giving the builtin type name and its displacement. Always returns MPI_SUCCESS. */ +int MPIR_type_dump_typemap(MPI_Datatype dt) +{ + MPI_Aint lb; + MPI_Aint extent; + MPIR_Type_get_extent_impl(dt, &lb, &extent); + printf(" %20s: %10ld\n", "lb", (long) lb); + printf(" %20s: %10ld\n", "ub", (long) (lb + extent)); + + struct typemap *map; + map = MPIR_type_get_typemap(dt); + /* map->n is an MPI_Aint, so the index must be one too */ + for (MPI_Aint i = 0; i < map->n; i++) { + printf(" %20s: %10ld\n", MPIR_Datatype_builtin_to_string(map->types[i]), + (long) map->disps[i]); + } + + MPIR_type_free_typemap(map); + return MPI_SUCCESS; +} + +/* Print dt's type signature to stdout on a single line as comma-separated "type:count" + * pairs. Always returns MPI_SUCCESS. */ +int MPIR_type_dump_typesig(MPI_Datatype dt) +{ + struct typesig *sig; + sig = MPIR_type_get_typesig(dt); + for (MPI_Aint i = 0; i < sig->n; i++) { + if (i > 0) { + printf(","); + } + printf("%s:%ld", MPIR_Datatype_builtin_to_string(sig->types[i]), (long) sig->counts[i]); + } + puts(""); + + MPIR_type_free_typesig(sig); + return MPI_SUCCESS; +} + +/* Build the type map of dt: one (builtin type, displacement) entry per builtin element. + * The caller releases the result with MPIR_type_free_typemap(). */ +struct typemap *MPIR_type_get_typemap(MPI_Datatype dt) +{ + struct typemap *map; + + /* Allocate through MPL so the MPL_free() in MPIR_type_free_typemap() is a matching release */ + map = MPL_malloc(sizeof(struct typemap), MPL_MEM_OTHER); + MPIR_Assert(map != NULL); + + if (HANDLE_IS_BUILTIN(dt)) { + if (dt == MPI_2INT) { + map->n = 2; + } else { + map->n = 1; + } + } else { + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + map->n =
dt_ptr->n_builtin_elements; + } + + MPIR_Assert(map->n > 0); + map->types = MPL_malloc(map->n * sizeof(MPI_Datatype), MPL_MEM_OTHER); + map->disps = MPL_malloc(map->n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPIR_Assert(map->types != NULL && map->disps != NULL); + typemap_set(map, dt, 0, 0); + return map; +} + +/* Build the type signature of dt as (builtin type, count) runs. The arrays start with room + * for a single entry; sig->n ends up as the number of entries typesig_set() produced. + * The caller releases the result with MPIR_type_free_typesig(). */ +struct typesig *MPIR_type_get_typesig(MPI_Datatype dt) +{ + struct typesig *sig; + + /* Allocate through MPL so the MPL_free() in MPIR_type_free_typesig() is a matching release */ + sig = MPL_malloc(sizeof(struct typesig), MPL_MEM_OTHER); + MPIR_Assert(sig != NULL); + sig->n = 1; + sig->types = MPL_malloc(1 * sizeof(MPI_Datatype), MPL_MEM_OTHER); + sig->counts = MPL_malloc(1 * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPIR_Assert(sig->types != NULL && sig->counts != NULL); + + MPI_Aint idx = 0; + typesig_set(sig, dt, &idx, 1); + sig->n = idx; + return sig; +} + +/* Release a type map and both of its arrays. */ +void MPIR_type_free_typemap(struct typemap *map) +{ + MPL_free(map->types); + MPL_free(map->disps); + MPL_free(map); +} + +/* Release a type signature and both of its arrays. */ +void MPIR_type_free_typesig(struct typesig *sig) +{ + MPL_free(sig->types); + MPL_free(sig->counts); + MPL_free(sig); +} + +/* Write the type map entries of dt into map starting at index idx, adding offset to every + * displacement. Derived types recurse on their old type(s) and replicate the entries. */ +void typemap_set(struct typemap *map, MPI_Datatype dt, MPI_Aint idx, MPI_Aint offset) +{ + int *p_ints; + MPI_Aint *p_aints; + MPI_Aint *p_counts; + MPI_Datatype *p_types; + MPI_Aint i; + MPI_Aint j; + + if (HANDLE_IS_BUILTIN(dt)) { + if (dt == MPI_2INT) { + map->types[idx] = MPI_INT; + map->disps[idx] = offset; + map->types[idx + 1] = MPI_INT; + map->disps[idx + 1] = offset + MPIR_Datatype_get_basic_size(MPI_INT); + } else { + map->types[idx] = dt; + map->disps[idx] = offset; + } + return; + } else if (MPIR_DATATYPE_IS_PREDEFINED(dt)) { + /* pair types: a value followed by an int that ends at true_ub */ + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + MPI_Aint disp = dt_ptr->true_ub - MPIR_Datatype_get_basic_size(MPI_INT); + if (dt == MPI_FLOAT_INT) { + map->types[idx] = MPI_FLOAT; + } + if (dt == MPI_DOUBLE_INT) { + map->types[idx] = MPI_DOUBLE; + } + if (dt == MPI_LONG_INT) { + map->types[idx] = MPI_LONG; + } + if (dt == MPI_SHORT_INT) { + map->types[idx] = MPI_SHORT; + } + /* without this case the first entry's type was left uninitialized for this pair type */ + if (dt == MPI_LONG_DOUBLE_INT) { + map->types[idx] = MPI_LONG_DOUBLE; + } + map->disps[idx] = offset; + map->types[idx + 1] = MPI_INT; + map->disps[idx + 1] = offset + disp; + return; + }
else { + MPI_Aint n_elem; + MPI_Aint extent; + + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + MPIR_Datatype_contents *cp = dt_ptr->contents; + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types); + if (cp->nr_counts == 0) { + if (cp->combiner == MPI_COMBINER_DUP) { + typemap_set(map, p_types[0], idx, offset); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + /* resizing only moves lb/ub; the entries keep the old type's displacements */ + typemap_set(map, p_types[0], idx, offset); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + MPI_Aint idx2 = idx + n_elem; + for (int i = 1; i < p_ints[0]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + extent * i; + idx2++; + } + } + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + /* widen first so that stride * k is not computed in int */ + off2 = (MPI_Aint) p_ints[2] * k * extent; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + /* block k starts k strides after the first block (matches the large-count branch) */ + off2 = p_aints[0] * k; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner ==
MPI_COMBINER_INDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + /* displacements are in units of the old type's extent, so the first one is scaled like the rest */ + typemap_set(map, p_types[0], idx, offset + (MPI_Aint) p_ints[2 + 0] * extent); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = (p_ints[2 + k] - p_ints[2 + 0]) * extent; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_aints[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = p_aints[k] - p_aints[0]; + for (int i = 0; i < p_ints[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + int *p_blkl = p_ints + 1; + int *p_disp = p_ints + 1 + p_ints[0]; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + /* displacements are in units of the old type's extent, so the first one is scaled like the rest */ + typemap_set(map, p_types[0], idx, offset + (MPI_Aint) p_disp[0] * extent); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = (p_disp[k] - p_disp[0]) * extent; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_disp[0]); + + MPI_Aint k; +
MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_ints[0]; k++) { + off2 = p_disp[k] - p_disp[0]; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx; + for (int k = 0; k < p_ints[0]; k++) { + type_get_n_elem_extent(p_types[k], &n_elem, &extent); + typemap_set(map, p_types[k], idx2, offset + p_disp[k]); + /* replicate from this member's own first copy, not from member 0 at idx */ + MPI_Aint base = idx2; + idx2 += n_elem; + for (int i = 1; i < p_blkl[k]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[base + j]; + map->disps[idx2] = map->disps[base + j] + extent * i; + idx2++; + } + } + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[1 + p_ints[0] * 3]; + int *p_sizes = p_ints + 1; + int *p_subsizes = p_ints + 1 + p_ints[0]; + int *p_starts = p_ints + 1 + p_ints[0] * 2; + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < p_ints[0]; i++) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } else { + for (int i = p_ints[0] - 1; i >= 0; i--) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + /* NOTE(review): off0 is a linearized element index used as a displacement without scaling by extent -- confirm */ + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(p_ints[0], sizeof(MPI_Aint), MPL_MEM_OTHER); + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = p_ints[0] - 1; + } else { + i = 0; + } + while (1) { + counters[i]++; + off2 += stride; + if (counters[i] < p_subsizes[i]) { + break; + } + off2 -= stride * p_subsizes[i]; + counters[i] = 0; + stride *= p_sizes[i]; + if (order == MPI_ORDER_C) { +
i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == p_ints[0]) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + int *p_gsizes = p_ints + 3; + int *p_distribs = p_ints + 3 + n; + int *p_dargs = p_ints + 3 + n * 2; + int *p_sizes = p_ints + 3 + n * 3; + int order = p_ints[3 + n * 4]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < n; i++) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } else { + for (int i = n 
- 1; i >= 0; i--) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(n, sizeof(MPI_Aint), MPL_MEM_OTHER); + if (p_isblk[0] == 0) { + counters[0] = p_prank[0] * p_parg[0]; + } + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = n - 1; + } else { + i = 0; + } + while (1) { + if (p_isblk[i] == 0) { + MPI_Aint old_counter = counters[i]; + counters[i]++; + if (counters[i] % p_parg[i] == 0) { + counters[i] += p_parg[i] * (p_sizes[i] - 1); + } + if (counters[i] < p_gsizes[i]) { + off2 += stride * (counters[i] - old_counter); + break; + } + counters[i] = p_starts[i]; + off2 += stride * (counters[i] - old_counter); + } else { + counters[i]++; + off2 += stride; + if (counters[i] < p_parg[i]) { + break; + } + off2 -= stride * p_parg[i]; + counters[i] = 0; + } + stride *= p_gsizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == n) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } else { + if (cp->combiner == MPI_COMBINER_DUP) { + typemap_set(map, p_types[0], idx, offset); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + /* resizing only moves lb/ub; the entries keep the old type's displacements */ + typemap_set(map, p_types[0], idx, offset); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + MPI_Aint idx2 = idx + n_elem; + for (int i = 1; i < p_counts[0]; i++) { + for (int j = 0; j < n_elem;
j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + extent * i; + idx2++; + } + } + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_counts[2] * k * extent; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_counts[2] * k; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + /* displacements are in units of the old type's extent, so the first one is scaled like the rest */ + typemap_set(map, p_types[0], idx, offset + p_counts[2 + 0] * extent); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = (p_counts[2 + k] - p_counts[2 + 0]) * extent; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_counts[2 + 0]); + + MPI_Aint k; +
MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_counts[2 + k] - p_counts[2 + 0]; + for (int i = 0; i < p_counts[1]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + /* displacements are in units of the old type's extent, so the first one is scaled like the rest */ + typemap_set(map, p_types[0], idx, offset + p_disp[0] * extent); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = (p_disp[k] - p_disp[0]) * extent; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + p_disp[0]); + + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx + n_elem; + for (int k = 0; k < p_counts[0]; k++) { + off2 = p_disp[k] - p_disp[0]; + for (int i = 0; i < p_blkl[k]; i++) { + if (k || i) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 + extent * i; + idx2++; + } + } + } + } + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint k; + MPI_Aint i; + MPI_Aint j; + MPI_Aint off2; + MPI_Aint idx2 = idx; + for (int k = 0; k < p_counts[0]; k++) { + type_get_n_elem_extent(p_types[k], &n_elem, &extent); +
typemap_set(map, p_types[k], idx2, offset + p_disp[k]); + /* replicate from this member's own first copy, not from member 0 at idx */ + MPI_Aint base = idx2; + idx2 += n_elem; + for (int i = 1; i < p_blkl[k]; i++) { + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[base + j]; + map->disps[idx2] = map->disps[base + j] + extent * i; + idx2++; + } + } + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[2]; + MPI_Aint *p_sizes = p_counts; + MPI_Aint *p_subsizes = p_counts + p_ints[0]; + MPI_Aint *p_starts = p_counts + p_ints[0] * 2; + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < p_ints[0]; i++) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } else { + for (int i = p_ints[0] - 1; i >= 0; i--) { + off0 = (off0 * p_sizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + /* NOTE(review): off0 is a linearized element index used as a displacement without scaling by extent -- confirm */ + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(p_ints[0], sizeof(MPI_Aint), MPL_MEM_OTHER); + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = p_ints[0] - 1; + } else { + i = 0; + } + while (1) { + counters[i]++; + off2 += stride; + if (counters[i] < p_subsizes[i]) { + break; + } + off2 -= stride * p_subsizes[i]; + counters[i] = 0; + stride *= p_sizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == p_ints[0]) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + MPI_Aint *p_gsizes = p_counts; + int *p_distribs = p_ints + 3; + int *p_dargs = p_ints + 3 + n; + int *p_sizes = p_ints + 3 + n * 2; + int order = p_ints[3 + n * 3]; + + int *p_prank; + p_prank = MPL_malloc(n *
sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + + MPI_Aint off0 = 0; + if (order == MPI_ORDER_C) { + for (int i = 0; i < n; i++) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } else { + for (int i = n - 1; i >= 0; i--) { + off0 = (off0 * p_gsizes[i]) + p_starts[i]; + } + } + type_get_n_elem_extent(p_types[0], &n_elem, &extent); + typemap_set(map, p_types[0], idx, offset + off0); + + MPI_Aint off2 = off0; + MPI_Aint idx2 = idx + n_elem; + MPI_Aint *counters; + counters = MPL_calloc(n, sizeof(MPI_Aint), MPL_MEM_OTHER); + if (p_isblk[0] == 0) { + counters[0] = p_prank[0] * p_parg[0]; + } + while (1) { + int alldone = 0; + MPI_Aint stride = 1; + int i; + if (order == MPI_ORDER_C) { + i = n - 1; + } else { + i = 0; + } + while (1) { + if (p_isblk[i] == 0) { + MPI_Aint old_counter = counters[i]; + counters[i]++; + if (counters[i] % p_parg[i] == 0) { + counters[i] 
+= p_parg[i] * (p_sizes[i] - 1); + } + if (counters[i] < p_gsizes[i]) { + off2 += stride * (counters[i] - old_counter); + break; + } + counters[i] = p_starts[i]; + off2 += stride * (counters[i] - old_counter); + } else { + counters[i]++; + off2 += stride; + if (counters[i] < p_parg[i]) { + break; + } + off2 -= stride * p_parg[i]; + counters[i] = 0; + } + stride *= p_gsizes[i]; + if (order == MPI_ORDER_C) { + i--; + if (i < 0) { + alldone = 1; + break; + } + } else { + i++; + if (i == n) { + alldone = 1; + break; + } + } + } + if (alldone) { + break; + } + for (int j = 0; j < n_elem; j++) { + map->types[idx2] = map->types[idx + j]; + map->disps[idx2] = map->disps[idx + j] + off2 - off0; + idx2++; + } + } + MPL_free(counters); + + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } + } + +} + +void typesig_set(struct typesig *sig, MPI_Datatype dt, MPI_Aint * p_idx, MPI_Aint count) +{ + MPI_Aint idx = *p_idx; + int *p_ints; + MPI_Aint *p_aints; + MPI_Aint *p_counts; + MPI_Datatype *p_types; + + if (count <= 0) { + return; + } + + if (HANDLE_IS_BUILTIN(dt)) { + typesig_check_space(sig, idx + 1); + if (dt == MPI_2INT) { + sig->types[idx] = MPI_INT; + sig->counts[idx] = 2 * count; + } else { + sig->types[idx] = dt; + sig->counts[idx] = count; + } + *p_idx = idx + 1; + return; + } else if (MPIR_DATATYPE_IS_PREDEFINED(dt)) { + typesig_check_space(sig, idx + 2 * count); + MPI_Datatype dt_a; + if (dt == MPI_FLOAT_INT) { + dt_a = MPI_FLOAT; + } + if (dt == MPI_DOUBLE_INT) { + dt_a = MPI_DOUBLE; + } + if (dt == MPI_LONG_INT) { + dt_a = MPI_LONG; + } + if (dt == MPI_SHORT_INT) { + dt_a = MPI_SHORT; + } + for (int i = 0; i < count; i++) { + sig->types[idx] = dt_a; + sig->types[idx + 1] = MPI_INT; + sig->counts[idx] = 1; + sig->counts[idx + 1] = 1; + idx += 2; + } + *p_idx = idx; + } else { + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + if (dt_ptr->basic_type != 
MPI_DATATYPE_NULL) { + if (HANDLE_IS_BUILTIN(dt_ptr->basic_type)) { + typesig_check_space(sig, idx + 1); + sig->types[idx] = dt_ptr->basic_type; + sig->counts[idx] = dt_ptr->n_builtin_elements * count; + *p_idx = idx + 1; + return; + } else { + typesig_set(sig, dt_ptr->basic_type, p_idx, count * dt_ptr->n_builtin_elements / 2); + return; + } + } else { + MPIR_Datatype_contents *cp = dt_ptr->contents; + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types); + if (cp->nr_counts == 0) { + if (cp->combiner == MPI_COMBINER_DUP) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + typesig_set(sig, p_types[0], p_idx, count * p_ints[0]); + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_ints[0] * p_ints[1]); + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_ints[0] * p_ints[1]); + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_ints[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_ints[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + int *p_blkl = p_ints + 1; + int *p_disp = p_ints + 1 + p_ints[0]; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_ints[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * 
blkl_sum); + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + int *p_blkl = p_ints + 1; + MPI_Aint *p_disp = p_aints; + MPI_Aint i; + MPI_Aint j; + MPI_Aint idx_save = *p_idx; + MPI_Aint idx_last = *p_idx; + for (int i = 0; i < p_ints[0]; i++) { + typesig_set(sig, p_types[i], p_idx, p_blkl[i]); + if (idx_last > 0 && sig->types[idx_last - 1] == sig->types[idx_last]) { + sig->counts[idx_last - 1] += sig->counts[idx_last]; + for (int j = idx_last; j < (*p_idx - 1); j++) { + sig->types[j] = sig->types[j + 1]; + sig->counts[j] = sig->counts[j + 1]; + } + (*p_idx)--; + } + idx_last = *p_idx; + } + if (count > 1) { + MPI_Aint num = *p_idx - idx_save; + typesig_check_space(sig, idx_save + count * num); + idx = *p_idx; + for (int i = 1; i < count; i++) { + for (int j = 0; j < num; j++) { + sig->types[idx] = sig->types[idx_save + j]; + sig->counts[idx] = sig->counts[idx_save + j]; + idx++; + } + } + *p_idx = idx; + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[1 + p_ints[0] * 3]; + int *p_sizes = p_ints + 1; + int *p_subsizes = p_ints + 1 + p_ints[0]; + int *p_starts = p_ints + 1 + p_ints[0] * 2; + MPI_Aint num = 1; + for (int i = 0; i < p_ints[0]; i++) { + num *= p_subsizes[i]; + } + typesig_set(sig, p_types[0], p_idx, count * num); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + int *p_gsizes = p_ints + 3; + int *p_distribs = p_ints + 3 + n; + int *p_dargs = p_ints + 3 + n * 2; + int *p_sizes = p_ints + 3 + n * 3; + int order = p_ints[3 + n * 4]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if 
(p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + MPI_Aint num = 1; + for (int i = 0; i < n; i++) { + if (p_isblk[i] == 0) { + MPI_Aint num_this; + MPI_Aint n_blks; + MPI_Aint n_groups; + n_blks = p_gsizes[i] / p_parg[i]; + n_groups = n_blks / p_sizes[i]; + num_this = n_groups * p_parg[i]; + if (p_prank[i] < n_blks % p_sizes[i]) { + num_this += p_prank[i]; + } + if (p_prank[i] == n_blks % p_sizes[i]) { + num_this += p_gsizes[i] % p_parg[i]; + } + num *= num_this; + } else if (p_isblk[i] == 1) { + num *= p_gsizes[i] / p_sizes[i]; + } else { + if (p_prank[i] < p_gsizes[i] % p_sizes[i]) { + num *= p_gsizes[i] / p_sizes[i] + 1; + } else { + num *= p_gsizes[i] / p_sizes[i]; + } + } + } + typesig_set(sig, p_types[0], p_idx, count * num); + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } else { + if (cp->combiner == MPI_COMBINER_DUP) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_RESIZED) { + typesig_set(sig, p_types[0], p_idx, count); + } else if (cp->combiner == MPI_COMBINER_CONTIGUOUS) { + typesig_set(sig, p_types[0], p_idx, count * p_counts[0]); + } else if (cp->combiner == MPI_COMBINER_VECTOR) { + typesig_set(sig, 
p_types[0], p_idx, count * p_counts[0] * p_counts[1]); + } else if (cp->combiner == MPI_COMBINER_HVECTOR) { + typesig_set(sig, p_types[0], p_idx, count * p_counts[0] * p_counts[1]); + } else if (cp->combiner == MPI_COMBINER_INDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_counts[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED_BLOCK) { + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_counts[1]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_INDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_HINDEXED) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint blkl_sum = 0; + for (int i = 0; i < p_counts[0]; i++) { + blkl_sum += p_blkl[i]; + } + typesig_set(sig, p_types[0], p_idx, count * blkl_sum); + } else if (cp->combiner == MPI_COMBINER_STRUCT) { + MPI_Aint *p_blkl = p_counts + 1; + MPI_Aint *p_disp = p_counts + 1 + p_counts[0]; + MPI_Aint i; + MPI_Aint j; + MPI_Aint idx_save = *p_idx; + MPI_Aint idx_last = *p_idx; + for (int i = 0; i < p_counts[0]; i++) { + typesig_set(sig, p_types[i], p_idx, p_blkl[i]); + if (idx_last > 0 && sig->types[idx_last - 1] == sig->types[idx_last]) { + sig->counts[idx_last - 1] += sig->counts[idx_last]; + for (int j = idx_last; j < (*p_idx - 1); j++) { + sig->types[j] = sig->types[j + 1]; + sig->counts[j] = sig->counts[j + 1]; + } + (*p_idx)--; + } + idx_last = *p_idx; + } + if (count > 1) { + MPI_Aint num = *p_idx - idx_save; + typesig_check_space(sig, idx_save + count * num); + idx = *p_idx; + for (int i = 1; i < count; i++) { + for (int j = 0; j < num; j++) 
{ + sig->types[idx] = sig->types[idx_save + j]; + sig->counts[idx] = sig->counts[idx_save + j]; + idx++; + } + } + *p_idx = idx; + } + } else if (cp->combiner == MPI_COMBINER_SUBARRAY) { + int order = p_ints[2]; + MPI_Aint *p_sizes = p_counts; + MPI_Aint *p_subsizes = p_counts + p_ints[0]; + MPI_Aint *p_starts = p_counts + p_ints[0] * 2; + MPI_Aint num = 1; + for (int i = 0; i < p_ints[0]; i++) { + num *= p_subsizes[i]; + } + typesig_set(sig, p_types[0], p_idx, count * num); + } else if (cp->combiner == MPI_COMBINER_DARRAY) { + int size = p_ints[0]; + int rank = p_ints[1]; + int n = p_ints[2]; + MPI_Aint *p_gsizes = p_counts; + int *p_distribs = p_ints + 3; + int *p_dargs = p_ints + 3 + n; + int *p_sizes = p_ints + 3 + n * 2; + int order = p_ints[3 + n * 3]; + + int *p_prank; + p_prank = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + int *p_isblk; + p_isblk = MPL_malloc(n * sizeof(int), MPL_MEM_OTHER); + MPI_Aint *p_parg; + p_parg = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + MPI_Aint *p_starts; + p_starts = MPL_malloc(n * sizeof(MPI_Aint), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { + size /= p_sizes[i]; + p_prank[i] = rank / size; + rank = rank % size; + if (p_sizes[i] == 1) { + p_isblk[i] = 1; + p_parg[i] = p_gsizes[i]; + p_starts[i] = 0; + } else if (p_distribs[i] == MPI_DISTRIBUTE_BLOCK && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_parg[i] = p_gsizes[i] / p_sizes[i]; + if (p_gsizes[i] % p_sizes[i] == 0) { + p_isblk[i] = 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + int r = p_gsizes[i] % p_sizes[i]; + p_isblk[i] = 2; + if (p_prank[i] < r) { + p_parg[i] += 1; + p_starts[i] = p_parg[i] * p_prank[i]; + } else { + p_starts[i] = p_parg[i] * p_prank[i] + r; + } + } + } else if (p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && + p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG) { + p_isblk[i] = 0; + p_parg[i] = 1; + p_starts[i] = p_prank[i]; + } else { + p_isblk[i] = 0; + p_parg[i] = p_dargs[i]; + p_starts[i] = p_parg[i] * p_prank[i]; + } + } + MPI_Aint num = 
1; + for (int i = 0; i < n; i++) { + if (p_isblk[i] == 0) { + MPI_Aint num_this; + MPI_Aint n_blks; + MPI_Aint n_groups; + n_blks = p_gsizes[i] / p_parg[i]; + n_groups = n_blks / p_sizes[i]; + num_this = n_groups * p_parg[i]; + if (p_prank[i] < n_blks % p_sizes[i]) { + num_this += p_prank[i]; + } + if (p_prank[i] == n_blks % p_sizes[i]) { + num_this += p_gsizes[i] % p_parg[i]; + } + num *= num_this; + } else if (p_isblk[i] == 1) { + num *= p_gsizes[i] / p_sizes[i]; + } else { + if (p_prank[i] < p_gsizes[i] % p_sizes[i]) { + num *= p_gsizes[i] / p_sizes[i] + 1; + } else { + num *= p_gsizes[i] / p_sizes[i]; + } + } + } + typesig_set(sig, p_types[0], p_idx, count * num); + MPL_free(p_prank); + MPL_free(p_isblk); + MPL_free(p_parg); + MPL_free(p_starts); + } else { + MPIR_Assert(0); + } + + } + } + } + +} + /* Report how many basic elements dt contributes (*p_n_elem) and the extent value paired with it (*p_extent). Builtin handles are answered directly: MPI_2INT counts as two elements, every other builtin as one, with the size taken from MPIR_Datatype_get_basic_size(). Any other handle is resolved to its MPIR_Datatype (asserted non-NULL) and answered from n_builtin_elements and extent. */ +void type_get_n_elem_extent(MPI_Datatype dt, MPI_Aint * p_n_elem, MPI_Aint * p_extent) +{ + if (HANDLE_IS_BUILTIN(dt)) { + if (dt == MPI_2INT) { /* NOTE(review): the extent stored here is the size of a single MPI_INT, while the non-builtin path below stores the extent of the whole type; the _set_contig template in typemap_set.def places copy i at extent * i, so confirm whether this should span both ints. */ + *p_n_elem = 2; + *p_extent = MPIR_Datatype_get_basic_size(MPI_INT); + } else { + *p_n_elem = 1; + *p_extent = MPIR_Datatype_get_basic_size(dt); + } + return; + } + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(dt, dt_ptr); + MPIR_Assert(dt_ptr != NULL); + *p_n_elem = dt_ptr->n_builtin_elements; + *p_extent = dt_ptr->extent; +} + /* Make sure sig->types and sig->counts have room for at least n entries. Nothing happens when sig->n is already >= n; otherwise sig->n becomes 2 * n and both arrays are reallocated to that many entries. */ +void typesig_check_space(struct typesig *sig, MPI_Aint n) +{ + if (sig->n < n) { + sig->n = n * 2; /* NOTE(review): both results below overwrite the only copy of the old pointer and are not checked; presumably MPL_realloc can return NULL like realloc, which would lose the old buffer - confirm and decide how a void function should report it. */ + sig->types = MPL_realloc(sig->types, sig->n * sizeof(MPI_Datatype), MPL_MEM_OTHER); + sig->counts = MPL_realloc(sig->counts, sig->n * sizeof(MPI_Aint), MPL_MEM_OTHER); + } +} diff --git a/src/mpi/datatype/typerep/src/typerep_dataloop_create.c b/src/mpi/datatype/typerep/src/typerep_dataloop_create.c index 725f92e284c..7bd171240eb 100644 --- a/src/mpi/datatype/typerep/src/typerep_dataloop_create.c +++ b/src/mpi/datatype/typerep/src/typerep_dataloop_create.c @@ -627,6 +627,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle MPI_Aint size = 0; MPI_Datatype el_type = 
MPI_DATATYPE_NULL; MPI_Aint true_lb_disp = 0, true_ub_disp = 0, lb_disp = 0, ub_disp = 0; + MPI_Aint el_count = 0; for (MPI_Aint i = 0; i < count; i++) { MPI_Aint tmp_lb, tmp_ub, tmp_true_lb, tmp_true_ub; @@ -652,6 +653,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle tmp_true_ub = tmp_ub; size += tmp_el_sz * array_of_blocklengths[i]; + el_count += array_of_blocklengths[i]; } else { MPIR_Datatype_get_ptr(array_of_types[i], old_dtp); @@ -668,6 +670,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle tmp_true_ub = tmp_ub + (old_dtp->true_ub - old_dtp->ub); size += old_dtp->size * array_of_blocklengths[i]; + el_count += array_of_blocklengths[i] * old_dtp->n_builtin_elements; } /* element size and type */ @@ -731,7 +734,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle old_are_contig = 0; } - newtype->n_builtin_elements = -1; /* TODO */ + newtype->n_builtin_elements = el_count; newtype->builtin_element_size = el_sz; newtype->basic_type = el_type; diff --git a/src/mpi/datatype/typerep/src/typerep_yaksa_create.c b/src/mpi/datatype/typerep/src/typerep_yaksa_create.c index 25fd913d674..a31bf6c9291 100644 --- a/src/mpi/datatype/typerep/src/typerep_yaksa_create.c +++ b/src/mpi/datatype/typerep/src/typerep_yaksa_create.c @@ -46,6 +46,10 @@ static int update_yaksa_type(MPIR_Datatype * newtype, MPI_Datatype oldtype, MPI_ if (count == 0) { /* this is a struct, deal with it in MPIR_Typerep_create_struct */ + } else if (oldtype == MPI_2INT) { + newtype->n_builtin_elements = count * 2; + newtype->builtin_element_size = (MPI_Aint) MPIR_Datatype_get_basic_size(MPI_INT); + newtype->basic_type = MPI_INT; } else if (HANDLE_IS_BUILTIN(oldtype)) { MPI_Aint el_sz = (MPI_Aint) MPIR_Datatype_get_basic_size(oldtype); newtype->n_builtin_elements = count; @@ -322,6 +326,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle MPI_Aint el_sz = 0; MPI_Datatype 
el_type = MPI_DATATYPE_NULL; int found_el_type = 0; + MPI_Aint el_count = 0; for (int i = 0; i < count; i++) { MPI_Aint tmp_el_sz; MPI_Datatype tmp_el_type; @@ -333,10 +338,12 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle if (HANDLE_IS_BUILTIN(array_of_types[i])) { tmp_el_sz = MPIR_Datatype_get_basic_size(array_of_types[i]); tmp_el_type = array_of_types[i]; + el_count += array_of_blocklengths[i]; } else { MPIR_Datatype_get_ptr(array_of_types[i], old_dtp); tmp_el_sz = old_dtp->builtin_element_size; tmp_el_type = old_dtp->basic_type; + el_count += array_of_blocklengths[i] * old_dtp->n_builtin_elements; } if (found_el_type == 0) { @@ -351,7 +358,7 @@ int MPIR_Typerep_create_struct(MPI_Aint count, const MPI_Aint * array_of_blockle el_type = MPI_DATATYPE_NULL; } } - newtype->n_builtin_elements = -1; /* TODO */ + newtype->n_builtin_elements = el_count; newtype->builtin_element_size = el_sz; newtype->basic_type = el_type; diff --git a/t.def b/t.def new file mode 100644 index 00000000000..2743579a7c0 --- /dev/null +++ b/t.def @@ -0,0 +1,49 @@ +include: c/mpi.def +page: t, mpi_frame + # run: gdb + $dump $(rank), $(size) + + $(if:1) + $call test + $(elif:0) + $call test_subarray + $(elif:0) + $call test_darray + $(else) + $call test_1 + +macros: + # dump_type: MPIX_Type_dump_typemap + dump_type: MPIX_Type_dump_typesig + +subcode: test + &call Type_commit, MPI_SHORT_INT + $call type_struct, MPI_INT, 4, 0, MPI_DOUBLE, 8, 16 + $call type_struct, $(dt), 1, 0, $(dt), 0, 100, $(dt), 1, 0, $(dt), 0, 100 + $call type_subarray, 4, [2, 1, 1, 1], [2, 1, 1, 1], [0, 0, 0, 0] + $(dump_type)($(dt)) + +subcode: test_a1 + &call Type_commit, MPI_SHORT_INT + $call type_struct, MPI_INT, 4, 0, MPI_DOUBLE, 8, 16 + $call type_subarray, 4, [1, 1, 1, 1], [1, 1, 1, 1], [0, 0, 0, 0] + $call type_blkhidx, 1, 2, 8 + $(dump_type)($(dt)) + +subcode: test_1 + &call Type_commit, MPI_SHORT_INT + $call type_contig, 2 + $call type_hvector, 2, 1, 10 + $call type_struct, 
MPI_INT, 1, 0, MPI_SHORT, 10, 8, $(dt), 1, 100 + $call type_contig, 2 + $(dump_type)($(dt)) + +subcode: test_subarray + &call Type_commit, MPI_CHAR + $call type_subarray, 3, [4, 5, 10], [2, 2, 2], [0, 1, 1] + $(dump_type)($(dt)) + +subcode: test_darray + &call Type_commit, MPI_CHAR + $call type_darray, 10, 5, 3, [4, 4, 10], [2, -, 5], [1, -, 1] + $(dump_type)($(dt)) diff --git a/typemap.def b/typemap.def new file mode 100644 index 00000000000..c271da225d7 --- /dev/null +++ b/typemap.def @@ -0,0 +1,124 @@ +include: typemap_cases.def +include: typemap_set.def +include: typesig_set.def + +page: typemap, - + output_dir: src/mpi/datatype + module: c + + $list MPIR_type_dump_typemap, MPIR_type_dump_typesig + $list MPIR_type_get_typemap, MPIR_type_get_typesig + $list MPIR_type_free_typemap, MPIR_type_free_typesig + +subcode: no-autoload + $struct typemap + MPI_Aint n + MPI_Datatype *types + MPI_Aint *disps + + $struct typesig + MPI_Aint n + MPI_Datatype *types + MPI_Aint *counts + +subcode: _autoload + $register_name(dt) MPI_Datatype + $register_name(dt_ptr) MPIR_Datatype * + $register_name(map) struct typemap * + $register_name(sig) struct typesig * + $register_fmt(MPI_Aint) %ld + +subcode: get_dt_ptr + $my MPIR_Datatype *dt_ptr + MPIR_Datatype_get_ptr(dt, dt_ptr) + MPIR_Assert(dt_ptr != NULL); + +fncode: MPIR_type_dump_typesig(dt): int + $my sig + sig = MPIR_type_get_typesig(dt) + $for i=0:sig->n + $if i>0 + $print ",-" + $print "%s:%ld-", MPIR_Datatype_builtin_to_string(sig->types[i]), (long) sig->counts[i] + $print + + MPIR_type_free_typesig(sig) + return MPI_SUCCESS + +fncode: MPIR_type_dump_typemap(dt): int + $my MPI_Aint lb, MPI_Aint extent + MPIR_Type_get_extent_impl(dt, &lb, &extent); + printf(" %20s: %10ld\n", "lb", (long) lb) + printf(" %20s: %10ld\n", "ub", (long) (lb + extent)) + + $my map + map = MPIR_type_get_typemap(dt) + $for i=0:map->n + $print " %20s: %10ld\n", MPIR_Datatype_builtin_to_string(map->types[i]), (long) map->disps[i] + + 
MPIR_type_free_typemap(map) + return MPI_SUCCESS + +fncode: MPIR_type_get_typesig(dt) + $allocate sig + sig->n = 1 + $map allocate(1) sig->types, sig->counts + + $my MPI_Aint idx = 0 + typesig_set(sig, dt, &idx, 1) + sig->n = idx + return sig + +fncode: MPIR_type_get_typemap(dt) + $allocate map + + $if HANDLE_IS_BUILTIN(dt) + $if dt == MPI_2INT + map->n = 2 + $else + map->n = 1 + $else + $call get_dt_ptr + map->n = dt_ptr->n_builtin_elements + + $call allocate_space + $my n_elem, extent: MPI_Aint + typemap_set(map, dt, 0, 0) + return map + + subcode: allocate_space + MPIR_Assert(map->n > 0); + $map allocate(map->n), map->types, map->disps + +#---------------------------------------- +fncode: MPIR_type_free_typemap(map) + MPL_free(map->types); + MPL_free(map->disps); + MPL_free(map); + +fncode: MPIR_type_free_typesig(sig) + MPL_free(sig->types); + MPL_free(sig->counts); + MPL_free(sig); + +#---------------------------------------- +subcode: _autoload + $(block:frame_init) + $: /* + $: * Copyright (C) by Argonne National Laboratory + $: * See COPYRIGHT in top-level directory + $: */ + NEWLINE + $include "mpiimpl.h" + $include "datatype.h" + +subcode: allocate(n, var) + $(if:var~.*types) + $(set:T=MPI_Datatype) + $(elif:var~.*(rank|isblk)) + $(set:T=int) + $(else) + $(set:T=MPI_Aint) + $(if:var!~.*->) + $my $(T) *$(var) + $(var) = MPL_malloc($(n) * sizeof($(T)), MPL_MEM_OTHER) diff --git a/typemap_cases.def b/typemap_cases.def new file mode 100644 index 00000000000..e7fbfa6b906 --- /dev/null +++ b/typemap_cases.def @@ -0,0 +1,170 @@ +subcode: switch_combiner + $call case, DUP, dup + $call case, RESIZED, resized + $call case, CONTIGUOUS, contig + $call case, VECTOR, vector + $call case, HVECTOR, vector + $call case, INDEXED_BLOCK, idxblk + $call case, HINDEXED_BLOCK, idxblk + $call case, INDEXED, indexed + $call case, HINDEXED, indexed + $call case, STRUCT, struct + $call case, SUBARRAY, subarray + $call case, DARRAY, darray + $else + MPIR_Assert(0) + + subcode: 
case(NAME, type) + $case cp->combiner == MPI_COMBINER_$(NAME) + $(if:NAME~H) + $(set:h=h) + $(set:ext=) + $(else) + $(set:h=-) + $(set:ext= * extent) + $call set_$(type) + + #---------------------------------------- + subcode: set_dup + $call _set_dup + + subcode: set_resized + $(if:c=-) + $(set:lb=p_aints[0]) + $(set:extent=p_aints[1]) + $(else) + $(set:lb=p_counts[0]) + $(set:extent=p_counts[1]) + $call _set_resized + + subcode: set_contig + $(if:c=-) + $(set:n=p_ints[0]) + $(else) + $(set:n=p_counts[0]) + $call _set_contig + + # --------------------------------------- + subcode: set_vector + $(if:c=-) + $(set:n=p_ints[0]) + $(set:blkl=p_ints[1]) + $(if:h=-) + $(set:disp=p_ints[2] * $1) + $(else) + $(set:disp=p_aints[0]) + $(else) + $(set:n=p_counts[0]) + $(set:blkl=p_counts[1]) + $(set:disp=p_counts[2] * $1) + $call _set_vector + + subcode: set_idxblk + $(if:c=-) + $(set:n=p_ints[0]) + $(set:blkl=p_ints[1]) + $(if:h=-) + $(set:disp=p_ints[2+$1]) + $(else) + $(set:disp=p_aints[$1]) + $(else) + $(set:n=p_counts[0]) + $(set:blkl=p_counts[1]) + $(set:disp=p_counts[2+$1]) + $call _set_indexed + + subcode: set_indexed + $(if:c=-) + $(set:n=p_ints[0]) + $my int *p_blkl = p_ints + 1 + $(if:h=-) + $my int *p_disp = p_ints + 1 + $(n) + $(else) + $my MPI_Aint *p_disp = p_aints + $(else) + $(set:n=p_counts[0]) + $my MPI_Aint *p_blkl = p_counts + 1 + $my MPI_Aint *p_disp = p_counts + 1 + $(n) + $(set:blkl=p_blkl[$1]) + $(set:disp=p_disp[$1]) + $call _set_indexed + + subcode: set_struct + $(if:c=-) + $(set:n=p_ints[0]) + $my int *p_blkl = p_ints + 1 + $my MPI_Aint *p_disp = p_aints + $(else) + $(set:n=p_counts[0]) + $my MPI_Aint *p_blkl = p_counts + 1 + $my MPI_Aint *p_disp = p_counts + 1 + $(n) + $(set:blkl=p_blkl[$1]) + $(set:disp=p_disp[$1]) + $call _set_struct + + subcode: set_subarray + $(set:n=p_ints[0]) + $(if:c=-) + $my int order = p_ints[1 + $(n) * 3] + $my int *p_sizes = p_ints + 1 + $my int *p_subsizes = p_ints + 1 + $(n) + $my int *p_starts = p_ints + 1 + $(n) * 2 
+ $(else) + $my int order = p_ints[2] + $my MPI_Aint *p_sizes = p_counts + $my MPI_Aint *p_subsizes = p_counts + $(n) + $my MPI_Aint *p_starts = p_counts + $(n) * 2 + $call _set_subarray + + subcode: set_darray + $my int size = p_ints[0] + $my int rank = p_ints[1] + $my int n = p_ints[2] + $(set:n=n) + $(if:c=-) + $my int *p_gsizes = p_ints + 3 + $my int *p_distribs = p_ints + 3 + $(n) + $my int *p_dargs = p_ints + 3 + $(n) * 2 + $my int *p_sizes = p_ints + 3 + $(n) * 3 + $my int order = p_ints[3 + $(n) * 4] + $(else) + $my MPI_Aint *p_gsizes = p_counts + $my int *p_distribs = p_ints + 3 + $my int *p_dargs = p_ints + 3 + $(n) + $my int *p_sizes = p_ints + 3 + $(n) * 2 + $my int order = p_ints[3 + $(n) * 3] + + $map allocate($(n)) p_prank, p_isblk, p_parg, p_starts + $for i=0:$(n) + size /= p_sizes[i] + p_prank[i] = rank / size + rank = rank % size + $if p_sizes[i] == 1 + p_isblk[i] = 1 + p_parg[i] = p_gsizes[i] + p_starts[i] = 0 + $elif p_distribs[i] == MPI_DISTRIBUTE_BLOCK && p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG + p_parg[i] = p_gsizes[i] / p_sizes[i] + $if p_gsizes[i] % p_sizes[i] == 0 + p_isblk[i] = 1 + p_starts[i] = p_parg[i] * p_prank[i] + $else + $my int r = p_gsizes[i] % p_sizes[i] + p_isblk[i] = 2 + $if p_prank[i] < r + p_parg[i] += 1 + p_starts[i] = p_parg[i] * p_prank[i] + $else + p_starts[i] = p_parg[i] * p_prank[i] + r + $elif p_distribs[i] == MPI_DISTRIBUTE_CYCLIC && p_dargs[i] == MPI_DISTRIBUTE_DFLT_DARG + p_isblk[i] = 0 + p_parg[i] = 1 + p_starts[i] = p_prank[i] + $else + p_isblk[i] = 0 + p_parg[i] = p_dargs[i] + p_starts[i] = p_parg[i] * p_prank[i] + $call _set_darray + $(for:p_prank, p_isblk, p_parg, p_starts) + MPL_free($1) + diff --git a/typemap_set.def b/typemap_set.def new file mode 100644 index 00000000000..ffaefc1a7ff --- /dev/null +++ b/typemap_set.def @@ -0,0 +1,234 @@ +fncode: type_get_n_elem_extent(dt, MPI_Aint *p_n_elem, MPI_Aint *p_extent) + $if HANDLE_IS_BUILTIN(dt) + $if dt == MPI_2INT + *p_n_elem = 2 + *p_extent = 
MPIR_Datatype_get_basic_size(MPI_INT) + $else + *p_n_elem = 1 + *p_extent = MPIR_Datatype_get_basic_size(dt) + return + $call get_dt_ptr + *p_n_elem = dt_ptr->n_builtin_elements + *p_extent = dt_ptr->extent + +fncode: typemap_set(map, dt, MPI_Aint idx, MPI_Aint offset) + $if HANDLE_IS_BUILTIN(dt) + $if dt == MPI_2INT + map->types[idx] = MPI_INT + map->disps[idx] = offset + map->types[idx+1] = MPI_INT + map->disps[idx+1] = offset + MPIR_Datatype_get_basic_size(MPI_INT) + $else + map->types[idx] = dt + map->disps[idx] = offset + return + $elif MPIR_DATATYPE_IS_PREDEFINED(dt) + $call get_dt_ptr + $my MPI_Aint disp = dt_ptr->true_ub - MPIR_Datatype_get_basic_size(MPI_INT) + $(for:FLOAT,DOUBLE,LONG,SHORT) + $if dt == MPI_$1_INT + map->types[idx] = MPI_$1 + map->disps[idx] = offset + map->types[idx+1] = MPI_INT + map->disps[idx+1] = offset + disp + return + $else + $my MPI_Aint n_elem, MPI_Aint extent + + $call get_dt_ptr + $my MPIR_Datatype_contents *cp = dt_ptr->contents + $local int *p_ints, MPI_Aint *p_aints, MPI_Aint *p_counts, MPI_Datatype *p_types + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types) + $if cp->nr_counts == 0 + $(set:c=-) + $call switch_combiner + $else + $(set:c=c) + $call switch_combiner + + # -------------------------------- + subcode: _set_dup + typemap_set(map, p_types[0], idx, offset) + + subcode: _set_resized + typemap_set(map, p_types[0], idx, offset + $(lb)) + + subcode: _set_contig + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset) + $local i, j: MPI_Aint + $my MPI_Aint idx2 = idx + n_elem + $for i=1:$(n) + $for j=0:n_elem + map->types[idx2] = map->types[idx + j] + map->disps[idx2] = map->disps[idx + j] + extent * i + idx2++ + + # common for vector, indexed_block, indexed + subcode: _set_vector + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset) + + $my k, i, j: MPI_Aint + $my MPI_Aint off2 + $my MPI_Aint idx2 = idx + 
n_elem + $for k=0:$(n) + off2 = $(disp:k)$(ext) + $call _copy_inner_blk, off2 + extent * i + + subcode: _set_indexed + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset + $(disp:0)) + + $my k, i, j: MPI_Aint + $my MPI_Aint off2 + $my MPI_Aint idx2 = idx + n_elem + $for k=0:$(n) + off2 = ($(disp:k) - $(disp:0))$(ext) + $call _copy_inner_blk, off2 + extent * i + + subcode: _set_struct + $my k, i, j: MPI_Aint + $my MPI_Aint off2 + $my MPI_Aint idx2 = idx + $for k=0:$(n) + type_get_n_elem_extent(p_types[k], &n_elem, &extent) + typemap_set(map, p_types[k], idx2, offset + $(disp:k)) + idx2 += n_elem + $for i=1:$(blkl:k) + $call _copy_elements, extent * i + + # --------------------- + subcode: _copy_inner_blk(off) + $for i=0:$(blkl:k) + $if k || i + $call _copy_elements, $(off) + + subcode: _copy_elements(off) + $for j=0:n_elem + map->types[idx2] = map->types[idx + j] + map->disps[idx2] = map->disps[idx + j] + $(off) + idx2++ + + # --------------------- + subcode: _set_subarray + $(set:sizei=p_sizes[i]) + $(set:starti=p_starts[i]) + &call set_ndim, subarray + $call _copy_elements, off2 - off0 + + subcode: init_counters_subarray + NOOP + subcode: inc_counter_i_subarray + counters[i]++ + off2 += stride + $if counters[i] < p_subsizes[i] + break + off2 -= stride * p_subsizes[i] + counters[i] = 0 + + # --------------------- + subcode: _set_darray + $(set:sizei=p_gsizes[i]) + $(set:starti=p_starts[i]) + + &call set_ndim, darray + $call _copy_elements, off2 - off0 + + subcode: init_counters_darray + $if p_isblk[0] == 0 + $call init_counters_cyclic + subcode: inc_counter_i_darray + $if p_isblk[i] == 0 + $call inc_counter_cyclic + $else + $call inc_counter_block + + # -- cyclic - counters use actual offset + subcode: init_counters_cyclic + counters[0] = p_prank[0] * p_parg[0] + subcode: inc_counter_cyclic + $my MPI_Aint old_counter = counters[i] + counters[i]++ + $if counters[i] % p_parg[i] == 0 + counters[i] += p_parg[i] * (p_sizes[i] 
- 1) + $if counters[i] < p_gsizes[i] + off2 += stride * (counters[i] - old_counter) + break + # reset for next dimension + counters[i] = p_starts[i] + off2 += stride * (counters[i] - old_counter) + + # -- block + subcode: inc_counter_block + counters[i]++ + off2 += stride + $if counters[i] < p_parg[i] + break + # reset for next dimension + off2 -= stride * p_parg[i] + counters[i] = 0 + +#---------------------------------------- +subcode: set_ndim(type) + $call calc_off0 + type_get_n_elem_extent(p_types[0], &n_elem, &extent) + typemap_set(map, p_types[0], idx, offset + off0) + + $my MPI_Aint off2 = off0 + $my MPI_Aint idx2 = idx + n_elem + # -- iter_ndim -- + $my MPI_Aint *counters + counters = MPL_calloc($(n), sizeof(MPI_Aint), MPL_MEM_OTHER) + $call @init_counters_$(type) + $while 1 + $call inc_counters + BLOCK + MPL_free(counters) + + subcode: calc_off0 + # high to low dimension + $my MPI_Aint off0 = 0 + $if order == MPI_ORDER_C + $for i=0:$(n) + $call update_off0 + $else + $for i=$(n)-1 downto 0 + $call update_off0 + + subcode: update_off0 + off0 = (off0 * $(sizei)) + $(starti) + + subcode: inc_counters + # low to high dimension + $my alldone = 0 + $my MPI_Aint stride = 1 + $call @init_i + $while 1 + $call inc_counter_i_$(type) + # -- next dimension + stride *= $(sizei) + $call @next_i + $if alldone + break + + subcode: init_i + $my int i + $if order == MPI_ORDER_C + i = $(n) - 1 + $else + i = 0 + + subcode: next_i + $if order == MPI_ORDER_C + i-- + $call alldone, i < 0 + $else + i++ + $call alldone, i == $(n) + subcode: alldone(cond) + $if $(cond) + alldone = 1 + break + + diff --git a/typesig_set.def b/typesig_set.def new file mode 100644 index 00000000000..0de8450aa51 --- /dev/null +++ b/typesig_set.def @@ -0,0 +1,133 @@ +fncode: typesig_set(sig, dt, MPI_Aint *p_idx, MPI_Aint count) + $if count <= 0 + return + + $local MPI_Aint idx = *p_idx + + $if HANDLE_IS_BUILTIN(dt) + typesig_check_space(sig, idx + 1) + $if dt == MPI_2INT + sig->types[idx] = MPI_INT + 
sig->counts[idx] = 2 * count + $else + sig->types[idx] = dt + sig->counts[idx] = count + *p_idx = idx + 1 + return + $elif MPIR_DATATYPE_IS_PREDEFINED(dt) + typesig_check_space(sig, idx + 2 * count) + $my MPI_Datatype dt_a + $(for:FLOAT,DOUBLE,LONG,SHORT) + $if dt == MPI_$1_INT + dt_a = MPI_$1 + $for i=0:count + sig->types[idx] = dt_a + sig->types[idx + 1] = MPI_INT + sig->counts[idx] = 1 + sig->counts[idx + 1] = 1 + idx+=2 + *p_idx = idx + $else + $call get_dt_ptr + $if dt_ptr->basic_type != MPI_DATATYPE_NULL + $if HANDLE_IS_BUILTIN(dt_ptr->basic_type) + typesig_check_space(sig, idx + 1) + sig->types[idx] = dt_ptr->basic_type + sig->counts[idx] = dt_ptr->n_builtin_elements * count + *p_idx = idx + 1 + return + $else + typesig_set(sig, dt_ptr->basic_type, p_idx, count * dt_ptr->n_builtin_elements) + return + $else + $my MPIR_Datatype_contents *cp = dt_ptr->contents + $local int *p_ints, MPI_Aint *p_aints, MPI_Aint *p_counts, MPI_Datatype *p_types + MPIR_Datatype_access_contents(cp, &p_ints, &p_aints, &p_counts, &p_types) + $if cp->nr_counts == 0 + $(set:c=-) + $call switch_combiner + $else + $(set:c=c) + $call switch_combiner + + # -------------------------------- + subcode: _set_dup + # $print "typesig_set dup" + typesig_set(sig, p_types[0], p_idx, count) + + subcode: _set_resized + typesig_set(sig, p_types[0], p_idx, count) + + subcode: _set_contig + typesig_set(sig, p_types[0], p_idx, count * $(n)) + + # common for vector, indexed_block + subcode: _set_vector + # $print "typesig_set vector count=%ld", count * $(n) * $(blkl) + typesig_set(sig, p_types[0], p_idx, count * $(n) * $(blkl)) + + subcode: _set_indexed + $my MPI_Aint blkl_sum = 0 + $for i=0:$(n) + blkl_sum += $(blkl:i) + # $print "typesig_set indexed count=%ld", count * blkl_sum + typesig_set(sig, p_types[0], p_idx, count * blkl_sum) + + subcode: _set_struct + $my i, j: MPI_Aint + $my MPI_Aint idx_save = *p_idx + $my MPI_Aint idx_last = *p_idx + $for i=0:$(n) + typesig_set(sig, p_types[i], p_idx, 
$(blkl:i)) + $if idx_last > 0 && sig->types[idx_last-1] == sig->types[idx_last] + sig->counts[idx_last-1] += sig->counts[idx_last] + $for j=idx_last:(*p_idx - 1) + sig->types[j] = sig->types[j+1] + sig->counts[j] = sig->counts[j+1] + (*p_idx)-- + idx_last = *p_idx + $if count > 1 + $my MPI_Aint num = *p_idx - idx_save + typesig_check_space(sig, idx_save + count * num) + idx = *p_idx + $for i=1:count + $for j=0:num + sig->types[idx] = sig->types[idx_save + j] + sig->counts[idx] = sig->counts[idx_save + j] + idx++ + *p_idx = idx + + # --------------------- + subcode: _set_subarray + $my MPI_Aint num = 1 + $for i=0:$(n) + num *= p_subsizes[i] + typesig_set(sig, p_types[0], p_idx, count * num) + + subcode: _set_darray + $my MPI_Aint num = 1 + $for i=0:$(n) + $if p_isblk[i] == 0 + $my num_this, n_blks, n_groups: MPI_Aint + n_blks = p_gsizes[i] / p_parg[i] + n_groups = n_blks / p_sizes[i] + num_this = n_groups * p_parg[i] + $if p_prank[i] < n_blks % p_sizes[i] + num_this += p_prank[i] + $if p_prank[i] == n_blks % p_sizes[i] + num_this += p_gsizes[i] % p_parg[i] + num *= num_this + $elif p_isblk[i] == 1 + num *= p_gsizes[i] / p_sizes[i] + $else + $if p_prank[i] < p_gsizes[i] % p_sizes[i] + num *= p_gsizes[i] / p_sizes[i] + 1 + $else + num *= p_gsizes[i] / p_sizes[i] + typesig_set(sig, p_types[0], p_idx, count * num) + +fncode: typesig_check_space(sig, MPI_Aint n) + $if sig->n < n + sig->n = n * 2 + sig->types = MPL_realloc(sig->types, sig->n * sizeof(MPI_Datatype), MPL_MEM_OTHER) + sig->counts = MPL_realloc(sig->counts, sig->n * sizeof(MPI_Aint), MPL_MEM_OTHER)