Branch data Line data Source code
1 : : // ***************************************************************************** 2 : : /*! 3 : : \file src/Base/LoadDistributor.cpp 4 : : \copyright 2012-2015 J. Bakosi, 5 : : 2016-2018 Los Alamos National Security, LLC., 6 : : 2019-2021 Triad National Security, LLC. 7 : : All rights reserved. See the LICENSE file for details. 8 : : \brief Load distributors 9 : : \details Load distributors compute chunksize based on the degree of 10 : : virtualization. 11 : : */ 12 : : // ***************************************************************************** 13 : : 14 : : #include <limits> 15 : : 16 : : #include "Types.hpp" 17 : : #include "LoadDistributor.hpp" 18 : : #include "Exception.hpp" 19 : : 20 : : namespace tk { 21 : : 22 : : uint64_t 23 : 314 : linearLoadDistributor( real virtualization, 24 : : uint64_t load, 25 : : int npe, 26 : : uint64_t& chunksize, 27 : : uint64_t& remainder ) 28 : : // ***************************************************************************** 29 : : // Compute linear load distribution for given total work and virtualization 30 : : //! \param[in] virtualization Degree of virtualization [0.0...1.0] 31 : : //! \param[in] load Total load, e.g., number of particles, number of mesh cells 32 : : //! \param[in] npe Number of processing elements to distribute the load to 33 : : //! \param[inout] chunksize Chunk size, see detailed description 34 : : //! \param[inout] remainder Remainder, see detailed description 35 : : //! \return Number of work units 36 : : //! \details Compute load distibution (number of chares and chunksize) based on 37 : : //! total work (e.g., total number of particles) and virtualization 38 : : //! 39 : : //! The virtualization parameter, specified by the user, is a real number 40 : : //! between 0.0 and 1.0, inclusive, which controls the degree of 41 : : //! virtualization or over-decomposition. Independent of the value of 42 : : //! virtualization the work is approximately evenly distributed among the 43 : : //! available processing elements, given by npe. For zero virtualization (no 44 : : //! over-decomposition), the work is simply decomposed into total_work/numPEs, 45 : : //! which yields the smallest number of Charm++ chares and the largest chunks 46 : : //! of work units. The other extreme is unity virtualization, which decomposes 47 : : //! the total work into the smallest size work units possible, yielding the 48 : : //! largest number of Charm++ chares. Obviously, the optimum will be between 49 : : //! 0.0 and 1.0, depending on the problem. 50 : : //! 51 : : //! The formula implemented uses a linear relationship between the 52 : : //! virtualization parameter and the number of work units with the extremes 53 : : //! described above. The formula is given by 54 : : //! 55 : : //! chunksize = (1 - n) * v + n; 56 : : //! 57 : : //! where 58 : : //! - v = degree of virtualization 59 : : //! - n = load/npes 60 : : //! - load = total work, e.g., number of particles, number of mesh cells 61 : : //! - npes = number of hardware processing elements 62 : : // ***************************************************************************** 63 : : { 64 [ + + ][ + + ]: 314 : Assert( virtualization > -std::numeric_limits< real >::epsilon() && [ + - ][ + - ] [ + - ] 65 : : virtualization < 1.0+std::numeric_limits< real >::epsilon(), 66 : : "Virtualization parameter must be between [0.0...1.0]" ); 67 [ + + ][ + - ]: 312 : Assert( npe > 0, "Number of processing elements must be larger than zero" ); [ + - ][ + - ] 68 : : 69 : : // Compute minimum number of work units 70 : 311 : const auto n = static_cast< real >( load ) / npe; 71 : : 72 : : // Compute work unit size based on the linear formula above 73 : 311 : chunksize = static_cast< uint64_t >( (1.0 - n) * virtualization + n ); 74 : : 75 [ - + ][ - - ]: 311 : Assert( load >= chunksize, "Load must be larger than chunksize" ); [ - - ][ - - ] 76 : : 77 : : // Compute number of work units with size computed ignoring remainder 78 : 311 : uint64_t nchare = load / chunksize; 79 : : 80 : : // Compute remainder of work if the above number of units were to be created 81 : 311 : remainder = load - nchare * chunksize; 82 : : 83 : : // Redistribute remainder among the work units for a more equal distribution 84 : 311 : chunksize += remainder / nchare; 85 : : 86 : : // Compute new remainder (after redistribution of the previous remainder) 87 : 311 : remainder = load - nchare * chunksize; 88 : : 89 : : // Return number of work units (number of Charm++ chares) 90 : 311 : return nchare; 91 : : } 92 : : 93 : : } // tk::