Quinoa all test code coverage report
Current view: top level - Base - LoadDistributor.cpp (source / functions) Hit Total Coverage
Commit: -128-NOTFOUND Lines: 8 8 100.0 %
Date: 2024-11-08 10:37:44 Functions: 1 1 100.0 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : // *****************************************************************************
       2                 :            : /*!
       3                 :            :   \file      src/Base/LoadDistributor.cpp
       4                 :            :   \copyright 2012-2015 J. Bakosi,
       5                 :            :              2016-2018 Los Alamos National Security, LLC.,
       6                 :            :              2019-2021 Triad National Security, LLC.
       7                 :            :              All rights reserved. See the LICENSE file for details.
       8                 :            :   \brief     Load distributors
       9                 :            :   \details   Load distributors compute chunksize based on the degree of
      10                 :            :      virtualization.
      11                 :            : */
      12                 :            : // *****************************************************************************
      13                 :            : 
      14                 :            : #include <limits>
      15                 :            : 
      16                 :            : #include "Types.hpp"
      17                 :            : #include "LoadDistributor.hpp"
      18                 :            : #include "Exception.hpp"
      19                 :            : 
      20                 :            : namespace tk {
      21                 :            : 
      22                 :            : uint64_t
      23                 :        193 : linearLoadDistributor( real virtualization,
      24                 :            :                        uint64_t load,
      25                 :            :                        int npe,
      26                 :            :                        uint64_t& chunksize,
      27                 :            :                        uint64_t& remainder )
      28                 :            : // *****************************************************************************
      29                 :            : //  Compute linear load distribution for given total work and virtualization
      30                 :            : //! \param[in] virtualization Degree of virtualization [0.0...1.0]
      31                 :            : //! \param[in] load Total load, e.g., number of particles, number of mesh cells
      32                 :            : //! \param[in] npe Number of processing elements to distribute the load to
      33                 :            : //! \param[out] chunksize Chunk size, see detailed description
      34                 :            : //! \param[out] remainder Remainder, see detailed description
      35                 :            : //! \return Number of work units
      36                 :            : //! \details Compute load distribution (number of chares and chunksize) based on
      37                 :            : //!   total work (e.g., total number of particles) and virtualization
      38                 :            : //!
      39                 :            : //!   The virtualization parameter, specified by the user, is a real number
      40                 :            : //!   between 0.0 and 1.0, inclusive, which controls the degree of
      41                 :            : //!   virtualization or over-decomposition. Independent of the value of
      42                 :            : //!   virtualization the work is approximately evenly distributed among the
      43                 :            : //!   available processing elements, given by npe. For zero virtualization (no
      44                 :            : //!   over-decomposition), the work is simply decomposed into total_work/numPEs,
      45                 :            : //!   which yields the smallest number of Charm++ chares and the largest chunks
      46                 :            : //!   of work units. The other extreme is unity virtualization, which decomposes
      47                 :            : //!   the total work into the smallest size work units possible, yielding the
      48                 :            : //!   largest number of Charm++ chares. Obviously, the optimum will be between
      49                 :            : //!   0.0 and 1.0, depending on the problem.
      50                 :            : //!
      51                 :            : //!   The formula implemented uses a linear relationship between the
      52                 :            : //!   virtualization parameter and the number of work units with the extremes
      53                 :            : //!   described above. The formula is given by
      54                 :            : //!
      55                 :            : //!   chunksize = (1 - n) * v + n;
      56                 :            : //!
      57                 :            : //!   where
      58                 :            : //!    - v = degree of virtualization
      59                 :            : //!    - n = load/npes
      60                 :            : //!    - load = total work, e.g., number of particles, number of mesh cells
      61                 :            : //!    - npes = number of hardware processing elements
      62                 :            : // *****************************************************************************
      63                 :            : {
      64                 :            :   Assert( virtualization > -std::numeric_limits< real >::epsilon() &&
      65                 :            :           virtualization < 1.0+std::numeric_limits< real >::epsilon(),
      66                 :            :           "Virtualization parameter must be between [0.0...1.0]" );
      67                 :            :   Assert( npe > 0, "Number of processing elements must be larger than zero" );
      68                 :            : 
      69                 :            :   // Compute load per processing element, n in the formula above
      70                 :        193 :   const auto n = static_cast< real >( load ) / npe;
      71                 :            : 
      72                 :            :   // Compute work unit size based on the linear formula above
      73                 :        193 :   chunksize = static_cast< uint64_t >( (1.0 - n) * virtualization + n );
      74                 :            : 
      75                 :            :   Assert( load >= chunksize, "Load must be larger than chunksize" );
      76                 :            : 
      77                 :            :   // Compute number of work units with size computed ignoring remainder
      78                 :        193 :   uint64_t nchare = load / chunksize;
      79                 :            : 
      80                 :            :   // Compute remainder of work if the above number of units were to be created
      81                 :        193 :   remainder = load - nchare * chunksize;
      82                 :            : 
      83                 :            :   // Redistribute remainder among the work units for a more equal distribution
      84                 :        193 :   chunksize += remainder / nchare;
      85                 :            : 
      86                 :            :   // Compute new remainder (after redistribution of the previous remainder)
      87                 :        193 :   remainder = load - nchare * chunksize;
      88                 :            : 
      89                 :            :   // Return number of work units (number of Charm++ chares)
      90                 :        193 :   return nchare;
      91                 :            : }
      92                 :            : 
      93                 :            : } // tk::

Generated by: LCOV version 1.14