# Source code for opal.visualization.TimingPlotter

# Copyright (c) 2018 - 2019, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
# All rights reserved
#
# Implemented as part of the PhD thesis
# "Precise Simulations of Multibunches in High Intensity Cyclotrons"
#
# This file is part of pyOPALTools.
#
# pyOPALTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# You should have received a copy of the GNU General Public License
# along with pyOPALTools. If not, see <https://www.gnu.org/licenses/>.

from .BasePlotter import *
import numpy as np
from operator import itemgetter


class TimingPlotter(BasePlotter):
    
[docs]    def __init__(self):
        pass
    
    
    def __mostConsuming(self, n, times, labels, prop):
        """
        Retturn time and label of the first n most time
        consuming timings.
        
        Parameters
        ----------
        n       (int)   number of timings
        times   ([])    list of timing data
        labels  ([])    list of labels to appropriate timings
        
        Returns
        -------
        sorted times and labels
        """
        # 15. Jan. 2017,
        # http://stackoverflow.com/questions/9543211/sorting-a-list-in-python-using-the-result-from-sorting-another-list
        times_sorted, labels_sorted = zip(*sorted(zip(times, labels),
                                                key=itemgetter(0),
                                                reverse=True))
        
        if n < 0:
            n = 1
        elif n > len(times_sorted):
            n = len(times_sorted)
        
        return list(times_sorted[0:n]), list(labels_sorted[0:n])


[docs]    def plot_efficiency(self, dsets, what, prop, **kwargs):
        """
        Efficiency plot of a timing benchmark study
        
        E_p = S_p / p
        
        where E_p is the efficiency and S_p the
        speed-up with p cores / nodes.
        
        Parameters
        ----------
        dsets   ([TimeDataset]) all timing datasets
        what    (str)           timing name
        prop    (str)           property, i.e. 'cpu avg', 'cpu max', 'cpu min',
                                'wall avg', 'wall max', 'wall min' or
                                'cpu tot' and 'wall tot' (only for main timing)
        
        Optionals
        ---------
        xscale      (str)           x-axis scale, 'linear' or 'log'
        yscale      (str)           y-axis scale, 'linear' or 'log'
        grid        (bool)          if true, plot grid
        percent     (bool)          efficiency in percentage
        xlabel      (str)           label for x-axis. Default '#cores'
        core2node   (int)           scale #cores == 1 node
                                    (useful with xlabel='#nodes')
        
        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            from opal import filetype
            
            if not isinstance(dsets, list):
                dsets = [dsets]
            
            dsets = [self.ds] + dsets
            
            for ds in dsets:
                if not ds.filetype == filetype.TIMING and not ds.filetype == filetype.OUTPUT:
                    raise TypeError("Dataset '" + ds.filename +
                                    "' is not a timing dataset.")
            
            cores = []
            time = []
            
            for ds in dsets:
                #access main timing
                cores.append( int(ds.getData(0, prop='cores')) )
                
                time.append( ds.getData(var=what, prop=prop) )
            
            # sort
            cores, time = zip(*sorted(zip(cores, time)))
            
            # tuple --> list
            cores = list(cores)
            
            # transform cores --> nodes
            core2node = kwargs.pop('core2node', 1)
            
            for i, c in enumerate(cores):
                cores[i] /= core2node
            
            
            # obtain speed-up
            speedup = []
            for t in time:
                speedup.append( time[0] / t )
            
            # obtain core increase
            incr = []
            for c in cores:
                incr.append( c / cores[0] )   
            
            # obtain efficiency
            efficiency = []
            
            percent = 1.0
            ylabel  = 'efficiency'
            if kwargs.pop('percent', True):
                percent = 100.0
                ylabel += ' [%]'
            
            for i, s in enumerate(speedup):
                efficiency.append( s / incr[i] * percent ) # in percent
            
            xscale = kwargs.pop('xscale', 'linear')
            yscale = kwargs.pop('yscale', 'linear')
            grid   = kwargs.pop('grid', False)
            xlab   = kwargs.pop('xlabel', '#cores')
            
            plt.plot(cores, efficiency, **kwargs)
            plt.xlabel(xlab)
            plt.ylabel(ylabel)
            plt.xscale(xscale)
            plt.yscale(yscale)
            plt.grid(grid, which='both')
            plt.tight_layout()
            
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()


[docs]    def plot_speedup(self, dsets, what, prop, **kwargs):
        """
        Speedup plot of a timing benchmark study
        
        S_p = T_1 / T_p
        
        where T_1 is the time for a single core run
        (or reference run with several cores / nodes)
        and T_p the time with p cores. S_p then represents the
        speed-up with p cores / nodes.
        
        Parameters
        ----------
        dsets   ([TimeDataset]) all timing datasets
        what    (str)           timing name
        prop    (str)           property, i.e. 'cpu avg', 'cpu max', 'cpu min',
                                'wall avg', 'wall max', 'wall min' or
                                'cpu tot' and 'wall tot' (only for main timing)
        
        Optionals
        ---------
        xscale          (str)           x-axis scale, 'linear' or 'log'
        yscale          (str)           y-axis scale, 'linear' or 'log'
        grid            (bool)          if true, plot grid
        efficiency      (bool)          add efficiency to plot
        xlabel          (str)           label for x-axis. Default '#cores'
        core2node       (int)           scale #cores == 1 node
                                        (useful with xlabel='#nodes')
        perfect_scaling (bool)          add speed-up perfect scaling line
        
        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            from opal import filetype
            
            if not isinstance(dsets, list):
                dsets = [dsets]
            
            dsets = [self.ds] + dsets
            
            for ds in dsets:
                if not ds.filetype == filetype.TIMING and not ds.filetype == filetype.OUTPUT:
                    raise TypeError("Dataset '" + ds.filename +
                                    "' is not a timing dataset.")
            
            cores = []
            time = []
            
            for ds in dsets:
                #access main timing
                cores.append( int(ds.getData(0, prop='cores')) )
                
                time.append( ds.getData(var=what, prop=prop) )
            
            # sort
            cores, time = zip(*sorted(zip(cores, time)))
            
            # tuple --> list
            cores = list(cores)
            
            # transform cores --> nodes
            core2node = kwargs.pop('core2node', 1)
            
            for i, c in enumerate(cores):
                cores[i] /= core2node
            
            # obtain speed-up
            speedup = []
            for t in time:
                speedup.append( time[0] / t )
            
            xscale = kwargs.pop('xscale', 'linear')
            yscale = kwargs.pop('yscale', 'linear')
            grid   = kwargs.pop('grid', False)
            
            ax1 = plt.gca()
            loc = 'best'
            
            if kwargs.pop('efficiency', False):
                loc = 'lower center'
                
                # obtain core increase
                incr = []
                for c in cores:
                    incr.append( c / cores[0] )   
                
                # obtain efficiency
                efficiency = []
                
                ax2 = ax1.twinx()
                ax2.set_ylabel('efficiency', color='r')
                ax2.set_yscale(yscale)
                # 8. April 2018
                # https://stackoverflow.com/questions/15256660/set-the-colour-of-matplotlib-ticks-on-a-log-scaled-axes
                ax2.tick_params('y', colors='r', which='both')
                ax2.grid(grid, which='both', color='r', linestyle='dashed', alpha=0.4)
                
                for i, s in enumerate(speedup):
                    efficiency.append( s / incr[i] )
                
                ax2.plot(cores, efficiency, 'r')
            
            ax1.plot(cores, speedup, label=ds.getLabel(what))
            ax1.set_xlabel(kwargs.pop('xlabel', '#cores'))
            ax1.set_ylabel('speed-up')
            ax1.set_xscale(xscale)
            ax1.set_yscale(yscale)
            ax1.grid(grid, which='both')
            
            if kwargs.pop('perfect_scaling', False):
                ref = []
                for c in cores:
                    ref.append( c / cores[0] )
                ax1.plot(cores, ref, 'k--', label='perfect scaling')
                ax1.legend(frameon=True, loc=loc)
            
            plt.tight_layout()
                
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()


[docs]    def plot_time_scaling(self, dsets, prop, **kwargs):
        """
        Plot timing benchmark.
        
        Parameters
        ----------
        dsets   ([TimeDataset]) all timing datasets
        prop    (str)           property, 'wall' or 'cpu
        
        Optionals
        ---------
        first=None      (int)   take only the first N specialized
        xscale          (str)           x-axis scale, 'linear' or 'log'
        yscale          (str)           y-axis scale, 'linear' or 'log'
        grid            (bool)          if true, plot grid
        xlabel          (str)           label for x-axis. Default '#cores'
        core2node       (int)           scale #cores == 1 node
                                        (useful with xlabel='#nodes')
        exclude         ([])            do not use *these* timings
        tag=''          (str)           take only timings containing this tag
        perfect_scaling (bool)          add speed-up perfect scaling line
        
        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            from opal import filetype
            
            if not isinstance(dsets, list):
                dsets = [dsets]
            
            dsets = [self.ds] + dsets
            
            for ds in dsets:
                if not ds.filetype == filetype.TIMING and not ds.filetype == filetype.OUTPUT:
                    raise TypeError("Dataset '" + ds.filename +
                                    "' is not a timing dataset.")
            
            if not prop == 'wall' and not prop == 'cpu':
                raise ValueError("Wrong property value: prop = 'wall' or prop = 'cpu'.")
            
            cores = []
            for ds in dsets:
                cores.append( int(ds.getData(0, prop='cores')) )
            
            # sort
            cores, dsets = zip(*sorted(zip(cores, dsets)))
            
            # tuple --> list
            cores = list(cores)
            
            # transform cores --> nodes
            core2node = kwargs.pop('core2node', 1)
            
            for i, c in enumerate(cores):
                cores[i] /= core2node
            
            labels = []
            times  = []
            excludeList = kwargs.pop('exclude', [])
            tag         = kwargs.pop('tag', '')
            
            for name in dsets[0].getLabels():
                skip = False
                for ex in excludeList:
                    if ex in name:
                        skip = True
                        break
                if not skip and not 'main' in name and tag in name:
                    labels.append( name )
                    times.append( dsets[0].getData(var=name, prop=prop + ' avg') )
            
            times, labels = self.__mostConsuming(kwargs.pop('first', 1e6), times, labels, prop + ' avg')

            if kwargs.pop('alphabetic', True):
                labels, times = zip(*sorted(zip(labels, times),
                                            key=itemgetter(0),
                                            reverse=True))
            else:
                times, labels = zip(*sorted(zip(times, labels),
                                            key=itemgetter(0),
                                            reverse=True))

            for label in labels:
                tmin = []
                tmax = []
                tavg = []
                for ds in dsets:
                    tavg.append( ds.getData(var=label, prop=prop + ' avg') )
                    tmin.append( tavg[-1] - ds.getData(var=label, prop=prop + ' min') )
                    tmax.append( ds.getData(var=label, prop=prop + ' max') - tavg[-1] )
                
                plt.errorbar(cores, tavg, yerr=[tmin, tmax], fmt='--o', label=label)
            
            plt.grid(kwargs.pop('grid', False), which="both")
            plt.xlabel(kwargs.pop('xlabel', '#cores'))
            plt.ylabel('time [' + ds.getUnit('') + ']')
            plt.xlim([0.5*cores[0], 1.05*cores[-1]])
            plt.xscale(kwargs.pop('xscale', 'linear'))
            plt.yscale(kwargs.pop('yscale', 'linear'))
            plt.tight_layout()
            
            
            if kwargs.pop('perfect_scaling', False):
                ref = []
                for c in cores:
                    ref.append( times[0] * cores[0] / c )
                plt.plot(cores, ref, 'k', label='perfect scaling')
            plt.legend(loc='best')
            
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()


[docs]    def plot_time_summary(self, prop, **kwargs):
        """
        Create a plot with minimum, maximum and average timings
        
        Parameters
        ----------
        ds      (DatasetBase)   timing dataset
        prop    (str)           property, 'wall' or 'cpu
        
        Optionals
        ---------
        yscale          (str)           y-axis scale, 'linear' or 'log'
        grid            (bool)          if true, plot grid
        exclude         ([])            do not use *these* timings
        tag=''          (str)           take only timings containing this tag
        
        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            if not prop == 'wall' and not prop == 'cpu':
                raise ValueError("Wrong property value: prop = 'wall' or prop = 'cpu'.")
            
            labels = []
            excludeList = kwargs.pop('exclude', [])
            tag         = kwargs.pop('tag', '')
            for name in self.ds.getLabels():
                skip = False
                for ex in excludeList:
                    if ex in name:
                        skip = True
                        break
                if not skip and not 'main' in name and tag in name:
                    labels.append( name )
            
            tmin = []
            tmax = []
            tavg = []
            
            for name in labels:
                tavg.append( self.ds.getData(var=name, prop=prop + ' avg') )
                tmin.append( tavg[-1] - self.ds.getData(var=name, prop=prop + ' min') )
                tmax.append( self.ds.getData(var=name, prop=prop + ' max') - tavg[-1] )
            
            n = len(tavg)
            x = np.linspace(0, n-1, n)

            grid   = kwargs.pop('grid', False)
            yscale = kwargs.pop('yscale', 'linear')
            plt.errorbar(x, tavg, yerr=[tmin, tmax], fmt='o', **kwargs)
            plt.xlim([-1, n])
            plt.ylim([-10, max(tmax)+max(tavg)])
            plt.ylabel('time [' + self.ds.getUnit('') + ']')
            # 2. Feb. 2018
            # https://stackoverflow.com/questions/14852821/aligning-rotated-xticklabels-with-their-respective-xticks
            plt.xticks(x, labels, rotation=45, ha='right')
            plt.grid(grid, which="both")

            ax = plt.gca()
            if yscale == 'log':
                ax.set_yscale('log', nonposy='clip')

            plt.tight_layout()
            
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()


[docs]    def plot_pie_chart(self, prop, **kwargs):
        """
        Create a pie plot of the first N most time consuming timings.
        
        Parameters
        ----------
        ds      (DatasetBase)   timing dataset
        prop    (str)           property, i.e. 'cpu avg', 'cpu max', 'cpu min',
                                'wall avg', 'wall max', 'wall min' or
                                'cpu tot' and 'wall tot' (only for main timing)
        
        Optionals
        ---------
        first=None          (int)   take only the first N specialized
                                    timings
        exclude             ([])    do not use *these* timings
        tag=''              (str)   what tag should be in name
        cmap_name='YlGn'    (str)   color scheme
        
        Notes
        -----
        Throws an exception if file not available or the key is not part
        of the dictionary
        
        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            first = kwargs.pop('first', None)
            cmap_name = kwargs.pop('cmap', 'YlGn')
            
            excludeList = kwargs.pop('exclude', [])
            tag         = kwargs.pop('tag', '')
            names = []
            for name in self.ds.getLabels():
                skip = False
                for ex in excludeList:
                    if ex in name:
                        skip = True
                        break
                if not skip and not 'main' in name and tag in name:
                    names.append( name )
            
            
            labels = []
            times  = []
            for name in names:
                if not 'main' in name:
                    labels.append(name)
                    times.append( self.ds.getData(var=name, prop=prop) )
            
            times_sorted, labels_sorted = self.__mostConsuming(first, times, labels, prop)
            
            # sum up all others
            if first:
                labels_sorted.append('others')
                t = 0.0
                for name in names:
                    if not 'main' in name and name not in labels_sorted:
                        t += self.ds.getData(var=name, prop=prop)
                times_sorted.append(t)
            
            times_sorted, labels_sorted = zip(*sorted(zip(times_sorted, labels_sorted),
                                                    key=itemgetter(0),
                                                    reverse=True))
            
            # 15. Jan. 2017, https://gist.github.com/vals/5257113
            cmap = plt.get_cmap(cmap_name)
            colors = cmap(np.linspace(0, 1, len(times_sorted)))
                
            explode = [0.0] * len(times_sorted)

            # 15. Jan. 2017,
            # http://stackoverflow.com/questions/7082345/how-to-set-the-labels-size-on-a-pie-chart-in-python
            patches, texts, autotexts = plt.pie(times_sorted,
                                                autopct='%1.1f%%',
                                                pctdistance=0.7,
                                                labeldistance=1.0,
                                                startangle=90,
                                                explode=explode,
                                                colors=colors,
                                                radius=1.1,
                                                shadow=False)
            
            #for at in autotexts:
                #at.set_fontsize(10)
            
            plt.legend(patches, labels_sorted, loc='best', bbox_to_anchor=(0.95, 0.98), borderaxespad=0.1)
            plt.axis('equal')
            
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()