# Copyright (c) 2018 - 2019, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
# All rights reserved
#
# Implemented as part of the PhD thesis
# "Precise Simulations of Multibunches in High Intensity Cyclotrons"
#
# This file is part of pyOPALTools.
#
# pyOPALTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# You should have received a copy of the GNU General Public License
# along with pyOPALTools. If not, see <https://www.gnu.org/licenses/>.
from .BasePlotter import *
import numpy as np
from operator import itemgetter
class TimingPlotter(BasePlotter):
    """
    Plots for timing datasets: efficiency, speed-up, time scaling,
    min / avg / max summary and pie chart.

    Notes
    -----
    The methods read the reference dataset from ``self.ds``; this
    class never assigns that attribute itself. ``plt`` and
    ``opal_logger`` are presumably provided by the wildcard import of
    ``BasePlotter`` -- TODO confirm.

    Every public method catches any exception, logs it through
    ``opal_logger.exception`` and returns an empty ``plt.figure()``.
    """

    def __init__(self):
        # Nothing is initialised here.
        pass

    def __mostConsuming(self, n, times, labels, prop):
        """
        Return time and label of the first n most time
        consuming timings.

        Parameters
        ----------
        n       (int)   number of timings. None selects all timings,
                        a negative value selects one, a value larger
                        than the number of timings selects all.
        times   ([])    list of timing data
        labels  ([])    list of labels to appropriate timings
        prop    (str)   unused, kept for interface compatibility

        Returns
        -------
        sorted times and labels (two lists, largest time first);
        two empty lists if there are no timings
        """
        # 15. Jan. 2017,
        # http://stackoverflow.com/questions/9543211/sorting-a-list-in-python-using-the-result-from-sorting-another-list
        ranked = sorted(zip(times, labels), key=itemgetter(0), reverse=True)

        # an empty input cannot be unpacked with zip(*...)
        if not ranked:
            return [], []

        if n is None:
            # None cannot be ordered against an int in Python 3
            n = len(ranked)
        else:
            # slice bounds have to be integers
            n = int(n)
            if n < 0:
                n = 1
            elif n > len(ranked):
                n = len(ranked)

        selected = ranked[:n]
        return [t for t, _ in selected], [name for _, name in selected]

    def __collectDatasets(self, dsets):
        """
        Prepend self.ds to the given dataset(s) and check the file types.

        Parameters
        ----------
        dsets   (TimeDataset, [TimeDataset] or tuple) additional datasets

        Returns
        -------
        list of datasets with self.ds as first entry

        Raises
        ------
        TypeError if a dataset is neither of file type TIMING nor OUTPUT
        """
        from opal import filetype

        if isinstance(dsets, tuple):
            dsets = list(dsets)
        elif not isinstance(dsets, list):
            dsets = [dsets]

        dsets = [self.ds] + dsets

        for ds in dsets:
            if not (ds.filetype == filetype.TIMING or
                    ds.filetype == filetype.OUTPUT):
                raise TypeError("Dataset '" + ds.filename +
                                "' is not a timing dataset.")
        return dsets

    def __selectLabels(self, ds, excludeList, tag):
        """
        Collect the timing names of a dataset that should be plotted.

        Parameters
        ----------
        ds          (DatasetBase)   timing dataset
        excludeList ([])            skip names containing one of these
        tag         (str)           take only names containing this tag

        Returns
        -------
        list of names; names containing 'main' are always skipped
        """
        return [name for name in ds.getLabels()
                if 'main' not in name
                and tag in name
                and not any(ex in name for ex in excludeList)]

    def __speedupSeries(self, dsets, what, prop, core2node):
        """
        Compute the speed-up of every run relative to the smallest run.

        Parameters
        ----------
        dsets       ([TimeDataset]) additional timing datasets
        what        (str)           timing name
        prop        (str)           timing property
        core2node   (int)           divisor applied to the core counts

        Returns
        -------
        (datasets in input order, scaled core counts, speed-up);
        the last two are sorted by increasing core count
        """
        dsets = self.__collectDatasets(dsets)

        # access main timing
        runs = sorted((int(ds.getData(0, prop='cores')),
                       ds.getData(var=what, prop=prop)) for ds in dsets)

        # transform cores --> nodes
        cores = [c / core2node for c, _ in runs]

        # the run with the fewest cores is the reference
        reference = runs[0][1]
        speedup = [reference / t for _, t in runs]

        return dsets, cores, speedup

    def plot_efficiency(self, dsets, what, prop, **kwargs):
        """
        Efficiency plot of a timing benchmark study

        E_p = S_p / p

        where E_p is the efficiency and S_p the
        speed-up with p cores / nodes.

        Parameters
        ----------
        dsets   ([TimeDataset]) all timing datasets
        what    (str)           timing name
        prop    (str)           property, i.e. 'cpu avg', 'cpu max', 'cpu min',
                                'wall avg', 'wall max', 'wall min' or
                                'cpu tot' and 'wall tot' (only for main timing)

        Optionals
        ---------
        xscale      (str)   x-axis scale, 'linear' or 'log'
        yscale      (str)   y-axis scale, 'linear' or 'log'
        grid        (bool)  if true, plot grid
        percent     (bool)  efficiency in percentage. Default True
        xlabel      (str)   label for x-axis. Default '#cores'
        core2node   (int)   scale #cores == 1 node
                            (useful with xlabel='#nodes')

        Remaining keyword arguments are forwarded to plt.plot.

        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            core2node = kwargs.pop('core2node', 1)
            _, cores, speedup = self.__speedupSeries(dsets, what,
                                                     prop, core2node)

            scale = 1.0
            ylabel = 'efficiency'
            if kwargs.pop('percent', True):
                scale = 100.0
                ylabel += ' [%]'

            # speed-up divided by the core increase w.r.t. the smallest run
            efficiency = [s / (c / cores[0]) * scale
                          for s, c in zip(speedup, cores)]

            xscale = kwargs.pop('xscale', 'linear')
            yscale = kwargs.pop('yscale', 'linear')
            grid = kwargs.pop('grid', False)
            xlab = kwargs.pop('xlabel', '#cores')

            plt.plot(cores, efficiency, **kwargs)
            plt.xlabel(xlab)
            plt.ylabel(ylabel)
            plt.xscale(xscale)
            plt.yscale(yscale)
            plt.grid(grid, which='both')
            plt.tight_layout()
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()

    def plot_speedup(self, dsets, what, prop, **kwargs):
        """
        Speedup plot of a timing benchmark study

        S_p = T_1 / T_p

        where T_1 is the time for a single core run
        (or reference run with several cores / nodes)
        and T_p the time with p cores. S_p then represents the
        speed-up with p cores / nodes.

        Parameters
        ----------
        dsets   ([TimeDataset]) all timing datasets
        what    (str)           timing name
        prop    (str)           property, i.e. 'cpu avg', 'cpu max', 'cpu min',
                                'wall avg', 'wall max', 'wall min' or
                                'cpu tot' and 'wall tot' (only for main timing)

        Optionals
        ---------
        xscale          (str)   x-axis scale, 'linear' or 'log'
        yscale          (str)   y-axis scale, 'linear' or 'log'
        grid            (bool)  if true, plot grid
        efficiency      (bool)  add efficiency to plot
        xlabel          (str)   label for x-axis. Default '#cores'
        core2node       (int)   scale #cores == 1 node
                                (useful with xlabel='#nodes')
        perfect_scaling (bool)  add speed-up perfect scaling line

        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            core2node = kwargs.pop('core2node', 1)
            dsets, cores, speedup = self.__speedupSeries(dsets, what,
                                                         prop, core2node)

            xscale = kwargs.pop('xscale', 'linear')
            yscale = kwargs.pop('yscale', 'linear')
            grid = kwargs.pop('grid', False)

            ax1 = plt.gca()
            loc = 'best'

            if kwargs.pop('efficiency', False):
                loc = 'lower center'

                efficiency = [s / (c / cores[0])
                              for s, c in zip(speedup, cores)]

                # efficiency goes on a second, red y-axis
                ax2 = ax1.twinx()
                ax2.set_ylabel('efficiency', color='r')
                ax2.set_yscale(yscale)
                # 8. April 2018
                # https://stackoverflow.com/questions/15256660/set-the-colour-of-matplotlib-ticks-on-a-log-scaled-axes
                ax2.tick_params('y', colors='r', which='both')
                ax2.grid(grid, which='both', color='r',
                         linestyle='dashed', alpha=0.4)
                ax2.plot(cores, efficiency, 'r')

            # label is taken from the last dataset handed in
            ax1.plot(cores, speedup, label=dsets[-1].getLabel(what))
            ax1.set_xlabel(kwargs.pop('xlabel', '#cores'))
            ax1.set_ylabel('speed-up')
            ax1.set_xscale(xscale)
            ax1.set_yscale(yscale)
            ax1.grid(grid, which='both')

            if kwargs.pop('perfect_scaling', False):
                ref = [c / cores[0] for c in cores]
                ax1.plot(cores, ref, 'k--', label='perfect scaling')

            ax1.legend(frameon=True, loc=loc)
            plt.tight_layout()
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()

    def plot_time_scaling(self, dsets, prop, **kwargs):
        """
        Plot timing benchmark.

        Parameters
        ----------
        dsets   ([TimeDataset]) all timing datasets
        prop    (str)           property, 'wall' or 'cpu'

        Optionals
        ---------
        first=None      (int)   take only the first N specialized
                                timings (most time consuming in the
                                run with the fewest cores)
        xscale          (str)   x-axis scale, 'linear' or 'log'
        yscale          (str)   y-axis scale, 'linear' or 'log'
        grid            (bool)  if true, plot grid
        xlabel          (str)   label for x-axis. Default '#cores'
        core2node       (int)   scale #cores == 1 node
                                (useful with xlabel='#nodes')
        exclude         ([])    do not use *these* timings
        tag=''          (str)   take only timings containing this tag
        alphabetic=True (bool)  order timings by name (reversed)
                                instead of by time
        perfect_scaling (bool)  add speed-up perfect scaling line

        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            dsets = self.__collectDatasets(dsets)

            if not (prop == 'wall' or prop == 'cpu'):
                raise ValueError("Wrong property value: prop = 'wall' or prop = 'cpu'.")

            # sort on the core count only: datasets need not be orderable,
            # which matters when two runs use the same number of cores
            runs = sorted(((int(ds.getData(0, prop='cores')), ds)
                           for ds in dsets), key=itemgetter(0))

            # transform cores --> nodes
            core2node = kwargs.pop('core2node', 1)
            cores = [c / core2node for c, _ in runs]
            dsets = [ds for _, ds in runs]

            avg = prop + ' avg'
            smallest = dsets[0]

            labels = self.__selectLabels(smallest,
                                         kwargs.pop('exclude', []),
                                         kwargs.pop('tag', ''))
            times = [smallest.getData(var=name, prop=avg) for name in labels]

            times, labels = self.__mostConsuming(kwargs.pop('first', None),
                                                 times, labels, avg)

            if not labels:
                raise ValueError("No timings selected for plotting.")

            # timings are already ordered by decreasing time at this point
            if kwargs.pop('alphabetic', True):
                ordered = sorted(zip(labels, times),
                                 key=itemgetter(0),
                                 reverse=True)
                labels = [name for name, _ in ordered]
                times = [t for _, t in ordered]

            for label in labels:
                tavg = []
                below = []
                above = []
                for ds in dsets:
                    mean = ds.getData(var=label, prop=avg)
                    tavg.append(mean)
                    # error bars are distances from the average
                    below.append(mean - ds.getData(var=label, prop=prop + ' min'))
                    above.append(ds.getData(var=label, prop=prop + ' max') - mean)
                plt.errorbar(cores, tavg, yerr=[below, above],
                             fmt='--o', label=label)

            plt.grid(kwargs.pop('grid', False), which="both")
            plt.xlabel(kwargs.pop('xlabel', '#cores'))
            plt.ylabel('time [' + dsets[-1].getUnit('') + ']')
            plt.xlim([0.5*cores[0], 1.05*cores[-1]])
            plt.xscale(kwargs.pop('xscale', 'linear'))
            plt.yscale(kwargs.pop('yscale', 'linear'))
            plt.tight_layout()

            if kwargs.pop('perfect_scaling', False):
                # reference: first plotted timing of the smallest run
                ref = [times[0] * cores[0] / c for c in cores]
                plt.plot(cores, ref, 'k', label='perfect scaling')

            plt.legend(loc='best')
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()

    def plot_time_summary(self, prop, **kwargs):
        """
        Create a plot with minimum, maximum and average timings
        of the dataset self.ds.

        Parameters
        ----------
        prop    (str)   property, 'wall' or 'cpu'

        Optionals
        ---------
        yscale  (str)   y-axis scale, 'linear' or 'log'
        grid    (bool)  if true, plot grid
        exclude ([])    do not use *these* timings
        tag=''  (str)   take only timings containing this tag

        Remaining keyword arguments are forwarded to plt.errorbar.

        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            if not (prop == 'wall' or prop == 'cpu'):
                raise ValueError("Wrong property value: prop = 'wall' or prop = 'cpu'.")

            labels = self.__selectLabels(self.ds,
                                         kwargs.pop('exclude', []),
                                         kwargs.pop('tag', ''))

            if not labels:
                raise ValueError("No timings selected for plotting.")

            tavg = []
            below = []
            above = []
            for name in labels:
                mean = self.ds.getData(var=name, prop=prop + ' avg')
                tavg.append(mean)
                # error bars are distances from the average
                below.append(mean - self.ds.getData(var=name, prop=prop + ' min'))
                above.append(self.ds.getData(var=name, prop=prop + ' max') - mean)

            n = len(tavg)
            x = np.linspace(0, n-1, n)

            grid = kwargs.pop('grid', False)
            yscale = kwargs.pop('yscale', 'linear')

            plt.errorbar(x, tavg, yerr=[below, above], fmt='o', **kwargs)
            plt.xlim([-1, n])
            plt.ylim([-10, max(above)+max(tavg)])
            plt.ylabel('time [' + self.ds.getUnit('') + ']')
            # 2. Feb. 2018
            # https://stackoverflow.com/questions/14852821/aligning-rotated-xticklabels-with-their-respective-xticks
            plt.xticks(x, labels, rotation=45, ha='right')
            plt.grid(grid, which="both")

            ax = plt.gca()
            if yscale == 'log':
                try:
                    ax.set_yscale('log', nonpositive='clip')
                except (TypeError, ValueError):
                    # older Matplotlib only knows the keyword 'nonposy'
                    ax.set_yscale('log', nonposy='clip')

            plt.tight_layout()
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()

    def plot_pie_chart(self, prop, **kwargs):
        """
        Create a pie plot of the first N most time consuming timings
        of the dataset self.ds.

        Parameters
        ----------
        prop    (str)   property, i.e. 'cpu avg', 'cpu max', 'cpu min',
                        'wall avg', 'wall max', 'wall min' or
                        'cpu tot' and 'wall tot' (only for main timing)

        Optionals
        ---------
        first=None          (int)   take only the first N specialized
                                    timings; the remaining ones are
                                    summed up as 'others'. None takes
                                    all timings.
        exclude             ([])    do not use *these* timings
        tag=''              (str)   what tag should be in name
        cmap_name='YlGn'    (str)   color scheme (the keyword 'cmap'
                                    is accepted as well)

        Notes
        -----
        Errors (e.g. file not available or key not part of the
        dictionary) are logged and an empty figure is returned.

        Returns
        -------
        a matplotlib.pyplot handle
        """
        try:
            first = kwargs.pop('first', None)

            # 'cmap' is the keyword read historically, 'cmap_name' the
            # documented one; the documented one wins if both are given
            cmap_name = kwargs.pop('cmap', 'YlGn')
            cmap_name = kwargs.pop('cmap_name', cmap_name)

            names = self.__selectLabels(self.ds,
                                        kwargs.pop('exclude', []),
                                        kwargs.pop('tag', ''))

            if not names:
                raise ValueError("No timings selected for plotting.")

            times = [self.ds.getData(var=name, prop=prop) for name in names]

            times_sorted, labels_sorted = self.__mostConsuming(first, times,
                                                               names, prop)

            # sum up all others
            if first:
                shown = set(labels_sorted)
                remaining = [name for name in names if name not in shown]
                # no 'others' wedge if nothing was left out
                if remaining:
                    others = 0.0
                    for name in remaining:
                        others += self.ds.getData(var=name, prop=prop)
                    ordered = sorted(zip(times_sorted + [others],
                                         labels_sorted + ['others']),
                                     key=itemgetter(0),
                                     reverse=True)
                    times_sorted = [t for t, _ in ordered]
                    labels_sorted = [name for _, name in ordered]

            # 15. Jan. 2017, https://gist.github.com/vals/5257113
            cmap = plt.get_cmap(cmap_name)
            colors = cmap(np.linspace(0, 1, len(times_sorted)))

            explode = [0.0] * len(times_sorted)

            # 15. Jan. 2017,
            # http://stackoverflow.com/questions/7082345/how-to-set-the-labels-size-on-a-pie-chart-in-python
            patches, _texts, _autotexts = plt.pie(times_sorted,
                                                  autopct='%1.1f%%',
                                                  pctdistance=0.7,
                                                  labeldistance=1.0,
                                                  startangle=90,
                                                  explode=explode,
                                                  colors=colors,
                                                  radius=1.1,
                                                  shadow=False)

            plt.legend(patches, labels_sorted, loc='best',
                       bbox_to_anchor=(0.95, 0.98), borderaxespad=0.1)
            plt.axis('equal')
            return plt
        except Exception as ex:
            opal_logger.exception(ex)
            return plt.figure()