# Copyright (c) 2016 - 2017, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
# All rights reserved
#
# Implemented as part of the PhD thesis
# "Precise Simulations of Multibunches in High Intensity Cyclotrons"
#
# This file is part of pyOPALTools.
#
# pyOPALTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# You should have received a copy of the GNU General Public License
# along with pyOPALTools. If not, see <https://www.gnu.org/licenses/>.
import pickle as pickle
import pprint
import re
from .BaseParser import BaseParser
[docs]class TimingParser(BaseParser):
"""Read and write an Ippl timing file.
Attributes
----------
_data : list
List of dictionaries
Notes
-----
It stores the data in a list of dictionaries
where the main timing dictionary is::
main_dict = {'cpu tot': [],
'wall tot': [],
'what': [],
'cores': []}
and the specialized timings are stored in a
dictionary with the structure::
special_dict = {'what': [],
'cpu max': [],
'wall max': [],
'cpu min': [],
'wall min': [],
'cpu avg': [],
'wall avg': []}
The list of dictionaries looks then as follows::
self._data = [main_dict,
special_dict_1,
...,
special_dict_N]
Examples
--------
>>> TimingParser as timing
>>> time = timing.TimingParser()
>>> time.read_ippl_timing("/path/to/IpplTiming.dat")
>>> data = time.getTiming()
>>> print ( data )
>>> time.pie_plot('cpu avg')
"""
def __init__(self):
    """Create an empty parser and register the known export formats."""
    # start with empty containers (the timing data is a list of dictionaries)
    self.clear()
    # Names accepted by ``write``: index 0 selects the pickle writer,
    # index 1 the plain-text writer.
    self._format = ['PICKLE', 'ASCII']
def parse(self, filename):
    """Load timing data from a file, trying both supported layouts.

    The file is first read with ``read_output_file``. If that raises an
    error or leaves the data container empty, the file is read again with
    ``read_ippl_timing``.

    Parameters
    ----------
    filename : str
        The pathname (i.e. path + filename)

    Notes
    -----
    Errors raised by ``read_ippl_timing`` are passed on to the caller.
    """
    try:
        self.read_output_file(filename)
    except Exception:
        # Deliberate best-effort: any parsing problem just means "try the
        # other layout". Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        is_valid = False
    else:
        # read_output_file does not raise when no line matches, it simply
        # stores nothing; treat that as "not this layout" as well.
        is_valid = bool(self._data)

    if not is_valid:
        self.read_ippl_timing(filename)
def clear(self):
    """Reset the parser to its empty state.

    Replaces the timing list and the problem-size dictionary with new,
    empty containers.
    """
    self._data = []
    self._problem = {}
def _init_data_structure(self):
    """Build the empty templates in which timings are stored.

    The special-timer template is also kept on the instance as
    ``self._special_dict``.

    Returns
    -------
    main_dict : dict
        The dictionary for the main timer
    special_dict : dict
        The dictionary for all other timers
    """
    main_keys = ('cpu tot', 'wall tot', 'what', 'cores')
    special_keys = ('what',
                    'cpu max', 'wall max',
                    'cpu min', 'wall min',
                    'cpu avg', 'wall avg')

    # every key gets its own, independent empty list
    main_template = {key: [] for key in main_keys}
    self._special_dict = {key: [] for key in special_keys}
    return main_template, self._special_dict
def read_output_file(self, f):
    """Read in the timing results from an OPAL output file.

    Only lines containing the word ``Timings`` are looked at. Previously
    loaded data and problem sizes are discarded first.

    Parameters
    ----------
    f : str
        The pathname (i.e. path + filename)

    Notes
    -----
    Following format assumed::

        Timings{0}> -----------------------------------------------------------------
        Timings{0}> Timing results for 32 nodes:
        Timings{0}> -----------------------------------------------------------------
        Timings{0}> mainTimer........... Wall tot = 326.192, CPU tot = 325.76
        Timings{0}>
        Timings{0}> my awesome timer.... Wall max = 0, CPU max = 0
        Timings{0}> Wall avg = 0, CPU avg = 0
        Timings{0}> Wall min = 0, CPU min = 0
        Timings{0}>
        Timings{0}> super timer......... Wall max = 14.6091, CPU max = 14.52
        Timings{0}> Wall avg = 3.34291, CPU avg = 3.31844
        Timings{0}> Wall min = 0.007039, CPU min = 0
        Timings{0}>
        Timings{0}> -----------------------------------------------------------------

    The main entry is stored once both the "Timing results" line and the
    "tot" line have been seen. A special entry is stored once its three
    lines (max, avg, min) have been seen. The number of nodes is kept as
    the string found in the file.
    """
    self._problem = {}
    self._data = []

    main_dict, special_dict = self._init_data_structure()

    core_pattern = re.compile('.*> Timing results for (.*) nodes:')
    main_pattern = re.compile('.*> (.*) Wall tot = (.*), CPU tot = (.*)')
    max_pattern = re.compile('.*> (.*) Wall max = (.*), CPU max = (.*)')
    avg_pattern = re.compile('.*> Wall avg = (.*), CPU avg = (.*)')
    min_pattern = re.compile('.*> Wall min = (.*), CPU min = (.*)')

    main_count = 0
    special_count = 0

    # the context manager guarantees the file is closed, also on errors
    with open(f) as stream:
        for raw in stream:
            if "Timings" not in raw:
                continue

            # collapse runs of whitespace so the patterns can use single blanks
            line = ' '.join(raw.split())

            kind = None

            obj = core_pattern.match(line)
            if obj:
                main_dict['cores'] = obj.group(1)
                kind = 'main'

            if kind is None:
                obj = main_pattern.match(line)
                if obj:
                    # main timer; drop the padding dots of the timer name
                    main_dict['what'] = obj.group(1).replace('.', '')
                    main_dict['wall tot'] = float(obj.group(2))
                    main_dict['cpu tot'] = float(obj.group(3))
                    kind = 'main'

            # special timings have 3 lines
            if kind is None:
                obj = max_pattern.match(line)
                if obj:
                    special_dict['what'] = obj.group(1).replace('.', '')
                    special_dict['wall max'] = float(obj.group(2))
                    special_dict['cpu max'] = float(obj.group(3))
                    kind = 'special'

            if kind is None:
                obj = avg_pattern.match(line)
                if obj:
                    special_dict['wall avg'] = float(obj.group(1))
                    special_dict['cpu avg'] = float(obj.group(2))
                    kind = 'special'

            if kind is None:
                obj = min_pattern.match(line)
                if obj:
                    special_dict['wall min'] = float(obj.group(1))
                    special_dict['cpu min'] = float(obj.group(2))
                    kind = 'special'

            # Store an entry as soon as it is complete, so nothing is lost
            # when the completing line is the last "Timings" line of the
            # file. A copy is stored because the templates are reused.
            if kind == 'main':
                main_count += 1
                if main_count == 2:
                    main_count = 0
                    self._data.append(dict(main_dict))
            elif kind == 'special':
                special_count += 1
                if special_count == 3:
                    special_count = 0
                    self._data.append(dict(special_dict))
def read_ippl_timing(self, f):
    """Read in an Ippl timing file.

    File created by::

        std::string filename = "myTiming.dat";
        Ippl:print(filename, problemSize);

    The problem size is optional.

    Previously loaded data and problem sizes are discarded first. Every
    line containing ``num Nodes`` is treated as a column header; the
    column order found there decides which field of the following data
    lines is stored under which key. Only the first line matching the
    main-timer layout is stored as the main timer.

    Parameters
    ----------
    f : str
        Pathname (i.e. path + filename)

    Raises
    ------
    RuntimeError
        If a timing line is found before any ``num Nodes`` header line.
    """
    self._problem = {}
    self._data = []

    main_dict, special_dict = self._init_data_structure()

    # raw strings: '\d' in a normal string literal is an invalid escape
    problem_pattern = re.compile(r'(.*): (\d+)')
    main_pattern = re.compile(r'(.*) (\d+) (.*) (.*)')
    special_pattern = re.compile(r'(.*) (\d+) (.*) (.*) (.*) (.*) (.*) (.*)')

    special_keys = ('cpu max', 'wall max',
                    'cpu min', 'wall min',
                    'cpu avg', 'wall avg')

    tag = None           # column order of the most recent header line
    main_found = False   # the main pattern would also match special lines

    missing_header = ("Timing data found before a 'num Nodes' header "
                      "line in '" + str(f) + "'.")

    with open(f, 'r') as ff:
        for line in ff:
            if 'num Nodes' in line:
                tag = self._order(line, 2)
                continue

            # 2. Feb. 2018
            # https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python
            line = ' '.join(line.split())

            obj = problem_pattern.match(line)
            if obj:
                self._problem[obj.group(1).lstrip()] = int(obj.group(2))
                continue

            obj = None if main_found else main_pattern.match(line)
            if obj:
                if tag is None:
                    raise RuntimeError(missing_header)
                # remove appending dots "..." of timing names
                main_dict['what'] = obj.group(1).replace('.', '')
                main_dict['cores'] = obj.group(tag['num nodes'])
                main_dict['cpu tot'] = float(obj.group(tag['cpu tot']))
                main_dict['wall tot'] = float(obj.group(tag['wall tot']))
                # we need to copy otherwise it overwrites the data
                self._data.append(dict(main_dict))
                # from now on only special timings are expected
                main_found = True
                continue

            obj = special_pattern.match(line)
            if obj:
                if tag is None:
                    raise RuntimeError(missing_header)
                special_dict['what'] = obj.group(1).replace('.', '')
                for key in special_keys:
                    special_dict[key] = float(obj.group(tag[key]).strip())
                # we need to copy otherwise it overwrites the data
                self._data.append(dict(special_dict))
                continue
def getTiming(self):
    """Give access to the stored timing entries.

    Returns
    -------
    list
        The timing data

    Notes
    -----
    The internal list itself is returned (no copy) and it is not
    checked whether it is empty.
    """
    timings = self._data
    return timings
@property
def properties(self):
    """dict: The special-timer dictionary kept in ``self._special_dict``.

    That attribute is assigned by ``_init_data_structure``, so it is only
    available after that method has run.
    """
    special_timers = self._special_dict
    return special_timers
def getProblemSize(self):
    """Give access to the stored problem specification.

    Returns
    -------
    dict
        All problem specification in a dictionary

    Notes
    -----
    The internal dictionary itself is returned (no copy) and it is not
    checked whether it is empty.
    """
    problem = self._problem
    return problem
def __str__(self):
    """Render the loaded timings as a text table.

    Returns
    -------
    str
        A fixed message when nothing is loaded, otherwise one block for
        the main timer (including both table headers) followed by one
        row per special timer.
    """
    if not self._data:
        return 'There is no data loaded.'

    special_columns = ('cpu max', 'wall max',
                       'cpu min', 'wall min',
                       'cpu avg', 'wall avg')
    pieces = []

    for entry in self._data:
        if entry['what'] in ('mainTimer', 'main') and 'cores' in entry:
            pieces.append("\t\t num Nodes CPU tot Wall tot\n")
            pieces.append("=" * 48 + "\n")
            pieces.append(entry['what'] + "\t\t" + str(entry['cores']) + " " +
                          str(entry['cpu tot']) + " " +
                          str(entry['wall tot']) + "\n")
            pieces.append("\n\t\t\t CPU max\t Wall max\t CPU min\t Wall min\t CPU avg\t Wall avg\n")
            pieces.append("=" * 115 + "\n")
            continue

        # 16. Jan. 2017
        # http://stackoverflow.com/questions/20309255/how-to-pad-a-string-to-a-fixed-length-with-spaces-in-python
        cells = [format(entry['what'], '<20')]
        for column in special_columns:
            cells.append(format(str(entry[column]), '<10'))
        pieces.append("\t".join(cells) + "\n")

    return ''.join(pieces)
def read(self, pathname, info=False):
    """Load timing data from a pickle file.

    The current data is discarded and every object found in the file is
    appended to the data list, in file order.

    Parameters
    ----------
    pathname : str
        Path + filename of pickle file
    info : bool, optional
        Print data when reading

    Notes
    -----
    The file is decoded with ``pickle``; only load files from a source
    you trust.
    """
    self._data = []

    with open(pathname, 'rb') as stream:
        for entry in self._load_pkl(stream):
            self._data.append(entry)
            if not info:
                continue
            pprint.pprint(entry)
def write(self, pathname, form = 'PICKLE', data = None):
    """Export timing data in a specific format.

    Parameters
    ----------
    pathname : str
        Path + name of the written file
    form : str, optional
        Which format to write; compared against the entries of
        ``self._format``
    data : list [dict], optional
        Timing data; when empty or omitted the data held by the parser
        is written

    Raises
    ------
    RuntimeError
        If there is no data to write, or if the format is unknown.
    """
    if not data:
        # nothing passed in: fall back to the parser's own data
        if not self._data:
            raise RuntimeError('No data available.')
        data = self._data

    if form == self._format[0]:
        exporter = self._exportPickle
    elif form == self._format[1]:
        exporter = self._exportAscii
    else:
        raise RuntimeError('Not supported export format.')

    exporter(pathname, data)
def _load_pkl(self, pkl_file):
    """Yield the objects stored one after another in a pickle stream.

    Parameters
    ----------
    pkl_file : file object
        Stream handed to ``pickle.load`` (``read`` passes a file opened
        in binary mode)

    Yields
    ------
    object
        The next unpickled object, until the end of the stream
    """
    # 14. Jan. 2017, http://stackoverflow.com/questions/18675863/load-data-from-python-pickle-file-in-a-loop
    try:
        while True:
            record = pickle.load(pkl_file)
            yield record
    except EOFError:
        # pickle.load signals the end of the stream with EOFError
        return
def _order(self, line, i):
    """Find the order of the tags, i.e. 'cpu min', etc. and fill dictionary.

    The line is lower-cased and split wherever two or more whitespace
    characters follow each other, so a tag may itself contain single
    blanks. Surrounding whitespace is removed from every tag, so a header
    with a single leading or trailing blank still gives clean keys.

    Parameters
    ----------
    line : str
        File line
    i : int
        Start index

    Returns
    -------
    dict
        A dictionary giving tag as key and
        occurrence as number.
    """
    order = {}
    for word in re.split(r'\s{2,}', line.lower()):
        tag = word.strip()
        if not tag:
            # produced by leading / trailing whitespace of the line
            continue
        order[tag] = i
        i += 1
    return order
def _exportPickle(self, pathname, data):
    """Write a binary pickle file.

    Every dictionary of ``data`` is dumped as its own pickle record, one
    after another, into the same file.

    Parameters
    ----------
    pathname : str
        Path + filename of written file
    data : list [dict]
        Timing data

    Notes
    -----
    If the file name (the last path component) contains no dot, the
    string ".pkl" is appended. Dots in directory names are ignored.
    """
    # local import: this block does not rely on a file-level ``import os``
    import os

    # look at the file name only, so that './timing' or 'run.1/timing'
    # still get the extension
    if '.' not in os.path.basename(pathname):
        pathname = pathname + ".pkl"

    # the context manager closes the file even if dumping fails
    with open(pathname, 'wb') as f:
        for dic in data:
            pickle.dump(dic, f)
def _exportAscii(self, pathname, data):
    """Write a human readable file.

    The layout is the same as the one produced by ``__str__``: a block
    for the main timer (an entry named 'mainTimer' or 'main' that has a
    'cores' key) including both table headers, and one row for every
    other entry.

    Parameters
    ----------
    pathname : str
        Path + filename of written file
    data : list [dict]
        Timing data

    Notes
    -----
    If the file name (the last path component) does not contain ".dat",
    the string ".dat" is appended. Directory names are ignored.
    """
    # local import: this block does not rely on a file-level ``import os``
    import os

    # look at the file name only, a directory such as 'x.data/' must not
    # suppress the extension
    if '.dat' not in os.path.basename(pathname):
        pathname = pathname + ".dat"

    special_columns = ('cpu max', 'wall max',
                       'cpu min', 'wall min',
                       'cpu avg', 'wall avg')

    # the context manager closes the file even if writing fails
    with open(pathname, 'w') as f:
        for dic in data:
            # accept both main timer names, like __str__ does
            if dic['what'] in ('mainTimer', 'main') and 'cores' in dic:
                f.write("\t\t num Nodes CPU tot Wall tot\n")
                f.write("=" * 48 + "\n")
                f.write(dic['what'] + "\t\t" + str(dic['cores']) + " " +
                        str(dic['cpu tot']) + " " + str(dic['wall tot']) + "\n")
                f.write("\n\t\t\t CPU max\t Wall max\t CPU min\t Wall min\t CPU avg\t Wall avg\n")
                f.write("=" * 115 + "\n")
            else:
                # 16. Jan. 2017
                # http://stackoverflow.com/questions/20309255/how-to-pad-a-string-to-a-fixed-length-with-spaces-in-python
                cells = ["{:<20}".format(dic['what'])]
                for column in special_columns:
                    cells.append("{:<10}".format(str(dic[column])))
                f.write("\t".join(cells) + "\n")