Source code for opal.parser.TimingParser

# Copyright (c) 2016 - 2017, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
# All rights reserved
#
# Implemented as part of the PhD thesis
# "Precise Simulations of Multibunches in High Intensity Cyclotrons"
#
# This file is part of pyOPALTools.
#
# pyOPALTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# You should have received a copy of the GNU General Public License
# along with pyOPALTools. If not, see <https://www.gnu.org/licenses/>.

import pickle as pickle
import pprint
import re
from .BaseParser import BaseParser

[docs]class TimingParser(BaseParser): """Read and write an Ippl timing file. Attributes ---------- _data : list List of dictionaries Notes ----- It stores the data in a list of dictionary where the main timing dictionary is:: main_dict = {'cpu tot': [], 'wall tot': [], 'what': [], 'cores': []} and the specialized timings are in stored in a dictionary with the structure:: special_dict = {'what': [], 'cpu max': [], 'wall max': [], 'cpu min': [], 'wall min': [], 'cpu avg': [], 'wall avg': []} The list of dictionaries looks then as follows:: self._data = [main_dict, special_dict_1, ..., special_dict_N] Examples -------- >>> TimingParser as timing >>> time = timing.TimingParser() >>> time.read_ippl_timing("/path/to/IpplTiming.dat") >>> data = time.getTiming() >>> print ( data ) >>> time.pie_plot('cpu avg'): """
[docs] def __init__(self): # list of dictionaries self.clear() self._format = ['PICKLE', 'ASCII']
[docs] def parse(self, filename): isValid = False try: self.read_output_file(filename) isValid = True except: pass if not isValid: self.read_ippl_timing(filename)
[docs] def clear(self): """Clear data. """ self._problem = {} self._data = []
[docs] def _init_data_structure(self): """This is the way the timing is stored Returns ------- main_dict : dict The dictionary for the main timer special_dict : dict The dictionary for all other timers """ main_dict = {'cpu tot': [], 'wall tot': [], 'what': [], 'cores': []} self._special_dict = {'what': [], 'cpu max': [], 'wall max': [], 'cpu min': [], 'wall min': [], 'cpu avg': [], 'wall avg': []} return main_dict, self._special_dict
[docs] def read_output_file(self, f): """Read in the timing results from an OPAL output file. Parameters ---------- f : str The pathname (i.e. path + filename) Notes ----- Following format assumed:: Timings{0}> ----------------------------------------------------------------- Timings{0}> Timing results for 32 nodes: Timings{0}> ----------------------------------------------------------------- Timings{0}> mainTimer........... Wall tot = 326.192, CPU tot = 325.76 Timings{0}> Timings{0}> my awesome timer.... Wall max = 0, CPU max = 0 Timings{0}> Wall avg = 0, CPU avg = 0 Timings{0}> Wall min = 0, CPU min = 0 Timings{0}> Timings{0}> super timer......... Wall max = 14.6091, CPU max = 14.52 Timings{0}> Wall avg = 3.34291, CPU avg = 3.31844 Timings{0}> Wall min = 0.007039, CPU min = 0 Timings{0}> Timings{0}> best timer.......... Wall max = 33.4165, CPU max = 32.93 Timings{0}> Wall avg = 23.0727, CPU avg = 22.8328 Timings{0}> Wall min = 19.989, CPU min = 19.67 Timings{0}> Timings{0}> ----------------------------------------------------------------- """ self._problem = {} self._data = [] main_dict, special_dict = self._init_data_structure() # 13. July 2017 # https://stackoverflow.com/questions/2301789/read-a-file-in-reverse-order-using-python lines = [] special_count = 0 main_count = 0 for line in reversed(open(f).readlines()): if "Timings" in line: lines.insert(0, line) core_pattern = '.*> Timing results for (.*) nodes:' main_pattern = '.*> (.*) Wall tot = (.*), CPU tot = (.*)' max_pattern = '.*> (.*) Wall max = (.*), CPU max = (.*)' avg_pattern = '.*> Wall avg = (.*), CPU avg = (.*)' min_pattern = '.*> Wall min = (.*), CPU min = (.*)' # we parse it the right order for line in lines: line = ' '.join(line.split()) obj = re.match(core_pattern, line) if obj: main_dict['cores'] = obj.group(1) main_count += 1 continue obj = re.match(main_pattern, line) if obj: # main timer main_dict['what'] = obj.group(1).replace('.', '') main_dict['wall tot'] = float(obj.group(2)) main_dict['cpu tot'] = float(obj.group(3)) main_count += 1 continue # special timings have 3 lines obj = re.match(max_pattern, line) if obj: special_dict['what'] = obj.group(1).replace('.', '') special_dict['wall max'] = float(obj.group(2)) special_dict['cpu max'] = float(obj.group(3)) special_count += 1 continue obj = re.match(avg_pattern, line) if obj: special_dict['wall avg'] = float(obj.group(1)) special_dict['cpu avg'] = float(obj.group(2)) special_count += 1 continue obj = re.match(min_pattern, line) if obj: special_dict['wall min'] = float(obj.group(1)) special_dict['cpu min'] = float(obj.group(2)) special_count += 1 if special_count == 3: special_count = 0 self._data.append(dict(special_dict)) if main_count == 2: main_count = 0 self._data.append(dict(main_dict))
[docs] def read_ippl_timing(self, f): """Read in an Ippl timing file. File created by:: std::string filename = "myTiming.dat"; Ippl:print(filename, problemSize); The problem size is optional. Parameters ---------- f : str Pathname (i.e. path + filename) """ self._problem = {} self._data = [] main_dict, special_dict = self._init_data_structure() problem_pattern = '(.*): (\d+)' main_pattern = '(.*) (\d+) (.*) (.*)' special_pattern = '(.*) (\d+) (.*) (.*) (.*) (.*) (.*) (.*)' with open(f, 'r') as ff: for line in ff: if 'num Nodes' in line: tag = self._order(line, 2) continue # 2. Feb. 2018 # https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python line = ' '.join(line.split()) obj = re.match(problem_pattern, line) if obj: self._problem[obj.group(1).lstrip()] = int(obj.group(2)) continue obj = re.match(main_pattern, line) if obj: # remove appending dots "..." of timing names main_dict['what'] = obj.group(1).replace('.', '') main_dict['cores'] = obj.group(tag['num nodes']) main_dict['cpu tot'] = float(obj.group(tag['cpu tot'])) main_dict['wall tot'] = float(obj.group(tag['wall tot'])) # we need to copy otherwise it overwrites the data self._data.append(dict(main_dict)) # clear pattern otherwise special timings go in here too main_pattern = '-1' continue obj = re.match(special_pattern, line) if obj: special_dict['what'] = obj.group(1).replace('.', '') special_dict['cpu max'] = float(obj.group(tag['cpu max']).strip()) special_dict['wall max'] = float(obj.group(tag['wall max']).strip()) special_dict['cpu min'] = float(obj.group(tag['cpu min']).strip()) special_dict['wall min'] = float(obj.group(tag['wall min']).strip()) special_dict['cpu avg'] = float(obj.group(tag['cpu avg']).strip()) special_dict['wall avg'] = float(obj.group(tag['wall avg']).strip()) # we need to copy otherwise it overwrites the data self._data.append(dict(special_dict)) continue
[docs] def getTiming(self): """ Returns ------- list The timing data Notes ----- It is not checked if the container is empty. """ return self._data
@property def properties(self): return self._special_dict
[docs] def getProblemSize(self): """ Returns ------- dict All problem specification in a dictionary Notes ----- It is not checked if the container is empty. """ return self._problem
def __str__(self): if not self._data: return 'There is no data loaded.' else: out = '' for dic in self._data: if ('mainTimer' == dic['what'] or 'main' == dic['what']) and 'cores' in dic: out += "\t\t num Nodes CPU tot Wall tot\n" out += "=" * 48 + "\n" out += dic['what'] + "\t\t" + str(dic['cores']) + " " + \ str(dic['cpu tot']) + " " + str(dic['wall tot']) + "\n" out += "\n\t\t\t CPU max\t Wall max\t CPU min\t Wall min\t CPU avg\t Wall avg\n" out += "=" * 115 + "\n" else: # 16. Jan. 2017 # http://stackoverflow.com/questions/20309255/how-to-pad-a-string-to-a-fixed-length-with-spaces-in-python out += "{:<20}".format(dic['what']) + "\t" out += "{:<10}".format(str(dic['cpu max'])) + "\t" out += "{:<10}".format(str(dic['wall max'])) + "\t" out += "{:<10}".format(str(dic['cpu min'])) + "\t" out += "{:<10}".format(str(dic['wall min'])) + "\t" out += "{:<10}".format(str(dic['cpu avg'])) + "\t" out += "{:<10}".format(str(dic['wall avg'])) out += "\n" return out
[docs] def read(self, pathname, info=False): """ Parameters ---------- pathname : str Path + filename of pickle file info : bool, optional Print data when reading """ self._data = [] with open(pathname, 'rb') as f: for data in self._load_pkl(f): self._data.append(data) if info: pprint.pprint(data)
[docs] def write(self, pathname, form = 'PICKLE', data = None): """Export a timing data in a specific format Parameters ---------- pathname : str Path + name of the written file data : list [dict], optional Timing data form : str, optional Which format to write Notes ----- Throws an exception if the format is unknown or not available """ if not data and not self._data: raise RuntimeError('No data available.') elif not data: data = self._data if form == self._format[0]: self._exportPickle(pathname, data) elif form == self._format[1]: self._exportAscii(pathname, data) else: raise RuntimeError('Not supported export format.')
[docs] def _load_pkl(self, pkl_file): """Pickle file loading function Parameters ---------- pkl_file : str Pickle timing file to load """ # 14. Jan. 2017, http://stackoverflow.com/questions/18675863/load-data-from-python-pickle-file-in-a-loop try: while True: yield pickle.load(pkl_file) except EOFError: pass
[docs] def _order(self, line, i): """Find the order of the tags, i.e. 'cpu min', etc. and fill dictionary. Parameters ---------- line : str File line i : int Start index Returns ------- dict A dictionary giving tag as key and occurrence as number. """ line = line.lower() words = re.split(r'\s{2,}', line) order = {} for w in words: if w: order[w.strip('\n')] = i i += 1 return order
[docs] def _exportPickle(self, pathname, data): """Write a binary pickle file Parameters ---------- pathname : str Path + filename of written file data : list [dict] Timing data Notes ----- If pathname has no extension the string ".pkl" is appended """ if '.' not in pathname: pathname = pathname + ".pkl" f = open(pathname, 'wb') for dic in data: pickle.dump(dic, f) f.close()
[docs] def _exportAscii(self, pathname, data): """Write a human readable file Parameters ---------- pathname : str Path + filename of written file data : list [dict] Timing data Notes ----- If pathname has no extension the string ".dat" is appended """ if '.dat' not in pathname: pathname = pathname + ".dat" f = open(pathname, 'w') for dic in data: if 'mainTimer' == dic['what'] and 'cores' in dic: f.write("\t\t num Nodes CPU tot Wall tot\n") f.write("=" * 48 + "\n") f.write(dic['what'] + "\t\t" + str(dic['cores']) + " " + \ str(dic['cpu tot']) + " " + str(dic['wall tot']) + "\n") f.write("\n\t\t\t CPU max\t Wall max\t CPU min\t Wall min\t CPU avg\t Wall avg\n") f.write("=" * 115 + "\n") else: # 16. Jan. 2017 # http://stackoverflow.com/questions/20309255/how-to-pad-a-string-to-a-fixed-length-with-spaces-in-python f.write("{:<20}".format(dic['what']) + "\t") f.write("{:<10}".format(str(dic['cpu max'])) + "\t") f.write("{:<10}".format(str(dic['wall max'])) + "\t") f.write("{:<10}".format(str(dic['cpu min'])) + "\t") f.write("{:<10}".format(str(dic['wall min'])) + "\t") f.write("{:<10}".format(str(dic['cpu avg'])) + "\t") f.write("{:<10}".format(str(dic['wall avg']))) f.write("\n") f.close()