Source code for opal.analysis.pareto_fronts

# Copyright (c) 2018, Nicole Neveu, SLAC National Accelerator Laboratory
# All rights reserved
#
# This file is part of pyOPALTools.
#
# pyOPALTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# You should have received a copy of the GNU General Public License
# along with pyOPALTools. If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import pandas as pd
from opal.datasets.filetype import FileType
from db import mldb

def pareto_pts(x, y):
    """Find Pareto points for two objectives.

    The points are found over all data recorded by an optimization run,
    independent of generation, i.e. the best points from all generations
    are found and returned.

    Both objectives are scaled with ``scaleData`` and combined into a
    weighted sum ``w * y_scaled + (1 - w) * x_scaled`` for 101 evenly
    spaced weights ``w`` in [0, 1]. The index minimizing each weighted sum
    is kept; duplicates are removed.

    Parameters
    ----------
    x : array_like
        1D array of first objective values
    y : array_like
        1D array of second objective values

    Returns
    -------
    pfdict : dict
        Dictionary with keys ``'x'`` and ``'y'`` holding tuples of the
        selected objective values, sorted by ascending ``x``
        (ties broken by ``y``, then by index).
    ind : tuple of int
        Positions of the selected points in the input arrays, in the
        same order as the entries of ``pfdict``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` are empty or differ in length.
    """
    # Positional indexing below requires arrays (lists would fail and a
    # pandas Series would index by label instead of position).
    x = np.asarray(x)
    y = np.asarray(y)

    if len(x) != len(y):
        raise ValueError(
            'Input data sizes do not match: len(x)=%d, len(y)=%d'
            % (len(x), len(y))
        )
    if len(x) == 0:
        raise ValueError('Input arrays are empty')

    sx = scaleData(x)
    sy = scaleData(y)

    # Location of the best point for every weight; a set drops repeats.
    weights = np.linspace(0.0, 1.0, 101)
    best = {int(np.argmin(sy * w + sx * (1.0 - w))) for w in weights}
    ind = np.array(sorted(best), dtype=int)

    # Reorder by objective values for easier plotting.
    reorder = sorted(zip(x[ind], y[ind], ind))

    pfdict = {}
    pfdict['x'], pfdict['y'], ind = zip(*reorder)

    return pfdict, ind
def get_all_data_db(dbpath):
    """Get objectives and design variables.

    Get all objectives and design variables from every generation in an
    optimization database. Databases are made using OPAL output from json
    files or stat files. Functions to make databases can be found in
    mldb.py.

    Parameters
    ----------
    dbpath : str
        Path to pickle file containing database made with mldb.py

    Returns
    -------
    data : dict
        Dictionary containing all objectives and design values in the
        optimization database. Keys are the design variable names and the
        objective names reported by the database; each value is the
        corresponding column stacked over all generations.

    Raises
    ------
    ValueError
        If the database reports no generations.
    """
    dbr = mldb.mldb()
    dbr.load(dbpath)

    dvar_names = dbr.getXNames()
    obj_names = dbr.getYNames()
    # The sample count is used as the number of generations to iterate over.
    num_gens = dbr.getNumberOfSamples()

    if num_gens < 1:
        raise ValueError('Database %r contains no generations' % (dbpath,))

    # Collect per-generation blocks first and stack once at the end;
    # appending to an array inside the loop copies it on every iteration.
    dvals_per_gen = []
    objs_per_gen = []
    for gen in range(num_gens):
        dvals_per_gen.append(dbr.getAllDvar(gen))
        objs_per_gen.append(dbr.getAllObj(gen))

    alldvals = np.concatenate(dvals_per_gen, axis=0)
    allobjs = np.concatenate(objs_per_gen, axis=0)

    # Design variables first, then objectives (same insertion order as
    # before, so an objective sharing a name with a design variable wins).
    data = {dname: alldvals[:, i] for i, dname in enumerate(dvar_names)}
    data.update(
        {objname: allobjs[:, j] for j, objname in enumerate(obj_names)}
    )

    return data
def scaleData(vals):
    """Scale 1D data array from 0 to 1.

    Used to compare objectives with different units. The minimum of
    ``vals`` maps to 0 and the maximum maps to 1.

    Parameters
    ----------
    vals : array_like
        1D array that holds any opal data

    Returns
    -------
    scaled_vals : numpy.ndarray
        1D array scaled from 0 to 1. If all entries of ``vals`` are equal
        there is no range to scale by and an array of zeros is returned.
    """
    vals = np.asarray(vals, dtype=float)
    smin = np.min(vals)
    # Divide by the data range, not by the maximum: dividing by the maximum
    # only reaches 1 when the minimum is 0 and breaks for negative data.
    span = np.max(vals) - smin

    if span == 0:
        return np.zeros_like(vals)

    return (vals - smin) / span
def delete_repeats(x, y, z=0):
    """Delete repeated pareto front values, if any.

    Rows are considered repeats when both their ``x`` and ``y`` values
    match; the first occurrence is kept.

    Parameters
    ----------
    x : array_like
        1D array of first objective values
    y : array_like
        1D array of second objective values
    z : array_like, optional
        Design variable values belonging to each (x, y) point. The default
        scalar ``0`` (or ``None``) means no design variables are given and
        no ``'z'`` column is created. A multi-dimensional array is stored
        with one row of ``z`` per cell.

    Returns
    -------
    df : pandas.DataFrame
        Data frame with columns ``'x'``, ``'y'`` (and ``'z'`` when given)
        without repeated (x, y) rows.
    """
    columns = {'x': x, 'y': y}

    # Only compare against the sentinel when z is a scalar: `z == 0` on an
    # array is element-wise and cannot be used as an `if` condition.
    z_missing = z is None or (np.isscalar(z) and z == 0)

    if not z_missing:
        if isinstance(z, np.ndarray) and z.ndim > 1:
            # A DataFrame column must be 1D; keep each row of z in one cell.
            z = list(z)
        columns['z'] = z

    df = pd.DataFrame(columns)
    return df.drop_duplicates(subset=['x', 'y'], keep='first')