Source code for opal.analysis.pareto_fronts

# Copyright (c) 2018, Nicole Neveu, SLAC National Accelerator Laboratory
# All rights reserved
#
# This file is part of pyOPALTools.
#
# pyOPALTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# You should have received a copy of the GNU General Public License
# along with pyOPALTools. If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import pandas as pd
from opal.datasets.filetype import FileType
from db import mldb

def pareto_pts(x, y):
    """Find Pareto points for two objectives.

    Searches all data recorded by an optimization run, independent of
    generation, i.e. the best points from all generations are found.

    The search is a weighted-sum scan: both objectives are scaled with
    ``scaleData`` and, for each of 101 evenly spaced weights ``w`` in
    [0, 1], the point minimizing ``w * y_scaled + (1 - w) * x_scaled``
    is kept. Both objectives are therefore treated as quantities to
    minimize.

    Parameters
    ----------
    x : array_like
        1D array of first objective values
    y : array_like
        1D array of second objective values

    Returns
    -------
    pfdict : dict
        Dictionary with keys ``'x'`` and ``'y'`` holding tuples of the
        Pareto front values, sorted by ascending ``x`` (ties broken
        by ``y``)
    ind : tuple of int
        Indices of the Pareto points in the input arrays, in the
        same order as the entries of ``pfdict``

    Raises
    ------
    ValueError
        If the inputs are empty or their sizes do not match.
    """
    # Convert up front so lists/Series can be fancy-indexed below.
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()

    # Fail early instead of printing and crashing later on.
    if x.size != y.size:
        raise ValueError('Input data sizes do not match, '
                         'please check input arrays')
    if x.size == 0:
        raise ValueError('Input arrays are empty')

    sx = scaleData(x)
    sy = scaleData(y)

    # Finding locations of best points with respect to all weights.
    # linspace avoids the length ambiguity of a float-step arange.
    best = set()
    for w in np.linspace(0.0, 1.0, 101):
        fobj = sy * w + sx * (1.0 - w)
        # argmin returns the first minimum, like where(...)[0][0]
        best.add(int(np.argmin(fobj)))

    ind = np.array(sorted(best), dtype=int)

    # Reordering values by x for easier plotting
    reorder = sorted(zip(x[ind], y[ind], ind))
    pfdict = {}
    pfdict['x'], pfdict['y'], ind = zip(*reorder)

    return pfdict, ind


def get_all_data_db(dbpath):
    """Get objectives and design variables.

    Get all objectives and design variables
    from every generation in an optimization
    database. Databases are made using OPAL
    output from json files or stat files.
    Functions to make databases can be found
    in mldb.py.

    Parameters
    ----------
    dbpath : str
        Path to pickle file containing
        database made with mldb.py

    Returns
    -------
    data : dict
        Dictionary containing all
        objectives and design values
        in the optimization database,
        keyed by variable / objective name.
        Each value is a 1D array spanning
        all generations. If the database
        holds no generations the arrays
        are empty.
    """
    dbr = mldb.mldb()
    dbr.load(dbpath)
    dvar_names = dbr.getXNames()
    obj_names = dbr.getYNames()
    # The sample count is used as the number of generations to read.
    num_gens = dbr.getNumberOfSamples()

    # Collect per-generation blocks and join them once at the end;
    # growing an array with np.append copies all data every iteration.
    dvals_per_gen = []
    objs_per_gen = []
    for gen in range(num_gens):
        dvals_per_gen.append(dbr.getAllDvar(gen))
        objs_per_gen.append(dbr.getAllObj(gen))

    if dvals_per_gen:
        alldvals = np.concatenate(dvals_per_gen, axis=0)
        allobjs = np.concatenate(objs_per_gen, axis=0)
    else:
        # No generations stored: return empty columns instead of failing
        alldvals = np.empty((0, len(dvar_names)))
        allobjs = np.empty((0, len(obj_names)))

    data = {}

    # Make dict entries for design variables
    for i, dname in enumerate(dvar_names):
        data[dname] = alldvals[:, i]

    # Make dict entries for objectives
    for j, objname in enumerate(obj_names):
        data[objname] = allobjs[:, j]

    return data


def scaleData(vals):
    """Scale 1D data array from 0 to 1.

    Used to compare objectives with different units. The minimum
    of the data maps to 0 and the maximum maps to 1 (min-max scaling).

    Parameters
    ----------
    vals : array_like
        1D array that holds any opal data

    Returns
    -------
    scaled_vals : numpy.ndarray
        1D array scaled from 0 to 1. If all values are
        equal, an array of zeros is returned.
    """
    vals = np.asarray(vals, dtype=float)
    smin = np.min(vals)
    # Divide by the range, not the maximum: dividing by the maximum
    # does not reach 1, flips the order for negative data and
    # divides by zero when the maximum is 0.
    span = np.max(vals) - smin
    if span == 0:
        # Constant data carries no ordering information
        return np.zeros_like(vals)
    scaled_vals = (vals - smin) / span
    return scaled_vals


def delete_repeats(x, y, z=0):
    """Delete repeated pareto front values, if any.

    Rows are considered repeats when both their ``x`` and ``y``
    values match; the first occurrence is kept.

    Parameters
    ----------
    x : array_like
        1D array of first objective values
    y : array_like
        1D array of second objective values
    z : array_like, optional
        Array of design variables, one entry (or one row
        for a 2D array) per point. The default, scalar 0,
        means no ``z`` column is added.

    Returns
    -------
    df : pandas.DataFrame
        Data frame with columns ``'x'``, ``'y'`` (and ``'z'`` if
        given) without repeats
    """
    columns = {'x': x, 'y': y}

    # Only the scalar default 0 means "no z". Comparing an array with
    # `z == 0` directly yields an array, whose truth value is ambiguous.
    z_given = not (np.isscalar(z) and z == 0)
    if z_given:
        if isinstance(z, np.ndarray) and z.ndim > 1:
            # A DataFrame column must be 1D: store one row per point
            z = list(z)
        columns['z'] = z

    df = pd.DataFrame(columns)
    return df.drop_duplicates(subset=['x', 'y'], keep='first')