-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathPLS_Demo.py
74 lines (56 loc) · 3.02 KB
/
PLS_Demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
"""
@author: JiangSu
Email: [email protected]
"""
import scipy.io as sio
from pypls import *
import os
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ================== Load Spectra DataSet ==================
# https://eigenvector.com/resources/data-sets/
# This data set consists of 80 samples of corn measured on 3 different NIR spectrometers.
# The wavelength range is 1100-2498nm at 2 nm intervals (700 channels).
# The moisture, oil, protein and starch values for each of the samples is also included.
# A number of NBS glass standards were also measured on each instrument.
# The data was originally taken at Cargill.
mat_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), r'Benchmark_Corn.mat')
data = sio.loadmat(mat_path)
# the only difference between 'spectra' and 'absorbance' is that
# 'spectra' array includes wavelength in the first row.
# ------ take M5 for example ------
spectra = data['m5'] # (81, 700)
absorbance = spectra[1:, :] # (80, 700) 80 samples's nir spectra, 700 data points
# ------ take 'oil' for example ------
target = data['oil'] # (80, 1) concentration of Oil
wavelength = spectra[0, :] # 1100 - 2498 nm
# ================== samples split(Kennard-Stone) ==================
# divide the total samples into 3 parts: calset, valset, testset
calset_indices, valset_indices, testset_indices = samples_ks_split(absorbance, val_size=0.2, test_size=0.2)
xcal = np.vstack((wavelength, absorbance[calset_indices, :]))
ycal = target[calset_indices]
xval = np.vstack((wavelength, absorbance[valset_indices, :]))
yval = target[valset_indices]
xtest = np.vstack((wavelength, absorbance[testset_indices, :]))
ytest = target[testset_indices]
# ================== start computing ==================
# Note: In PartialLeastSquares Class, all X inputs should include 'wavelength'
# 'pretreat_method2' include 'MC' and 'ZS'
pls_instance = PartialLeastSquares(algorithm='ikpls_algorithm',
max_nlv=20,
pretreat_method1='SGMSC',
pretreat_params1={'window_size':11, 'deriv':1},
pretreat_method2='MC',
customized_regions=[[1100, 1500], [1600, 2400]])
# -------------- cross validation --------------
# includes: 'cv_result' and 'cal_result'
pls_cv_result = pls_instance.cv(xcal, ycal, cv_sampling_method='cv_kfold_systematic_sampling',
sampling_param={'kfold':10}, calset_indices=None)
# -------------- valset validation --------------
# includes: 'vv_result' and 'cal_result'
pls_vv_result = pls_instance.vv(xcal, ycal, xval, yval)
# -------------- prediction --------------
# You can specify the 'nlv' yourself, or using the 'optimal_nlv' generated from 'cv' or 'vv'
pls_predict_result = pls_instance.predict(xtest, nlv=None, testset_indices=testset_indices, testset_target=ytest)
print('Thank you!')