-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathl_regression.m
344 lines (298 loc) · 10.9 KB
/
l_regression.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
function [wlog,aux]=l_regression(wlog,expression,varargin)
% Function creates new log curve using the mathematical expression contained in the
% third input argument. Null values (NaNs) in the log curves are ignored when computing
% the regression parameters.
%
% Written by E. R., March 16, 2001
% Last updated: July 12, 2001: Creates initial values even if no bounds are given
%
% [wlog,aux]=l_regression(wlog,expression,varargin)
% INPUT
% wlog log structure
% expression expression in MATLAB syntax between curves of wlog. These curves are
% represented by their mnemonics. No default.
% EXAMPLE: 'rho = x1*Vp^x2' which estimates a Gardner-type relationship
% for the density
% The curve to the left of the equal sign is called the target curve
% ('rho' in this example)
% The parameters to be estimated are represented by "x1","x2",...,"x9".
% varargin one or more cell arrays; the first element of each cell array is a
% keyword, the other elements are parameters. Presently, keywords are:
% 'depths' start and end depth of curves used for regression calculation
% Default: {'depths',wlog.first,wlog.last}
% 'rows' string with logical expression involving one more of the curve mnemonics
% Can be used to restrict curve values used for regression to certain lithologies
% Default: {'rows',''} (implies all rows)
% 'lbounds' lower limits for the values of the parameters to be
% estimated.
% No defaults.
% 'ubounds' upper limits for the values of the parameters to be
% estimated.
% No defaults.
% 'description' description of the newly created curve
% 'x' initial values of the parameters
% Default: {'x',0.5*(ubounds-lbounds)}
% Error message if any of the bounds are infinite.
% 'mnem' mnemonic of the estimated curve.
% Default: {'mnem','curve_pred'} where "curve" is the mnmonic of the
% target curve in "expression"; in the example above it
% would be 'rho_pred')
% 'norm' Norm to use; possible values are 'L1' (minimizes the sum of the
% absolute values of the error; more robust) or 'L2' (sum of the
% squares of the error; faster). If norm is 'L1', two additional parameters
% may be given which allow treating positve deflections different from
% negative ones. Not case-sensitive.
% Default: {'norm','L1')
% OUTPUT
% wlog input log with the additional curve appended (including an updated field
% "curve_info")
% aux structure with auxiliary information. The following fields are present:
% x1, x2, ... estimated parameters;
% x estimated paramters as a row vector
% fval final function value (error)
% exitflag of the minimization routine
% general info from minimization routine (important only for debugging)
% Set defaults of input arguments
param.depths=[];
param.rows=[];
param.lbounds=[];
param.ubounds=[];
% param.description='';
param.x=[];
param.mnem=[];
param.norm='l1';
% Decode and assign input arguments
param=assign_input(param,varargin);
% Find all the words in the expression
words=extract_words(expression);
% Check if mnemonic of output curve (first variable in expression) exists
mnem=words{1};
[index,ier]=curve_index1(wlog,mnem,0);
if ier
if isempty(index)
disp([char(13),' No curve with mnemonic "',mnem,'" found in log structure'])
else
disp([char(13),' More than one curve with mnemonic "',mnem,'" found in log structure'])
end
error(' Abnormal termination')
end
% Set default name for target curve (if not defined in argument list)
if isempty(param.mnem)
param.mnem=[mnem,'_pred'];
end
nsamp=size(wlog.curves,1);
% words=extract_words(expression);
vars=symvar(expression);
% Find all the parameters
x1to9={'x1','x2','x3','x4','x5','x6','x7','x8','x9'};
lindex=ismember(vars,x1to9);
xi=vars(lindex);
lxi=length(xi);
% Check initial values for parameters
if ~isempty(param.x)
if length(param.x) ~= lxi
disp([char(13),' Discrepancy: ',num2str(lxi),' parameters found in expression, but ', ...
num2str(length(param.x)),' initial value(s) found'])
error(' Abnormal termination')
end
if iscell(param.x)
param.x=cat(2,param.x{:});
end
end
% Check bounds of parameters
if isempty(param.lbounds)
param.lbounds=-inf*ones(1,lxi);
else
if length(param.lbounds) ~= lxi
disp([char(13),' Number of lower bounds (',num2str(length(param.lbounds)), ...
') not equal to number of parameters in expression (', ...
num2str(lxi),')'])
disp(' Check number of values entered via keyword "lbounds"')
error(' Abnormal ternination')
end
if iscell(param.lbounds)
param.lbounds=cat(2,param.lbounds{:});
end
end
if isempty(param.ubounds)
param.ubounds=inf*ones(1,lxi);
else
if length(param.ubounds) ~= lxi
disp([char(13),' Number of upper bounds (',num2str(length(param.ubounds)), ...
') not equal to number of parameters in expression (', ...
num2str(lxi),')'])
disp(' Check number of values entered via keyword "ubounds"')
error(' Abnormal ternination')
end
if iscell(param.ubounds)
param.ubounds=cat(2,param.ubounds{:});
end
end
if isempty(param.x)
param.x=0.5*(param.ubounds+param.lbounds);
if any(isinf(param.x))
error('There is no default for the starting values if any of the bounds are infinite.')
end
idx=find(isnan(param.x));
if ~isempty(idx)
for ii=1:length(idx)
if param.lbounds(idx(ii)) > -inf
param.x(idx(ii))=param.lbounds(idx(ii)) + 1;
elseif param.ubounds(idx(ii)) < inf
param.x(idx(ii))=param.ubounds(idx(ii)) - 1;
else
param.x(idx(ii))=0;
end
end
end
end
% Check if the initial values of the parameters are within the bounds
temp=find(param.x < param.lbounds);
ier=0;
if ~isempty(temp)
ier=1;
disp([char(13),' One or more initial parameter values are below the lower bounds'])
end
temp=find(param.x > param.ubounds);
if ~isempty(temp)
ier=1;
disp([char(13),' One or more initial parameter values are above the upper bounds'])
end
if ier
disp(' Initial parameter values:')
disp(param.x)
error(' Abnormal termination')
end
% Find all the curves ...
curve_names=vars(~lindex);
lp=length(curve_names);
% Check if "expression" is a valid MATLAB expression
for ii=1:lxi
eval([xi{ii},'=param.x(ii);']);
end
for ii=1:lp
eval([curve_names{ii},'=1;']);
end
try
eval([expression,';'])
catch
disp([char(13),' It is very likely, that "',expression,'" is not a valid MATLAB expression']);
error(' Abnormal termination')
end
% Check if there are selection criteria regarding depth range and rows
if ~isempty(param.depths) | ~isempty(param.rows)
if ~isempty(param.depths)
temp=l_select(wlog,{'depths',param.depths{1},param.depths{2}});
if ~isempty(param.rows)
temp=l_select(temp,{'rows',param.rows});
end
else
temp=l_select(wlog,{'rows',param.rows});
end
[aux,rhs_expression]=find_parameters(temp,expression,xi,param,curve_names);
else
[aux,rhs_expression]=find_parameters(wlog,expression,xi,param,curve_names);
end
% Create new curve with the parameters just estimated and add it
% to input log structure
curves=zeros(nsamp,lp);
for ii=1:lp
if ~strcmpi(curve_names(ii),mnem)
curves(:,ii)=l_gc(wlog,curve_names{ii});
end
end
x=aux.x;
test=sum(curves,2);
lindex=find(~isnan(test));
curves=curves(lindex,:);
temp=eval(rhs_expression);
new_curve=NaN*zeros(nsamp,1);
new_curve(lindex)=temp;
wlog=l_curve(wlog,'add_ne',param.mnem,new_curve,l_gu(wlog,mnem), ...
[l_gd(wlog,mnem),' (predicted)']);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [aux,rhs_expression]=find_parameters(wlog,expression,xi,param,curve_names)
[nsamp,ntr]=size(wlog.curves);
lxi=length(param.x);
% ... and copy them into matrix "curves", ...
lp=length(curve_names);
curves=zeros(nsamp,lp);
for ii=1:lp
eval(['curves(:,ii)=l_gc(wlog,''',curve_names{ii},''');']);
end
% curves_orig=curves;
% ... remove all null values
test=sum(curves,2);
% idx=find(~isnan(test));
curves=curves(~isnan(test),:);
% Check if there are enough curve samples left
if size(curves,1) < lxi
error(' Not enough valid sampes in requested curves')
end
% Transform "expression" into the objective function to be minimized
% 1. Modify expression to be valid for vectors
expr=strrep(expression,'*','.*');
expr=strrep(expr,'/','./');
expr=strrep(expr,'^','.^');
% 2. replace x1, x2, ... by x(1),x(2),...
for ii=1:lxi
expr=strrep(expr,xi{ii},['x(',num2str(ii),')']);
end
% 3. replace "=" sign by "-" sign
idx=findstr(expr,'=');
if length(idx) ~= 1
disp([char(13),' No equal sigh (=) found in expression "',expr])
error(' Abnormal termination')
end
expr1=[expr(1:idx-1),'-(',expr(idx+1:end),')'];
rhs_expression=expr(idx+1:end);
% 4. replace header mnemonics by columns of array,...
funct=expr1;
for ii=1:lp
funct=strrep(funct,curve_names{ii},['curves(:,',num2str(ii),')']);
rhs_expression=strrep(rhs_expression,curve_names{ii},['curves(:,',num2str(ii),')']);
end
% 5. define objective function for the minimization condition
if ischar(param.norm)
if strcmpi(param.norm,'L1')
min_cond=['sum(abs(',funct,'))'];
elseif strcmpi(param.norm,'L2')
min_cond=['norm(',funct,')'];
else
disp([char(13),' Unknown norm "',param.norm,'"'])
error(' Abnormal termination')
end
else
if strcmpi(param.norm{1},'L1')
funct1=['(',expr1,')'];
funct2=funct1;
for ii=1:lp
funct1=strrep(funct1,curve_names{ii},['curves(',funct,' > 0,',num2str(ii),')']);
funct2=strrep(funct2,curve_names{ii},['curves(',funct,' < 0,',num2str(ii),')']);
end
min_cond=[num2str(param.norm{3}),'*','sum(abs(',funct1,')) + ', ...
num2str(param.norm{2}),'*','sum(abs(',funct2,'))'];
else
disp([char(13),' Unknown norm "',param.norm,'" or parameters missing from "L1"'])
error(' Abnormal termination')
end
end
% Create inline function for the objective function
ifunct=inline(min_cond,'x','curves');
options=optimset('MaxFunEvals',5000,'LargeScale','off');
if isempty(param.lbounds) & isempty(param.ubounds)
% Perform unconstrained minimization
[x,fval,exitflag,output]=fminunc(ifunct,param.x,options,curves);
else % Perform constrained minimization
[x,fval,exitflag,output]=fmincon(ifunct,param.x,[],[],[],[],param.lbounds,param.ubounds, ...
[],options,curves);
end
% Prepare output
aux=[];
for ii=1:lxi
aux=setfield(aux,xi{ii},x(ii));
end
aux.x=x;
aux.fval=fval;
aux.exitflag=exitflag;
aux.output=output;