-
Notifications
You must be signed in to change notification settings - Fork 1
/
wwm_sparskit.F90
3195 lines (3160 loc) · 105 KB
/
wwm_sparskit.F90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
!----------------------------------------------------------------------c
! S P A R S K I T c
!----------------------------------------------------------------------c
! Basic Iterative Solvers with Reverse Communication c
!----------------------------------------------------------------------c
! This file currently has several basic iterative linear system c
! solvers. They are: c
! CG -- Conjugate Gradient Method c
! CGNR -- Conjugate Gradient Method (Normal Residual equation) c
! BCG -- Bi-Conjugate Gradient Method c
! DBCG -- BCG with partial pivoting c
! BCGSTAB -- BCG stabilized c
! TFQMR -- Transpose-Free Quasi-Minimum Residual method c
! FOM -- Full Orthogonalization Method c
! GMRES -- Generalized Minimum RESidual method c
! FGMRES -- Flexible version of Generalized Minimum c
! RESidual method c
! DQGMRES -- Direct versions of Quasi Generalize Minimum c
! Residual method c
!----------------------------------------------------------------------c
! They all have the following calling sequence:
! subroutine solver(n, rhs, sol, ipar, fpar, w)
! integer n, ipar(16)
! real(rkind) rhs(n), sol(n), fpar(16), w(*)
! Where
! (1) 'n' is the size of the linear system,
! (2) 'rhs' is the right-hand side of the linear system,
! (3) 'sol' is the solution to the linear system,
! (4) 'ipar' is an integer parameter array for the reverse
! communication protocol,
! (5) 'fpar' is an floating-point parameter array storing
! information to and from the iterative solvers.
! (6) 'w' is the work space (size is specified in ipar)
!
! They are preconditioned iterative solvers with reverse
! communication. The preconditioners can be applied from either
! from left or right or both (specified by ipar(2), see below).
!
! Author: Kesheng John Wu ([email protected]) 1993
!
! NOTES:
!
! (1) Work space required by each of the iterative solver
! routines is as follows:
! CG == 5 * n
! CGNR == 5 * n
! BCG == 7 * n
! DBCG == 11 * n
! BCGSTAB == 8 * n
! TFQMR == 11 * n
! FOM == (n+3)*(m+2) + (m+1)*m/2 (m = ipar(5), default m=15)
! GMRES == (n+3)*(m+2) + (m+1)*m/2 (m = ipar(5), default m=15)
! FGMRES == 2*n*(m+1) + (m+1)*m/2 + 3*m + 2 (m = ipar(5),
! default m=15)
! DQGMRES == n + lb * (2*n+4) (lb=ipar(5)+1, default lb = 16)
!
! (2) ALL iterative solvers require a user-supplied DOT-product
! routine named DISTDOT. The prototype of DISTDOT is
!
! real(rkind) function distdot(n,x,ix,y,iy)
! integer n, ix, iy
! real(rkind) x(1+(n-1)*ix), y(1+(n-1)*iy)
!
! This interface of DISTDOT is exactly the same as that of
! DDOT (or SDOT if real == real(rkind)) from BLAS-1. It should have
! same functionality as DDOT on a single processor machine. On a
! parallel/distributed environment, each processor can perform
! DDOT on the data it has, then perform a summation on all the
! partial results.
!
! (3) To use this set of routines under SPMD/MIMD program paradigm,
! several things are to be noted: (a) 'n' should be the number of
! vector elements of 'rhs' that is present on the local processor.
! (b) if RHS(i) is on processor j, it is expected that SOL(i)
! will be on the same processor, i.e. the vectors are distributed
! to each processor in the same way. (c) the preconditioning and
! stopping criteria specifications have to be the same on all
! processor involved, ipar and fpar have to be the same on each
! processor. (d) DISTDOT should be replaced by a distributed
! dot-product function.
!
! ..................................................................
! Reverse Communication Protocols
!
! When a reverse-communication routine returns, it could be either
! that the routine has terminated or it simply requires the caller
! to perform one matrix-vector multiplication. The possible matrices
! that involve in the matrix-vector multiplications are:
! A (the matrix of the linear system),
! A^T (A transposed),
! Ml^{-1} (inverse of the left preconditioner),
! Ml^{-T} (inverse of the left preconditioner transposed),
! Mr^{-1} (inverse of the right preconditioner),
! Mr^{-T} (inverse of the right preconditioner transposed).
! For all the matrix vector multiplication, v = A u. The input and
! output vectors are supposed to be part of the work space 'w', and
! the starting positions of them are stored in ipar(8:9), see below.
!
! The array 'ipar' is used to store the information about the solver.
! Here is the list of what each element represents:
!
! ipar(1) -- status of the call/return.
! A call to the solver with ipar(1) == 0 will initialize the
! iterative solver. On return from the iterative solver, ipar(1)
! carries the status flag which indicates the condition of the
! return. The status information is divided into two categories,
! (1) a positive value indicates the solver requires a matrix-vector
! multiplication,
! (2) a non-positive value indicates termination of the solver.
! Here is the current definition:
! 1 == request a matvec with A,
! 2 == request a matvec with A^T,
! 3 == request a left preconditioner solve (Ml^{-1}),
! 4 == request a left preconditioner transposed solve (Ml^{-T}),
! 5 == request a right preconditioner solve (Mr^{-1}),
! 6 == request a right preconditioner transposed solve (Mr^{-T}),
! 10 == request the caller to perform stopping test,
! 0 == normal termination of the solver, satisfied the stopping
! criteria,
! -1 == termination because iteration number is greater than the
! preset limit,
! -2 == return due to insufficient work space,
! -3 == return due to anticipated break-down / divide by zero,
! in the case where Arnoldi procedure is used, additional
! error code can be found in ipar(12), where ipar(12) is
! the error code of orthogonalization procedure MGSRO:
! -1: zero input vector
! -2: input vector contains abnormal numbers
! -3: input vector is a linear combination of others
! -4: trianguler system in GMRES/FOM/etc. has nul rank
! -4 == the values of fpar(1) and fpar(2) are both <= 0, the valid
! ranges are 0 <= fpar(1) < 1, 0 <= fpar(2), and they can
! not be zero at the same time
! -9 == while trying to detect a break-down, an abnormal number is
! detected.
! -10 == return due to some non-numerical reasons, e.g. invalid
! floating-point numbers etc.
!
! ipar(2) -- status of the preconditioning:
! 0 == no preconditioning
! 1 == left preconditioning only
! 2 == right preconditioning only
! 3 == both left and right preconditioning
!
! ipar(3) -- stopping criteria (details of this will be
! discussed later).
!
! ipar(4) -- number of elements in the array 'w'. if this is less
! than the desired size, it will be over-written with the minimum
! requirement. In which case the status flag ipar(1) = -2.
!
! ipar(5) -- size of the Krylov subspace (used by GMRES and its
! variants), e.g. GMRES(ipar(5)), FGMRES(ipar(5)),
! DQGMRES(ipar(5)).
!
! ipar(6) -- maximum number of matrix-vector multiplies, if not a
! positive number the iterative solver will run till convergence
! test is satisfied.
!
! ipar(7) -- current number of matrix-vector multiplies. It is
! incremented after each matrix-vector multiplication. If there
! is preconditioning, the counter is incremented after the
! preconditioning associated with each matrix-vector multiplication.
!
! ipar(8) -- pointer to the input vector to the requested matrix-
! vector multiplication.
!
! ipar(9) -- pointer to the output vector of the requested matrix-
! vector multiplication.
!
! To perform v = A * u, it is assumed that u is w(ipar(8):ipar(8)+n-1)
! and v is stored as w(ipar(9):ipar(9)+n-1).
!
! ipar(10) -- the return address (used to determine where to go to
! inside the iterative solvers after the caller has performed the
! requested services).
!
! ipar(11) -- the result of the external convergence test
! On final return from the iterative solvers, this value
! will be reflected by ipar(1) = 0 (details discussed later)
!
! ipar(12) -- error code of MGSRO, it is
! 1 if the input vector to MGSRO is linear combination
! of others,
! 0 if MGSRO was successful,
! -1 if the input vector to MGSRO is zero,
! -2 if the input vector contains invalid number.
!
! ipar(13) -- number of initializations. During each initilization
! residual norm is computed directly from M_l(b - A x).
!
! ipar(14) to ipar(16) are NOT defined, they are NOT USED by
! any iterative solver at this time.
!
! Information about the error and tolerance are stored in the array
! FPAR. So are some internal variables that need to be saved from
! one iteration to the next one. Since the internal variables are
! not the same for each routine, we only define the common ones.
!
! The first two are input parameters:
! fpar(1) -- the relative tolerance,
! fpar(2) -- the absolute tolerance (details discussed later),
!
! When the iterative solver terminates,
! fpar(3) -- initial residual/error norm,
! fpar(4) -- target residual/error norm,
! fpar(5) -- current residual norm (if available),
! fpar(6) -- current residual/error norm,
! fpar(7) -- convergence rate,
!
! fpar(8:10) are used by some of the iterative solvers to save some
! internal information.
!
! fpar(11) -- number of floating-point operations. The iterative
! solvers will add the number of FLOPS they used to this variable,
! but they do NOT initialize it, nor add the number of FLOPS due to
! matrix-vector multiplications (since matvec is outside of the
! iterative solvers). To insure the correct FLOPS count, the
! caller should set fpar(11) = 0 before invoking the iterative
! solvers and account for the number of FLOPS from matrix-vector
! multiplications and preconditioners.
!
! fpar(12:16) are not used in current implementation.
!
! Whether the content of fpar(3), fpar(4) and fpar(6) are residual
! norms or error norms depends on ipar(3). If the requested
! convergence test is based on the residual norm, they will be
! residual norms. If the caller want to test convergence based the
! error norms (estimated by the norm of the modifications applied
! to the approximate solution), they will be error norms.
! Convergence rate is defined by (Fortran 77 statement)
! fpar(7) = log10(fpar(3) / fpar(6)) / (ipar(7)-ipar(13))
! If fpar(7) = 0.5, it means that approximately every 2 (= 1/0.5)
! steps the residual/error norm decrease by a factor of 10.
!
! ..................................................................
! Stopping criteria,
!
! An iterative solver may be terminated due to (1) satisfying
! convergence test; (2) exceeding iteration limit; (3) insufficient
! work space; (4) break-down. Checking of the work space is
! only done in the initialization stage, i.e. when it is called with
! ipar(1) == 0. A complete convergence test is done after each
! update of the solutions. Other conditions are monitored
! continuously.
!
! With regard to the number of iteration, when ipar(6) is positive,
! the current iteration number will be checked against it. If
! current iteration number is greater the ipar(6) than the solver
! will return with status -1. If ipar(6) is not positive, the
! iteration will continue until convergence test is satisfied.
!
! Two things may be used in the convergence tests, one is the
! residual 2-norm, the other one is 2-norm of the change in the
! approximate solution. The residual and the change in approximate
! solution are from the preconditioned system (if preconditioning
! is applied). The DQGMRES and TFQMR use two estimates for the
! residual norms. The estimates are not accurate, but they are
! acceptable in most of the cases. Generally speaking, the error
! of the TFQMR's estimate is less accurate.
!
! The convergence test type is indicated by ipar(3). There are four
! type convergence tests: (1) tests based on the residual norm;
! (2) tests based on change in approximate solution; (3) caller
! does not care, the solver choose one from above two on its own;
! (4) caller will perform the test, the solver should simply continue.
! Here is the complete definition:
! -2 == || dx(i) || <= rtol * || rhs || + atol
! -1 == || dx(i) || <= rtol * || dx(1) || + atol
! 0 == solver will choose test 1 (next)
! 1 == || residual || <= rtol * || initial residual || + atol
! 2 == || residual || <= rtol * || rhs || + atol
! 999 == caller will perform the test
! where dx(i) denote the change in the solution at the ith update.
! ||.|| denotes 2-norm. rtol = fpar(1) and atol = fpar(2).
!
! If the caller is to perform the convergence test, the outcome
! should be stored in ipar(11).
! ipar(11) = 0 -- failed the convergence test, iterative solver
! should continue
! ipar(11) = 1 -- satisfied convergence test, iterative solver
! should perform the clean up job and stop.
!
! Upon return with ipar(1) = 10,
! ipar(8) points to the starting position of the change in
! solution Sx, where the actual solution of the step is
! x_j = x_0 + M_r^{-1} Sx.
! Exception: ipar(8) < 0, Sx = 0. It is mostly used by
! GMRES and variants to indicate (1) Sx was not necessary,
! (2) intermediate result of Sx is not computed.
! ipar(9) points to the starting position of a work vector that
! can be used by the caller.
!
! NOTE: the caller should allow the iterative solver to perform
! clean up job after the external convergence test is satisfied,
! since some of the iterative solvers do not directly
! update the 'sol' array. A typical clean-up stage includes
! performing the final update of the approximate solution and
! computing the convergence information (e.g. values of fpar(3:7)).
!
! NOTE: fpar(4) and fpar(6) are not set by the accelerators (the
! routines implemented here) if ipar(3) = 999.
!
! ..................................................................
! Usage:
!
! To start solving a linear system, the user needs to specify
! first 6 elements of the ipar, and first 2 elements of fpar.
! The user may optionally set fpar(11) = 0 if one wants to count
! the number of floating-point operations. (Note: the iterative
! solvers will only add the floating-point operations inside
! themselves, the caller will have to add the FLOPS from the
! matrix-vector multiplication routines and the preconditioning
! routines in order to account for all the arithmetic operations.)
!
! Here is an example:
! ipar(1) = 0 ! always 0 to start an iterative solver
! ipar(2) = 2 ! right preconditioning
! ipar(3) = 1 ! use convergence test scheme 1
! ipar(4) = 10000 ! the 'w' has 10,000 elements
! ipar(5) = 10 ! use *GMRES(10) (e.g. FGMRES(10))
! ipar(6) = 100 ! use at most 100 matvec's
! fpar(1) = 1.0E-6 ! relative tolerance 1.0E-6
! fpar(2) = 1.0E-10 ! absolute tolerance 1.0E-10
! fpar(11) = 0.0 ! clearing the FLOPS counter
!
! After the above specifications, one can start to call an iterative
! solver, say BCG. Here is a piece of pseudo-code showing how it can
! be done,
!
! 10 call bcg(n,rhs,sol,ipar,fpar,w)
! if (ipar(1).eq.1) then
! call amux(n,w(ipar(8)),w(ipar(9)),a,ja,ia)
! goto 10
! else if (ipar(1).eq.2) then
! call atmux(n,w(ipar(8)),w(ipar(9)),a,ja,ia)
! goto 10
! else if (ipar(1).eq.3) then
! left preconditioner solver
! goto 10
! else if (ipar(1).eq.4) then
! left preconditioner transposed solve
! goto 10
! else if (ipar(1).eq.5) then
! right preconditioner solve
! goto 10
! else if (ipar(1).eq.6) then
! right preconditioner transposed solve
! goto 10
! else if (ipar(1).eq.10) then
! call my own stopping test routine
! goto 10
! else if (ipar(1).gt.0) then
! ipar(1) is an unspecified code
! else
! the iterative solver terminated with code = ipar(1)
! endif
!
! This segment of pseudo-code assumes the matrix is in CSR format,
! AMUX and ATMUX are two routines from the SPARSKIT MATVEC module.
! They perform matrix-vector multiplications for CSR matrices,
! where w(ipar(8)) is the first element of the input vectors to the
! two routines, and w(ipar(9)) is the first element of the output
! vectors from them. For simplicity, we did not show the name of
! the routine that performs the preconditioning operations or the
! convergence tests.
!-----------------------------------------------------------------------
subroutine bcgstab(n, rhs, sol, ipar, fpar, w)
use datapool, only : rkind
implicit none
integer, intent(inout) :: n, ipar(16)
real(rkind), intent(inout) :: rhs(n), sol(n), fpar(16), w(n,8)
!-----------------------------------------------------------------------
! BCGSTAB --- Bi Conjugate Gradient stabilized (BCGSTAB)
! This is an improved BCG routine. (1) no matrix transpose is
! involved. (2) the convergence is smoother.
!
!
! Algorithm:
! Initialization - r = b - A x, r0 = r, p = r, rho = (r0, r),
! Iterate -
! (1) v = A p
! (2) alpha = rho / (r0, v)
! (3) s = r - alpha v
! (4) t = A s
! (5) omega = (t, s) / (t, t)
! (6) x = x + alpha * p + omega * s
! (7) r = s - omega * t
! convergence test goes here
! (8) beta = rho, rho = (r0, r), beta = rho * alpha / (beta * omega)
! p = r + beta * (p - omega * v)
!
! in this routine, before successful return, the fpar's are
! fpar(3) == initial (preconditionied-)residual norm
! fpar(4) == target (preconditionied-)residual norm
! fpar(5) == current (preconditionied-)residual norm
! fpar(6) == current residual norm or error
! fpar(7) == current rho (rhok = <r, r0>)
! fpar(8) == alpha
! fpar(9) == omega
!
! Usage of the work space W
! w(:, 1) = r0, the initial residual vector
! w(:, 2) = r, current residual vector
! w(:, 3) = s
! w(:, 4) = t
! w(:, 5) = v
! w(:, 6) = p
! w(:, 7) = tmp, used in preconditioning, etc.
! w(:, 8) = delta x, the correction to the answer is accumulated
! here, so that the right-preconditioning may be applied
! at the end
!-----------------------------------------------------------------------
! external routines used
!
real(rkind) distdot
logical stopbis, brkdn
external distdot, stopbis, brkdn
!
real(rkind) one
parameter(one=1.0_rkind)
!
! local variables
!
integer i
real(rkind) alpha,beta,rho,omega
logical lp, rp
save lp, rp
!
! where to go
!
if (ipar(1).gt.0) then
goto (10, 20, 40, 50, 60, 70, 80, 90, 100, 110) ipar(10)
else if (ipar(1).lt.0) then
goto 900
endif
!
! call the initialization routine
!
call bisinit(ipar,fpar,8*n,1,lp,rp,w)
if (ipar(1).lt.0) return
!
! perform a matvec to compute the initial residual
!
ipar(1) = 1
ipar(8) = 1
ipar(9) = 1 + n
do i = 1, n
w(i,1) = sol(i)
enddo
ipar(10) = 1
return
10 ipar(7) = ipar(7) + 1
ipar(13) = ipar(13) + 1
do i = 1, n
w(i,1) = rhs(i) - w(i,2)
enddo
fpar(11) = fpar(11) + n
if (lp) then
ipar(1) = 3
ipar(10) = 2
return
endif
!
20 if (lp) then
do i = 1, n
w(i,1) = w(i,2)
w(i,6) = w(i,2)
enddo
else
do i = 1, n
w(i,2) = w(i,1)
w(i,6) = w(i,1)
enddo
endif
!
fpar(7) = distdot(n,w,1,w,1)
fpar(11) = fpar(11) + 2 * n
fpar(5) = sqrt(fpar(7))
fpar(3) = fpar(5)
if (abs(ipar(3)).eq.2) then
fpar(4) = fpar(1) * sqrt(distdot(n,rhs,1,rhs,1)) + fpar(2)
fpar(11) = fpar(11) + 2 * n
else if (ipar(3).ne.999) then
fpar(4) = fpar(1) * fpar(3) + fpar(2)
endif
if (ipar(3).ge.0) fpar(6) = fpar(5)
if (ipar(3).ge.0 .and. fpar(5).le.fpar(4) .and. ipar(3).ne.999) then
goto 900
endif
!
! beginning of the iterations
!
! Step (1), v = A p
30 if (rp) then
ipar(1) = 5
ipar(8) = 5*n+1
if (lp) then
ipar(9) = 4*n + 1
else
ipar(9) = 6*n + 1
endif
ipar(10) = 3
return
endif
!
40 ipar(1) = 1
if (rp) then
ipar(8) = ipar(9)
else
ipar(8) = 5*n+1
endif
if (lp) then
ipar(9) = 6*n + 1
else
ipar(9) = 4*n + 1
endif
ipar(10) = 4
return
50 if (lp) then
ipar(1) = 3
ipar(8) = ipar(9)
ipar(9) = 4*n + 1
ipar(10) = 5
return
endif
!
60 ipar(7) = ipar(7) + 1
!
! step (2)
alpha = distdot(n,w(1,1),1,w(1,5),1)
fpar(11) = fpar(11) + 2 * n
if (brkdn(alpha, ipar)) goto 900
alpha = fpar(7) / alpha
fpar(8) = alpha
!
! step (3)
do i = 1, n
w(i,3) = w(i,2) - alpha * w(i,5)
enddo
fpar(11) = fpar(11) + 2 * n
!
! Step (4): the second matvec -- t = A s
!
if (rp) then
ipar(1) = 5
ipar(8) = n+n+1
if (lp) then
ipar(9) = ipar(8)+n
else
ipar(9) = 6*n + 1
endif
ipar(10) = 6
return
endif
!
70 ipar(1) = 1
if (rp) then
ipar(8) = ipar(9)
else
ipar(8) = n+n+1
endif
if (lp) then
ipar(9) = 6*n + 1
else
ipar(9) = 3*n + 1
endif
ipar(10) = 7
return
80 if (lp) then
ipar(1) = 3
ipar(8) = ipar(9)
ipar(9) = 3*n + 1
ipar(10) = 8
return
endif
90 ipar(7) = ipar(7) + 1
!
! step (5)
omega = distdot(n,w(1,4),1,w(1,4),1)
fpar(11) = fpar(11) + n + n
if (brkdn(omega,ipar)) goto 900
omega = distdot(n,w(1,4),1,w(1,3),1) / omega
fpar(11) = fpar(11) + n + n
if (brkdn(omega,ipar)) goto 900
fpar(9) = omega
alpha = fpar(8)
!
! step (6) and (7)
do i = 1, n
w(i,7) = alpha * w(i,6) + omega * w(i,3)
w(i,8) = w(i,8) + w(i,7)
w(i,2) = w(i,3) - omega * w(i,4)
enddo
fpar(11) = fpar(11) + 6 * n + 1
!
! convergence test
if (ipar(3).eq.999) then
ipar(1) = 10
ipar(8) = 7*n + 1
ipar(9) = 6*n + 1
ipar(10) = 9
return
endif
if (stopbis(n,ipar,2,fpar,w(1,2),w(1,7),one)) goto 900
100 if (ipar(3).eq.999.and.ipar(11).eq.1) goto 900
!
! step (8): computing new p and rho
!
rho = fpar(7)
fpar(7) = distdot(n,w(1,2),1,w(1,1),1)
omega = fpar(9)
beta = fpar(7) * fpar(8) / (fpar(9) * rho)
do i = 1, n
w(i,6) = w(i,2) + beta * (w(i,6) - omega * w(i,5))
enddo
fpar(11) = fpar(11) + 6 * n + 3
if (brkdn(fpar(7),ipar)) goto 900
!
! end of an iteration
!
goto 30
!
! some clean up job to do
!
900 if (rp) then
if (ipar(1).lt.0) ipar(12) = ipar(1)
ipar(1) = 5
ipar(8) = 7*n + 1
ipar(9) = ipar(8) - n
ipar(10) = 10
return
endif
110 if (rp) then
call tidycg(n,ipar,fpar,sol,w(1,7))
else
call tidycg(n,ipar,fpar,sol,w(1,8))
endif
!
return
!-----end-of-bcgstab
end
!-----------------------------------------------------------------------
subroutine gmres(n, rhs, sol, ipar, fpar, w)
USE DATAPOOL, only : rkind
implicit none
integer n, ipar(16)
real(rkind) rhs(n), sol(n), fpar(16), w(*)
!-----------------------------------------------------------------------
! This a version of GMRES implemented with reverse communication.
! It is a simple restart version of the GMRES algorithm.
!
! ipar(5) == the dimension of the Krylov subspace
! after every ipar(5) iterations, the GMRES will restart with
! the updated solution and recomputed residual vector.
!
! the space of the `w' is used as follows:
! (1) the basis for the Krylov subspace, size n*(m+1);
! (2) the Hessenberg matrix, only the upper triangular
! portion of the matrix is stored, size (m+1)*m/2 + 1
! (3) three vectors, all are of size m, they are
! the cosine and sine of the Givens rotations, the third one holds
! the residuals, it is of size m+1.
!
! TOTAL SIZE REQUIRED == (n+3)*(m+2) + (m+1)*m/2
! Note: m == ipar(5). The default value for this is 15 if
! ipar(5) <= 1.
!-----------------------------------------------------------------------
! external functions used
!
real(rkind) distdot
external distdot
!
real(rkind) one, zero
parameter(one=1.0_rkind, zero=0.0_rkind)
!
! local variables, ptr and p2 are temporary pointers,
! hess points to the Hessenberg matrix,
! vc, vs point to the cosines and sines of the Givens rotations
! vrn points to the vectors of residual norms, more precisely
! the right hand side of the least square problem solved.
!
integer i,ii,idx,k,m,ptr,p2,hess,vc,vs,vrn
real(rkind) alpha, c, s
logical lp, rp
save
!
! check the status of the call
!
if (ipar(1).le.0) ipar(10) = 0
goto (10, 20, 30, 40, 50, 60, 70) ipar(10)
!
! initialization
!
if (ipar(5).le.1) then
m = 15
else
m = ipar(5)
endif
idx = n * (m+1)
hess = idx + n
vc = hess + (m+1) * m / 2 + 1
vs = vc + m
vrn = vs + m
i = vrn + m + 1
call bisinit(ipar,fpar,i,1,lp,rp,w)
if (ipar(1).lt.0) return
!
! request for matrix vector multiplication A*x in the initialization
!
100 ipar(1) = 1
ipar(8) = n+1
ipar(9) = 1
ipar(10) = 1
k = 0
do i = 1, n
w(n+i) = sol(i)
enddo
return
10 ipar(7) = ipar(7) + 1
ipar(13) = ipar(13) + 1
if (lp) then
do i = 1, n
w(n+i) = rhs(i) - w(i)
enddo
ipar(1) = 3
ipar(10) = 2
return
else
do i = 1, n
w(i) = rhs(i) - w(i)
enddo
endif
fpar(11) = fpar(11) + n
!
20 alpha = sqrt(distdot(n,w,1,w,1))
fpar(11) = fpar(11) + 2*n
if (ipar(7).eq.1 .and. ipar(3).ne.999) then
if (abs(ipar(3)).eq.2) then
fpar(4) = fpar(1) * sqrt(distdot(n,rhs,1,rhs,1)) + fpar(2)
fpar(11) = fpar(11) + 2*n
else
fpar(4) = fpar(1) * alpha + fpar(2)
endif
fpar(3) = alpha
endif
fpar(5) = alpha
w(vrn+1) = alpha
if (alpha.le.fpar(4) .and. ipar(3).ge.0 .and. ipar(3).ne.999) then
ipar(1) = 0
fpar(6) = alpha
goto 300
endif
alpha = one / alpha
do ii = 1, n
w(ii) = alpha * w(ii)
enddo
fpar(11) = fpar(11) + n
!
! request for (1) right preconditioning
! (2) matrix vector multiplication
! (3) left preconditioning
!
110 k = k + 1
if (rp) then
ipar(1) = 5
ipar(8) = k*n - n + 1
if (lp) then
ipar(9) = k*n + 1
else
ipar(9) = idx + 1
endif
ipar(10) = 3
return
endif
!
30 ipar(1) = 1
if (rp) then
ipar(8) = ipar(9)
else
ipar(8) = (k-1)*n + 1
endif
if (lp) then
ipar(9) = idx + 1
else
ipar(9) = 1 + k*n
endif
ipar(10) = 4
return
!
40 if (lp) then
ipar(1) = 3
ipar(8) = ipar(9)
ipar(9) = k*n + 1
ipar(10) = 5
return
endif
!
! Modified Gram-Schmidt orthogonalization procedure
! temporary pointer 'ptr' is pointing to the current column of the
! Hessenberg matrix. 'p2' points to the new basis vector
!
50 ipar(7) = ipar(7) + 1
ptr = k * (k - 1) / 2 + hess
p2 = ipar(9)
call mgsro(.false.,n,n,k+1,k+1,fpar(11),w,w(ptr+1), &
& ipar(12))
if (ipar(12).lt.0) goto 200
!
! apply previous Givens rotations and generate a new one to eliminate
! the subdiagonal element.
!
p2 = ptr + 1
do i = 1, k-1
ptr = p2
p2 = p2 + 1
alpha = w(ptr)
c = w(vc+i)
s = w(vs+i)
w(ptr) = c * alpha + s * w(p2)
w(p2) = c * w(p2) - s * alpha
enddo
call givens(w(p2), w(p2+1), c, s)
w(vc+k) = c
w(vs+k) = s
p2 = vrn + k
alpha = - s * w(p2)
w(p2) = c * w(p2)
w(p2+1) = alpha
!
! end of one Arnoldi iteration, alpha will store the estimated
! residual norm at current stage
!
fpar(11) = fpar(11) + 6*k + 2
alpha = abs(alpha)
fpar(5) = alpha
if (k.lt.m .and. .not.(ipar(3).ge.0 .and. alpha.le.fpar(4)) &
& .and. (ipar(6).le.0 .or. ipar(7).lt.ipar(6))) goto 110
!
! update the approximate solution, first solve the upper triangular
! system, temporary pointer ptr points to the Hessenberg matrix,
! p2 points to the right-hand-side (also the solution) of the system.
!
200 ptr = hess + k * (k + 1) / 2
p2 = vrn + k
if (w(ptr).eq.zero) then
!
! if the diagonal elements of the last column is zero, reduce k by 1
! so that a smaller trianguler system is solved [It should only
! happen when the matrix is singular, and at most once!]
!
k = k - 1
if (k.gt.0) then
goto 200
else
ipar(1) = -3
ipar(12) = -4
goto 300
endif
endif
w(p2) = w(p2) / w(ptr)
do i = k-1, 1, -1
ptr = ptr - i - 1
do ii = 1, i
w(vrn+ii) = w(vrn+ii) - w(p2) * w(ptr+ii)
enddo
p2 = p2 - 1
w(p2) = w(p2) / w(ptr)
enddo
!
do ii = 1, n
w(ii) = w(ii) * w(p2)
enddo
do i = 1, k-1
ptr = i*n
p2 = p2 + 1
do ii = 1, n
w(ii) = w(ii) + w(p2) * w(ptr+ii)
enddo
enddo
fpar(11) = fpar(11) + 2*k*n - n + k*(k+1)
!
if (rp) then
ipar(1) = 5
ipar(8) = 1
ipar(9) = idx + 1
ipar(10) = 6
return
endif
!
60 if (rp) then
do i = 1, n
sol(i) = sol(i) + w(idx+i)
enddo
else
do i = 1, n
sol(i) = sol(i) + w(i)
enddo
endif
fpar(11) = fpar(11) + n
!
! process the complete stopping criteria
!
if (ipar(3).eq.999) then
ipar(1) = 10
ipar(8) = -1
ipar(9) = idx + 1
ipar(10) = 7
return
else if (ipar(3).lt.0) then
if (ipar(7).le.m+1) then
fpar(3) = abs(w(vrn+1))
if (ipar(3).eq.-1) fpar(4) = fpar(1)*fpar(3)+fpar(2)
endif
fpar(6) = abs(w(vrn+k))
else
fpar(6) = fpar(5)
endif
!
! do we need to restart ?
!
70 if (ipar(12).ne.0) then
ipar(1) = -3
goto 300
endif
if ((ipar(7).lt.ipar(6) .or. ipar(6).le.0) .and. &
& ((ipar(3).eq.999.and.ipar(11).eq.0) .or. &
& (ipar(3).ne.999.and.fpar(6).gt.fpar(4)))) goto 100
!
! termination, set error code, compute convergence rate
!
if (ipar(1).gt.0) then
if (ipar(3).eq.999 .and. ipar(11).eq.1) then
ipar(1) = 0
else if (ipar(3).ne.999 .and. fpar(6).le.fpar(4)) then
ipar(1) = 0
else if (ipar(7).ge.ipar(6) .and. ipar(6).gt.0) then
ipar(1) = -1
else
ipar(1) = -10
endif
endif
300 if (fpar(3).ne.zero .and. fpar(6).ne.zero .and. &
& ipar(7).gt.ipar(13)) then
fpar(7) = log10(fpar(3) / fpar(6)) / dble(ipar(7)-ipar(13))
else
fpar(7) = zero
endif
return
end
!-----end-of-gmres
!-----------------------------------------------------------------------
subroutine implu(np,umm,beta,ypiv,u,permut,full)
USE DATAPOOL, ONLY : rkind
real(rkind) umm,beta,ypiv(*),u(*),x, xpiv
logical full, perm, permut(*)
integer np,k,npm1
!-----------------------------------------------------------------------
! performs implicitly one step of the lu factorization of a
! banded hessenberg matrix.
!-----------------------------------------------------------------------
if (np .le. 1) goto 12
npm1 = np - 1
!
! -- perform previous step of the factorization-
!
do 6 k=1,npm1
if (.not. permut(k)) goto 5
x=u(k)
u(k) = u(k+1)
u(k+1) = x
5 u(k+1) = u(k+1) - ypiv(k)*u(k)
6 continue
!-----------------------------------------------------------------------
! now determine pivotal information to be used in the next call
!-----------------------------------------------------------------------
12 umm = u(np)
perm = (beta .gt. abs(umm))
if (.not. perm) goto 4
xpiv = umm / beta
u(np) = beta
goto 8
4 xpiv = beta/umm
8 permut(np) = perm
ypiv(np) = xpiv
if (.not. full) return
! shift everything up if full...
do 7 k=1,npm1
ypiv(k) = ypiv(k+1)
permut(k) = permut(k+1)
7 continue
return
!-----end-of-implu
end
!-----------------------------------------------------------------------
subroutine uppdir(n,p,np,lbp,indp,y,u,usav,flops)
use datapool, only : rkind
implicit none