Actual source code: dvd_improvex.c
1: /*
2: SLEPc eigensolver: "davidson"
4: Step: improve the eigenvectors X
6: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7: SLEPc - Scalable Library for Eigenvalue Problem Computations
8: Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain
10: This file is part of SLEPc.
12: SLEPc is free software: you can redistribute it and/or modify it under the
13: terms of version 3 of the GNU Lesser General Public License as published by
14: the Free Software Foundation.
16: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
17: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
19: more details.
21: You should have received a copy of the GNU Lesser General Public License
22: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
23: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
24: */
26: #include davidson.h
27: #include <slepc-private/vecimplslepc.h> /*I "slepcvec.h" I*/
29: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d,void *funcV,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,Vec *auxV,PetscScalar *auxS);
30: PetscErrorCode PCApplyBA_dvd(PC pc,PCSide side,Vec in,Vec out,Vec w);
31: PetscErrorCode PCApply_dvd(PC pc,Vec in,Vec out);
32: PetscErrorCode PCApplyTranspose_dvd(PC pc,Vec in,Vec out);
33: PetscErrorCode MatMult_dvd_jd(Mat A,Vec in,Vec out);
34: PetscErrorCode MatMultTranspose_dvd_jd(Mat A,Vec in,Vec out);
35: PetscErrorCode MatGetVecs_dvd_jd(Mat A,Vec *right,Vec *left);
36: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
37: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
38: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
39: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D);
40: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec **u,Vec **v,Vec *kr,Vec **auxV,PetscScalar **auxS,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld);
41: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld);
42: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld);
43: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d,PetscInt i,PetscScalar* theta,PetscScalar* thetai,PetscInt *maxits,PetscReal *tol);
44: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS);
45: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS);
47: #define size_Z (64*4)
49: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r ****/
51: typedef struct {
52: PetscInt size_X;
53: void
54: *old_improveX_data; /* old improveX_data */
55: improveX_type
56: old_improveX; /* old improveX */
57: KSP ksp; /* correction equation solver */
58: Vec
59: friends, /* reference vector for composite vectors */
60: *auxV; /* auxiliar vectors */
61: PetscScalar *auxS, /* auxiliar scalars */
62: *theta,
63: *thetai; /* the shifts used in the correction eq. */
64: PetscInt maxits, /* maximum number of iterations */
65: r_s, r_e, /* the selected eigenpairs to improve */
66: ksp_max_size; /* the ksp maximum subvectors size */
67: PetscReal tol, /* the maximum solution tolerance */
68: lastTol, /* last tol for dynamic stopping criterion */
69: fix; /* tolerance for using the approx. eigenvalue */
70: PetscBool
71: dynamic; /* if the dynamic stopping criterion is applied */
72: dvdDashboard
73: *d; /* the currect dvdDashboard reference */
74: PC old_pc; /* old pc in ksp */
75: Vec
76: *u, /* new X vectors */
77: *real_KZ, /* original KZ */
78: *KZ; /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
79: PetscScalar
80: *XKZ, /* X'*KZ */
81: *iXKZ; /* inverse of XKZ */
82: PetscInt
83: ldXKZ, /* leading dimension of XKZ */
84: size_iXKZ, /* size of iXKZ */
85: ldiXKZ, /* leading dimension of iXKZ */
86: size_KZ, /* size of converged KZ */
87: size_real_KZ, /* original size of KZ */
88: size_cX, /* last value of d->size_cX */
89: old_size_X; /* last number of improved vectors */
90: PetscBLASInt
91: *iXKZPivots; /* array of pivots */
92: } dvdImprovex_jd;
94: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
95: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
99: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
100: {
101: PetscErrorCode ierr;
102: dvdImprovex_jd *data;
103: PetscBool useGD,her_probl,std_probl;
104: PC pc;
105: PetscInt size_P,s=1;
108: std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
109: her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;
111: /* Setting configuration constrains */
112: PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);
114: /* If the arithmetic is real and the problem is not Hermitian, then
115: the block size is incremented in one */
116: #if !defined(PETSC_USE_COMPLEX)
117: if (!her_probl) {
118: max_bs++;
119: b->max_size_P = PetscMax(b->max_size_P,2);
120: s = 2;
121: } else
122: #endif
123: b->max_size_P = PetscMax(b->max_size_P,1);
124: b->max_size_X = PetscMax(b->max_size_X,max_bs);
125: size_P = b->max_size_P+cX_impr;
126: b->max_size_auxV = PetscMax(b->max_size_auxV,
127: b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
128: ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */
130: b->own_scalars+= size_P*size_P; /* XKZ */
131: b->max_size_auxS = PetscMax(b->max_size_auxS,
132: b->max_size_X*3 + /* theta, thetai */
133: size_P*size_P + /* iXKZ */
134: FromIntToScalar(size_P) + /* iXkZPivots */
135: PetscMax(PetscMax(
136: 3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
137: 8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
138: (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
139: b->own_vecs+= size_P; /* KZ */
141: /* Setup the preconditioner */
142: if (ksp) {
143: KSPGetPC(ksp,&pc);
144: dvd_static_precond_PC(d,b,pc);
145: } else {
146: dvd_static_precond_PC(d,b,0);
147: }
149: /* Setup the step */
150: if (b->state >= DVD_STATE_CONF) {
151: PetscMalloc(sizeof(dvdImprovex_jd),&data);
152: PetscLogObjectMemory(d->eps,sizeof(dvdImprovex_jd));
153: data->dynamic = dynamic;
154: data->size_real_KZ = size_P;
155: data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
156: d->max_cX_in_impr = cX_impr;
157: data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
158: data->ldXKZ = size_P;
159: data->size_X = b->max_size_X;
160: data->old_improveX_data = d->improveX_data;
161: d->improveX_data = data;
162: data->old_improveX = d->improveX;
163: data->ksp = useGD?NULL:ksp;
164: data->d = d;
165: d->improveX = dvd_improvex_jd_gen;
166: data->ksp_max_size = max_bs;
168: DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
169: DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
170: DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
171: }
172: return(0);
173: }
177: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
178: {
179: PetscErrorCode ierr;
180: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
181: PetscInt rA, cA, rlA, clA;
182: Mat A;
183: PetscBool t;
184: PC pc;
187: data->KZ = data->real_KZ;
188: data->size_KZ = data->size_cX = data->old_size_X = 0;
189: data->lastTol = data->dynamic?0.5:0.0;
191: /* Setup the ksp */
192: if (data->ksp) {
193: /* Create the reference vector */
194: VecCreateCompWithVecs(d->V,data->ksp_max_size,NULL,&data->friends);
195: PetscLogObjectParent(d->eps,data->friends);
197: /* Save the current pc and set a PCNONE */
198: KSPGetPC(data->ksp, &data->old_pc);
199: PetscObjectTypeCompare((PetscObject)data->old_pc,PCNONE,&t);
200: data->lastTol = 0.5;
201: if (t) {
202: data->old_pc = 0;
203: } else {
204: PetscObjectReference((PetscObject)data->old_pc);
205: PCCreate(PetscObjectComm((PetscObject)d->eps),&pc);
206: PCSetType(pc,PCSHELL);
207: PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
208: PCShellSetApply(pc,PCApply_dvd);
209: PCShellSetApplyBA(pc,PCApplyBA_dvd);
210: PCShellSetApplyTranspose(pc,PCApplyTranspose_dvd);
211: KSPSetPC(data->ksp,pc);
212: PCDestroy(&pc);
213: }
215: /* Create the (I-v*u')*K*(A-s*B) matrix */
216: MatGetSize(d->A, &rA, &cA);
217: MatGetLocalSize(d->A, &rlA, &clA);
218: MatCreateShell(PetscObjectComm((PetscObject)d->A), rlA*data->ksp_max_size,
219: clA*data->ksp_max_size, rA*data->ksp_max_size,
220: cA*data->ksp_max_size, data, &A);
221: MatShellSetOperation(A, MATOP_MULT,(void(*)(void))MatMult_dvd_jd);
222: MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,(void(*)(void))MatMultTranspose_dvd_jd);
223: MatShellSetOperation(A, MATOP_GET_VECS,(void(*)(void))MatGetVecs_dvd_jd);
225: /* Try to avoid KSPReset */
226: KSPGetOperatorsSet(data->ksp,&t,NULL);
227: if (t) {
228: Mat M;
229: PetscInt rM;
230: KSPGetOperators(data->ksp,&M,NULL,NULL);
231: MatGetSize(M,&rM,NULL);
232: if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
233: }
234: KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
235: KSPSetUp(data->ksp);
236: MatDestroy(&A);
237: } else {
238: data->old_pc = 0;
239: data->friends = NULL;
240: }
241: return(0);
242: }
246: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
247: {
248: PetscErrorCode ierr;
249: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
252: if (data->friends) { VecDestroy(&data->friends); }
254: /* Restore the pc of ksp */
255: if (data->old_pc) {
256: KSPSetPC(data->ksp, data->old_pc);
257: PCDestroy(&data->old_pc);
258: }
259: return(0);
260: }
264: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
265: {
266: PetscErrorCode ierr;
267: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
270: /* Restore changes in dvdDashboard */
271: d->improveX_data = data->old_improveX_data;
273: /* Free local data and objects */
274: PetscFree(data);
275: return(0);
276: }
280: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
281: {
282: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
283: PetscErrorCode ierr;
284: PetscInt i,j,n,maxits,maxits0,lits,s,ld,k;
285: PetscScalar *pX,*pY,*auxS = d->auxS,*auxS0;
286: PetscReal tol,tol0;
287: Vec *u,*v,*kr,kr_comp,D_comp;
288: PetscBool odd_situation = PETSC_FALSE;
291: /* Quick exit */
292: if ((max_size_D == 0) || r_e-r_s <= 0) {
293: *size_D = 0;
294: /* Callback old improveX */
295: if (data->old_improveX) {
296: d->improveX_data = data->old_improveX_data;
297: data->old_improveX(d, NULL, 0, 0, 0, NULL);
298: d->improveX_data = data;
299: }
300: return(0);
301: }
303: n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
304: if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
305: if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");
307: DSGetLeadingDimension(d->ps,&ld);
309: /* Restart lastTol if a new pair converged */
310: if (data->dynamic && data->size_cX < d->size_cX)
311: data->lastTol = 0.5;
313: for (i=0,s=0,auxS0=auxS;i<n;i+=s) {
314: /* If the selected eigenvalue is complex, but the arithmetic is real... */
315: #if !defined(PETSC_USE_COMPLEX)
316: if (d->eigi[i] != 0.0) {
317: if (i+2 <= max_size_D) s=2;
318: else break;
319: } else
320: #endif
321: s=1;
323: data->auxV = d->auxV;
324: data->r_s = r_s+i; data->r_e = r_s+i+s;
325: auxS = auxS0;
326: data->theta = auxS; auxS+= 2*s;
327: data->thetai = auxS; auxS+= s;
328: kr = data->auxV; data->auxV+= s;
330: /* Compute theta, maximum iterations and tolerance */
331: maxits = 0; tol = 1;
332: for (j=0;j<s;j++) {
333: d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
334: &data->thetai[j], &maxits0, &tol0);
335: maxits+= maxits0; tol*= tol0;
336: }
337: maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);
339: /* Compute u, v and kr */
340: k = r_s+i+d->cX_in_H;
341: DSVectors(d->ps,DS_MAT_X,&k,NULL);
342: DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
343: k = r_s+i+d->cX_in_H;
344: DSVectors(d->ps,DS_MAT_Y,&k,NULL);
345: DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
346: DSGetArray(d->ps,DS_MAT_X,&pX);
347: DSGetArray(d->ps,DS_MAT_Y,&pY);
348: dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
349: DSRestoreArray(d->ps,DS_MAT_X,&pX);
350: DSRestoreArray(d->ps,DS_MAT_Y,&pY);
351: data->u = u;
353: /* Check if the first eigenpairs are converged */
354: if (i == 0) {
355: PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
356: d->auxV+= n_auxV; d->size_auxV-= n_auxV;
357: d->auxS+= n_auxS; d->size_auxS-= n_auxS;
358: d->preTestConv(d,0,s,s,d->auxV-s,NULL,&d->npreconv);
359: d->auxV-= n_auxV; d->size_auxV+= n_auxV;
360: d->auxS-= n_auxS; d->size_auxS+= n_auxS;
361: if (d->npreconv > 0) break;
362: }
364: /* Test the odd situation of solving Ax=b with A=I */
365: #if !defined(PETSC_USE_COMPLEX)
366: odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && data->thetai[0] == 0. && d->B == NULL)?PETSC_TRUE:PETSC_FALSE;
367: #else
368: odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && d->B == NULL)?PETSC_TRUE:PETSC_FALSE;
369: #endif
370: /* If JD */
371: if (data->ksp && !odd_situation) {
372: data->auxS = auxS;
374: /* kr <- -kr */
375: for (j=0;j<s;j++) {
376: VecScale(kr[j],-1.0);
377: }
379: /* Compose kr and D */
380: VecCreateCompWithVecs(kr,data->ksp_max_size,data->friends,&kr_comp);
381: VecCreateCompWithVecs(&D[i],data->ksp_max_size,data->friends,&D_comp);
382: VecCompSetSubVecs(data->friends,s,NULL);
384: /* Solve the correction equation */
385: KSPSetTolerances(data->ksp,tol,PETSC_DEFAULT,PETSC_DEFAULT,maxits);
386: KSPSolve(data->ksp,kr_comp,D_comp);
387: KSPGetIterationNumber(data->ksp,&lits);
388: d->eps->st->lineariterations+= lits;
390: /* Destroy the composed ks and D */
391: VecDestroy(&kr_comp);
392: VecDestroy(&D_comp);
394: /* If GD */
395: } else {
396: for (j=0;j<s;j++) {
397: d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
398: }
399: dvd_improvex_apply_proj(d, &D[i], s, auxS);
400: }
401: /* Prevent that short vectors are discarded in the orthogonalization */
402: if (i == 0 && d->eps->errest[d->nconv+r_s] > PETSC_MACHINE_EPSILON && d->eps->errest[d->nconv+r_s] < PETSC_MAX_REAL) {
403: for (j=0;j<s;j++) {
404: VecScale(D[j],1.0/d->eps->errest[d->nconv+r_s]);
405: }
406: }
407: }
408: *size_D = i;
409: if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
411: /* Callback old improveX */
412: if (data->old_improveX) {
413: d->improveX_data = data->old_improveX_data;
414: data->old_improveX(d, NULL, 0, 0, 0, NULL);
415: d->improveX_data = data;
416: }
417: return(0);
418: }
420: /* y <- theta[1]A*x - theta[0]*B*x
421: auxV, two auxiliary vectors */
424: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
425: {
426: PetscErrorCode ierr;
427: PetscInt n,i;
428: const Vec *Bx;
431: n = data->r_e - data->r_s;
432: for (i=0;i<n;i++) {
433: MatMult(data->d->A,x[i],y[i]);
434: }
436: for (i=0;i<n;i++) {
437: #if !defined(PETSC_USE_COMPLEX)
438: if (data->d->eigi[data->r_s+i] != 0.0) {
439: if (data->d->B) {
440: MatMult(data->d->B,x[i],auxV[0]);
441: MatMult(data->d->B,x[i+1],auxV[1]);
442: Bx = auxV;
443: } else Bx = &x[i];
445: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i + ti_i*Bx_i+1;
446: y_i+1 t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1 ] */
447: VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
448: VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
449: i++;
450: } else
451: #endif
452: {
453: if (data->d->B) {
454: MatMult(data->d->B,x[i],auxV[0]);
455: Bx = auxV;
456: } else Bx = &x[i];
457: VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
458: }
459: }
460: return(0);
461: }
463: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
464: auxV, two auxiliary vectors */
467: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
468: {
469: PetscErrorCode ierr;
470: PetscInt n,i;
471: const Vec *Bx;
474: n = data->r_e - data->r_s;
475: for (i=0;i<n;i++) {
476: MatMultTranspose(data->d->A,x[i],y[i]);
477: }
479: for (i=0;i<n;i++) {
480: #if !defined(PETSC_USE_COMPLEX)
481: if (data->d->eigi[data->r_s+i] != 0.0) {
482: if (data->d->B) {
483: MatMultTranspose(data->d->B,x[i],auxV[0]);
484: MatMultTranspose(data->d->B,x[i+1],auxV[1]);
485: Bx = auxV;
486: } else Bx = &x[i];
488: /* y_i <- [ t_2i+1*A*x_i - t_2i*Bx_i - ti_i*Bx_i+1;
489: y_i+1 t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1 ] */
490: VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
491: VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
492: i++;
493: } else
494: #endif
495: {
496: if (data->d->B) {
497: MatMultTranspose(data->d->B,x[i],auxV[0]);
498: Bx = auxV;
499: } else Bx = &x[i];
500: VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
501: }
502: }
503: return(0);
504: }
508: PetscErrorCode PCApplyBA_dvd(PC pc,PCSide side,Vec in,Vec out,Vec w)
509: {
510: PetscErrorCode ierr;
511: dvdImprovex_jd *data;
512: PetscInt n,i;
513: const Vec *inx,*outx,*wx;
514: Mat A;
517: PCGetOperators(pc,&A,NULL,NULL);
518: MatShellGetContext(A,(void**)&data);
519: VecCompGetSubVecs(in,NULL,&inx);
520: VecCompGetSubVecs(out,NULL,&outx);
521: VecCompGetSubVecs(w,NULL,&wx);
522: n = data->r_e - data->r_s;
524: /* Check auxiliary vectors */
525: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
527: switch (side) {
528: case PC_LEFT:
529: /* aux <- theta[1]A*in - theta[0]*B*in */
530: dvd_aux_matmult(data,inx,data->auxV,outx);
532: /* out <- K * aux */
533: for (i=0;i<n;i++) {
534: data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
535: }
536: break;
537: case PC_RIGHT:
538: /* aux <- K * in */
539: for (i=0;i<n;i++) {
540: data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
541: }
543: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
544: dvd_aux_matmult(data,data->auxV,outx,wx);
545: break;
546: case PC_SYMMETRIC:
547: /* aux <- K^{1/2} * in */
548: for (i=0;i<n;i++) {
549: PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
550: }
552: /* out <- theta[1]A*auxV - theta[0]*B*auxV */
553: dvd_aux_matmult(data,data->auxV,wx,outx);
555: /* aux <- K^{1/2} * in */
556: for (i=0;i<n;i++) {
557: PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
558: }
559: break;
560: default:
561: SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
562: }
564: /* out <- out - v*(u'*out) */
565: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
566: return(0);
567: }
571: PetscErrorCode PCApply_dvd(PC pc,Vec in,Vec out)
572: {
573: PetscErrorCode ierr;
574: dvdImprovex_jd *data;
575: PetscInt n,i;
576: const Vec *inx, *outx;
577: Mat A;
580: PCGetOperators(pc,&A,NULL,NULL);
581: MatShellGetContext(A,(void**)&data);
582: VecCompGetSubVecs(in,NULL,&inx);
583: VecCompGetSubVecs(out,NULL,&outx);
584: n = data->r_e - data->r_s;
586: /* out <- K * in */
587: for (i=0;i<n;i++) {
588: data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
589: }
591: /* out <- out - v*(u'*out) */
592: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
593: return(0);
594: }
598: PetscErrorCode PCApplyTranspose_dvd(PC pc,Vec in,Vec out)
599: {
600: PetscErrorCode ierr;
601: dvdImprovex_jd *data;
602: PetscInt n,i;
603: const Vec *inx, *outx;
604: Mat A;
607: PCGetOperators(pc,&A,NULL,NULL);
608: MatShellGetContext(A,(void**)&data);
609: VecCompGetSubVecs(in,NULL,&inx);
610: VecCompGetSubVecs(out,NULL,&outx);
611: n = data->r_e - data->r_s;
613: /* Check auxiliary vectors */
614: if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
616: /* auxV <- in */
617: for (i=0;i<n;i++) {
618: VecCopy(inx[i],data->auxV[i]);
619: }
621: /* auxV <- auxV - u*(v'*auxV) */
622: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
624: /* out <- K' * aux */
625: for (i=0;i<n;i++) {
626: PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
627: }
628: return(0);
629: }
633: PetscErrorCode MatMult_dvd_jd(Mat A,Vec in,Vec out)
634: {
635: PetscErrorCode ierr;
636: dvdImprovex_jd *data;
637: PetscInt n;
638: const Vec *inx, *outx;
639: PCSide side;
642: MatShellGetContext(A,(void**)&data);
643: VecCompGetSubVecs(in,NULL,&inx);
644: VecCompGetSubVecs(out,NULL,&outx);
645: n = data->r_e - data->r_s;
647: /* Check auxiliary vectors */
648: if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
650: /* out <- theta[1]A*in - theta[0]*B*in */
651: dvd_aux_matmult(data,inx,outx,data->auxV);
653: KSPGetPCSide(data->ksp,&side);
654: if (side == PC_RIGHT) {
655: /* out <- out - v*(u'*out) */
656: dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
657: }
658: return(0);
659: }
663: PetscErrorCode MatMultTranspose_dvd_jd(Mat A,Vec in,Vec out)
664: {
665: PetscErrorCode ierr;
666: dvdImprovex_jd *data;
667: PetscInt n,i;
668: const Vec *inx,*outx,*r,*auxV;
669: PCSide side;
672: MatShellGetContext(A,(void**)&data);
673: VecCompGetSubVecs(in,NULL,&inx);
674: VecCompGetSubVecs(out,NULL,&outx);
675: n = data->r_e - data->r_s;
677: /* Check auxiliary vectors */
678: if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");
680: KSPGetPCSide(data->ksp,&side);
681: if (side == PC_RIGHT) {
682: /* auxV <- in */
683: for (i=0;i<n;i++) {
684: VecCopy(inx[i],data->auxV[i]);
685: }
687: /* auxV <- auxV - v*(u'*auxV) */
688: dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
689: r = data->auxV;
690: auxV = data->auxV+n;
691: } else {
692: r = inx;
693: auxV = data->auxV;
694: }
696: /* out <- theta[1]A*r - theta[0]*B*r */
697: dvd_aux_matmulttrans(data,r,outx,auxV);
698: return(0);
699: }
703: PetscErrorCode MatGetVecs_dvd_jd(Mat A,Vec *right,Vec *left)
704: {
705: PetscErrorCode ierr;
706: Vec *r, *l;
707: dvdImprovex_jd *data;
708: PetscInt n, i;
711: MatShellGetContext(A, (void**)&data);
712: n = data->ksp_max_size;
713: if (right) {
714: PetscMalloc(sizeof(Vec)*n, &r);
715: }
716: if (left) {
717: PetscMalloc(sizeof(Vec)*n, &l);
718: }
719: for (i=0; i<n; i++) {
720: MatGetVecs(data->d->A, right?&r[i]:NULL,left?&l[i]:NULL);
721: }
722: if (right) {
723: VecCreateCompWithVecs(r, n, data->friends, right);
724: for (i=0; i<n; i++) {
725: VecDestroy(&r[i]);
726: }
727: }
728: if (left) {
729: VecCreateCompWithVecs(l, n, data->friends, left);
730: for (i=0; i<n; i++) {
731: VecDestroy(&l[i]);
732: }
733: }
735: if (right) {
736: PetscFree(r);
737: }
738: if (left) {
739: PetscFree(l);
740: }
741: return(0);
742: }
746: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d,dvdBlackboard *b,ProjType_t p)
747: {
749: /* Setup the step */
750: if (b->state >= DVD_STATE_CONF) {
751: switch (p) {
752: case DVD_PROJ_KXX:
753: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
754: case DVD_PROJ_KZX:
755: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
756: }
757: }
758: return(0);
759: }
763: /*
764: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
765: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
766: where
767: auxV, 4*(i_e-i_s) auxiliar global vectors
768: pX,pY, the right and left eigenvectors of the projected system
769: ld, the leading dimension of pX and pY
770: auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
771: */
772: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec **u,Vec **v,Vec *kr,Vec **auxV,PetscScalar **auxS,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld)
773: {
774: #if defined(PETSC_MISSING_LAPACK_GETRF)
776: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
777: #else
778: PetscErrorCode ierr;
779: PetscInt n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
780: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
781: PetscBLASInt s, ldXKZ, info;
782: DvdReduction r;
783: DvdReductionChunk ops[2];
784: DvdMult_copy_func sr[2];
787: /* Check consistency */
788: V_new = d->size_cX - data->size_cX;
789: if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
790: data->old_size_X = n;
792: /* KZ <- KZ(rm:rm+max_cX-1) */
793: rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
794: for (i=0; i<d->max_cX_in_impr; i++) {
795: VecCopy(data->KZ[i+rm], data->KZ[i]);
796: }
797: data->size_cX = d->size_cX;
799: /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
800: for (i=0; i<d->max_cX_in_impr; i++) {
801: SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
802: }
803: data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);
805: /* Compute X, KZ and KR */
806: *u = *auxV; *auxV+= n;
807: *v = &data->KZ[data->size_KZ];
808: d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
809: pX, pY, ld);
811: /* XKZ <- X'*KZ */
812: size_KZ = data->size_KZ+n;
813: size_in = 2*n*data->size_KZ+n*n;
814: SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,PetscObjectComm((PetscObject)d->V[0]));
815: VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
816: VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
817: SlepcAllReduceSumEnd(&r);
819: /* iXKZ <- inv(XKZ) */
820: PetscBLASIntCast(size_KZ,&s);
821: data->ldiXKZ = data->size_iXKZ = size_KZ;
822: data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
823: data->iXKZPivots = (PetscBLASInt*)*auxS;
824: *auxS += FromIntToScalar(size_KZ);
825: SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
826: PetscBLASIntCast(data->ldiXKZ,&ldXKZ);
827: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
828: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info));
829: PetscFPTrapPop();
830: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);
831: return(0);
832: #endif
833: }
835: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
836: { \
837: VecDot((Axr), (ur), &(b)[0]); /* r*A*r */ \
838: VecDot((Axr), (ui), &(b)[1]); /* i*A*r */ \
839: VecDot((Axi), (ur), &(b)[2]); /* r*A*i */ \
840: VecDot((Axi), (ui), &(b)[3]); /* i*A*i */ \
841: VecDot((Bxr), (ur), &(b)[4]); /* r*B*r */ \
842: VecDot((Bxr), (ui), &(b)[5]); /* i*B*r */ \
843: VecDot((Bxi), (ur), &(b)[6]); /* r*B*i */ \
844: VecDot((Bxi), (ui), &(b)[7]); /* i*B*i */ \
845: (b)[0] = (b)[0]+(b)[3]; /* rAr+iAi */ \
846: (b)[2] = (b)[2]-(b)[1]; /* rAi-iAr */ \
847: (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
848: (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
849: (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
850: *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
851: *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
852: }
854: #if !defined(PETSC_USE_COMPLEX)
855: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
856: for ((i)=0; (i)<(n); (i)++) { \
857: if ((eigi)[(i_s)+(i)] != 0.0) { \
858: /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
859: eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
860: k = (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr) */ \
861: DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
862: (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
863: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
864: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 || \
865: PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
866: PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10) { \
867: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
868: "Rayleigh quotient value %G+%G\n", \
869: (eigr)[(i_s)+(i)], \
870: (eigi)[(i_s)+(i)], (b)[8], (b)[9]); \
871: } \
872: (i)++; \
873: } \
874: }
875: #else
876: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
877: for ((i)=0; (i)<(n); (i)++) { \
878: (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]); \
879: (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]); \
880: (b)[0] = (b)[0]/(b)[1]; \
881: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
882: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10) { \
883: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
884: "Rayleigh quotient value %G+%G\n", \
885: PetscRealPart((eigr)[(i_s)+(i)]), \
886: PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
887: PetscImaginaryPart((b)[0])); \
888: } \
889: }
890: #endif
894: /*
895: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
896: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
897: where
898: auxV, 4*(i_e-i_s) auxiliar global vectors
899: pX,pY, the right and left eigenvectors of the projected system
900: ld, the leading dimension of pX and pY
901: */
902: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld)
903: {
904: PetscErrorCode ierr;
905: PetscInt n = i_e - i_s, i;
906: PetscScalar b[16];
907: Vec *Ax, *Bx, *r=auxV, X[4];
908: /* The memory manager doen't allow to call a subroutines */
909: PetscScalar Z[size_Z];
912: /* u <- X(i) */
913: dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);
915: /* v <- theta[0]A*u + theta[1]*B*u */
917: /* Bx <- B*X(i) */
918: Bx = kr;
919: if (d->BV) {
920: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
921: } else {
922: for (i=0;i<n;i++) {
923: if (d->B) {
924: MatMult(d->B, u[i], Bx[i]);
925: } else {
926: VecCopy(u[i], Bx[i]);
927: }
928: }
929: }
931: /* Ax <- A*X(i) */
932: Ax = r;
933: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
935: /* v <- Y(i) */
936: SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
938: /* Recompute the eigenvalue */
939: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);
941: for (i=0;i<n;i++) {
942: #if !defined(PETSC_USE_COMPLEX)
943: if (d->eigi[i_s+i] != 0.0) {
944: /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i' 0 1 0
945: 0 theta_2i' 0 1
946: theta_2i+1 -thetai_i -eigr_i -eigi_i
947: thetai_i theta_2i+1 eigi_i -eigr_i ] */
948: b[0] = b[5] = PetscConj(theta[2*i]);
949: b[2] = b[7] = -theta[2*i+1];
950: b[6] = -(b[3] = thetai[i]);
951: b[1] = b[4] = 0.0;
952: b[8] = b[13] = 1.0/d->nX[i_s+i];
953: b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
954: b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
955: b[9] = b[12] = 0.0;
956: X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
957: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
958: i++;
959: } else
960: #endif
961: {
962: /* [Ax_i Bx_i]*= [ theta_2i' 1/nX_i
963: theta_2i+1 -eig_i/nX_i ] */
964: b[0] = PetscConj(theta[i*2]);
965: b[1] = theta[i*2+1];
966: b[2] = 1.0/d->nX[i_s+i];
967: b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
968: X[0] = Ax[i]; X[1] = Bx[i];
969: SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
970: }
971: }
972: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
974: /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
975: for (i=0;i<n;i++) {
976: d->improvex_precond(d, i_s+i, r[i], v[i]);
977: }
979: /* kr <- P*(Ax - eig_i*Bx) */
980: d->calcpairs_proj_res(d, i_s, i_e, kr);
981: return(0);
982: }
986: /*
987: Compute: u <- K^{-1}*X, v <- X,
988: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
989: where
990: auxV, 4*(i_e-i_s) auxiliar global vectors
991: pX,pY, the right and left eigenvectors of the projected system
992: ld, the leading dimension of pX and pY
993: */
994: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld)
995: {
996: PetscErrorCode ierr;
997: PetscInt n = i_e - i_s, i;
998: PetscScalar b[16];
999: Vec *Ax, *Bx, *r = auxV, X[4];
1000: /* The memory manager doen't allow to call a subroutines */
1001: PetscScalar Z[size_Z];
1004: /* [v u] <- X(i) Y(i) */
1005: dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1006: SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);
1008: /* Bx <- B*X(i) */
1009: Bx = r;
1010: if (d->BV) {
1011: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1012: } else {
1013: if (d->B) {
1014: for (i=0;i<n;i++) {
1015: MatMult(d->B, v[i], Bx[i]);
1016: }
1017: } else Bx = v;
1018: }
1020: /* Ax <- A*X(i) */
1021: Ax = kr;
1022: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1024: /* Recompute the eigenvalue */
1025: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);
1027: for (i=0;i<n;i++) {
1028: if (d->eigi[i_s+i] == 0.0) {
1029: /* kr <- Ax -eig*Bx */
1030: VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1031: } else {
1032: /* [kr_i kr_i+1 r_i r_i+1]*= [ 1 0
1033: 0 1
1034: -eigr_i -eigi_i
1035: eigi_i -eigr_i] */
1036: b[0] = b[5] = 1.0/d->nX[i_s+i];
1037: b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1038: b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1039: b[1] = b[4] = 0.0;
1040: X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1041: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1042: i++;
1043: }
1044: }
1045: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
1047: /* kr <- P*kr */
1048: d->calcpairs_proj_res(d, i_s, i_e, r);
1050: /* u <- K^{-1} X(i) */
1051: for (i=0;i<n;i++) {
1052: d->improvex_precond(d, i_s+i, v[i], u[i]);
1053: }
1054: return(0);
1055: }
1059: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d,dvdBlackboard *b,PetscInt maxits,PetscReal tol,PetscReal fix)
1060: {
1061: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1064: /* Setup the step */
1065: if (b->state >= DVD_STATE_CONF) {
1066: data->maxits = maxits;
1067: data->tol = tol;
1068: data->fix = fix;
1069: d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1070: }
1071: return(0);
1072: }
1076: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d,PetscInt i,PetscScalar* theta,PetscScalar* thetai,PetscInt *maxits,PetscReal *tol)
1077: {
1078: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1079: PetscReal a;
1082: a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);
1084: if (d->nR[i]/a < data->fix) {
1085: theta[0] = d->eigr[i];
1086: theta[1] = 1.0;
1087: #if !defined(PETSC_USE_COMPLEX)
1088: *thetai = d->eigi[i];
1089: #endif
1090: } else {
1091: theta[0] = d->target[0];
1092: theta[1] = d->target[1];
1093: #if !defined(PETSC_USE_COMPLEX)
1094: *thetai = 0.0;
1095: #endif
1096: }
1098: #if defined(PETSC_USE_COMPLEX)
1099: if (thetai) *thetai = 0.0;
1100: #endif
1101: *maxits = data->maxits;
1102: *tol = data->tol;
1103: return(0);
1104: }
1106: /**** Patterns implementation *************************************************/
1108: typedef PetscInt (*funcV0_t)(dvdDashboard*,PetscInt,PetscInt,Vec*);
1109: typedef PetscInt (*funcV1_t)(dvdDashboard*,PetscInt,PetscInt,Vec*,PetscScalar*,Vec);
1113: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1114: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d,void *funcV,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,Vec *auxV,PetscScalar *auxS)
1115: {
1116: PetscErrorCode ierr;
1117: PetscInt i;
1120: if (max_size_D >= r_e-r_s+1) {
1121: /* The optimized version needs one vector extra of D */
1122: /* D(1:r.size) = R(r_s:r_e-1) */
1123: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1124: else ((funcV0_t)funcV)(d, r_s, r_e, D+1);
1126: /* D = K^{-1} * R */
1127: for (i=0; i<r_e-r_s; i++) {
1128: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1129: }
1130: } else if (max_size_D == r_e-r_s) {
1131: /* Non-optimized version */
1132: /* auxV <- R[r_e-1] */
1133: if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1134: else ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);
1136: /* D(1:r.size-1) = R(r_s:r_e-2) */
1137: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1138: else ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);
1140: /* D = K^{-1} * R */
1141: for (i=0; i<r_e-r_s-1; i++) {
1142: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1143: }
1144: d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1145: } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1146: return(0);
1147: }
1151: /* Compute (I - KZ*iXKZ*X')*V where,
1152: V, the vectors to apply the projector,
1153: cV, the number of vectors in V,
1154: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1155: */
1156: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS)
1157: {
1158: #if defined(PETSC_MISSING_LAPACK_GETRS)
1160: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1161: #else
1162: PetscErrorCode ierr;
1163: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1164: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1165: PetscScalar *h, *in, *out;
1166: PetscBLASInt cV_, n, info, ld;
1167: DvdReduction r;
1168: DvdReductionChunk ops[4];
1169: DvdMult_copy_func sr[4];
1172: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1174: /* h <- X'*V */
1175: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1176: SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1177: PetscObjectComm((PetscObject)d->V[0]));
1178: for (i=0; i<cV; i++) {
1179: VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1180: VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1181: }
1182: SlepcAllReduceSumEnd(&r);
1184: /* h <- iXKZ\h */
1185: PetscBLASIntCast(cV,&cV_);
1186: PetscBLASIntCast(data->size_iXKZ,&n);
1187: PetscBLASIntCast(data->ldiXKZ,&ld);
1189: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1190: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info));
1191: PetscFPTrapPop();
1192: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1194: /* V <- V - KZ*h */
1195: for (i=0; i<cV; i++) {
1196: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1197: }
1198: return(0);
1199: #endif
1200: }
1204: /* Compute (I - X*iXKZ*KZ')*V where,
1205: V, the vectors to apply the projector,
1206: cV, the number of vectors in V,
1207: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1208: */
1209: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS)
1210: {
1211: #if defined(PETSC_MISSING_LAPACK_GETRS)
1213: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1214: #else
1215: PetscErrorCode ierr;
1216: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1217: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1218: PetscScalar *h, *in, *out;
1219: PetscBLASInt cV_, n, info, ld;
1220: DvdReduction r;
1221: DvdReductionChunk ops[2];
1222: DvdMult_copy_func sr[2];
1225: if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
1227: /* h <- KZ'*V */
1228: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1229: SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1230: PetscObjectComm((PetscObject)d->V[0]));
1231: for (i=0; i<cV; i++) {
1232: VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1233: }
1234: SlepcAllReduceSumEnd(&r);
1236: /* h <- iXKZ\h */
1237: PetscBLASIntCast(cV,&cV_);
1238: PetscBLASIntCast(data->size_iXKZ,&n);
1239: PetscBLASIntCast(data->ldiXKZ,&ld);
1241: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1242: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info));
1243: PetscFPTrapPop();
1244: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1246: /* V <- V - X*h */
1247: for (i=0; i<cV; i++) {
1248: SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1249: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1250: }
1251: return(0);
1252: #endif
1253: }