Annotation of sys/dev/raidframe/rf_dagfuncs.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_dagfuncs.c,v 1.7 2004/09/20 17:51:07 miod Exp $ */
2: /* $NetBSD: rf_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Mark Holland, William V. Courtright II
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /*
32: * dagfuncs.c -- DAG node execution routines.
33: *
34: * Rules:
35: * 1. Every DAG execution function must eventually cause node->status to
36: * get set to "good" or "bad", and "FinishNode" to be called. In the
37: * case of nodes that complete immediately (xor, NullNodeFunc, etc),
38: * the node execution function can do these two things directly. In
39: * the case of nodes that have to wait for some event (a disk read to
40: * complete, a lock to be released, etc) to occur before they can
41: * complete, this is typically achieved by having whatever module
42: * is doing the operation call GenericWakeupFunc upon completion.
43: * 2. DAG execution functions should check the status in the DAG header
44: * and NOP out their operations if the status is not "enable". However,
45: * execution functions that release resources must be sure to release
46: * them even when they NOP out the function that would use them.
47: * Functions that acquire resources should go ahead and acquire them
48: * even when they NOP, so that a downstream release node will not have
49: * to check to find out whether or not the acquire was suppressed.
50: */
51:
52: #include <sys/ioctl.h>
53: #include <sys/param.h>
54:
55: #include "rf_archs.h"
56: #include "rf_raid.h"
57: #include "rf_dag.h"
58: #include "rf_layout.h"
59: #include "rf_etimer.h"
60: #include "rf_acctrace.h"
61: #include "rf_diskqueue.h"
62: #include "rf_dagfuncs.h"
63: #include "rf_general.h"
64: #include "rf_engine.h"
65: #include "rf_dagutils.h"
66:
67: #include "rf_kintf.h"
68:
69: #if RF_INCLUDE_PARITYLOGGING > 0
70: #include "rf_paritylog.h"
71: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
72:
/*
 * Dispatch pointers for the DAG execution primitives.  They are installed
 * by rf_ConfigureDAGFuncs() below so that DAG construction code can refer
 * to the disk read/write/unlock functions and their undo counterparts
 * without naming a particular implementation.
 */
int (*rf_DiskReadFunc) (RF_DagNode_t *);
int (*rf_DiskWriteFunc) (RF_DagNode_t *);
int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
82:
83: /*****************************************************************************
84: * Main (only) configuration routine for this module.
85: *****************************************************************************/
86: int
87: rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp)
88: {
89: RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) ||
90: ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
91: rf_DiskReadFunc = rf_DiskReadFuncForThreads;
92: rf_DiskReadUndoFunc = rf_DiskUndoFunc;
93: rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
94: rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
95: rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
96: rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
97: rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
98: rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
99: rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
100: return (0);
101: }
102:
103:
104: /*****************************************************************************
105: * The execution function associated with a terminate node.
106: *****************************************************************************/
107: int
108: rf_TerminateFunc(RF_DagNode_t *node)
109: {
110: RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
111: node->status = rf_good;
112: return (rf_FinishNode(node, RF_THREAD_CONTEXT));
113: }
114:
115: int
116: rf_TerminateUndoFunc(RF_DagNode_t *node)
117: {
118: return (0);
119: }
120:
121:
122: /*****************************************************************************
123: * Execution functions associated with a mirror node.
124: *
125: * parameters:
126: *
127: * 0 - Physical disk address of data.
128: * 1 - Buffer for holding read data.
129: * 2 - Parity stripe ID.
130: * 3 - Flags.
131: * 4 - Physical disk address of mirror (parity).
132: *
133: *****************************************************************************/
134:
135: int
136: rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node)
137: {
138: /*
139: * Select the mirror copy with the shortest queue and fill in node
140: * parameters with physical disk address.
141: */
142:
143: rf_SelectMirrorDiskIdle(node);
144: return (rf_DiskReadFunc(node));
145: }
146:
147: int
148: rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node)
149: {
150: /*
151: * Select the mirror copy with the shortest queue and fill in node
152: * parameters with physical disk address.
153: */
154:
155: rf_SelectMirrorDiskPartition(node);
156: return (rf_DiskReadFunc(node));
157: }
158:
159: int
160: rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node)
161: {
162: return (0);
163: }
164:
165:
166:
#if RF_INCLUDE_PARITYLOGGING > 0
/*****************************************************************************
 * The execution function associated with a parity log update node.
 *****************************************************************************/
/*
 * params: [0] pda of the region being logged, [1] data buffer.
 * Creates a parity-log record and appends it to the log; the log module
 * is expected to call node->wakeFunc on completion.  On allocation
 * failure the node is completed immediately with ENOMEM.
 * NOTE(review): when the DAG is disabled (status != rf_enable) this node
 * returns without ever calling wakeFunc — confirm callers expect that.
 */
int
rf_ParityLogUpdateFunc(RF_DagNode_t *node)
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node,
		    node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* Allocation failed: charge the time and fail the node. */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}


/*****************************************************************************
 * The execution function associated with a parity log overwrite node.
 *****************************************************************************/
/*
 * Identical to rf_ParityLogUpdateFunc except the record is created with
 * RF_OVERWRITE instead of RF_UPDATE.
 */
int
rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr), node->wakeFunc,
		    (void *) node, node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* Allocation failed: charge the time and fail the node. */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}
#else /* RF_INCLUDE_PARITYLOGGING > 0 */

/* Parity logging compiled out: these stubs do nothing and succeed. */
int
rf_ParityLogUpdateFunc(RF_DagNode_t *node)
{
	return (0);
}

int
rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
{
	return (0);
}
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
241:
242: int
243: rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node)
244: {
245: return (0);
246: }
247:
248: int
249: rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node)
250: {
251: return (0);
252: }
253:
254: /*****************************************************************************
255: * The execution function associated with a NOP node.
256: *****************************************************************************/
257: int
258: rf_NullNodeFunc(RF_DagNode_t *node)
259: {
260: node->status = rf_good;
261: return (rf_FinishNode(node, RF_THREAD_CONTEXT));
262: }
263:
264: int
265: rf_NullNodeUndoFunc(RF_DagNode_t *node)
266: {
267: node->status = rf_undone;
268: return (rf_FinishNode(node, RF_THREAD_CONTEXT));
269: }
270:
271:
272: /*****************************************************************************
273: * The execution function associated with a disk-read node.
274: *****************************************************************************/
/*
 * params: [0] pda, [1] data buffer, [2] parity stripe ID,
 * [3] packed priority/lock/unlock/RU word (see RF_EXTRACT_* macros).
 * Builds a disk-queue request and enqueues it; completion is reported
 * asynchronously through node->wakeFunc.
 */
int
rf_DiskReadFuncForThreads(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	/*
	 * If the DAG is disabled, the request is still enqueued — as a NOP —
	 * so that any queue lock/unlock side effects are preserved.
	 */
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* A node may lock or unlock its disk queue, but never both. */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;

	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    node, NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr), flags, b_proc);
	if (!req) {
		/* Out of memory: report failure through the wakeup path. */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* The completion path frees this request via dagFuncData. */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
	}
	return (0);
}
312:
313:
314: /*****************************************************************************
315: * the execution function associated with a disk-write node
316: *****************************************************************************/
/*
 * Write-side counterpart of rf_DiskReadFuncForThreads: identical
 * parameter layout and queueing logic, but issues RF_IO_TYPE_WRITE.
 */
int
rf_DiskWriteFuncForThreads(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	/* A disabled DAG still enqueues a NOP to keep lock semantics. */
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ?
	    RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* Normal processing (rollaway or forward recovery) begins here. */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc, (void *) node, NULL,
	    node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
	    flags, b_proc);

	if (!req) {
		/* Out of memory: report failure through the wakeup path. */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		/* The completion path frees this request via dagFuncData. */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
	}

	return (0);
}
356: /*****************************************************************************
357: * The undo function for disk nodes.
358: * Note: This is not a proper undo of a write node, only locks are released.
359: * old data is not restored to disk !
360: *****************************************************************************/
int
rf_DiskUndoFunc(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	/* pda is only needed to locate the right per-disk queue. */
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;

	/*
	 * Enqueue a NOP request carrying RF_UNLOCK_DISK_QUEUE: its only
	 * effect is to release the queue lock the forward node acquired.
	 * Old data is NOT restored to disk (see banner above).
	 */
	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0,
	    (int (*) (void *, int)) node->wakeFunc, (void *) node,
	    NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
	    RF_UNLOCK_DISK_QUEUE, NULL);
	if (!req)
		(node->wakeFunc) (node, ENOMEM);
	else {
		/* The completion path frees this request via dagFuncData. */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req,
		    RF_IO_NORMAL_PRIORITY);
	}

	return (0);
}
382:
383: /*****************************************************************************
384: * The execution function associated with an "unlock disk queue" node.
385: *****************************************************************************/
int
rf_DiskUnlockFuncForThreads(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	/* pda is only needed to locate the right per-disk queue. */
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;

	/*
	 * Enqueue a NOP request carrying RF_UNLOCK_DISK_QUEUE to release
	 * the queue lock.  Body is identical to rf_DiskUndoFunc above.
	 */
	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0,
	    (int (*) (void *, int)) node->wakeFunc, (void *) node,
	    NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
	    RF_UNLOCK_DISK_QUEUE, NULL);
	if (!req)
		(node->wakeFunc) (node, ENOMEM);
	else {
		/* The completion path frees this request via dagFuncData. */
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req,
		    RF_IO_NORMAL_PRIORITY);
	}

	return (0);
}
407:
408: /*****************************************************************************
409: * Callback routine for DiskRead and DiskWrite nodes. When the disk op
410: * completes, the routine is called to set the node status and inform
411: * the execution engine that the node has fired.
412: *****************************************************************************/
413: int
414: rf_GenericWakeupFunc(RF_DagNode_t *node, int status)
415: {
416: switch (node->status) {
417: case rf_bwd1:
418: node->status = rf_bwd2;
419: if (node->dagFuncData)
420: rf_FreeDiskQueueData((RF_DiskQueueData_t *)
421: node->dagFuncData);
422: return (rf_DiskWriteFuncForThreads(node));
423: break;
424: case rf_fired:
425: if (status)
426: node->status = rf_bad;
427: else
428: node->status = rf_good;
429: break;
430: case rf_recover:
431: /* Probably should never reach this case. */
432: if (status)
433: node->status = rf_panic;
434: else
435: node->status = rf_undone;
436: break;
437: default:
438: printf("rf_GenericWakeupFunc:");
439: printf("node->status is %d,", node->status);
440: printf("status is %d \n", status);
441: RF_PANIC();
442: break;
443: }
444: if (node->dagFuncData)
445: rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
446: return (rf_FinishNode(node, RF_INTR_CONTEXT));
447: }
448:
449:
450: /*****************************************************************************
451: * There are three distinct types of xor nodes.
452: *
453: * A "regular xor" is used in the fault-free case where the access spans
454: * a complete stripe unit. It assumes that the result buffer is one full
455: * stripe unit in size, and uses the stripe-unit-offset values that it
456: * computes from the PDAs to determine where within the stripe unit to
457: * XOR each argument buffer.
458: *
459: * A "simple xor" is used in the fault-free case where the access touches
460: * only a portion of one (or two, in some cases) stripe unit(s). It assumes
461: * that all the argument buffers are of the same size and have the same
462: * stripe unit offset.
463: *
464: * A "recovery xor" is used in the degraded-mode case. It's similar to
465: * the regular xor function except that it takes the failed PDA as an
466: * additional parameter, and uses it to determine what portions of the
467: * argument buffers need to be xor'd into the result buffer, and where
468: * in the result buffer they should go.
469: *****************************************************************************/
470:
471: /*
472: * Xor the params together and store the result in the result field.
473: * Assume the result field points to a buffer that is the size of one SU,
474: * and use the pda params to determine where within the buffer to XOR
475: * the input buffers.
476: */
int
rf_RegularXorFunc(RF_DagNode_t *node)
{
	/* The last parameter is always the raid pointer. */
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int i, retcode;

	retcode = 0;
	if (node->dagHdr->status == rf_enable) {
		/* Don't do the XOR if the input is the same as the output. */
		RF_ETIMER_START(timer);
		/* Parameters arrive in (pda, buffer) pairs. */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				/* Only the last rf_XorIntoBuffer() result is kept. */
				retcode = rf_XorIntoBuffer(raidPtr,
				    (RF_PhysDiskAddr_t *) node->params[i].p,
				    (char *) node->params[i + 1].p,
				    (char *) node->results[0],
				    node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	/* Call wake func explicitly since no I/O in this node. */
	return (rf_GenericWakeupFunc(node, retcode));
}
504:
505: /* Xor the inputs into the result buffer, ignoring placement issues. */
/* Xor the inputs into the result buffer, ignoring placement issues. */
int
rf_SimpleXorFunc(RF_DagNode_t *node)
{
	/* The last parameter is always the raid pointer. */
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	int i, retcode = 0;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* Don't do the XOR if the input is the same as the output. */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				/* All buffers share size/offset, so xor flat. */
				retcode = rf_bxor((char *)
				    node->params[i + 1].p,
				    (char *) node->results[0],
				    rf_RaidAddressToByte(raidPtr,
				    ((RF_PhysDiskAddr_t *)
				    node->params[i].p)->numSector),
				    (struct buf *) node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	/* Call wake func explicitly since no I/O in this node. */
	return (rf_GenericWakeupFunc(node, retcode));
}
534:
535: /*
536: * This xor is used by the degraded-mode dag functions to recover lost data.
537: * The second-to-last parameter is the PDA for the failed portion of the access.
538: * The code here looks at this PDA and assumes that the xor target buffer is
539: * equal in size to the number of sectors in the failed PDA. It then uses
540: * the other PDAs in the parameter list to determine where within the target
541: * buffer the corresponding data should be xored.
542: */
int
rf_RecoveryXorFunc(RF_DagNode_t *node)
{
	/* Last parameter is the raid pointer; second-to-last is the failed PDA. */
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA =
	    (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int i, retcode = 0;
	RF_PhysDiskAddr_t *pda;
	int suoffset, failedSUOffset =
	    rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	char *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* Remaining parameters arrive in (pda, buffer) pairs. */
		for (i = 0; i < node->numParams - 2; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
				srcbuf = (char *) node->params[i + 1].p;
				/*
				 * Position each source within the target
				 * buffer relative to the failed region.
				 */
				suoffset = rf_StripeUnitOffset(layoutPtr,
				    pda->startSector);
				destbuf = ((char *) node->results[0]) +
				    rf_RaidAddressToByte(raidPtr,
				    suoffset - failedSUOffset);
				retcode = rf_bxor(srcbuf, destbuf,
				    rf_RaidAddressToByte(raidPtr,
				    pda->numSector), node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	/* No I/O in this node: report completion directly. */
	return (rf_GenericWakeupFunc(node, retcode));
}
579:
580:
581: /*****************************************************************************
582: * The next three functions are utilities used by the above xor-execution
583: * functions.
584: *****************************************************************************/
585:
586: /*
587: * This is just a glorified buffer xor. Targbuf points to a buffer that is
588: * one full stripe unit in size. srcbuf points to a buffer that may be less
589: * than 1 SU, but never more. When the access described by pda is one SU in
590: * size (which by implication means it's SU-aligned), all that happens is
591: * (targbuf) <- (srcbuf ^ targbuf). When the access is less than one SU in
592: * size the XOR occurs on only the portion of targbuf identified in the pda.
593: */
594:
595: int
596: rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf,
597: char *targbuf, void *bp)
598: {
599: char *targptr;
600: int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
601: int SUOffset = pda->startSector % sectPerSU;
602: int length, retcode = 0;
603:
604: RF_ASSERT(pda->numSector <= sectPerSU);
605:
606: targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
607: length = rf_RaidAddressToByte(raidPtr, pda->numSector);
608: retcode = rf_bxor(srcbuf, targptr, length, bp);
609: return (retcode);
610: }
611:
612: /*
613: * It really should be the case that the buffer pointers (returned by malloc)
614: * are aligned to the natural word size of the machine, so this is the only
615: * case we optimize for. The length should always be a multiple of the sector
616: * size, so there should be no problem with leftover bytes at the end.
617: */
618: int
619: rf_bxor(char *src, char *dest, int len, void *bp)
620: {
621: unsigned mask = sizeof(long) - 1, retcode = 0;
622:
623: if (!(((unsigned long) src) & mask) &&
624: !(((unsigned long) dest) & mask) && !(len & mask)) {
625: retcode = rf_longword_bxor((unsigned long *) src,
626: (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
627: } else {
628: RF_ASSERT(0);
629: }
630: return (retcode);
631: }
632:
/*
 * Map a user buffer into kernel space, if necessary.  In this build no
 * remapping is needed, so the "mapped" pointer is simply an alias of
 * the original pointer.
 */
#define REMAP_VA(_bp,x,y) (y) = (x)
635:
636: /*
637: * When XORing in kernel mode, we need to map each user page to kernel
638: * space before we can access it.
639: * We don't want to assume anything about which input buffers are in
640: * kernel/user space, nor about their alignment, so in each loop we
641: * compute the maximum number of bytes that we can xor without crossing
642: * any page boundaries, and do only this many bytes before the next remap.
643: */
int
rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp)
{
	unsigned long *end = src + len;	/* len in longwords. */
	unsigned long d0, d1, d2, d3, s0, s1, s2, s3;	/* temps */
	unsigned long *pg_src, *pg_dest;	/* Per-page source/dest pointers. */
	int longs_this_time;	/* # longwords to xor in the current iteration. */

	/* Establish the initial (possibly remapped) working pointers. */
	REMAP_VA(bp, src, pg_src);
	REMAP_VA(bp, dest, pg_dest);
	if (!pg_src || !pg_dest)
		return (EFAULT);

	while (len >= 4) {
		/*
		 * Xor at most up to the nearer page boundary of the two
		 * buffers, so one remap per page crossing suffices.
		 */
		longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src),
		    RF_BLIP(pg_dest)) >> RF_LONGSHIFT);
		src += longs_this_time;
		dest += longs_this_time;
		len -= longs_this_time;
		/* Unrolled by four. */
		while (longs_this_time >= 4) {
			d0 = pg_dest[0];
			d1 = pg_dest[1];
			d2 = pg_dest[2];
			d3 = pg_dest[3];
			s0 = pg_src[0];
			s1 = pg_src[1];
			s2 = pg_src[2];
			s3 = pg_src[3];
			pg_dest[0] = d0 ^ s0;
			pg_dest[1] = d1 ^ s1;
			pg_dest[2] = d2 ^ s2;
			pg_dest[3] = d3 ^ s3;
			pg_src += 4;
			pg_dest += 4;
			longs_this_time -= 4;
		}
		while (longs_this_time > 0) {
			/* Cannot cross any page boundaries here. */
			*pg_dest++ ^= *pg_src++;
			longs_this_time--;
		}

		/*
		 * Either we're done, or we've reached a page boundary on one
		 * (or possibly both) of the pointers.
		 */
		if (len) {
			if (RF_PAGE_ALIGNED(src))
				REMAP_VA(bp, src, pg_src);
			if (RF_PAGE_ALIGNED(dest))
				REMAP_VA(bp, dest, pg_dest);
			if (!pg_src || !pg_dest)
				return (EFAULT);
		}
	}
	/* Tail: fewer than four longwords remain. */
	while (src < end) {
		*pg_dest++ ^= *pg_src++;
		src++;
		dest++;
		len--;
		if (RF_PAGE_ALIGNED(src))
			REMAP_VA(bp, src, pg_src);
		if (RF_PAGE_ALIGNED(dest))
			REMAP_VA(bp, dest, pg_dest);
	}
	RF_ASSERT(len == 0);
	return (0);
}
712:
713:
714: /*
715: * dst = a ^ b ^ c;
716: * a may equal dst
717: * see comment above longword_bxor
718: */
719: int
720: rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b,
721: unsigned long *c, int len, void *bp)
722: {
723: unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
724: /* Per-page source/dest pointers. */
725: unsigned long *pg_a, *pg_b, *pg_c, *pg_dst;
726: int longs_this_time; /* # longs to xor in the current iteration */
727: char dst_is_a = 0;
728:
729: /* Note: The length (len) is in longwords. */
730:
731: REMAP_VA(bp, a, pg_a);
732: REMAP_VA(bp, b, pg_b);
733: REMAP_VA(bp, c, pg_c);
734: if (a == dst) {
735: pg_dst = pg_a;
736: dst_is_a = 1;
737: } else {
738: REMAP_VA(bp, dst, pg_dst);
739: }
740:
741: /* Align dest to cache line. Can't cross a pg boundary on dst here. */
742: while ((((unsigned long) pg_dst) & 0x1f)) {
743: *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
744: dst++;
745: a++;
746: b++;
747: c++;
748: if (RF_PAGE_ALIGNED(a)) {
749: REMAP_VA(bp, a, pg_a);
750: if (!pg_a)
751: return (EFAULT);
752: }
753: if (RF_PAGE_ALIGNED(b)) {
754: REMAP_VA(bp, a, pg_b);
755: if (!pg_b)
756: return (EFAULT);
757: }
758: if (RF_PAGE_ALIGNED(c)) {
759: REMAP_VA(bp, a, pg_c);
760: if (!pg_c)
761: return (EFAULT);
762: }
763: len--;
764: }
765:
766: while (len > 4) {
767: longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a),
768: RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >>
769: RF_LONGSHIFT);
770: a += longs_this_time;
771: b += longs_this_time;
772: c += longs_this_time;
773: dst += longs_this_time;
774: len -= longs_this_time;
775: while (longs_this_time >= 4) {
776: a0 = pg_a[0];
777: longs_this_time -= 4;
778:
779: a1 = pg_a[1];
780: a2 = pg_a[2];
781:
782: a3 = pg_a[3];
783: pg_a += 4;
784:
785: b0 = pg_b[0];
786: b1 = pg_b[1];
787:
788: b2 = pg_b[2];
789: b3 = pg_b[3];
790: /* Start dual issue. */
791: a0 ^= b0;
792: b0 = pg_c[0];
793:
794: pg_b += 4;
795: a1 ^= b1;
796:
797: a2 ^= b2;
798: a3 ^= b3;
799:
800: b1 = pg_c[1];
801: a0 ^= b0;
802:
803: b2 = pg_c[2];
804: a1 ^= b1;
805:
806: b3 = pg_c[3];
807: a2 ^= b2;
808:
809: pg_dst[0] = a0;
810: a3 ^= b3;
811: pg_dst[1] = a1;
812: pg_c += 4;
813: pg_dst[2] = a2;
814: pg_dst[3] = a3;
815: pg_dst += 4;
816: }
817: while (longs_this_time > 0) {
818: /* Cannot cross any page boundaries here. */
819: *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
820: longs_this_time--;
821: }
822:
823: if (len) {
824: if (RF_PAGE_ALIGNED(a)) {
825: REMAP_VA(bp, a, pg_a);
826: if (!pg_a)
827: return (EFAULT);
828: if (dst_is_a)
829: pg_dst = pg_a;
830: }
831: if (RF_PAGE_ALIGNED(b)) {
832: REMAP_VA(bp, b, pg_b);
833: if (!pg_b)
834: return (EFAULT);
835: }
836: if (RF_PAGE_ALIGNED(c)) {
837: REMAP_VA(bp, c, pg_c);
838: if (!pg_c)
839: return (EFAULT);
840: }
841: if (!dst_is_a)
842: if (RF_PAGE_ALIGNED(dst)) {
843: REMAP_VA(bp, dst, pg_dst);
844: if (!pg_dst)
845: return (EFAULT);
846: }
847: }
848: }
849: while (len) {
850: *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
851: dst++;
852: a++;
853: b++;
854: c++;
855: if (RF_PAGE_ALIGNED(a)) {
856: REMAP_VA(bp, a, pg_a);
857: if (!pg_a)
858: return (EFAULT);
859: if (dst_is_a)
860: pg_dst = pg_a;
861: }
862: if (RF_PAGE_ALIGNED(b)) {
863: REMAP_VA(bp, b, pg_b);
864: if (!pg_b)
865: return (EFAULT);
866: }
867: if (RF_PAGE_ALIGNED(c)) {
868: REMAP_VA(bp, c, pg_c);
869: if (!pg_c)
870: return (EFAULT);
871: }
872: if (!dst_is_a)
873: if (RF_PAGE_ALIGNED(dst)) {
874: REMAP_VA(bp, dst, pg_dst);
875: if (!pg_dst)
876: return (EFAULT);
877: }
878: len--;
879: }
880: return (0);
881: }
882:
883: int
884: rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
885: unsigned char *c, unsigned long len, void *bp)
886: {
887: RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7)
888: == 0);
889:
890: return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
891: (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT,
892: bp));
893: }
CVSweb