File: [local] / sys / dev / raidframe / rf_evenodd_dagfuncs.c (download)
Revision 1.1.1.1 (vendor branch), Tue Mar 4 16:09:47 2008 UTC (16 years, 4 months ago) by nbrk
Branch: OPENBSD_4_2_BASE, MAIN
CVS Tags: jornada-partial-support-wip, HEAD Changes since 1.1: +0 -0 lines
Import of OpenBSD 4.2 release kernel tree with initial code to support
Jornada 720/728, StrongARM 1110-based handheld PC.
At this point kernel roots on NFS and boots into vfs_mountroot() and traps.
What is supported:
- glass console, Jornada framebuffer (jfb) works in 16bpp direct color mode
(needs some palette tweaks for non black/white/blue colors, i think)
- saic, SA11x0 interrupt controller (needs cleanup)
- sacom, SA11x0 UART (supported only as boot console for now)
- SA11x0 GPIO controller fully supported (but can't handle multiple interrupt
handlers on one gpio pin)
- sassp, SSP port on SA11x0 that attaches spibus
- Jornada microcontroller (jmcu) to control kbd, battery, etc throught
the SPI bus (wskbd attaches on jmcu, but not tested)
- tod functions seem work
- initial code for SA-1111 (chip companion) : this is TODO
Next important steps, i think:
- gpio and intc on sa1111
- pcmcia support for sa11x0 (and sa1111 help logic)
- REAL root on nfs when we have PCMCIA support (we may use any of supported pccard NICs)
- root on wd0! (using already supported PCMCIA-ATA)
|
/* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.7 2002/12/16 07:01:04 tdeval Exp $ */
/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: ChangMing Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Code for RAID-EVENODD architecture.
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_configure.h"
#include "rf_parityscan.h"
#include "rf_evenodd.h"
#include "rf_evenodd_dagfuncs.h"
/* These redundant functions are for small write. */
/*
 * Function table for the 'P' update of a small write (per the label
 * strings). It holds two function/label pairs: the "regular" entry first,
 * then the "simple" entry.
 */
RF_RedFuncs_t rf_EOSmallWritePFuncs = {
rf_RegularXorFunc, "Regular Old-New P",
rf_SimpleXorFunc, "Simple Old-New P"
};
/*
 * Function table for the 'E' update of a small write. It holds two
 * function/label pairs: the "regular" entry (rf_RegularONEFunc) first, then
 * the "simple" entry (rf_SimpleONEFunc). The simple entry is labelled
 * "Simple" so the two can be told apart, matching the layout of the 'P'
 * table.
 */
RF_RedFuncs_t rf_EOSmallWriteEFuncs = {
	rf_RegularONEFunc, "Regular Old-New E",
	rf_SimpleONEFunc, "Simple Old-New E"
};
/* These redundant functions are for degraded read. */
/*
 * Function table for degraded-read recovery by XOR. Both the first and the
 * second function/label pair name the same function, rf_RecoveryXorFunc.
 */
RF_RedFuncs_t rf_eoPRecoveryFuncs = {
rf_RecoveryXorFunc, "Recovery Xr",
rf_RecoveryXorFunc, "Recovery Xr"
};
/*
 * Function table for degraded-read recovery from 'E'. Both the first and
 * the second function/label pair name the same function, rf_RecoveryEFunc.
 */
RF_RedFuncs_t rf_eoERecoveryFuncs = {
rf_RecoveryEFunc, "Recovery E Func",
rf_RecoveryEFunc, "Recovery E Func"
};
/*****************************************************************************
* The following encoding node function is used in
* EO_000_CreateLargeWriteDAG.
*****************************************************************************/
/*
 * Encoding node function producing both 'E' and parity: the node's data
 * buffers are first encoded into the buffer at results[1], then
 * rf_RegularXorFunc() is run on the same node.
 *
 * Always returns 0; the result of rf_RegularXorFunc() is not propagated.
 */
int
rf_RegularPEFunc(RF_DagNode_t *node)
{
	rf_RegularESubroutine(node, node->results[1]);

	/* The node wakeup is left to rf_RegularXorFunc(). */
	rf_RegularXorFunc(node);

	return (0);
}
/*****************************************************************************
* For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and
* (ii) SimpleONEFunc() to be used. The former is used when the write
* accesses at least a full stripe unit's worth of sectors.
* The latter is used when the write accesses two stripe units but
* with fewer total sectors than the sectors per SU. In this case, the
* accessed areas of parity and 'E' are disconnected within their stripe
* units, and the parity write and the 'E' write are each divided into two
* distinct writes (four in total). This simple old-new write and regular
* old-new write happen as in RAID-5.
*****************************************************************************/
/*
* Algorithm:
* 1. Store the difference of old data and new data in the Rod buffer.
* 2. Then encode this buffer into the buffer that already have old 'E'
* information inside it, the result can be shown to be the new 'E'
* information.
* 3. Xor the Wnd buffer into the difference buffer to recover the original
* old data.
* Here we have another alternative: to allocate a temporary buffer for
* storing the difference of old data and new data, then encode temp buf
* into old 'E' buf to form new 'E', but this approach takes the same speed
* as the previous, and needs more memory.
*/
/*
 * "Regular" old-new 'E' update for a small write.
 *
 * Parameter layout as used here: params[numParams - 1] is the RF_Raid_t,
 * params[EpdaIndex] is the 'E' pda, params[k]/params[k + 1] (k even,
 * k < EpdaIndex) are pda/buffer pairs, and params[k + EpdaIndex + 3] is the
 * buffer XORed against params[k + 1]. results[0] is the 'E' buffer.
 *
 * The function runs three passes:
 *   1. XOR each buffer pair so that params[k + 1] holds the old/new
 *      difference;
 *   2. encode every difference buffer (other than results[0]) into the 'E'
 *      buffer;
 *   3. repeat the XOR of pass 1, which undoes it.
 *
 * The elapsed time is added to tracerec->q_us, and the node is woken with
 * status 0. Always returns 0.
 */
int
rf_RegularONEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
	/* Index of the node parameter where the e-pda can be found. */
	int EpdaIndex = (node->numParams - 1) / 2 - 1;
	RF_PhysDiskAddr_t *EPDA =
	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
	/* Generally zero. */
	int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_PhysDiskAddr_t *dataPda;
	RF_Etimer_t timer;
	RF_RowCol_t srcCol;
	char *src, *dst;
	int idx, nbytes, suOff;

	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
	RF_ASSERT(ESUOffset == 0);

	RF_ETIMER_START(timer);

	/* Pass 1: leave the old/new difference in params[idx + 1]. */
	idx = 0;
	while (idx < EpdaIndex) {
		dataPda = (RF_PhysDiskAddr_t *) node->params[idx].p;
		nbytes = rf_RaidAddressToByte(raidPtr, dataPda->numSector);
		(void) rf_bxor(node->params[idx + EpdaIndex + 3].p,
		    node->params[idx + 1].p, nbytes, node->dagHdr->bp);
		idx += 2;
	}

	/* Pass 2: fold each difference buffer into the 'E' buffer. */
	for (idx = 0; idx < EpdaIndex; idx += 2) {
		/* results[0] is the 'E' buffer itself; never encode it. */
		if (node->params[idx + 1].p == node->results[0])
			continue;

		dataPda = (RF_PhysDiskAddr_t *) node->params[idx].p;
		src = (char *) node->params[idx + 1].p;
		srcCol = rf_EUCol(layoutPtr, dataPda->raidAddress);
		suOff = rf_StripeUnitOffset(layoutPtr, dataPda->startSector);
		dst = ((char *) node->results[0]) +
		    rf_RaidAddressToByte(raidPtr, suOff);
		rf_e_encToBuf(raidPtr, srcCol, src, RF_EO_MATRIX_DIM - 2,
		    dst, dataPda->numSector);
	}

	/*
	 * Pass 3: XOR again to restore the buffers for the parity encoding
	 * function in the Xor node.
	 */
	idx = 0;
	while (idx < EpdaIndex) {
		dataPda = (RF_PhysDiskAddr_t *) node->params[idx].p;
		nbytes = rf_RaidAddressToByte(raidPtr, dataPda->numSector);
		(void) rf_bxor(node->params[idx + EpdaIndex + 3].p,
		    node->params[idx + 1].p, nbytes, node->dagHdr->bp);
		idx += 2;
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * "Simple" old-new 'E' update for a small write.
 *
 * Parameters as used here: params[0] is the pda whose column is encoded,
 * params[1] and params[5] are the two buffers XORed together, params[2] is
 * asserted to be a Q-type pda, params[3] is the destination ('E') buffer,
 * params[4] is the pda supplying the byte length, and the last parameter is
 * the RF_Raid_t.
 *
 * If the DAG is not enabled, the node is simply woken with status 0.
 * Otherwise params[1] is XORed with params[5], encoded into params[3], and
 * XORed with params[5] again; the elapsed time is added to tracerec->q_us.
 * The node is then woken with the status of the first XOR.
 *
 * Returns the value of rf_GenericWakeupFunc().
 */
int
rf_SimpleONEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	RF_RowCol_t srcCol;
	char *diffBuf, *eBuf;
	int nbytes, xorStatus;

	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type ==
	    RF_PDA_TYPE_Q);

	/* No I/O happens in this node, so the wakeup is always explicit. */
	if (node->dagHdr->status != rf_enable)
		return (rf_GenericWakeupFunc(node, 0));

	RF_ETIMER_START(timer);

	/* params[4] is a pda of the writeDataNodes. */
	nbytes = rf_RaidAddressToByte(raidPtr,
	    ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);

	/* bxor into the buffer of the readDataNodes. */
	xorStatus = rf_bxor(node->params[5].p, node->params[1].p, nbytes,
	    node->dagHdr->bp);

	/*
	 * Column in the encoding matrix of the write column that is to be
	 * encoded into the redundant disk 'E'.
	 */
	srcCol = rf_EUCol(layoutPtr, pda->raidAddress);
	diffBuf = node->params[1].p;
	eBuf = node->params[3].p;
	rf_e_encToBuf(raidPtr, srcCol, diffBuf, RF_EO_MATRIX_DIM - 2, eBuf,
	    pda->numSector);

	/* Second XOR undoes the first; its status is not examined. */
	rf_bxor(node->params[5].p, node->params[1].p, nbytes,
	    node->dagHdr->bp);

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);

	return (rf_GenericWakeupFunc(node, xorStatus));
}
/*
* Called by rf_RegularPEFunc(node) and rf_RegularEFunc(node)
* in f.f. large write.
*/
/*
 * Encode the node's data buffers into the 'E' buffer `ebuf'.
 *
 * The parameters are walked as pda/buffer pairs (params[i], params[i + 1])
 * for even i below numParams - 2; the last parameter is the RF_Raid_t.
 * Each buffer is encoded into `ebuf' at the byte offset corresponding to
 * the pda's offset inside its stripe unit. The elapsed time is added to
 * tracerec->xor_us.
 */
void
rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_PhysDiskAddr_t *dataPda;
	RF_Etimer_t timer;
	RF_RowCol_t srcCol;
	char *src, *dst;
	int idx, suOff;

	RF_ETIMER_START(timer);

	idx = 0;
	while (idx < node->numParams - 2) {
		/* A source buffer must never be the 'E' buffer itself. */
		RF_ASSERT(node->params[idx + 1].p != ebuf);

		dataPda = (RF_PhysDiskAddr_t *) node->params[idx].p;
		suOff = rf_StripeUnitOffset(layoutPtr, dataPda->startSector);
		srcCol = rf_EUCol(layoutPtr, dataPda->raidAddress);
		src = (char *) node->params[idx + 1].p;
		dst = ebuf + rf_RaidAddressToByte(raidPtr, suOff);
		rf_e_encToBuf(raidPtr, srcCol, src, RF_EO_MATRIX_DIM - 2,
		    dst, dataPda->numSector);

		idx += 2;
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
}
/*****************************************************************************
* Used in EO_001_CreateLargeWriteDAG.
*****************************************************************************/
/*
 * Encoding node function producing 'E' only: the node's data buffers are
 * encoded into the buffer at results[0] and the node is then woken with
 * status 0. Always returns 0.
 */
int
rf_RegularEFunc(RF_DagNode_t *node)
{
	rf_RegularESubroutine(node, node->results[0]);

	rf_GenericWakeupFunc(node, 0);

	return (0);
}
/*****************************************************************************
* This degraded function allow only two cases:
* 1. When write accesses the full failed stripe unit, then the access can
* be more than one stripe unit.
* 2. When write accesses only part of the failed SU, we assume accesses of
* more than one stripe unit are not allowed so that the write can be
* dealt with like a large write.
* The following function is based on these assumptions. So except in the
* second case, it looks the same as a large write encoding function. But
* this is not exactly the normal way of doing a degraded write, since
* RAIDframe has to break cases of accesses other than the above two into
* smaller accesses. We may have to change rf_DegrESubroutine in the future.
*****************************************************************************/
/*
 * Degraded-mode counterpart of the regular 'E' encoding subroutine.
 *
 * params[numParams - 1] is the RF_Raid_t and params[numParams - 2] is the
 * failed pda. The remaining parameters are walked as pda/buffer pairs and
 * each buffer is encoded into `ebuf'. Unlike the regular subroutine, the
 * destination offset is taken relative to the failed pda's offset inside
 * its stripe unit. The elapsed time is added to tracerec->q_us.
 */
void
rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA =
	    (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int failedSUOffset =
	    rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_PhysDiskAddr_t *dataPda;
	RF_Etimer_t timer;
	RF_RowCol_t srcCol;
	char *src, *dst;
	int idx, suOff;

	RF_ETIMER_START(timer);

	idx = 0;
	while (idx < node->numParams - 2) {
		/* A source buffer must never be the 'E' buffer itself. */
		RF_ASSERT(node->params[idx + 1].p != ebuf);

		dataPda = (RF_PhysDiskAddr_t *) node->params[idx].p;
		suOff = rf_StripeUnitOffset(layoutPtr, dataPda->startSector);
		srcCol = rf_EUCol(layoutPtr, dataPda->raidAddress);
		src = (char *) node->params[idx + 1].p;
		dst = ebuf +
		    rf_RaidAddressToByte(raidPtr, suOff - failedSUOffset);
		rf_e_encToBuf(raidPtr, srcCol, src, RF_EO_MATRIX_DIM - 2,
		    dst, dataPda->numSector);

		idx += 2;
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
/*****************************************************************************
* This function is used in case where one data disk failed and both redundant
* disks are alive. It is used in the EO_100_CreateWriteDAG. Note: if there is
* another disk failed in the stripe but not accessed at this time, then we
* should, instead, use the rf_EOWriteDoubleRecoveryFunc().
*****************************************************************************/
/*
 * Degraded-write node function: encodes the node's buffers into the 'E'
 * buffer at results[1] with the degraded subroutine, then runs
 * rf_RecoveryXorFunc() on the same node.
 *
 * Always returns 0; the result of rf_RecoveryXorFunc() is not propagated.
 */
int
rf_Degraded_100_EOFunc(RF_DagNode_t *node)
{
	rf_DegrESubroutine(node, node->results[1]);

	/* The node wakeup is left to rf_RecoveryXorFunc(). */
	rf_RecoveryXorFunc(node);

	return (0);
}
/*****************************************************************************
* This function is to encode one sector in one of the data disks to the E
* disk. However, in evenodd this function can also be used as decoding
* function to recover data from dead disk in the case of parity failure and
* a single data failure.
*****************************************************************************/
/*
 * XOR one source sector into one destination sector according to the
 * EVENODD encoding matrix.
 *
 * srcLogicCol / destLogicCol: logical columns of the source and destination.
 * srcSecbuf / destSecbuf: the source sector and the sector updated in place.
 * bytesPerSector: sector size; it is split into (RF_EO_MATRIX_DIM - 1)
 * encoding units (EUs) of bytesPerEU bytes each.
 *
 * For every destination EU, two source EUs are XORed in: the fixed EU at
 * S_index, and the EU at indexInSrc unless indexInSrc is the last matrix
 * row (RF_EO_MATRIX_DIM - 1).
 *
 * The work is done in units of short when RF_EO_MATRIX_DIM > 17 and in
 * units of long when it equals 17. For any other value no XOR code is
 * compiled in.
 */
void
rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector)
{
int S_index; /*
* Index of the EU in the src col which need
* be Xored into all EUs in a dest sector.
*/
int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
RF_RowCol_t j, indexInDest; /*
* Row index of an encoding unit in
* the destination column of encoding
* matrix.
*/
RF_RowCol_t indexInSrc; /*
* Row index of an encoding unit in the source
* column used for recovery.
*/
int bytesPerEU = bytesPerSector / numRowInEncMatrix;
#if RF_EO_MATRIX_DIM > 17
int shortsPerEU = bytesPerEU / sizeof(short);
short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
short temp1;
#elif RF_EO_MATRIX_DIM == 17
int longsPerEU = bytesPerEU / sizeof(long);
long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
long temp1;
#endif
/* The EU size must be a whole number of the word type in use. */
#if RF_EO_MATRIX_DIM > 17
RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
RF_ASSERT(bytesPerEU % sizeof(short) == 0);
#elif RF_EO_MATRIX_DIM == 17
RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
RF_ASSERT(bytesPerEU % sizeof(long) == 0);
#endif
/* Source EU that is XORed into every destination EU. */
S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
#if RF_EO_MATRIX_DIM > 17
srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
#elif RF_EO_MATRIX_DIM == 17
srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
#endif
for (indexInDest = 0; indexInDest < numRowInEncMatrix; indexInDest++) {
/* Source row paired with this destination row. */
indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
#if RF_EO_MATRIX_DIM > 17
destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
for (j = 0; j < shortsPerEU; j++) {
temp1 = destShortBuf[j] ^ srcShortBuf1[j];
/* Note: S_index won't be at the end row for any src
* col ! */
if (indexInSrc != RF_EO_MATRIX_DIM - 1)
destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
/* if indexInSrc is at the end row, ie.
* RF_EO_MATRIX_DIM -1, then all elements are zero ! */
else
destShortBuf[j] = temp1;
}
#elif RF_EO_MATRIX_DIM == 17
destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
for (j = 0; j < longsPerEU; j++) {
temp1 = destLongBuf[j] ^ srcLongBuf1[j];
/* Same end-row rule as the short variant above. */
if (indexInSrc != RF_EO_MATRIX_DIM - 1)
destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
else
destLongBuf[j] = temp1;
}
#endif
}
}
/*
 * Encode `numSector' consecutive sectors of `srcbuf' into `destbuf', one
 * sector at a time, using rf_e_EncOneSect() with the given source and
 * destination logical columns. The sector size in bytes is obtained from
 * rf_RaidAddressToByte(raidPtr, 1). Nothing is done when numSector <= 0.
 */
void
rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol, char *srcbuf,
    RF_RowCol_t destLogicCol, char *destbuf, int numSector)
{
	int sectorBytes = rf_RaidAddressToByte(raidPtr, 1);
	int remaining;

	for (remaining = numSector; remaining > 0; remaining--) {
		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf,
		    sectorBytes);
		srcbuf += sectorBytes;
		destbuf += sectorBytes;
	}
}
/*****************************************************************************
* When parity and one data disk have both died, we use the second redundant
* information, 'E', to recover the data of the dead disk. This function is
* used in the recovery node of EO_110_CreateReadDAG.
*****************************************************************************/
/*
 * Recovery node function that rebuilds a failed stripe unit from 'E'.
 *
 * params[numParams - 1] is the RF_Raid_t and params[numParams - 2] the
 * failed pda; the earlier parameters are pda/buffer pairs. results[0] is
 * the destination buffer, which is zeroed first, whether or not the DAG is
 * enabled.
 *
 * When the DAG is enabled, every source buffer other than results[0] is
 * encoded into the destination, offset relative to the failed pda's
 * position in its stripe unit. The pair at index numParams - 4 is treated
 * as the 'E' column (RF_EO_MATRIX_DIM - 2); all other pairs use the column
 * derived from their pda. The elapsed time is added to tracerec->xor_us.
 *
 * Returns the value of rf_GenericWakeupFunc(node, 0).
 */
int
rf_RecoveryEFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA =
	    (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	/* Logical column of the failed SU. */
	RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);
	int failedSUOffset =
	    rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_PhysDiskAddr_t *srcPda;
	RF_Etimer_t timer;
	RF_RowCol_t srcCol;	/* Source logical column. */
	char *src, *dst;
	int idx, suOff;

	bzero((char *) node->results[0],
	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));

	if (node->dagHdr->status != rf_enable)
		return (rf_GenericWakeupFunc(node, 0));

	RF_ETIMER_START(timer);

	for (idx = 0; idx < node->numParams - 2; idx += 2) {
		/* Never encode the destination buffer into itself. */
		if (node->params[idx + 1].p == node->results[0])
			continue;

		srcPda = (RF_PhysDiskAddr_t *) node->params[idx].p;
		/* The pair at numParams - 4 is the redundant 'E' column. */
		srcCol = (idx == node->numParams - 4) ?
		    RF_EO_MATRIX_DIM - 2 :
		    rf_EUCol(layoutPtr, srcPda->raidAddress);
		src = (char *) node->params[idx + 1].p;
		suOff = rf_StripeUnitOffset(layoutPtr, srcPda->startSector);
		dst = ((char *) node->results[0]) +
		    rf_RaidAddressToByte(raidPtr, suOff - failedSUOffset);
		rf_e_encToBuf(raidPtr, srcCol, src, fcol, dst,
		    srcPda->numSector);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->xor_us += RF_ETIMER_VAL_US(timer);

	/* The node executed successfully. */
	return (rf_GenericWakeupFunc(node, 0));
}
/*****************************************************************************
* This function is used in the case where one data and the parity have failed.
* (in EO_110_CreateWriteDAG)
*****************************************************************************/
/*
 * Degraded-write node function producing 'E' only: the node's buffers are
 * encoded into the buffer at results[0] with the degraded subroutine, and
 * the node is then woken with status 0. Always returns 0.
 */
int
rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
{
	rf_DegrESubroutine(node, node->results[0]);

	rf_GenericWakeupFunc(node, 0);

	return (0);
}
/*****************************************************************************
* THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES.
*****************************************************************************/
/*
 * Decode one sector of two failed data columns (EVENODD double decoding).
 *
 * raidPtr: array descriptor; supplies the sector size and numDataCol.
 * rrdbuf:  per-column pointers to the surviving data sectors, indexed by
 *          logical column (entries for the failed columns are not read).
 * dest:    dest[0] / dest[1] receive the recovered sectors for fcol[0] /
 *          fcol[1]; both are asserted to start out zeroed.
 * fcol:    the two failed logical columns (must differ).
 * pbuf:    the parity sector.
 * ebuf:    the 'E' sector.
 *
 * A sector is treated as (RF_EO_MATRIX_DIM - 1) encoding units (EUs). The
 * decode walks the rows in the order row = (row + f1 - f2) mod DIM, where
 * f1 < f2 are the failed columns, alternating a diagonal step (recovering
 * an EU of the larger column) and a horizontal step (recovering the same
 * row of the smaller column). `temp' deliberately carries over from one
 * row to the next.
 *
 * The word type used for XOR is short for RF_EO_MATRIX_DIM > 17 and long
 * for RF_EO_MATRIX_DIM == 17. It is selected once below so that every loop
 * uses the same per-EU word count; previously the 'P' accumulation loop
 * referenced the long-only count in both configurations.
 */
void
rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest,
    RF_RowCol_t *fcol, char *pbuf, char *ebuf)
{
#if RF_EO_MATRIX_DIM > 17
	typedef short rf_eo_word_t;
#elif RF_EO_MATRIX_DIM == 17
	typedef long rf_eo_word_t;
#endif
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
	int i, j, k, f1, f2, row;
	int rrdrow, erow, count = 0;
	int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
	int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
	int ecol = (RF_EO_MATRIX_DIM) - 2;
	int bytesPerEU = bytesPerSector / numRowInEncMatrix;
	int numDataCol = layoutPtr->numDataCol;
	int wordsPerEU = bytesPerEU / sizeof(rf_eo_word_t);
	rf_eo_word_t *rrdbuf_current, *pbuf_current, *ebuf_current;
	rf_eo_word_t *dest_smaller, *dest_smaller_current;
	rf_eo_word_t *dest_larger, *dest_larger_current;
	rf_eo_word_t *temp;
	rf_eo_word_t *P;

	RF_ASSERT(bytesPerEU % sizeof(rf_eo_word_t) == 0);
	RF_Malloc(P, bytesPerEU, (rf_eo_word_t *));
	RF_Malloc(temp, bytesPerEU, (rf_eo_word_t *));

	RF_ASSERT(*((long *) dest[0]) == 0);
	RF_ASSERT(*((long *) dest[1]) == 0);
	bzero((char *) P, bytesPerEU);
	bzero((char *) temp, bytesPerEU);
	RF_ASSERT(*P == 0);

	/*
	 * Calculate the 'P' parameter, which, not parity, is the Xor of all
	 * elements in the last two column, ie. 'E' and 'parity' columns, see
	 * the Ref. paper by Blaum, et al 1993.
	 */
	for (i = 0; i < numRowInEncMatrix; i++) {
		ebuf_current = ((rf_eo_word_t *) ebuf) + i * wordsPerEU;
		pbuf_current = ((rf_eo_word_t *) pbuf) + i * wordsPerEU;
		for (k = 0; k < wordsPerEU; k++) {
			P[k] ^= ebuf_current[k];
			P[k] ^= pbuf_current[k];
		}
	}

	/* Order the failed columns so that f1 < f2. */
	RF_ASSERT(fcol[0] != fcol[1]);
	if (fcol[0] < fcol[1]) {
		dest_smaller = (rf_eo_word_t *) (dest[0]);
		dest_larger = (rf_eo_word_t *) (dest[1]);
		f1 = fcol[0];
		f2 = fcol[1];
	} else {
		dest_smaller = (rf_eo_word_t *) (dest[1]);
		dest_larger = (rf_eo_word_t *) (dest[0]);
		f1 = fcol[1];
		f2 = fcol[0];
	}

	row = (RF_EO_MATRIX_DIM) - 1;
	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) !=
	    ((RF_EO_MATRIX_DIM) - 1)) {
		dest_larger_current = dest_larger + row * wordsPerEU;
		dest_smaller_current = dest_smaller + row * wordsPerEU;

		/*
		 * Do the diagonal recovery. Initially, temp[k] = (failed 1),
		 * which is the failed data in the column that has smaller
		 * col index.
		 */
		/* Step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
		for (j = 0; j < numDataCol; j++) {
			if (j == f1 || j == f2)
				continue;
			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
				rrdbuf_current = (rf_eo_word_t *) (rrdbuf[j]) +
				    rrdrow * wordsPerEU;
				for (k = 0; k < wordsPerEU; k++)
					temp[k] ^= rrdbuf_current[k];
			}
		}

		/*
		 * Step 2: ^E(erow,m-2), If erow is at the bottom row, don't
		 * Xor into it. E(erow,m-2) = (principle diagonal) ^ (failed
		 * 1) ^ (failed 2) ^ (SUM of nonfailed in-diagonal
		 * A(rrdrow,0..m-3))
		 * After this step, temp[k] = (principle diagonal) ^ (failed 2).
		 */
		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
		if (erow != (RF_EO_MATRIX_DIM) - 1) {
			ebuf_current = (rf_eo_word_t *) ebuf +
			    wordsPerEU * erow;
			for (k = 0; k < wordsPerEU; k++)
				temp[k] ^= ebuf_current[k];
		}

		/*
		 * Step 3: ^P to obtain the failed data (failed 2). P can be
		 * proved to be actually (principal diagonal). After this
		 * step, temp[k] = (failed 2), the failed data to be recovered.
		 */
		for (k = 0; k < wordsPerEU; k++)
			temp[k] ^= P[k];
		/* Put the data into the destination buffer. */
		for (k = 0; k < wordsPerEU; k++)
			dest_larger_current[k] = temp[k];

		/* THE FOLLOWING DO THE HORIZONTAL XOR. */
		/*
		 * Step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data
		 * columns.
		 */
		for (j = 0; j < numDataCol; j++) {
			if (j == f1 || j == f2)
				continue;
			rrdbuf_current = (rf_eo_word_t *) (rrdbuf[j]) +
			    row * wordsPerEU;
			for (k = 0; k < wordsPerEU; k++)
				temp[k] ^= rrdbuf_current[k];
		}

		/* Step 2: ^A(row,m-1) */
		pbuf_current = (rf_eo_word_t *) pbuf + wordsPerEU * row;
		for (k = 0; k < wordsPerEU; k++)
			temp[k] ^= pbuf_current[k];
		/* Step 3: Put the data into the destination buffer. */
		for (k = 0; k < wordsPerEU; k++)
			dest_smaller_current[k] = temp[k];

		count++;
	}

	/*
	 * Check that every encoding unit in the data buffer has been decoded.
	 * According to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime
	 * number, this algorithm covers the whole buffer.
	 */
	RF_ASSERT(count == numRowInEncMatrix);

	RF_Free((char *) P, bytesPerEU);
	RF_Free((char *) temp, bytesPerEU);
}
/*****************************************************************************
* This function is called by double degraded read EO_200_CreateReadDAG.
*****************************************************************************/
/*
 * Recovery node function for a double-degraded read.
 *
 * Parameter layout as used here: the leading parameters are data pdas
 * (type RF_PDA_TYPE_DATA), followed by the parity and 'E' pdas;
 * params[np - 2] is the RF_Raid_t and params[np - 1] the access stripe map.
 * results[0] (and results[1] when two failed units are accessed) are the
 * pdas whose buffers receive the recovered data; those buffers are zeroed
 * first.
 *
 * For every sector offset of the failed stripe unit(s) that is being
 * accessed, the function gathers the surviving data, parity and 'E'
 * sectors and calls rf_doubleEOdecode(). A failed column that is not being
 * accessed at that offset is decoded into a temporary one-sector buffer.
 *
 * The elapsed time is added to tracerec->q_us when a trace record exists,
 * and the node is woken with status 0. Always returns 0.
 */
int
rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int ndataParam = 0;
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *)
	    node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
	int i, prm, sector, nresults = node->numResults;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	unsigned sosAddr;
	/* Flags to indicate if memory is allocated. */
	int mallc_one = 0, mallc_two = 0;
	int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
	    npda;
	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2],
	    numDataCol = layoutPtr->numDataCol;
	char **buf, *ebuf, *pbuf, *dest[2];
	long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
	RF_SectorNum_t startSector, endSector;
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	RF_ETIMER_START(timer);

	/*
	 * Find out the number of parameters that are pdas for data
	 * information. Only params[0 .. np - 3] are pdas; the last two
	 * parameters are the raid pointer and the stripe map and must not be
	 * inspected as pdas.
	 */
	for (i = 0; i < np - 2; i++)
		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type !=
		    RF_PDA_TYPE_DATA) {
			ndataParam = i;
			break;
		}

	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
	if (ndataParam != 0) {
		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
	}

	if (asmap->failedPDAs[1] &&
	    (asmap->failedPDAs[1]->numSector +
	     asmap->failedPDAs[0]->numSector) < secPerSU) {
		RF_ASSERT(0);	/* Currently, no support for this situation. */
		ppda = node->params[np - 6].p;
		ppda2 = node->params[np - 5].p;
		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
		epda = node->params[np - 4].p;
		epda2 = node->params[np - 3].p;
		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
		/*
		 * Keep the offsets defined even if the assertion above is
		 * compiled out; they are read in the decode loop below.
		 */
		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
	} else {
		ppda = node->params[np - 4].p;
		epda = node->params[np - 3].p;
		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
		RF_ASSERT(psuoff == esuoff);
	}

	/*
	 * The followings have three goals:
	 * 1. Determine the startSector to begin decoding and endSector
	 *    to end decoding.
	 * 2. Determine the column numbers of the two failed disks.
	 * 3. Determine the offset and end offset of the access within
	 *    each failed stripe unit.
	 */
	if (nresults == 1) {
		/* Find the startSector to begin decoding. */
		pda = node->results[0];
		bzero(pda->bufPtr, bytesPerSector * pda->numSector);
		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
		fsuend[0] = fsuoff[0] + pda->numSector;
		startSector = fsuoff[0];
		endSector = fsuend[0];

		/* Find out the column of failed disk being accessed. */
		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);

		/* Find out the other failed column not accessed. */
		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
		    asmap->raidAddress);
		for (i = 0; i < numDataCol; i++) {
			npda.raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr,
			    npda.raidAddress, &(npda.row), &(npda.col),
			    &(npda.startSector), 0);
			/* Stop at the dead disk that is not fcol[0]. */
			if (RF_DEAD_DISK(raidPtr
			    ->Disks[npda.row][npda.col].status))
				if (i != fcol[0])
					break;
		}
		RF_ASSERT(i < numDataCol);
		fcol[1] = i;
	} else {
		RF_ASSERT(nresults == 2);
		pda0 = node->results[0];
		bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
		pda1 = node->results[1];
		bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);

		/*
		 * Determine the failed column numbers of the two failed
		 * disks.
		 */
		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);

		/*
		 * Determine the offset and end offset of the access within
		 * each failed stripe unit.
		 */
		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
		fsuend[0] = fsuoff[0] + pda0->numSector;
		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
		fsuend[1] = fsuoff[1] + pda1->numSector;

		/*
		 * Determine the startSector to begin decoding. The decode
		 * loop below works in stripe-unit offsets (it compares
		 * against fsuoff/fsuend and suoff/suend), so the start must
		 * be an offset as well, not a physical start sector.
		 */
		startSector = RF_MIN(fsuoff[0], fsuoff[1]);
		/* Determine the endSector to end decoding. */
		endSector = RF_MAX(fsuend[0], fsuend[1]);
	}

	/*
	 * Assign the beginning sector and the end sector for each parameter.
	 * Find out the corresponding column # for each parameter.
	 */
	for (prm = 0; prm < ndataParam; prm++) {
		pda = node->params[prm].p;
		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
		suend[prm] = suoff[prm] + pda->numSector;
		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
	}

	/*
	 * 'sector' is the sector for the current decoding algorithm. For each
	 * sector in the failed SU
	 * 1. Find out the corresponding parameters that cover the current
	 *    sector and that are needed for the decoding of this sector in
	 *    failed SU.
	 * 2. Find out if sector is in the shadow of any accessed failed SU.
	 *    If not, malloc a temporary space of a sector in size.
	 */
	for (sector = startSector; sector < endSector; sector++) {
		if (nresults == 2)
			if (!(fsuoff[0] <= sector && sector < fsuend[0]) &&
			    !(fsuoff[1] <= sector && sector < fsuend[1]))
				continue;

		for (prm = 0; prm < ndataParam; prm++)
			if (suoff[prm] <= sector && sector < suend[prm])
				buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)
				    node->params[prm].p)->bufPtr +
				    rf_RaidAddressToByte(raidPtr,
				    sector - suoff[prm]);

		/*
		 * Find out if sector is in the shadow of any accessed failed
		 * SU. If yes, assign dest[0], dest[1] to point at suitable
		 * position of the buffer corresponding to failed SUs. If no,
		 * malloc a temporary space of a sector in size for
		 * destination of decoding.
		 */
		RF_ASSERT(nresults == 1 || nresults == 2);
		if (nresults == 1) {
			dest[0] = ((RF_PhysDiskAddr_t *)
			    node->results[0])->bufPtr +
			    rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
			/* Always malloc temp buffer to dest[1]. */
			RF_Malloc(dest[1], bytesPerSector, (char *));
			bzero(dest[1], bytesPerSector);
			mallc_two = 1;
		} else {
			if (fsuoff[0] <= sector && sector < fsuend[0])
				dest[0] = ((RF_PhysDiskAddr_t *)
				    node->results[0])->bufPtr +
				    rf_RaidAddressToByte(raidPtr,
				    sector - fsuoff[0]);
			else {
				RF_Malloc(dest[0], bytesPerSector, (char *));
				bzero(dest[0], bytesPerSector);
				mallc_one = 1;
			}
			if (fsuoff[1] <= sector && sector < fsuend[1])
				dest[1] = ((RF_PhysDiskAddr_t *)
				    node->results[1])->bufPtr +
				    rf_RaidAddressToByte(raidPtr,
				    sector - fsuoff[1]);
			else {
				RF_Malloc(dest[1], bytesPerSector, (char *));
				bzero(dest[1], bytesPerSector);
				mallc_two = 1;
			}
			/* At most one side can need a temporary buffer. */
			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
		}

		pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr,
		    sector - psuoff);
		ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr,
		    sector - esuoff);

		/*
		 * After finish finding all needed sectors, call doubleEOdecode
		 * function for decoding one sector to destination.
		 */
		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);

		/*
		 * Free all allocated memory, and mark flag to indicate no
		 * memory is being allocated.
		 */
		if (mallc_one == 1)
			RF_Free(dest[0], bytesPerSector);
		if (mallc_two == 1)
			RF_Free(dest[1], bytesPerSector);
		mallc_one = mallc_two = 0;
	}

	RF_Free(buf, numDataCol * sizeof(char *));
	if (ndataParam != 0) {
		RF_Free(suoff, ndataParam * sizeof(long));
		RF_Free(suend, ndataParam * sizeof(long));
		RF_Free(prmToCol, ndataParam * sizeof(long));
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec) {
		tracerec->q_us += RF_ETIMER_VAL_US(timer);
	}

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * rf_EOWriteDoubleRecoveryFunc --
 *
 * DAG node function for a write to a failed stripe unit while a second
 * data column of the same stripe is also dead.
 *
 * Inputs taken from the node:
 *	params[0 .. numDataCol - 3]	PDAs of the surviving data columns.
 *	params[numDataCol]		PDA carrying the new data to write.
 *	params[numParams - 2]		the RF_Raid_t pointer.
 *	params[numParams - 1]		the access stripe map.
 *	results[0], results[1]		PDAs of 'P' and 'E' (not plain
 *					buffers); both are updated in place.
 *
 * Steps:
 *	1. Rebuild the old contents of the failed, accessed SU with EvenOdd
 *	   double decoding.
 *	2. XOR the new data into the rebuilt old data.
 *	3. Fold that difference into 'E' and 'P', as a small write would.
 *
 * Currently, only one of the two failed SUs may be accessed through this
 * function.  asmap->numStripeUnitsAccessed is also limited to one;
 * RAIDframe breaks a large access into several single-stripe-unit
 * accesses.
 *
 * Always wakes the node up with status 0 and returns 0.
 */
int
rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
	RF_SectorNum_t sector;
	RF_RowCol_t col, scol;
	int prm, i, j;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	/*
	 * Start-of-stripe RAID address.  This used to be a plain `unsigned',
	 * which can truncate the address on large arrays and make the
	 * dead-column search below probe the wrong stripe units.
	 * NOTE(review): RF_int64 is assumed to be at least as wide as the
	 * raidAddress fields -- confirm against rf_types.h.
	 */
	RF_int64 sosAddr;
	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
	RF_int64 numbytes;
	RF_SectorNum_t startSector, endSector;
	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
	char **buf;		/*
				 * buf[0], buf[1], buf[2], ... etc, point to
				 * buffer storing data read from col0, col1,
				 * col2.
				 */
	char *ebuf, *pbuf, *dest[2], *olddata[2];
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	/*
	 * Only this case is currently supported: the other failed SU is
	 * not being accessed.
	 */
	RF_ASSERT(asmap->numDataFailed == 1);

	RF_ETIMER_START(timer);
	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));

	/*
	 * Instead of being buffers, node->results[0] and [1] are the
	 * 'P' and 'E' PDAs.
	 */
	ppda = node->results[0];
	epda = node->results[1];
	fpda = asmap->failedPDAs[0];

	/*
	 * First, recover the old contents of the failed SU using EvenOdd
	 * double decoding.  Determine the sector range to decode.
	 */
	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
	endSector = startSector + fpda->numSector;

	/*
	 * Point buf[col] at the buffer of each non-failed column.  The
	 * entries of the two failed columns are never assigned here.
	 */
	for (prm = 0; prm < numDataCol - 2; prm++) {
		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
		col = rf_EUCol(layoutPtr, pda->raidAddress);
		buf[col] = pda->bufPtr;
	}

	/*
	 * pbuf and ebuf start at the beginning of the 'P' and 'E' buffers
	 * and advance as the double recovery decoding goes on.
	 */
	pbuf = ppda->bufPtr;
	ebuf = epda->bufPtr;

	/*
	 * Find the logical column numbers, in the encoding matrix, of the
	 * two failed columns.  fcol[0] is the one being accessed.
	 */
	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);

	/*
	 * Find the other failed column, not accessed this time: map the
	 * first sector of every data SU of the stripe and stop at the first
	 * dead disk that is not fcol[0].
	 */
	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
	    asmap->raidAddress);
	for (i = 0; i < numDataCol; i++) {
		npda.raidAddress = sosAddr + (i * secPerSU);
		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress,
		    &(npda.row), &(npda.col), &(npda.startSector), 0);
		if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status) &&
		    i != fcol[0]) {
			break;
		}
	}
	/*
	 * NOTE(review): if no second dead column is found, i equals
	 * numDataCol here; only the assertion guards against that.
	 */
	RF_ASSERT(i < numDataCol);
	fcol[1] = i;

	/*
	 * Temporary space for the recovered SUs.  The decoder writes both
	 * failed columns; only olddata[0], the accessed one, is used below.
	 */
	numbytes = fpda->numSector * bytesPerSector;
	RF_Malloc(olddata[0], numbytes, (char *));
	RF_Malloc(olddata[1], numbytes, (char *));
	dest[0] = olddata[0];
	dest[1] = olddata[1];
	bzero(olddata[0], numbytes);
	bzero(olddata[1], numbytes);

	/*
	 * Decode one sector per iteration.  buf[j], ebuf, pbuf and dest[j]
	 * already point at the beginning of their buffers and are all
	 * advanced by one sector after each pass.
	 */
	for (sector = startSector; sector < endSector; sector++) {
		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
		for (j = 0; j < numDataCol; j++) {
			if ((j != fcol[0]) && (j != fcol[1])) {
				buf[j] += bytesPerSector;
			}
		}
		dest[0] += bytesPerSector;
		dest[1] += bytesPerSector;
		ebuf += bytesPerSector;
		pbuf += bytesPerSector;
	}

	/*
	 * After recovery, the buffer pointed to by olddata[0] holds the old
	 * failed data.  With the new data and this old data, use the small
	 * write method to calculate the new redundant information.
	 *
	 * node->params[ 0, ... PDAPerDisk * (numDataCol - 2) - 1 ] are Pdas
	 * of Rrd; params[ PDAPerDisk * (numDataCol - 2), ... PDAPerDisk *
	 * numDataCol - 1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ); params[
	 * PDAPerDisk * numDataCol, ... PDAPerDisk * numDataCol +
	 * asmap->numStripeUnitsAccessed - asmap->numDataFailed - 1 ] are
	 * Pdas of wudNodes.  The current implementation assumes the simplest
	 * case: asmap->numStripeUnitsAccessed == 1 and
	 * asmap->numDataFailed == 1, i.e. PDAPerDisk == 1, so
	 * node->params[numDataCol] must be the new data to be written to
	 * the failed disk.  First bxor the new data into the old recovered
	 * data, then proceed as for a small write.
	 */
	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr,
	    olddata[0], numbytes, node->dagHdr->bp);

	/*
	 * New 'E' calculation.  scol is the encoding matrix column of the
	 * written SU; olddata[0] is now the source buffer and epda->bufPtr
	 * the destination buffer.
	 */
	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2,
	    epda->bufPtr, fpda->numSector);

	/* New 'P' calculation. */
	rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);

	/* Free the allocated buffers. */
	RF_Free(olddata[0], numbytes);
	RF_Free(olddata[1], numbytes);
	RF_Free(buf, numDataCol * sizeof(char *));

	/* Charge the elapsed time to the trace record, if there is one. */
	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec) {
		tracerec->q_us += RF_ETIMER_VAL_US(timer);
	}

	rf_GenericWakeupFunc(node, 0);
	return (0);
}