File: [local] / sys / dev / raidframe / rf_pq.c (download)
Revision 1.1.1.1 (vendor branch), Tue Mar 4 16:09:50 2008 UTC (16 years, 4 months ago) by nbrk
Branch: OPENBSD_4_2_BASE, MAIN
CVS Tags: jornada-partial-support-wip, HEAD Changes since 1.1: +0 -0 lines
Import of OpenBSD 4.2 release kernel tree with initial code to support
Jornada 720/728, StrongARM 1110-based handheld PC.
At this point kernel roots on NFS and boots into vfs_mountroot() and traps.
What is supported:
- glass console, Jornada framebuffer (jfb) works in 16bpp direct color mode
(needs some palette tweaks for non black/white/blue colors, i think)
- saic, SA11x0 interrupt controller (needs cleanup)
- sacom, SA11x0 UART (supported only as boot console for now)
- SA11x0 GPIO controller fully supported (but can't handle multiple interrupt
handlers on one gpio pin)
- sassp, SSP port on SA11x0 that attaches spibus
- Jornada microcontroller (jmcu) to control kbd, battery, etc. through
the SPI bus (wskbd attaches on jmcu, but not tested)
- tod functions seem to work
- initial code for SA-1111 (chip companion) : this is TODO
Next important steps, i think:
- gpio and intc on sa1111
- pcmcia support for sa11x0 (and sa1111 help logic)
- REAL root on nfs when we have PCMCIA support (we may use any of supported pccard NICs)
- root on wd0! (using already supported PCMCIA-ATA)
|
/* $OpenBSD: rf_pq.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */
/* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Code for RAID level 6 (P + Q) disk array architecture.
*/
#include "rf_archs.h"
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_etimer.h"
#include "rf_pqdeg.h"
#include "rf_general.h"
#include "rf_map.h"
#include "rf_pq.h"
/*
 * P (parity) update function table: the "regular" and the "simple"
 * old-new P functions, each followed by its descriptive name string.
 * Handed to rf_CommonCreateSmallWriteDAG() by rf_PQCreateSmallWriteDAG().
 */
RF_RedFuncs_t rf_pFuncs = {
rf_RegularONPFunc, "Regular Old-New P",
rf_SimpleONPFunc, "Simple Old-New P"
};
/*
 * P recovery function table. Both slots hold rf_RecoveryPFunc, so the
 * same routine is used whichever slot a caller picks.
 */
RF_RedFuncs_t rf_pRecoveryFuncs = {
rf_RecoveryPFunc, "Recovery P Func",
rf_RecoveryPFunc, "Recovery P Func"
};
/*
 * Regular old-new P update. P is handled by the generic XOR code, so this
 * just forwards the node to rf_RegularXorFunc() and hands back its status.
 */
int
rf_RegularONPFunc(RF_DagNode_t *node)
{
	int status;

	status = rf_RegularXorFunc(node);
	return (status);
}
/*
 * Simple old-new P update: the P counterpart of rf_SimpleONQFunc() with
 * the coefficient fixed at 1, i.e. forwarded to rf_SimpleXorFunc().
 */
int
rf_SimpleONPFunc(RF_DagNode_t *node)
{
	int status;

	status = rf_SimpleXorFunc(node);
	return (status);
}
/*
 * P recovery: forwards the node to rf_RecoveryXorFunc() and returns
 * whatever that routine returns.
 */
int
rf_RecoveryPFunc(RF_DagNode_t *node)
{
	int status;

	status = rf_RecoveryXorFunc(node);
	return (status);
}
/*
 * Regular P computation: forwards the node to rf_RegularXorFunc() and
 * returns its status.
 */
int
rf_RegularPFunc(RF_DagNode_t *node)
{
	int status;

	status = rf_RegularXorFunc(node);
	return (status);
}
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
/*
 * Forward declarations for the Q helpers defined near the end of this
 * file; both take their length in bytes.
 */
/* dest ^= Q-encoding of (obuf ^ nbuf) for data column coeff. */
void rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
unsigned char coeff);
/* Recover data column coeff from qbuf into abuf (may be the same buffer). */
void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
unsigned coeff);
/*
 * Q update function table: the "regular" and the "simple" old-new Q
 * functions, each followed by its descriptive name string. Handed to
 * rf_CommonCreateSmallWriteDAG() by rf_PQCreateSmallWriteDAG().
 */
RF_RedFuncs_t rf_qFuncs = {
rf_RegularONQFunc, "Regular Old-New Q",
rf_SimpleONQFunc, "Simple Old-New Q"
};
/*
 * Q recovery function table. Both slots hold rf_RecoveryQFunc.
 */
RF_RedFuncs_t rf_qRecoveryFuncs = {
rf_RecoveryQFunc, "Recovery Q Func",
rf_RecoveryQFunc, "Recovery Q Func"
};
/*
 * Combined P+Q recovery function table. Both slots hold
 * rf_RecoveryPQFunc, which in this file only reports that PQ recovery
 * is not implemented.
 */
RF_RedFuncs_t rf_pqRecoveryFuncs = {
rf_RecoveryPQFunc, "Recovery PQ Func",
rf_RecoveryPQFunc, "Recovery PQ Func"
};
/*
 * Pick the DAG creation function for a P+Q (RAID 6) access.
 *
 * The decision is driven by the I/O type and by how many of the units
 * touched by this access have failed: asmap->numDataFailed data units
 * and asmap->numParityFailed redundancy (P/Q) units. The selected
 * function is stored through createFunc. When more than two units have
 * failed the access cannot be serviced: an error is logged and NULL is
 * stored instead.
 *
 * The rf_PQ_xyz_* names encode the failure pattern as
 * <data failed><P failed><Q failed>.
 */
void
rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
    RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
{
	RF_RaidLayout_t *layout = &(raidPtr->Layout);
	unsigned dataFailed = asmap->numDataFailed;
	unsigned redunFailed = asmap->numParityFailed;
	unsigned totalFailed = redunFailed + dataFailed;
	int preferSmall;

	RF_ASSERT(RF_IO_IS_R_OR_W(type));

	if (totalFailed > 2) {
		RF_ERRORMSG("more than two disks failed in a single group !"
		    " Aborting I/O operation.\n");
		*createFunc = NULL;
		return;
	}

	/* At most two failures from here on, so the I/O can be done. */
	if (type == RF_IO_TYPE_READ) {
		if (dataFailed == 0) {
			/* Fault free read, same as raid 5. */
			*createFunc = (RF_VoidFuncPtr)
			    rf_CreateFaultFreeReadDAG;
		} else if (dataFailed == 2) {
			/* Lost two data units. */
			*createFunc = (RF_VoidFuncPtr)
			    rf_PQ_200_CreateReadDAG;
		} else if (dataFailed == 1) {
			/*
			 * Lost a single data unit. Either the redundancy
			 * is intact (raid 5 style reconstruct read, unless
			 * an unaccessed data unit of the stripe is gone
			 * too), or one of P/Q is lost as well.
			 */
			if (totalFailed == 2) {
				if (asmap->failedPDAs[1]->type ==
				    RF_PDA_TYPE_PARITY)
					*createFunc = (RF_VoidFuncPtr)
					    rf_PQ_110_CreateReadDAG;
				else
					*createFunc = (RF_VoidFuncPtr)
					    rf_PQ_101_CreateReadDAG;
			} else if (rf_NumFailedDataUnitsInStripe(raidPtr,
			    asmap) == 2) {
				*createFunc = (RF_VoidFuncPtr)
				    rf_PQ_200_CreateReadDAG;
			} else {
				*createFunc = (RF_VoidFuncPtr)
				    rf_PQ_100_CreateReadDAG;
			}
		}
		return;
	}

	/* Everything below handles writes. */
	if (totalFailed == 0) {
		/* Fault free: choose between small and large write. */
		preferSmall = rf_suppressLocksAndLargeWrites ||
		    (((asmap->numStripeUnitsAccessed <=
		    (layout->numDataCol / 2)) &&
		    (layout->numDataCol != 1)) ||
		    (asmap->parityInfo->next != NULL) ||
		    (asmap->qInfo->next != NULL) ||
		    rf_CheckStripeForFailures(raidPtr, asmap));
		if (preferSmall)
			*createFunc = (RF_VoidFuncPtr)
			    rf_PQCreateSmallWriteDAG;
		else
			*createFunc = (RF_VoidFuncPtr)
			    rf_PQCreateLargeWriteDAG;
	} else if (totalFailed == 1) {
		if (redunFailed == 1) {
			/* Exactly one of P or Q is gone. */
			RF_ASSERT((asmap->failedPDAs[0]->type ==
			    RF_PDA_TYPE_PARITY) ||
			    (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));

			/* Same small-vs-large test for both survivors. */
			preferSmall = ((asmap->numStripeUnitsAccessed <=
			    (layout->numDataCol / 2)) ||
			    (asmap->numStripeUnitsAccessed == 1)) ||
			    rf_NumFailedDataUnitsInStripe(raidPtr, asmap);

			if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {
				/* Q died: like a normal mode raid5 write. */
				if (preferSmall)
					*createFunc = (RF_VoidFuncPtr)
					    rf_PQ_001_CreateSmallWriteDAG;
				else
					*createFunc = (RF_VoidFuncPtr)
					    rf_PQ_001_CreateLargeWriteDAG;
			} else {
				/* Parity died: only Q gets updated. */
				if (preferSmall)
					*createFunc = (RF_VoidFuncPtr)
					    rf_PQ_010_CreateSmallWriteDAG;
				else
					*createFunc = (RF_VoidFuncPtr)
					    rf_PQ_010_CreateLargeWriteDAG;
			}
		} else {
			/*
			 * Data missing. P reconstruct write if only one
			 * data unit of the stripe is lost, otherwise a PQ
			 * reconstruct write.
			 */
			if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
				*createFunc = (RF_VoidFuncPtr)
				    rf_PQ_200_CreateWriteDAG;
			else
				*createFunc = (RF_VoidFuncPtr)
				    rf_PQ_100_CreateWriteDAG;
		}
	} else if (totalFailed == 2) {
		if (redunFailed == 2) {
			/* Both p and q dead. */
			*createFunc = (RF_VoidFuncPtr)
			    rf_PQ_011_CreateWriteDAG;
		} else if (redunFailed == 1) {
			/* Dead data plus either p or q. */
			RF_ASSERT(asmap->failedPDAs[0]->type ==
			    RF_PDA_TYPE_DATA);
			RF_ASSERT((asmap->failedPDAs[1]->type ==
			    RF_PDA_TYPE_PARITY) ||
			    (asmap->failedPDAs[1]->type ==
			    RF_PDA_TYPE_Q));
			if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
				*createFunc = (RF_VoidFuncPtr)
				    rf_PQ_101_CreateWriteDAG;
			else
				*createFunc = (RF_VoidFuncPtr)
				    rf_PQ_110_CreateWriteDAG;
		} else if (redunFailed == 0) {
			/* Double data loss. */
			*createFunc = (RF_VoidFuncPtr)
			    rf_PQ_200_CreateWriteDAG;
		}
	} else {
		/* More than 2 disk faults; excluded by the check above. */
		*createFunc = NULL;
		RF_PANIC();
	}
	return;
}
/*
 * Stop-gap "info" functions. They are compiled out (#if 0); each one
 * only reports fixed successor/antecedent counts through nSucc/nAnte
 * and ignores raidPtr and asmap.
 */
#if 0
/* Reports one successor and one antecedent. */
void
rf_PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
RF_AccessStripeMap_t *asmap)
{
*nSucc = *nAnte = 1;
}
/* Reports one successor and two antecedents. */
void
rf_PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
RF_AccessStripeMap_t *asmap)
{
*nSucc = 1;
*nAnte = 2;
}
#endif
/*
 * Fault-free large write for P+Q: delegates to the common large-write
 * DAG builder with rf_RegularPQFunc as the redundancy function.
 * NOTE(review): the literal 2 is presumably the number of redundancy
 * units (P and Q) -- confirm against rf_CommonCreateLargeWriteDAG().
 * The parameter names used below come from RF_CREATE_DAG_FUNC_DECL.
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
{
rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
allocList, 2, rf_RegularPQFunc, RF_FALSE);
}
/*
 * "Regular" old-new Q update.
 *
 * The node carries np = 4d + 3 parameters:
 *   [0 .. 2d-1]     d (old data pda, old data buffer) pairs
 *   [2d+1]          Q buffer (its pda sits at [2d])
 *   [2d+2 .. 4d+1]  d (new data pda, new data buffer) pairs
 *   [np-1]          raidPtr
 *
 * For every old/new pair the Q delta is applied to the Q buffer at the
 * byte offset of the pair's start sector within its stripe unit. The
 * elapsed time is added to the trace record's q_us, and the node is
 * woken explicitly since it performs no I/O. Always returns 0.
 */
int
rf_RegularONQFunc(RF_DagNode_t *node)
{
	int nparams = node->numParams;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[nparams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	RF_Etimer_t timer;
	RF_PhysDiskAddr_t *oldPda, *newPda;
	char *qBase, *qTarget;
	char *oldData, *newData;
	unsigned long coeff;
	int npairs, newBase, k;

	RF_ETIMER_START(timer);

	npairs = (nparams - 3) / 4;
	RF_ASSERT(4 * npairs + 3 == nparams);

	qBase = (char *) node->params[2 * npairs + 1].p;	/* Q buffer. */
	newBase = 2 * (npairs + 1);	/* First "new data" slot. */

	for (k = 0; k < 2 * npairs; k += 2) {
		oldPda = (RF_PhysDiskAddr_t *) node->params[k].p;
		oldData = (char *) node->params[k + 1].p;
		newPda = (RF_PhysDiskAddr_t *) node->params[newBase + k].p;
		newData = (char *) node->params[newBase + k + 1].p;

		RF_ASSERT(newPda->numSector == oldPda->numSector);
		RF_ASSERT(newPda->raidAddress == oldPda->raidAddress);

		/*
		 * The multiply coefficient is the stripe unit's index
		 * among the data columns.
		 */
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
		    newPda->raidAddress);
		coeff %= raidPtr->Layout.numDataCol;

		/* Skip to this access' offset inside the stripe unit. */
		qTarget = qBase + rf_RaidAddressToByte(raidPtr,
		    oldPda->startSector % secPerSU);

		rf_QDelta(qTarget, oldData, newData,
		    rf_RaidAddressToByte(raidPtr, oldPda->numSector), coeff);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);

	/* No I/O happens in this node, so wake it up by hand. */
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
* See the SimpleXORFunc for the difference between a simple and regular func.
* These Q functions should be used for
* new q = Q(data, old data, old q)
* style updates and not for
* q = (new data, new data, ...)
* computations.
*
* The simple q takes 2(2d+1)+1 params, where d is the number
* of stripes written. The order of params is
* old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ...
* old data pda_d, old data buffer_d
* [2d] old q pda_0, old q buffer
* [2d+2] new data pda_0, new data buffer_0, ...
* new data pda_d, new data buffer_d
* raidPtr
*/
/*
 * "Simple" old-new Q update.
 *
 * Takes the same 4d + 3 parameter layout as the regular variant (old
 * data pairs, Q pda/buffer, new data pairs, raidPtr), but every delta
 * is applied at the very start of the Q buffer, with no stripe-unit
 * offset. The elapsed time is added to the trace record's q_us, and
 * the node is woken explicitly since it performs no I/O.
 * Always returns 0.
 */
int
rf_SimpleONQFunc(RF_DagNode_t *node)
{
	int nparams = node->numParams;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[nparams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	RF_PhysDiskAddr_t *oldPda, *newPda;
	char *qTarget;
	char *oldData, *newData;
	unsigned long coeff;
	int npairs, newBase, k;

	RF_ETIMER_START(timer);

	npairs = (nparams - 3) / 4;
	RF_ASSERT(4 * npairs + 3 == nparams);

	qTarget = (char *) node->params[2 * npairs + 1].p;	/* Q buffer. */
	newBase = 2 * (npairs + 1);	/* First "new data" slot. */

	for (k = 0; k < 2 * npairs; k += 2) {
		oldPda = (RF_PhysDiskAddr_t *) node->params[k].p;
		oldData = (char *) node->params[k + 1].p;
		newPda = (RF_PhysDiskAddr_t *) node->params[newBase + k].p;
		newData = (char *) node->params[newBase + k + 1].p;

		RF_ASSERT(newPda->numSector == oldPda->numSector);
		RF_ASSERT(newPda->raidAddress == oldPda->raidAddress);

		/*
		 * The multiply coefficient is the stripe unit's index
		 * among the data columns.
		 */
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
		    newPda->raidAddress);
		coeff %= raidPtr->Layout.numDataCol;

		rf_QDelta(qTarget, oldData, newData,
		    rf_RaidAddressToByte(raidPtr, oldPda->numSector), coeff);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);

	/* No I/O happens in this node, so wake it up by hand. */
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * Fault-free small write for P+Q: delegates to the common small-write
 * DAG builder, supplying the old-new P table (rf_pFuncs) and the
 * old-new Q table (rf_qFuncs). The parameter names used below come from
 * RF_CREATE_DAG_FUNC_DECL.
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
{
rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
allocList, &rf_pFuncs, &rf_qFuncs);
}
void rf_RegularQSubr(RF_DagNode_t *, char *);

/*
 * Fold the Q contribution of every data buffer attached to node into
 * qbuf.
 *
 * The node carries np = 2d + 1 parameters: d (pda, buffer) pairs and
 * then raidPtr. Each buffer is accumulated with rf_IncQ() at the byte
 * offset of its start sector within the stripe unit, since the inputs
 * need not start at the stripe unit boundary. The elapsed time is added
 * to the trace record's q_us. No wakeup is issued here; that is left to
 * the caller.
 */
void
rf_RegularQSubr(RF_DagNode_t *node, char *qbuf)
{
	int nparams = node->numParams;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[nparams - 1].p;
	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	RF_PhysDiskAddr_t *pda;
	char *src, *dst;
	unsigned long coeff;
	int npairs, k;

	RF_ETIMER_START(timer);

	npairs = (nparams - 1) / 2;
	RF_ASSERT(2 * npairs + 1 == nparams);

	for (k = 0; k < 2 * npairs; k += 2) {
		pda = (RF_PhysDiskAddr_t *) node->params[k].p;
		src = (char *) node->params[k + 1].p;

		/* Coefficient: stripe unit index among the data columns. */
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
		    pda->raidAddress);
		coeff %= raidPtr->Layout.numDataCol;

		/* Shift by the sector offset within the stripe unit. */
		dst = qbuf + rf_RaidAddressToByte(raidPtr,
		    pda->startSector % secPerSU);

		rf_IncQ((unsigned long *) dst, (unsigned long *) src,
		    rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
/*
* Used in degraded writes.
*/
void rf_DegrQSubr(RF_DagNode_t *);

/*
 * Q accumulation for degraded writes.
 *
 * The node carries np = 2d + 2 parameters: d (pda, buffer) pairs, the
 * failed PDA and raidPtr. The Q buffer is node->results[1] and starts
 * at the failed PDA's offset within its stripe unit, so every input is
 * placed relative to that offset (inputs are asserted not to start
 * before it). The elapsed time is added to the trace record's q_us.
 */
void
rf_DegrQSubr(RF_DagNode_t *node)
{
	int nparams = node->numParams;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[nparams - 1].p;
	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	char *qBase = node->results[1];
	char *src, *dst;
	RF_PhysDiskAddr_t *pda;
	unsigned long coeff;
	unsigned failOffset;
	int npairs, k, suOffset;

	/* Offset of the failed region inside its stripe unit. */
	pda = (RF_PhysDiskAddr_t *) node->params[nparams - 2].p;
	failOffset = pda->startSector % secPerSU;

	RF_ETIMER_START(timer);

	npairs = (nparams - 2) / 2;
	RF_ASSERT(2 * npairs + 2 == nparams);

	for (k = 0; k < 2 * npairs; k += 2) {
		pda = (RF_PhysDiskAddr_t *) node->params[k].p;
		src = (char *) node->params[k + 1].p;

		/* Coefficient: stripe unit index among the data columns. */
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
		    pda->raidAddress);
		coeff %= raidPtr->Layout.numDataCol;

		/*
		 * Inputs need not be aligned with the start of the stripe
		 * unit; position each one relative to the failed region.
		 */
		suOffset = pda->startSector % secPerSU;
		RF_ASSERT(suOffset >= failOffset);
		dst = qBase + rf_RaidAddressToByte(raidPtr,
		    suOffset - failOffset);

		rf_IncQ((unsigned long *) dst, (unsigned long *) src,
		    rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
/*
* Called by large write code to compute the new parity and the new q.
*
* Structure of the params:
*
* pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d (d = numDataCol)
* raidPtr
*
* For a total of 2d+1 arguments.
* The result buffers results[0], results[1] are the buffers for the p and q,
* respectively.
*
* We compute Q first, then compute P. The P calculation may try to reuse
* one of the input buffers for its output, so if we computed P first, we would
* corrupt the input for the q calculation.
*/
/*
 * Large-write P+Q computation. Q goes into results[1] first; only then
 * is P computed by rf_RegularXorFunc(), which also performs the wakeup
 * and whose status is returned. The order matters because the P pass may
 * reuse an input buffer for its output.
 */
int
rf_RegularPQFunc(RF_DagNode_t *node)
{
	char *qbuf = node->results[1];
	int status;

	rf_RegularQSubr(node, qbuf);
	status = rf_RegularXorFunc(node);	/* Does the wakeup. */
	return (status);
}
/*
 * Q-only variant of the large-write computation: same accumulation as
 * rf_RegularPQFunc(), but the Q buffer is results[0] and no P pass
 * follows, so the wakeup has to be issued here. Always returns 0.
 */
int
rf_RegularQFunc(RF_DagNode_t *node)
{
	char *qbuf = node->results[0];

	rf_RegularQSubr(node, qbuf);

	/* No I/O happens in this node, so wake it up by hand. */
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
* Called by singly degraded write code to compute the new parity and
* the new q.
*
* Structure of the params:
*
* pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d
* failedPDA raidPtr
*
* for a total of 2d+2 arguments.
* The result buffers results[0], results[1] are the buffers for the parity
* and q, respectively.
*
* We compute Q first, then compute parity. The parity calculation may try
* to reuse one of the input buffers for its output, so if we computed parity
* first, we would corrupt the input for the q calculation.
*
* We treat this identically to the regularPQ case, ignoring the failedPDA
* extra argument.
*/
/*
 * Singly degraded write: compute Q with rf_DegrQSubr() and then parity
 * with rf_RecoveryXorFunc(), in that order. The XOR routine's return
 * value is not propagated.
 */
void
rf_Degraded_100_PQFunc(RF_DagNode_t *node)
{
	/* Need at least the failed PDA and raidPtr. */
	RF_ASSERT(node->numParams >= 2);

	rf_DegrQSubr(node);
	rf_RecoveryXorFunc(node);
}
/*
* The two below are used when reading a stripe with a single lost data unit.
* The parameters are
*
* pda_0, buffer_0, ..., pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
*
* and results[0] contains the data buffer, which is originally zero-filled.
*/
/*
* This Q func is used by the degraded-mode dag functions to recover lost data.
* The second-to-last parameter is the PDA for the failed portion of the
* access. The code here looks at this PDA and assumes that the xor target
* buffer is equal in size to the number of sectors in the failed PDA. It then
* uses the other PDAs in the parameter list to determine where within the
* target buffer the corresponding data should be xored.
*
* Recall the basic equation is
*
* Q = (data_1 + 2 * data_2 ... + k * data_k) mod 256
*
* so to recover data_j we need
*
* J data_j = (Q - data_1 - 2 data_2 ... - k * data_k) mod 256
*
* So the coefficient for each buffer is (255 - data_col), and j should be
* initialized by copying Q into it. Then we need to do a table lookup to
* convert to solve
* data_j /= J
*
*/
/*
 * Recover a lost data unit from Q.
 *
 * Parameter usage, as indexed by the code below (np = node->numParams):
 *   [0 .. np-5]  (pda, buffer) pairs of the surviving data
 *   [np-3]       Q buffer (its pda sits at [np-4])
 *   [np-2]       failed PDA
 *   [np-1]       raidPtr
 *
 * results[0] is the recovery target and spans failedPDA->numSector
 * sectors. Q is copied into it, every surviving buffer is folded in with
 * rf_IncQ() at its offset relative to the failed region, and finally the
 * result is inverted in place with rf_InvertQ(). The elapsed time is
 * added to the trace record's q_us and the node is woken up.
 * Always returns 0.
 *
 * Two fixes relative to the previous revision of this function:
 *  - The inversion length is taken from failedPDA, which is what sizes
 *    the target buffer. It used to come from the last PDA visited by the
 *    loop, which may be shorter and is uninitialized when there is no
 *    surviving data pair.
 *  - The failed unit's coefficient is derived from its raidAddress, the
 *    same way as for the surviving units, not from its physical
 *    startSector.
 */
int
rf_RecoveryQFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA =
	    (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int i;
	RF_PhysDiskAddr_t *pda;
	RF_RaidAddr_t suoffset;
	RF_RaidAddr_t failedSUOffset =
	    rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	char *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	unsigned long coeff;

	RF_ETIMER_START(timer);

	/* Start by copying Q into the buffer. */
	bcopy(node->params[node->numParams - 3].p, node->results[0],
	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));

	/* Fold in every surviving data buffer. */
	for (i = 0; i < node->numParams - 4; i += 2) {
		RF_ASSERT(node->params[i + 1].p != node->results[0]);
		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
		srcbuf = (char *) node->params[i + 1].p;
		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
		destbuf = ((char *) node->results[0]) +
		    rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
		    pda->raidAddress);
		/* Compute the data unit offset within the column. */
		coeff = (coeff % raidPtr->Layout.numDataCol);
		rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf,
		    rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
	}

	/* Do the nasty inversion now, over the whole failed region. */
	coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
	    failedPDA->raidAddress);
	coeff = (coeff % raidPtr->Layout.numDataCol);
	rf_InvertQ(node->results[0], node->results[0],
	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector), coeff);

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * Placeholder for recovery from both P and Q: prints a "not implemented"
 * message naming the array and returns 1. The node is not woken up here.
 */
int
rf_RecoveryPQFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr;

	/* raidPtr is always the last parameter. */
	raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	printf("raid%d: Recovery from PQ not implemented.\n", raidPtr->raidid);
	return (1);
}
/*
* Degraded write Q subroutine.
* Used when P is dead.
* Large-write style Q computation.
* Parameters:
*
* (pda, buf), (pda, buf), ..., (failedPDA, bufPtr), failedPDA, raidPtr.
*
* We ignore failedPDA.
*
* This is a "simple style" recovery func.
*/
/*
 * Large-write style Q computation for degraded writes with P dead.
 *
 * The node carries np = 2d + 2 parameters: d (pda, buffer) pairs, the
 * failed PDA and raidPtr. The Q buffer is node->results[0]; every input
 * is accumulated with rf_IncQ() at its stripe-unit offset relative to
 * the failed PDA's offset (inputs are asserted not to start before it).
 * The elapsed time is added to the trace record's q_us and the node is
 * woken up.
 */
void
rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
{
	int nparams = node->numParams;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[nparams - 1].p;
	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	char *qBase = node->results[0];
	char *src, *dst;
	RF_PhysDiskAddr_t *pda;
	unsigned long coeff;
	int failOffset, suOffset;
	int npairs, k;

	/* Offset of the failed region inside its stripe unit. */
	pda = (RF_PhysDiskAddr_t *) node->params[nparams - 2].p;
	failOffset = pda->startSector % secPerSU;

	RF_ETIMER_START(timer);

	npairs = (nparams - 2) / 2;
	RF_ASSERT(2 * npairs + 2 == nparams);

	for (k = 0; k < 2 * npairs; k += 2) {
		pda = (RF_PhysDiskAddr_t *) node->params[k].p;
		src = (char *) node->params[k + 1].p;

		/* Coefficient: stripe unit index among the data columns. */
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
		    pda->raidAddress);
		coeff %= raidPtr->Layout.numDataCol;

		suOffset = pda->startSector % secPerSU;
		RF_ASSERT(suOffset >= failOffset);
		dst = qBase + rf_RaidAddressToByte(raidPtr,
		    suOffset - failOffset);

		rf_IncQ((unsigned long *) dst, (unsigned long *) src,
		    rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);
}
/* Q computations. */
/*
* Coeff - column;
*
* Compute dest ^= qfor[28-coeff][rn[coeff+1] ^ a]
*
* On 5-bit basis;
* Length in bytes;
*/
/*
 * Accumulate the Q contribution of one data buffer:
 *
 *	dest ^= rf_qfor[28 - coeff][rf_rn[coeff + 1] ^ a]
 *
 * applied to every 5-bit quant a of buf. coeff is the data column and
 * length is in bytes.
 *
 * The buffers are walked as unsigned long words and length / 8 words
 * are processed, so the code assumes 8-byte words. Quants 0..6 are
 * always handled; quants 7..12 only when RF_LONGSHIFT > 2.
 *
 * Every quant now goes through the table. Quants 0 and 1 used to be
 * XORed into dest without the lookup, which disagreed with the formula
 * above, with rf_QDelta() and with rf_InvertQ(), which undoes the lookup
 * on every quant.
 *
 * EXTRACT() and INSERT() are defined here and stay defined, since later
 * functions in this file use them too.
 */
void
rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length,
    unsigned coeff)
{
	unsigned long a, new;
	unsigned long a1;
	unsigned int *q = &(rf_qfor[28 - coeff][0]);
	unsigned r = rf_rn[coeff + 1];

#define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
#define INSERT(a,i) (a << (5L*i))

/*
 * Encode quant i of a and merge it into new. The table value is widened
 * to unsigned long (a1) before being shifted into place.
 */
#define RF_INCQ_QUANT(i)						\
	do {								\
		a1 = q[EXTRACT(a, i) ^ r];				\
		new |= INSERT(a1, i);					\
	} while (0)

	length /= 8;
	/* 13 5 bit quants in a 64 bit word. */
	while (length) {
		a = *buf++;
		new = 0;
		RF_INCQ_QUANT(0);
		RF_INCQ_QUANT(1);
		RF_INCQ_QUANT(2);
		RF_INCQ_QUANT(3);
		RF_INCQ_QUANT(4);
		RF_INCQ_QUANT(5);
		RF_INCQ_QUANT(6);
#if RF_LONGSHIFT > 2
		RF_INCQ_QUANT(7);
		RF_INCQ_QUANT(8);
		RF_INCQ_QUANT(9);
		RF_INCQ_QUANT(10);
		RF_INCQ_QUANT(11);
		RF_INCQ_QUANT(12);
#endif /* RF_LONGSHIFT > 2 */
		*dest++ ^= new;
		length--;
	}
#undef RF_INCQ_QUANT
}
/*
* Compute.
*
* dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new)]
*
* On a five bit basis.
* Optimization: compute old ^ new on 64 bit basis.
*
* Length in bytes.
*/
/*
 * Apply the Q delta of a data update:
 *
 *	dest ^= rf_qfor[28 - coeff][rf_rn[coeff + 1] ^ (old ^ new)]
 *
 * on a 5-bit quant basis, with old ^ new computed a word at a time.
 * coeff is the data column and length is in bytes.
 *
 * In the kernel this is not supported (the encoding/decoding table is
 * not present) and dest is simply zeroed, as before.
 *
 * Outside the kernel the buffers are walked as unsigned long words and
 * length / 8 words are processed, so 8-byte words and word-aligned
 * buffers are assumed, as in rf_IncQ(). Previously this path reset the
 * table pointer to NULL and the rf_rn value to 0 before use, and moved
 * single chars per iteration, so it could not work.
 * NOTE(review): this path is still untested; see the original remark
 * that the code probably does not work.
 */
void
rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
    unsigned char coeff)
{
#ifdef _KERNEL
	/*
	 * PQ in kernel currently not supported because the encoding/decoding
	 * table is not present.
	 */
	bzero(dest, length);
#else /* _KERNEL */
	unsigned long *dp = (unsigned long *) dest;
	unsigned long *op = (unsigned long *) obuf;
	unsigned long *np = (unsigned long *) nbuf;
	unsigned long a, new;
	unsigned long a1;
	unsigned int *q = &(rf_qfor[28 - coeff][0]);
	unsigned int r = rf_rn[coeff + 1];

/*
 * Encode quant i of a and merge it into new. The table value is widened
 * to unsigned long (a1) before being shifted into place.
 */
#define RF_QDELTA_QUANT(i)						\
	do {								\
		a1 = q[EXTRACT(a, i) ^ r];				\
		new |= INSERT(a1, i);					\
	} while (0)

	/* 13 5 bit quants in a 64 bit word. */
	length /= 8;
	while (length) {
		/* XXX Need to reorg to avoid cache conflicts. */
		a = *op++ ^ *np++;
		new = 0;
		RF_QDELTA_QUANT(0);
		RF_QDELTA_QUANT(1);
		RF_QDELTA_QUANT(2);
		RF_QDELTA_QUANT(3);
		RF_QDELTA_QUANT(4);
		RF_QDELTA_QUANT(5);
		RF_QDELTA_QUANT(6);
#if RF_LONGSHIFT > 2
		RF_QDELTA_QUANT(7);
		RF_QDELTA_QUANT(8);
		RF_QDELTA_QUANT(9);
		RF_QDELTA_QUANT(10);
		RF_QDELTA_QUANT(11);
		RF_QDELTA_QUANT(12);
#endif /* RF_LONGSHIFT > 2 */
		*dp++ ^= new;
		length--;
	}
#undef RF_QDELTA_QUANT
#endif /* _KERNEL */
}
/*
* Recover columns a and b from the given p and q into
* bufs abuf and bbuf. All bufs are word aligned.
* Length is in bytes.
*/
/*
* XXX
*
* Everything about this seems wrong.
*/
/*
 * Recover two data columns from P and Q.
 *
 * For every word, each 5-bit quant of p and q indexes the inverse table
 * row rf_qinv[29 * coeff_a + coeff_b] at (p_quant << 5 | q_quant); the
 * looked-up quants form the word stored to abuf, and that word XORed
 * with p is stored to bbuf. Only quants 0..6 are processed; the code
 * for quants 7..12 is compiled out. length is in bytes and length / 8
 * words are processed.
 *
 * The table entry is an unsigned char, so it is widened to unsigned long
 * before being shifted into place. Shifting the promoted int left by 30
 * (quant 6) overflowed, and where long is wider than int the result was
 * sign-extended into the upper bits of the output word.
 *
 * XXX The original author's remark still applies: everything about this
 * seems wrong.
 */
void
rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf,
    unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
{
	unsigned long p, q, a, a0, a1;
	int col = (29 * coeff_a) + coeff_b;
	unsigned char *q0 = &(rf_qinv[col][0]);

	length /= 8;
	while (length) {
		p = *pbuf++;
		q = *qbuf++;
		a0 = EXTRACT(p, 0);
		a1 = EXTRACT(q, 0);
		a = q0[a0 << 5 | a1];
#define MF(i)								\
	do {								\
		a0 = EXTRACT(p, i);					\
		a1 = EXTRACT(q, i);					\
		a = a | INSERT((unsigned long) q0[a0<<5 | a1], i);	\
	} while (0)
		MF(1);
		MF(2);
		MF(3);
		MF(4);
		MF(5);
		MF(6);
#if 0
		MF(7);
		MF(8);
		MF(9);
		MF(10);
		MF(11);
		MF(12);
#endif /* 0 */
		*abuf++ = a;
		*bbuf++ = a ^ p;
		length--;
	}
}
/*
* Lost parity and a data column. Recover that data column.
* Assume col coeff is lost. Let q the contents of Q after
* all surviving data columns have been q-xored out of it.
* Then we have the equation
*
* q[28-coeff][a_i ^ r_i+1] = q
*
* but q is cyclic with period 31.
* So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
* q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
*
* so a_i = r_{coeff+1} ^ q[3+coeff][q]
*
* The routine is passed q buffer and the buffer
* the data is to be recovered into. They can be the same.
*/
/*
 * Invert the Q encoding of data column coeff.
 *
 * Every 5-bit quant x of qbuf is replaced by
 *
 *	rf_rn[coeff + 1] ^ rf_qfor[3 + coeff][x]
 *
 * and the reassembled words are stored to abuf. qbuf and abuf may be
 * the same buffer: each word is read before it is written. length is in
 * bytes and length / 8 words are processed. Quants 0..6 are always
 * handled, quants 7..12 only when RF_LONGSHIFT > 2.
 */
void
rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
    unsigned coeff)
{
	unsigned int *inv = &(rf_qfor[3 + coeff][0]);
	unsigned rnd = rf_rn[coeff + 1];
	unsigned long word, out, quant;
	unsigned nwords;
	int k;
	/* 13 5 bit quants in a 64 bit word. */
#if RF_LONGSHIFT > 2
	const int nquants = 13;
#else
	const int nquants = 7;
#endif /* RF_LONGSHIFT > 2 */

	for (nwords = length / 8; nwords != 0; nwords--) {
		word = *qbuf++;
		out = 0;
		for (k = 0; k < nquants; k++) {
			/* Widen to unsigned long before shifting. */
			quant = rnd ^ inv[EXTRACT(word, k)];
			out |= INSERT(quant, k);
		}
		*abuf++ = out;
	}
}
#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */