File: [local] / sys / dev / raidframe / rf_pqdegdags.c (download)
Revision 1.1.1.1 (vendor branch), Tue Mar 4 16:09:50 2008 UTC (16 years, 4 months ago) by nbrk
Branch: OPENBSD_4_2_BASE, MAIN
CVS Tags: jornada-partial-support-wip, HEAD Changes since 1.1: +0 -0 lines
Import of OpenBSD 4.2 release kernel tree with initial code to support
Jornada 720/728, StrongARM 1110-based handheld PC.
At this point kernel roots on NFS and boots into vfs_mountroot() and traps.
What is supported:
- glass console, Jornada framebuffer (jfb) works in 16bpp direct color mode
(needs some palette tweaks for non black/white/blue colors, i think)
- saic, SA11x0 interrupt controller (needs cleanup)
- sacom, SA11x0 UART (supported only as boot console for now)
- SA11x0 GPIO controller fully supported (but can't handle multiple interrupt
handlers on one gpio pin)
- sassp, SSP port on SA11x0 that attaches spibus
- Jornada microcontroller (jmcu) to control kbd, battery, etc through
the SPI bus (wskbd attaches on jmcu, but not tested)
- tod functions seem to work
- initial code for SA-1111 (chip companion) : this is TODO
Next important steps, i think:
- gpio and intc on sa1111
- pcmcia support for sa11x0 (and sa1111 help logic)
- REAL root on nfs when we have PCMCIA support (we may use any of supported pccard NICs)
- root on wd0! (using already supported PCMCIA-ATA)
|
/* $OpenBSD: rf_pqdegdags.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */
/* $NetBSD: rf_pqdegdags.c,v 1.5 1999/08/15 02:36:40 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_pqdegdags.c
* Degraded mode dags for double fault cases.
*/
#include "rf_archs.h"
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagfuncs.h"
#include "rf_dagutils.h"
#include "rf_etimer.h"
#include "rf_acctrace.h"
#include "rf_general.h"
#include "rf_pqdegdags.h"
#include "rf_pq.h"
/*
 * Forward declaration of rf_applyPDA(), defined later in this file.
 * Arguments, in order: raidPtr, the data pda to apply, the P recovery pda,
 * the Q recovery pda, and the bp handle that is handed on to rf_bxor().
 */
void rf_applyPDA(RF_Raid_t *, RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *,
RF_PhysDiskAddr_t *, void *);
/*
* Two data drives have failed, and we are doing a read that covers one of them.
* We may also be reading some of the surviving drives.
*/
/*****************************************************************************
*
* Creates a DAG to perform a degraded-mode read of data within one stripe.
* This DAG is as follows:
*
*                    Hdr
*                     |
*                   Block
*        /       /        \       \      \    \
*      Rud ... Rud       Rrd ... Rrd     Rp   Rq
*      | \     | \       | \     | \     | \  | \
*
*               |                 |
*            Unblock              X
*                \               /
*                 ------ T ------
*
* Each R node is a successor of the Block node.
* One successor arc from each R node goes to Unblock, and the other to X.
* There is one Rud for each chunk of surviving user data requested by the
* user, and one Rrd for each chunk of surviving user data _not_ being read
* by the user.
* R = read, ud = user data, rd = recovery (surviving) data, p = P data,
* q = Qdata, X = pq recovery node, T = terminate
*
* The block & unblock nodes are leftovers from a previous version. They
* do nothing, but I haven't deleted them because it would be a tremendous
* effort to put them back in.
*
* Note: The target buffer for the XOR node is set to the actual user buffer
* where the failed data is supposed to end up. This buffer is zero'd by the
* code here. Thus, if you create a degraded read dag, use it, and then
* re-use it, you have to be sure to zero the target buffer prior to the re-use.
*
* Every buffer read is passed to the pq recovery node, whose job it is to
* sort out what's needed and what's not.
*****************************************************************************/
/*
 * Init a disk node with 2 successors and one predecessor.
 *
 * Calls rf_InitNode() with rf_DiskReadFunc, rf_DiskReadUndoFunc and
 * rf_GenericWakeupFunc, then wires the node into the graph:
 *   succedents[0]  = unblockNode
 *   succedents[1]  = recoveryNode
 *   antecedents[0] = blockNode (arc type rf_control)
 *
 * The macro is not self-contained: dag_h, allocList, blockNode,
 * unblockNode and recoveryNode must all be visible where it is expanded.
 * NOTE(review): the literal "2, 1, 4, 0" presumably gives the successor,
 * antecedent, parameter and result counts -- confirm against rf_InitNode().
 */
#define INIT_DISK_NODE(node,name) \
do { \
rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, \
rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2, 1, 4, 0, \
dag_h, name, allocList); \
(node)->succedents[0] = unblockNode; \
(node)->succedents[1] = recoveryNode; \
(node)->antecedents[0] = blockNode; \
(node)->antType[0] = rf_control; \
} while (0)
/*
 * Fill in the four parameters of a disk node from a physical disk address:
 *   params[0].p = the pda itself
 *   params[1].p = the pda's data buffer (bufPtr)
 *   params[2].v = parityStripeID
 *   params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru)
 *
 * _node_ is accessed with '.', so it must be the node object itself, not a
 * pointer to it. parityStripeID and which_ru must be visible where the
 * macro is expanded.
 */
#define DISK_NODE_PARAMS(_node_,_p_) \
do { \
(_node_).params[0].p = _p_ ; \
(_node_).params[1].p = (_p_)->bufPtr; \
(_node_).params[2].v = parityStripeID; \
(_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, \
0, 0, which_ru); \
} while (0)
/*
 * Fetch the pointer stored in params[0] of a disk node (the slot that
 * DISK_NODE_PARAMS fills with the pda). Here node is a pointer.
 */
#define DISK_NODE_PDA(node) ((node)->params[0].p)
/*
 * DAG creation routine for a P+Q read with two failed units.
 *
 * Delegates to rf_DoubleDegRead(), forwarding the creation arguments
 * unchanged and adding the node names "Rq" and "PQ Recovery" together with
 * rf_PQDoubleRecoveryFunc as the recovery function.
 * NOTE(review): raidPtr, asmap, dag_h, bp, flags and allocList are
 * presumably the parameters declared by RF_CREATE_DAG_FUNC_DECL -- confirm
 * against the macro definition.
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQ_DoubleDegRead)
{
rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList,
"Rq", "PQ Recovery", rf_PQDoubleRecoveryFunc);
}
/*
 * Fold the contribution of one data pda into the P and Q recovery buffers.
 *
 *   raidPtr - array descriptor; supplies the layout and the sector size.
 *   pda     - data unit whose contents (pda->bufPtr) are to be applied.
 *   ppda    - P recovery pda. Its sector range within the stripe unit
 *             defines the region being recovered; pda's data is XORed
 *             into ppda->bufPtr.
 *   qpda    - Q recovery pda; qpda->bufPtr is updated through rf_IncQ()
 *             and is advanced by the same amount as the P buffer.
 *   bp      - passed through to rf_bxor().
 *
 * Only the part of pda that overlaps the recovery range is applied; a pda
 * that does not overlap it at all is ignored.
 *
 * Buffer pointers are advanced in BYTES (rf_RaidAddressToByte).  Earlier
 * code used rf_RaidAddressToStripeUnitID() on the sector delta, which is
 * a stripe-unit index rather than a byte count, so a partially overlapping
 * pda was applied at the wrong buffer offset.
 */
void
rf_applyPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda,
    RF_PhysDiskAddr_t *ppda, RF_PhysDiskAddr_t *qpda, void *bp)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	RF_RaidAddr_t s0off = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
	RF_SectorCount_t s0len = ppda->numSector, len;
	RF_SectorCount_t delta;
	RF_SectorNum_t suoffset;
	unsigned coeff;
	char *pbuf = ppda->bufPtr;
	char *qbuf = qpda->bufPtr;
	char *buf = pda->bufPtr;

	suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
	len = pda->numSector;

	/* Nothing to do unless pda intersects the recovery pda. */
	if ((suoffset >= s0off + s0len) || (suoffset + len <= s0off))
		return;

	/* Q coefficient: index of the data unit within its stripe. */
	coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
	coeff = (coeff % layoutPtr->numDataCol);

	if (suoffset < s0off) {
		/* pda starts before the recovery range: skip its head. */
		delta = s0off - suoffset;
		buf += rf_RaidAddressToByte(raidPtr, delta);
		suoffset = s0off;
		len -= delta;
	} else if (suoffset > s0off) {
		/* pda starts inside the recovery range: skip into P and Q. */
		delta = suoffset - s0off;
		pbuf += rf_RaidAddressToByte(raidPtr, delta);
		qbuf += rf_RaidAddressToByte(raidPtr, delta);
	}

	/* Clip the tail of pda to the end of the recovery range. */
	if ((suoffset + len) > (s0len + s0off))
		len = s0len + s0off - suoffset;

	/* Src, dest, len. */
	rf_bxor(buf, pbuf, rf_RaidAddressToByte(raidPtr, len), bp);
	/* Dest, src, len, coeff. */
	rf_IncQ((unsigned long *) qbuf, (unsigned long *) buf,
	    rf_RaidAddressToByte(raidPtr, len), coeff);
}
/*
 * Recover data in the case of a double failure.  There can be two
 * result buffers, one for each chunk of data trying to be recovered.
 * The params are pda's that have not been range restricted or otherwise
 * politely massaged - this is done here (by rf_applyPDA).  The last params
 * are the pdas of P and Q, followed by the raidPtr and the asmap.  The list
 * can look like
 *
 *   pda, pda, ..., p pda, q pda, raidptr, asm
 *
 * or
 *
 *   pda, pda, ..., p_1 pda, p_2 pda, q_1 pda, q_2 pda, raidptr, asm
 *
 * depending on whether two chunks of recovery data were required.
 *
 * The second condition only arises if there are two failed buffers
 * whose lengths do not add up to a stripe unit.  That case trips
 * RF_ASSERT(0) below and, if it gets past that, RF_PANIC(): only the
 * single-failed-pda recovery is implemented.
 *
 * The time spent here is added to tracerec->q_us when a trace record is
 * attached to the DAG header.  The node is completed through
 * rf_GenericWakeupFunc(node, 0) and the function always returns 0.
 */
int
rf_PQDoubleRecoveryFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	RF_AccessStripeMap_t *asmap =
	    (RF_AccessStripeMap_t *) node->params[np - 1].p;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
	int d, i;
	unsigned coeff;
	RF_RaidAddr_t sosAddr;
	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
	int two = 0;
	RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
	/* Only set (and only used) when two recovery chunks are present. */
	RF_PhysDiskAddr_t *ppda2 = NULL, *qpda2 = NULL;
	int numDataCol = layoutPtr->numDataCol;
	RF_Etimer_t timer;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;

	RF_ETIMER_START(timer);

	if (asmap->failedPDAs[1] &&
	    (asmap->failedPDAs[1]->numSector +
	     asmap->failedPDAs[0]->numSector < secPerSU)) {
		/* Two separate recovery chunks: not supported. */
		RF_ASSERT(0);
		ppda = node->params[np - 6].p;
		ppda2 = node->params[np - 5].p;
		qpda = node->params[np - 4].p;
		qpda2 = node->params[np - 3].p;
		d = (np - 6);
		two = 1;
	} else {
		ppda = node->params[np - 4].p;
		qpda = node->params[np - 3].p;
		d = (np - 4);
	}

	/*
	 * Apply every data pda that was read to the recovery buffers;
	 * rf_applyPDA() ignores pdas that do not intersect a recovery pda.
	 */
	for (i = 0; i < d; i++) {
		pda = node->params[i].p;
		rf_applyPDA(raidPtr, pda, ppda, qpda, node->dagHdr->bp);
		if (two) {
			/* The second chunk has its own P and Q buffers. */
			rf_applyPDA(raidPtr, pda, ppda2, qpda2,
			    node->dagHdr->bp);
		}
	}

	/*
	 * Ok, we got the parity back to the point where we can recover.  We
	 * now need to determine the coeff of the columns that need to be
	 * recovered.  We can also only need to recover a single stripe unit.
	 */
	if (asmap->failedPDAs[1] == NULL) {
		/* Only a single stripe unit to recover. */
		pda = asmap->failedPDAs[0];
		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
		    asmap->raidAddress);

		/* Data unit index (within the stripe) of the unit we want. */
		coeff = rf_RaidAddressToStripeUnitID(layoutPtr,
		    pda->raidAddress);
		coeff = (coeff % layoutPtr->numDataCol);

		/*
		 * Need to determine the column of the other failed disk:
		 * walk the data units of the stripe and stop at the first
		 * one that maps to a dead disk and is not the unit above.
		 */
		for (i = 0; i < numDataCol; i++) {
			npda.raidAddress = sosAddr + (i * secPerSU);
			(raidPtr->Layout.map->MapSector) (raidPtr,
			    npda.raidAddress, &(npda.row), &(npda.col),
			    &(npda.startSector), 0);
			if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col]
			    .status) && (i != coeff))
				break;
		}
		RF_ASSERT(i < numDataCol);
		RF_ASSERT(two == 0);

		/*
		 * Recover the data.  Since we need only to recover one
		 * column, we overwrite the parity with the other one.
		 * The smaller coefficient is always passed first.
		 */
		if (coeff < i) {
			/* Recovering 'a'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    coeff, i);
		} else {
			/* Recovering 'b'. */
			rf_PQ_recover((unsigned long *) ppda->bufPtr,
			    (unsigned long *) qpda->bufPtr,
			    (unsigned long *) ppda->bufPtr,
			    (unsigned long *) pda->bufPtr,
			    rf_RaidAddressToByte(raidPtr, pda->numSector),
			    i, coeff);
		}
	} else {
		RF_PANIC();
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	if (tracerec)
		tracerec->q_us += RF_ETIMER_VAL_US(timer);

	rf_GenericWakeupFunc(node, 0);
	return (0);
}
/*
 * Recovery node function for a degraded P+Q small write that hits exactly
 * one failed data unit (asserted: failedPDAs[1] == NULL and
 * numStripeUnitsAccessed == 1).
 *
 * The P and Q buffers are taken from node->results[0] and
 * node->results[1] and are rewritten in place.  Elapsed time is added to
 * tracerec->q_us when a trace record is attached.  The node is completed
 * with rf_GenericWakeupFunc(node, 0) and the function always returns 0.
 */
int
rf_PQWriteDoubleRecoveryFunc(RF_DagNode_t *node)
{
/*
* The situation:
*
* We are doing a write that hits only one failed data unit. The other
* failed data unit is not being overwritten, so we need to generate
* it.
*
* For the moment, we assume all the nonfailed data being written is in
* the shadow of the failed data unit. (i.e., either a single data
* unit write or the entire failed stripe unit is being overwritten.)
*
* Recovery strategy: apply the recovery data to the parity and Q.
* Use P & Q to recover the second failed data unit in P. Zero fill
* Q, then apply the recovered data to P. Then apply the data being
* written to the failed drive. Then walk through the surviving drives,
* applying new data when it exists, otherwise the recovery data.
* Quite a mess.
*
*
* The params:
*
* read pda0, read pda1, ..., read pda (numDataCol-3),
* write pda0, ..., write pda (numStripeUnitAccess - numDataFailed),
* failed pda, raidPtr, asmap
*/
int np = node->numParams;
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *)
node->params[np - 1].p;
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
int i;
RF_RaidAddr_t sosAddr;
unsigned coeff;
/*
 * NOTE(review): holds a sector count but is declared with the stripe-count
 * typedef; the other functions in this file use RF_SectorCount_t for it.
 */
RF_StripeCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
RF_PhysDiskAddr_t *ppda, *qpda, *pda, npda;
int numDataCol = layoutPtr->numDataCol;
RF_Etimer_t timer;
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
RF_ASSERT(node->numResults == 2);
RF_ASSERT(asmap->failedPDAs[1] == NULL);
RF_ETIMER_START(timer);
/* P and Q buffers are the node's two results. */
ppda = node->results[0];
qpda = node->results[1];
/* apply the recovery data */
/* The first numDataCol - 2 params are the read (surviving data) pdas. */
for (i = 0; i < numDataCol - 2; i++)
rf_applyPDA(raidPtr, node->params[i].p, ppda, qpda,
node->dagHdr->bp);
/* Determine the other failed data unit. */
pda = asmap->failedPDAs[0];
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
asmap->raidAddress);
/* Need to determine the column of the other failed disk. */
coeff = rf_RaidAddressToStripeUnitID(layoutPtr, pda->raidAddress);
/* Compute the data unit offset within the column. */
coeff = (coeff % raidPtr->Layout.numDataCol);
/*
 * Walk the data units of the stripe; stop at the first one that maps to a
 * dead disk and is not the unit being written (coeff).  On exit i is the
 * index of the other failed unit.
 */
for (i = 0; i < numDataCol; i++) {
npda.raidAddress = sosAddr + (i * secPerSU);
(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress,
&(npda.row), &(npda.col), &(npda.startSector), 0);
/* Skip over dead disks. */
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
if (i != coeff)
break;
}
RF_ASSERT(i < numDataCol);
/*
* Recover the data. The column we want to recover, we write over the
* parity. The column we don't care about, we dump in q.
*/
/*
 * NOTE(review): in rf_PQDoubleRecoveryFunc the third and fourth buffer
 * arguments of rf_PQ_recover appear to receive the columns named by the
 * last two arguments, in that order.  If so, both calls below leave column
 * `coeff` in P and column `i` in Q, while the code that follows treats P
 * as holding column `i`.  Confirm against rf_PQ_recover before relying on
 * this path.
 */
if (coeff < i) /* Recovering 'a'. */
rf_PQ_recover((unsigned long *) ppda->bufPtr,
(unsigned long *) qpda->bufPtr,
(unsigned long *) ppda->bufPtr,
(unsigned long *) qpda->bufPtr,
rf_RaidAddressToByte(raidPtr, pda->numSector), coeff, i);
else /* Recovering 'b'. */
rf_PQ_recover((unsigned long *) ppda->bufPtr,
(unsigned long *) qpda->bufPtr,
(unsigned long *) qpda->bufPtr,
(unsigned long *) ppda->bufPtr,
rf_RaidAddressToByte(raidPtr, pda->numSector), i, coeff);
/* OK. The valid data is in P. Zero fill Q, then inc it into it. */
bzero(qpda->bufPtr, rf_RaidAddressToByte(raidPtr, qpda->numSector));
/* Q restarts from zero and takes the contents of P with coefficient i. */
rf_IncQ((unsigned long *) qpda->bufPtr, (unsigned long *) ppda->bufPtr,
rf_RaidAddressToByte(raidPtr, qpda->numSector), i);
/* Now apply all the write data to the buffer. */
/*
* Single stripe unit write case: The failed data is the only thing
* we are writing.
*/
RF_ASSERT(asmap->numStripeUnitsAccessed == 1);
/* Dest, src, len, coeff. */
rf_IncQ((unsigned long *) qpda->bufPtr,
(unsigned long *) asmap->failedPDAs[0]->bufPtr,
rf_RaidAddressToByte(raidPtr, qpda->numSector), coeff);
/* XOR the data being written into P as well. */
rf_bxor(asmap->failedPDAs[0]->bufPtr, ppda->bufPtr,
rf_RaidAddressToByte(raidPtr, ppda->numSector), node->dagHdr->bp);
/* Now apply all the recovery data. */
/* Second pass over the same read pdas as the first loop above. */
for (i = 0; i < numDataCol - 2; i++)
rf_applyPDA(raidPtr, node->params[i].p, ppda, qpda,
node->dagHdr->bp);
RF_ETIMER_STOP(timer);
RF_ETIMER_EVAL(timer);
if (tracerec)
tracerec->q_us += RF_ETIMER_VAL_US(timer);
rf_GenericWakeupFunc(node, 0);
return (0);
}
/*
 * DAG creation routine for the double-degraded large-write case.
 * Not implemented: the body unconditionally invokes RF_PANIC().
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDLargeWrite)
{
RF_PANIC();
}
/*
* Two lost data unit write case.
*
* There are really two cases here:
*
* (1) The write completely covers the two lost data units.
* In that case, a reconstruct write that doesn't write the
* failed data units will do the correct thing. So in this case,
* the dag looks like
*
* Full stripe read of surviving data units (not being overwritten)
* Write new data (ignoring failed units)
* Compute P&Q
* Write P&Q
*
*
* (2) The write does not completely cover both failed data units
* (but touches at least one of them). Then we need to do the
* equivalent of a reconstruct read to recover the missing data
* unit from the other stripe.
*
* For any data we are writing that is not in the "shadow"
* of the failed units, we need to do a four cycle update.
* PANIC on this case. For now.
*
*/
RF_CREATE_DAG_FUNC_DECL(rf_PQ_200_CreateWriteDAG)
{
	/*
	 * Pick the write DAG from how much of the failed data unit(s) the
	 * access covers:
	 *   - two failed units, both fully covered  -> rf_PQ_DDLargeWrite
	 *   - every accessed unit is a failed one, or the failed sectors
	 *     add up to at least one stripe unit    -> rf_PQ_DDSimpleSmallWrite
	 *   - anything else                         -> RF_PANIC()
	 */
	RF_RaidLayout_t *layout = &(raidPtr->Layout);
	RF_SectorCount_t suSectors = layout->sectorsPerStripeUnit;
	int failedSectors;
	int numFailed = asmap->numDataFailed;

	/* Total sectors of this access that land on failed units. */
	failedSectors = asmap->failedPDAs[0]->numSector;
	if (numFailed == 2)
		failedSectors += asmap->failedPDAs[1]->numSector;

	if ((numFailed == 2) && (failedSectors == (2 * suSectors))) {
		/* Large write case. */
		rf_PQ_DDLargeWrite(raidPtr, asmap, dag_h, bp, flags, allocList);
	} else if ((numFailed == asmap->numStripeUnitsAccessed) ||
	    (failedSectors >= suSectors)) {
		/* Small write case, no user data not in shadow. */
		rf_PQ_DDSimpleSmallWrite(raidPtr, asmap, dag_h, bp, flags,
		    allocList);
	} else {
		RF_PANIC();
	}
}
/*
 * DAG creation routine for the double-degraded small-write case.
 *
 * Delegates to rf_DoubleDegSmallWrite(), forwarding the creation arguments
 * unchanged and adding the node names "Rq", "Wq" and "PQ Recovery" together
 * with rf_PQWriteDoubleRecoveryFunc as the recovery function.
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQ_DDSimpleSmallWrite)
{
rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList,
"Rq", "Wq", "PQ Recovery", rf_PQWriteDoubleRecoveryFunc);
}
#endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */