Annotation of sys/dev/raidframe/rf_decluster.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_decluster.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */
! 2: /* $NetBSD: rf_decluster.c,v 1.5 2000/03/07 01:54:29 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: Mark Holland
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*****************************************************************************
! 32: *
! 33: * rf_decluster.c -- Code related to the declustered layout.
! 34: *
! 35: * Created 10-21-92 (MCH)
! 36: *
! 37: * Nov 93: Adding support for distributed sparing. This code is a little
! 38: * complex; the basic layout used is as follows:
! 39: * Let F = (v-1)/GCD(r,v-1). The spare space for each set of
! 40: * F consecutive fulltables is grouped together and placed after
! 41: * that set of tables.
! 42: * +-------------------------------+
! 43: * | F fulltables |
! 44: * | Spare Space |
! 45: * | F fulltables |
! 46: * | Spare Space |
! 47: * | ... |
! 48: * +-------------------------------+
! 49: *
! 50: *****************************************************************************/
! 51:
! 52: #include "rf_types.h"
! 53: #include "rf_raid.h"
! 54: #include "rf_raidframe.h"
! 55: #include "rf_configure.h"
! 56: #include "rf_decluster.h"
! 57: #include "rf_debugMem.h"
! 58: #include "rf_utils.h"
! 59: #include "rf_alloclist.h"
! 60: #include "rf_general.h"
! 61: #include "rf_shutdown.h"
! 62:
! 63: extern int rf_copyback_in_progress; /* Debug only. */
! 64:
! 65: /* Found in rf_kintf.c */
! 66: int rf_GetSpareTableFromDaemon(RF_SparetWait_t *);
! 67:
! 68: /* Configuration code. */
! 69:
! 70: int
! 71: rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
! 72: RF_Config_t *cfgPtr)
! 73: {
! 74: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 75: int b, v, k, r, lambda; /* block design params */
! 76: int i, j;
! 77: RF_RowCol_t *first_avail_slot;
! 78: RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
! 79: RF_DeclusteredConfigInfo_t *info;
! 80: RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs,
! 81: numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
! 82: RF_StripeCount_t totSparePUsPerDisk;
! 83: RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
! 84: RF_SectorCount_t SpareSpaceInSUs;
! 85: char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
! 86: RF_StripeNum_t l, SUID;
! 87:
! 88: SUID = l = 0;
! 89: numCompleteSpareRegionsPerDisk = 0;
! 90:
! 91: /* 1. Create layout specific structure. */
! 92: RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t),
! 93: (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
! 94: if (info == NULL)
! 95: return (ENOMEM);
! 96: layoutPtr->layoutSpecificInfo = (void *) info;
! 97: info->SpareTable = NULL;
! 98:
! 99: /* 2. Extract parameters from the config structure. */
! 100: if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
! 101: bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
! 102: }
! 103: cfgBuf += RF_SPAREMAP_NAME_LEN;
! 104:
! 105: b = *((int *) cfgBuf);
! 106: cfgBuf += sizeof(int);
! 107: v = *((int *) cfgBuf);
! 108: cfgBuf += sizeof(int);
! 109: k = *((int *) cfgBuf);
! 110: cfgBuf += sizeof(int);
! 111: r = *((int *) cfgBuf);
! 112: cfgBuf += sizeof(int);
! 113: lambda = *((int *) cfgBuf);
! 114: cfgBuf += sizeof(int);
! 115: raidPtr->noRotate = *((int *) cfgBuf);
! 116: cfgBuf += sizeof(int);
! 117:
! 118: /*
! 119: * The sparemaps are generated assuming that parity is rotated, so we
! 120: * issue a warning if both distributed sparing and no-rotate are on at
! 121: * the same time.
! 122: */
! 123: if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) &&
! 124: raidPtr->noRotate) {
! 125: RF_ERRORMSG("Warning: distributed sparing specified without"
! 126: " parity rotation.\n");
! 127: }
! 128: if (raidPtr->numCol != v) {
! 129: RF_ERRORMSG2("RAID: config error: table element count (%d)"
! 130: " not equal to no. of cols (%d).\n", v, raidPtr->numCol);
! 131: return (EINVAL);
! 132: }
! 133: /* 3. Set up the values used in the mapping code. */
! 134: info->BlocksPerTable = b;
! 135: info->Lambda = lambda;
! 136: info->NumParityReps = info->groupSize = k;
! 137: /* b blks, k-1 SUs each. */
! 138: info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU;
! 139: info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */
! 140: info->PUsPerBlock = k - 1;
! 141: info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
! 142: info->TableDepthInPUs = (b * k) / v;
! 143: /* k repetitions. */
! 144: info->FullTableDepthInPUs = info->TableDepthInPUs * k;
! 145:
! 146: /* Used only in distributed sparing case. */
! 147: /* (v-1)/gcd fulltables. */
! 148: info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1);
! 149: info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
! 150: info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion /
! 151: (v - 1)) * layoutPtr->SUsPerPU;
! 152:
! 153: /* Check to make sure the block design is sufficiently small. */
! 154: if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
! 155: if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU +
! 156: info->SpareSpaceDepthPerRegionInSUs >
! 157: layoutPtr->stripeUnitsPerDisk) {
! 158: RF_ERRORMSG3("RAID: config error: Full Table depth"
! 159: " (%d) + Spare Space (%d) larger than disk size"
! 160: " (%d) (BD too big).\n",
! 161: (int) info->FullTableDepthInPUs,
! 162: (int) info->SpareSpaceDepthPerRegionInSUs,
! 163: (int) layoutPtr->stripeUnitsPerDisk);
! 164: return (EINVAL);
! 165: }
! 166: } else {
! 167: if (info->TableDepthInPUs * layoutPtr->SUsPerPU >
! 168: layoutPtr->stripeUnitsPerDisk) {
! 169: RF_ERRORMSG2("RAID: config error: Table depth (%d)"
! 170: " larger than disk size (%d) (BD too big).\n",
! 171: (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU),
! 172: (int) layoutPtr->stripeUnitsPerDisk);
! 173: return (EINVAL);
! 174: }
! 175: }
! 176:
! 177:
! 178: /*
! 179: * Compute the size of each disk, and the number of tables in the last
! 180: * fulltable (which need not be complete).
! 181: */
! 182: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
! 183:
! 184: PUsPerDisk = layoutPtr->stripeUnitsPerDisk /
! 185: layoutPtr->SUsPerPU;
! 186: spareRegionDepthInPUs =
! 187: (info->TablesPerSpareRegion * info->TableDepthInPUs +
! 188: (info->TablesPerSpareRegion * info->TableDepthInPUs) /
! 189: (v - 1));
! 190: info->SpareRegionDepthInSUs =
! 191: spareRegionDepthInPUs * layoutPtr->SUsPerPU;
! 192:
! 193: numCompleteSpareRegionsPerDisk =
! 194: PUsPerDisk / spareRegionDepthInPUs;
! 195: info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
! 196: extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
! 197:
! 198: /*
! 199: * Assume conservatively that we need the full amount of spare
! 200: * space in one region in order to provide spares for the
! 201: * partial spare region at the end of the array. We set "i"
! 202: * to the number of tables in the partial spare region. This
! 203: * may actually include some fulltables.
! 204: */
! 205: extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs /
! 206: layoutPtr->SUsPerPU);
! 207: if (extraPUsPerDisk <= 0)
! 208: i = 0;
! 209: else
! 210: i = extraPUsPerDisk / info->TableDepthInPUs;
! 211:
! 212: complete_FT_count = raidPtr->numRow *
! 213: (numCompleteSpareRegionsPerDisk *
! 214: (info->TablesPerSpareRegion / k) + i / k);
! 215: info->FullTableLimitSUID =
! 216: complete_FT_count * info->SUsPerFullTable;
! 217: info->ExtraTablesPerDisk = i % k;
! 218:
! 219: /*
! 220: * Note that in the last spare region, the spare space is
! 221: * complete even though data/parity space is not.
! 222: */
! 223: totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) *
! 224: (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
! 225: info->TotSparePUsPerDisk = totSparePUsPerDisk;
! 226:
! 227: layoutPtr->stripeUnitsPerDisk =
! 228: ((complete_FT_count / raidPtr->numRow) *
! 229: info->FullTableDepthInPUs + /* data & parity space */
! 230: info->ExtraTablesPerDisk * info->TableDepthInPUs +
! 231: totSparePUsPerDisk /* spare space */
! 232: ) * layoutPtr->SUsPerPU;
! 233: layoutPtr->dataStripeUnitsPerDisk =
! 234: (complete_FT_count * info->FullTableDepthInPUs +
! 235: info->ExtraTablesPerDisk * info->TableDepthInPUs) *
! 236: layoutPtr->SUsPerPU * (k - 1) / k;
! 237:
! 238: } else {
! 239: /*
! 240: * Non-dist spare case: force each disk to contain an
! 241: * integral number of tables.
! 242: */
! 243: layoutPtr->stripeUnitsPerDisk /=
! 244: (info->TableDepthInPUs * layoutPtr->SUsPerPU);
! 245: layoutPtr->stripeUnitsPerDisk *=
! 246: (info->TableDepthInPUs * layoutPtr->SUsPerPU);
! 247:
! 248: /*
! 249: * Compute the number of tables in the last fulltable, which
! 250: * need not be complete.
! 251: */
! 252: complete_FT_count =
! 253: ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
! 254: info->FullTableDepthInPUs) * raidPtr->numRow;
! 255:
! 256: info->FullTableLimitSUID =
! 257: complete_FT_count * info->SUsPerFullTable;
! 258: info->ExtraTablesPerDisk =
! 259: ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
! 260: info->TableDepthInPUs) % k;
! 261: }
! 262:
! 263: raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
! 264: layoutPtr->sectorsPerStripeUnit;
! 265:
! 266: /*
! 267: * Find the disk offset of the stripe unit where the last fulltable
! 268: * starts.
! 269: */
! 270: numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
! 271: diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk *
! 272: info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
! 273: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
! 274: SpareSpaceInSUs = numCompleteSpareRegionsPerDisk *
! 275: info->SpareSpaceDepthPerRegionInSUs;
! 276: diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
! 277: info->DiskOffsetOfLastSpareSpaceChunkInSUs =
! 278: diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
! 279: info->TableDepthInPUs * layoutPtr->SUsPerPU;
! 280: }
! 281: info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
! 282: info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
! 283:
! 284: /* 4. Create and initialize the lookup tables. */
! 285: info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
! 286: if (info->LayoutTable == NULL)
! 287: return (ENOMEM);
! 288: info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
! 289: if (info->OffsetTable == NULL)
! 290: return (ENOMEM);
! 291: info->BlockTable = rf_make_2d_array(info->TableDepthInPUs *
! 292: layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
! 293: if (info->BlockTable == NULL)
! 294: return (ENOMEM);
! 295:
! 296: first_avail_slot = rf_make_1d_array(v, NULL);
! 297: if (first_avail_slot == NULL)
! 298: return (ENOMEM);
! 299:
! 300: for (i = 0; i < b; i++)
! 301: for (j = 0; j < k; j++)
! 302: info->LayoutTable[i][j] = *cfgBuf++;
! 303:
! 304: /* Initialize the offset table. */
! 305: for (i = 0; i < b; i++)
! 306: for (j = 0; j < k; j++) {
! 307: info->OffsetTable[i][j] =
! 308: first_avail_slot[info->LayoutTable[i][j]];
! 309: first_avail_slot[info->LayoutTable[i][j]]++;
! 310: }
! 311:
! 312: /* Initialize the block table. */
! 313: for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
! 314: for (i = 0; i < b; i++) {
! 315: for (j = 0; j < k; j++) {
! 316: info->BlockTable[(info->OffsetTable[i][j] *
! 317: layoutPtr->SUsPerPU) + l]
! 318: [info->LayoutTable[i][j]] = SUID;
! 319: }
! 320: SUID++;
! 321: }
! 322: }
! 323:
! 324: rf_free_1d_array(first_avail_slot, v);
! 325:
! 326: /* 5. Set up the remaining redundant-but-useful parameters. */
! 327:
! 328: raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow *
! 329: info->ExtraTablesPerDisk) * info->SUsPerTable *
! 330: layoutPtr->sectorsPerStripeUnit;
! 331: layoutPtr->numStripe = (raidPtr->totalSectors /
! 332: layoutPtr->sectorsPerStripeUnit) / (k - 1);
! 333:
! 334: /*
! 335: * Strange evaluation order below to try and minimize overflow
! 336: * problems.
! 337: */
! 338:
! 339: layoutPtr->dataSectorsPerStripe =
! 340: (k - 1) * layoutPtr->sectorsPerStripeUnit;
! 341: layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
! 342: raidPtr->logBytesPerSector;
! 343: layoutPtr->numDataCol = k - 1;
! 344: layoutPtr->numParityCol = 1;
! 345:
! 346: return (0);
! 347: }
! 348:
! 349: /* Declustering with distributed sparing. */
! 350: void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
! 351: void
! 352: rf_ShutdownDeclusteredDS(RF_ThreadArg_t arg)
! 353: {
! 354: RF_DeclusteredConfigInfo_t *info;
! 355: RF_Raid_t *raidPtr;
! 356:
! 357: raidPtr = (RF_Raid_t *) arg;
! 358: info =
! 359: (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
! 360: if (info->SpareTable)
! 361: rf_FreeSpareTable(raidPtr);
! 362: }
! 363:
! 364: int
! 365: rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
! 366: RF_Config_t *cfgPtr)
! 367: {
! 368: int rc;
! 369:
! 370: rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
! 371: if (rc)
! 372: return (rc);
! 373:
! 374: rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
! 375: if (rc) {
! 376: RF_ERRORMSG1("Got %d adding shutdown event for"
! 377: " DeclusteredDS.\n", rc);
! 378: rf_ShutdownDeclusteredDS(raidPtr);
! 379: return (rc);
! 380: }
! 381:
! 382: return (0);
! 383: }
! 384:
! 385: void
! 386: rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
! 387: RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
! 388: {
! 389: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 390: RF_DeclusteredConfigInfo_t *info =
! 391: (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
! 392: RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
! 393: RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
! 394: RF_StripeNum_t BlockID, BlockOffset, RepIndex;
! 395: RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
! 396: RF_StripeCount_t fulltable_depth =
! 397: info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
! 398: RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
! 399:
! 400: rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
! 401: &fulltable_depth, &base_suid);
! 402:
! 403: /* Fulltable ID within array (across rows). */
! 404: FullTableID = SUID / sus_per_fulltable;
! 405: if (raidPtr->numRow == 1)
! 406: *row = 0; /* Avoid a mod and a div in the common case. */
! 407: else {
! 408: *row = FullTableID % raidPtr->numRow;
! 409: /* Convert to fulltable ID on this disk. */
! 410: FullTableID /= raidPtr->numRow;
! 411: }
! 412: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
! 413: SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
! 414: SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
! 415: }
! 416: FullTableOffset = SUID % sus_per_fulltable;
! 417: TableID = FullTableOffset / info->SUsPerTable;
! 418: TableOffset = FullTableOffset - TableID * info->SUsPerTable;
! 419: BlockID = TableOffset / info->PUsPerBlock;
! 420: BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
! 421: BlockID %= info->BlocksPerTable;
! 422: RepIndex = info->PUsPerBlock - TableID;
! 423: if (!raidPtr->noRotate)
! 424: BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
! 425: *col = info->LayoutTable[BlockID][BlockOffset];
! 426:
! 427: /* Remap to distributed spare space if indicated. */
! 428: if (remap) {
! 429: RF_ASSERT(raidPtr->Disks[*row][*col].status ==
! 430: rf_ds_reconstructing ||
! 431: raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
! 432: (rf_copyback_in_progress &&
! 433: raidPtr->Disks[*row][*col].status == rf_ds_optimal));
! 434: rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
! 435: TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
! 436: &outSU);
! 437: } else {
! 438:
! 439: outSU = base_suid;
! 440: outSU += FullTableID * fulltable_depth;
! 441: /* Offset to start of FT. */
! 442: outSU += SpareSpace;
! 443: /* Skip rsvd spare space. */
! 444: outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
! 445: /* Offset to start of table. */
! 446: outSU += info->OffsetTable[BlockID][BlockOffset] *
! 447: layoutPtr->SUsPerPU;
! 448: /* Offset to the PU. */
! 449: }
! 450: outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
! 451: /* offs to the SU within a PU */
! 452:
! 453: /*
! 454: * Convert SUs to sectors, and, if not aligned to SU boundary, add in
! 455: * offset to sector.
! 456: */
! 457: *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
! 458: (raidSector % layoutPtr->sectorsPerStripeUnit);
! 459:
! 460: RF_ASSERT(*col != -1);
! 461: }
! 462:
! 463: /*
! 464: * Prototyping this inexplicably causes the compile of the layout table
! 465: * (rf_layout.c) to fail.
! 466: */
! 467: void
! 468: rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
! 469: RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
! 470: {
! 471: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 472: RF_DeclusteredConfigInfo_t *info =
! 473: (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
! 474: RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
! 475: RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
! 476: RF_StripeNum_t BlockID, BlockOffset, RepIndex;
! 477: RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
! 478: RF_StripeCount_t fulltable_depth =
! 479: info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
! 480: RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
! 481:
! 482: rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
! 483: &fulltable_depth, &base_suid);
! 484:
! 485: /* Compute row & (possibly) spare space exactly as before. */
! 486: FullTableID = SUID / sus_per_fulltable;
! 487: if (raidPtr->numRow == 1)
! 488: *row = 0; /* Avoid a mod and a div in the common case. */
! 489: else {
! 490: *row = FullTableID % raidPtr->numRow;
! 491: /* Convert to fulltable ID on this disk. */
! 492: FullTableID /= raidPtr->numRow;
! 493: }
! 494: if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
! 495: SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
! 496: SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
! 497: }
! 498: /* Compute BlockID and RepIndex exactly as before. */
! 499: FullTableOffset = SUID % sus_per_fulltable;
! 500: TableID = FullTableOffset / info->SUsPerTable;
! 501: TableOffset = FullTableOffset - TableID * info->SUsPerTable;
! 502: /*TableOffset = FullTableOffset % info->SUsPerTable;*/
! 503: /*BlockID = (TableOffset / info->PUsPerBlock) %
! 504: *info->BlocksPerTable;*/
! 505: BlockID = TableOffset / info->PUsPerBlock;
! 506: /*BlockOffset = TableOffset % info->PUsPerBlock;*/
! 507: BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
! 508: BlockID %= info->BlocksPerTable;
! 509:
! 510: /* The parity block is in the position indicated by RepIndex. */
! 511: RepIndex = (raidPtr->noRotate) ?
! 512: info->PUsPerBlock : info->PUsPerBlock - TableID;
! 513: *col = info->LayoutTable[BlockID][RepIndex];
! 514:
! 515: if (remap) {
! 516: RF_ASSERT(raidPtr->Disks[*row][*col].status ==
! 517: rf_ds_reconstructing ||
! 518: raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
! 519: (rf_copyback_in_progress &&
! 520: raidPtr->Disks[*row][*col].status == rf_ds_optimal));
! 521: rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
! 522: TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
! 523: &outSU);
! 524: } else {
! 525:
! 526: /*
! 527: * Compute sector as before, except use RepIndex instead of
! 528: * BlockOffset.
! 529: */
! 530: outSU = base_suid;
! 531: outSU += FullTableID * fulltable_depth;
! 532: outSU += SpareSpace; /* skip rsvd spare space */
! 533: outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
! 534: outSU += info->OffsetTable[BlockID][RepIndex] *
! 535: layoutPtr->SUsPerPU;
! 536: }
! 537:
! 538: outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
! 539: *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
! 540: (raidSector % layoutPtr->sectorsPerStripeUnit);
! 541:
! 542: RF_ASSERT(*col != -1);
! 543: }
! 544:
! 545: /*
! 546: * Return an array of ints identifying the disks that comprise the stripe
! 547: * containing the indicated address.
! 548: * The caller must _never_ attempt to modify this array.
! 549: */
! 550: void
! 551: rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
! 552: RF_RowCol_t **diskids, RF_RowCol_t *outRow)
! 553: {
! 554: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 555: RF_DeclusteredConfigInfo_t *info =
! 556: (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
! 557: RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
! 558: RF_StripeCount_t fulltable_depth =
! 559: info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
! 560: RF_StripeNum_t base_suid = 0;
! 561: RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
! 562: RF_StripeNum_t stripeID, FullTableID;
! 563: int tableOffset;
! 564:
! 565: rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
! 566: &fulltable_depth, &base_suid);
! 567: /* Fulltable ID within array (across rows). */
! 568: FullTableID = SUID / sus_per_fulltable;
! 569: *outRow = FullTableID % raidPtr->numRow;
! 570: /* Find stripe offset into array. */
! 571: stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID);
! 572: /* Find offset into block design table. */
! 573: tableOffset = (stripeID % info->BlocksPerTable);
! 574: *diskids = info->LayoutTable[tableOffset];
! 575: }
! 576:
! 577: /*
! 578: * This returns the default head-separation limit, measured in
! 579: * "required units for reconstruction". Each time a disk fetches
! 580: * a unit, it bumps a counter. The head-sep code prohibits any disk
! 581: * from getting more than headSepLimit counter values ahead of any
! 582: * other.
! 583: *
! 584: * We assume here that the number of floating recon buffers is already
! 585: * set. There are r stripes to be reconstructed in each table, and so
! 586: * if we have a total of B buffers, we can have at most B/r tables
! 587: * under recon at any one time. In each table, lambda units are required
! 588: * from each disk, so given B buffers, the head sep limit has to be
! 589: * (lambda*B)/r units. We subtract one to avoid weird boundary cases.
! 590: *
! 591: * For example, suppose we are given 50 buffers, r=19, and lambda=4 as in
! 592: * the 20.5 design. There are 19 stripes/table to be reconstructed, so
! 593: * we can have 50/19 tables concurrently under reconstruction, which means
! 594: * we can allow the fastest disk to get 50/19 tables ahead of the slower
! 595: * disk. There are lambda "required units" for each disk, so the fastest
! 596: * disk can get 4*50/19 = 10 counter values ahead of the slowest.
! 597: *
! 598: * If numBufsToAccumulate is not 1, we need to limit the head sep further
! 599: * because multiple bufs will be required for each stripe under recon.
! 600: */
! 601: RF_HeadSepLimit_t
! 602: rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr)
! 603: {
! 604: RF_DeclusteredConfigInfo_t *info =
! 605: (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
! 606:
! 607: return (info->Lambda * raidPtr->numFloatingReconBufs /
! 608: info->TableDepthInPUs / rf_numBufsToAccumulate);
! 609: }
! 610:
! 611: /*
! 612: * Return the default number of recon buffers to use. The value
! 613: * is somewhat arbitrary... It's intended to be large enough to
! 614: * allow for a reasonably large head-sep limit, but small enough
! 615: * that you don't use up all your system memory with buffers.
! 616: */
! 617: int
! 618: rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
! 619: {
! 620: return (100 * rf_numBufsToAccumulate);
! 621: }
! 622:
! 623: /*
! 624: * Sectors in the last fulltable of the array need to be handled
! 625: * specially since this fulltable can be incomplete. This function
! 626: * changes the values of certain params to handle this.
! 627: *
! 628: * The idea here is that MapSector et. al. figure out which disk the
! 629: * addressed unit lives on by computing the modulos of the unit number
! 630: * with the number of units per fulltable, table, etc. In the last
! 631: * fulltable, there are fewer units per fulltable, so we need to adjust
! 632: * the number of user data units per fulltable to reflect this.
! 633: *
! 634: * So, we (1) convert the fulltable size and depth parameters to
! 635: * the size of the partial fulltable at the end, (2) compute the
! 636: * disk sector offset where this fulltable starts, and (3) convert
! 637: * the users stripe unit number from an offset into the array to
! 638: * an offset into the last fulltable.
! 639: */
! 640: void
! 641: rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t *SUID,
! 642: RF_StripeCount_t *sus_per_fulltable, RF_StripeCount_t *fulltable_depth,
! 643: RF_StripeNum_t *base_suid)
! 644: {
! 645: RF_DeclusteredConfigInfo_t *info =
! 646: (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
! 647:
! 648: if (*SUID >= info->FullTableLimitSUID) {
! 649: /* New full table size is size of last full table on disk. */
! 650: *sus_per_fulltable =
! 651: info->ExtraTablesPerDisk * info->SUsPerTable;
! 652:
! 653: /* New full table depth is corresponding depth. */
! 654: *fulltable_depth =
! 655: info->ExtraTablesPerDisk * info->TableDepthInPUs *
! 656: layoutPtr->SUsPerPU;
! 657:
! 658: /* Set up the new base offset. */
! 659: *base_suid = info->DiskOffsetOfLastFullTableInSUs;
! 660:
! 661: /*
! 662: * Convert user's array address to an offset into the last
! 663: * fulltable.
! 664: */
! 665: *SUID -= info->FullTableLimitSUID;
! 666: }
! 667: }
! 668:
! 669: /*
! 670: * Map a stripe ID to a parity stripe ID.
! 671: * See comment above RaidAddressToParityStripeID in layout.c.
! 672: */
! 673: void
! 674: rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
! 675: RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
! 676: {
! 677: RF_DeclusteredConfigInfo_t *info;
! 678:
! 679: info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
! 680:
! 681: *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) *
! 682: info->BlocksPerTable + (stripeID % info->BlocksPerTable);
! 683: *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) /
! 684: info->BlocksPerTable;
! 685: RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU);
! 686: }
! 687:
! 688: /*
! 689: * Called from MapSector and MapParity to retarget an access at the spare unit.
! 690: * Modifies the "col" and "outSU" parameters only.
! 691: */
! 692: void
! 693: rf_remap_to_spare_space(RF_RaidLayout_t *layoutPtr,
! 694: RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row,
! 695: RF_StripeNum_t FullTableID, RF_StripeNum_t TableID, RF_SectorNum_t BlockID,
! 696: RF_StripeNum_t base_suid, RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol,
! 697: RF_StripeNum_t *outSU)
! 698: {
! 699: RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion,
! 700: lastSROffset, which_ft;
! 701:
! 702: /*
! 703: * Note that FullTableID and hence SpareRegion may have gotten
! 704: * tweaked by rf_decluster_adjust_params. We detect this by
! 705: * noticing that base_suid is not 0.
! 706: */
! 707: if (base_suid == 0) {
! 708: ftID = FullTableID;
! 709: } else {
! 710: /*
! 711: * There may be > 1.0 full tables in the last (i.e. partial)
! 712: * spare region. Find out which of these we are in.
! 713: */
! 714: lastSROffset = info->NumCompleteSRs *
! 715: info->SpareRegionDepthInSUs;
! 716: which_ft =
! 717: (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) /
! 718: (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
! 719:
! 720: /* Compute the actual full table ID. */
! 721: ftID = info->DiskOffsetOfLastFullTableInSUs /
! 722: (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) +
! 723: which_ft;
! 724: SpareRegion = info->NumCompleteSRs;
! 725: }
! 726: TableInSpareRegion = (ftID * info->NumParityReps + TableID) %
! 727: info->TablesPerSpareRegion;
! 728:
! 729: *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
! 730: RF_ASSERT(*outCol != -1);
! 731:
! 732: spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
! 733: info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
! 734: info->TableDepthInPUs * layoutPtr->SUsPerPU :
! 735: (SpareRegion + 1) * info->SpareRegionDepthInSUs -
! 736: info->SpareSpaceDepthPerRegionInSUs;
! 737: *outSU = spareTableStartSU +
! 738: info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
! 739: if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
! 740: printf("rf_remap_to_spare_space: invalid remapped disk SU"
! 741: " offset %ld.\n", (long) *outSU);
! 742: }
! 743: }
! 744:
! 745: int
! 746: rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol)
! 747: {
! 748: RF_DeclusteredConfigInfo_t *info =
! 749: (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
! 750: RF_SparetWait_t *req;
! 751: int retcode;
! 752:
! 753: RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
! 754: req->C = raidPtr->numCol;
! 755: req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
! 756: req->fcol = fcol;
! 757: req->SUsPerPU = raidPtr->Layout.SUsPerPU;
! 758: req->TablesPerSpareRegion = info->TablesPerSpareRegion;
! 759: req->BlocksPerTable = info->BlocksPerTable;
! 760: req->TableDepthInPUs = info->TableDepthInPUs;
! 761: req->SpareSpaceDepthPerRegionInSUs =
! 762: info->SpareSpaceDepthPerRegionInSUs;
! 763:
! 764: retcode = rf_GetSpareTableFromDaemon(req);
! 765: RF_ASSERT(!retcode);
! 766: /* XXX -- Fix this to recover gracefully. -- XXX */
! 767:
! 768: return (retcode);
! 769: }
! 770:
! 771: /*
! 772: * Invoked via ioctl to install a spare table in the kernel.
! 773: */
! 774: int
! 775: rf_SetSpareTable(RF_Raid_t *raidPtr, void *data)
! 776: {
! 777: RF_DeclusteredConfigInfo_t *info =
! 778: (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
! 779: RF_SpareTableEntry_t **ptrs;
! 780: int i, retcode;
! 781:
! 782: /*
! 783: * What we need to copyin is a 2-d array, so first copyin the user
! 784: * pointers to the rows in the table.
! 785: */
! 786: RF_Malloc(ptrs, info->TablesPerSpareRegion *
! 787: sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
! 788: retcode = copyin((caddr_t) data, (caddr_t) ptrs,
! 789: info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
! 790:
! 791: if (retcode)
! 792: return (retcode);
! 793:
! 794: /* Now allocate kernel space for the row pointers. */
! 795: RF_Malloc(info->SpareTable, info->TablesPerSpareRegion *
! 796: sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
! 797:
! 798: /*
! 799: * Now allocate kernel space for each row in the table, and copy it in
! 800: * from user space. */
! 801: for (i = 0; i < info->TablesPerSpareRegion; i++) {
! 802: RF_Malloc(info->SpareTable[i], info->BlocksPerTable *
! 803: sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
! 804: retcode = copyin(ptrs[i], info->SpareTable[i],
! 805: info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
! 806: if (retcode) {
! 807: /* Blow off the memory we have allocated. */
! 808: info->SpareTable = NULL;
! 809: return (retcode);
! 810: }
! 811: }
! 812:
! 813: /* Free up the temporary array we used. */
! 814: RF_Free(ptrs, info->TablesPerSpareRegion *
! 815: sizeof(RF_SpareTableEntry_t *));
! 816:
! 817: return (0);
! 818: }
! 819:
! 820: RF_ReconUnitCount_t
! 821: rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr)
! 822: {
! 823: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
! 824:
! 825: return (((RF_DeclusteredConfigInfo_t *)
! 826: layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk);
! 827: }
! 828:
! 829:
! 830: void
! 831: rf_FreeSpareTable(RF_Raid_t *raidPtr)
! 832: {
! 833: long i;
! 834: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
! 835: RF_DeclusteredConfigInfo_t *info =
! 836: (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
! 837: RF_SpareTableEntry_t **table = info->SpareTable;
! 838:
! 839: for (i = 0; i < info->TablesPerSpareRegion; i++) {
! 840: RF_Free(table[i], info->BlocksPerTable *
! 841: sizeof(RF_SpareTableEntry_t));
! 842: }
! 843: RF_Free(table, info->TablesPerSpareRegion *
! 844: sizeof(RF_SpareTableEntry_t *));
! 845: info->SpareTable = (RF_SpareTableEntry_t **) NULL;
! 846: }
CVSweb