#ifdef NFS_TWEAK /* * Copyright (c) 1989, 1993, 2002 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 2002-2003 * The President and Fellows of Harvard College. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_serv.c 8.8 (Berkeley) 7/31/95 * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.5 2002/04/28 11:29:12 iedowse Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 /* * The constant NUM_HEURISTIC (from nfs_serv.c) is split into three * values: * * constant NFST_NUM_HEURISTIC_MAX gives the maximum table size * * constant NFST_NUM_HEURISTIC_MIN gives the minimum table size * * constant NFST_NUM_HEURISTIC_DEF, gives the default table size * * variable nfst_FHtabSize, defines the current table size. * * If you are not planning to do experiments, but simply want to use a * large table, then make NFST_NUM_HEURISTIC_MAX exactly the size you * want, define NFST_NUM_HEURISTIC_DEF to be the same as * NFST_NUM_HEURISTIC_MAX, and you're all set. */ #define NFST_NUM_HEURISTIC_DEF 64 /* The original 4.6 default */ #define NFST_NUM_HEURISTIC_MAX 1021 #define NFST_NUM_HEURISTIC_MIN 4 #define NFST_NUM_TRIES_DEF 4 /* The original 4.6 default */ /* * The cursor structure stores info about each live "cursor" for each * file handle/client combo. Note that the clock is just an int that * is based on the last time the file was touched relative to the last * time that this cursor was touched. It is not real time, it is just * a way to determine which cursor was accessed least recently. */ typedef struct { off_t off; /* most recent offset */ int seq; /* sequentiality count */ int clock; /* "time" for last access */ } rhcursor_t ; #define NHUSE_CURSORS 8 /* * nfst_heur_t corresponds to the nfsheur structure used in the * original nfs_serv code, but with additional fields to allow for * cursors. * * This could be implemented in a much more space-efficient manner * (for example, right now we allocate space for every possible * cursor, instead of only allocating space for cursors we actually * use). This is just a proof of concept. */ typedef struct nfst_heur_t { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextr; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ int clock; /* clock for the whole file */ rhcursor_t cursors[NHUSE_CURSORS]; } nfst_heur_t; static nfst_heur_t nfsheur[NFST_NUM_HEURISTIC_MAX]; /* * NFST_MIN/MAX_SEQ are tied to the range of values permitted in the * seq bits of the ioflag (see nfs_serv and the ufs code). I guess * there are only seven bits to play with. */ #define NFST_MIN_SEQ 0 #define NFST_MAX_SEQ 127 int nfst_verbose = 0; /* if set, be verbose */ int nfst_reset = 0; /* if set, reset all parameters */ int nfst_default = 1; /* use all default behavior */ static int nfst_useSlowDown = 0; /* if true, use SlowDown */ static int nfst_useCursors = 0; /* if true, use Cursors */ static int nfst_FHtabSize = NFST_NUM_HEURISTIC_DEF; static int nfst_FHtabSizeMax = NFST_NUM_HEURISTIC_MAX; static int nfst_FHtabSizeMin = NFST_NUM_HEURISTIC_MIN; static int nfst_FHtabTries = NFST_NUM_TRIES_DEF; static int nfst_minseq = NFST_MIN_SEQ; static int nfst_maxseq = NFST_MAX_SEQ; static int nfst_FHtabKickout = 0; static int nfst_sysctl_proc (SYSCTL_HANDLER_ARGS); static nfst_heur_t *nfst_find_nh (struct vnode *vp, off_t curr_off, off_t **next_off); static int nfst_find_cursor (nfst_heur_t *nh, off_t curr_off); static int nfst_sysctl_proc (SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int (oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (error) { return (error); } /* * When there is a reset, set everything back to the default * values. The only thing NOT changed by this is * nfst_verbose. */ if (nfst_reset) { nfst_reset = 0; nfst_default = 1; nfst_CscanMode = 0; nfst_useSlowDown = 0; nfst_useCursors = 0; nfst_FHtabSize = NFST_NUM_HEURISTIC_DEF; nfst_FHtabTries = NFST_NUM_TRIES_DEF; nfst_minseq = NFST_MIN_SEQ; nfst_maxseq = NFST_MAX_SEQ; } /* * Sanity checks follow. */ /* Make sure that the FHtabSize is not too large. */ if (nfst_FHtabSize > nfst_FHtabSizeMax) { printf ("FHtabSize > FHtabSizeMax, setting to FHtabSizeMax (%d)\n", nfst_FHtabSizeMax); nfst_FHtabSize = nfst_FHtabSizeMax; } if (nfst_FHtabSize < nfst_FHtabSizeMin) { printf ("FHtabSize < FHtabSizeMin, setting to FHtabSizeMin (%d)\n", nfst_FHtabSizeMin); nfst_FHtabSize = nfst_FHtabSizeMin; } /* * nfst_minseq and nfs_maxseq must always be 1..127, and * nfst_maxseq must be no smaller than nfst_minseq. */ if (nfst_minseq < NFST_MIN_SEQ) { nfst_minseq = NFST_MIN_SEQ; } if (nfst_minseq > NFST_MAX_SEQ) { nfst_minseq = NFST_MAX_SEQ; } if (nfst_maxseq > NFST_MAX_SEQ) { nfst_maxseq = NFST_MAX_SEQ; } if (nfst_maxseq < NFST_MIN_SEQ) { nfst_maxseq = NFST_MIN_SEQ; } if (nfst_minseq > nfst_maxseq) { nfst_minseq = nfst_maxseq; } return (0); } SYSCTL_DECL(_vfs_nfs); SYSCTL_NODE(_vfs_nfs, OID_AUTO, nfst, CTLFLAG_RW, 0, "NFS Tweaks"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, verbose, CTLTYPE_INT | CTLFLAG_RW, &nfst_verbose, NULL, nfst_sysctl_proc, "I", "Set diagnostics to verbose mode"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW, &nfst_reset, NULL, nfst_sysctl_proc, "I", "Reset all parameters to defaults"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, default, CTLTYPE_INT | CTLFLAG_RW, &nfst_default, NULL, nfst_sysctl_proc, "I", "Use defaults?"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, useSlowDown, CTLTYPE_INT | CTLFLAG_RW, &nfst_useSlowDown, NULL, nfst_sysctl_proc, "I", "Use SlowDown heuristic?"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, useCursors, CTLTYPE_INT | CTLFLAG_RW, &nfst_useCursors, NULL, nfst_sysctl_proc, "I", "Use Cursor heuristic?"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, kickout, CTLTYPE_INT | CTLFLAG_RW, &nfst_FHtabKickout, NULL, nfst_sysctl_proc, "I", "FH table kickout count"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, tabsize, CTLTYPE_INT | CTLFLAG_RW, &nfst_FHtabSize, NULL, nfst_sysctl_proc, "I", "FH table size"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, tabsizeMax, CTLTYPE_INT | CTLFLAG_RD, &nfst_FHtabSizeMax, NULL, nfst_sysctl_proc, "I", "FH table max size"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, tabsizeMin, CTLTYPE_INT | CTLFLAG_RD, &nfst_FHtabSizeMin, NULL, nfst_sysctl_proc, "I", "FH table min size"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, tries, CTLTYPE_INT | CTLFLAG_RW, &nfst_FHtabTries, NULL, nfst_sysctl_proc, "I", "FH table tries"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, minseq, CTLTYPE_INT | CTLFLAG_RW, &nfst_minseq, NULL, nfst_sysctl_proc, "I", "min seq value"); SYSCTL_PROC(_vfs_nfs_nfst, OID_AUTO, maxseq, CTLTYPE_INT | CTLFLAG_RW, &nfst_maxseq, NULL, nfst_sysctl_proc, "I", "max seq value"); /* Note: NFST_BLK_SIZE has no fixed relationship with the actual * block size. It's based on empirical observation of what most NFS * clients will request (either 4k or 8k blocks) and the SlowDown * heuristic. Go ahead and futz with it, if you find a better value. * -DJE */ #define NFST_BLK_SIZE 8192 #define NFST_MAX_FWD_JITTER 8 #define NFST_MAX_BCK_JITTER (-8) int nfst_calc_seqcount (struct vnode *vn, off_t curr_off, off_t **next_off_p) { static unsigned int count = 0; static unsigned int c0 = 0, c1 = 0, c2 = 0, c3 = 0, c4 = 0; int prev_seqcount; off_t prev_off; unsigned int seq; int cursor_index = 0; nfst_heur_t *nh; /* * ensure that next_off_p pointer doesn't point somewhere * random. */ if (next_off_p) *next_off_p = NULL; nh = nfst_find_nh (vn, curr_off, next_off_p); if (nfst_verbose && ++count == 100000) { printf ("seq = %d back = %d fwd = %d jitter = %d\n", c0, c2, c3, c4); c0 = c1 = c2 = c3 = c4 = 0; count = 0; } if (! nfst_useCursors) { prev_seqcount = nh->nh_seqcount; prev_off = nh->nh_nextr; if (next_off_p) *next_off_p = &nh->nh_nextr; } else { cursor_index = nfst_find_cursor (nh, curr_off); prev_seqcount = nh->cursors [cursor_index].seq; prev_off = nh->cursors [cursor_index].off; if (next_off_p) *next_off_p = &nh->cursors [cursor_index].off; nh->cursors [cursor_index].clock = ++nh->clock; } /* printf ("curr = %lld prev = %lld \n", curr_off / NFST_BLK_SIZE, prev_off / NFST_BLK_SIZE); */ if (curr_off == prev_off) { c0++; seq = prev_seqcount + 1; } else if ((curr_off == 0) && (prev_seqcount > 0)) { c1++; seq = prev_seqcount + 1; } else if (nfst_useSlowDown) { int distance = (curr_off - prev_off) / NFST_BLK_SIZE; if (distance < NFST_MAX_BCK_JITTER) { c2++; seq = prev_seqcount / 2; } else if (distance > NFST_MAX_FWD_JITTER) { c3++; seq = prev_seqcount / 2; } else { c4++; seq = prev_seqcount; } } else if (prev_seqcount > 1) { seq = 1; } else { seq = 0; } if (seq > NFST_MAX_SEQ) { seq = NFST_MAX_SEQ; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; nh->nh_seqcount = seq; if (nfst_useCursors) { nh->cursors [cursor_index].seq = seq; nh->cursors [cursor_index].off = curr_off; } /* * We can override the actual return value by pinning the * actual value returned to a value between maxseq and minseq. * (Even when this happens, however, the nh structure is * updated as normal.) */ if (seq > nfst_maxseq) { seq = nfst_maxseq; } else if (seq < nfst_minseq) { seq = nfst_minseq; } return (seq); } static nfst_heur_t * nfst_find_nh (struct vnode *vp, off_t curr_off, off_t **next_off) { int hi; int try = nfst_FHtabTries; nfst_heur_t *nh; unsigned int i; /* * Locate the best candidate slot in the hash table -- either * an exact match for the vp, or the slot whose contents we * should eject and replace with vp. * * This uses almost exactly the same algorithm as the original * code, but allows the size of the hash table and the number * of probes to be variables. */ hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % nfst_FHtabSize; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % nfst_FHtabSize; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* * If we didn't find an exact match, then steal the node nh * points at and fill it with initial values for vp. * * Important note: even if we're NOT using cursors, it's * necessary to properly initialize the fields used by the * cursor logic because otherwise when someone toggles * useCursors we will end up looking at a bunch of arbitrary * junk in those fields. */ if (nh->nh_vp != vp) { if (nh->nh_use > 0) { nfst_FHtabKickout++; } nh->nh_vp = vp; nh->nh_nextr = curr_off; nh->nh_use = NHUSE_INIT; if (curr_off == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; nh->clock = 0; for (i = 0; i < NHUSE_CURSORS; i++) { nh->cursors [i].off = -1; nh->cursors [i].seq = 4; nh->cursors [i].clock = 0; } } return (nh); } static int nfst_find_cursor (nfst_heur_t *nh, off_t curr_off) { unsigned int i; off_t diffs [NHUSE_CURSORS]; int empty_i = -1; int best_i; /* * If we find an exact match, then we can return immediately. * Otherwise, we need to choose which of the other cursors to * use. If we have to evict another cursor in order to make * room for this new one, then we must also initialize it * before returning. */ for (i = 0; i < NHUSE_CURSORS; i++) { if (nh->cursors [i].off < 0) { diffs [i] = -1; empty_i = i; } else { diffs [i] = curr_off - nh->cursors [i].off; } if (diffs [i] == 0) { /* printf ("EXACT MATCH (%d)\n", i); */ return (i); } } /* * If we didn't find an exact match, search for * something close. * * &&& This could be improved. */ for (i = 0; i < NHUSE_CURSORS; i++) { if (nh->cursors [i].off >= 0 && (-64 * 1024 <= diffs [i]) && (diffs [i] < 64 * 1024)) { if (nfst_verbose) printf ("CLOSE MATCH (%d)\n", i); return (i); } } /* * If we still didn't find anything that matched, and we * didn't find an empty cursor to use, then we need to find * the oldest cursor to kick out. */ if (empty_i < 0) { int oldest = nh->clock; int oldest_i = 0; for (i = 0; i < NHUSE_CURSORS; i++) { if (nh->cursors [i].off < 0) { continue; } if (oldest > nh->cursors [i].clock) { oldest = nh->cursors [i].clock; oldest_i = i; } } if (nfst_verbose) printf ("oldest = %d (for nh = %p)\n", oldest_i, nh); empty_i = oldest_i; } if (empty_i >= 0) { best_i = empty_i; } else { if (nfst_verbose) printf ("NFS_TWEAK: BIZARRO: search failed off = %llu\n", curr_off); best_i = 0; } nh->cursors [best_i].seq = 4; nh->cursors [best_i].off = curr_off; return (best_i); } /* * end of nfs_tweak.c */ #endif /* NFS_TWEAK */