xv6

port of xv6 to x86-64
git clone http://frotz.net/git/xv6.git
Log | Files | Refs | README | LICENSE

fs.c (14877B)


      1 // File system implementation.  Five layers:
      2 //   + Blocks: allocator for raw disk blocks.
      3 //   + Log: crash recovery for multi-step updates.
      4 //   + Files: inode allocator, reading, writing, metadata.
      5 //   + Directories: inode with special contents (list of other inodes!)
      6 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
      7 //
      8 // This file contains the low-level file system manipulation 
      9 // routines.  The (higher-level) system call implementations
     10 // are in sysfile.c.
     11 
     12 #include "types.h"
     13 #include "defs.h"
     14 #include "param.h"
     15 #include "stat.h"
     16 #include "mmu.h"
     17 #include "proc.h"
     18 #include "spinlock.h"
     19 #include "buf.h"
     20 #include "fs.h"
     21 #include "file.h"
     22 
     23 #define min(a, b) ((a) < (b) ? (a) : (b))
     24 static void itrunc(struct inode*);
     25 
     26 // Read the super block.
     27 void
     28 readsb(int dev, struct superblock *sb)
     29 {
     30   struct buf *bp;
     31   
     32   bp = bread(dev, 1);
     33   memmove(sb, bp->data, sizeof(*sb));
     34   brelse(bp);
     35 }
     36 
     37 // Zero a block.
     38 static void
     39 bzero(int dev, int bno)
     40 {
     41   struct buf *bp;
     42   
     43   bp = bread(dev, bno);
     44   memset(bp->data, 0, BSIZE);
     45   log_write(bp);
     46   brelse(bp);
     47 }
     48 
     49 // Blocks. 
     50 
     51 // Allocate a zeroed disk block.
     52 static uint
     53 balloc(uint dev)
     54 {
     55   int b, bi, m;
     56   struct buf *bp;
     57   struct superblock sb;
     58 
     59   bp = 0;
     60   readsb(dev, &sb);
     61   for(b = 0; b < sb.size; b += BPB){
     62     bp = bread(dev, BBLOCK(b, sb.ninodes));
     63     for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
     64       m = 1 << (bi % 8);
     65       if((bp->data[bi/8] & m) == 0){  // Is block free?
     66         bp->data[bi/8] |= m;  // Mark block in use.
     67         log_write(bp);
     68         brelse(bp);
     69         bzero(dev, b + bi);
     70         return b + bi;
     71       }
     72     }
     73     brelse(bp);
     74   }
     75   panic("balloc: out of blocks");
     76 }
     77 
     78 // Free a disk block.
     79 static void
     80 bfree(int dev, uint b)
     81 {
     82   struct buf *bp;
     83   struct superblock sb;
     84   int bi, m;
     85 
     86   readsb(dev, &sb);
     87   bp = bread(dev, BBLOCK(b, sb.ninodes));
     88   bi = b % BPB;
     89   m = 1 << (bi % 8);
     90   if((bp->data[bi/8] & m) == 0)
     91     panic("freeing free block");
     92   bp->data[bi/8] &= ~m;
     93   log_write(bp);
     94   brelse(bp);
     95 }
     96 
     97 // Inodes.
     98 //
     99 // An inode describes a single unnamed file.
    100 // The inode disk structure holds metadata: the file's type,
    101 // its size, the number of links referring to it, and the
    102 // list of blocks holding the file's content.
    103 //
    104 // The inodes are laid out sequentially on disk immediately after
    105 // the superblock. Each inode has a number, indicating its
    106 // position on the disk.
    107 //
    108 // The kernel keeps a cache of in-use inodes in memory
    109 // to provide a place for synchronizing access
    110 // to inodes used by multiple processes. The cached
    111 // inodes include book-keeping information that is
    112 // not stored on disk: ip->ref and ip->flags.
    113 //
    114 // An inode and its in-memory represtative go through a
    115 // sequence of states before they can be used by the
    116 // rest of the file system code.
    117 //
    118 // * Allocation: an inode is allocated if its type (on disk)
    119 //   is non-zero. ialloc() allocates, iput() frees if
    120 //   the link count has fallen to zero.
    121 //
    122 // * Referencing in cache: an entry in the inode cache
    123 //   is free if ip->ref is zero. Otherwise ip->ref tracks
    124 //   the number of in-memory pointers to the entry (open
    125 //   files and current directories). iget() to find or
    126 //   create a cache entry and increment its ref, iput()
    127 //   to decrement ref.
    128 //
    129 // * Valid: the information (type, size, &c) in an inode
    130 //   cache entry is only correct when the I_VALID bit
    131 //   is set in ip->flags. ilock() reads the inode from
    132 //   the disk and sets I_VALID, while iput() clears
    133 //   I_VALID if ip->ref has fallen to zero.
    134 //
    135 // * Locked: file system code may only examine and modify
    136 //   the information in an inode and its content if it
    137 //   has first locked the inode. The I_BUSY flag indicates
    138 //   that the inode is locked. ilock() sets I_BUSY,
    139 //   while iunlock clears it.
    140 //
    141 // Thus a typical sequence is:
    142 //   ip = iget(dev, inum)
    143 //   ilock(ip)
    144 //   ... examine and modify ip->xxx ...
    145 //   iunlock(ip)
    146 //   iput(ip)
    147 //
    148 // ilock() is separate from iget() so that system calls can
    149 // get a long-term reference to an inode (as for an open file)
    150 // and only lock it for short periods (e.g., in read()).
    151 // The separation also helps avoid deadlock and races during
    152 // pathname lookup. iget() increments ip->ref so that the inode
    153 // stays cached and pointers to it remain valid.
    154 //
    155 // Many internal file system functions expect the caller to
    156 // have locked the inodes involved; this lets callers create
    157 // multi-step atomic operations.
    158 
    159 struct {
    160   struct spinlock lock;
    161   struct inode inode[NINODE];
    162 } icache;
    163 
    164 void
    165 iinit(void)
    166 {
    167   initlock(&icache.lock, "icache");
    168 }
    169 
    170 static struct inode* iget(uint dev, uint inum);
    171 
    172 //PAGEBREAK!
    173 // Allocate a new inode with the given type on device dev.
    174 // A free inode has a type of zero.
    175 struct inode*
    176 ialloc(uint dev, short type)
    177 {
    178   int inum;
    179   struct buf *bp;
    180   struct dinode *dip;
    181   struct superblock sb;
    182 
    183   readsb(dev, &sb);
    184 
    185   for(inum = 1; inum < sb.ninodes; inum++){
    186     bp = bread(dev, IBLOCK(inum));
    187     dip = (struct dinode*)bp->data + inum%IPB;
    188     if(dip->type == 0){  // a free inode
    189       memset(dip, 0, sizeof(*dip));
    190       dip->type = type;
    191       log_write(bp);   // mark it allocated on the disk
    192       brelse(bp);
    193       return iget(dev, inum);
    194     }
    195     brelse(bp);
    196   }
    197   panic("ialloc: no inodes");
    198 }
    199 
    200 // Copy a modified in-memory inode to disk.
    201 void
    202 iupdate(struct inode *ip)
    203 {
    204   struct buf *bp;
    205   struct dinode *dip;
    206 
    207   bp = bread(ip->dev, IBLOCK(ip->inum));
    208   dip = (struct dinode*)bp->data + ip->inum%IPB;
    209   dip->type = ip->type;
    210   dip->major = ip->major;
    211   dip->minor = ip->minor;
    212   dip->nlink = ip->nlink;
    213   dip->size = ip->size;
    214   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
    215   log_write(bp);
    216   brelse(bp);
    217 }
    218 
    219 // Find the inode with number inum on device dev
    220 // and return the in-memory copy. Does not lock
    221 // the inode and does not read it from disk.
    222 static struct inode*
    223 iget(uint dev, uint inum)
    224 {
    225   struct inode *ip, *empty;
    226 
    227   acquire(&icache.lock);
    228 
    229   // Is the inode already cached?
    230   empty = 0;
    231   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
    232     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
    233       ip->ref++;
    234       release(&icache.lock);
    235       return ip;
    236     }
    237     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
    238       empty = ip;
    239   }
    240 
    241   // Recycle an inode cache entry.
    242   if(empty == 0)
    243     panic("iget: no inodes");
    244 
    245   ip = empty;
    246   ip->dev = dev;
    247   ip->inum = inum;
    248   ip->ref = 1;
    249   ip->flags = 0;
    250   release(&icache.lock);
    251 
    252   return ip;
    253 }
    254 
    255 // Increment reference count for ip.
    256 // Returns ip to enable ip = idup(ip1) idiom.
    257 struct inode*
    258 idup(struct inode *ip)
    259 {
    260   acquire(&icache.lock);
    261   ip->ref++;
    262   release(&icache.lock);
    263   return ip;
    264 }
    265 
    266 // Lock the given inode.
    267 // Reads the inode from disk if necessary.
    268 void
    269 ilock(struct inode *ip)
    270 {
    271   struct buf *bp;
    272   struct dinode *dip;
    273 
    274   if(ip == 0 || ip->ref < 1)
    275     panic("ilock");
    276 
    277   acquire(&icache.lock);
    278   while(ip->flags & I_BUSY)
    279     sleep(ip, &icache.lock);
    280   ip->flags |= I_BUSY;
    281   release(&icache.lock);
    282 
    283   if(!(ip->flags & I_VALID)){
    284     bp = bread(ip->dev, IBLOCK(ip->inum));
    285     dip = (struct dinode*)bp->data + ip->inum%IPB;
    286     ip->type = dip->type;
    287     ip->major = dip->major;
    288     ip->minor = dip->minor;
    289     ip->nlink = dip->nlink;
    290     ip->size = dip->size;
    291     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
    292     brelse(bp);
    293     ip->flags |= I_VALID;
    294     if(ip->type == 0)
    295       panic("ilock: no type");
    296   }
    297 }
    298 
    299 // Unlock the given inode.
    300 void
    301 iunlock(struct inode *ip)
    302 {
    303   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
    304     panic("iunlock");
    305 
    306   acquire(&icache.lock);
    307   ip->flags &= ~I_BUSY;
    308   wakeup(ip);
    309   release(&icache.lock);
    310 }
    311 
    312 // Drop a reference to an in-memory inode.
    313 // If that was the last reference, the inode cache entry can
    314 // be recycled.
    315 // If that was the last reference and the inode has no links
    316 // to it, free the inode (and its content) on disk.
    317 void
    318 iput(struct inode *ip)
    319 {
    320   acquire(&icache.lock);
    321   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
    322     // inode has no links: truncate and free inode.
    323     if(ip->flags & I_BUSY)
    324       panic("iput busy");
    325     ip->flags |= I_BUSY;
    326     release(&icache.lock);
    327     itrunc(ip);
    328     ip->type = 0;
    329     iupdate(ip);
    330     acquire(&icache.lock);
    331     ip->flags = 0;
    332     wakeup(ip);
    333   }
    334   ip->ref--;
    335   release(&icache.lock);
    336 }
    337 
    338 // Common idiom: unlock, then put.
    339 void
    340 iunlockput(struct inode *ip)
    341 {
    342   iunlock(ip);
    343   iput(ip);
    344 }
    345 
    346 //PAGEBREAK!
    347 // Inode content
    348 //
    349 // The content (data) associated with each inode is stored
    350 // in blocks on the disk. The first NDIRECT block numbers
    351 // are listed in ip->addrs[].  The next NINDIRECT blocks are 
    352 // listed in block ip->addrs[NDIRECT].
    353 
    354 // Return the disk block address of the nth block in inode ip.
    355 // If there is no such block, bmap allocates one.
    356 static uint
    357 bmap(struct inode *ip, uint bn)
    358 {
    359   uint addr, *a;
    360   struct buf *bp;
    361 
    362   if(bn < NDIRECT){
    363     if((addr = ip->addrs[bn]) == 0)
    364       ip->addrs[bn] = addr = balloc(ip->dev);
    365     return addr;
    366   }
    367   bn -= NDIRECT;
    368 
    369   if(bn < NINDIRECT){
    370     // Load indirect block, allocating if necessary.
    371     if((addr = ip->addrs[NDIRECT]) == 0)
    372       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
    373     bp = bread(ip->dev, addr);
    374     a = (uint*)bp->data;
    375     if((addr = a[bn]) == 0){
    376       a[bn] = addr = balloc(ip->dev);
    377       log_write(bp);
    378     }
    379     brelse(bp);
    380     return addr;
    381   }
    382 
    383   panic("bmap: out of range");
    384 }
    385 
    386 // Truncate inode (discard contents).
    387 // Only called when the inode has no links
    388 // to it (no directory entries referring to it)
    389 // and has no in-memory reference to it (is
    390 // not an open file or current directory).
    391 static void
    392 itrunc(struct inode *ip)
    393 {
    394   int i, j;
    395   struct buf *bp;
    396   uint *a;
    397 
    398   for(i = 0; i < NDIRECT; i++){
    399     if(ip->addrs[i]){
    400       bfree(ip->dev, ip->addrs[i]);
    401       ip->addrs[i] = 0;
    402     }
    403   }
    404   
    405   if(ip->addrs[NDIRECT]){
    406     bp = bread(ip->dev, ip->addrs[NDIRECT]);
    407     a = (uint*)bp->data;
    408     for(j = 0; j < NINDIRECT; j++){
    409       if(a[j])
    410         bfree(ip->dev, a[j]);
    411     }
    412     brelse(bp);
    413     bfree(ip->dev, ip->addrs[NDIRECT]);
    414     ip->addrs[NDIRECT] = 0;
    415   }
    416 
    417   ip->size = 0;
    418   iupdate(ip);
    419 }
    420 
    421 // Copy stat information from inode.
    422 void
    423 stati(struct inode *ip, struct stat *st)
    424 {
    425   st->dev = ip->dev;
    426   st->ino = ip->inum;
    427   st->type = ip->type;
    428   st->nlink = ip->nlink;
    429   st->size = ip->size;
    430 }
    431 
    432 //PAGEBREAK!
    433 // Read data from inode.
    434 int
    435 readi(struct inode *ip, char *dst, uint off, uint n)
    436 {
    437   uint tot, m;
    438   struct buf *bp;
    439 
    440   if(ip->type == T_DEV){
    441     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
    442       return -1;
    443     return devsw[ip->major].read(ip, dst, n);
    444   }
    445 
    446   if(off > ip->size || off + n < off)
    447     return -1;
    448   if(off + n > ip->size)
    449     n = ip->size - off;
    450 
    451   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
    452     bp = bread(ip->dev, bmap(ip, off/BSIZE));
    453     m = min(n - tot, BSIZE - off%BSIZE);
    454     memmove(dst, bp->data + off%BSIZE, m);
    455     brelse(bp);
    456   }
    457   return n;
    458 }
    459 
    460 // PAGEBREAK!
    461 // Write data to inode.
    462 int
    463 writei(struct inode *ip, char *src, uint off, uint n)
    464 {
    465   uint tot, m;
    466   struct buf *bp;
    467 
    468   if(ip->type == T_DEV){
    469     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
    470       return -1;
    471     return devsw[ip->major].write(ip, src, n);
    472   }
    473 
    474   if(off > ip->size || off + n < off)
    475     return -1;
    476   if(off + n > MAXFILE*BSIZE)
    477     return -1;
    478 
    479   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
    480     bp = bread(ip->dev, bmap(ip, off/BSIZE));
    481     m = min(n - tot, BSIZE - off%BSIZE);
    482     memmove(bp->data + off%BSIZE, src, m);
    483     log_write(bp);
    484     brelse(bp);
    485   }
    486 
    487   if(n > 0 && off > ip->size){
    488     ip->size = off;
    489     iupdate(ip);
    490   }
    491   return n;
    492 }
    493 
    494 //PAGEBREAK!
    495 // Directories
    496 
    497 int
    498 namecmp(const char *s, const char *t)
    499 {
    500   return strncmp(s, t, DIRSIZ);
    501 }
    502 
    503 // Look for a directory entry in a directory.
    504 // If found, set *poff to byte offset of entry.
    505 struct inode*
    506 dirlookup(struct inode *dp, char *name, uint *poff)
    507 {
    508   uint off, inum;
    509   struct dirent de;
    510 
    511   if(dp->type != T_DIR)
    512     panic("dirlookup not DIR");
    513 
    514   for(off = 0; off < dp->size; off += sizeof(de)){
    515     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
    516       panic("dirlink read");
    517     if(de.inum == 0)
    518       continue;
    519     if(namecmp(name, de.name) == 0){
    520       // entry matches path element
    521       if(poff)
    522         *poff = off;
    523       inum = de.inum;
    524       return iget(dp->dev, inum);
    525     }
    526   }
    527 
    528   return 0;
    529 }
    530 
    531 // Write a new directory entry (name, inum) into the directory dp.
    532 int
    533 dirlink(struct inode *dp, char *name, uint inum)
    534 {
    535   int off;
    536   struct dirent de;
    537   struct inode *ip;
    538 
    539   // Check that name is not present.
    540   if((ip = dirlookup(dp, name, 0)) != 0){
    541     iput(ip);
    542     return -1;
    543   }
    544 
    545   // Look for an empty dirent.
    546   for(off = 0; off < dp->size; off += sizeof(de)){
    547     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
    548       panic("dirlink read");
    549     if(de.inum == 0)
    550       break;
    551   }
    552 
    553   strncpy(de.name, name, DIRSIZ);
    554   de.inum = inum;
    555   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
    556     panic("dirlink");
    557   
    558   return 0;
    559 }
    560 
    561 //PAGEBREAK!
    562 // Paths
    563 
    564 // Copy the next path element from path into name.
    565 // Return a pointer to the element following the copied one.
    566 // The returned path has no leading slashes,
    567 // so the caller can check *path=='\0' to see if the name is the last one.
    568 // If no name to remove, return 0.
    569 //
    570 // Examples:
    571 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
    572 //   skipelem("///a//bb", name) = "bb", setting name = "a"
    573 //   skipelem("a", name) = "", setting name = "a"
    574 //   skipelem("", name) = skipelem("////", name) = 0
    575 //
    576 static char*
    577 skipelem(char *path, char *name)
    578 {
    579   char *s;
    580   int len;
    581 
    582   while(*path == '/')
    583     path++;
    584   if(*path == 0)
    585     return 0;
    586   s = path;
    587   while(*path != '/' && *path != 0)
    588     path++;
    589   len = path - s;
    590   if(len >= DIRSIZ)
    591     memmove(name, s, DIRSIZ);
    592   else {
    593     memmove(name, s, len);
    594     name[len] = 0;
    595   }
    596   while(*path == '/')
    597     path++;
    598   return path;
    599 }
    600 
    601 // Look up and return the inode for a path name.
    602 // If parent != 0, return the inode for the parent and copy the final
    603 // path element into name, which must have room for DIRSIZ bytes.
    604 static struct inode*
    605 namex(char *path, int nameiparent, char *name)
    606 {
    607   struct inode *ip, *next;
    608 
    609   if(*path == '/')
    610     ip = iget(ROOTDEV, ROOTINO);
    611   else
    612     ip = idup(proc->cwd);
    613 
    614   while((path = skipelem(path, name)) != 0){
    615     ilock(ip);
    616     if(ip->type != T_DIR){
    617       iunlockput(ip);
    618       return 0;
    619     }
    620     if(nameiparent && *path == '\0'){
    621       // Stop one level early.
    622       iunlock(ip);
    623       return ip;
    624     }
    625     if((next = dirlookup(ip, name, 0)) == 0){
    626       iunlockput(ip);
    627       return 0;
    628     }
    629     iunlockput(ip);
    630     ip = next;
    631   }
    632   if(nameiparent){
    633     iput(ip);
    634     return 0;
    635   }
    636   return ip;
    637 }
    638 
    639 struct inode*
    640 namei(char *path)
    641 {
    642   char name[DIRSIZ];
    643   return namex(path, 0, name);
    644 }
    645 
    646 struct inode*
    647 nameiparent(char *path, char *name)
    648 {
    649   return namex(path, 1, name);
    650 }