xv6

port of xv6 to x86-64
git clone http://frotz.net/git/xv6.git
Log | Files | Refs | README | LICENSE

vm.c (9549B)


      1 #include "param.h"
      2 #include "types.h"
      3 #include "defs.h"
      4 #include "x86.h"
      5 #include "memlayout.h"
      6 #include "mmu.h"
      7 #include "proc.h"
      8 #include "elf.h"
      9 
extern char data[];  // first address after kernel text+rodata; defined by kernel.ld
pde_t *kpgdir;  // kernel-only page table, for use in scheduler() when no process is running
struct segdesc gdt[NSEGS];  // NOTE(review): file-scope GDT; seginit() uses the per-cpu c->gdt instead — confirm this one is still referenced elsewhere
     13 
     14 #ifndef X64
// Set up CPU's kernel segment descriptors.
// Run once on entry on each CPU.
// (Compiled only for the 32-bit build; the X64 build sets up
// its descriptors elsewhere.)
void
seginit(void)
{
  struct cpu *c;

  // Map "logical" addresses to virtual addresses using identity map.
  // Cannot share a CODE descriptor for both kernel and user
  // because it would have to have DPL_USR, but the CPU forbids
  // an interrupt from CPL=0 to DPL=3.
  c = &cpus[cpunum()];
  c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
  c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
  c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER);
  c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);

  // Map cpu, and curproc: an 8-byte segment at &c->cpu, reached
  // through %gs so each CPU sees its own cpu/proc pointers.
  c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);

  lgdt(c->gdt, sizeof(c->gdt));
  loadgs(SEG_KCPU << 3);  // must follow lgdt: selector indexes the new GDT
  
  // Initialize cpu-local storage (the %gs-relative cpu/proc variables).
  cpu = c;
  proc = 0;
}
     42 #endif
     43 
     44 // Return the address of the PTE in page table pgdir
     45 // that corresponds to virtual address va.  If alloc!=0,
     46 // create any required page table pages.
     47 static pte_t *
     48 walkpgdir(pde_t *pgdir, const void *va, int alloc)
     49 {
     50   pde_t *pde;
     51   pte_t *pgtab;
     52 
     53   pde = &pgdir[PDX(va)];
     54   if(*pde & PTE_P){
     55     pgtab = (pte_t*)p2v(PTE_ADDR(*pde));
     56   } else {
     57     if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
     58       return 0;
     59     // Make sure all those PTE_P bits are zero.
     60     memset(pgtab, 0, PGSIZE);
     61     // The permissions here are overly generous, but they can
     62     // be further restricted by the permissions in the page table 
     63     // entries, if necessary.
     64     *pde = v2p(pgtab) | PTE_P | PTE_W | PTE_U;
     65   }
     66   return &pgtab[PTX(va)];
     67 }
     68 
     69 // Create PTEs for virtual addresses starting at va that refer to
     70 // physical addresses starting at pa. va and size might not
     71 // be page-aligned.
     72 static int
     73 mappages(pde_t *pgdir, void *va, uintp size, uintp pa, int perm)
     74 {
     75   char *a, *last;
     76   pte_t *pte;
     77   
     78   a = (char*)PGROUNDDOWN((uintp)va);
     79   last = (char*)PGROUNDDOWN(((uintp)va) + size - 1);
     80   for(;;){
     81     if((pte = walkpgdir(pgdir, a, 1)) == 0)
     82       return -1;
     83     if(*pte & PTE_P)
     84       panic("remap");
     85     *pte = pa | perm | PTE_P;
     86     if(a == last)
     87       break;
     88     a += PGSIZE;
     89     pa += PGSIZE;
     90   }
     91   return 0;
     92 }
     93 
     94 #ifndef X64
     95 // There is one page table per process, plus one that's used when
     96 // a CPU is not running any process (kpgdir). The kernel uses the
     97 // current process's page table during system calls and interrupts;
     98 // page protection bits prevent user code from using the kernel's
     99 // mappings.
    100 // 
    101 // setupkvm() and exec() set up every page table like this:
    102 //
    103 //   0..KERNBASE: user memory (text+data+stack+heap), mapped to
    104 //                phys memory allocated by the kernel
    105 //   KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space)
    106 //   KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data)
    107 //                for the kernel's instructions and r/o data
    108 //   data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, 
    109 //                                  rw data + free physical memory
    110 //   0xfe000000..0: mapped direct (devices such as ioapic)
    111 //
    112 // The kernel allocates physical memory for its heap and for user memory
    113 // between V2P(end) and the end of physical memory (PHYSTOP)
    114 // (directly addressable from end..P2V(PHYSTOP)).
    115 
    116 // This table defines the kernel's mappings, which are present in
    117 // every process's page table.
    118 static struct kmap {
    119   void *virt;
    120   uintp phys_start;
    121   uintp phys_end;
    122   int perm;
    123 } kmap[] = {
    124  { (void*)KERNBASE, 0,             EXTMEM,    PTE_W}, // I/O space
    125  { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0},     // kern text+rodata
    126  { (void*)data,     V2P(data),     PHYSTOP,   PTE_W}, // kern data+memory
    127  { (void*)DEVBASE,  DEVSPACE,      0,         PTE_W}, // more devices
    128 };
    129 
    130 // Set up kernel part of a page table.
    131 pde_t*
    132 setupkvm(void)
    133 {
    134   pde_t *pgdir;
    135   struct kmap *k;
    136 
    137   if((pgdir = (pde_t*)kalloc()) == 0)
    138     return 0;
    139   memset(pgdir, 0, PGSIZE);
    140   if (p2v(PHYSTOP) > (void*)DEVSPACE)
    141     panic("PHYSTOP too high");
    142   for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
    143     if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, 
    144                 (uint)k->phys_start, k->perm) < 0)
    145       return 0;
    146   return pgdir;
    147 }
    148 
    149 // Allocate one page table for the machine for the kernel address
    150 // space for scheduler processes.
    151 void
    152 kvmalloc(void)
    153 {
    154   kpgdir = setupkvm();
    155   switchkvm();
    156 }
    157 
// Switch h/w page table register to the kernel-only page table,
// for when no process is running.
// kpgdir is the directory built by kvmalloc()/setupkvm().
void
switchkvm(void)
{
  lcr3(v2p(kpgdir));   // switch to the kernel page table
}
    165 
    166 // Switch TSS and h/w page table to correspond to process p.
    167 void
    168 switchuvm(struct proc *p)
    169 {
    170   pushcli();
    171   cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
    172   cpu->gdt[SEG_TSS].s = 0;
    173   cpu->ts.ss0 = SEG_KDATA << 3;
    174   cpu->ts.esp0 = (uintp)proc->kstack + KSTACKSIZE;
    175   ltr(SEG_TSS << 3);
    176   if(p->pgdir == 0)
    177     panic("switchuvm: no pgdir");
    178   lcr3(v2p(p->pgdir));  // switch to new address space
    179   popcli();
    180 }
    181 #endif
    182 
    183 // Load the initcode into address 0 of pgdir.
    184 // sz must be less than a page.
    185 void
    186 inituvm(pde_t *pgdir, char *init, uint sz)
    187 {
    188   char *mem;
    189   
    190   if(sz >= PGSIZE)
    191     panic("inituvm: more than a page");
    192   mem = kalloc();
    193   memset(mem, 0, PGSIZE);
    194   mappages(pgdir, 0, PGSIZE, v2p(mem), PTE_W|PTE_U);
    195   memmove(mem, init, sz);
    196 }
    197 
    198 // Load a program segment into pgdir.  addr must be page-aligned
    199 // and the pages from addr to addr+sz must already be mapped.
    200 int
    201 loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
    202 {
    203   uint i, pa, n;
    204   pte_t *pte;
    205 
    206   if((uintp) addr % PGSIZE != 0)
    207     panic("loaduvm: addr must be page aligned");
    208   for(i = 0; i < sz; i += PGSIZE){
    209     if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
    210       panic("loaduvm: address should exist");
    211     pa = PTE_ADDR(*pte);
    212     if(sz - i < PGSIZE)
    213       n = sz - i;
    214     else
    215       n = PGSIZE;
    216     if(readi(ip, p2v(pa), offset+i, n) != n)
    217       return -1;
    218   }
    219   return 0;
    220 }
    221 
    222 // Allocate page tables and physical memory to grow process from oldsz to
    223 // newsz, which need not be page aligned.  Returns new size or 0 on error.
    224 int
    225 allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
    226 {
    227   char *mem;
    228   uintp a;
    229 
    230   if(newsz >= KERNBASE)
    231     return 0;
    232   if(newsz < oldsz)
    233     return oldsz;
    234 
    235   a = PGROUNDUP(oldsz);
    236   for(; a < newsz; a += PGSIZE){
    237     mem = kalloc();
    238     if(mem == 0){
    239       cprintf("allocuvm out of memory\n");
    240       deallocuvm(pgdir, newsz, oldsz);
    241       return 0;
    242     }
    243     memset(mem, 0, PGSIZE);
    244     mappages(pgdir, (char*)a, PGSIZE, v2p(mem), PTE_W|PTE_U);
    245   }
    246   return newsz;
    247 }
    248 
    249 // Deallocate user pages to bring the process size from oldsz to
    250 // newsz.  oldsz and newsz need not be page-aligned, nor does newsz
    251 // need to be less than oldsz.  oldsz can be larger than the actual
    252 // process size.  Returns the new process size.
    253 int
    254 deallocuvm(pde_t *pgdir, uintp oldsz, uintp newsz)
    255 {
    256   pte_t *pte;
    257   uintp a, pa;
    258 
    259   if(newsz >= oldsz)
    260     return oldsz;
    261 
    262   a = PGROUNDUP(newsz);
    263   for(; a  < oldsz; a += PGSIZE){
    264     pte = walkpgdir(pgdir, (char*)a, 0);
    265     if(!pte)
    266       a += (NPTENTRIES - 1) * PGSIZE;
    267     else if((*pte & PTE_P) != 0){
    268       pa = PTE_ADDR(*pte);
    269       if(pa == 0)
    270         panic("kfree");
    271       char *v = p2v(pa);
    272       kfree(v);
    273       *pte = 0;
    274     }
    275   }
    276   return newsz;
    277 }
    278 
// Free a page table and all the physical memory pages
// in the user part.
void
freevm(pde_t *pgdir)
{
  uint i;
  if(pgdir == 0)
    panic("freevm: no pgdir");
  // NOTE(review): 0x3fa00000 is a magic upper bound on user memory —
  // presumably this port's user address-space limit; confirm against
  // memlayout.h (stock 32-bit xv6 passes KERNBASE here).
  deallocuvm(pgdir, 0x3fa00000, 0);
  // NOTE(review): stops at NPDENTRIES-2 — presumably the last two
  // directory entries hold shared kernel mappings that must not be
  // freed; verify against setupkvm/the port's kernel mapping code.
  for(i = 0; i < NPDENTRIES-2; i++){
    if(pgdir[i] & PTE_P){
      // Free the page-table page itself (its user pages were freed above).
      char * v = p2v(PTE_ADDR(pgdir[i]));
      kfree(v);
    }
  }
  kfree((char*)pgdir);
}
    296 
    297 // Clear PTE_U on a page. Used to create an inaccessible
    298 // page beneath the user stack.
    299 void
    300 clearpteu(pde_t *pgdir, char *uva)
    301 {
    302   pte_t *pte;
    303 
    304   pte = walkpgdir(pgdir, uva, 0);
    305   if(pte == 0)
    306     panic("clearpteu");
    307   *pte &= ~PTE_U;
    308 }
    309 
    310 // Given a parent process's page table, create a copy
    311 // of it for a child.
    312 pde_t*
    313 copyuvm(pde_t *pgdir, uint sz)
    314 {
    315   pde_t *d;
    316   pte_t *pte;
    317   uintp pa, i, flags;
    318   char *mem;
    319 
    320   if((d = setupkvm()) == 0)
    321     return 0;
    322   for(i = 0; i < sz; i += PGSIZE){
    323     if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0)
    324       panic("copyuvm: pte should exist");
    325     if(!(*pte & PTE_P))
    326       panic("copyuvm: page not present");
    327     pa = PTE_ADDR(*pte);
    328     flags = PTE_FLAGS(*pte);
    329     if((mem = kalloc()) == 0)
    330       goto bad;
    331     memmove(mem, (char*)p2v(pa), PGSIZE);
    332     if(mappages(d, (void*)i, PGSIZE, v2p(mem), flags) < 0)
    333       goto bad;
    334   }
    335   return d;
    336 
    337 bad:
    338   freevm(d);
    339   return 0;
    340 }
    341 
    342 //PAGEBREAK!
    343 // Map user virtual address to kernel address.
    344 char*
    345 uva2ka(pde_t *pgdir, char *uva)
    346 {
    347   pte_t *pte;
    348 
    349   pte = walkpgdir(pgdir, uva, 0);
    350   if((*pte & PTE_P) == 0)
    351     return 0;
    352   if((*pte & PTE_U) == 0)
    353     return 0;
    354   return (char*)p2v(PTE_ADDR(*pte));
    355 }
    356 
    357 // Copy len bytes from p to user address va in page table pgdir.
    358 // Most useful when pgdir is not the current page table.
    359 // uva2ka ensures this only works for PTE_U pages.
    360 int
    361 copyout(pde_t *pgdir, uint va, void *p, uint len)
    362 {
    363   char *buf, *pa0;
    364   uintp n, va0;
    365 
    366   buf = (char*)p;
    367   while(len > 0){
    368     va0 = (uint)PGROUNDDOWN(va);
    369     pa0 = uva2ka(pgdir, (char*)va0);
    370     if(pa0 == 0)
    371       return -1;
    372     n = PGSIZE - (va - va0);
    373     if(n > len)
    374       n = len;
    375     memmove(pa0 + (va - va0), buf, n);
    376     len -= n;
    377     buf += n;
    378     va = va0 + PGSIZE;
    379   }
    380   return 0;
    381 }