vm.c (9549B)
#include "param.h"
#include "types.h"
#include "defs.h"
#include "x86.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "elf.h"

extern char data[];  // defined by kernel.ld
pde_t *kpgdir;       // for use in scheduler()
struct segdesc gdt[NSEGS];

#ifndef X64
// Set up CPU's kernel segment descriptors.
// Run once on entry on each CPU.
void
seginit(void)
{
  struct cpu *c;

  // Map "logical" addresses to virtual addresses using identity map.
  // Cannot share a CODE descriptor for both kernel and user
  // because it would have to have DPL_USR, but the CPU forbids
  // an interrupt from CPL=0 to DPL=3.
  c = &cpus[cpunum()];
  c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
  c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
  c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER);
  c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);

  // Map cpu and curproc (per-CPU storage, addressed through %gs).
  c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);

  lgdt(c->gdt, sizeof(c->gdt));
  loadgs(SEG_KCPU << 3);

  // Initialize cpu-local storage.
  cpu = c;
  proc = 0;
}
#endif

// Return the address of the PTE in page table pgdir
// that corresponds to virtual address va.  If alloc!=0,
// create any required page table pages.
static pte_t *
walkpgdir(pde_t *pgdir, const void *va, int alloc)
{
  pde_t *pde;
  pte_t *pgtab;

  pde = &pgdir[PDX(va)];
  if(*pde & PTE_P){
    pgtab = (pte_t*)p2v(PTE_ADDR(*pde));
  } else {
    if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
      return 0;
    // Make sure all those PTE_P bits are zero.
    memset(pgtab, 0, PGSIZE);
    // The permissions here are overly generous, but they can
    // be further restricted by the permissions in the page table
    // entries, if necessary.
    *pde = v2p(pgtab) | PTE_P | PTE_W | PTE_U;
  }
  return &pgtab[PTX(va)];
}

// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa.  va and size might not
// be page-aligned.
static int
mappages(pde_t *pgdir, void *va, uintp size, uintp pa, int perm)
{
  char *a, *last;
  pte_t *pte;

  a = (char*)PGROUNDDOWN((uintp)va);
  last = (char*)PGROUNDDOWN(((uintp)va) + size - 1);
  for(;;){
    if((pte = walkpgdir(pgdir, a, 1)) == 0)
      return -1;
    if(*pte & PTE_P)
      panic("remap");
    *pte = pa | perm | PTE_P;
    if(a == last)
      break;
    a += PGSIZE;
    pa += PGSIZE;
  }
  return 0;
}
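// Worked example (illustrative only; assumes the 32-bit two-level
// paging this walkpgdir implements, with 10-bit directory and table
// indices): the virtual address va = 0x80104abc decomposes as
//   PDX(va) = 0x200  (bits 31-22, page directory index)
//   PTX(va) = 0x104  (bits 21-12, page table index)
//   offset  = 0xabc  (bits 11-0, offset within the page)
// so walkpgdir() follows pgdir[0x200] to a page table page and
// returns &pgtab[0x104]; mappages() then ORs pa | perm | PTE_P
// into that slot.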
#ifndef X64
// There is one page table per process, plus one that's used when
// a CPU is not running any process (kpgdir). The kernel uses the
// current process's page table during system calls and interrupts;
// page protection bits prevent user code from using the kernel's
// mappings.
//
// setupkvm() and exec() set up every page table like this:
//
//   0..KERNBASE: user memory (text+data+stack+heap), mapped to
//                phys memory allocated by the kernel
//   KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space)
//   KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data)
//                for the kernel's instructions and r/o data
//   data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP,
//                rw data + free physical memory
//   0xfe000000..0: mapped direct (devices such as ioapic)
//
// The kernel allocates physical memory for its heap and for user memory
// between V2P(end) and the end of physical memory (PHYSTOP)
// (directly addressable from end..P2V(PHYSTOP)).

// This table defines the kernel's mappings, which are present in
// every process's page table.
static struct kmap {
  void *virt;
  uintp phys_start;
  uintp phys_end;
  int perm;
} kmap[] = {
 { (void*)KERNBASE, 0,             EXTMEM,    PTE_W}, // I/O space
 { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0},     // kern text+rodata
 { (void*)data,     V2P(data),     PHYSTOP,   PTE_W}, // kern data+memory
 { (void*)DEVBASE,  DEVSPACE,      0,         PTE_W}, // more devices
};

// Set up kernel part of a page table.
pde_t*
setupkvm(void)
{
  pde_t *pgdir;
  struct kmap *k;

  if((pgdir = (pde_t*)kalloc()) == 0)
    return 0;
  memset(pgdir, 0, PGSIZE);
  if (p2v(PHYSTOP) > (void*)DEVSPACE)
    panic("PHYSTOP too high");
  for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
    if(mappages(pgdir, k->virt, k->phys_end - k->phys_start,
                (uint)k->phys_start, k->perm) < 0){
      freevm(pgdir);  // undo any partial mappings before failing
      return 0;
    }
  return pgdir;
}

// Allocate one page table for the machine, for the kernel address
// space used by scheduler processes.
void
kvmalloc(void)
{
  kpgdir = setupkvm();
  switchkvm();
}

// Switch h/w page table register to the kernel-only page table,
// for when no process is running.
void
switchkvm(void)
{
  lcr3(v2p(kpgdir));   // switch to the kernel page table
}

// Switch TSS and h/w page table to correspond to process p.
void
switchuvm(struct proc *p)
{
  pushcli();
  cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
  cpu->gdt[SEG_TSS].s = 0;
  cpu->ts.ss0 = SEG_KDATA << 3;
  cpu->ts.esp0 = (uintp)proc->kstack + KSTACKSIZE;
  ltr(SEG_TSS << 3);
  if(p->pgdir == 0)
    panic("switchuvm: no pgdir");
  lcr3(v2p(p->pgdir));  // switch to new address space
  popcli();
}
#endif

// Load the initcode into address 0 of pgdir.
// sz must be less than a page.
void
inituvm(pde_t *pgdir, char *init, uint sz)
{
  char *mem;

  if(sz >= PGSIZE)
    panic("inituvm: more than a page");
  mem = kalloc();
  memset(mem, 0, PGSIZE);
  mappages(pgdir, 0, PGSIZE, v2p(mem), PTE_W|PTE_U);
  memmove(mem, init, sz);
}

// Load a program segment into pgdir.  addr must be page-aligned
// and the pages from addr to addr+sz must already be mapped.
int
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
{
  uint i, pa, n;
  pte_t *pte;

  if((uintp) addr % PGSIZE != 0)
    panic("loaduvm: addr must be page aligned");
  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
      panic("loaduvm: address should exist");
    pa = PTE_ADDR(*pte);
    if(sz - i < PGSIZE)
      n = sz - i;
    else
      n = PGSIZE;
    if(readi(ip, p2v(pa), offset+i, n) != n)
      return -1;
  }
  return 0;
}
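// Usage sketch (illustrative; this code lives in exec(), not in this
// file): exec() typically grows the address space with allocuvm() and
// then fills each ELF segment with loaduvm().  Assuming a program
// header ph already read from inode ip:
//
//   if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0)
//     goto bad;
//   if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0)
//     goto bad;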
// Allocate page tables and physical memory to grow process from oldsz to
// newsz, which need not be page aligned.  Returns new size or 0 on error.
int
allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
{
  char *mem;
  uintp a;

  if(newsz >= KERNBASE)
    return 0;
  if(newsz < oldsz)
    return oldsz;

  a = PGROUNDUP(oldsz);
  for(; a < newsz; a += PGSIZE){
    mem = kalloc();
    if(mem == 0){
      cprintf("allocuvm out of memory\n");
      deallocuvm(pgdir, newsz, oldsz);
      return 0;
    }
    memset(mem, 0, PGSIZE);
    if(mappages(pgdir, (char*)a, PGSIZE, v2p(mem), PTE_W|PTE_U) < 0){
      cprintf("allocuvm out of memory (2)\n");
      deallocuvm(pgdir, newsz, oldsz);
      kfree(mem);
      return 0;
    }
  }
  return newsz;
}

// Deallocate user pages to bring the process size from oldsz to
// newsz.  oldsz and newsz need not be page-aligned, nor does newsz
// need to be less than oldsz.  oldsz can be larger than the actual
// process size.  Returns the new process size.
int
deallocuvm(pde_t *pgdir, uintp oldsz, uintp newsz)
{
  pte_t *pte;
  uintp a, pa;

  if(newsz >= oldsz)
    return oldsz;

  a = PGROUNDUP(newsz);
  for(; a < oldsz; a += PGSIZE){
    pte = walkpgdir(pgdir, (char*)a, 0);
    if(!pte)
      // No page table for this address: skip to the next one.
      a += (NPTENTRIES - 1) * PGSIZE;
    else if((*pte & PTE_P) != 0){
      pa = PTE_ADDR(*pte);
      if(pa == 0)
        panic("kfree");
      char *v = p2v(pa);
      kfree(v);
      *pte = 0;
    }
  }
  return newsz;
}

// Free a page table and all the physical memory pages
// in the user part.
void
freevm(pde_t *pgdir)
{
  uint i;

  if(pgdir == 0)
    panic("freevm: no pgdir");
  deallocuvm(pgdir, 0x3fa00000, 0);
  for(i = 0; i < NPDENTRIES-2; i++){
    if(pgdir[i] & PTE_P){
      char * v = p2v(PTE_ADDR(pgdir[i]));
      kfree(v);
    }
  }
  kfree((char*)pgdir);
}

// Clear PTE_U on a page.  Used to create an inaccessible
// page beneath the user stack.
void
clearpteu(pde_t *pgdir, char *uva)
{
  pte_t *pte;

  pte = walkpgdir(pgdir, uva, 0);
  if(pte == 0)
    panic("clearpteu");
  *pte &= ~PTE_U;
}

// Given a parent process's page table, create a copy
// of it for a child.
pde_t*
copyuvm(pde_t *pgdir, uint sz)
{
  pde_t *d;
  pte_t *pte;
  uintp pa, i, flags;
  char *mem;

  if((d = setupkvm()) == 0)
    return 0;
  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0)
      panic("copyuvm: pte should exist");
    if(!(*pte & PTE_P))
      panic("copyuvm: page not present");
    pa = PTE_ADDR(*pte);
    flags = PTE_FLAGS(*pte);
    if((mem = kalloc()) == 0)
      goto bad;
    memmove(mem, (char*)p2v(pa), PGSIZE);
    if(mappages(d, (void*)i, PGSIZE, v2p(mem), flags) < 0){
      kfree(mem);  // the copied page was never mapped; free it
      goto bad;
    }
  }
  return d;

bad:
  freevm(d);
  return 0;
}
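// Usage sketch (illustrative; the real call sits in fork() in proc.c,
// and np here stands for fork()'s new-process pointer): fork()
// duplicates the parent's address space with copyuvm() and backs out
// on failure, roughly:
//
//   if((np->pgdir = copyuvm(proc->pgdir, proc->sz)) == 0){
//     kfree(np->kstack);
//     np->kstack = 0;
//     np->state = UNUSED;
//     return -1;
//   }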
//PAGEBREAK!
// Map user virtual address to kernel address.
char*
uva2ka(pde_t *pgdir, char *uva)
{
  pte_t *pte;

  pte = walkpgdir(pgdir, uva, 0);
  if(pte == 0)             // no page table page for this address
    return 0;
  if((*pte & PTE_P) == 0)
    return 0;
  if((*pte & PTE_U) == 0)
    return 0;
  return (char*)p2v(PTE_ADDR(*pte));
}

// Copy len bytes from p to user address va in page table pgdir.
// Most useful when pgdir is not the current page table.
// uva2ka ensures this only works for PTE_U pages.
int
copyout(pde_t *pgdir, uint va, void *p, uint len)
{
  char *buf, *pa0;
  uintp n, va0;

  buf = (char*)p;
  while(len > 0){
    va0 = (uint)PGROUNDDOWN(va);
    pa0 = uva2ka(pgdir, (char*)va0);
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (va - va0);
    if(n > len)
      n = len;
    memmove(pa0 + (va - va0), buf, n);
    len -= n;
    buf += n;
    va = va0 + PGSIZE;
  }
  return 0;
}
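// Worked example (illustrative): copyout(pgdir, 0x1ff0, p, 0x20)
// crosses a page boundary and copies in two chunks.  First pass:
// va0 = 0x1000 and n = PGSIZE - (0x1ff0 - 0x1000) = 0x10, so 0x10
// bytes go to the tail of the first page.  Second pass: va = 0x2000
// = va0, and the remaining 0x10 bytes go to the head of the next page.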