Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

mm.h

Go to the documentation of this file.
00001 #ifndef _LINUX_MM_H
00002 #define _LINUX_MM_H
00003 
00004 #include <linux/sched.h>
00005 #include <linux/errno.h>
00006 
00007 #ifdef __KERNEL__
00008 
00009 #include <linux/string.h>
00010 
00011 extern unsigned long max_mapnr;
00012 extern unsigned long num_physpages;
00013 extern void * high_memory;
00014 extern int page_cluster;
00015 
00016 #include <asm/page.h>
00017 #include <asm/atomic.h>
00018 
00019 /*
00020  * Linux kernel virtual memory manager primitives.
00021  * The idea being to have a "virtual" mm in the same way
00022  * we have a virtual fs - giving a cleaner interface to the
00023  * mm details, and allowing different kinds of memory mappings
00024  * (from shared memory to executable loading to arbitrary
00025  * mmap() functions).
00026  */
00027 
00028 /*
00029  * This struct defines a VMM memory area. There is one of these
00030  * per VM-area/task.  A VM area is any part of the process virtual memory
00031  * space that has a special rule for the page-fault handlers (ie a shared
00032  * library, the executable area etc).
00033  */
00034 struct vm_area_struct {
00035         struct mm_struct * vm_mm;       /* VM area parameters */
00036         unsigned long vm_start;         /* lowest address of the area; expand_stack() moves it down */
00037         unsigned long vm_end;           /* find_vma() selects an area by addr < vm_end */
00038 
00039         /* linked list of VM areas per task, sorted by address */
00040         struct vm_area_struct *vm_next;
00041 
00042         pgprot_t vm_page_prot;          /* page protection; cf. protection_map[] */
00043         unsigned short vm_flags;        /* VM_* bits, see the "vm_flags.." defines */
00044 
00045         /* AVL tree of VM areas per task, sorted by address */
00046         short vm_avl_height;
00047         struct vm_area_struct * vm_avl_left;
00048         struct vm_area_struct * vm_avl_right;
00049 
00050         /* For areas with inode, the list inode->i_mmap{,_shared}, for shm areas,
00051          * the list of attaches, otherwise unused.
00052          */
00053         struct vm_area_struct *vm_next_share;
00054         struct vm_area_struct **vm_pprev_share;
00055 
00056         struct vm_operations_struct * vm_ops;   /* per-area callbacks, see struct vm_operations_struct */
00057         unsigned long vm_offset;                /* expand_stack() lowers this by the same amount as vm_start */
00058         struct file * vm_file;
00059         unsigned long vm_pte;                   /* shared mem */
00060 };
00061 
00062 /*
00063  * vm_flags..
00064  */
00065 #define VM_READ         0x0001  /* currently active flags */
00066 #define VM_WRITE        0x0002
00067 #define VM_EXEC         0x0004
00068 #define VM_SHARED       0x0008
00069 
00070 #define VM_MAYREAD      0x0010  /* limits for mprotect() etc */
00071 #define VM_MAYWRITE     0x0020
00072 #define VM_MAYEXEC      0x0040
00073 #define VM_MAYSHARE     0x0080
00074 
00075 #define VM_GROWSDOWN    0x0100  /* general info on the segment */
00076 #define VM_GROWSUP      0x0200
00077 #define VM_SHM          0x0400  /* shared memory area, don't swap out */
00078 #define VM_DENYWRITE    0x0800  /* ETXTBSY on write attempts.. */
00079 
00080 #define VM_EXECUTABLE   0x1000
00081 #define VM_LOCKED       0x2000  /* expand_stack() also charges growth to mm->locked_vm when set */
00082 #define VM_IO           0x4000  /* Memory mapped I/O or similar */
00083 
00084 #define VM_STACK_FLAGS  0x0177  /* VM_READ|VM_WRITE|VM_EXEC | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC | VM_GROWSDOWN */
00085 
00086 /*
00087  * mapping from the currently active vm_flags protection bits (the
00088  * low four bits) to a page protection mask..
00089  */
00090 extern pgprot_t protection_map[16];
00091 
00092 
00093 /*
00094  * These are the virtual MM functions - opening of an area, closing and
00095  * unmapping it (needed to keep files on disk up-to-date etc), pointer
00096  * to the functions called when a no-page or a wp-page exception occurs. 
00097  */
00098 struct vm_operations_struct {
00099         void (*open)(struct vm_area_struct * area);     /* opening of an area */
00100         void (*close)(struct vm_area_struct * area);    /* closing of an area */
00101         void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);      /* unmapping of an area */
00102         void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
00103         int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
00104         void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
00105         unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access); /* called when a no-page exception occurs */
00106         unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
00107                 unsigned long page);    /* called when a wp-page exception occurs */
00108         int (*swapout)(struct vm_area_struct *, struct page *);
00109         pte_t (*swapin)(struct vm_area_struct *, unsigned long, unsigned long);
00110 };
00111 
00112 /*
00113  * Try to keep the most commonly accessed fields in single cache lines
00114  * here (16 bytes or greater).  This ordering should be particularly
00115  * beneficial on 32-bit processors.
00116  *
00117  * The first line is data used in page cache lookup, the second line
00118  * is used for linear searches (eg. clock algorithm scans). 
00119  */
00120 typedef struct page {
00121         /* these must be first (free area handling) */
00122         struct page *next;              /* inode->i_pages list link; freelist link when count==0 */
00123         struct page *prev;              /* inode->i_pages list link; freelist link when count==0 */
00124         struct inode *inode;            /* inode whose memory mapping the page belongs to, if any */
00125         unsigned long offset;           /* file offset of the page in that inode */
00126         struct page *next_hash;         /* used by the (inode,offset) hash table lists */
00127         atomic_t count;                 /* reference count for non-reserved pages; 0 means free */
00128         unsigned long flags;    /* atomic flags, some possibly updated asynchronously */
00129         struct wait_queue *wait;        /* tasks waiting for the I/O on this page to complete */
00130         struct page **pprev_hash;       /* used by the (inode,offset) hash table lists */
00131         struct buffer_head * buffers;   /* circular list of buffer heads, NULL if none */
00132 } mem_map_t;
00133 
00134 /* Page flag bit values */
00135 #define PG_locked                0      /* set before disk I/O, reset when the I/O completes */
00136 #define PG_error                 1      /* an I/O error occurred on this page */
00137 #define PG_referenced            2      /* set when the page is accessed through the (inode,offset) hash */
00138 #define PG_dirty                 3
00139 #define PG_uptodate              4      /* the page's contents are valid */
00140 #define PG_free_after            5      /* free the page without delay once a write completes */
00141 #define PG_decr_after            6
00142 #define PG_swap_unlock_after     7
00143 #define PG_DMA                   8      /* page lies in the DMA-capable physical address range */
00144 #define PG_Slab                  9
00145 #define PG_swap_cache           10
00146 #define PG_skip                 11      /* sparc/sparc64: "skip" certain parts of the address space */
00147 #define PG_reserved             31      /* page must never be accessed (may not even be present) */
00148 
00149 /* Make it prettier to test the above... */
00150 #define PageLocked(page)        (test_bit(PG_locked, &(page)->flags))   /* testers: test_bit() on page->flags */
00151 #define PageError(page)         (test_bit(PG_error, &(page)->flags))
00152 #define PageReferenced(page)    (test_bit(PG_referenced, &(page)->flags))
00153 #define PageDirty(page)         (test_bit(PG_dirty, &(page)->flags))
00154 #define PageUptodate(page)      (test_bit(PG_uptodate, &(page)->flags))
00155 #define PageFreeAfter(page)     (test_bit(PG_free_after, &(page)->flags))
00156 #define PageDecrAfter(page)     (test_bit(PG_decr_after, &(page)->flags))
00157 #define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
00158 #define PageDMA(page)           (test_bit(PG_DMA, &(page)->flags))
00159 #define PageSlab(page)          (test_bit(PG_Slab, &(page)->flags))
00160 #define PageSwapCache(page)     (test_bit(PG_swap_cache, &(page)->flags))
00161 #define PageReserved(page)      (test_bit(PG_reserved, &(page)->flags)) /* PG_skip has no tester in this header */
00162 
00163 #define PageSetSlab(page)       (set_bit(PG_Slab, &(page)->flags))      /* plain setters exist only for PG_Slab and PG_swap_cache */
00164 #define PageSetSwapCache(page)  (set_bit(PG_swap_cache, &(page)->flags))
00165 
00166 #define PageTestandSetDirty(page)       \
00167                         (test_and_set_bit(PG_dirty, &(page)->flags))    /* NOTE(review): presumably yields the previous bit value - confirm against test_and_set_bit() */
00168 #define PageTestandSetSwapCache(page)   \
00169                         (test_and_set_bit(PG_swap_cache, &(page)->flags))
00170 
00171 #define PageClearSlab(page)     (clear_bit(PG_Slab, &(page)->flags))    /* plain clearers, same two bits as the setters */
00172 #define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
00173 
00174 #define PageTestandClearDirty(page) \
00175                         (test_and_clear_bit(PG_dirty, &(page)->flags))
00176 #define PageTestandClearSwapCache(page) \
00177                         (test_and_clear_bit(PG_swap_cache, &(page)->flags))
00178 
00179 /*
00180  * Various page->flags bits:
00181  *
00182  * PG_reserved is set for a page which must never be accessed (which
00183  * may not even be present).
00184  *
00185  * PG_DMA is set for those pages which lie in the range of
00186  * physical addresses capable of carrying DMA transfers.
00187  *
00188  * Multiple processes may "see" the same page. E.g. for untouched
00189  * mappings of /dev/zero, all processes see the same page full of
00190  * zeroes, and text pages of executables and shared libraries have
00191  * only one copy in memory, at most, normally.
00192  *
00193  * For the non-reserved pages, page->count denotes a reference count.
00194  *   page->count == 0 means the page is free.
00195  *   page->count == 1 means the page is used for exactly one purpose
00196  *   (e.g. a private data page of one process).
00197  *
00198  * A page may be used for kmalloc() or anyone else who does a
00199  * get_free_page(). In this case the page->count is at least 1, and
00200  * all other fields are unused but should be 0 or NULL. The
00201  * management of this page is the responsibility of the one who uses
00202  * it.
00203  *
00204  * The other pages (we may call them "process pages") are completely
00205  * managed by the Linux memory manager: I/O, buffers, swapping etc.
00206  * The following discussion applies only to them.
00207  *
00208  * A page may belong to an inode's memory mapping. In this case,
00209  * page->inode is the pointer to the inode, and page->offset is the
00210  * file offset of the page (not necessarily a multiple of PAGE_SIZE).
00211  *
00212  * A page may have buffers allocated to it. In this case,
00213  * page->buffers is a circular list of these buffer heads. Else,
00214  * page->buffers == NULL.
00215  *
00216  * For pages belonging to inodes, the page->count is the number of
00217  * attaches, plus 1 if buffers are allocated to the page.
00218  *
00219  * All pages belonging to an inode make up a doubly linked list
00220  * inode->i_pages, using the fields page->next and page->prev. (These
00221  * fields are also used for freelist management when page->count==0.)
00222  * There is also a hash table mapping (inode,offset) to the page
00223  * in memory if present. The lists for this hash table use the fields
00224  * page->next_hash and page->pprev_hash.
00225  *
00226  * All process pages can do I/O:
00227  * - inode pages may need to be read from disk,
00228  * - inode pages which have been modified and are MAP_SHARED may need
00229  *   to be written to disk,
00230  * - private pages which have been modified may need to be swapped out
00231  *   to swap space and (later) to be read back into memory.
00232  * During disk I/O, PG_locked is used. This bit is set before I/O
00233  * and reset when I/O completes. page->wait is a wait queue of all
00234  * tasks waiting for the I/O on this page to complete.
00235  * PG_uptodate tells whether the page's contents are valid.
00236  * When a read completes, the page becomes uptodate, unless a disk I/O
00237  * error happened.
00238  * When a write completes, and PG_free_after is set, the page is
00239  * freed without any further delay.
00240  *
00241  * For choosing which pages to swap out, inode pages carry a
00242  * PG_referenced bit, which is set any time the system accesses
00243  * that page through the (inode,offset) hash table.
00244  *
00245  * PG_skip is used on sparc/sparc64 architectures to "skip" certain
00246  * parts of the address space.
00247  *
00248  * PG_error is set to indicate that an I/O error occurred on this page.
00249  */
00250 
00251 extern mem_map_t * mem_map;
00252 
00253 /*
00254  * This is timing-critical - most of the time in getting a new page
00255  * goes to clearing the page. If you want a page without the clearing
00256  * overhead, just use __get_free_page() directly..
00257  */
00258 #define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)        /* order-0 request */
00259 #define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order)) /* ORs GFP_DMA into the caller's mask */
00260 extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order));
00261 
00262 extern inline unsigned long get_free_page(int gfp_mask)
00263 {
00264         unsigned long addr = __get_free_page(gfp_mask); /* same request as __get_free_page() */
00265 
00266         if (!addr)
00267                 return 0;               /* nothing was obtained, so nothing to clear */
00268         clear_page(addr);               /* the step that sets this apart from __get_free_page() */
00269         return addr;
00270 }
00271 
00272 extern int low_on_memory;
00273 
00274 /* memory.c & swap.c*/
00275 
00276 #define free_page(addr) free_pages((addr),0)    /* order-0 shorthand, takes an address */
00277 extern void FASTCALL(free_pages(unsigned long addr, unsigned long order));
00278 #define __free_page(page) __free_pages((page),0)        /* order-0 shorthand, takes a struct page * */
00279 extern void FASTCALL(__free_pages(struct page *, unsigned long));
00280 
00281 extern void show_free_areas(void);
00282 extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
00283         unsigned long address);
00284 
00285 extern void free_page_tables(struct mm_struct * mm);
00286 extern void clear_page_tables(struct mm_struct *, unsigned long, int);
00287 extern int new_page_tables(struct task_struct * tsk);
00288 
00289 extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
00290 extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
00291 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
00292 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
00293 
00294 extern void vmtruncate(struct inode * inode, unsigned long offset);
00295 extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
00296 extern int make_pages_present(unsigned long addr, unsigned long end);
00297 
00298 extern int pgt_cache_water[2];
00299 extern int check_pgt_cache(void);
00300 
00301 extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
00302 extern void mem_init(unsigned long start_mem, unsigned long end_mem);
00303 extern void show_mem(void);
00304 extern void si_meminfo(struct sysinfo * val);
00305 
00306 /* mmap.c */
00307 extern void vma_init(void);
00308 extern void merge_segments(struct mm_struct *, unsigned long, unsigned long);
00309 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
00310 extern void build_mmap_avl(struct mm_struct *);
00311 extern void exit_mmap(struct mm_struct *);
00312 extern unsigned long get_unmapped_area(unsigned long, unsigned long);
00313 
00314 extern unsigned long do_mmap(struct file *, unsigned long, unsigned long,
00315         unsigned long, unsigned long, unsigned long);
00316 extern int do_munmap(unsigned long, size_t);
00317 
00318 /* filemap.c */
00319 extern void remove_inode_page(struct page *);
00320 extern unsigned long page_unuse(struct page *);
00321 extern int shrink_mmap(int, int);
00322 extern void truncate_inode_pages(struct inode *, unsigned long);
00323 extern unsigned long get_cached_page(struct inode *, unsigned long, int);
00324 extern void put_cached_page(unsigned long);
00325 
00326 /*
00327  * GFP bitmasks..
00328  */
00329 #define __GFP_WAIT      0x01    /* single-bit building blocks for the GFP_* masks */
00330 #define __GFP_LOW       0x02
00331 #define __GFP_MED       0x04
00332 #define __GFP_HIGH      0x08
00333 #define __GFP_IO        0x10
00334 #define __GFP_SWAP      0x20
00335 
00336 #define __GFP_DMA       0x80
00337 
00338 #define GFP_BUFFER      (__GFP_MED | __GFP_WAIT)        /* GFP_KERNEL minus __GFP_IO */
00339 #define GFP_ATOMIC      (__GFP_HIGH)    /* the only composite with neither __GFP_WAIT nor __GFP_IO */
00340 #define GFP_USER        (__GFP_LOW | __GFP_WAIT | __GFP_IO)
00341 #define GFP_KERNEL      (__GFP_MED | __GFP_WAIT | __GFP_IO)
00342 #define GFP_NFS         (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
00343 #define GFP_KSWAPD      (__GFP_IO | __GFP_SWAP) /* the only composite that sets __GFP_SWAP */
00344 
00345 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
00346    platforms, used as appropriate on others */
00347 
00348 #define GFP_DMA         __GFP_DMA
00349 
00350 /* vma is the first one with  address < vma->vm_end,
00351  * and even  address < vma->vm_start. Have to extend vma. */
00352 static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
00353 {
00354         struct mm_struct *mm = vma->vm_mm;
00355         unsigned long delta;
00356 
00357         address &= PAGE_MASK;
00358         delta = vma->vm_start - address;        /* bytes added below the old vm_start */
00359         if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur)
00360                 return -ENOMEM;         /* grown area would exceed RLIMIT_STACK */
00361         if (current->rlim[RLIMIT_AS].rlim_cur < RLIM_INFINITY &&
00362             (mm->total_vm << PAGE_SHIFT) + delta > current->rlim[RLIMIT_AS].rlim_cur)
00363                 return -ENOMEM;         /* finite RLIMIT_AS would be exceeded */
00364         vma->vm_start = address;
00365         vma->vm_offset -= delta;
00366         mm->total_vm += delta >> PAGE_SHIFT;    /* total_vm is kept in pages */
00367         if (vma->vm_flags & VM_LOCKED)
00368                 mm->locked_vm += delta >> PAGE_SHIFT;
00369         return 0;
00370 }
00371 
00372 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
00373 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
00374 
00375 /* Look up the first VMA which intersects the interval start_addr..end_addr-1,
00376    NULL if none.  Assume start_addr < end_addr. */
00377 static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
00378 {
00379         struct vm_area_struct *hit = find_vma(mm, start_addr);  /* first area with start_addr < vm_end */
00380 
00381         if (!hit || hit->vm_start >= end_addr)
00382                 return NULL;    /* no such area, or it begins at or after end_addr */
00383         return hit;
00384 }
00385 
00386 #define buffer_under_min()      ((buffermem >> PAGE_SHIFT) * 100 < \
00387                                 buffer_mem.min_percent * num_physpages) /* true while buffermem >> PAGE_SHIFT is under min_percent% of num_physpages */
00388 #define pgcache_under_min()     (page_cache_size * 100 < \
00389                                 page_cache.min_percent * num_physpages) /* same test for page_cache_size against page_cache.min_percent */
00390 
00391 #endif /* __KERNEL__ */
00392 
00393 #endif