| Left: clean-2.6.13.1/ | Right: modif-2.6.13.1/ | ||
|---|---|---|---|
| arch/i386/kernel/syscall_table.S fs/exec.c include/asm-i386/unistd.h include/linux/init_task.h include/linux/pkmem.h include/linux/sched.h kernel/exit.c Makefile mm/mprotect.c pkmem/Makefile pkmem/pkmem.c |
|||
| clean-2.6.13.1/arch/i386/kernel/syscall_table.S | modif-2.6.13.1/arch/i386/kernel/syscall_table.S | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-03-01 04:40:37.114049000 -0600 | ||
| 294 | .long sys_inotify_init | 294 | .long sys_inotify_init |
| 295 | .long sys_inotify_add_watch | 295 | .long sys_inotify_add_watch |
| 296 | .long sys_inotify_rm_watch | 296 | .long sys_inotify_rm_watch |
| 297 | .long sys_pk_alloc | ||
| 298 | .long sys_pk_alloc_log /* 295 */ | ||
| 299 | .long sys_pk_free | ||
| 300 | .long sys_pk_free_log | ||
| 301 | .long sys_pk_get | ||
| 302 | .long sys_pk_end | ||
| 303 | |||
| 304 | |||
| 305 | |||
| \ | No newline at end of file | ||
| clean-2.6.13.1/fs/exec.c | modif-2.6.13.1/fs/exec.c | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-03-02 04:26:12.964415000 -0600 | ||
| 1138 | /* | 1138 | /* |
| 1139 | * sys_execve() executes a new program. | 1139 | * sys_execve() executes a new program. |
| 1140 | */ | 1140 | */ |
| 1141 | extern int attach_pkmem(void); | ||
| 1142 | |||
| 1141 | int do_execve(char * filename, | 1143 | int do_execve(char * filename, |
| 1142 | char __user *__user *argv, | 1144 | char __user *__user *argv, |
| 1143 | char __user *__user *envp, | 1145 | char __user *__user *envp, |
| ... | ... | ||
| 1207 | retval = search_binary_handler(bprm,regs); | 1209 | retval = search_binary_handler(bprm,regs); |
| 1208 | if (retval >= 0) { | 1210 | if (retval >= 0) { |
| 1209 | free_arg_pages(bprm); | 1211 | free_arg_pages(bprm); |
| 1210 | 1212 | attach_pkmem(); | |
| 1213 | current->pk = NULL; | ||
| 1211 | /* execve success */ | 1214 | /* execve success */ |
| 1212 | security_bprm_free(bprm); | 1215 | security_bprm_free(bprm); |
| 1213 | acct_update_integrals(current); | 1216 | acct_update_integrals(current); |
| clean-2.6.13.1/include/asm-i386/unistd.h | modif-2.6.13.1/include/asm-i386/unistd.h | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-02-28 13:23:44.717387000 -0600 | ||
| 299 | #define __NR_inotify_init 291 | 299 | #define __NR_inotify_init 291 |
| 300 | #define __NR_inotify_add_watch 292 | 300 | #define __NR_inotify_add_watch 292 |
| 301 | #define __NR_inotify_rm_watch 293 | 301 | #define __NR_inotify_rm_watch 293 |
| 302 | #define __NR_pk_alloc 294 | ||
| 303 | #define __NR_pk_alloc_log 295 | ||
| 304 | #define __NR_pk_free 296 | ||
| 305 | #define __NR_pk_free_log 297 | ||
| 306 | #define __NR_pk_get 298 | ||
| 307 | #define __NR_pk_end 299 | ||
| 302 | 308 | ||
| 303 | #define NR_syscalls 294 | 309 | |
| 310 | #define NR_syscalls 300 | ||
| 304 | 311 | ||
| 305 | /* | 312 | /* |
| 306 | * user-visible error numbers are in the range -1 - -128: see | 313 | * user-visible error numbers are in the range -1 - -128: see |
| clean-2.6.13.1/include/linux/init_task.h | modif-2.6.13.1/include/linux/init_task.h | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-02-28 03:38:53.194325000 -0600 | ||
| 112 | .journal_info = NULL, \ | 112 | .journal_info = NULL, \ |
| 113 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ | 113 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ |
| 114 | .fs_excl = ATOMIC_INIT(0), \ | 114 | .fs_excl = ATOMIC_INIT(0), \ |
| 115 | .pk = NULL, \ | ||
| 115 | } | 116 | } |
| 116 | 117 | ||
| 117 | 118 | ||
| clean-2.6.13.1/include/linux/pkmem.h | modif-2.6.13.1/include/linux/pkmem.h | ||
| 1969-12-31 18:00:00.000000000 -0600 | 2006-03-23 15:45:04.318940000 -0600 | ||
| 1 | #ifndef __PKMEM_H | ||
| 2 | #define __PKMEM_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <linux/rwsem.h> | ||
| 6 | #include <linux/jiffies.h> | ||
| 7 | #include <linux/sched.h> | ||
| 8 | #include <linux/errno.h> | ||
| 9 | #include <linux/spinlock.h> | ||
| 10 | #include <linux/list.h> | ||
| 11 | |||
| 12 | #include <asm/types.h> | ||
| 13 | #include <asm/processor.h> | ||
| 14 | |||
| 15 | //////////////////////////////////////////////////////////////////////////////// | ||
| 16 | /// Important Symbols | ||
| 17 | //////////////////////////////////////////////////////////////////////////////// | ||
| 18 | #define PKMEM_START 0x9bef8000 | ||
| 19 | #define PKMEM_PAGES 512 | ||
| 20 | #define PKMEM_SIZE (PAGE_SIZE * PKMEM_PAGES) | ||
| 21 | //#define PKMEM_START (TASK_SIZE - 1 - PKMEM_SIZE) | ||
| 22 | #define PK_TIMEOUT (3 * HZ) | ||
| 23 | |||
| 24 | //////////////////////////////////////////////////////////////////////////////// | ||
| 25 | /// Important Macros | ||
| 26 | /////////////////////////////////////////////////////////////////////////////// | ||
| 27 | #define PK_CUR_TASK_LOCK spin_lock(&current->pk->lock) | ||
| 28 | #define PK_CUR_TASK_UNLOCK spin_unlock(&current->pk->lock) | ||
| 29 | #define PK_TASK_LOCK(pk_tsk) spin_lock(&pk_tsk->lock) | ||
| 30 | #define PK_TASK_UNLOCK(pk_tsk) spin_unlock(&pk_tsk->lock) | ||
| 31 | |||
| 32 | #define PK_LOCK spin_lock(&pk_lock) | ||
| 33 | #define PK_UNLOCK spin_unlock(&pk_lock) | ||
| 34 | #define PK_SHARE_LOCK(share) spin_lock(&share->lock) | ||
| 35 | #define PK_SHARE_UNLOCK(share) spin_unlock(&share->lock) | ||
| 36 | |||
| 37 | #define GET_SEMAPHORE(task_share, status) \ | ||
| 38 | if(status & PK_FL_RDLCK) down_read(&task_share->share->sem); \ | ||
| 39 | else down_write(&task_share->share->sem); | ||
| 40 | |||
| 41 | #define RELEASE_SEMAPHORE(task_share, status) \ | ||
| 42 | if(status & PK_FL_RDLCK) up_read(&task_share->share->sem); \ | ||
| 43 | else up_write(&task_share->share->sem); | ||
| 44 | |||
| 45 | //////////////////////////////////////////////////////////////////////////////// | ||
| 46 | /// Debug Macros | ||
| 47 | /// #define dbg(format, arg...) printk(KERN_INFO "pk: " format "\n" , ##arg) | ||
| 48 | //////////////////////////////////////////////////////////////////////////////// | ||
| 49 | //#define PKMEM_DEBUG | ||
| 50 | |||
| 51 | #ifdef PKMEM_DEBUG | ||
| 52 | #define dbg(format, arg...) \ | ||
| 53 | printk(KERN_INFO "pk: %d,(%s, %d):%s() " format "\n", \ | ||
| 54 | current->pid, __FILE__, __LINE__, __FUNCTION__, ##arg) | ||
| 55 | #else | ||
| 56 | #define dbg(format, arg...) do {} while (0) | ||
| 57 | #endif | ||
| 58 | |||
| 59 | //////////////////////////////////////////////////////////////////////////////// | ||
| 60 | /// Error Numbers in pkMem | ||
| 61 | //////////////////////////////////////////////////////////////////////////////// | ||
| 62 | #define EABRT 100 | ||
| 63 | #define EABRTONGOING 101 | ||
| 64 | #define ENOTSTOPPED 102 | ||
| 65 | #define EUNUSED 103 /* Unused */ | ||
| 66 | #define ENOPARAM 104 | ||
| 67 | #define EMPROTECT 105 | ||
| 68 | #define ENOEXIST 106 | ||
| 69 | #define EDELETED 107 | ||
| 70 | #define ENODGRADE 108 | ||
| 71 | #define ENOUGRADE 109 | ||
| 72 | #define EBUG 110 | ||
| 73 | #define EABRTPENDING 111 | ||
| 74 | #define EDENIED 112 | ||
| 75 | |||
| 76 | //////////////////////////////////////////////////////////////////////////////// | ||
| 77 | /// Constants in pkMem | ||
| 78 | //////////////////////////////////////////////////////////////////////////////// | ||
| 79 | |||
| 80 | /// Constants that are used to describe the status field of struct pk_share | ||
| 81 | /// AND are used by _userspace_ to specify the lock requested | ||
| 82 | #define PK_FL_RDLCK 0x000001 | ||
| 83 | #define PK_FL_WRLCK 0x000002 | ||
| 84 | |||
| 85 | /// Constants used to describe the status field of struct pk_share | ||
| 86 | #define PK_ST_DEL 0x000004 | ||
| 87 | #define PK_ST_DEL_FINAL 0x002000 | ||
| 88 | |||
| 89 | /// Constants used to describe the status field of struct pk_task | ||
| 90 | #define PK_TR_STARTED 0x000008 | ||
| 91 | #define PK_TR_STOPPED 0x000010 | ||
| 92 | #define PK_TR_ABORTED 0x000020 | ||
| 93 | #define PK_TR_ABORT_PENDING 0x000040 | ||
| 94 | #define PK_TR_SYS_INPROGRESS 0x000080 | ||
| 95 | #define PK_TR_ABORT_INPROGRESS 0x000100 | ||
| 96 | |||
| 97 | /// Constants used to describe the status field of struct pk_task_share | ||
| 98 | #define PK_PROC_GOT_WR 0x000200 | ||
| 99 | #define PK_PROC_GOT_RD 0x000400 | ||
| 100 | #define PK_PROC_FREE 0x000800 | ||
| 101 | #define PK_PROC_ALLOC 0x001000 | ||
| 102 | |||
| 103 | //////////////////////////////////////////////////////////////////////////////// | ||
| 104 | /// Structures in pkMem | ||
| 105 | /////////////////////////////////////////////////////////////////////////////// | ||
| 106 | struct pk_basic { | ||
| 107 | struct list_head list; | ||
| 108 | unsigned long start; | ||
| 109 | unsigned long pages; | ||
| 110 | }; | ||
| 111 | |||
| 112 | struct pk_share { | ||
| 113 | struct rw_semaphore sem; | ||
| 114 | |||
| 115 | // Modification of these fields is done holding PK_LOCK | ||
| 116 | unsigned long start; | ||
| 117 | unsigned long pages; | ||
| 118 | struct list_head list; | ||
| 119 | |||
| 120 | spinlock_t lock; | ||
| 121 | |||
| 122 | // Modification of these fields is done holding ``lock' | ||
| 123 | struct list_head task_head; | ||
| 124 | unsigned long status; | ||
| 125 | unsigned int refcnt; | ||
| 126 | }; | ||
| 127 | |||
| 128 | struct pk_task_share { | ||
| 129 | struct list_head list; | ||
| 130 | struct list_head list_task; | ||
| 131 | spinlock_t lock; | ||
| 132 | unsigned int status; | ||
| 133 | struct task_struct *task; | ||
| 134 | struct pk_share *share; | ||
| 135 | unsigned long jiffies; | ||
| 136 | }; | ||
| 137 | |||
| 138 | //////////////////////////////////////////////////////////////////////////////// | ||
| 139 | /// Externally defined | ||
| 140 | //////////////////////////////////////////////////////////////////////////////// | ||
| 141 | |||
| 142 | extern long mprotect_for_task(struct task_struct *task, unsigned long start, | ||
| 143 | size_t len, unsigned long prot); | ||
| 144 | extern long __do_abort(struct task_struct *task, int call_mprotect); | ||
| 145 | extern void __release_log_regions(struct pk_task *pk); | ||
| 146 | |||
| 147 | #endif | ||
| clean-2.6.13.1/include/linux/sched.h | modif-2.6.13.1/include/linux/sched.h | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-03-01 03:04:48.417676000 -0600 | ||
| 596 | struct audit_context; /* See audit.c */ | 596 | struct audit_context; /* See audit.c */ |
| 597 | struct mempolicy; | 597 | struct mempolicy; |
| 598 | 598 | ||
| 599 | /// Note: struct task_struct refers to its struct pk_task through the ->pk pointer | ||
| 600 | struct pk_task { | ||
| 601 | spinlock_t lock; | ||
| 602 | unsigned long status; | ||
| 603 | struct list_head logs_head; | ||
| 604 | struct list_head share_head; | ||
| 605 | }; | ||
| 606 | |||
| 599 | struct task_struct { | 607 | struct task_struct { |
| 600 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 608 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
| 601 | struct thread_info *thread_info; | 609 | struct thread_info *thread_info; |
| ... | ... | ||
| 770 | int cpuset_mems_generation; | 778 | int cpuset_mems_generation; |
| 771 | #endif | 779 | #endif |
| 772 | atomic_t fs_excl; /* holding fs exclusive resources */ | 780 | atomic_t fs_excl; /* holding fs exclusive resources */ |
| 781 | struct pk_task *pk; | ||
| 773 | }; | 782 | }; |
| 774 | 783 | ||
| 775 | static inline pid_t process_group(struct task_struct *tsk) | 784 | static inline pid_t process_group(struct task_struct *tsk) |
| clean-2.6.13.1/kernel/exit.c | modif-2.6.13.1/kernel/exit.c | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-03-23 00:57:31.231458000 -0600 | ||
| 28 | #include <linux/cpuset.h> | 28 | #include <linux/cpuset.h> |
| 29 | #include <linux/syscalls.h> | 29 | #include <linux/syscalls.h> |
| 30 | #include <linux/signal.h> | 30 | #include <linux/signal.h> |
| 31 | #include <linux/pkmem.h> | ||
| 31 | 32 | ||
| 32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
| 33 | #include <asm/unistd.h> | 34 | #include <asm/unistd.h> |
| ... | ... | ||
| 833 | del_timer_sync(&tsk->signal->real_timer); | 834 | del_timer_sync(&tsk->signal->real_timer); |
| 834 | acct_process(code); | 835 | acct_process(code); |
| 835 | } | 836 | } |
| 837 | |||
| 838 | /// If the task's current transaction has already been aborted, has | ||
| 839 | /// stopped, or has an abort in progress, don't call __do_abort(). We | ||
| 840 | /// always call __release_log_regions(), which releases all the log regions | ||
| 841 | /// allocated by the task back into the free list. | ||
| 842 | if(tsk->pk) { | ||
| 843 | PK_TASK_LOCK(tsk->pk); | ||
| 844 | if(!(tsk->pk->status & PK_TR_ABORTED | ||
| 845 | || tsk->pk->status & PK_TR_STOPPED | ||
| 846 | || tsk->pk->status & PK_TR_ABORT_INPROGRESS)) { | ||
| 847 | PK_TASK_UNLOCK(tsk->pk); | ||
| 848 | __do_abort(tsk, 0); | ||
| 849 | } else | ||
| 850 | PK_TASK_UNLOCK(tsk->pk); | ||
| 851 | __release_log_regions(tsk->pk); | ||
| 852 | } | ||
| 853 | |||
| 836 | exit_mm(tsk); | 854 | exit_mm(tsk); |
| 837 | 855 | ||
| 838 | exit_sem(tsk); | 856 | exit_sem(tsk); |
| ... | ... | ||
| 852 | 870 | ||
| 853 | tsk->exit_code = code; | 871 | tsk->exit_code = code; |
| 854 | exit_notify(tsk); | 872 | exit_notify(tsk); |
| 873 | |||
| 855 | #ifdef CONFIG_NUMA | 874 | #ifdef CONFIG_NUMA |
| 856 | mpol_free(tsk->mempolicy); | 875 | mpol_free(tsk->mempolicy); |
| 857 | tsk->mempolicy = NULL; | 876 | tsk->mempolicy = NULL; |
| clean-2.6.13.1/Makefile | modif-2.6.13.1/Makefile | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-01-16 12:46:52.292063000 -0600 | ||
| 562 | 562 | ||
| 563 | 563 | ||
| 564 | ifeq ($(KBUILD_EXTMOD),) | 564 | ifeq ($(KBUILD_EXTMOD),) |
| 565 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ | 565 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ pkmem/ |
| 566 | 566 | ||
| 567 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ | 567 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ |
| 568 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ | 568 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ |
| clean-2.6.13.1/mm/mprotect.c | modif-2.6.13.1/mm/mprotect.c | ||
| 2005-09-09 21:42:58.000000000 -0500 | 2006-03-08 01:13:10.271147000 -0600 | ||
| 19 | #include <linux/mempolicy.h> | 19 | #include <linux/mempolicy.h> |
| 20 | #include <linux/personality.h> | 20 | #include <linux/personality.h> |
| 21 | #include <linux/syscalls.h> | 21 | #include <linux/syscalls.h> |
| 22 | #include <linux/pkmem.h> | ||
| 22 | 23 | ||
| 23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
| 24 | #include <asm/pgtable.h> | 25 | #include <asm/pgtable.h> |
| ... | ... | ||
| 184 | struct vm_area_struct *vma, *prev; | 185 | struct vm_area_struct *vma, *prev; |
| 185 | int error = -EINVAL; | 186 | int error = -EINVAL; |
| 186 | const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); | 187 | const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); |
| 188 | |||
| 189 | /* Cannot mprotect a VMA that belongs to pkmem */ | ||
| 190 | if(start >= PKMEM_START && start <= (PKMEM_START + PKMEM_SIZE)) { | ||
| 191 | error = -EACCES; | ||
| 192 | goto out; | ||
| 193 | } | ||
| 194 | |||
| 187 | prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); | 195 | prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); |
| 188 | if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ | 196 | if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ |
| 189 | return -EINVAL; | 197 | return -EINVAL; |
| ... | ... | ||
| 280 | up_write(&current->mm->mmap_sem); | 288 | up_write(&current->mm->mmap_sem); |
| 281 | return error; | 289 | return error; |
| 282 | } | 290 | } |
| 291 | |||
| 292 | long mprotect_for_task(struct task_struct *task, unsigned long start, | ||
| 293 | size_t len, unsigned long prot) | ||
| 294 | { | ||
| 295 | unsigned long vm_flags, nstart, end, tmp, reqprot; | ||
| 296 | struct vm_area_struct *vma, *prev; | ||
| 297 | int error = -EINVAL; | ||
| 298 | const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); | ||
| 299 | prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); | ||
| 300 | if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ | ||
| 301 | return -EINVAL; | ||
| 302 | |||
| 303 | if (start & ~PAGE_MASK) | ||
| 304 | return -EINVAL; | ||
| 305 | if (!len) | ||
| 306 | return 0; | ||
| 307 | len = PAGE_ALIGN(len); | ||
| 308 | end = start + len; | ||
| 309 | if (end <= start) | ||
| 310 | return -ENOMEM; | ||
| 311 | if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) | ||
| 312 | return -EINVAL; | ||
| 313 | |||
| 314 | reqprot = prot; | ||
| 315 | /* | ||
| 316 | * Does the application expect PROT_READ to imply PROT_EXEC: | ||
| 317 | */ | ||
| 318 | if (unlikely((prot & PROT_READ) && | ||
| 319 | (task->personality & READ_IMPLIES_EXEC))) | ||
| 320 | prot |= PROT_EXEC; | ||
| 321 | |||
| 322 | vm_flags = calc_vm_prot_bits(prot); | ||
| 323 | |||
| 324 | down_write(&task->mm->mmap_sem); | ||
| 325 | |||
| 326 | vma = find_vma_prev(task->mm, start, &prev); | ||
| 327 | error = -ENOMEM; | ||
| 328 | if (!vma) | ||
| 329 | goto out; | ||
| 330 | if (unlikely(grows & PROT_GROWSDOWN)) { | ||
| 331 | if (vma->vm_start >= end) | ||
| 332 | goto out; | ||
| 333 | start = vma->vm_start; | ||
| 334 | error = -EINVAL; | ||
| 335 | if (!(vma->vm_flags & VM_GROWSDOWN)) | ||
| 336 | goto out; | ||
| 337 | } | ||
| 338 | else { | ||
| 339 | if (vma->vm_start > start) | ||
| 340 | goto out; | ||
| 341 | if (unlikely(grows & PROT_GROWSUP)) { | ||
| 342 | end = vma->vm_end; | ||
| 343 | error = -EINVAL; | ||
| 344 | if (!(vma->vm_flags & VM_GROWSUP)) | ||
| 345 | goto out; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | if (start > vma->vm_start) | ||
| 349 | prev = vma; | ||
| 350 | |||
| 351 | for (nstart = start ; ; ) { | ||
| 352 | unsigned long newflags; | ||
| 353 | |||
| 354 | /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ | ||
| 355 | |||
| 356 | if (is_vm_hugetlb_page(vma)) { | ||
| 357 | error = -EACCES; | ||
| 358 | goto out; | ||
| 359 | } | ||
| 360 | |||
| 361 | newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); | ||
| 362 | |||
| 363 | if ((newflags & ~(newflags >> 4)) & 0xf) { | ||
| 364 | error = -EACCES; | ||
| 365 | goto out; | ||
| 366 | } | ||
| 367 | |||
| 368 | tmp = vma->vm_end; | ||
| 369 | if (tmp > end) | ||
| 370 | tmp = end; | ||
| 371 | error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); | ||
| 372 | if (error) | ||
| 373 | goto out; | ||
| 374 | nstart = tmp; | ||
| 375 | |||
| 376 | if (nstart < prev->vm_end) | ||
| 377 | nstart = prev->vm_end; | ||
| 378 | if (nstart >= end) | ||
| 379 | goto out; | ||
| 380 | |||
| 381 | vma = prev->vm_next; | ||
| 382 | if (!vma || vma->vm_start != nstart) { | ||
| 383 | error = -ENOMEM; | ||
| 384 | goto out; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | out: | ||
| 388 | up_write(&task->mm->mmap_sem); | ||
| 389 | return error; | ||
| 390 | } | ||
| clean-2.6.13.1/pkmem/Makefile | modif-2.6.13.1/pkmem/Makefile | ||
| 1969-12-31 18:00:00.000000000 -0600 | 2006-01-16 12:46:52.382063000 -0600 | ||
| 1 | # pkmem | ||
| 2 | # | ||
| 3 | obj-y := pkmem.o | ||
| clean-2.6.13.1/pkmem/pkmem.c | modif-2.6.13.1/pkmem/pkmem.c | ||
| 1969-12-31 18:00:00.000000000 -0600 | 2006-04-02 19:05:32.234443000 -0500 | ||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/devfs_fs_kernel.h> | ||
| 3 | #include <linux/vfs.h> | ||
| 4 | #include <linux/mount.h> | ||
| 5 | #include <linux/file.h> | ||
| 6 | #include <linux/mm.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | #include <linux/swap.h> | ||
| 9 | #include <linux/ramfs.h> | ||
| 10 | #include <asm/mman.h> | ||
| 11 | #include <linux/list.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | #include <linux/ptrace.h> | ||
| 14 | #include <linux/syscalls.h> | ||
| 15 | #include <linux/linkage.h> | ||
| 16 | #include <linux/spinlock.h> | ||
| 17 | #include <linux/pkmem.h> | ||
| 18 | |||
| 19 | /// Globals needed for creating the pkMem region | ||
| 20 | static struct vfsmount *pkmem_mnt; | ||
| 21 | static struct file *pk_file; | ||
| 22 | |||
| 23 | /// Global pkMem data structure slabs | ||
| 24 | kmem_cache_t *pk_task_cachep; | ||
| 25 | kmem_cache_t *pk_basic_cachep; | ||
| 26 | kmem_cache_t *pk_share_cachep; | ||
| 27 | kmem_cache_t *pk_task_share_cachep; | ||
| 28 | |||
| 29 | /// Definitions of global PK_LOCK | ||
| 30 | spinlock_t pk_lock; | ||
| 31 | |||
| 32 | /// Keeps track of the size of the largest contiguous free pkMem region in the | ||
| 33 | /// free list [Protected by PK_LOCK] | ||
| 34 | /// pk_freeHEAD points to a list of struct pk_basic object (free list) | ||
| 35 | /// pk_shareHEAD points to a list of struct pk_share objects (shared list) | ||
| 36 | static unsigned long __pk_free_pages; | ||
| 37 | static LIST_HEAD(pk_freeHEAD); | ||
| 38 | static LIST_HEAD(pk_shareHEAD); | ||
| 39 | |||
| 40 | //////////////////////////////////////////////////////////////////////////////// | ||
| 41 | /// Functions to initialize the pkMem data structures | ||
| 42 | //////////////////////////////////////////////////////////////////////////////// | ||
| 43 | |||
| 44 | static inline struct pk_basic *init_pk_basic(void) | ||
| 45 | { | ||
| 46 | struct pk_basic *basic; | ||
| 47 | |||
| 48 | might_sleep(); | ||
| 49 | basic = kmem_cache_alloc(pk_basic_cachep, GFP_KERNEL); | ||
| 50 | |||
| 51 | if(!basic) | ||
| 52 | return NULL; | ||
| 53 | INIT_LIST_HEAD(&basic->list); | ||
| 54 | basic->pages = 0; | ||
| 55 | basic->start = 0; | ||
| 56 | return basic; | ||
| 57 | } | ||
| 58 | |||
| 59 | static inline struct pk_share *init_pk_share(void) | ||
| 60 | { | ||
| 61 | struct pk_share *share; | ||
| 62 | |||
| 63 | might_sleep(); | ||
| 64 | share = kmem_cache_alloc(pk_share_cachep, GFP_KERNEL); | ||
| 65 | |||
| 66 | if(!share) | ||
| 67 | return NULL; | ||
| 68 | init_rwsem(&share->sem); | ||
| 69 | share->lock = SPIN_LOCK_UNLOCKED; | ||
| 70 | INIT_LIST_HEAD(&share->list); | ||
| 71 | INIT_LIST_HEAD(&share->task_head); | ||
| 72 | share->status = 0; | ||
| 73 | share->pages = 0; | ||
| 74 | share->start = 0; | ||
| 75 | share->refcnt = 0; | ||
| 76 | return share; | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline struct pk_task *init_pk_task(unsigned long status) | ||
| 80 | { | ||
| 81 | struct pk_task *pk; | ||
| 82 | |||
| 83 | might_sleep(); | ||
| 84 | pk = kmem_cache_alloc(pk_task_cachep, GFP_KERNEL); | ||
| 85 | |||
| 86 | if(!pk) | ||
| 87 | return NULL; | ||
| 88 | pk->lock = SPIN_LOCK_UNLOCKED; | ||
| 89 | pk->status = status; | ||
| 90 | INIT_LIST_HEAD(&pk->logs_head); | ||
| 91 | INIT_LIST_HEAD(&pk->share_head); | ||
| 92 | return pk; | ||
| 93 | } | ||
| 94 | |||
| 95 | static inline struct pk_task_share *init_pk_task_share(void) | ||
| 96 | { | ||
| 97 | struct pk_task_share *task_share; | ||
| 98 | |||
| 99 | might_sleep(); | ||
| 100 | task_share = kmem_cache_alloc(pk_task_share_cachep, GFP_KERNEL); | ||
| 101 | |||
| 102 | if(!task_share) | ||
| 103 | return NULL; | ||
| 104 | INIT_LIST_HEAD(&task_share->list); | ||
| 105 | task_share->share = NULL; | ||
| 106 | task_share->status = 0; | ||
| 107 | task_share->task = current; | ||
| 108 | // Note: We do not set task_share->jiffies at this point | ||
| 109 | return task_share; | ||
| 110 | } | ||
| 111 | |||
| 112 | static inline long setup_transaction(unsigned long status) | ||
| 113 | { | ||
| 114 | long err; | ||
| 115 | |||
| 116 | err = 0; | ||
| 117 | if(current->pk == NULL) { | ||
| 118 | dbg(); | ||
| 119 | current->pk = init_pk_task((PK_TR_STARTED | status)); | ||
| 120 | if(current->pk == NULL) { | ||
| 121 | dbg(); | ||
| 122 | err = -ENOMEM; | ||
| 123 | goto out; | ||
| 124 | } | ||
| 125 | } else { | ||
| 126 | PK_CUR_TASK_LOCK; | ||
| 127 | dbg(); | ||
| 128 | if(current->pk->status & PK_TR_ABORTED) { | ||
| 129 | dbg(); | ||
| 130 | err = -EABRT; | ||
| 131 | } else if(current->pk->status & PK_TR_ABORT_PENDING) { | ||
| 132 | err = -EABRTPENDING; | ||
| 133 | BUG(); | ||
| 134 | } else if(current->pk->status & PK_TR_STOPPED) { | ||
| 135 | dbg(); | ||
| 136 | current->pk->status = PK_TR_STARTED; | ||
| 137 | } | ||
| 138 | else if(current->pk->status & PK_TR_ABORT_INPROGRESS) { | ||
| 139 | dbg(); | ||
| 140 | err = -EABRT; | ||
| 141 | } | ||
| 142 | if(err == 0) | ||
| 143 | current->pk->status |= status; | ||
| 144 | PK_CUR_TASK_UNLOCK; | ||
| 145 | } | ||
| 146 | |||
| 147 | out: | ||
| 148 | return err; | ||
| 149 | } | ||
| 150 | |||
| 151 | //////////////////////////////////////////////////////////////////////////////// | ||
| 152 | /// Functions to search {log|share} regions in pkMem | ||
| 153 | //////////////////////////////////////////////////////////////////////////////// | ||
| 154 | static inline struct pk_share *search_shareHEAD(unsigned long start) | ||
| 155 | { | ||
| 156 | struct list_head *idx; | ||
| 157 | struct pk_share *share; | ||
| 158 | |||
| 159 | PK_LOCK; | ||
| 160 | list_for_each(idx, &pk_shareHEAD) { | ||
| 161 | share = list_entry(idx, struct pk_share, list); | ||
| 162 | if(share->start == start) { | ||
| 163 | PK_SHARE_LOCK(share); | ||
| 164 | if(share->status & PK_ST_DEL | ||
| 165 | || share->status & PK_ST_DEL_FINAL) | ||
| 166 | share = NULL; | ||
| 167 | else | ||
| 168 | share->refcnt++; | ||
| 169 | PK_SHARE_UNLOCK(share); | ||
| 170 | goto out; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | share = NULL; | ||
| 175 | out: | ||
| 176 | PK_UNLOCK; | ||
| 177 | return share; | ||
| 178 | } | ||
| 179 | |||
| 180 | /// This function checks whether a struct pk_share object from the global | ||
| 181 | /// pk_shareHEAD list can also be found in the current task's share_head list | ||
| 182 | static inline struct pk_task_share *search_share_head(struct pk_share *share) | ||
| 183 | { | ||
| 184 | struct list_head *idx; | ||
| 185 | struct pk_task_share *task_share; | ||
| 186 | |||
| 187 | list_for_each(idx, &current->pk->share_head) { | ||
| 188 | task_share = list_entry(idx, struct pk_task_share, list); | ||
| 189 | if(task_share->share == share) | ||
| 190 | goto out; | ||
| 191 | } | ||
| 192 | |||
| 193 | task_share = NULL; | ||
| 194 | out: | ||
| 195 | return task_share; | ||
| 196 | } | ||
| 197 | |||
| 198 | /// During rollback this function checks if the address given has been obtained | ||
| 199 | /// by the process for exclusive access, either via an alloc or a pk_get | ||
| 200 | static inline int __share_addr_in_range(unsigned long addr) | ||
| 201 | { | ||
| 202 | struct list_head *idx; | ||
| 203 | struct pk_task_share *task_share; | ||
| 204 | unsigned long start, pages, status; | ||
| 205 | int err; | ||
| 206 | |||
| 207 | err = 0; | ||
| 208 | list_for_each(idx, &current->pk->share_head) { | ||
| 209 | task_share = list_entry(idx, struct pk_task_share, list); | ||
| 210 | status = task_share->status; | ||
| 211 | if(status & PK_PROC_ALLOC || status & PK_PROC_GOT_WR) { | ||
| 212 | start = task_share->share->start; | ||
| 213 | pages = task_share->share->pages; | ||
| 214 | if(start <= addr && addr <= (start + pages * PAGE_SIZE)) | ||
| 215 | goto out; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | err = -1; | ||
| 219 | out: | ||
| 220 | return err; | ||
| 221 | } | ||
| 222 | |||
| 223 | /// This function searches the current task's logs_head list for the | ||
| 224 | /// struct pk_basic object whose start address equals start | ||
| 225 | static inline struct pk_basic *search_logs_head(unsigned long start) | ||
| 226 | { | ||
| 227 | struct list_head *idx; | ||
| 228 | struct pk_basic *log; | ||
| 229 | |||
| 230 | list_for_each(idx, &current->pk->logs_head) { | ||
| 231 | log = list_entry(idx, struct pk_basic, list); | ||
| 232 | if(log->start == start) | ||
| 233 | goto out; | ||
| 234 | } | ||
| 235 | log = NULL; | ||
| 236 | out: | ||
| 237 | return log; | ||
| 238 | } | ||
| 239 | |||
| 240 | /// During rollback this function checks if the address given does indeed refer | ||
| 241 | /// to a start address of a log page | ||
| 242 | static inline int __log_addr_in_range(unsigned long addr) | ||
| 243 | { | ||
| 244 | struct list_head *idx; | ||
| 245 | struct pk_basic *log; | ||
| 246 | int err; | ||
| 247 | |||
| 248 | err = 0; | ||
| 249 | list_for_each(idx, &current->pk->logs_head) { | ||
| 250 | log = list_entry(idx, struct pk_basic, list); | ||
| 251 | if((unsigned long)log->start == addr) | ||
| 252 | goto out; | ||
| 253 | } | ||
| 254 | err = -1; | ||
| 255 | out: | ||
| 256 | return err; | ||
| 257 | } | ||
| 258 | |||
| 259 | //////////////////////////////////////////////////////////////////////////////// | ||
| 260 | /// Create the pkMem file in memory | ||
| 261 | //////////////////////////////////////////////////////////////////////////////// | ||
| 262 | |||
| 263 | /// The internal pkmem filesystem that represents the entire pkMem region | ||
| 264 | static struct file_system_type pkmemfs_fs_type = { | ||
| 265 | .owner = THIS_MODULE, | ||
| 266 | .name = "pkmemfs", | ||
| 267 | .get_sb = ramfs_get_sb, | ||
| 268 | .kill_sb = kill_litter_super, | ||
| 269 | }; | ||
| 270 | |||
| 271 | struct file *__file_setup(char *name, loff_t size) | ||
| 272 | { | ||
| 273 | int error; | ||
| 274 | struct file *file; | ||
| 275 | struct inode *inode; | ||
| 276 | struct dentry *dentry, *root; | ||
| 277 | struct qstr this; | ||
| 278 | |||
| 279 | if (IS_ERR(pkmem_mnt)) | ||
| 280 | return (void *)pkmem_mnt; | ||
| 281 | |||
| 282 | error = -ENOMEM; | ||
| 283 | this.name = name; | ||
| 284 | this.len = strlen(name); | ||
| 285 | this.hash = 0; /* Make file (deleted) in /proc/<pid>/maps */ | ||
| 286 | root = pkmem_mnt->mnt_root; | ||
| 287 | dentry = d_alloc(root, &this); | ||
| 288 | if (!dentry) | ||
| 289 | goto put_memory; | ||
| 290 | error = -ENFILE; | ||
| 291 | file = get_empty_filp(); | ||
| 292 | if (!file) | ||
| 293 | goto put_dentry; | ||
| 294 | |||
| 295 | error = -ENOSPC; | ||
| 296 | inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); | ||
| 297 | if (!inode) | ||
| 298 | goto close_file; | ||
| 299 | |||
| 300 | d_instantiate(dentry, inode); | ||
| 301 | inode->i_size = size; | ||
| 302 | inode->i_nlink = 0; /* It is unlinked */ | ||
| 303 | file->f_vfsmnt = mntget(pkmem_mnt); | ||
| 304 | file->f_dentry = dentry; | ||
| 305 | file->f_mapping = inode->i_mapping; | ||
| 306 | file->f_op = &ramfs_file_operations; | ||
| 307 | file->f_mode = FMODE_WRITE | FMODE_READ; | ||
| 308 | return file; | ||
| 309 | |||
| 310 | close_file: | ||
| 311 | put_filp(file); | ||
| 312 | put_dentry: | ||
| 313 | dput(dentry); | ||
| 314 | put_memory: | ||
| 315 | return ERR_PTR(error); | ||
| 316 | } | ||
| 317 | |||
| 318 | int attach_pkmem(void) | ||
| 319 | { | ||
| 320 | if(IS_ERR(pk_file)) | ||
| 321 | return -1; | ||
| 322 | down_write(&current->mm->mmap_sem); | ||
| 323 | do_mmap(pk_file, PKMEM_START, PKMEM_SIZE, PROT_NONE, MAP_SHARED, 0); | ||
| 324 | up_write(&current->mm->mmap_sem); | ||
| 325 | |||
| 326 | return 0; | ||
| 327 | } | ||
| 328 | |||
| 329 | static int __init init_pkmemfs(void) | ||
| 330 | { | ||
| 331 | int error; | ||
| 332 | loff_t size; | ||
| 333 | struct pk_basic *basic; | ||
| 334 | |||
| 335 | size = PKMEM_SIZE; | ||
| 336 | pk_lock = SPIN_LOCK_UNLOCKED; | ||
| 337 | error = register_filesystem(&pkmemfs_fs_type); | ||
| 338 | if (error) { | ||
| 339 | printk(KERN_ERR "pkMem: Could not register pkmemfs\n"); | ||
| 340 | goto out2; | ||
| 341 | } | ||
| 342 | |||
| 343 | devfs_mk_dir("pkmem"); | ||
| 344 | pkmem_mnt = kern_mount(&pkmemfs_fs_type); | ||
| 345 | if (IS_ERR(pkmem_mnt)) { | ||
| 346 | printk(KERN_ERR "pkMem: Could not kern_mount pkmemfs\n"); | ||
| 347 | error = PTR_ERR(pkmem_mnt); | ||
| 348 | goto out1; | ||
| 349 | } | ||
| 350 | |||
| 351 | pk_file = __file_setup("pkmem", size); | ||
| 352 | if(IS_ERR(pk_file)) { | ||
| 353 | error = -1; | ||
| 354 | goto out1; | ||
| 355 | } | ||
| 356 | |||
| 357 | pk_file->f_op = &ramfs_file_operations; | ||
| 358 | pk_basic_cachep = kmem_cache_create("pk_basic", | ||
| 359 | sizeof(struct pk_basic), | ||
| 360 | 0, SLAB_RECLAIM_ACCOUNT, | ||
| 361 | NULL, NULL); | ||
| 362 | pk_share_cachep = kmem_cache_create("pk_share", | ||
| 363 | sizeof(struct pk_share), | ||
| 364 | 0, SLAB_RECLAIM_ACCOUNT, | ||
| 365 | NULL, NULL); | ||
| 366 | pk_task_cachep = kmem_cache_create("pk_task", | ||
| 367 | sizeof(struct pk_task), | ||
| 368 | 0, SLAB_RECLAIM_ACCOUNT, | ||
| 369 | NULL, NULL); | ||
| 370 | pk_task_share_cachep = kmem_cache_create("pk_task_share", | ||
| 371 | sizeof(struct pk_task_share), | ||
| 372 | 0, SLAB_RECLAIM_ACCOUNT, | ||
| 373 | NULL, NULL); | ||
| 374 | |||
| 375 | basic = init_pk_basic(); | ||
| 376 | basic->start = PKMEM_START; | ||
| 377 | basic->pages = PKMEM_PAGES; | ||
| 378 | list_add(&basic->list, &pk_freeHEAD); | ||
| 379 | __pk_free_pages = PKMEM_PAGES; | ||
| 380 | printk(KERN_INFO "pkMem: initialized\n"); | ||
| 381 | return 0; | ||
| 382 | out1: | ||
| 383 | unregister_filesystem(&pkmemfs_fs_type); | ||
| 384 | out2: | ||
| 385 | return error; | ||
| 386 | } | ||
| 387 | |||
| 388 | module_init(init_pkmemfs) | ||
| 389 | |||
| 390 | //////////////////////////////////////////////////////////////////////////////// | ||
| 391 | /// __insert_into_freelist releases the pk_basic region pointed by insert | ||
| 392 | /// The caller needn't destroy the region. Since the region, which belongs to | ||
| 393 | /// the pk_logHEAD list is of type pk_basic and so are the nodes in the | ||
| 394 | /// pk_freeHEAD list. So all we have to do is just re-insert into sorted order | ||
| 395 | /// Hold PK_LOCK | ||
| 396 | //////////////////////////////////////////////////////////////////////////////// | ||
| 397 | void __insert_into_freelist(struct pk_basic *insert) | ||
| 398 | { | ||
| 399 | struct list_head *idx, *prev; | ||
| 400 | struct pk_basic *ptr; | ||
| 401 | |||
| 402 | // Insert the free space pointed to by the deleted node | ||
| 403 | // into the sorted place in the pk_freeHEAD list | ||
| 404 | // case 1: check if pk_freeHEAD list is empty | ||
| 405 | if(list_empty(&pk_freeHEAD)) { | ||
| 406 | dbg(); | ||
| 407 | list_add(&insert->list, &pk_freeHEAD); | ||
| 408 | goto out; | ||
| 409 | } | ||
| 410 | // case 2: put entry in sorted position in pk_freeHEAD list | ||
| 411 | prev = NULL; | ||
| 412 | list_for_each(idx, &pk_freeHEAD) { | ||
| 413 | ptr = list_entry(idx, struct pk_basic, list); | ||
| 414 | prev = idx; | ||
| 415 | if(insert->start < ptr->start) { | ||
| 416 | dbg(); | ||
| 417 | list_add_tail(&insert->list, idx); | ||
| 418 | goto out; | ||
| 419 | } | ||
| 420 | } | ||
| 421 | // case 3: We need to put the entry in the end (case 3) | ||
| 422 | list_add(&insert->list, prev); | ||
| 423 | |||
| 424 | out: | ||
| 425 | __pk_free_pages += insert->pages; | ||
| 426 | return; | ||
| 427 | } | ||
| 428 | |||
| 429 | //////////////////////////////////////////////////////////////////////////////// | ||
| 430 | /// __release_share_region releases the share region pointed by delete | ||
| 431 | /// The caller NEEDS to destroy the region. The caller must also allocate memory | ||
| 432 | /// for the insert node, which will eventually be inserted into the pk_freeHEAD | ||
| 433 | /// list. (insert = init_pk_basic();) | ||
| 434 | /// IMP: On err < 0 the caller must free insert | ||
| 435 | /// Hold PK_LOCK [Always], PK_SHARE_LOCK(share) [Except on pk_alloc, we needn't] | ||
| 436 | //////////////////////////////////////////////////////////////////////////////// | ||
| 437 | int __release_share_region(struct pk_share *delete, struct pk_basic *insert) | ||
| 438 | { | ||
| 439 | int err; | ||
| 440 | struct list_head *idx; | ||
| 441 | struct pk_share *share; | ||
| 442 | |||
| 443 | // Delete the node from the shareHEAD list | ||
| 444 | err = -ENOEXIST; | ||
| 445 | list_for_each(idx, &pk_shareHEAD) { | ||
| 446 | share = list_entry(idx, struct pk_share, list); | ||
| 447 | if(share == delete) { | ||
| 448 | dbg(); | ||
| 449 | list_del(&share->list); | ||
| 450 | err = 0; | ||
| 451 | break; | ||
| 452 | } | ||
| 453 | } | ||
| 454 | |||
| 455 | if(err == -ENOEXIST) { | ||
| 456 | dbg(); | ||
| 457 | goto out; | ||
| 458 | } | ||
| 459 | insert->start = delete->start; | ||
| 460 | insert->pages = delete->pages; | ||
| 461 | __insert_into_freelist(insert); | ||
| 462 | |||
| 463 | out: | ||
| 464 | return err; | ||
| 465 | } | ||
| 466 | |||
| 467 | //////////////////////////////////////////////////////////////////////////////// | ||
| 468 | /// __dec_share_refcnt decrements the share->refcnt and if the region | ||
| 469 | /// has been marked for deletion, then it releases the region back | ||
| 470 | /// into the free list | ||
| 471 | //////////////////////////////////////////////////////////////////////////////// | ||
| 472 | void __dec_share_refcnt(struct pk_share *share) | ||
| 473 | { | ||
| 474 | struct pk_basic *insert; | ||
| 475 | |||
| 476 | PK_SHARE_LOCK(share); | ||
| 477 | if(--share->refcnt == 0 && share->status & PK_ST_DEL_FINAL) { | ||
| 478 | PK_SHARE_UNLOCK(share); | ||
| 479 | dbg(); | ||
| 480 | insert = init_pk_basic(); | ||
| 481 | |||
| 482 | if(!insert) { | ||
| 483 | dbg(); | ||
| 484 | goto out; | ||
| 485 | } | ||
| 486 | PK_LOCK; | ||
| 487 | PK_SHARE_LOCK(share); | ||
| 488 | |||
| 489 | if(__release_share_region(share, insert) < 0) { | ||
| 490 | dbg(); | ||
| 491 | kmem_cache_free(pk_basic_cachep, insert); | ||
| 492 | } | ||
| 493 | else { | ||
| 494 | dbg(); | ||
| 495 | PK_SHARE_UNLOCK(share); | ||
| 496 | kmem_cache_free(pk_share_cachep, share); | ||
| 497 | goto unlock; | ||
| 498 | } | ||
| 499 | dbg(); | ||
| 500 | PK_SHARE_UNLOCK(share); | ||
| 501 | goto unlock; | ||
| 502 | } | ||
| 503 | dbg(); | ||
| 504 | PK_SHARE_UNLOCK(share); | ||
| 505 | goto out; | ||
| 506 | |||
| 507 | unlock: | ||
| 508 | PK_UNLOCK; | ||
| 509 | out: | ||
| 510 | return; | ||
| 511 | } | ||
| 512 | |||
| 513 | //////////////////////////////////////////////////////////////////////////////// | ||
| 514 | /// __merge_free_list(): merges consecutive entries in the free list | ||
| 515 | /// and returns the largest consecutive region that it has merged | ||
| 516 | //////////////////////////////////////////////////////////////////////////////// | ||
| 517 | int __merge_free_list(void) | ||
| 518 | { | ||
| 519 | unsigned long page_cnt; | ||
| 520 | struct list_head *idx; | ||
| 521 | struct pk_basic *cur, *prev; | ||
| 522 | |||
| 523 | page_cnt = 0; | ||
| 524 | restart: | ||
| 525 | prev = NULL; | ||
| 526 | PK_LOCK; | ||
| 527 | list_for_each(idx, &pk_freeHEAD) { | ||
| 528 | dbg(); | ||
| 529 | cur = list_entry(idx, struct pk_basic, list); | ||
| 530 | if(prev) { | ||
| 531 | dbg(); | ||
| 532 | if(prev->start + (prev->pages * PAGE_SIZE) | ||
| 533 | == cur->start) { | ||
| 534 | dbg(); | ||
| 535 | prev->pages += cur->pages; | ||
| 536 | list_del(&cur->list); | ||
| 537 | if(page_cnt < cur->pages) { | ||
| 538 | dbg(); | ||
| 539 | page_cnt = prev->pages; | ||
| 540 | } | ||
| 541 | PK_UNLOCK; | ||
| 542 | kmem_cache_free(pk_basic_cachep, cur); | ||
| 543 | dbg(); | ||
| 544 | goto restart; | ||
| 545 | } | ||
| 546 | } | ||
| 547 | prev = cur; | ||
| 548 | } | ||
| 549 | PK_UNLOCK; | ||
| 550 | |||
| 551 | dbg("page_cnt: %lu", page_cnt); | ||
| 552 | return page_cnt; | ||
| 553 | } | ||
| 554 | |||
| 555 | //////////////////////////////////////////////////////////////////////////////// | ||
| 556 | /// __split_free_list(pages): Splits free list, to satisfy the | ||
| 557 | /// allocation for 'pages' number of pages. It returns the start | ||
| 558 | /// address of the allocation | ||
| 559 | //////////////////////////////////////////////////////////////////////////////// | ||
| 560 | unsigned long __split_free_list(unsigned long pages) | ||
| 561 | { | ||
| 562 | int try; | ||
| 563 | unsigned long addr; | ||
| 564 | struct list_head *idx; | ||
| 565 | struct pk_basic *free_entry; | ||
| 566 | |||
| 567 | addr = try = 0; | ||
| 568 | PK_LOCK; | ||
| 569 | if(__pk_free_pages < pages) { | ||
| 570 | PK_UNLOCK; | ||
| 571 | dbg("__pk_free_pages %lu\n", __pk_free_pages); | ||
| 572 | goto out; | ||
| 573 | } | ||
| 574 | |||
| 575 | retry: | ||
| 576 | try++; | ||
| 577 | list_for_each(idx, &pk_freeHEAD) { | ||
| 578 | dbg(); | ||
| 579 | free_entry = list_entry(idx, struct pk_basic, list); | ||
| 580 | if(free_entry->pages >= pages) { | ||
| 581 | dbg(); | ||
| 582 | addr = free_entry->start; | ||
| 583 | __pk_free_pages -= pages; | ||
| 584 | free_entry->start += (pages * PAGE_SIZE); | ||
| 585 | free_entry->pages -= pages; | ||
| 586 | if(free_entry->pages == 0) { | ||
| 587 | dbg(); | ||
| 588 | list_del(&free_entry->list); | ||
| 589 | kmem_cache_free(pk_basic_cachep, free_entry); | ||
| 590 | } | ||
| 591 | PK_UNLOCK; | ||
| 592 | goto out; | ||
| 593 | } | ||
| 594 | } | ||
| 595 | PK_UNLOCK; | ||
| 596 | |||
| 597 | if(__merge_free_list() >= pages && try <= 2) { | ||
| 598 | PK_LOCK; | ||
| 599 | goto retry; | ||
| 600 | } | ||
| 601 | |||
| 602 | out: | ||
| 603 | dbg("__pk_free_pages %lu\n", __pk_free_pages); | ||
| 604 | return addr; | ||
| 605 | } | ||
| 606 | |||
| 607 | //////////////////////////////////////////////////////////////////////////////// | ||
| 608 | /// __alloc_share_regions(pages): returns a shareable region by | ||
| 609 | /// splitting the free list | ||
| 610 | /// IMPORTANT: It is up to the caller of this function to add the | ||
| 611 | /// share region into the pk_shareHEAD list | ||
| 612 | /// i.e. list_add(&share->list, &pk_shareHEAD); | ||
| 613 | //////////////////////////////////////////////////////////////////////////////// | ||
| 614 | struct pk_share *__alloc_share_regions(unsigned int pages) | ||
| 615 | { | ||
| 616 | unsigned long addr; | ||
| 617 | struct pk_share *share; | ||
| 618 | |||
| 619 | dbg("__pk_free_pages %lu\n", __pk_free_pages); | ||
| 620 | share = NULL; | ||
| 621 | addr = __split_free_list(pages); | ||
| 622 | |||
| 623 | if(addr == 0) { | ||
| 624 | dbg(); | ||
| 625 | goto out; | ||
| 626 | } | ||
| 627 | share = init_pk_share(); | ||
| 628 | |||
| 629 | if(!share) { | ||
| 630 | dbg(); | ||
| 631 | goto out; | ||
| 632 | } | ||
| 633 | share->start = addr; | ||
| 634 | share->pages = pages; | ||
| 635 | out: | ||
| 636 | return share; | ||
| 637 | } | ||
| 638 | |||
| 639 | //////////////////////////////////////////////////////////////////////////////// | ||
| 640 | /// __alloc_log_pages(pages): returns a log region by | ||
| 641 | /// splitting the free list | ||
| 642 | //////////////////////////////////////////////////////////////////////////////// | ||
| 643 | struct pk_basic *__alloc_log_pages(unsigned int pages) | ||
| 644 | { | ||
| 645 | unsigned long addr; | ||
| 646 | struct pk_basic *log; | ||
| 647 | |||
| 648 | dbg("__pk_free_pages %lu\n", __pk_free_pages); | ||
| 649 | log = NULL; | ||
| 650 | addr = __split_free_list(pages); | ||
| 651 | |||
| 652 | if(addr == 0) { | ||
| 653 | dbg(); | ||
| 654 | goto out; | ||
| 655 | } | ||
| 656 | log = init_pk_basic(); | ||
| 657 | |||
| 658 | if(!log) { | ||
| 659 | dbg(); | ||
| 660 | goto out; | ||
| 661 | } | ||
| 662 | log->start = addr; | ||
| 663 | log->pages = pages; | ||
| 664 | out: | ||
| 665 | return log; | ||
| 666 | } | ||
| 667 | |||
| 668 | //////////////////////////////////////////////////////////////////////////////// | ||
| 669 | /// __rollback performs the rollback by following the records given in the | ||
| 670 | /// task's log regions | ||
| 671 | //////////////////////////////////////////////////////////////////////////////// | ||
| 672 | void __rollback(unsigned long start) | ||
| 673 | { | ||
| 674 | // __log_addr_in_range(unsigned long addr); 0 or -1 | ||
| 675 | // __share_addr_in_range(unsigned long addr); 0 or -1 | ||
| 676 | dbg(); | ||
| 677 | PK_LOCK; | ||
| 678 | |||
| 679 | PK_UNLOCK; | ||
| 680 | } | ||
| 681 | |||
| 682 | //////////////////////////////////////////////////////////////////////////////// | ||
| 683 | /// __release_log_regions(). Called from do_exit() to release the log regions | ||
| 684 | /// that the process has obtained. The status of the transaction at this point | ||
| 685 | /// is always PK_TR_ABORTED or PK_TR_STOPPED | ||
| 686 | //////////////////////////////////////////////////////////////////////////////// | ||
| 687 | void __release_log_regions(struct pk_task *pk) | ||
| 688 | { | ||
| 689 | struct list_head *idx; | ||
| 690 | struct pk_basic *log_region; | ||
| 691 | |||
| 692 | while(!list_empty(&pk->logs_head)) { | ||
| 693 | dbg(); | ||
| 694 | idx = pk->logs_head.next; | ||
| 695 | log_region = list_entry(idx, struct pk_basic, list); | ||
| 696 | list_del(idx); | ||
| 697 | __insert_into_freelist(log_region); | ||
| 698 | } | ||
| 699 | } | ||
| 700 | |||
| 701 | //////////////////////////////////////////////////////////////////////////////// | ||
| 702 | /// __do_abort(). The main body of the __do_abort call | ||
| 703 | /// it finally sets the task's status to PK_TR_ABORTED and only a pk_end() | ||
| 704 | /// will set the status to PK_TR_STOPPED. From then on, new transactions can be | ||
| 705 | /// started | ||
| 706 | //////////////////////////////////////////////////////////////////////////////// | ||
| 707 | long __do_abort(struct task_struct *task, int call_mprotect) | ||
| 708 | { | ||
| 709 | long err; | ||
| 710 | struct list_head *idx; | ||
| 711 | struct pk_task *pk; | ||
| 712 | struct pk_task_share *task_share; | ||
| 713 | struct pk_basic *log_region; | ||
| 714 | |||
| 715 | err = 0; | ||
| 716 | pk = task->pk; | ||
| 717 | |||
| 718 | if(!task) { | ||
| 719 | err = -EBUG; | ||
| 720 | BUG(); | ||
| 721 | goto out; | ||
| 722 | } | ||
| 723 | |||
| 724 | if(!pk) { | ||
| 725 | dbg(); | ||
| 726 | goto out; | ||
| 727 | } | ||
| 728 | |||
| 729 | // If we are interrupting another pk_* system call then we set the | ||
| 730 | // status to PK_TR_ABORT_PENDING otherwise we set the status to note | ||
| 731 | // that an abort is in progress with PK_TR_ABORT_INPROGRESS | ||
| 732 | PK_TASK_LOCK(pk); | ||
| 733 | if(pk->status & PK_TR_SYS_INPROGRESS) { | ||
| 734 | dbg(); | ||
| 735 | PK_TASK_UNLOCK(pk); | ||
| 736 | pk->status |= PK_TR_ABORT_PENDING; | ||
| 737 | goto out; | ||
| 738 | } | ||
| 739 | else { | ||
| 740 | dbg(); | ||
| 741 | pk->status |= PK_TR_ABORT_INPROGRESS; | ||
| 742 | } | ||
| 743 | PK_TASK_UNLOCK(pk); | ||
| 744 | |||
| 745 | // Perform rollback | ||
| 746 | if(!list_empty(&pk->logs_head)) { | ||
| 747 | idx = pk->logs_head.next; | ||
| 748 | log_region = list_entry(idx, struct pk_basic, list); | ||
| 749 | __rollback(log_region->start); | ||
| 750 | } | ||
| 751 | |||
| 752 | // 1. We iterate over all the shared regions in the task's share_head | ||
| 753 | // list and remove the pk_task_share nodes from the share's task_head | ||
| 754 | // list. | ||
| 755 | // 2. Undo | ||
| 756 | // If a region has been: ALLOCED + FREED then delete it (default) | ||
| 757 | // If a region has been: FREED don't delete | ||
| 758 | // If a region has been: ALLOCED, then delete it | ||
| 759 | // 3. Call mprotect(): Since we need to unmap the memory regions that | ||
| 760 | // the task_share elements in the task's share_head list point to | ||
| 761 | // NOTE: If PK_PROC_FREE only then don't call mprotect(). All other | ||
| 762 | // possible combinations call mprotect() | ||
| 763 | // 4. dec refcnts of the share regions | ||
| 764 | list_for_each(idx, &pk->share_head) { | ||
| 765 | unsigned long start, pages; | ||
| 766 | // 1. | ||
| 767 | task_share = list_entry(idx, struct pk_task_share, list); | ||
| 768 | PK_LOCK; | ||
| 769 | PK_SHARE_LOCK(task_share->share); | ||
| 770 | list_del(&task_share->list_task); | ||
| 771 | |||
| 772 | // 2. | ||
| 773 | // NOTE: We can call PK_UNLOCK here, but we call it below to be | ||
| 774 | // consistent | ||
| 775 | if(task_share->status & PK_PROC_FREE && | ||
| 776 | !(task_share->status & PK_PROC_ALLOC)) | ||
| 777 | task_share->share->status &= ~(PK_ST_DEL); | ||
| 778 | else if(task_share->status & PK_PROC_ALLOC) | ||
| 779 | task_share->share->status |= PK_ST_DEL_FINAL; | ||
| 780 | PK_SHARE_UNLOCK(task_share->share); | ||
| 781 | PK_UNLOCK; | ||
| 782 | |||
| 783 | // 3. | ||
| 784 | PK_SHARE_LOCK(task_share->share); | ||
| 785 | start = task_share->share->start; | ||
| 786 | pages = task_share->share->pages; | ||
| 787 | PK_SHARE_UNLOCK(task_share->share); | ||
| 788 | if(call_mprotect) { | ||
| 789 | dbg(); | ||
| 790 | if(!((task_share->status & PK_PROC_FREE) | ||
| 791 | && !(task_share->status & PK_PROC_GOT_WR) | ||
| 792 | && !(task_share->status & PK_PROC_ALLOC))) { | ||
| 793 | dbg("undoing mprotect at %lu, pages %lu", start, | ||
| 794 | pages); | ||
| 795 | err = mprotect_for_task(task, start, pages | ||
| 796 | * PAGE_SIZE, PROT_NONE); | ||
| 797 | if(err < 0) { | ||
| 798 | BUG(); | ||
| 799 | err = -EMPROTECT; | ||
| 800 | goto out; | ||
| 801 | } | ||
| 802 | } | ||
| 803 | } | ||
| 804 | // 4. | ||
| 805 | __dec_share_refcnt(task_share->share); | ||
| 806 | dbg(); | ||
| 807 | } | ||
| 808 | |||
| 809 | // Empty the elements in the task's pk->share_head list and free the | ||
| 810 | // task_share structure | ||
| 811 | while(!list_empty(&pk->share_head)) { | ||
| 812 | dbg(); | ||
| 813 | idx = pk->share_head.next; | ||
| 814 | list_del(idx); | ||
| 815 | kmem_cache_free(pk_task_share_cachep, idx); | ||
| 816 | } | ||
| 817 | |||
| 818 | // Set the status of the transaction to PK_TR_ABORTED | ||
| 819 | PK_TASK_LOCK(pk); | ||
| 820 | pk->status = PK_TR_ABORTED; | ||
| 821 | PK_TASK_UNLOCK(pk); | ||
| 822 | err = -EABRT; | ||
| 823 | out: | ||
| 824 | return err; | ||
| 825 | } | ||
| 826 | |||
| 827 | //////////////////////////////////////////////////////////////////////////////// | ||
| 828 | /// __advance_queue() makes sure no task holds a shareable region for ever | ||
| 829 | //////////////////////////////////////////////////////////////////////////////// | ||
| 830 | long __advance_queue(struct pk_task_share *task_share, unsigned long status) | ||
| 831 | { | ||
| 832 | long err; | ||
| 833 | struct list_head *idx; | ||
| 834 | struct pk_task_share *ts; | ||
| 835 | unsigned long alloted_time, current_time; | ||
| 836 | |||
| 837 | err = 0; | ||
| 838 | GET_SEMAPHORE(task_share, status); | ||
| 839 | PK_SHARE_LOCK(task_share->share); | ||
| 840 | |||
| 841 | // Has the region been marked for deletion while we waited for it? | ||
| 842 | if(task_share->share->status & PK_ST_DEL || | ||
| 843 | task_share->share->status & PK_ST_DEL_FINAL) { | ||
| 844 | dbg(); err = -EDELETED; goto out; | ||
| 845 | } | ||
| 846 | |||
| 847 | // If there is no one currently holding the region, then we just add | ||
| 848 | // ourselves to the list of users after setting the status of the region | ||
| 849 | if(list_empty(&task_share->share->task_head)) { | ||
| 850 | dbg(); | ||
| 851 | task_share->share->status |= status; | ||
| 852 | goto add; | ||
| 853 | } | ||
| 854 | |||
| 855 | // Case 1: If there are currently readers to the region and we've | ||
| 856 | // requested a read lock | ||
| 857 | if((task_share->share->status & PK_FL_RDLCK) && (status & PK_FL_RDLCK)) { | ||
| 858 | dbg(); | ||
| 859 | goto add; | ||
| 860 | } | ||
| 861 | |||
| 862 | // Case 2: share->status & PK_FL_RDLCK && status & PK_FL_WRLCK | ||
| 863 | // Case 3: share->status & PK_FL_WRLCK | ||
| 864 | // - If alloted_time <= current_time, then abort the process! | ||
| 865 | // - Otherwise wait for time differential to elapse and retry | ||
| 866 | _continue: | ||
| 867 | dbg(); | ||
| 868 | while(!list_empty(&task_share->share->task_head)) { | ||
| 869 | list_for_each(idx, &task_share->share->task_head) { | ||
| 870 | ts = list_entry(idx, struct pk_task_share, list_task); | ||
| 871 | current_time = jiffies; | ||
| 872 | alloted_time = ts->jiffies + PK_TIMEOUT; | ||
| 873 | dbg("current_time %lu ts->jiffies %lu alloted_time %lu", | ||
| 874 | current_time, ts->jiffies, alloted_time); | ||
| 875 | if(time_before_eq(alloted_time, current_time)) { | ||
| 876 | PK_SHARE_UNLOCK(task_share->share); | ||
| 877 | dbg("calling __do_abort %d -> %d", | ||
| 878 | current->pid, ts->task->pid); | ||
| 879 | __do_abort(ts->task, 1); | ||
| 880 | PK_SHARE_LOCK(task_share->share); | ||
| 881 | goto _continue; | ||
| 882 | } | ||
| 883 | else { | ||
| 884 | long diff; | ||
| 885 | diff = (long) alloted_time - (long) current_time; | ||
| 886 | dbg("sleep for diff = %lu", diff); | ||
| 887 | PK_SHARE_UNLOCK(task_share->share); | ||
| 888 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 889 | schedule_timeout(diff); | ||
| 890 | PK_SHARE_LOCK(task_share->share); | ||
| 891 | goto _continue; | ||
| 892 | } | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | // Update the status of share (other cases the status stays the same) | ||
| 897 | if(task_share->share->status & PK_FL_RDLCK && status & PK_FL_WRLCK) { | ||
| 898 | dbg(); | ||
| 899 | task_share->share->status &= ~(PK_FL_RDLCK); | ||
| 900 | task_share->share->status |= PK_FL_WRLCK; | ||
| 901 | } | ||
| 902 | |||
| 903 | if(task_share->share->status & PK_FL_WRLCK && status & PK_FL_RDLCK) { | ||
| 904 | dbg(); | ||
| 905 | task_share->share->status &= ~(PK_FL_WRLCK); | ||
| 906 | task_share->share->status |= PK_FL_RDLCK; | ||
| 907 | } | ||
| 908 | add: | ||
| 909 | task_share->jiffies = jiffies; | ||
| 910 | dbg("jiffies assigned to task_share %lu", task_share->jiffies); | ||
| 911 | list_add_tail(&task_share->list_task, &task_share->share->task_head); | ||
| 912 | out: | ||
| 913 | PK_SHARE_UNLOCK(task_share->share); | ||
| 914 | RELEASE_SEMAPHORE(task_share, status); | ||
| 915 | return err; | ||
| 916 | } | ||
| 917 | |||
| 918 | //////////////////////////////////////////////////////////////////////////////// | ||
| 919 | /// The pk_alloc_log() system call | ||
| 920 | //////////////////////////////////////////////////////////////////////////////// | ||
| 921 | asmlinkage long sys_pk_alloc_log(unsigned long pages, unsigned long flag) | ||
| 922 | { | ||
| 923 | long err; | ||
| 924 | struct pk_basic *log; | ||
| 925 | |||
| 926 | // Initialize the transaction and/or change its status | ||
| 927 | err = setup_transaction(PK_TR_SYS_INPROGRESS); | ||
| 928 | if(err == -ENOMEM || err == -EABRT) | ||
| 929 | goto out; | ||
| 930 | |||
| 931 | log = __alloc_log_pages(pages); | ||
| 932 | if(!log) { | ||
| 933 | dbg(); | ||
| 934 | err = -ENOMEM; | ||
| 935 | goto out; | ||
| 936 | } | ||
| 937 | |||
| 938 | err = mprotect_for_task(current, log->start, pages * PAGE_SIZE, | ||
| 939 | (PROT_READ | PROT_WRITE)); | ||
| 940 | if(err < 0) { | ||
| 941 | BUG(); | ||
| 942 | err = -EMPROTECT; | ||
| 943 | PK_LOCK; | ||
| 944 | __insert_into_freelist(log); | ||
| 945 | PK_UNLOCK; | ||
| 946 | goto out; | ||
| 947 | } | ||
| 948 | err = log->start; | ||
| 949 | list_add_tail(&log->list, &current->pk->logs_head); | ||
| 950 | |||
| 951 | // Did someone abort us, while we were executing this call? | ||
| 952 | // If so, then it is _our_ responsibility to abort the transaction | ||
| 953 | PK_CUR_TASK_LOCK; | ||
| 954 | if(current->pk->status & PK_TR_ABORT_PENDING) { | ||
| 955 | PK_CUR_TASK_UNLOCK; | ||
| 956 | dbg("calling __do_abort on %d", current->pid); | ||
| 957 | err = __do_abort(current, 1); | ||
| 958 | goto out; | ||
| 959 | } | ||
| 960 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 961 | PK_CUR_TASK_UNLOCK; | ||
| 962 | out: | ||
| 963 | force_successful_syscall_return(); | ||
| 964 | return err; | ||
| 965 | } | ||
| 966 | |||
| 967 | //////////////////////////////////////////////////////////////////////////////// | ||
| 968 | /// The sys_pk_alloc() system call | ||
| 969 | //////////////////////////////////////////////////////////////////////////////// | ||
| 970 | asmlinkage long sys_pk_alloc(unsigned long pages) | ||
| 971 | { | ||
| 972 | long err; | ||
| 973 | struct pk_share *share; | ||
| 974 | struct pk_task_share *task_share; | ||
| 975 | struct pk_basic *insert; | ||
| 976 | |||
| 977 | // Initialize the transaction and/or change its status | ||
| 978 | err = setup_transaction(PK_TR_SYS_INPROGRESS); | ||
| 979 | if(err == -ENOMEM || err == -EABRT) | ||
| 980 | goto out; | ||
| 981 | task_share = init_pk_task_share(); | ||
| 982 | if(!task_share) { | ||
| 983 | err = -ENOMEM; | ||
| 984 | goto reset; | ||
| 985 | } | ||
| 986 | |||
| 987 | share = __alloc_share_regions(pages); | ||
| 988 | if(!share) { | ||
| 989 | dbg(); | ||
| 990 | err = -ENOMEM; | ||
| 991 | kmem_cache_free(pk_task_share_cachep, task_share); | ||
| 992 | goto reset; | ||
| 993 | } | ||
| 994 | |||
| 995 | task_share->share = share; | ||
| 996 | task_share->status = PK_PROC_ALLOC | PK_FL_WRLCK; | ||
| 997 | share->status = PK_FL_WRLCK; | ||
| 998 | err = mprotect_for_task(current, share->start, pages * PAGE_SIZE, | ||
| 999 | (PROT_READ | PROT_WRITE)); | ||
| 1000 | if(err < 0) { | ||
| 1001 | BUG(); | ||
| 1002 | err = -EMPROTECT; | ||
| 1003 | insert = init_pk_basic(); | ||
| 1004 | // Release the share region to the free list | ||
| 1005 | PK_LOCK; | ||
| 1006 | PK_SHARE_LOCK(share); | ||
| 1007 | if(__release_share_region(share, insert) < 0) | ||
| 1008 | kmem_cache_free(pk_basic_cachep, insert); | ||
| 1009 | PK_SHARE_UNLOCK(share); | ||
| 1010 | PK_UNLOCK; | ||
| 1011 | // Free memory allocated to share | ||
| 1012 | kmem_cache_free(pk_share_cachep, share); | ||
| 1013 | kmem_cache_free(pk_task_share_cachep, task_share); | ||
| 1014 | goto reset; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | err = share->start; | ||
| 1018 | // Now that memory protection has been setup | ||
| 1019 | // - Link up this task_share node with the task's share_head list | ||
| 1020 | // - Link up this task_share node with region's task_head list | ||
| 1021 | // - Increment the refcnt of the region, no one _can_ possibly | ||
| 1022 | // use the region now, so don't bother holding the share->lock | ||
| 1023 | // NOTE: There is no point in holding the share->lock as there | ||
| 1024 | // can be no contention for the lock | ||
| 1025 | list_add(&task_share->list, &current->pk->share_head); | ||
| 1026 | list_add_tail(&task_share->list_task, &share->task_head); | ||
| 1027 | share->refcnt++; | ||
| 1028 | |||
| 1029 | // Add the share region to the global shareable region list | ||
| 1030 | PK_LOCK; | ||
| 1031 | list_add(&share->list, &pk_shareHEAD); | ||
| 1032 | PK_UNLOCK; | ||
| 1033 | |||
| 1034 | // Did someone abort us, while we were executing this call? | ||
| 1035 | // If so, then it is _our_ responsibility to abort the transaction | ||
| 1036 | PK_CUR_TASK_LOCK; | ||
| 1037 | if(current->pk->status & PK_TR_ABORT_PENDING) { | ||
| 1038 | PK_CUR_TASK_UNLOCK; | ||
| 1039 | dbg("calling __do_abort on %d", current->pid); | ||
| 1040 | err = __do_abort(current, 1); | ||
| 1041 | goto out; | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 1045 | PK_CUR_TASK_UNLOCK; | ||
| 1046 | goto out; | ||
| 1047 | reset: | ||
| 1048 | PK_CUR_TASK_LOCK; | ||
| 1049 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 1050 | PK_CUR_TASK_UNLOCK; | ||
| 1051 | out: | ||
| 1052 | force_successful_syscall_return(); | ||
| 1053 | return err; | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1057 | /// sys_pk_get() | ||
| 1058 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1059 | asmlinkage long sys_pk_get(unsigned long start, unsigned long flag) | ||
| 1060 | { | ||
| 1061 | long err; | ||
| 1062 | unsigned long prot; | ||
| 1063 | struct pk_share *share; | ||
| 1064 | struct pk_task_share *task_share; | ||
| 1065 | |||
| 1066 | // Check if the parameters to the syscall are correct | ||
| 1067 | if(flag & ~(PK_FL_WRLCK | PK_FL_RDLCK)) { | ||
| 1068 | dbg(); | ||
| 1069 | err = -EINVAL; | ||
| 1070 | goto out; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | // Initialize the transaction and/or change its status | ||
| 1074 | err = setup_transaction(PK_TR_SYS_INPROGRESS); | ||
| 1075 | if(err == -ENOMEM || err == -EABRT) { | ||
| 1076 | dbg(); | ||
| 1077 | goto out; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | // First off, search the shareHEAD list, to see if the region | ||
| 1081 | // exists in the first place; if it does exist, then we increment | ||
| 1082 | // the share->refcnt of the region (IMPORTANT) | ||
| 1083 | share = search_shareHEAD(start); | ||
| 1084 | if(!share) { | ||
| 1085 | dbg(); | ||
| 1086 | err = -ENOEXIST; | ||
| 1087 | goto reset; | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | // The region does exist, now we check to make sure we haven't | ||
| 1091 | // already got, free-ed, or alloced the same region in the | ||
| 1092 | // current transaction | ||
| 1093 | // Case #1: We have alloced this region; If a request for WRLCK was | ||
| 1094 | // made then the request is ignored. If a request for RDLCK is made then | ||
| 1095 | // we return with an error stating ENODGRADE (no downgrading lock!) | ||
| 1096 | // Case #2: We have already free-ed this region; signal error because | ||
| 1097 | // it makes _no_ sense to obtain a RDLCK or WRLCK on the region | ||
| 1098 | // Case #3a: We have already got WRLCK on this region; Then the cases | ||
| 1099 | // are identical to case #1 | ||
| 1100 | // Case #3b: We already got RDLCK on this region; If a request for | ||
| 1101 | // WRLCK is made then return ENUGRADE (no upgrading lock!); If a request | ||
| 1102 | // for RDLCK is made then the request is ignored | ||
| 1103 | task_share = search_share_head(share); | ||
| 1104 | if(task_share) { | ||
| 1105 | dbg(); | ||
| 1106 | if(task_share->status & PK_PROC_ALLOC | ||
| 1107 | || task_share->status & PK_PROC_GOT_WR) { | ||
| 1108 | if(flag & PK_FL_WRLCK) { | ||
| 1109 | dbg(); | ||
| 1110 | err = share->pages; | ||
| 1111 | goto dec; | ||
| 1112 | } | ||
| 1113 | else { | ||
| 1114 | dbg(); | ||
| 1115 | err = -ENODGRADE; | ||
| 1116 | goto dec; | ||
| 1117 | } | ||
| 1118 | } | ||
| 1119 | else if(task_share->status & PK_PROC_FREE) { | ||
| 1120 | dbg(); | ||
| 1121 | err = -EDELETED; | ||
| 1122 | goto dec; | ||
| 1123 | } | ||
| 1124 | else if(task_share->status & PK_PROC_GOT_RD) { | ||
| 1125 | if(flag & PK_FL_WRLCK) { | ||
| 1126 | dbg(); | ||
| 1127 | err = -ENOUGRADE; | ||
| 1128 | goto dec; | ||
| 1129 | } else { | ||
| 1130 | dbg(); | ||
| 1131 | err = share->pages; | ||
| 1132 | goto dec; | ||
| 1133 | } | ||
| 1134 | } | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | prot = PROT_READ; | ||
| 1138 | // Initialize an object of the task_share struct, to link with share obj | ||
| 1139 | task_share = init_pk_task_share(); | ||
| 1140 | if(!task_share) { | ||
| 1141 | dbg(); | ||
| 1142 | err = -ENOMEM; | ||
| 1143 | goto dec; | ||
| 1144 | } | ||
| 1145 | task_share->share = share; | ||
| 1146 | if(flag & PK_FL_WRLCK) { | ||
| 1147 | dbg(); | ||
| 1148 | prot |= (PROT_WRITE); | ||
| 1149 | task_share->status = PK_PROC_GOT_WR; | ||
| 1150 | } | ||
| 1151 | else { | ||
| 1152 | dbg(); | ||
| 1153 | task_share->status = PK_PROC_GOT_RD; | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | // Advance the wait queue for the shareable region. No one should be | ||
| 1157 | // allowed to hold a lock on a region for ever | ||
| 1158 | err = __advance_queue(task_share, flag); | ||
| 1159 | if(err < 0) { | ||
| 1160 | dbg(); | ||
| 1161 | goto release; | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | // Call mprotect to protect the region | ||
| 1165 | err = mprotect_for_task(current, share->start, share->pages * PAGE_SIZE, | ||
| 1166 | prot); | ||
| 1167 | if(err < 0) { | ||
| 1168 | BUG(); | ||
| 1169 | err = -EMPROTECT; | ||
| 1170 | goto release; | ||
| 1171 | } | ||
| 1172 | |||
| 1173 | // Link up this task_share node with the task's share_head list | ||
| 1174 | list_add(&task_share->list, &current->pk->share_head); | ||
| 1175 | |||
| 1176 | // Did someone abort us, while we were executing this call? | ||
| 1177 | // If so, then it is _our_ responsibility to abort the transaction | ||
| 1178 | PK_CUR_TASK_LOCK; | ||
| 1179 | if(current->pk->status & PK_TR_ABORT_PENDING) { | ||
| 1180 | PK_CUR_TASK_UNLOCK; | ||
| 1181 | dbg("calling __do_abort on %d", current->pid); | ||
| 1182 | err = __do_abort(current, 1); | ||
| 1183 | goto out; | ||
| 1184 | } | ||
| 1185 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 1186 | PK_CUR_TASK_UNLOCK; | ||
| 1187 | err = share->pages; | ||
| 1188 | goto out; | ||
| 1189 | |||
| 1190 | release: | ||
| 1191 | kmem_cache_free(pk_task_share_cachep, task_share); | ||
| 1192 | dec: | ||
| 1193 | __dec_share_refcnt(share); | ||
| 1194 | reset: | ||
| 1195 | PK_CUR_TASK_LOCK; | ||
| 1196 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 1197 | PK_CUR_TASK_UNLOCK; | ||
| 1198 | out: | ||
| 1199 | force_successful_syscall_return(); | ||
| 1200 | return err; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1204 | /// sys_pk_free_log() | ||
| 1205 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1206 | asmlinkage long sys_pk_free_log(unsigned long start) | ||
| 1207 | { | ||
| 1208 | long err; | ||
| 1209 | struct pk_basic *remove; | ||
| 1210 | |||
| 1211 | // Success only if the status is PK_TR_STOPPED | ||
| 1212 | err = -ENOTSTOPPED; | ||
| 1213 | if(current->pk) { | ||
| 1214 | dbg(); | ||
| 1215 | PK_CUR_TASK_LOCK; | ||
| 1216 | if(current->pk->status & PK_TR_STOPPED) | ||
| 1217 | err = 0; | ||
| 1218 | PK_CUR_TASK_UNLOCK; | ||
| 1219 | } | ||
| 1220 | if(err < 0) | ||
| 1221 | goto out; | ||
| 1222 | |||
| 1223 | // Check if a log region with log->start = start exists | ||
| 1224 | // in the processes logs_head list | ||
| 1225 | remove = search_logs_head(start); | ||
| 1226 | if(!remove) { | ||
| 1227 | dbg(); | ||
| 1228 | err = -ENOEXIST; | ||
| 1229 | goto out; | ||
| 1230 | } | ||
| 1231 | |||
| 1232 | // Remove the log region from the task's logs_head list | ||
| 1233 | list_del(&remove->list); | ||
| 1234 | |||
| 1235 | // Call mprotect to remove the physical memory protection | ||
| 1236 | err = mprotect_for_task(current, remove->start, | ||
| 1237 | remove->pages * PAGE_SIZE, PROT_NONE); | ||
| 1238 | if(err < 0) { | ||
| 1239 | BUG(); | ||
| 1240 | err = -EMPROTECT; | ||
| 1241 | goto out; | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | // Now, we release the log region back into the free list | ||
| 1245 | PK_LOCK; | ||
| 1246 | __insert_into_freelist(remove); | ||
| 1247 | PK_UNLOCK; | ||
| 1248 | |||
| 1249 | out: | ||
| 1250 | force_successful_syscall_return(); | ||
| 1251 | return err; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1255 | /// sys_pk_free() | ||
| 1256 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1257 | asmlinkage long sys_pk_free(unsigned long start) | ||
| 1258 | { | ||
| 1259 | long err; | ||
| 1260 | struct pk_share *share; | ||
| 1261 | struct pk_task_share *task_share; | ||
| 1262 | |||
| 1263 | // Initialize the transaction and/or change its status | ||
| 1264 | err = setup_transaction(PK_TR_SYS_INPROGRESS); | ||
| 1265 | if(err == -ENOMEM || err == -EABRT) { | ||
| 1266 | dbg(); | ||
| 1267 | goto out; | ||
| 1268 | } | ||
| 1269 | |||
| 1270 | // First off, search the shareHEAD list, to see if the region | ||
| 1271 | // exists in the first place; if it does exist, then we increment | ||
| 1272 | // the share->refcnt of the region (IMPORTANT) | ||
| 1273 | share = search_shareHEAD(start); | ||
| 1274 | if(!share) { | ||
| 1275 | dbg(); | ||
| 1276 | err = -ENOEXIST; | ||
| 1277 | goto reset; | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | // If the region is found in the sharedH list, then we come up with the | ||
| 1281 | // following cases | ||
| 1282 | // Case #1: If the region has already been alloced, then we set the | ||
| 1283 | // status of the shareable region to |= PK_ST_DEL | ||
| 1284 | // Case #2: If the region has already been free-ed, then we return 0 | ||
| 1285 | // Case #3a: If the region has already been got (WRLCK) then it is the | ||
| 1286 | // same situation as #1 | ||
| 1287 | // Case #3b: If the region has already been got (RDLCK) then we deny | ||
| 1288 | // the operation | ||
| 1289 | // FIXME? We never do an mprotect() to remove any protection that we | ||
| 1290 | // have on an already alloced/got-wr region. | ||
| 1291 | task_share = search_share_head(share); | ||
| 1292 | if(task_share) { | ||
| 1293 | dbg(); | ||
| 1294 | if(task_share->status & PK_PROC_ALLOC | ||
| 1295 | || task_share->status & PK_PROC_GOT_WR) { | ||
| 1296 | dbg(); | ||
| 1297 | task_share->status |= PK_PROC_FREE; | ||
| 1298 | PK_SHARE_LOCK(task_share->share); | ||
| 1299 | share->status |= PK_ST_DEL; | ||
| 1300 | PK_SHARE_UNLOCK(task_share->share); | ||
| 1301 | err = 0; | ||
| 1302 | goto dec; | ||
| 1303 | } | ||
| 1304 | else if(task_share->status & PK_PROC_FREE) { | ||
| 1305 | dbg(); | ||
| 1306 | err = 0; | ||
| 1307 | goto dec; | ||
| 1308 | } | ||
| 1309 | else if(task_share->status & PK_PROC_GOT_RD) { | ||
| 1310 | dbg(); | ||
| 1311 | err = -EDENIED; | ||
| 1312 | goto dec; | ||
| 1313 | } | ||
| 1314 | } | ||
| 1315 | |||
| 1316 | // Initialize an object of the task_share struct, to link with share obj | ||
| 1317 | task_share = init_pk_task_share(); | ||
| 1318 | if(!task_share) { | ||
| 1319 | dbg(); | ||
| 1320 | err = -ENOMEM; | ||
| 1321 | goto dec; | ||
| 1322 | } | ||
| 1323 | task_share->share = share; | ||
| 1324 | task_share->status = PK_PROC_FREE; | ||
| 1325 | |||
| 1326 | // Advance the wait queue for the shareable region. No one should be | ||
| 1327 | // allowed to hold a lock on a region for ever | ||
| 1328 | err = __advance_queue(task_share, PK_FL_WRLCK); | ||
| 1329 | if(err < 0) { | ||
| 1330 | dbg(); | ||
| 1331 | goto release; | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | // Mark the region as deleted | ||
| 1335 | PK_SHARE_LOCK(task_share->share); | ||
| 1336 | share->status |= PK_ST_DEL; | ||
| 1337 | PK_SHARE_UNLOCK(task_share->share); | ||
| 1338 | |||
| 1339 | // Link up this task_share node with the task's share_head list | ||
| 1340 | list_add(&task_share->list, ¤t->pk->share_head); | ||
| 1341 | |||
| 1342 | // Did someone abort us, while we were executing this call? | ||
| 1343 | // If so, then it is _our_ responsibility to abort the transaction | ||
| 1344 | PK_CUR_TASK_LOCK; | ||
| 1345 | if(current->pk->status & PK_TR_ABORT_PENDING) { | ||
| 1346 | PK_CUR_TASK_UNLOCK; | ||
| 1347 | dbg("calling __do_abort on %d", current->pid); | ||
| 1348 | err = __do_abort(current, 1); | ||
| 1349 | goto out; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 1353 | PK_CUR_TASK_UNLOCK; | ||
| 1354 | goto out; | ||
| 1355 | |||
| 1356 | release: | ||
| 1357 | kmem_cache_free(pk_task_share_cachep, task_share); | ||
| 1358 | dec: | ||
| 1359 | __dec_share_refcnt(share); | ||
| 1360 | reset: | ||
| 1361 | PK_CUR_TASK_LOCK; | ||
| 1362 | current->pk->status &= ~(PK_TR_SYS_INPROGRESS); | ||
| 1363 | PK_CUR_TASK_UNLOCK; | ||
| 1364 | out: | ||
| 1365 | force_successful_syscall_return(); | ||
| 1366 | return err; | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1370 | /// sys_pk_end call | ||
| 1371 | //////////////////////////////////////////////////////////////////////////////// | ||
| 1372 | asmlinkage long sys_pk_end(void) | ||
| 1373 | { | ||
| 1374 | long err; | ||
| 1375 | struct list_head *idx; | ||
| 1376 | struct pk_task_share *task_share; | ||
| 1377 | err = 0; | ||
| 1378 | |||
| 1379 | // If the transaction has been stopped, then the call to pk_end is as | ||
| 1380 | // good as a NOP. If the transaction has been aborted then set the | ||
| 1381 | // status to stopped. If the transaction is currently being aborted, | ||
| 1382 | // then userspace would need to call pk_end again. Else set the | ||
| 1383 | // transaction to PK_SYS_IN_PROGRESS | ||
| 1384 | if(current->pk) { | ||
| 1385 | dbg(); | ||
| 1386 | PK_CUR_TASK_LOCK; | ||
| 1387 | if(current->pk->status & PK_TR_STOPPED) | ||
| 1388 | goto unlock; | ||
| 1389 | else if(current->pk->status & PK_TR_ABORTED) { | ||
| 1390 | dbg(); | ||
| 1391 | current->pk->status |= PK_TR_STOPPED; | ||
| 1392 | err = -EABRT; | ||
| 1393 | goto unlock; | ||
| 1394 | } | ||
| 1395 | else if(current->pk->status & PK_TR_ABORT_INPROGRESS) { | ||
| 1396 | dbg(); | ||
| 1397 | err = -EABRTONGOING; | ||
| 1398 | goto unlock; | ||
| 1399 | } | ||
| 1400 | else if(current->pk->status & PK_TR_STARTED) { | ||
| 1401 | dbg(); | ||
| 1402 | current->pk->status |= PK_TR_SYS_INPROGRESS; | ||
| 1403 | } | ||
| 1404 | } | ||
| 1405 | else { | ||
| 1406 | dbg(); | ||
| 1407 | goto out; | ||
| 1408 | } | ||
| 1409 | PK_CUR_TASK_UNLOCK; | ||
| 1410 | |||
| 1411 | // 0. Delete the task_share node from the processes | ||
| 1412 | // share_head list | ||
| 1413 | // 1. Now, we need to unmap the memory regions point to by the | ||
| 1414 | // task_share elements in the task's share_head list NOTE: If | ||
| 1415 | // PK_PROC_FREE only then don't call mprotect(). All other | ||
| 1416 | // possible combinations call mprotect() to remove protection If the | ||
| 1417 | // region has been marked for deletion, then make sure it | ||
| 1418 | // does get deleted. PK_ST_DEL_FINAL is needed to make sure the region | ||
| 1419 | // is indeed released | ||
| 1420 | // 2. We delete the task_share node from the share region's task_head | ||
| 1421 | // list | ||
| 1422 | // 3. We also decrement the refcnts of all the shared | ||
| 1423 | // regions we have task_share->share | ||
| 1424 | // 4. Free up the task_share datastructure | ||
| 1425 | while(!list_empty(¤t->pk->share_head)) { | ||
| 1426 | unsigned long start, pages; | ||
| 1427 | dbg(); | ||
| 1428 | idx = current->pk->share_head.next; | ||
| 1429 | // 0. | ||
| 1430 | list_del(idx); | ||
| 1431 | task_share = list_entry(idx, struct pk_task_share, list); | ||
| 1432 | // 1. | ||
| 1433 | PK_SHARE_LOCK(task_share->share); | ||
| 1434 | start = task_share->share->start; | ||
| 1435 | pages = task_share->share->pages; | ||
| 1436 | if(task_share->status & PK_PROC_FREE) | ||
| 1437 | task_share->share->status |= PK_ST_DEL_FINAL; | ||
| 1438 | PK_SHARE_UNLOCK(task_share->share); | ||
| 1439 | if(!((task_share->status & PK_PROC_FREE) | ||
| 1440 | && !(task_share->status & PK_PROC_GOT_WR) | ||
| 1441 | && !(task_share->status & PK_PROC_ALLOC))) { | ||
| 1442 | dbg("undoing mprotect at %lu, pages %lu", start, pages); | ||
| 1443 | err = mprotect_for_task(current, start, pages | ||
| 1444 | * PAGE_SIZE, PROT_NONE); | ||
| 1445 | if(err < 0) { | ||
| 1446 | BUG(); | ||
| 1447 | err = -EMPROTECT; | ||
| 1448 | goto out; | ||
| 1449 | } | ||
| 1450 | } | ||
| 1451 | // 2. | ||
| 1452 | PK_LOCK; | ||
| 1453 | PK_SHARE_LOCK(task_share->share); | ||
| 1454 | list_del(&task_share->list_task); | ||
| 1455 | PK_SHARE_UNLOCK(task_share->share); | ||
| 1456 | PK_UNLOCK; | ||
| 1457 | // 3. | ||
| 1458 | __dec_share_refcnt(task_share->share); | ||
| 1459 | // 4. | ||
| 1460 | kmem_cache_free(pk_task_share_cachep, task_share); | ||
| 1461 | } | ||
| 1462 | |||
| 1463 | // Set the status of the transaction to PK_TR_STOPPED | ||
| 1464 | // NOTE: If someone aborts us during this call, we ignore it and still go | ||
| 1465 | // ahead with the pk_end | ||
| 1466 | PK_CUR_TASK_LOCK; | ||
| 1467 | current->pk->status = PK_TR_STOPPED; | ||
| 1468 | |||
| 1469 | unlock: | ||
| 1470 | PK_CUR_TASK_UNLOCK; | ||
| 1471 | out: | ||
| 1472 | force_successful_syscall_return(); | ||
| 1473 | return err; | ||
| 1474 | } | ||