diff -ruN -X dontdiff clean-2.6.13.1/arch/i386/kernel/syscall_table.S modif-2.6.13.1/arch/i386/kernel/syscall_table.S
--- clean-2.6.13.1/arch/i386/kernel/syscall_table.S	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/arch/i386/kernel/syscall_table.S	2006-03-01 04:40:37.114049000 -0600
@@ -294,3 +294,12 @@
 	.long sys_inotify_init
 	.long sys_inotify_add_watch
 	.long sys_inotify_rm_watch
+	.long sys_pk_alloc
+	.long sys_pk_alloc_log		/* 295 */
+	.long sys_pk_free
+	.long sys_pk_free_log
+	.long sys_pk_get
+	.long sys_pk_end
+
+
+
\ No newline at end of file
diff -ruN -X dontdiff clean-2.6.13.1/fs/exec.c modif-2.6.13.1/fs/exec.c
--- clean-2.6.13.1/fs/exec.c	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/fs/exec.c	2006-03-02 04:26:12.964415000 -0600
@@ -1138,6 +1138,8 @@
 /*
  * sys_execve() executes a new program.
  */
+extern int attach_pkmem(void);
+
 int do_execve(char * filename,
 	char __user *__user *argv,
 	char __user *__user *envp,
@@ -1207,7 +1209,8 @@
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
 		free_arg_pages(bprm);
-
+		attach_pkmem();
+		current->pk = NULL;
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
diff -ruN -X dontdiff clean-2.6.13.1/include/asm-i386/unistd.h modif-2.6.13.1/include/asm-i386/unistd.h
--- clean-2.6.13.1/include/asm-i386/unistd.h	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/include/asm-i386/unistd.h	2006-02-28 13:23:44.717387000 -0600
@@ -299,8 +299,15 @@
 #define __NR_inotify_init	291
 #define __NR_inotify_add_watch	292
 #define __NR_inotify_rm_watch	293
+#define __NR_pk_alloc		294
+#define __NR_pk_alloc_log	295
+#define __NR_pk_free		296
+#define __NR_pk_free_log	297
+#define __NR_pk_get		298
+#define __NR_pk_end		299
 
-#define NR_syscalls 294
+
+#define NR_syscalls 300
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff -ruN -X dontdiff clean-2.6.13.1/include/linux/init_task.h modif-2.6.13.1/include/linux/init_task.h
--- clean-2.6.13.1/include/linux/init_task.h	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/include/linux/init_task.h	2006-02-28 03:38:53.194325000 -0600
@@ -112,6 +112,7 @@
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
+	.pk		= NULL,						\
 }
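
For illustration only, and not part of the patch: a minimal userspace shim for
exercising the six new calls, assuming the syscall numbers from the unistd.h
hunk above. The wrapper names and the pk_err() helper are hypothetical. Since
pkMem region addresses (PKMEM_START = 0x9bef8000) exceed LONG_MAX on i386, a
raw return value is best classified IS_ERR-style rather than compared with -1;
the pkMem error numbers are all small (100-112).

	/* pk_user.h -- hypothetical userspace shim, illustrative only */
	#include <sys/syscall.h>
	#include <unistd.h>

	#define __NR_pk_alloc		294
	#define __NR_pk_alloc_log	295
	#define __NR_pk_free		296
	#define __NR_pk_free_log	297
	#define __NR_pk_get		298
	#define __NR_pk_end		299

	static long pk_alloc(unsigned long pages)
	{ return syscall(__NR_pk_alloc, pages); }

	static long pk_alloc_log(unsigned long pages, unsigned long flag)
	{ return syscall(__NR_pk_alloc_log, pages, flag); }

	static long pk_free(unsigned long start)
	{ return syscall(__NR_pk_free, start); }

	static long pk_free_log(unsigned long start)
	{ return syscall(__NR_pk_free_log, start); }

	static long pk_get(unsigned long start, unsigned long flag)
	{ return syscall(__NR_pk_get, start, flag); }

	static long pk_end(void)
	{ return syscall(__NR_pk_end); }

	/* pkMem errors are small negative numbers; region addresses are not */
	static int pk_err(long ret)
	{ return (unsigned long)ret >= (unsigned long)-200L; }
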
diff -ruN -X dontdiff clean-2.6.13.1/include/linux/pkmem.h modif-2.6.13.1/include/linux/pkmem.h
--- clean-2.6.13.1/include/linux/pkmem.h	1969-12-31 18:00:00.000000000 -0600
+++ modif-2.6.13.1/include/linux/pkmem.h	2006-03-23 15:45:04.318940000 -0600
@@ -0,0 +1,147 @@
+#ifndef __PKMEM_H
+#define __PKMEM_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+
+#include <asm/page.h>
+#include <asm/param.h>
+
+////////////////////////////////////////////////////////////////////////////////
+/// Important Symbols
+////////////////////////////////////////////////////////////////////////////////
+#define PKMEM_START	0x9bef8000
+#define PKMEM_PAGES	512
+#define PKMEM_SIZE	(PAGE_SIZE * PKMEM_PAGES)
+//#define PKMEM_START	(TASK_SIZE - 1 - PKMEM_SIZE)
+#define PK_TIMEOUT	(3 * HZ)
+
+////////////////////////////////////////////////////////////////////////////////
+/// Important Macros
+///////////////////////////////////////////////////////////////////////////////
+#define PK_CUR_TASK_LOCK	spin_lock(&current->pk->lock)
+#define PK_CUR_TASK_UNLOCK	spin_unlock(&current->pk->lock)
+#define PK_TASK_LOCK(pk_tsk)	spin_lock(&pk_tsk->lock)
+#define PK_TASK_UNLOCK(pk_tsk)	spin_unlock(&pk_tsk->lock)
+
+#define PK_LOCK			spin_lock(&pk_lock)
+#define PK_UNLOCK		spin_unlock(&pk_lock)
+#define PK_SHARE_LOCK(share)	spin_lock(&share->lock)
+#define PK_SHARE_UNLOCK(share)	spin_unlock(&share->lock)
+
+#define GET_SEMAPHORE(task_share, status) do {			\
+	if ((status) & PK_FL_RDLCK)				\
+		down_read(&(task_share)->share->sem);		\
+	else							\
+		down_write(&(task_share)->share->sem);		\
+} while (0)
+
+#define RELEASE_SEMAPHORE(task_share, status) do {		\
+	if ((status) & PK_FL_RDLCK)				\
+		up_read(&(task_share)->share->sem);		\
+	else							\
+		up_write(&(task_share)->share->sem);		\
+} while (0)
+
+////////////////////////////////////////////////////////////////////////////////
+/// Debug Macros
+/// #define dbg(format, arg...) printk(KERN_INFO "pk: " format "\n" , ##arg)
+////////////////////////////////////////////////////////////////////////////////
+//#define PKMEM_DEBUG
+
+#ifdef PKMEM_DEBUG
+	#define dbg(format, arg...) \
+		printk(KERN_INFO "pk: %d,(%s, %d):%s() " format "\n", \
+		       current->pid, __FILE__, __LINE__, __FUNCTION__, ##arg)
+#else
+	#define dbg(format, arg...) do {} while (0)
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// Error Numbers in pkMem
+////////////////////////////////////////////////////////////////////////////////
+#define EABRT		100
+#define EABRTONGOING	101
+#define ENOTSTOPPED	102
+#define EUNUSED		103	/* Unused */
+#define ENOPARAM	104
+#define EMPROTECT	105
+#define ENOEXIST	106
+#define EDELETED	107
+#define ENODGRADE	108
+#define ENOUGRADE	109
+#define EBUG		110
+#define EABRTPENDING	111
+#define EDENIED		112
+
+////////////////////////////////////////////////////////////////////////////////
+/// Constants in pkMem
+////////////////////////////////////////////////////////////////////////////////
+
+/// Constants that are used to describe the status field of struct pk_share
+/// AND are used by _userspace_ to specify the lock requested
+#define PK_FL_RDLCK		0x000001
+#define PK_FL_WRLCK		0x000002
+
+/// Constants used to describe the status field of struct pk_share
+#define PK_ST_DEL		0x000004
+#define PK_ST_DEL_FINAL		0x002000
+
+/// Constants used to describe the status field of struct pk_task
+#define PK_TR_STARTED		0x000008
+#define PK_TR_STOPPED		0x000010
+#define PK_TR_ABORTED		0x000020
+#define PK_TR_ABORT_PENDING	0x000040
+#define PK_TR_SYS_INPROGRESS	0x000080
+#define PK_TR_ABORT_INPROGRESS	0x000100
+
+/// Constants used to describe the status field of struct pk_task_share
+#define PK_PROC_GOT_WR		0x000200
+#define PK_PROC_GOT_RD		0x000400
+#define PK_PROC_FREE		0x000800
+#define PK_PROC_ALLOC		0x001000
+
+////////////////////////////////////////////////////////////////////////////////
+/// Structures in pkMem
+///////////////////////////////////////////////////////////////////////////////
+struct pk_basic {
+	struct list_head list;
+	unsigned long start;
+	unsigned long pages;
+};
+
+struct pk_share {
+	struct rw_semaphore sem;
+
+	// Modification of these fields is done holding PK_LOCK
+	unsigned long start;
+	unsigned long pages;
+	struct list_head list;
+
+	spinlock_t lock;
+
+	// Modification of these fields is done holding ``lock''
+	struct list_head task_head;
+	unsigned long status;
+	unsigned int refcnt;
+};
+
+struct pk_task_share {
+	struct list_head list;
+	struct list_head list_task;
+	spinlock_t lock;
+	unsigned int status;
+	struct task_struct *task;
+	struct pk_share *share;
+	unsigned long jiffies;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Externally defined
+////////////////////////////////////////////////////////////////////////////////
+
+extern long mprotect_for_task(struct task_struct *task, unsigned long start,
+			      size_t len, unsigned long prot);
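
To make the structure relationships concrete before the implementation: a
struct pk_task chains struct pk_task_share nodes through share_head (via
their list member); each node points at one struct pk_share and is also
linked, through its list_task member, into that share's task_head, so a
region can name every transaction currently holding it. A short sketch,
illustrative only and not part of the patch (the helper name is hypothetical,
and the caller is assumed to hold current->pk->lock):

	/* Illustrative only -- mirrors what search_share_head() does below */
	static int task_holds_region(struct pk_task *pk, struct pk_share *share)
	{
		struct list_head *idx;
		struct pk_task_share *ts;

		list_for_each(idx, &pk->share_head) {
			ts = list_entry(idx, struct pk_task_share, list);
			if (ts->share == share)
				return 1;
		}
		return 0;
	}
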
+extern long __do_abort(struct task_struct *task, int call_mprotect);
+extern void __release_log_regions(struct pk_task *pk);
+
+#endif
diff -ruN -X dontdiff clean-2.6.13.1/include/linux/sched.h modif-2.6.13.1/include/linux/sched.h
--- clean-2.6.13.1/include/linux/sched.h	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/include/linux/sched.h	2006-03-01 03:04:48.417676000 -0600
@@ -596,6 +596,14 @@
 struct audit_context;		/* See audit.c */
 struct mempolicy;
 
+/// Note: every task_struct carries a pointer to a struct pk_task
+struct pk_task {
+	spinlock_t lock;
+	unsigned long status;
+	struct list_head logs_head;
+	struct list_head share_head;
+};
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	struct thread_info *thread_info;
@@ -770,6 +778,7 @@
 	int cpuset_mems_generation;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
+	struct pk_task *pk;
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
diff -ruN -X dontdiff clean-2.6.13.1/kernel/exit.c modif-2.6.13.1/kernel/exit.c
--- clean-2.6.13.1/kernel/exit.c	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/kernel/exit.c	2006-03-23 00:57:31.231458000 -0600
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <linux/pkmem.h>
 #include
 #include
 
@@ -833,6 +834,23 @@
 		del_timer_sync(&tsk->signal->real_timer);
 		acct_process(code);
 	}
+
+	/// If the task's current transaction has already been aborted or
+	/// stopped, then don't call __do_abort(). We always call
+	/// __release_log_regions(), though: it releases all the log regions
+	/// allocated by the task back into the free list
+	if(tsk->pk) {
+		PK_TASK_LOCK(tsk->pk);
+		if(!(tsk->pk->status & PK_TR_ABORTED
+		     || tsk->pk->status & PK_TR_STOPPED
+		     || tsk->pk->status & PK_TR_ABORT_INPROGRESS)) {
+			PK_TASK_UNLOCK(tsk->pk);
+			__do_abort(tsk, 0);
+		} else
+			PK_TASK_UNLOCK(tsk->pk);
+		__release_log_regions(tsk->pk);
+	}
+
 	exit_mm(tsk);
 
 	exit_sem(tsk);
@@ -852,6 +870,7 @@
 	tsk->exit_code = code;
 	exit_notify(tsk);
+
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
diff -ruN -X dontdiff clean-2.6.13.1/Makefile modif-2.6.13.1/Makefile
--- clean-2.6.13.1/Makefile	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/Makefile	2006-01-16 12:46:52.292063000 -0600
@@ -562,7 +562,7 @@
 
 ifeq ($(KBUILD_EXTMOD),)
 
-core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/
+core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ pkmem/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
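
The do_exit() hunk above open-codes a "transaction already finished" test
before deciding whether to abort. Purely as an illustration (this helper does
not exist in the patch), the test could be captured in one predicate, with the
caller assumed to hold pk->lock:

	/* Hypothetical helper, illustrative only */
	static inline int pk_tr_finished(struct pk_task *pk)
	{
		return pk->status & (PK_TR_ABORTED | PK_TR_STOPPED
				     | PK_TR_ABORT_INPROGRESS);
	}

Note also the ordering in that hunk: the abort runs before exit_mm(), while
tsk->mm is still live, and do_exit() passes call_mprotect == 0 since the whole
address space is about to be torn down anyway.
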
diff -ruN -X dontdiff clean-2.6.13.1/mm/mprotect.c modif-2.6.13.1/mm/mprotect.c
--- clean-2.6.13.1/mm/mprotect.c	2005-09-09 21:42:58.000000000 -0500
+++ modif-2.6.13.1/mm/mprotect.c	2006-03-08 01:13:10.271147000 -0600
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <linux/pkmem.h>
 #include
 #include
 
@@ -184,6 +185,13 @@
 	struct vm_area_struct *vma, *prev;
 	int error = -EINVAL;
 	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
+
+	/* Cannot mprotect a range that overlaps pkMem. Note: mmap_sem is
+	 * not held yet, so return directly instead of goto out */
+	if (start < (PKMEM_START + PKMEM_SIZE)
+	    && (start + len) > PKMEM_START)
+		return -EACCES;
+
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
 	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP))	/* can't be both */
 		return -EINVAL;
@@ -280,3 +288,103 @@
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+
+long mprotect_for_task(struct task_struct *task, unsigned long start,
+		       size_t len, unsigned long prot)
+{
+	unsigned long vm_flags, nstart, end, tmp;
+	struct vm_area_struct *vma, *prev;
+	int error = -EINVAL;
+	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
+
+	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
+	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP))	/* can't be both */
+		return -EINVAL;
+
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+	if (!len)
+		return 0;
+	len = PAGE_ALIGN(len);
+	end = start + len;
+	if (end <= start)
+		return -ENOMEM;
+	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM))
+		return -EINVAL;
+
+	/*
+	 * Does the application expect PROT_READ to imply PROT_EXEC:
+	 */
+	if (unlikely((prot & PROT_READ) &&
+		     (task->personality & READ_IMPLIES_EXEC)))
+		prot |= PROT_EXEC;
+
+	vm_flags = calc_vm_prot_bits(prot);
+
+	down_write(&task->mm->mmap_sem);
+
+	vma = find_vma_prev(task->mm, start, &prev);
+	error = -ENOMEM;
+	if (!vma)
+		goto out;
+	if (unlikely(grows & PROT_GROWSDOWN)) {
+		if (vma->vm_start >= end)
+			goto out;
+		start = vma->vm_start;
+		error = -EINVAL;
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto out;
+	}
+	else {
+		if (vma->vm_start > start)
+			goto out;
+		if (unlikely(grows & PROT_GROWSUP)) {
+			end = vma->vm_end;
+			error = -EINVAL;
+			if (!(vma->vm_flags & VM_GROWSUP))
+				goto out;
+		}
+	}
+	if (start > vma->vm_start)
+		prev = vma;
+
+	for (nstart = start ; ; ) {
+		unsigned long newflags;
+
+		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
+
+		if (is_vm_hugetlb_page(vma)) {
+			error = -EACCES;
+			goto out;
+		}
+
+		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
+
+		if ((newflags & ~(newflags >> 4)) & 0xf) {
+			error = -EACCES;
+			goto out;
+		}
+
+		tmp = vma->vm_end;
+		if (tmp > end)
+			tmp = end;
+		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
+		if (error)
+			goto out;
+		nstart = tmp;
+
+		if (nstart < prev->vm_end)
+			nstart = prev->vm_end;
+		if (nstart >= end)
+			goto out;
+
+		vma = prev->vm_next;
+		if (!vma || vma->vm_start != nstart) {
+			error = -ENOMEM;
+			goto out;
+		}
+	}
+out:
+	up_write(&task->mm->mmap_sem);
+	return error;
+}
diff -ruN -X dontdiff clean-2.6.13.1/pkmem/Makefile modif-2.6.13.1/pkmem/Makefile
--- clean-2.6.13.1/pkmem/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ modif-2.6.13.1/pkmem/Makefile	2006-01-16 12:46:52.382063000 -0600
@@ -0,0 +1,3 @@
+# pkmem
+#
+obj-y := pkmem.o
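
With the sys_mprotect() guard above in place, userspace cannot change
protections inside the pkMem window on its own; only the pk_* system calls
can, via mprotect_for_task(). A minimal userspace check, illustrative only
(PKMEM_START copied from linux/pkmem.h):

	#include <errno.h>
	#include <stdio.h>
	#include <sys/mman.h>

	#define PKMEM_START 0x9bef8000UL

	int main(void)
	{
		void *p = (void *)PKMEM_START;

		if (mprotect(p, 4096, PROT_READ | PROT_WRITE) == -1
		    && errno == EACCES)
			printf("pkMem window is off limits to mprotect(2)\n");
		return 0;
	}
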
diff -ruN -X dontdiff clean-2.6.13.1/pkmem/pkmem.c modif-2.6.13.1/pkmem/pkmem.c
--- clean-2.6.13.1/pkmem/pkmem.c	1969-12-31 18:00:00.000000000 -0600
+++ modif-2.6.13.1/pkmem/pkmem.c	2006-04-02 19:05:32.234443000 -0500
@@ -0,0 +1,1474 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/ramfs.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/err.h>
+#include <linux/ptrace.h>
+#include <linux/pkmem.h>
+
+extern struct file_operations ramfs_file_operations;	/* from fs/ramfs */
+
+/// Globals needed for creating the pkMem region
+static struct vfsmount *pkmem_mnt;
+static struct file *pk_file;
+
+/// Global pkMem data structure slabs
+kmem_cache_t *pk_task_cachep;
+kmem_cache_t *pk_basic_cachep;
+kmem_cache_t *pk_share_cachep;
+kmem_cache_t *pk_task_share_cachep;
+
+/// Definition of the global PK_LOCK
+spinlock_t pk_lock;
+
+/// __pk_free_pages counts the free pkMem pages remaining in the free list
+/// [Protected by PK_LOCK]
+/// pk_freeHEAD points to a list of struct pk_basic objects (free list)
+/// pk_shareHEAD points to a list of struct pk_share objects (shared list)
+static unsigned long __pk_free_pages;
+static LIST_HEAD(pk_freeHEAD);
+static LIST_HEAD(pk_shareHEAD);
+
+////////////////////////////////////////////////////////////////////////////////
+/// Functions to initialize the pkMem data structures
+////////////////////////////////////////////////////////////////////////////////
+
+static inline struct pk_basic *init_pk_basic(void)
+{
+	struct pk_basic *basic;
+
+	might_sleep();
+	basic = kmem_cache_alloc(pk_basic_cachep, GFP_KERNEL);
+
+	if(!basic)
+		return NULL;
+	INIT_LIST_HEAD(&basic->list);
+	basic->pages = 0;
+	basic->start = 0;
+	return basic;
+}
+
+static inline struct pk_share *init_pk_share(void)
+{
+	struct pk_share *share;
+
+	might_sleep();
+	share = kmem_cache_alloc(pk_share_cachep, GFP_KERNEL);
+
+	if(!share)
+		return NULL;
+	init_rwsem(&share->sem);
+	share->lock = SPIN_LOCK_UNLOCKED;
+	INIT_LIST_HEAD(&share->list);
+	INIT_LIST_HEAD(&share->task_head);
+	share->status = 0;
+	share->pages = 0;
+	share->start = 0;
+	share->refcnt = 0;
+	return share;
+}
+
+static inline struct pk_task *init_pk_task(unsigned long status)
+{
+	struct pk_task *pk;
+
+	might_sleep();
+	pk = kmem_cache_alloc(pk_task_cachep, GFP_KERNEL);
+
+	if(!pk)
+		return NULL;
+	pk->lock = SPIN_LOCK_UNLOCKED;
+	pk->status = status;
+	INIT_LIST_HEAD(&pk->logs_head);
+	INIT_LIST_HEAD(&pk->share_head);
+	return pk;
+}
+
+static inline struct pk_task_share *init_pk_task_share(void)
+{
+	struct pk_task_share *task_share;
+
+	might_sleep();
+	task_share = kmem_cache_alloc(pk_task_share_cachep, GFP_KERNEL);
+
+	if(!task_share)
+		return NULL;
+	INIT_LIST_HEAD(&task_share->list);
+	INIT_LIST_HEAD(&task_share->list_task);
+	task_share->lock = SPIN_LOCK_UNLOCKED;
+	task_share->share = NULL;
+	task_share->status = 0;
+	task_share->task = current;
+	// Note: We do not set task_share->jiffies at this point
+	return task_share;
+}
+
+static inline long setup_transaction(unsigned long status)
+{
+	long err;
+
+	err = 0;
+	if(current->pk == NULL) {
+		dbg();
+		current->pk = init_pk_task((PK_TR_STARTED | status));
+		if(current->pk == NULL) {
+			dbg();
+			err = -ENOMEM;
+			goto out;
+		}
+	} else {
+		PK_CUR_TASK_LOCK;
+		dbg();
+		if(current->pk->status & PK_TR_ABORTED) {
+			dbg();
+			err = -EABRT;
+		} else if(current->pk->status & PK_TR_ABORT_PENDING) {
+			err = -EABRTPENDING;
+			BUG();
+		} else if(current->pk->status & PK_TR_STOPPED) {
+			dbg();
+			current->pk->status = PK_TR_STARTED;
+		}
+		else if(current->pk->status & PK_TR_ABORT_INPROGRESS) {
+			dbg();
+			err = -EABRT;
+		}
+		if(err == 0)
+			current->pk->status |= status;
+		PK_CUR_TASK_UNLOCK;
+	}
+
+out:
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Functions to search {log|share} regions in pkMem
+////////////////////////////////////////////////////////////////////////////////
+static inline struct pk_share *search_shareHEAD(unsigned long start)
+{
+	struct list_head *idx;
+	struct pk_share *share;
+
+	PK_LOCK;
+	list_for_each(idx, &pk_shareHEAD) {
+		share = list_entry(idx, struct pk_share, list);
+		if(share->start == start) {
+			PK_SHARE_LOCK(share);
+			if(share->status & PK_ST_DEL
+			   || share->status & PK_ST_DEL_FINAL) {
+				// Unlock before dropping the pointer
+				PK_SHARE_UNLOCK(share);
+				share = NULL;
+			} else {
+				share->refcnt++;
+				PK_SHARE_UNLOCK(share);
+			}
+			goto out;
+		}
+	}
+
+	share = NULL;
+out:
+	PK_UNLOCK;
+	return share;
+}
+
+/// search_share_head() checks whether a struct pk_share object that is on the
+/// global pk_shareHEAD list can also be found on the task's share_head list
+static inline struct pk_task_share *search_share_head(struct pk_share *share)
+{
+	struct list_head *idx;
+	struct pk_task_share *task_share;
+
+	list_for_each(idx, &current->pk->share_head) {
+		task_share = list_entry(idx, struct pk_task_share, list);
+		if(task_share->share == share)
+			goto out;
+	}
+
+	task_share = NULL;
+out:
+	return task_share;
+}
+
+/// During rollback this function checks if the address given has been obtained
+/// by the process for exclusive access, either via an alloc or a pk_get
+static inline int __share_addr_in_range(unsigned long addr)
+{
+	struct list_head *idx;
+	struct pk_task_share *task_share;
+	unsigned long start, pages, status;
+	int err;
+
+	err = 0;
+	list_for_each(idx, &current->pk->share_head) {
+		task_share = list_entry(idx, struct pk_task_share, list);
+		status = task_share->status;
+		if(status & PK_PROC_ALLOC || status & PK_PROC_GOT_WR) {
+			start = task_share->share->start;
+			pages = task_share->share->pages;
+			if(start <= addr && addr < (start + pages * PAGE_SIZE))
+				goto out;
+		}
+	}
+	err = -1;
+out:
+	return err;
+}
+
+/// This function searches the task's logs_head list for a struct pk_basic
+/// object with the given start address
+static inline struct pk_basic *search_logs_head(unsigned long start)
+{
+	struct list_head *idx;
+	struct pk_basic *log;
+
+	list_for_each(idx, &current->pk->logs_head) {
+		log = list_entry(idx, struct pk_basic, list);
+		if(log->start == start)
+			goto out;
+	}
+	log = NULL;
+out:
+	return log;
+}
+
+/// During rollback this function checks if the address given does indeed
+/// refer to the start address of a log page
+static inline int __log_addr_in_range(unsigned long addr)
+{
+	struct list_head *idx;
+	struct pk_basic *log;
+	int err;
+
+	err = 0;
+	list_for_each(idx, &current->pk->logs_head) {
+		log = list_entry(idx, struct pk_basic, list);
+		if((unsigned long)log->start == addr)
+			goto out;
+	}
+	err = -1;
+out:
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Create the pkMem file in memory
+////////////////////////////////////////////////////////////////////////////////
+
+/// The internal pkmem filesystem that represents the entire pkMem region
+static struct file_system_type pkmemfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "pkmemfs",
+	.get_sb		= ramfs_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+struct file *__file_setup(char *name, loff_t size)
+{
+	int error;
+	struct file *file;
+	struct inode *inode;
+	struct dentry *dentry, *root;
+	struct qstr this;
+
+	if (IS_ERR(pkmem_mnt))
+		return (void *)pkmem_mnt;
+
+	error = -ENOMEM;
+	this.name = name;
+	this.len = strlen(name);
+	this.hash = 0;	/* Make the file show up (deleted) in /proc/<pid>/maps */
+	root = pkmem_mnt->mnt_root;
+	dentry = d_alloc(root, &this);
+	if (!dentry)
+		goto put_memory;
+	error = -ENFILE;
+	file = get_empty_filp();
+	if (!file)
+		goto put_dentry;
+
+	error = -ENOSPC;
+	inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
+	if (!inode)
+		goto close_file;
+
+	d_instantiate(dentry, inode);
+	inode->i_size = size;
+	inode->i_nlink = 0;	/* It is unlinked */
+	file->f_vfsmnt = mntget(pkmem_mnt);
+	file->f_dentry = dentry;
+	file->f_mapping = inode->i_mapping;
+	file->f_op = &ramfs_file_operations;
+	file->f_mode = FMODE_WRITE | FMODE_READ;
+	return file;
+
+close_file:
+	put_filp(file);
+put_dentry:
+	dput(dentry);
+put_memory:
+	return ERR_PTR(error);
+}
+
+int attach_pkmem(void)
+{
+	unsigned long addr;
+
+	if(IS_ERR(pk_file))
+		return -1;
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(pk_file, PKMEM_START, PKMEM_SIZE, PROT_NONE,
+		       MAP_SHARED, 0);
+	up_write(&current->mm->mmap_sem);
+	// The design relies on the region living exactly at PKMEM_START
+	if(addr != PKMEM_START)
+		return -1;
+
+	return 0;
+}
+
+static int __init init_pkmemfs(void)
+{
+	int error;
+	loff_t size;
+	struct pk_basic *basic;
+
+	size = PKMEM_SIZE;
+	pk_lock = SPIN_LOCK_UNLOCKED;
+	error = register_filesystem(&pkmemfs_fs_type);
+	if (error) {
+		printk(KERN_ERR "pkMem: Could not register pkmemfs\n");
+		goto out2;
+	}
+
+	devfs_mk_dir("pkmem");
+	pkmem_mnt = kern_mount(&pkmemfs_fs_type);
+	if (IS_ERR(pkmem_mnt)) {
+		printk(KERN_ERR "pkMem: Could not kern_mount pkmemfs\n");
+		error = PTR_ERR(pkmem_mnt);
+		goto out1;
+	}
+
+	pk_file = __file_setup("pkmem", size);
+	if(IS_ERR(pk_file)) {
+		error = PTR_ERR(pk_file);
+		goto out1;
+	}
+
+	pk_file->f_op = &ramfs_file_operations;
+	pk_basic_cachep = kmem_cache_create("pk_basic",
+					    sizeof(struct pk_basic),
+					    0, SLAB_RECLAIM_ACCOUNT,
+					    NULL, NULL);
+	pk_share_cachep = kmem_cache_create("pk_share",
+					    sizeof(struct pk_share),
+					    0, SLAB_RECLAIM_ACCOUNT,
+					    NULL, NULL);
+	pk_task_cachep = kmem_cache_create("pk_task",
+					   sizeof(struct pk_task),
+					   0, SLAB_RECLAIM_ACCOUNT,
+					   NULL, NULL);
+	pk_task_share_cachep = kmem_cache_create("pk_task_share",
+						 sizeof(struct pk_task_share),
+						 0, SLAB_RECLAIM_ACCOUNT,
+						 NULL, NULL);
+	if(!pk_basic_cachep || !pk_share_cachep || !pk_task_cachep
+	   || !pk_task_share_cachep) {
+		printk(KERN_ERR "pkMem: Could not create slab caches\n");
+		error = -ENOMEM;
+		goto out1;
+	}
+
+	basic = init_pk_basic();
+	if(!basic) {
+		error = -ENOMEM;
+		goto out1;
+	}
+	basic->start = PKMEM_START;
+	basic->pages = PKMEM_PAGES;
+	list_add(&basic->list, &pk_freeHEAD);
+	__pk_free_pages = PKMEM_PAGES;
+	printk(KERN_INFO "pkMem: initialized\n");
+	return 0;
+out1:
+	unregister_filesystem(&pkmemfs_fs_type);
+out2:
+	return error;
+}
+
+module_init(init_pkmemfs)
+
+////////////////////////////////////////////////////////////////////////////////
+/// __insert_into_freelist() returns the pk_basic region pointed to by insert
+/// to the free list. The caller needn't destroy the region: nodes on a task's
+/// logs_head list and nodes on the pk_freeHEAD list are both of type struct
+/// pk_basic, so all we have to do is re-insert the node in sorted order.
+/// Hold PK_LOCK
+////////////////////////////////////////////////////////////////////////////////
+void __insert_into_freelist(struct pk_basic *insert)
+{
+	struct list_head *idx, *prev;
+	struct pk_basic *ptr;
+
+	// Insert the free space pointed to by the deleted node
+	// into its sorted place in the pk_freeHEAD list
+	// case 1: the pk_freeHEAD list is empty
+	if(list_empty(&pk_freeHEAD)) {
+		dbg();
+		list_add(&insert->list, &pk_freeHEAD);
+		goto out;
+	}
+	// case 2: put the entry in its sorted position in the pk_freeHEAD list
+	prev = NULL;
+	list_for_each(idx, &pk_freeHEAD) {
+		ptr = list_entry(idx, struct pk_basic, list);
+		prev = idx;
+		if(insert->start < ptr->start) {
+			dbg();
+			list_add_tail(&insert->list, idx);
+			goto out;
+		}
+	}
+	// case 3: put the entry at the end of the list
+	list_add(&insert->list, prev);
+
+out:
+	__pk_free_pages += insert->pages;
+	return;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __release_share_region releases the share region pointed to by delete.
+/// The caller NEEDS to destroy the region. The caller must also allocate
+/// memory for the insert node, which will eventually be inserted into the
+/// pk_freeHEAD list
+/// (insert = init_pk_basic();)
+/// IMP: On err < 0 the caller must free insert
+/// Hold PK_LOCK [Always], PK_SHARE_LOCK(share) [Except on pk_alloc, where we
+/// needn't]
+////////////////////////////////////////////////////////////////////////////////
+int __release_share_region(struct pk_share *delete, struct pk_basic *insert)
+{
+	int err;
+	struct list_head *idx;
+	struct pk_share *share;
+
+	// Delete the node from the shareHEAD list
+	err = -ENOEXIST;
+	list_for_each(idx, &pk_shareHEAD) {
+		share = list_entry(idx, struct pk_share, list);
+		if(share == delete) {
+			dbg();
+			list_del(&share->list);
+			err = 0;
+			break;
+		}
+	}
+
+	if(err == -ENOEXIST) {
+		dbg();
+		goto out;
+	}
+	insert->start = delete->start;
+	insert->pages = delete->pages;
+	__insert_into_freelist(insert);
+
+out:
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __dec_share_refcnt decrements share->refcnt and, if the region has been
+/// marked for deletion, releases the region back into the free list
+////////////////////////////////////////////////////////////////////////////////
+void __dec_share_refcnt(struct pk_share *share)
+{
+	struct pk_basic *insert;
+
+	PK_SHARE_LOCK(share);
+	if(--share->refcnt == 0 && share->status & PK_ST_DEL_FINAL) {
+		PK_SHARE_UNLOCK(share);
+		dbg();
+		insert = init_pk_basic();
+
+		if(!insert) {
+			dbg();
+			goto out;
+		}
+		PK_LOCK;
+		PK_SHARE_LOCK(share);
+
+		if(__release_share_region(share, insert) < 0) {
+			dbg();
+			kmem_cache_free(pk_basic_cachep, insert);
+		}
+		else {
+			dbg();
+			PK_SHARE_UNLOCK(share);
+			kmem_cache_free(pk_share_cachep, share);
+			goto unlock;
+		}
+		dbg();
+		PK_SHARE_UNLOCK(share);
+		goto unlock;
+	}
+	dbg();
+	PK_SHARE_UNLOCK(share);
+	goto out;
+
+unlock:
+	PK_UNLOCK;
+out:
+	return;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __merge_free_list(): merges consecutive entries in the free list
+/// and returns the size of the largest region that it has merged
+////////////////////////////////////////////////////////////////////////////////
+int __merge_free_list(void)
+{
+	unsigned long page_cnt;
+	struct list_head *idx;
+	struct pk_basic *cur, *prev;
+
+	page_cnt = 0;
+restart:
+	prev = NULL;
+	PK_LOCK;
+	list_for_each(idx, &pk_freeHEAD) {
+		dbg();
+		cur = list_entry(idx, struct pk_basic, list);
+		if(prev) {
+			dbg();
+			if(prev->start + (prev->pages * PAGE_SIZE)
+			   == cur->start) {
+				dbg();
+				prev->pages += cur->pages;
+				list_del(&cur->list);
+				// Track the size of the merged region
+				if(page_cnt < prev->pages) {
+					dbg();
+					page_cnt = prev->pages;
+				}
+				PK_UNLOCK;
+				kmem_cache_free(pk_basic_cachep, cur);
+				dbg();
+				goto restart;
+			}
+		}
+		prev = cur;
+	}
+	PK_UNLOCK;
+
+	dbg("page_cnt: %lu", page_cnt);
+	return page_cnt;
+}
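
To make the split/merge arithmetic concrete: starting from the single free
node {start = PKMEM_START, pages = 512}, an allocation of 3 pages returns
addr = PKMEM_START and shrinks the node to {start = PKMEM_START +
3 * PAGE_SIZE, pages = 509}; a node whose page count reaches 0 is unlinked and
freed. Conversely, __merge_free_list() coalesces a node into its predecessor
whenever prev->start + prev->pages * PAGE_SIZE == cur->start, which is why
__insert_into_freelist() keeps the free list sorted by start address.
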
+
+////////////////////////////////////////////////////////////////////////////////
+/// __split_free_list(pages): Splits the free list to satisfy the allocation
+/// of 'pages' pages. It returns the start address of the allocation
+////////////////////////////////////////////////////////////////////////////////
+unsigned long __split_free_list(unsigned long pages)
+{
+	int try;
+	unsigned long addr;
+	struct list_head *idx;
+	struct pk_basic *free_entry;
+
+	addr = try = 0;
+	PK_LOCK;
+	if(__pk_free_pages < pages) {
+		PK_UNLOCK;
+		dbg("__pk_free_pages %lu\n", __pk_free_pages);
+		goto out;
+	}
+
+retry:
+	try++;
+	list_for_each(idx, &pk_freeHEAD) {
+		dbg();
+		free_entry = list_entry(idx, struct pk_basic, list);
+		if(free_entry->pages >= pages) {
+			dbg();
+			addr = free_entry->start;
+			__pk_free_pages -= pages;
+			free_entry->start += (pages * PAGE_SIZE);
+			free_entry->pages -= pages;
+			if(free_entry->pages == 0) {
+				dbg();
+				list_del(&free_entry->list);
+				kmem_cache_free(pk_basic_cachep, free_entry);
+			}
+			PK_UNLOCK;
+			goto out;
+		}
+	}
+	PK_UNLOCK;
+
+	if(__merge_free_list() >= pages && try <= 2) {
+		PK_LOCK;
+		goto retry;
+	}
+
+out:
+	dbg("__pk_free_pages %lu\n", __pk_free_pages);
+	return addr;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __alloc_share_regions(pages): returns a shareable region by
+/// splitting the free list
+/// IMPORTANT: It is up to the caller of this function to add the
+/// share region into the pk_shareHEAD list,
+/// i.e. list_add(&share->list, &pk_shareHEAD);
+////////////////////////////////////////////////////////////////////////////////
+struct pk_share *__alloc_share_regions(unsigned int pages)
+{
+	unsigned long addr;
+	struct pk_share *share;
+
+	dbg("__pk_free_pages %lu\n", __pk_free_pages);
+	share = NULL;
+	addr = __split_free_list(pages);
+
+	if(addr == 0) {
+		dbg();
+		goto out;
+	}
+	share = init_pk_share();
+
+	if(!share) {
+		dbg();
+		goto out;
+	}
+	share->start = addr;
+	share->pages = pages;
+out:
+	return share;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __alloc_log_pages(pages): returns a log region by
+/// splitting the free list
+////////////////////////////////////////////////////////////////////////////////
+struct pk_basic *__alloc_log_pages(unsigned int pages)
+{
+	unsigned long addr;
+	struct pk_basic *log;
+
+	dbg("__pk_free_pages %lu\n", __pk_free_pages);
+	log = NULL;
+	addr = __split_free_list(pages);
+
+	if(addr == 0) {
+		dbg();
+		goto out;
+	}
+	log = init_pk_basic();
+
+	if(!log) {
+		dbg();
+		goto out;
+	}
+	log->start = addr;
+	log->pages = pages;
+out:
+	return log;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __rollback performs the rollback by following the records given in the
+/// task's log regions
+////////////////////////////////////////////////////////////////////////////////
+void __rollback(unsigned long start)
+{
+	// Helpers available here:
+	//	__log_addr_in_range(unsigned long addr);	0 or -1
+	//	__share_addr_in_range(unsigned long addr);	0 or -1
+	// NOTE: replay of the log records is not implemented in this snapshot
+	dbg();
+	PK_LOCK;
+
+	PK_UNLOCK;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __release_log_regions(). Called from do_exit() to release the log regions
+/// that the process has obtained. The status of the transaction at this point
+/// is always PK_TR_ABORTED or PK_TR_STOPPED
+////////////////////////////////////////////////////////////////////////////////
+void __release_log_regions(struct pk_task *pk)
+{
+	struct list_head *idx;
+	struct pk_basic *log_region;
+
+	// __insert_into_freelist() must be called with PK_LOCK held
+	PK_LOCK;
+	while(!list_empty(&pk->logs_head)) {
+		dbg();
+		idx = pk->logs_head.next;
+		log_region = list_entry(idx, struct pk_basic, list);
+		list_del(idx);
+		__insert_into_freelist(log_region);
+	}
+	PK_UNLOCK;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// __do_abort(). The main body of the __do_abort call.
+/// It finally sets the task's status to PK_TR_ABORTED; only a pk_end()
+/// will set the status to PK_TR_STOPPED. From then on, new transactions can
+/// be started
+////////////////////////////////////////////////////////////////////////////////
+long __do_abort(struct task_struct *task, int call_mprotect)
+{
+	long err;
+	struct list_head *idx;
+	struct pk_task *pk;
+	struct pk_task_share *task_share;
+	struct pk_basic *log_region;
+
+	err = 0;
+	if(!task) {
+		err = -EBUG;
+		BUG();
+		goto out;
+	}
+	pk = task->pk;
+
+	if(!pk) {
+		dbg();
+		goto out;
+	}
+
+	// If we are interrupting another pk_* system call then we set the
+	// status to PK_TR_ABORT_PENDING, otherwise we set the status to note
+	// that an abort is in progress with PK_TR_ABORT_INPROGRESS
+	PK_TASK_LOCK(pk);
+	if(pk->status & PK_TR_SYS_INPROGRESS) {
+		dbg();
+		pk->status |= PK_TR_ABORT_PENDING;
+		PK_TASK_UNLOCK(pk);
+		goto out;
+	}
+	else {
+		dbg();
+		pk->status |= PK_TR_ABORT_INPROGRESS;
+	}
+	PK_TASK_UNLOCK(pk);
+
+	// Perform rollback
+	if(!list_empty(&pk->logs_head)) {
+		idx = pk->logs_head.next;
+		log_region = list_entry(idx, struct pk_basic, list);
+		__rollback(log_region->start);
+	}
+
+	// 1. We iterate over all the shared regions in the task's share_head
+	//    list and remove the pk_task_share nodes from the share's
+	//    task_head list.
+	// 2. Undo:
+	//    If a region has been ALLOCED + FREED, then delete it (default)
+	//    If a region has been FREED only, don't delete it
+	//    If a region has been ALLOCED, then delete it
+	// 3. Call mprotect(), since we need to unmap the memory regions that
+	//    the task_share elements in the task's share_head list point to.
+	//    NOTE: If PK_PROC_FREE only, then don't call mprotect(). All
+	//    other possible combinations call mprotect()
+	// 4. Decrement the refcnts of the share regions
+	list_for_each(idx, &pk->share_head) {
+		unsigned long start, pages;
+		// 1.
+		task_share = list_entry(idx, struct pk_task_share, list);
+		PK_LOCK;
+		PK_SHARE_LOCK(task_share->share);
+		list_del(&task_share->list_task);
+
+		// 2.
+		// NOTE: We could drop PK_LOCK here, but we drop it below to
+		// be consistent
+		if(task_share->status & PK_PROC_FREE &&
+		   !(task_share->status & PK_PROC_ALLOC))
+			task_share->share->status &= ~(PK_ST_DEL);
+		else if(task_share->status & PK_PROC_ALLOC)
+			task_share->share->status |= PK_ST_DEL_FINAL;
+		PK_SHARE_UNLOCK(task_share->share);
+		PK_UNLOCK;
+
+		// 3.
+		PK_SHARE_LOCK(task_share->share);
+		start = task_share->share->start;
+		pages = task_share->share->pages;
+		PK_SHARE_UNLOCK(task_share->share);
+		if(call_mprotect) {
+			dbg();
+			if(!((task_share->status & PK_PROC_FREE)
+			     && !(task_share->status & PK_PROC_GOT_WR)
+			     && !(task_share->status & PK_PROC_ALLOC))) {
+				dbg("undoing mprotect at %lu, pages %lu",
+				    start, pages);
+				err = mprotect_for_task(task, start, pages
+							* PAGE_SIZE, PROT_NONE);
+				if(err < 0) {
+					BUG();
+					err = -EMPROTECT;
+					goto out;
+				}
+			}
+		}
+		// 4.
+		__dec_share_refcnt(task_share->share);
+		dbg();
+	}
+
+	// Empty the task's pk->share_head list and free the task_share
+	// structures
+	while(!list_empty(&pk->share_head)) {
+		dbg();
+		idx = pk->share_head.next;
+		task_share = list_entry(idx, struct pk_task_share, list);
+		list_del(idx);
+		kmem_cache_free(pk_task_share_cachep, task_share);
+	}
+
+	// Set the status of the transaction to PK_TR_ABORTED
+	PK_TASK_LOCK(pk);
+	pk->status = PK_TR_ABORTED;
+	PK_TASK_UNLOCK(pk);
+	err = -EABRT;
+out:
+	return err;
+}
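
A worked example of the arbitration in __advance_queue() below, assuming
HZ = 1000 (so PK_TIMEOUT = 3000 ticks): a holder queued at jiffies = 2500 is
allotted until 5500. A conflicting waiter arriving at jiffies = 5000 finds
time_before_eq(5500, 5000) false, sleeps for the 500-tick differential, and
retries at roughly jiffies = 5500, at which point the holder has exhausted its
allotted time and is aborted on the waiter's behalf via __do_abort().
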
+
+////////////////////////////////////////////////////////////////////////////////
+/// __advance_queue() makes sure no task holds a shareable region forever
+////////////////////////////////////////////////////////////////////////////////
+long __advance_queue(struct pk_task_share *task_share, unsigned long status)
+{
+	long err;
+	struct list_head *idx;
+	struct pk_task_share *ts;
+	unsigned long allotted_time, current_time;
+
+	err = 0;
+	GET_SEMAPHORE(task_share, status);
+	PK_SHARE_LOCK(task_share->share);
+
+	// Has the region been marked for deletion while we waited for it?
+	if(task_share->share->status & PK_ST_DEL ||
+	   task_share->share->status & PK_ST_DEL_FINAL) {
+		dbg();
+		err = -EDELETED;
+		goto out;
+	}
+
+	// If there is no one currently holding the region, then we just add
+	// ourselves to the list of users after setting the region's status
+	if(list_empty(&task_share->share->task_head)) {
+		dbg();
+		task_share->share->status |= status;
+		goto add;
+	}
+
+	// Case 1: There are currently readers of the region and we've
+	// requested a read lock
+	if((task_share->share->status & PK_FL_RDLCK) && (status & PK_FL_RDLCK)) {
+		dbg();
+		goto add;
+	}
+
+	// Case 2: share->status & PK_FL_RDLCK && status & PK_FL_WRLCK
+	// Case 3: share->status & PK_FL_WRLCK
+	//	- If allotted_time <= current_time, then abort the holder!
+	//	- Otherwise wait for the time differential to elapse and retry
+_continue:
+	dbg();
+	while(!list_empty(&task_share->share->task_head)) {
+		list_for_each(idx, &task_share->share->task_head) {
+			ts = list_entry(idx, struct pk_task_share, list_task);
+			current_time = jiffies;
+			allotted_time = ts->jiffies + PK_TIMEOUT;
+			dbg("current_time %lu ts->jiffies %lu allotted_time %lu",
+			    current_time, ts->jiffies, allotted_time);
+			if(time_before_eq(allotted_time, current_time)) {
+				PK_SHARE_UNLOCK(task_share->share);
+				dbg("calling __do_abort %d -> %d",
+				    current->pid, ts->task->pid);
+				__do_abort(ts->task, 1);
+				PK_SHARE_LOCK(task_share->share);
+				goto _continue;
+			}
+			else {
+				long diff;
+				diff = (long) allotted_time - (long) current_time;
+				dbg("sleep for diff = %ld", diff);
+				PK_SHARE_UNLOCK(task_share->share);
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				schedule_timeout(diff);
+				PK_SHARE_LOCK(task_share->share);
+				goto _continue;
+			}
+		}
+	}
+
+	// Update the status of share (in the other cases the status stays
+	// the same)
+	if(task_share->share->status & PK_FL_RDLCK && status & PK_FL_WRLCK) {
+		dbg();
+		task_share->share->status &= ~(PK_FL_RDLCK);
+		task_share->share->status |= PK_FL_WRLCK;
+	}
+
+	if(task_share->share->status & PK_FL_WRLCK && status & PK_FL_RDLCK) {
+		dbg();
+		task_share->share->status &= ~(PK_FL_WRLCK);
+		task_share->share->status |= PK_FL_RDLCK;
+	}
+add:
+	task_share->jiffies = jiffies;
+	dbg("jiffies assigned to task_share %lu", task_share->jiffies);
+	list_add_tail(&task_share->list_task, &task_share->share->task_head);
+out:
+	PK_SHARE_UNLOCK(task_share->share);
+	RELEASE_SEMAPHORE(task_share, status);
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// The pk_alloc_log() system call
+////////////////////////////////////////////////////////////////////////////////
+asmlinkage long sys_pk_alloc_log(unsigned long pages, unsigned long flag)
+{
+	long err;
+	struct pk_basic *log;
+
+	// Initialize the transaction and/or change its status
+	err = setup_transaction(PK_TR_SYS_INPROGRESS);
+	if(err == -ENOMEM || err == -EABRT)
+		goto out;
+
+	log = __alloc_log_pages(pages);
+	if(!log) {
+		dbg();
+		err = -ENOMEM;
+		goto reset;
+	}
+
+	err = mprotect_for_task(current, log->start, pages * PAGE_SIZE,
+				(PROT_READ | PROT_WRITE));
+	if(err < 0) {
+		BUG();
+		err = -EMPROTECT;
+		PK_LOCK;
+		__insert_into_freelist(log);
+		PK_UNLOCK;
+		goto reset;
+	}
+	err = log->start;
+	list_add_tail(&log->list, &current->pk->logs_head);
+
+	// Did someone abort us while we were executing this call?
+	// If so, then it is _our_ responsibility to abort the transaction
+	PK_CUR_TASK_LOCK;
+	if(current->pk->status & PK_TR_ABORT_PENDING) {
+		PK_CUR_TASK_UNLOCK;
+		dbg("calling __do_abort on %d", current->pid);
+		err = __do_abort(current, 1);
+		goto out;
+	}
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+	goto out;
+reset:
+	PK_CUR_TASK_LOCK;
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+out:
+	force_successful_syscall_return();
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// The sys_pk_alloc() system call
+////////////////////////////////////////////////////////////////////////////////
+asmlinkage long sys_pk_alloc(unsigned long pages)
+{
+	long err;
+	struct pk_share *share;
+	struct pk_task_share *task_share;
+	struct pk_basic *insert;
+
+	// Initialize the transaction and/or change its status
+	err = setup_transaction(PK_TR_SYS_INPROGRESS);
+	if(err == -ENOMEM || err == -EABRT)
+		goto out;
+	task_share = init_pk_task_share();
+	if(!task_share) {
+		err = -ENOMEM;
+		goto reset;
+	}
+
+	share = __alloc_share_regions(pages);
+	if(!share) {
+		dbg();
+		err = -ENOMEM;
+		kmem_cache_free(pk_task_share_cachep, task_share);
+		goto reset;
+	}
+
+	task_share->share = share;
+	task_share->status = PK_PROC_ALLOC | PK_FL_WRLCK;
+	share->status = PK_FL_WRLCK;
+	err = mprotect_for_task(current, share->start, pages * PAGE_SIZE,
+				(PROT_READ | PROT_WRITE));
+	if(err < 0) {
+		BUG();
+		err = -EMPROTECT;
+		// Release the region's pages to the free list. The share is
+		// not on pk_shareHEAD yet, so return them directly
+		insert = init_pk_basic();
+		if(insert) {
+			insert->start = share->start;
+			insert->pages = share->pages;
+			PK_LOCK;
+			__insert_into_freelist(insert);
+			PK_UNLOCK;
+		}
+		// Free the memory allocated to share
+		kmem_cache_free(pk_share_cachep, share);
+		kmem_cache_free(pk_task_share_cachep, task_share);
+		goto reset;
+	}
+
+	err = share->start;
+	// Now that memory protection has been set up:
+	// - Link this task_share node into the task's share_head list
+	// - Link this task_share node into the region's task_head list
+	// - Increment the refcnt of the region. No one _can_ possibly
+	//   use the region yet, so don't bother holding share->lock:
+	//   there can be no contention for it
+	list_add(&task_share->list, &current->pk->share_head);
+	list_add_tail(&task_share->list_task, &share->task_head);
+	share->refcnt++;
+
+	// Add the share region to the global shareable region list
+	PK_LOCK;
+	list_add(&share->list, &pk_shareHEAD);
+	PK_UNLOCK;
+
+	// Did someone abort us while we were executing this call?
+	// If so, then it is _our_ responsibility to abort the transaction
+	PK_CUR_TASK_LOCK;
+	if(current->pk->status & PK_TR_ABORT_PENDING) {
+		PK_CUR_TASK_UNLOCK;
+		dbg("calling __do_abort on %d", current->pid);
+		err = __do_abort(current, 1);
+		goto out;
+	}
+
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+	goto out;
+reset:
+	PK_CUR_TASK_LOCK;
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+out:
+	force_successful_syscall_return();
+	return err;
+}
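
Illustrative only, using the hypothetical wrappers sketched after the
unistd.h hunk (PK_FL_RDLCK as in linux/pkmem.h): a producer allocates and
publishes a region, and a consumer later maps it read-only with pk_get().
How the start address travels between the processes (pipe, file, fork) is
out of band.

	/* producer */
	long addr = pk_alloc(1);		/* write lock implied */
	if (!pk_err(addr)) {
		*(long *)addr = 42;
		pk_end();			/* lock and mapping drop */
	}

	/* consumer, given addr */
	long pages = pk_get(addr, PK_FL_RDLCK);	/* returns size in pages */
	if (!pk_err(pages)) {
		long v = *(long *)addr;		/* region mapped read-only */
		pk_end();
	}
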
+
+////////////////////////////////////////////////////////////////////////////////
+/// sys_pk_get()
+////////////////////////////////////////////////////////////////////////////////
+asmlinkage long sys_pk_get(unsigned long start, unsigned long flag)
+{
+	long err;
+	unsigned long prot;
+	struct pk_share *share;
+	struct pk_task_share *task_share;
+
+	// Check that the parameters to the syscall are correct: exactly one
+	// of the two lock types must be requested
+	if(flag != PK_FL_WRLCK && flag != PK_FL_RDLCK) {
+		dbg();
+		err = -EINVAL;
+		goto out;
+	}
+
+	// Initialize the transaction and/or change its status
+	err = setup_transaction(PK_TR_SYS_INPROGRESS);
+	if(err == -ENOMEM || err == -EABRT) {
+		dbg();
+		goto out;
+	}
+
+	// First off, search the shareHEAD list to see if the region exists in
+	// the first place; if it does exist, then we increment the region's
+	// share->refcnt (IMPORTANT)
+	share = search_shareHEAD(start);
+	if(!share) {
+		dbg();
+		err = -ENOEXIST;
+		goto reset;
+	}
+
+	// The region does exist; now we check that we haven't already got,
+	// freed, or alloced the same region in the current transaction.
+	// Case #1: We have alloced this region. If a request for WRLCK was
+	// made then the request is ignored. If a request for RDLCK is made
+	// then we return -ENODGRADE (no downgrading of locks!)
+	// Case #2: We have already freed this region; signal an error because
+	// it makes _no_ sense to obtain a RDLCK or WRLCK on the region
+	// Case #3a: We have already got a WRLCK on this region; the cases
+	// are identical to case #1
+	// Case #3b: We have already got a RDLCK on this region. If a request
+	// for WRLCK is made then return -ENOUGRADE (no upgrading of locks!);
+	// if a request for RDLCK is made then the request is ignored
+	task_share = search_share_head(share);
+	if(task_share) {
+		dbg();
+		if(task_share->status & PK_PROC_ALLOC
+		   || task_share->status & PK_PROC_GOT_WR) {
+			if(flag & PK_FL_WRLCK) {
+				dbg();
+				err = share->pages;
+				goto dec;
+			}
+			else {
+				dbg();
+				err = -ENODGRADE;
+				goto dec;
+			}
+		}
+		else if(task_share->status & PK_PROC_FREE) {
+			dbg();
+			err = -EDELETED;
+			goto dec;
+		}
+		else if(task_share->status & PK_PROC_GOT_RD) {
+			if(flag & PK_FL_WRLCK) {
+				dbg();
+				err = -ENOUGRADE;
+				goto dec;
+			} else {
+				dbg();
+				err = share->pages;
+				goto dec;
+			}
+		}
+	}
+
+	prot = PROT_READ;
+	// Initialize a task_share object to link with the share object
+	task_share = init_pk_task_share();
+	if(!task_share) {
+		dbg();
+		err = -ENOMEM;
+		goto dec;
+	}
+	task_share->share = share;
+	if(flag & PK_FL_WRLCK) {
+		dbg();
+		prot |= (PROT_WRITE);
+		task_share->status = PK_PROC_GOT_WR;
+	}
+	else {
+		dbg();
+		task_share->status = PK_PROC_GOT_RD;
+	}
+
+	// Advance the wait queue for the shareable region. No one should be
+	// allowed to hold a lock on a region forever
+	err = __advance_queue(task_share, flag);
+	if(err < 0) {
+		dbg();
+		goto release;
+	}
+
+	// Call mprotect to map the region with the requested protection
+	err = mprotect_for_task(current, share->start, share->pages * PAGE_SIZE,
+				prot);
+	if(err < 0) {
+		BUG();
+		err = -EMPROTECT;
+		// __advance_queue() queued task_share on the region's
+		// task_head; unlink it before freeing
+		PK_SHARE_LOCK(share);
+		list_del(&task_share->list_task);
+		PK_SHARE_UNLOCK(share);
+		goto release;
+	}
+
+	// Link this task_share node into the task's share_head list
+	list_add(&task_share->list, &current->pk->share_head);
+
+	// Did someone abort us while we were executing this call?
+	// If so, then it is _our_ responsibility to abort the transaction
+	PK_CUR_TASK_LOCK;
+	if(current->pk->status & PK_TR_ABORT_PENDING) {
+		PK_CUR_TASK_UNLOCK;
+		dbg("calling __do_abort on %d", current->pid);
+		err = __do_abort(current, 1);
+		goto out;
+	}
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+	err = share->pages;
+	goto out;
+
+release:
+	kmem_cache_free(pk_task_share_cachep, task_share);
+dec:
+	__dec_share_refcnt(share);
+reset:
+	PK_CUR_TASK_LOCK;
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+out:
+	force_successful_syscall_return();
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// sys_pk_free_log()
+////////////////////////////////////////////////////////////////////////////////
+asmlinkage long sys_pk_free_log(unsigned long start)
+{
+	long err;
+	struct pk_basic *remove;
+
+	// Succeed only if the status is PK_TR_STOPPED
+	err = -ENOTSTOPPED;
+	if(current->pk) {
+		dbg();
+		PK_CUR_TASK_LOCK;
+		if(current->pk->status & PK_TR_STOPPED)
+			err = 0;
+		PK_CUR_TASK_UNLOCK;
+	}
+	if(err < 0)
+		goto out;
+
+	// Check if a log region with log->start == start exists
+	// in the process's logs_head list
+	remove = search_logs_head(start);
+	if(!remove) {
+		dbg();
+		err = -ENOEXIST;
+		goto out;
+	}
+
+	// Call mprotect to remove the physical memory protection. Do this
+	// before unlinking the node, so that a failure leaves the log region
+	// intact on the task's logs_head list
+	err = mprotect_for_task(current, remove->start,
+				remove->pages * PAGE_SIZE, PROT_NONE);
+	if(err < 0) {
+		BUG();
+		err = -EMPROTECT;
+		goto out;
+	}
+
+	// Remove the log region from the task's logs_head list and release
+	// it back into the free list
+	list_del(&remove->list);
+	PK_LOCK;
+	__insert_into_freelist(remove);
+	PK_UNLOCK;
+
+out:
+	force_successful_syscall_return();
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// sys_pk_free()
+////////////////////////////////////////////////////////////////////////////////
+asmlinkage long sys_pk_free(unsigned long start)
+{
+	long err;
+	struct pk_share *share;
+	struct pk_task_share *task_share;
+
+	// Initialize the transaction and/or change its status
+	err = setup_transaction(PK_TR_SYS_INPROGRESS);
+	if(err == -ENOMEM || err == -EABRT) {
+		dbg();
+		goto out;
+	}
+
+	// First off, search the shareHEAD list to see if the region exists in
+	// the first place; if it does exist, then we increment the region's
+	// share->refcnt (IMPORTANT)
+	share = search_shareHEAD(start);
+	if(!share) {
+		dbg();
+		err = -ENOEXIST;
+		goto reset;
+	}
+
+	// If the region is found in the shareHEAD list, the following cases
+	// arise:
+	// Case #1: If the region has already been alloced, then we set the
+	// status of the shareable region to |= PK_ST_DEL
+	// Case #2: If the region has already been freed, then we return 0
+	// Case #3a: If the region has already been got (WRLCK), then it is
+	// the same situation as #1
+	// Case #3b: If the region has already been got (RDLCK), then we deny
+	// the operation
+	// FIXME? We never do an mprotect() to remove any protection that we
+	// have on an already alloced/got-wr region.
+	task_share = search_share_head(share);
+	if(task_share) {
+		dbg();
+		if(task_share->status & PK_PROC_ALLOC
+		   || task_share->status & PK_PROC_GOT_WR) {
+			dbg();
+			task_share->status |= PK_PROC_FREE;
+			PK_SHARE_LOCK(task_share->share);
+			share->status |= PK_ST_DEL;
+			PK_SHARE_UNLOCK(task_share->share);
+			err = 0;
+			goto dec;
+		}
+		else if(task_share->status & PK_PROC_FREE) {
+			dbg();
+			err = 0;
+			goto dec;
+		}
+		else if(task_share->status & PK_PROC_GOT_RD) {
+			dbg();
+			err = -EDENIED;
+			goto dec;
+		}
+	}
+
+	// Initialize a task_share object to link with the share object
+	task_share = init_pk_task_share();
+	if(!task_share) {
+		dbg();
+		err = -ENOMEM;
+		goto dec;
+	}
+	task_share->share = share;
+	task_share->status = PK_PROC_FREE;
+
+	// Advance the wait queue for the shareable region. No one should be
+	// allowed to hold a lock on a region forever
+	err = __advance_queue(task_share, PK_FL_WRLCK);
+	if(err < 0) {
+		dbg();
+		goto release;
+	}
+
+	// Mark the region as deleted
+	PK_SHARE_LOCK(task_share->share);
+	share->status |= PK_ST_DEL;
+	PK_SHARE_UNLOCK(task_share->share);
+
+	// Link this task_share node into the task's share_head list
+	list_add(&task_share->list, &current->pk->share_head);
+
+	// Did someone abort us while we were executing this call?
+	// If so, then it is _our_ responsibility to abort the transaction
+	PK_CUR_TASK_LOCK;
+	if(current->pk->status & PK_TR_ABORT_PENDING) {
+		PK_CUR_TASK_UNLOCK;
+		dbg("calling __do_abort on %d", current->pid);
+		err = __do_abort(current, 1);
+		goto out;
+	}
+
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+	goto out;
+
+release:
+	kmem_cache_free(pk_task_share_cachep, task_share);
+dec:
+	__dec_share_refcnt(share);
+reset:
+	PK_CUR_TASK_LOCK;
+	current->pk->status &= ~(PK_TR_SYS_INPROGRESS);
+	PK_CUR_TASK_UNLOCK;
+out:
+	force_successful_syscall_return();
+	return err;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// The sys_pk_end() call
+////////////////////////////////////////////////////////////////////////////////
+asmlinkage long sys_pk_end(void)
+{
+	long err;
+	struct list_head *idx;
+	struct pk_task_share *task_share;
+	err = 0;
+
+	// If the transaction has been stopped, then the call to pk_end is as
+	// good as a NOP. If the transaction has been aborted then set the
+	// status to stopped. If the transaction is currently being aborted,
+	// then userspace would need to call pk_end again. Else set the
+	// transaction to PK_TR_SYS_INPROGRESS
+	if(current->pk) {
+		dbg();
+		PK_CUR_TASK_LOCK;
+		if(current->pk->status & PK_TR_STOPPED)
+			goto unlock;
+		else if(current->pk->status & PK_TR_ABORTED) {
+			dbg();
+			current->pk->status |= PK_TR_STOPPED;
+			err = -EABRT;
+			goto unlock;
+		}
+		else if(current->pk->status & PK_TR_ABORT_INPROGRESS) {
+			dbg();
+			err = -EABRTONGOING;
+			goto unlock;
+		}
+		else if(current->pk->status & PK_TR_STARTED) {
+			dbg();
+			current->pk->status |= PK_TR_SYS_INPROGRESS;
+		}
+	}
+	else {
+		dbg();
+		goto out;
+	}
+	PK_CUR_TASK_UNLOCK;
+
+	// 0. Delete the task_share node from the process's share_head list
+	// 1. Unmap the memory regions pointed to by the task_share elements
+	//    in the task's share_head list.
+	//    NOTE: If PK_PROC_FREE only, then don't call mprotect(). All other
+	//    possible combinations call mprotect() to remove protection. If
+	//    the region has been marked for deletion, then make sure it does
+	//    get deleted: PK_ST_DEL_FINAL is needed to make sure the region
+	//    is indeed released
+	// 2. Delete the task_share node from the share region's task_head
+	//    list
+	// 3. Decrement the refcnts of all the shared regions we hold
+	//    (task_share->share)
+	// 4. Free up the task_share data structure
+	while(!list_empty(&current->pk->share_head)) {
+		unsigned long start, pages;
+		dbg();
+		idx = current->pk->share_head.next;
+		// 0.
+		list_del(idx);
+		task_share = list_entry(idx, struct pk_task_share, list);
+		// 1.
+		PK_SHARE_LOCK(task_share->share);
+		start = task_share->share->start;
+		pages = task_share->share->pages;
+		if(task_share->status & PK_PROC_FREE)
+			task_share->share->status |= PK_ST_DEL_FINAL;
+		PK_SHARE_UNLOCK(task_share->share);
+		if(!((task_share->status & PK_PROC_FREE)
+		     && !(task_share->status & PK_PROC_GOT_WR)
+		     && !(task_share->status & PK_PROC_ALLOC))) {
+			dbg("undoing mprotect at %lu, pages %lu", start, pages);
+			err = mprotect_for_task(current, start, pages
+						* PAGE_SIZE, PROT_NONE);
+			if(err < 0) {
+				BUG();
+				err = -EMPROTECT;
+				goto out;
+			}
+		}
+		// 2.
+		PK_LOCK;
+		PK_SHARE_LOCK(task_share->share);
+		list_del(&task_share->list_task);
+		PK_SHARE_UNLOCK(task_share->share);
+		PK_UNLOCK;
+		// 3.
+		__dec_share_refcnt(task_share->share);
+		// 4.
+		kmem_cache_free(pk_task_share_cachep, task_share);
+	}
+
+	// Set the status of the transaction to PK_TR_STOPPED
+	// NOTE: If someone aborts us during this call, we ignore it and still
+	// go ahead with the pk_end
+	PK_CUR_TASK_LOCK;
+	current->pk->status = PK_TR_STOPPED;
+
+unlock:
+	PK_CUR_TASK_UNLOCK;
+out:
+	force_successful_syscall_return();
+	return err;
+}
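
Finally, an end-to-end sketch of one transaction against this API,
illustrative only, reusing the hypothetical wrappers and pk_err() helper from
the first sketch. pk_alloc() starts a transaction and returns the region's
start address, mapped read-write; pk_alloc_log() provides pages for undo
records; pk_end() commits, dropping locks and mappings; pk_free_log() is only
valid once the transaction is stopped; and freeing a region is itself
transactional, so the pk_free() is followed by another pk_end():

	int main(void)
	{
		long addr, log;

		addr = pk_alloc(4);		/* transaction #1: 4 shared pages */
		if (pk_err(addr))
			return 1;
		*(int *)addr = 42;		/* mapped read-write */

		log = pk_alloc_log(1, 0);	/* one page of log space */
		if (pk_end() == 0 && !pk_err(log))	/* commit */
			pk_free_log(log);	/* legal: status is now stopped */

		if (!pk_err(pk_free(addr)))	/* transaction #2: retire region */
			pk_end();
		return 0;
	}
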