|
|
CS385 Operating System Concepts and Design
|
|
|
|
Lecture 1: Introduction
|
|
|
|
Purpose, function and design of an OS
|
|
|
|
A program without an OS
|
|
|
|
|
#include <avr/io.h> #include <util/delay.h>
int main(void) { srandom(123); DDRB = 0x7; // assign first three pins as outputs
while(1) { PORTB = (PORTB & ~0x7) | (random() & 0x7); _delay_ms(250); } }
|
|
|
|
composing programs
|
|
|
|
user selectability of programs through some interface
|
|
|
|
add/remove programs dynamically
|
|
|
|
provides a consistent view for the application programmer
|
|
|
|
convenient API (application programming interfaces)
|
|
|
|
run multiple programs concurrently
|
|
|
|
protection between processes
|
|
|
|
memory
|
|
|
|
and other state
|
|
|
|
exclusive access to resources
|
|
|
|
communication facilities
|
|
|
|
inter-process communication
|
|
|
|
networking
|
|
|
|
protection between users
|
|
|
|
access rights
|
|
|
|
Lecture 2: First steps in C
|
|
|
|
appeared first in 1972
|
|
|
|
"high-level" assembly
|
|
|
|
minimalist language
|
|
|
|
no garbage collection
|
|
|
|
no runtime safety
|
|
|
|
no polymorphism
|
|
|
|
pointer arithmetic
|
|
|
|
basically portable assembly, with some syntactic sugar
|
|
|
|
Lecture 3: printf and types in C
|
|
|
|
printf format specifiers
|
|
|
|
%d - signed integer
|
|
|
|
%u - unsigned integer
|
|
|
|
%x - hexadecimal
|
|
|
|
%lu - long unsigned integer (64 bits)
|
|
|
|
%c - character
|
|
|
|
%s - string
|
|
|
|
%f - float
|
|
|
|
%lf - double floating point
|
|
|
|
\n
|
|
|
|
\t
|
|
|
|
types
|
|
|
|
char - 1 byte
|
|
|
|
unsigned char - 1 byte
|
|
|
|
signed char - 1 byte
|
|
|
|
short - 16 bit
|
|
|
|
int - 32 bits
|
|
|
|
unsigned int - 32 bits
|
|
|
|
long - 64 bits
|
|
|
|
floats / doubles
|
|
|
|
all pointers = 8/16/32/64 bits (depending on machine)
|
|
|
|
struct {}
|
|
|
|
Lecture 4: function calls and the stack
|
|
|
|
Homework 1 was due right now
|
|
|
|
svn://bits.cs.uic.edu/cs385/notes/solutions/hw1
|
|
|
|
int fib(int val) { return fib(val-1)+fib(val-2);}
|
|
|
|
main() {}
|
|
|
|
Application Binary Interface. System V
|
|
|
|
f(i=9)
|
|
|
|
local variables
|
|
|
|
previous frame pointer
|
|
|
|
f(i=10)
|
|
|
|
return address
|
|
|
|
return value
|
|
|
|
parameter values
|
|
|
|
main
|
|
|
|
a frame consists of
|
|
|
|
local variables
|
|
|
|
previous frame pointer
|
|
|
|
return address
|
|
|
|
return value
|
|
|
|
parameter values
|
|
|
|
Lecture 6: parameter passing and memory management
|
|
|
|
homework2
|
|
|
|
allocation of variables
|
|
|
|
globals
|
|
|
|
at compile time
|
|
|
|
locals (stack)
|
|
|
|
heap
|
|
|
|
allocated at runtime using malloc().
|
|
|
|
lifetime of variables
|
|
|
|
globals - live forever
|
|
|
|
locals (stack)- only during function execution (and subroutines)
|
|
|
|
heap - until free(), or program exit.
|
|
|
|
Lecture 7: mostly hw3
|
|
|
|
I/O <stdio.h>
|
|
|
|
buffered files
|
|
|
|
higher-level functions
|
|
|
|
fopen
|
|
|
|
fread / fwrite
|
|
|
|
fseek
|
|
|
|
fflush
|
|
|
|
fsync
|
|
|
|
fclose
|
|
|
|
fileno
|
|
|
|
raw file IO
|
|
|
|
open
|
|
|
|
read / write
|
|
|
|
lseek
|
|
|
|
close
|
|
|
|
file management
|
|
|
|
Lecture 8: a bit of subversion, and linking
|
|
|
|
svn keeps a central repository of all committed versions
|
|
|
|
larger C programs
|
|
|
|
.h - header files
|
|
|
|
int fact(int i);
|
|
|
|
.c - implementation code
|
|
|
|
.o - object files
|
|
|
|
combination of a bunch of .c files (and some .h)
|
|
|
|
.a - libraries
|
|
|
|
.so - dynamic libraries
|
|
|
|
executables
|
|
|
|
Lecture 9: linking and maybe loading
|
|
|
|
Lecture 10: hw4, and dynamic linking/loading
|
|
|
|
Lecture 11: scripts, processes/threads, input/output
|
|
|
|
Process representation
|
|
|
|
register contents
|
|
|
|
stack
|
|
|
|
memory - heap
|
|
|
|
memory manager configuration
|
|
|
|
file descriptors - stdin, stdout, stderr, open("filename")
|
|
|
|
permissions
|
|
|
|
executable image
|
|
|
|
Threads (within a process)
|
|
|
|
register contents
|
|
|
|
stack
|
|
|
|
Creating a process
|
|
|
|
fork()
|
|
|
|
a call to clone with certain settings: copy everything
|
|
|
|
clone()
|
|
|
|
make a thread by copying only register contents and stack
|
|
|
|
In Linux: a task is either/both process and thread
|
|
|
|
Input output manipulation
|
|
|
|
redirection
|
|
|
|
piping - communication between processes, through file descriptors
|
|
|
|
ls | sort
|
|
|
|
pipe()
|
|
|
|
start up process
|
|
|
|
create pipe(pipein, pipeout)
|
|
|
|
fork()
|
|
|
|
if(parent)
|
|
|
|
{
|
|
|
|
close(pipein);
|
|
|
|
redirect(stdout=pipeout)
|
|
|
|
exec(ls)
|
|
|
|
}
|
|
|
|
if(child)
|
|
|
|
{
|
|
|
|
close(pipeout);
|
|
|
|
redirect(stdin=pipein)
|
|
|
|
exec(sort);
|
|
|
|
}
|
|
|
|
Lecture
|
|
|
|
Lecture 12: a first visit to kernel mode
|
|
|
|
user mode / userland / protected mode / ring 3
|
|
|
|
restrict memory access
|
|
|
|
can't change privilege ring
|
|
|
|
can't change virtual memory layout
|
|
|
|
can't change interrupts
|
|
|
|
kernel mode / kernel / privileged mode / ring 0
|
|
|
|
Lecture 13: synchronization primitives
|
|
|
|
reading: chapters 4, 6 except 6.3, 6.9, 7.1-7.4
|
|
|
|
Lecture 14: pthreads
|
|
|
|
Lecture 15: deadlock, priority inversion
|
|
|
|
mutual exclusion
|
|
|
|
hold and wait
|
|
|
|
reserve all ahead of time
|
|
|
|
release and re-lock
|
|
|
|
no preemption
|
|
|
|
circular dependency
|
|
|
|
enforce ordering of locks
|
|
|
|
monitor
|
|
|
|
object
|
|
|
|
has a number of methods
|
|
|
|
encapsulates some state
|
|
|
|
pathfinder
|
|
|
|
top prio: bc_sched
|
|
|
|
next: bc_dist
|
|
|
|
// other processes
|
|
|
|
low: ASI/MET
|
|
|
|
priority inversion
|
|
|
|
lower level processes able to delay higher level processes
|
|
|
|
priority inheritance
|
|
|
|
when higher prio process waits for a lock,
|
|
|
|
bump up prio of lower prio process holding lock
|
|
|
|
Lecture 16: more on "multitasking", and hw6
|
|
|
|
cooperative multitasking
|
|
|
|
processes / tasks decide when to "yield" the processor
|
|
|
|
pre-emptive multitasking
|
|
|
|
timer going off at regular intervals - 1 to 10 ms
|
|
|
|
task switching howto
|
|
|
|
new_task() to create task
|
|
|
|
allocate stack space
|
|
|
|
copy contents of old stack to new stack
|
|
|
|
push register contents onto new stack
|
|
|
|
except ESP = point to new stack
|
|
|
|
add task to list of tasks
|
|
|
|
yield() function releases CPU to other process
|
|
|
|
store register contents on calling stack
|
|
|
|
pick next task
|
|
|
|
restore registers from next task's stack
|
|
|
|
return
|
|
|
|
preemptive multitasking
|
|
|
|
timer() - called every N milliseconds
|
|
|
|
same as yield
|
|
|
|
return from interrupt
|
|
|
|
two kinds of tasks
|
|
|
|
CPU-bound tasks
|
|
|
|
I/O-bound tasks
|
|
|
|
run queue, runqueue
|
|
|
|
active tasks
|
|
|
|
still have some CPU allotment
|
|
|
|
expired tasks
|
|
|
|
spent their CPU allotment
|
|
|
|
wait queue(s)
|
|
|
|
waiting for an I/O operation
|
|
|
|
Lecture 17: CPU scheduling and midterm discussion
|
|
|
|
scheduling criteria (quality metrics)
|
|
|
|
throughput
|
|
|
|
finish as many processes as possible
|
|
|
|
efficiency
|
|
|
|
make sure CPU is busy
|
|
|
|
don't spend too much time on scheduling
|
|
|
|
constant factors
|
|
|
|
low complexity (tasks, processors)
|
|
|
|
fairness
|
|
|
|
equal amount of time between processes
|
|
|
|
no way to cheat your way to more CPU
|
|
|
|
no starvation
|
|
|
|
latency
|
|
|
|
real-time systems (guarantees)
|
|
|
|
interactive systems
|
|
|
|
low response time variance (jitter)
|
|
|
|
way to prioritize processes
|
|
|
|
SMP, SMT processors
|
|
|
|
Midterm
|
|
|
|
C programming puzzles
|
|
|
|
pointers
|
|
|
|
arrays
|
|
|
|
casting
|
|
|
|
malloc / free
|
|
|
|
preprocessor
|
|
|
|
Conceptual OS / language questions
|
|
|
|
function calls
|
|
|
|
system calls
|
|
|
|
linking / loading
|
|
|
|
multi-tasking
|
|
|
|
Synchronization puzzles
|
|
|
|
mutex / barrier / condition variables (pthread)
|
|
|
|
race conditions
|
|
|
|
deadlocks
|
|
|
|
livelocks (?)
|
|
|
|
Process management puzzles and concepts
|
|
|
|
fork
|
|
|
|
exec
|
|
|
|
dup2
|
|
|
|
difference between a thread and a process
|
|
|
|
what's shared, what isn't
|
|
|
|
Lecture 18: schedulers and queueing
|
|
|
|
arrival rates
|
|
|
|
service rate
|
|
|
|
wait time
|
|
|
|
queue length
|
|
|
|
Little's law / rule
|
|
|
|
queue length = arrival rate * wait time
|
|
|
|
Lecture 19: Linux CPU schedulers
|
|
|
|
Linux 2.4
|
|
|
|
epoch - time interval in which each process gets to spend their allocation
|
|
|
|
run queue, runqueue
|
|
|
|
active tasks
|
|
|
|
still have some CPU allotment
|
|
|
|
expired tasks
|
|
|
|
spent their CPU allotment
|
|
|
|
wait queue(s)
|
|
|
|
waiting for an I/O operation
|
|
|
|
schedule()
|
|
|
|
iterate through processes : O(N)
|
|
|
|
find most urgent process
|
|
|
|
priority = time_left + nice level (static priority) + 1000*real_time
|
|
|
|
new_epoch()
|
|
|
|
counter = counter/2 + NICE_TO_TICKS(nice_level)
|
|
|
|
coarse grained scheduling interval
|
|
|
|
Linux 2.6 O(1) scheduler
|
|
|
|
priority array
|
|
|
|
prio 1: head->next->next
|
|
|
|
prio 2: head->next
|
|
|
|
prio 3: nil
|
|
|
|
prio 4: head
|
|
|
|
...
|
|
|
|
140
|
|
|
|
BSF: bit scan forward
|
|
|
|
O(1) scheduling
|
|
|
|
upon expiry, before adding to expired array, compute new allotment
|
|
|
|
priority = static_priority + interactivity (+/- 5)
|
|
|
|
heuristics for interactivity I/O bound, CPU bound
|
|
|
|
cpu_time = minimum_time + static_priority * scaling factor
|
|
|
|
when an interactive task expires, reinsert it into the active array, unless someone's starving
|
|
|
|
Linux 2.6: CFS scheduler (Completely Fair Scheduler)
|
|
|
|
every process associated with a "virtual runtime"
|
|
|
|
modified by your priority
|
|
|
|
run the process with least virtual runtime
|
|
|
|
red-black tree
|
|
|
|
at most 2x difference between paths
|
|
|
|
logarithmic time insert / delete / find_min
|
|
|
|
schedule():
|
|
|
|
find minimum vruntime process
|
|
|
|
run it for a bit
|
|
|
|
update the vruntime of process vruntime = actual_runtime*process_weight
|
|
|
|
stick it back in the tree / heap
|
|
|
|
repeat
|
|
|
|
Lecture 20: boot: from power-on to first process
|
|
|
|
Lecture 21: first instruction to first user process
|
|
|
|
Lecture 22: inter-process communication and memory management
|
|
|
|
memory hierarchy
|
|
|
|
removable media - tape drives
|
|
|
|
no automatic access
|
|
|
|
hard drives
|
|
|
|
300 MBps - really fast
|
|
|
|
seek time is a problem 4-12 ms latency
|
|
|
|
sector = 512 bytes
|
|
|
|
RAM / SDRAM
|
|
|
|
DDR3 1333 8Gb, 8-8-8-24, 260 contacts, 64 bits at a time
|
|
|
|
40 ns
|
|
|
|
10 GB/s
|
|
|
|
shared L3 cache
|
|
|
|
12 MB, 40 CPU cycles
|
|
|
|
L2 cache
|
|
|
|
256 KB, 10 CPU cycles
|
|
|
|
L1 cache
|
|
|
|
32 KB
|
|
|
|
3 CPU cycles
|
|
|
|
registers
|
|
|
|
10-32
|
|
|
|
1 CPU cycle
|
|
|
|
Virtual memory
|
|
|
|
process isolation
|
|
|
|
context independent
|
|
|
|
swap to disk
|
|
|
|
use "memory" accesses to access non-RAM data
|
|
|
|
Lecture 23: Virtual Memory
|
|
|
|
TLB - Translation Lookaside Buffer
|
|
|
|
16-2048 entries, most recently used page table entries
|
|
|
|
i7 (nehalem)
|
|
|
|
L1 64 entries for data
|
|
|
|
L1 128 entries for code
|
|
|
|
L2 512 entries shared
|
|
|
|
Process dereferences logical address L
|
|
|
|
segmentation turns into linear address K
|
|
|
|
look up K in the TLB (cache)
|
|
|
|
if we have it, return value of TLB(K)
|
|
|
|
if we don't have it
|
|
|
|
look up entry in the page table (process specific)
|
|
|
|
potentially multiple lookups
|
|
|
|
return TLB(K)
|
|
|
|
TLB(K)
|
|
|
|
if entry is valid - replace linear address page with physical address page
|
|
|
|
if entry is invalid
|
|
|
|
CPU triggers a page fault
|
|
|
|
if process hasn't allocated this linear address (virtual memory)
|
|
|
|
SEGFAULT the process
|
|
|
|
if process has never touched this allocated address before
|
|
|
|
find some free memory
|
|
|
|
update the process' page table entry
|
|
|
|
return
|
|
|
|
if this memory was paged out to disk
|
|
|
|
find some free memory
|
|
|
|
read in page from disk
|
|
|
|
update the process' page table entry
|
|
|
|
return
|
|
|
|
check the L1 cache
|
|
|
|
check the L2 cache
|
|
|
|
check the L3 cache
|
|
|
|
get from main memory
|
|
|
|
getting "the next byte"
|
|
|
|
page entry is already in TLB cache (L1)
|
|
|
|
physical address known
|
|
|
|
cache lines are usually 64 bytes
|
|
|
|
next byte is already in the L1 cache
|
|
|
|
shm_open()
|
|
|
|
mmap(filehandle, address, size)
|
|
|
|
Lecture 25: caching and performance
|
|
|
|
read: 8.4-8.7 (paging), all of ch 9 (virtual memory)
|
|
|
|
definition of "virtual memory" in the book is a bit outdated
|
|
|
|
virtual address space: 2^64 bytes
|
|
|
|
virtual memory = physical memory + disk swap
|
|
|
|
physical memory = amount installed
|
|
|
|
Lecture 26: paging
|
|
|
|
page replacement
|
|
|
|
recently allocated and written to
|
|
|
|
recently paged in and written to
|
|
|
|
recently paged in from disk
|
|
|
|
mmaped file
|
|
|
|
swap contents
|
|
|
|
time consumed
|
|
|
|
time to write dirty page to disk
|
|
|
|
time to read new page from disk
|
|
|
|
frame allocation
|
|
|
|
mov (dx):(ax), (ex):(bx)
|
|
|
|
local allocation
|
|
|
|
each process owns some frames
|
|
|
|
global allocation
|
|
|
|
active / inactive pages
|
|
|
|
any frame can be taken at any time
|
|
|
|
linux memory manager
|
|
|
|
active list
|
|
|
|
pages recently accessed (more than once)
|
|
|
|
inactive list
|
|
|
|
pages that weren't
|
|
|
|
malloc vs. kernel allocation
|
|
|
|
need flexibility
|
|
|
|
needs to be fast
|
|
|
|
needs to be space efficient
|
|
|
|
needs to have local memory access patterns
|
|
|
|
backed by kernel allocator
|
|
|
|
can request almost unlimited contiguous space
|
|
|
|
may not need a system call
|
|
|
|
not constrained to page size allocations
|
|
|
|
Lecture 29: memory management wrap-up
|
|
|
|
Lecture 30: intro to storage management
|
|
|
|
block devices
|
|
|
|
blocks 512-4096 bytes
|
|
|
|
disk / flash / memory
|
|
|
|
block N -> platter/head P, cylinder, sector
|
|
|
|
file system
|
|
|
|
directories
|
|
|
|
files
|
|
|
|
backed by inodes (filesystem object)
|
|
|
|
links
|
|
|
|
soft-links
|
|
|
|
mounts / disks
|
|
|
|
virtual file system
|
|
|
|
actual file systems mounted
|
|
|
|
Lecture 31: Virtual file system
|
|
|
|
Virtual File System
|
|
|
|
struct inode* {}
|
|
|
|
struct dirent {}
|
|
|
|
Lecture 32: File system datastructures
|
|
|
|
directory datastructure on disk
|
|
|
|
directory entries
|
|
|
|
inode number
|
|
|
|
name
|
|
|
|
flat file
|
|
|
|
fast insert O(1)
|
|
|
|
slow lookup O(N)
|
|
|
|
hashtable
|
|
|
|
fast lookup and insert O(1)
|
|
|
|
a little messy
|
|
|
|
tree
|
|
|
|
insertion O(log N)
|
|
|
|
lookup O(log N)
|
|
|
|
free list
|
|
|
|
bit vector
|
|
|
|
linked list
|
|
|
|
grouped linked list
|
|
|
|
multiple entries per block
|
|
|
|
in-core/memory representation
|
|
|
|
struct super_block
|
|
|
|
bunch of settings for the filesystem implementation
|
|
|
|
root directory
|
|
|
|
function pointers
|
|
|
|
struct inode
|
|
|
|
bunch of attributes, size, permissions
|
|
|
|
block numbers - file content blocks
|
|
|
|
function pointers
|
|
|
|
struct dentry
|
|
|
|
name
|
|
|
|
inode number
|
|
|
|
parent dentry
|
|
|
|
function pointers
|
|
|
|
struct file
|
|
|
|
struct inode*
|
|
|
|
offset - where we are reading / writing at the moment
|
|
|
|
attributes
|
|
|
|
Lecture 33: filesystem consistency
|
|
|
|
failures:
|
|
|
|
power failure
|
|
|
|
battery backup
|
|
|
|
diesel backup
|
|
|
|
disk failure
|
|
|
|
either the whole thing
|
|
|
|
or perhaps the odd block
|
|
|
|
system failure
|
|
|
|
motherboard might fry
|
|
|
|
OS / filesystem failure
|
|
|
|
bugs
|
|
|
|
ACID
|
|
|
|
Atomicity
|
|
|
|
all or nothing
|
|
|
|
transactions
|
|
|
|
begin
|
|
|
|
bunch of actions
|
|
|
|
in case of error - abort
|
|
|
|
commit
|
|
|
|
Consistency
|
|
|
|
any write moves file system from one consistent state to another
|
|
|
|
Isolation (independence)
|
|
|
|
no concurrent access to data modified by a transaction
|
|
|
|
Durability
|
|
|
|
what's written should stay until overwritten
|
|
|
|
system failure cannot cause data loss
|
|
|
|
Journaling - WAL (Write-Ahead Logging)
|
|
|
|
first write "what you want to do"
|
|
|
|
then you do it
|
|
|
|
write "I did it"
|
|
|
|
latency issue
|
|
|
|
two seeks per write (unless we have a separate disk)
|
|
|
|
watch out for write caches
|
|
|
|
unless you can afford a battery backed up cache
|
|
|
|
RAID - Redundant Array of Inexpensive Disks
|
|
|
|
level 0
|
|
|
|
level 1
|
|
|
|
level 4
|
|
|
|
level 5
|
|
|
|
level 10 = 1+0
|
|
|
|
hardware
|
|
|
|
dedicated board
|
|
|
|
fakeraid
|
|
|
|
in BIOS - uses the CPU
|
|
|
|
software raid
|
|
|
|
in OS - uses the CPU
|
|
|
|
Lecture 34: btrees, B+trees and btrfs
|
|
|
|
external datastructure
|
|
|
|
complexity
|
|
|
|
number of disk accesses
|
|
|
|
computation
|
|
|
|
degree = block size / key size
|
|
|
|
parameter t
|
|
|
|
t and 2t
|
|
|
|
t-1 and 2t-1 keys
|
|
|
|
example
|
|
|
|
t=100
|
|
|
|
h=3
|
|
|
|
100
|
|
|
|
10,000
|
|
|
|
1,000,000
|
|
|
|
height of the tree, n nodes O(log_t n)
|
|
|
|
search complexity, computational: O(t * h)
|
|
|
|
insert / delete O(log_t n) complexity
|
|
|
|
insert / delete O(log_t n) disk seeks
|
|
|
|
Lecture 35: btrfs - b-tree filesystem
|
|
|
|
benefits of a b-tree type filesystem
|
|
|
|
no one block per inode
|
|
|
|
better packing of metadata
|
|
|
|
extensible - introduce a new key type
|
|
|
|
file_object_id, file_content_type, 0 -> {size, data}
|
|
|
|
logging
|
|
|
|
after logging the write
|
|
|
|
lazy update to disk
|
|
|
|
shadowing / copy-on-write
|
|
|
|
Lecture 36: filesystem wrap-up
|
|
|
|
"disk"
|
|
|
|
heads, cylinders and sectors
|
|
|
|
"blocks"
|
|
|
|
512-4096 bytes
|
|
|
|
preamble, bunch of bits, ECC (error correcting code)
|
|
|
|
"extents"
|
|
|
|
a.k.a. "clusters"
|
|
|
|
contiguous series of blocks
|
|
|
|
"mounting"
|
|
|
|
interpret block device according to some filesystem
|
|
|
|
presents a directory & file view to the virtual filesystem
|
|
|
|
"superblock"
|
|
|
|
block of metadata
|
|
|
|
size of the disk
|
|
|
|
amount of free space
|
|
|
|
block size
|
|
|
|
reference to the root
|
|
|
|
free space management reference
|
|
|
|
typically stored at some known location on disk
|
|
|
|
"file"
|
|
|
|
a contiguous series of bytes (blocks)
|
|
|
|
some set of permissions
|
|
|
|
potentially represented discontiguously on disk
|
|
|
|
usually space is allocated block by block (at least)
|
|
|
|
allocation methods
|
|
|
|
contiguous allocation
|
|
|
|
linked list
|
|
|
|
file allocation table (FAT)
|
|
|
|
indexed allocation
|
|
|
|
inode - index node, indirect node
|
|
|
|
direct block pointers
|
|
|
|
indirect pointers
|
|
|
|
points to another block full of direct pointers
|
|
|
|
double indirect
|
|
|
|
triple indirect
|
|
|
|
btree type allocation
|
|
|
|
"directory"
|
|
|
|
list of names, file references
|
|
|
|
usually, this is the only place where filenames are stored
|
|
|
|
"link"
|
|
|
|
name -> file reference mapping
|
|
|
|
only for files
|
|
|
|
just a directory entry
|
|
|
|
sometimes known as "hard link"
|
|
|
|
"softlink"
|
|
|
|
name -> name mapping
|
|
|
|
"free list"
|
|
|
|
free space management
|
|
|
|
bit array
|
|
|
|
need to scan searching for a set bit
|
|
|
|
linked list
|
|
|
|
LIFO (stack) policy (?)
|
|
|
|
"file handle"
|
|
|
|
file reference
|
|
|
|
offset
|
|
|
|
"disk cache"
|
|
|
|
RAM used to speed disk access
|
|
|
|
"buffers" in RAM reflecting "blocks" on disk
|
|
|
|
"dirty buffer" - modified buffer that hasn't yet been written to disk
|
|
|
|
not to be confused with
|
|
|
|
"pages" - virtual memory, backed by "swapped" pages, in-memory "frames"
|
|
|
|
fsync() - syncs file contents to disk
|
|
|
|
sync() - syncs all open files "sync"
|
|
|
|
disable write caching to be safe
|
|
|
|
consistency and fault tolerance
|
|
|
|
journaling file systems
|
|
|
|
recover from intermittent errors
|
|
|
|
power loss / system crash / dropped laptop
|
|
|
|
append intended update to a journal
|
|
|
|
update actual structure on disk
|
|
|
|
append update completion record
|
|
|
|
RAID
|
|
|
|
redundant array of inexpensive disks
|
|
|
|
striping
|
|
|
|
N disks: every nth block from disk 1
|
|
|
|
maintains storage capacity
|
|
|
|
mirroring
|
|
|
|
write the same block to N disks
|
|
|
|
1/N capacity
|
|
|
|
p=0.99 1-(1-p)^N
|
|
|
|
parity disk
|
|
|
|
N+1 disks
|
|
|
|
stripe across N disks
|
|
|
|
N+1th disk is parity disk (block-wise xor of contents of all disks)
|
|
|
|
Lecture 37: homework 9 discussion + hw10
|
|
|
|
security
|
|
|
|
protection
|
|
|
|
protect
|
|
|
|
from
|
|
|
|
unauthorized access
|
|
|
|
unauthorized modification
|
|
|
|
unauthorized execution
|
|
|
|
overuse
|
|
|
|
quotas
|
|
|
|
pieces
|
|
|
|
objects / resources
|
|
|
|
domain - user / group / program / roles
|
|
|
|
privileges - read/write/execute - append/delete
|
|
|
|
security
|
|
|
|
authentication: user authentication - who are you?
|
|
|
|
message authentication
|
|
|
|
privacy / confidentiality
|
|
|
|
denial of service
|
|
|
|
ddos -
|
|
|
|
system integrity
|
|
|
|
buffer overflows
|
|
|
|
command injection
|
|
|
|
SQL injection
|
|
|
|
Lecture 38: permissions management
|
|
|
|
three groups of permissions
|
|
|
|
owner
|
|
|
|
group
|
|
|
|
others
|
|
|
|
Access Control List
|
|
|
|
Lecture 39: user and data authentication
|
|
|
|
possession of the card
|
|
|
|
ID card
|
|
|
|
biometric
|
|
|
|
home address (billing address)
|
|
|
|
signature
|
|
|
|
unknown benefit
|
|
|
|
pin code
|
|
|
|
4 digits
|
|
|
|
limited number of consecutive failed attempts
|
|
|
|
login prompt
|
|
|
|
name / password
|
|
|
|
if the password is stored safely
|
|
|
|
rate limit
|
|
|
|
storage
|
|
|
|
plaintext
|
|
|
|
make sure the file is really really safe
|
|
|
|
one way hash
|
|
|
|
H(plaintext) -> digest
|
|
|
|
preimage resistance: infeasible to compose a message that results in a given digest
|
|
|
|
second preimage resistance: given a message m, infeasible to produce m' s.t. digests are the same
|
|
|
|
collision resistance: infeasible to find two messages m1, m2 s.t. digests are the same
|
|
|
|
store H(plaintext) on disk
|
|
|
|
store salt,H(salt+plaintext) on disk
|
|
|
|
shoulder surfing
|
|
|
|
password sharing
|
|
|
|
remote login - plaintext transmission
|
|
|
|
biometrics
|
|
|
|
need to trust the reader
|
|
|
|
remote reader
|
|
|
|
easily replaced by some other software
|
|
|
|
message, H(message+secret) = message authentication code
|
|
|
|
multi-factor authentication
|
|
|
|
secret
|
|
|
|
physical device
|
|
|
|
biometric
|
|
|
|
from a local reader
|
|
|
|
Lecture 40: cryptography
|
|
|
|
caesar cipher - substitution (shift) cipher
|
|
|
|
c = (m + k) % q
|
|
|
|
c = ('A' + 7) % q = 'H'
|
|
|
|
monoalphabetic cipher - 5-bit block cipher
|
|
|
|
n-bit cipher
|
|
|
|
(2^n)! - e.g. 32! for n = 5
|
|
|
|
1024-bit
|
|
|
|
n*2^n
|
|
|
|
1024*2^1024
|
|
|
|
cyclic groups
|
|
|
|
q = 31
|
|
|
|
k = 7
|
|
|
|
n = 0...30
|
|
|
|
s = n*k % 31
|
|
|
|
c = m * k % 31
|
|
|
|
identify k^-1 = 9
|
|
|
|
m = (m * k) *k^-1 % 31 = c * 9 % 31 =
|
|
|
|
diffie-hellman key exchange
|
|
|
|
alice and bob
|
|
|
|
x^b for large b
|
|
|
|
x^2 = x^1 * x^1
|
|
|
|
x^4 = x^2 * x^2
|
|
|
|
x^1024 =
|
|
|
|
x^1025 = x^1024 * x
|
|
|
|
b = {1,0,0,0,0,0,0,0,0,1}
|
|
|
|
ElGamal cipher
|
|
|
|
j
|
|
|
|
J = g^j mod q
|
|
|
|
J ^ b = g^jb
|
|
|
|
c = m encrypted using g^jb, B
|
|
|
|
Alice would post A on her website
|
|
|
|
B would send c (m encrypted with g^ab=A^b), B
|
|
|
|
Lecture 41: signatures
|
|
|
|
Hierarchical cryptosystem
|
|
|
|
Alice posts her key on the website
|
|
|
|
also posts a signature by a trusted third party
|
|
|
|
several root certificates (public keys) from trusted third parties
|
|
|
|
Verisign, Thawte
|
|
|
|
RSA - public key encryption and authentication / signatures
|
|
|
|
publish your public key
|
|
|
|
anyone can send you encrypted messages
|
|
|
|
sign messages, anyone can verify
|
|
|
|
certificate = public key + signature by trusted third party
|
|
|
|
Euler's Theorem
|
|
|
|
a ^ phi(n) ~= 1 (mod n)
|
|
|
|
n = p * q (p,q are large primes)
|
|
|
|
phi(n) = (p-1)(q-1)
|
|
|
|
factoring large integers is difficult
|
|
|
|
how to make a crypto out of this theorem?
|
|
|
|
m^((p-1)*(q-1)) = 1 (mod p*q)
|
|
|
|
m^(k*(p-1)*(q-1)+1) = m (mod p*q)
|
|
|
|
c = m^e (mod pq)
|
|
|
|
p = c^d (mod pq)
|
|
|
|
p = m^de = m
|
|
|
|
de = 1 + k(p-1)(q-1) = 1 mod (p-1)(q-1)
|
|
|
|
choose a random e < (p-1)(q-1)
|
|
|
|
then find d, s.t. ed = 1 mod (p-1)(q-1)
|
|
|
|
c = m^e (mod pq)
|
|
|
|
m = c^d (mod pq) = m^ed (mod pq) = m^(1+k(p-1)(q-1)) mod pq = m * (m^((p-1)(q-1)))^k mod pq = m
|
|
|
|
signatures in RSA
|
|
|
|
signature: H(m)^d
|
|
|
|
(H(m)^d)^e = H(m)^de = H(m)
|
|
|
|
Man in the middle
|
|
|
|
intercepting communication
|
|
|
|
invalid certificates
|
|
|
|
incorrect binaries (not actually verifying correctly)
|
|
|
|
vulnerable binaries
|
|
|
|
Keeping the key safe
|
|
|
|
Lecture 42: code vulnerabilities - buffer overflows and related attacks
|
|
|
|
protection - managing permissions
|
|
|
|
user authentication - making sure users are who they say they are
|
|
|
|
software authentication
|
|
|
|
who compiled this binary
|
|
|
|
who vouches for its correctness
|
|
|
|
what compiler was used
|
|
|
|
exploits or vulnerabilities in binaries
|
|
|
|
buffer overflow
|
|
|
|
input
|
|
|
|
return address
|
|
|
|
previous stack frame ptr
|
|
|
|
amount
|
|
|
|
name[8]
|
|
|
|
return-to-libc
|
|
|
|
execve() / system()
|
|
|
|
address of system() in return address field
|
|
|
|
////////////////////////bin/sh
|
|
|
|
NOP
|
|
|
|
write a correct program: DO NOT USE
|
|
|
|
strcpy
|
|
|
|
strncpy
|
|
|
|
scanf
|
|
|
|
careful format strings %s
|
|
|
|
gets
|
|
|
|
stack canaries
|
|
|
|
input
|
|
|
|
return address
|
|
|
|
canary (secret number)
|
|
|
|
previous stack frame ptr
|
|
|
|
amount
|
|
|
|
name[8]
|
|
|
|
address space randomization moves:
|
|
|
|
the code
|
|
|
|
top of the stack
|
|
|
|
the beginning of the mmap() zone
|
|
|
|
the beginning of the heap
|
|
|
|
NX flag - in AMD64
|
|
|
|
Lecture 43: code vulnerabilities - viruses, worms, shell/SQL injection
|
|
|
|
common technique for protecting servers
|
|
|
|
create a new user - with limited permissions
|
|
|
|
qmail
|
|
|
|
viruses
|
|
|
|
scans for "hosts"
|
|
|
|
executables
|
|
|
|
boot sector
|
|
|
|
autorun.exe
|
|
|
|
worms
|
|
|
|
compartmentalizing functionality / permissions
|
|
|
|
propagates on its own
|
|
|
|
botnets
|
|
|
|
attacks (DDoS)
|
|
|
|
spam
|
|
|
|
trojan horses / spyware / malware
|
|
|
|
Lecture 44: semester wrap-up
|
|
|
|
call number 17428
|
|
|
|
C programming
|
|
|
|
function calls
|
|
|
|
call stack
|
|
|
|
memory allocation
|
|
|
|
local variables and parameters - stack
|
|
|
|
malloced stuff - heap
|
|
|
|
statics - compile time
|
|
|
|
statics - load time
|
|
|
|
stack
|
|
|
|
mmap()
|
|
|
|
heap
|
|
|
|
statics
|
|
|
|
executable code
|
|
|
|
pointer arithmetic
|
|
|
|
size of primitive types
|
|
|
|
primitives vs. structs vs. enums vs. unions
|
|
|
|
size of some type
|
|
|
|
ptr + number =
|
|
|
|
ptr[10] = *(ptr + 10)
|
|
|
|
linking and loading
|
|
|
|
start with C program with pre-processor directives
|
|
|
|
preprocessor -> .c
|
|
|
|
compiler -> assembly file -> .S
|
|
|
|
assembler -> binary object -> .o
|
|
|
|
linker -> executable
|
|
|
|
replace symbols with addresses
|
|
|
|
at runtime:
|
|
|
|
loader -> binary in memory
|
|
|
|
dynamic linker -> finished process image
|
|
|
|
resolve symbols
|
|
|
|
Global offset table -> GOT
|
|
|
|
dynamically linked libraries
|
|
|
|
position independent code
|
|
|
|
process management
|
|
|
|
processes vs. threads
|
|
|
|
process
|
|
|
|
address space
|
|
|
|
file descriptors
|
|
|
|
stack
|
|
|
|
register contents
|
|
|
|
thread
|
|
|
|
stack
|
|
|
|
register contents
|
|
|
|
init - first user process: process id 1
|
|
|
|
fork()
|
|
|
|
execve()
|
|
|
|
sharing the CPU
|
|
|
|
cooperative time sharing
|
|
|
|
yield()
|
|
|
|
I/O wait() - automatic yield
|
|
|
|
pre-emptive scheduling
|
|
|
|
timer interrupt
|
|
|
|
scheduling
|
|
|
|
O(1) scheduler
|
|
|
|
starvation
|
|
|
|
ready queue
|
|
|
|
CFS - completely fair scheduler
|
|
|
|
run in order of time left
|
|
|
|
concurrency
|
|
|
|
disks
|
|
|
|
tape recorders
|
|
|
|
transmitters
|
|
|
|
memory
|
|
|
|
mutual exclusion primitives
|
|
|
|
semaphores
|
|
|
|
initialize with a number
|
|
|
|
wait: reduce the number by one
|
|
|
|
if the number is 0, wait
|
|
|
|
signal: increase number by one
|
|
|
|
wake up whoever is waiting
|
|
|
|
mutex
|
|
|
|
binary semaphore
|
|
|
|
lock()
|
|
|
|
unlock()
|
|
|
|
hardware support required
|
|
|
|
test-and-set
|
|
|
|
atomic operation
|
|
|
|
cache consistency
|
|
|
|
race conditions
|
|
|
|
a ++
|
|
|
|
read value of a into register
|
|
|
|
add 1 to register
|
|
|
|
write register to a
|
|
|
|
deadlock
|
|
|
|
mutual exclusion
|
|
|
|
hold and wait
|
|
|
|
circular wait
|
|
|
|
no pre-emption
|
|
|
|
dining philosophers problem
|
|
|
|
monitor
|
|
|
|
object that abstracts away concurrency problems
|
|
|
|
contains one or more locks
|
|
|
|
methods lock/unlock the locks
|
|
|
|
cpu modes
|
|
|
|
userland - user level - ring 3
|
|
|
|
privileged instructions
|
|
|
|
descriptor table (virtual memory)
|
|
|
|
interrupt table
|
|
|
|
control registers
|
|
|
|
memory accesses are restricted by the page table
|
|
|
|
I/O operations are restricted
|
|
|
|
return from interrupt / syscall - opcode
|
|
|
|
supervisor mode - ring 0
|
|
|
|
enter it through system call (syscall), interrupt
|
|
|
|
system calls
|
|
|
|
like function calls, except doesn't jump to a specified address
|
|
|
|
instead, dispatch via entry point - system call number in register
|
|
|
|
main memory
|
|
|
|
virtual memory management
|
|
|
|
virtual memory pages
|
|
|
|
map to physical memory frames
|
|
|
|
page table
|
|
|
|
page table directories (1 or more levels)
|
|
|
|
page table
|
|
|
|
page table entries
|
|
|
|
phy memory frame number
|
|
|
|
invalid
|
|
|
|
accessed
|
|
|
|
page replacement
|
|
|
|
page fault
|
|
|
|
least recently used (LRU)
|
|
|
|
tlb - translation lookaside buffer
|
|
|
|
caches
|
|
|
|
registers
|
|
|
|
l1 - small but very fast, 4 kb
|
|
|
|
l2 - 32-128k, pretty fast, 12 cycle?
|
|
|
|
l3 - shared between all cpus, larger (1-12megs), 40 cycles
|
|
|
|
main memory - 30 ns
|
|
|
|
secondary storage
|
|
|
|
5-8 ms
|
|
|
|
memory allocators
|
|
|
|
slab allocators
|
|
|
|
in-kernel common data structures
|
|
|
|
buddy allocator
|
|
|
|
in-kernel
|
|
|
|
contiguous sets of pages
|
|
|
|
malloc-style allocators
|
|
|
|
userland
|
|
|
|
allocate memory using sbrk()
|
|
|
|
freed chunks are added to a list of free elements
|
|
|
|
secondary storage
|
|
|
|
file system
|
|
|
|
allocation mechanism
|
|
|
|
FAT - file allocation table
|
|
|
|
inode
|
|
|
|
indirect node / index node
|
|
|
|
direct pointers / direct blocks
|
|
|
|
indirects -> table -> directs
|
|
|
|
double indir
|
|
|
|
triple indir
|
|
|
|
btree-based filesystems
|
|
|
|
on-disk tree datastructure
|
|
|
|
btrfs
|
|
|
|
free space management
|
|
|
|
extents
|
|
|
|
clusters of blocks
|
|
|
|
security
|
|
|
|
protection
|
|
|
|
privilege management
|
|
|
|
resources & users
|
|
|
|
access control lists
|
|
|
|
standard unix model
|
|
|
|
authentication
|
|
|
|
password
|
|
|
|
biometrics
|
|
|
|
encryption / signatures
|
|
|
|
authenticating and hiding contents
|
|
|
|
exploits and vulnerabilities
|
|
|
|
buffer overflow attacks
|
|
|
|
privilege escalation
|
|
|
|
SQL injection / shell code injection
|
|
|