|
 |
CS385 Operating System Concepts and Design
|
|
|
 |
Lecture 1: Introduction
|
|
|
 |
Purpose, function and design of an OS
|
|
|
 |
A program without an OS
|
|
|
|
|
#include <avr/io.h> #include <util/delay.h>
int main(void) { srandom(123); DDRB = 0x7; // assign first three pins as outputs
while(1) { PORTB = (PORTB & ~0x7) | (random() & 0x7); _delay_ms(250); } }
|
|
|
 |
composing programs
|
|
|
 |
user selectability of programs through some interface
|
|
|
 |
add/remove programs dynamically
|
|
|
 |
provides a consistent view for the application programmer
|
|
|
 |
convenient API (application programming interfaces)
|
|
|
 |
run multiple programs concurrently
|
|
|
 |
protection between processes
|
|
|
 |
memory
|
|
|
 |
and other state
|
|
|
 |
exclusive access to resources
|
|
|
 |
communication facilities
|
|
|
 |
inter-process communication
|
|
|
 |
networking
|
|
|
 |
protection between users
|
|
|
 |
access rights
|
|
|
 |
Lecture 2: First steps in C
|
|
|
 |
appeared first in 1972
|
|
|
 |
"high-level" assembly
|
|
|
 |
minimalist language
|
|
|
 |
no garbage collection
|
|
|
 |
no runtime safety
|
|
|
 |
no polymorphism
|
|
|
 |
pointer arithmetic
|
|
|
 |
basically portable assembly, with some syntactic sugar
|
|
|
 |
Lecture 3: printf and types in C
|
|
|
 |
printf format specifiers
|
|
|
 |
%d - signed integer
|
|
|
 |
%u - unsigned integer
|
|
|
 |
%x - hexadecimal
|
|
|
 |
%lu - long unsigned integer (64-bits)
|
|
|
 |
%c - character
|
|
|
 |
%s - string
|
|
|
 |
%f - float
|
|
|
 |
%lf - double floating point
|
|
|
 |
\n
|
|
|
 |
\t
|
|
|
 |
types
|
|
|
 |
char - 1 byte
|
|
|
 |
unsigned char - 1 byte
|
|
|
 |
signed char - 1 byte
|
|
|
 |
short - 16 bit
|
|
|
 |
int - 32 bits
|
|
|
 |
unsigned int - 32 bits
|
|
|
 |
long - 64 bits
|
|
|
 |
floats / doubles
|
|
|
 |
all pointers = 8/16/32/64 bits (depending on machine)
|
|
|
 |
struct {}
|
|
|
 |
Lecture 4: function calls and the stack
|
|
|
 |
Homework 1 was due right now
|
|
|
 |
svn://bits.cs.uic.edu/cs385/notes/solutions/hw1
|
|
|
 |
int fib(int val) { return fib(val-1)+fib(val-2);}
|
|
|
 |
main() {}
|
|
|
 |
Application Binary Interface. System V
|
|
|
 |
f(i=9)
|
|
|
 |
local variables
|
|
|
 |
previous frame pointer
|
|
|
 |
f(i=10)
|
|
|
 |
return address
|
|
|
 |
return value
|
|
|
 |
parameter values
|
|
|
 |
main
|
|
|
 |
a frame consists of
|
|
|
 |
local variables
|
|
|
 |
previous frame pointer
|
|
|
 |
return address
|
|
|
 |
return value
|
|
|
 |
parameter values
|
|
|
 |
Lecture 6: parameter passing and memory management
|
|
|
 |
homework2
|
|
|
 |
allocation of variables
|
|
|
 |
globals
|
|
|
 |
at compile time
|
|
|
 |
locals (stack)
|
|
|
 |
heap
|
|
|
 |
allocated at runtime using malloc().
|
|
|
 |
lifetime of variables
|
|
|
 |
globals - live forever
|
|
|
 |
locals (stack)- only during function execution (and subroutines)
|
|
|
 |
heap - until free(), or program exit.
|
|
|
 |
Lecture 7: mostly hw3
|
|
|
 |
I/O <stdio.h>
|
|
|
 |
buffered files
|
|
|
 |
higher-level functions
|
|
|
 |
fopen
|
|
|
 |
fread / fwrite
|
|
|
 |
fseek
|
|
|
 |
fflush
|
|
|
 |
fsync
|
|
|
 |
fclose
|
|
|
 |
fileno
|
|
|
 |
raw file IO
|
|
|
 |
open
|
|
|
 |
read / write
|
|
|
 |
lseek
|
|
|
 |
close
|
|
|
 |
file management
|
|
|
 |
Lecture 8: a bit of subversion, and linking
|
|
|
 |
svn keeps a central repository of all committed versions
|
|
|
 |
larger C programs
|
|
|
 |
.h - header files
|
|
|
 |
int fact(int i);
|
|
|
 |
.c - implementation code
|
|
|
 |
.o - object files
|
|
|
 |
combination of a bunch of .c files (and some .h)
|
|
|
 |
.a - libraries
|
|
|
 |
.so - dynamic libraries
|
|
|
 |
executables
|
|
|
 |
Lecture 9: linking and maybe loading
|
|
|
 |
Lecture 10: hw4, and dynamic linking/loading
|
|
|
 |
Lecture 11: scripts, processes/threads, input/output
|
|
|
 |
Process representation
|
|
|
 |
register contents
|
|
|
 |
stack
|
|
|
 |
memory - heap
|
|
|
 |
memory manager configuration
|
|
|
 |
file descriptors - stdin, stdout, stderr, open("filename")
|
|
|
 |
permissions
|
|
|
 |
executable image
|
|
|
 |
Threads (within a process)
|
|
|
 |
register contents
|
|
|
 |
stack
|
|
|
 |
Creating a process
|
|
|
 |
fork()
|
|
|
 |
a call to clone with certain settings: copy everything
|
|
|
 |
clone()
|
|
|
 |
make a thread by copying only register contents and stack
|
|
|
 |
In Linux: a task is either/both process and thread
|
|
|
 |
Input output manipulation
|
|
|
 |
redirection
|
|
|
 |
piping - communication between processes, through file descriptors
|
|
|
 |
ls | sort
|
|
|
 |
pipe()
|
|
|
 |
start up process
|
|
|
 |
create pipe(pipein, pipeout)
|
|
|
 |
fork()
|
|
|
 |
if(parent)
|
|
|
 |
{
|
|
|
 |
close(pipein);
|
|
|
 |
redirect(stdout=pipeout)
|
|
|
 |
exec(ls)
|
|
|
 |
}
|
|
|
 |
if(child)
|
|
|
 |
{
|
|
|
 |
close(pipeout);
|
|
|
 |
redirect(stdin=pipein)
|
|
|
 |
exec(sort);
|
|
|
 |
}
|
|
|
 |
Lecture
|
|
|
 |
Lecture 12: a first visit to kernel mode
|
|
|
 |
user mode / userland / protected mode / ring 3
|
|
|
 |
restrict memory access
|
|
|
 |
can't change privilege ring
|
|
|
 |
can't change virtual memory layout
|
|
|
 |
can't change interrupts
|
|
|
 |
kernel mode / kernel / privileged mode / ring 0
|
|
|
 |
Lecture 13: synchronization primitives
|
|
|
 |
reading: chapters 4, 6 except 6.3, 6.9, 7.1-7.4
|
|
|
 |
Lecture 14: pthreads
|
|
|
 |
Lecture 15: deadlock, priority inversion
|
|
|
 |
mutual exclusion
|
|
|
 |
hold and wait
|
|
|
 |
reserve all ahead of time
|
|
|
 |
release and re-lock
|
|
|
 |
no preemption
|
|
|
 |
circular dependency
|
|
|
 |
enforce ordering of locks
|
|
|
 |
monitor
|
|
|
 |
object
|
|
|
 |
has a number of methods
|
|
|
 |
encapsulates some state
|
|
|
 |
pathfinder
|
|
|
 |
top prio: bc_sched
|
|
|
 |
next: bc_dist
|
|
|
 |
// other processes
|
|
|
 |
low: ASI/MET
|
|
|
 |
priority inversion
|
|
|
 |
lower level processes able to delay higher level processes
|
|
|
 |
priority inheritance
|
|
|
 |
when higher prio process waits for a lock,
|
|
|
 |
bump up prio of lower prio process holding lock
|
|
|
 |
Lecture 16: more on "multitasking", and hw6
|
|
|
 |
cooperative multitasking
|
|
|
 |
processes / tasks decide when to "yield" the processor
|
|
|
 |
pre-emptive multitasking
|
|
|
 |
timer going off at regular intervals - 1 to 10 ms
|
|
|
 |
task switching howto
|
|
|
 |
new_task() to create task
|
|
|
 |
allocate stack space
|
|
|
 |
copy contents of old stack to new stack
|
|
|
 |
push register contents onto new stack
|
|
|
 |
except ESP = point to new stack
|
|
|
 |
add task to list of tasks
|
|
|
 |
yield() function releases CPU to other process
|
|
|
 |
store register contents on calling stack
|
|
|
 |
pick next task
|
|
|
 |
restore registers from next task's stack
|
|
|
 |
return
|
|
|
 |
preemptive multitasking
|
|
|
 |
timer() - called every N milliseconds
|
|
|
 |
same as yield
|
|
|
 |
return from interrupt
|
|
|
 |
two kinds of tasks
|
|
|
 |
CPU-bound tasks
|
|
|
 |
I/O-bound tasks
|
|
|
 |
run queue, runqueue
|
|
|
 |
active tasks
|
|
|
 |
still have some CPU allotment
|
|
|
 |
expired tasks
|
|
|
 |
spent their CPU allotment
|
|
|
 |
wait queue(s)
|
|
|
 |
waiting for an I/O operation
|
|
|
 |
Lecture 17: CPU scheduling and midterm discussion
|
|
|
 |
scheduling criteria (quality metrics)
|
|
|
 |
throughput
|
|
|
 |
finish as many processes as possible
|
|
|
 |
efficiency
|
|
|
 |
make sure CPU is busy
|
|
|
 |
don't spend too much time on scheduling
|
|
|
 |
constant factors
|
|
|
 |
low complexity (tasks, processors)
|
|
|
 |
fairness
|
|
|
 |
equal amount of time between processes
|
|
|
 |
no way to cheat your way to more CPU
|
|
|
 |
no starvation
|
|
|
 |
latency
|
|
|
 |
real-time systems (guarantees)
|
|
|
 |
interactive systems
|
|
|
 |
low response time variance (jitter)
|
|
|
 |
way to prioritize processes
|
|
|
 |
SMP, SMT processors
|
|
|
 |
Midterm
|
|
|
 |
C programming puzzles
|
|
|
 |
pointers
|
|
|
 |
arrays
|
|
|
 |
casting
|
|
|
 |
malloc / free
|
|
|
 |
preprocessor
|
|
|
 |
Conceptual OS / language questions
|
|
|
 |
function calls
|
|
|
 |
system calls
|
|
|
 |
linking / loading
|
|
|
 |
multi-tasking
|
|
|
 |
Synchronization puzzles
|
|
|
 |
mutex / barrier / condition variables (pthread)
|
|
|
 |
race conditions
|
|
|
 |
deadlocks
|
|
|
 |
livelocks (?)
|
|
|
 |
Process management puzzles and concepts
|
|
|
 |
fork
|
|
|
 |
exec
|
|
|
 |
dup2
|
|
|
 |
difference between a thread and a process
|
|
|
 |
what's shared, what isn't
|
|
|
 |
Lecture 18: schedulers and queueing
|
|
|
 |
arrival rates
|
|
|
 |
service rate
|
|
|
 |
wait time
|
|
|
 |
queue length
|
|
|
 |
Little's law / rule
|
|
|
 |
queue length = arrival rate * wait time
|
|
|
 |
Lecture 19: Linux CPU schedulers
|
|
|
 |
Linux 2.4
|
|
|
 |
epoch - time interval in which each process gets to spend their allocation
|
|
|
 |
run queue, runqueue
|
|
|
 |
active tasks
|
|
|
 |
still have some CPU allotment
|
|
|
 |
expired tasks
|
|
|
 |
spent their CPU allotment
|
|
|
 |
wait queue(s)
|
|
|
 |
waiting for an I/O operation
|
|
|
 |
schedule()
|
|
|
 |
iterate through processes : O(N)
|
|
|
 |
find most urgent process
|
|
|
 |
priority = time_left + nice level (static priority) + 1000*real_time
|
|
|
 |
new_epoch()
|
|
|
 |
counter = counter/2 + NICE_TO_TICKS(nice_level)
|
|
|
 |
coarse grained scheduling interval
|
|
|
 |
Linux 2.6 O(1) scheduler
|
|
|
 |
priority array
|
|
|
 |
prio 1: head->next->next
|
|
|
 |
prio 2: head->next
|
|
|
 |
prio 3: nil
|
|
|
 |
prio 4: head
|
|
|
 |
...
|
|
|
 |
140
|
|
|
 |
BSF: bit scan forward
|
|
|
 |
O(1) scheduling
|
|
|
 |
upon expiry, before adding to expired array, compute new allotment
|
|
|
 |
priority = static_priority + interactivity (+/- 5)
|
|
|
 |
heuristics for interactivity I/O bound, CPU bound
|
|
|
 |
cpu_time = minimum_time + static_priority * scaling factor
|
|
|
 |
when an interactive task expires, reinsert it into the active array, unless someone's starving
|
|
|
 |
Linux 2.6: CFS scheduler (Completely Fair Scheduler)
|
|
|
 |
every process associated with a "virtual runtime"
|
|
|
 |
modified by your priority
|
|
|
 |
run the process with least virtual runtime
|
|
|
 |
red-black tree
|
|
|
 |
at most 2x difference between paths
|
|
|
 |
logarithmic time insert / delete / find_min
|
|
|
 |
schedule():
|
|
|
 |
find minimum vruntime process
|
|
|
 |
run it for a bit
|
|
|
 |
update the vruntime of process vruntime = actual_runtime*process_weight
|
|
|
 |
stick it back in the tree / heap
|
|
|
 |
repeat
|
|
|
 |
Lecture 20: boot: from power-on to first process
|
|
|
 |
Lecture 21: first instruction to first user process
|
|
|
 |
Lecture 22: inter-process communication and memory management
|
|
|
 |
memory hierarchy
|
|
|
 |
removable media - tape drives
|
|
|
 |
no automatic access
|
|
|
 |
hard drives
|
|
|
 |
300 MBps - really fast
|
|
|
 |
seek time is a problem 4-12 ms latency
|
|
|
 |
sector = 512 bytes
|
|
|
 |
RAM / SDRAM
|
|
|
 |
DDR3 1333 8Gb, 8-8-8-24, 260 contacts, 64 bits at a time
|
|
|
 |
40 ns
|
|
|
 |
10 GB/s
|
|
|
 |
shared L3 cache
|
|
|
 |
12 MB 40 CPU cycles
|
|
|
 |
L2 cache
|
|
|
 |
256 KB 10 CPU cycles
|
|
|
 |
L1 cache
|
|
|
 |
32 KB
|
|
|
 |
3 CPU cycles
|
|
|
 |
registers
|
|
|
 |
10-32
|
|
|
 |
1 CPU cycle
|
|
|
 |
Virtual memory
|
|
|
 |
process isolation
|
|
|
 |
context independent
|
|
|
 |
swap to disk
|
|
|
 |
use "memory" accesses to access non-RAM data
|
|
|
 |
Lecture 23: Virtual Memory
|
|
|
 |
TLB - Translation Lookaside Buffer
|
|
|
 |
16-2048 entries, most recently used page table entries
|
|
|
 |
i7 (nehalem)
|
|
|
 |
L1 64 entries for data
|
|
|
 |
L1 128 entries for code
|
|
|
 |
L2 512 entries shared
|
|
|
 |
Process dereferences logical address L
|
|
|
 |
segmentation turns into linear address K
|
|
|
 |
look up K in the TLB (cache)
|
|
|
 |
if we have it, return value of TLB(K)
|
|
|
 |
if we don't have it
|
|
|
 |
look up entry in the page table (process specific)
|
|
|
 |
potentially multiple lookups
|
|
|
 |
return TLB(K)
|
|
|
 |
TLB(K)
|
|
|
 |
if entry is valid - replace linear address page with physical address page
|
|
|
 |
if entry is invalid
|
|
|
 |
CPU triggers a page fault
|
|
|
 |
if process hasn't allocated this linear address (virtual memory)
|
|
|
 |
SEGFAULT the process
|
|
|
 |
if process has never touched this allocated address before
|
|
|
 |
find some free memory
|
|
|
 |
update the process' page table entry
|
|
|
 |
return
|
|
|
 |
if this memory was paged out to disk
|
|
|
 |
find some free memory
|
|
|
 |
read in page from disk
|
|
|
 |
update the process' page table entry
|
|
|
 |
return
|
|
|
 |
check the L1 cache
|
|
|
 |
check the L2 cache
|
|
|
 |
check the L3 cache
|
|
|
 |
get from main memory
|
|
|
 |
getting "the next byte"
|
|
|
 |
page entry is already in TLB cache (L1)
|
|
|
 |
physical address known
|
|
|
 |
cache lines are usually 64 bytes
|
|
|
 |
next byte is already in the L1 cache
|
|
|
 |
shm_open()
|
|
|
 |
mmap(filehandle, address, size)
|
|
|
 |
Lecture 25: caching and performance
|
|
|
 |
read: 8.4-8.7 (paging), all of ch 9 (virtual memory)
|
|
|
 |
definition of "virtual memory" in the book is a bit outdated
|
|
|
 |
virtual address space: 2^64 bytes
|
|
|
 |
virtual memory = physical memory + disk swap
|
|
|
 |
physical memory = amount installed
|
|
|
 |
Lecture 26: paging
|
|
|
 |
page replacement
|
|
|
 |
recently allocated and written to
|
|
|
 |
recently paged in and written to
|
|
|
 |
recently paged in from disk
|
|
|
 |
mmaped file
|
|
|
 |
swap contents
|
|
|
 |
time consumed
|
|
|
 |
time to write dirty page to disk
|
|
|
 |
time to read new page from disk
|
|
|
 |
frame allocation
|
|
|
 |
mov (dx):(ax), (ex):(bx)
|
|
|
 |
local allocation
|
|
|
 |
each process owns some frames
|
|
|
 |
global allocation
|
|
|
 |
active / inactive pages
|
|
|
 |
any frame can be taken at any time
|
|
|
 |
linux memory manager
|
|
|
 |
active list
|
|
|
 |
pages recently accessed (more than once)
|
|
|
 |
inactive list
|
|
|
 |
pages that weren't
|
|
|
 |
malloc vs. kernel allocation
|
|
|
 |
need flexibility
|
|
|
 |
needs to be fast
|
|
|
 |
needs to be space efficient
|
|
|
 |
needs to have local memory access patterns
|
|
|
 |
backed by kernel allocator
|
|
|
 |
can request almost unlimited contiguous space
|
|
|
 |
may not need a system call
|
|
|
 |
not constrained to page size allocations
|
|
|
 |
Lecture 29: memory management wrap-up
|
|
|
 |
Lecture 30: intro to storage management
|
|
|
 |
block devices
|
|
|
 |
blocks 512-4096 bytes
|
|
|
 |
disk / flash / memory
|
|
|
 |
block N -> platter/head P, cylinder, sector
|
|
|
 |
file system
|
|
|
 |
directories
|
|
|
 |
files
|
|
|
 |
backed by inodes (filesystem object)
|
|
|
 |
links
|
|
|
 |
soft-links
|
|
|
 |
mounts / disks
|
|
|
 |
virtual file system
|
|
|
 |
actual file systems mounted
|
|
|
 |
Lecture 31: Virtual file system
|
|
|
 |
Virtual File System
|
|
|
 |
struct inode* {}
|
|
|
 |
struct dirent {}
|
|
|
 |
Lecture 32: File system datastructures
|
|
|
 |
directory datastructure on disk
|
|
|
 |
directory entries
|
|
|
 |
inode number
|
|
|
 |
name
|
|
|
 |
flat file
|
|
|
 |
fast insert O(1)
|
|
|
 |
slow lookup O(N)
|
|
|
 |
hashtable
|
|
|
 |
fast lookup and insert O(1)
|
|
|
 |
a little messy
|
|
|
 |
tree
|
|
|
 |
insertion O(log N)
|
|
|
 |
lookup O(log N)
|
|
|
 |
free list
|
|
|
 |
bit vector
|
|
|
 |
linked list
|
|
|
 |
grouped linked list
|
|
|
 |
multiple entries per block
|
|
|
 |
in-core/memory representation
|
|
|
 |
struct super_block
|
|
|
 |
bunch of settings for the filesystem implementation
|
|
|
 |
root directory
|
|
|
 |
function pointers
|
|
|
 |
struct inode
|
|
|
 |
bunch of attributes, size, permissions
|
|
|
 |
block numbers - file content blocks
|
|
|
 |
function pointers
|
|
|
 |
struct dentry
|
|
|
 |
name
|
|
|
 |
inode number
|
|
|
 |
parent dentry
|
|
|
 |
function pointers
|
|
|
 |
struct file
|
|
|
 |
struct inode*
|
|
|
 |
offset - where we are reading / writing at the moment
|
|
|
 |
attributes
|
|
|
 |
Lecture 33: filesystem consistency
|
|
|
 |
failures:
|
|
|
 |
power failure
|
|
|
 |
battery backup
|
|
|
 |
diesel backup
|
|
|
 |
disk failure
|
|
|
 |
either the whole thing
|
|
|
 |
or perhaps the odd block
|
|
|
 |
system failure
|
|
|
 |
motherboard might fry
|
|
|
 |
OS / filesystem failure
|
|
|
 |
bugs
|
|
|
 |
ACID
|
|
|
 |
Atomicity
|
|
|
 |
all or nothing
|
|
|
 |
transactions
|
|
|
 |
begin
|
|
|
 |
bunch of actions
|
|
|
 |
in case of error - abort
|
|
|
 |
commit
|
|
|
 |
Consistency
|
|
|
 |
any write moves file system from one consistent state to another
|
|
|
 |
Isolation
|
|
|
 |
no concurrent access to data modified by a transaction
|
|
|
 |
Durability
|
|
|
 |
what's written should stay until overwritten
|
|
|
 |
system failure cannot cause data loss
|
|
|
 |
Journaling - WAL (Write-Ahead Logging)
|
|
|
 |
first write "what you want to do"
|
|
|
 |
then you do it
|
|
|
 |
write "I did it"
|
|
|
 |
latency issue
|
|
|
 |
two seeks per write (unless we have a separate disk)
|
|
|
 |
watch out for write caches
|
|
|
 |
unless you can afford a battery backed up cache
|
|
|
 |
RAID - Redundant Array of Inexpensive Disks
|
|
|
 |
level 0
|
|
|
 |
level 1
|
|
|
 |
level 4
|
|
|
 |
level 5
|
|
|
 |
level 10 = 1+0
|
|
|
 |
hardware
|
|
|
 |
dedicated board
|
|
|
 |
fakeraid
|
|
|
 |
in BIOS - uses the CPU
|
|
|
 |
software raid
|
|
|
 |
in OS - uses the CPU
|
|
|
 |
Lecture 34: btrees, B+trees and btrfs
|
|
|
 |
external datastructure
|
|
|
 |
complexity
|
|
|
 |
number of disk accesses
|
|
|
 |
computation
|
|
|
 |
degree = block size / key size
|
|
|
 |
parameter t
|
|
|
 |
t and 2t
|
|
|
 |
t-1 and 2t-1 keys
|
|
|
 |
example
|
|
|
 |
t=100
|
|
|
 |
h=3
|
|
|
 |
100
|
|
|
 |
10,000
|
|
|
 |
1,000,000
|
|
|
 |
height of the tree, n nodes O(log_t n)
|
|
|
 |
search complexity, computational: O(t * h)
|
|
|
 |
insert / delete O(log_t n) complexity
|
|
|
 |
insert / delete O(log_t n) disk seeks
|
|
|
 |
Lecture 35: btrfs - b-tree filesystem
|
|
|
 |
benefits of a b-tree type filesystem
|
|
|
 |
no one block per inode
|
|
|
 |
better packing of metadata
|
|
|
 |
extensible - introduce a new key type
|
|
|
 |
file_object_id, file_content_type, 0 -> {size, data}
|
|
|
 |
logging
|
|
|
 |
after logging the write
|
|
|
 |
lazy update to disk
|
|
|
 |
shadowing / copy-on-write
|
|
|
 |
Lecture 36: filesystem wrap-up
|
|
|
 |
"disk"
|
|
|
 |
heads, cylinders and sectors
|
|
|
 |
"blocks"
|
|
|
 |
512-4096 bytes
|
|
|
 |
preamble, bunch of bits, ECC (error correcting code)
|
|
|
 |
"extents"
|
|
|
 |
a.k.a. "clusters"
|
|
|
 |
contiguous series of blocks
|
|
|
 |
"mounting"
|
|
|
 |
interpret block device according to some filesystem
|
|
|
 |
presents a directory & file view to the virtual filesystem
|
|
|
 |
"superblock"
|
|
|
 |
block of metadata
|
|
|
 |
size of the disk
|
|
|
 |
amount of free space
|
|
|
 |
block size
|
|
|
 |
reference to the root
|
|
|
 |
free space management reference
|
|
|
 |
typically stored at some known location on disk
|
|
|
 |
"file"
|
|
|
 |
a contiguous series of bytes (blocks)
|
|
|
 |
some set of permissions
|
|
|
 |
potentially represented discontiguously on disk
|
|
|
 |
usually space is allocated block by block (at least)
|
|
|
 |
allocation methods
|
|
|
 |
contiguous allocation
|
|
|
 |
linked list
|
|
|
 |
file allocation table (FAT)
|
|
|
 |
indexed allocation
|
|
|
 |
inode - index node, indirect node
|
|
|
 |
direct block pointers
|
|
|
 |
indirect pointers
|
|
|
 |
points to another block full of direct pointers
|
|
|
 |
double indirect
|
|
|
 |
triple indirect
|
|
|
 |
btree type allocation
|
|
|
 |
"directory"
|
|
|
 |
list of names, file references
|
|
|
 |
usually, this is the only place where filenames are stored
|
|
|
 |
"link"
|
|
|
 |
name -> file reference mapping
|
|
|
 |
only for files
|
|
|
 |
just a directory entry
|
|
|
 |
sometimes known as "hard link"
|
|
|
 |
"softlink"
|
|
|
 |
name -> name mapping
|
|
|
 |
"free list"
|
|
|
 |
free space management
|
|
|
 |
bit array
|
|
|
 |
need to scan searching for a set bit
|
|
|
 |
linked list
|
|
|
 |
LIFO (stack) policy (?)
|
|
|
 |
"file handle"
|
|
|
 |
file reference
|
|
|
 |
offset
|
|
|
 |
"disk cache"
|
|
|
 |
RAM used to speed disk access
|
|
|
 |
"buffers" in RAM reflecting "blocks" on disk
|
|
|
 |
"dirty buffer" - modified buffer that hasn't yet been written to disk
|
|
|
 |
not to be confused with
|
|
|
 |
"pages" - virtual memory, backed by "swapped" pages, in-memory "frames"
|
|
|
 |
fsync() - syncs file contents to disk
|
|
|
 |
sync() - syncs all open files "sync"
|
|
|
 |
disable write caching to be safe
|
|
|
 |
consistency and fault tolerance
|
|
|
 |
journaling file systems
|
|
|
 |
recover from intermittent errors
|
|
|
 |
power loss / system crash / dropped laptop
|
|
|
 |
append intended update to a journal
|
|
|
 |
update actual structure on disk
|
|
|
 |
append update completion record
|
|
|
 |
RAID
|
|
|
 |
redundant array of inexpensive disks
|
|
|
 |
striping
|
|
|
 |
N disks: every nth block from disk 1
|
|
|
 |
maintains storage capacity
|
|
|
 |
mirroring
|
|
|
 |
write the same block to N disks
|
|
|
 |
1/N capacity
|
|
|
 |
p=0.99 1-(1-p)^N
|
|
|
 |
parity disk
|
|
|
 |
N+1 disks
|
|
|
 |
stripe across N disks
|
|
|
 |
N+1th disk is parity disk (block-wise xor of contents of all disks)
|
|
|
 |
Lecture 37: homework 9 discussion + hw10
|
|
|
 |
security
|
|
|
 |
protection
|
|
|
 |
protect
|
|
|
 |
from
|
|
|
 |
unauthorized access
|
|
|
 |
unauthorized modification
|
|
|
 |
unauthorized execution
|
|
|
 |
overuse
|
|
|
 |
quotas
|
|
|
 |
pieces
|
|
|
 |
objects / resources
|
|
|
 |
domain - user / group / program / roles
|
|
|
 |
privileges - read/write/execute - append/delete
|
|
|
 |
security
|
|
|
 |
authentication: user authentication - who are you?
|
|
|
 |
message authentication
|
|
|
 |
privacy / confidentiality
|
|
|
 |
denial of service
|
|
|
 |
ddos -
|
|
|
 |
system integrity
|
|
|
 |
buffer overflows
|
|
|
 |
command injection
|
|
|
 |
SQL injection
|
|
|
 |
Lecture 38: permissions management
|
|
|
 |
three groups of permissions
|
|
|
 |
owner
|
|
|
 |
group
|
|
|
 |
others
|
|
|
 |
Access Control List
|
|
|
 |
Lecture 39: user and data authentication
|
|
|
 |
possession of the card
|
|
|
 |
ID card
|
|
|
 |
biometric
|
|
|
 |
home address (billing address)
|
|
|
 |
signature
|
|
|
 |
unknown benefit
|
|
|
 |
pin code
|
|
|
 |
4 digits
|
|
|
 |
limited number of consecutive failed attempts
|
|
|
 |
login prompt
|
|
|
 |
name / password
|
|
|
 |
if the password is stored safely
|
|
|
 |
rate limit
|
|
|
 |
storage
|
|
|
 |
plaintext
|
|
|
 |
make sure the file is really really safe
|
|
|
 |
one way hash
|
|
|
 |
H(plaintext) -> digest
|
|
|
 |
preimage resistance: infeasible to compose a message that results in a given digest
|
|
|
 |
second preimage resistance: given a message m, infeasible to produce m' s.t. digests are the same
|
|
|
 |
collision resistance: infeasible to find two messages m1, m2 s.t. digests are the same
|
|
|
 |
store H(plaintext) on disk
|
|
|
 |
store salt,H(salt+plaintext) on disk
|
|
|
 |
shoulder surfing
|
|
|
 |
password sharing
|
|
|
 |
remote login - plaintext transmission
|
|
|
 |
biometrics
|
|
|
 |
need to trust the reader
|
|
|
 |
remote reader
|
|
|
 |
easily replaced by some other software
|
|
|
 |
message, H(message+secret) = message authentication code
|
|
|
 |
multi-factor authentication
|
|
|
 |
secret
|
|
|
 |
physical device
|
|
|
 |
biometric
|
|
|
 |
from a local reader
|
|
|
 |
Lecture 40: cryptography
|
|
|
 |
caesar cipher - substitution (shift) cipher
|
|
|
 |
c = (m + k) % q
|
|
|
 |
c = ('A' + 7) % q = 'H'
|
|
|
 |
monoalphabetic cipher - 5-bit block cipher
|
|
|
 |
n-bit cipher
|
|
|
 |
(2^n)! possible keys, e.g. 32! for n = 5
|
|
|
 |
1024-bit
|
|
|
 |
n*2^n
|
|
|
 |
1024*2^1024
|
|
|
 |
cyclic groups
|
|
|
 |
q = 31
|
|
|
 |
k = 7
|
|
|
 |
n = 0...30
|
|
|
 |
s = n*k % 31
|
|
|
 |
c = m * k % 31
|
|
|
 |
identify k^-1 = 9
|
|
|
 |
m = (m * k) *k^-1 % 31 = c * 9 % 31 =
|
|
|
 |
diffie-hellman key exchange
|
|
|
 |
alice and bob
|
|
|
 |
x^b for large b
|
|
|
 |
x^2 = x^1 * x^1
|
|
|
 |
x^4 = x^2 * x^2
|
|
|
 |
x^1024 =
|
|
|
 |
x^1025 = x^1024 * x
|
|
|
 |
b = {1,0,0,0,0,0,0,0,0,0,1}
|
|
|
 |
ElGamal cipher
|
|
|
 |
j
|
|
|
 |
J = g^j mod q
|
|
|
 |
J ^ b = g^jb
|
|
|
 |
c = m encrypted using g^jb, B
|
|
|
 |
Alice would post A on her website
|
|
|
 |
B would send c (m encrypted with g^ab=A^b), B
|
|
|
 |
Lecture 41: signatures
|
|
|
 |
Hierarchical cryptosystem
|
|
|
 |
Alice posts her key on the website
|
|
|
 |
also posts a signature by a trusted third party
|
|
|
 |
several root certificates (public keys) from trusted third parties
|
|
|
 |
Verisign, Thawte
|
|
|
 |
RSA - public key encryption and authentication / signatures
|
|
|
 |
publish your public key
|
|
|
 |
anyone can send you encrypted messages
|
|
|
 |
sign messages, anyone can verify
|
|
|
 |
certificate = public key + signature by trusted third party
|
|
|
 |
Euler's Theorem
|
|
|
 |
a ^ phi(n) ~= 1 (mod n)
|
|
|
 |
n = p * q (p,q are large primes)
|
|
|
 |
phi(n) = (p-1)(q-1)
|
|
|
 |
factoring large integers is difficult
|
|
|
 |
how to make a crypto out of this theorem?
|
|
|
 |
m^((p-1)*(q-1)) = 1 (mod p*q)
|
|
|
 |
m^(k*(p-1)*(q-1)+1) = m (mod p*q)
|
|
|
 |
c = m^e (mod pq)
|
|
|
 |
p = c^d (mod pq)
|
|
|
 |
p = m^de = m
|
|
|
 |
de = 1 + k(p-1)(q-1) = 1 mod (p-1)(q-1)
|
|
|
 |
choose a random e < (p-1)(q-1)
|
|
|
 |
then find d, s.t. ed = 1 mod (p-1)(q-1)
|
|
|
 |
c = m^e (mod pq)
|
|
|
 |
m = c^d (mod pq) = m^ed (mod pq) = m^(1+k(p-1)(q-1)) mod pq = m * (m^((p-1)(q-1)))^k mod pq = m
|
|
|
 |
signatures in RSA
|
|
|
 |
signature: H(m)^d
|
|
|
 |
(H(m)^d)^e = H(m)^de = H(m)
|
|
|
 |
Man in the middle
|
|
|
 |
intercepting communication
|
|
|
 |
invalid certificates
|
|
|
 |
incorrect binaries (not actually verifying correctly)
|
|
|
 |
vulnerable binaries
|
|
|
 |
Keeping the key safe
|
|
|
 |
Lecture 42: code vulnerabilities - buffer overflows and related attacks
|
|
|
 |
protection - managing permissions
|
|
|
 |
user authentication - making sure users are who they say they are
|
|
|
 |
software authentication
|
|
|
 |
who compiled this binary
|
|
|
 |
who vouches for its correctness
|
|
|
 |
what compiler was used
|
|
|
 |
exploits or vulnerabilities in binaries
|
|
|
 |
buffer overflow
|
|
|
 |
input
|
|
|
 |
return address
|
|
|
 |
previous stack frame ptr
|
|
|
 |
amount
|
|
|
 |
name[8]
|
|
|
 |
return-to-libc
|
|
|
 |
execve() / system()
|
|
|
 |
address of system() in return address field
|
|
|
 |
////////////////////////bin/sh
|
|
|
 |
NOP
|
|
|
 |
write a correct program: DO NOT USE
|
|
|
 |
strcpy
|
|
|
 |
strncpy
|
|
|
 |
scanf
|
|
|
 |
careful format strings %s
|
|
|
 |
gets
|
|
|
 |
stack canaries
|
|
|
 |
input
|
|
|
 |
return address
|
|
|
 |
canary (secret number)
|
|
|
 |
previous stack frame ptr
|
|
|
 |
amount
|
|
|
 |
name[8]
|
|
|
 |
address space randomization moves:
|
|
|
 |
the code
|
|
|
 |
top of the stack
|
|
|
 |
the beginning of the mmap zone()
|
|
|
 |
the beginning of the heap
|
|
|
 |
NX flag - in AMD64
|
|
|
 |
Lecture 43: code vulnerabilities - viruses, worms, shell/SQL injection
|
|
|
 |
common technique for protecting servers
|
|
|
 |
create a new user - with limited permissions
|
|
|
 |
qmail
|
|
|
 |
viruses
|
|
|
 |
scans for "hosts"
|
|
|
 |
executables
|
|
|
 |
boot sector
|
|
|
 |
autorun.exe
|
|
|
 |
worms
|
|
|
 |
compartmentalizing functionality / permissions
|
|
|
 |
propagates on its own
|
|
|
 |
botnets
|
|
|
 |
attacks (DDoS)
|
|
|
 |
spam
|
|
|
 |
trojan horses / spyware / malware
|
|
|
 |
Lecture 44: semester wrap-up
|
|
|
 |
call number 17428
|
|
|
 |
C programming
|
|
|
 |
function calls
|
|
|
 |
call stack
|
|
|
 |
memory allocation
|
|
|
 |
local variables and parameters - stack
|
|
|
 |
malloced stuff - heap
|
|
|
 |
statics - compile time
|
|
|
 |
statics - load time
|
|
|
 |
stack
|
|
|
 |
mmap()
|
|
|
 |
heap
|
|
|
 |
statics
|
|
|
 |
executable code
|
|
|
 |
pointer arithmetic
|
|
|
 |
size of primitive types
|
|
|
 |
primitives vs. structs vs. enums vs. unions
|
|
|
 |
size of some type
|
|
|
 |
ptr + number = address of ptr advanced by number * sizeof(*ptr) bytes
|
|
|
 |
ptr[10] = *(ptr + 10)
|
|
|
 |
linking and loading
|
|
|
 |
start with C program with pre-processor directives
|
|
|
 |
preprocessor -> .c
|
|
|
 |
compiler -> assembly file -> .S
|
|
|
 |
assembler -> binary object -> .o
|
|
|
 |
linker -> executable
|
|
|
 |
replace symbols with addresses
|
|
|
 |
at runtime:
|
|
|
 |
loader -> binary in memory
|
|
|
 |
dynamic linker -> finished process image
|
|
|
 |
resolve symbols
|
|
|
 |
Global offset table -> GOT
|
|
|
 |
dynamically linked libraries
|
|
|
 |
position independent code
|
|
|
 |
process management
|
|
|
 |
processes vs. threads
|
|
|
 |
process
|
|
|
 |
address space
|
|
|
 |
file descriptors
|
|
|
 |
stack
|
|
|
 |
register contents
|
|
|
 |
thread
|
|
|
 |
stack
|
|
|
 |
register contents
|
|
|
 |
init first user process: process id 1
|
|
|
 |
fork()
|
|
|
 |
execve()
|
|
|
 |
sharing the CPU
|
|
|
 |
cooperative time sharing
|
|
|
 |
yield()
|
|
|
 |
I/O wait() - automatic yield
|
|
|
 |
pre-emptive scheduling
|
|
|
 |
timer interrupt
|
|
|
 |
scheduling
|
|
|
 |
O(1) scheduler
|
|
|
 |
starvation
|
|
|
 |
ready queue
|
|
|
 |
CFS - completely fair scheduler
|
|
|
 |
run in order of time left
|
|
|
 |
concurrency
|
|
|
 |
disks
|
|
|
 |
tape recorders
|
|
|
 |
transmitters
|
|
|
 |
memory
|
|
|
 |
mutual exclusion primitives
|
|
|
 |
semaphores
|
|
|
 |
initialize with a number
|
|
|
 |
wait: reduce the number by one
|
|
|
 |
if the number is 0, wait
|
|
|
 |
signal: increase number by one
|
|
|
 |
wake up whoever is waiting
|
|
|
 |
mutex
|
|
|
 |
binary semaphore
|
|
|
 |
lock()
|
|
|
 |
unlock()
|
|
|
 |
hardware support required
|
|
|
 |
test-and-set
|
|
|
 |
atomic operation
|
|
|
 |
cache consistency
|
|
|
 |
race conditions
|
|
|
 |
a ++
|
|
|
 |
read value of a into register
|
|
|
 |
add 1 to register
|
|
|
 |
write register to a
|
|
|
 |
deadlock
|
|
|
 |
mutual exclusion
|
|
|
 |
hold and wait
|
|
|
 |
circular wait
|
|
|
 |
no pre-emption
|
|
|
 |
dining philosophers problem
|
|
|
 |
monitor
|
|
|
 |
object that abstracts away concurrency problems
|
|
|
 |
contains one or more locks
|
|
|
 |
methods lock/unlock the locks
|
|
|
 |
cpu modes
|
|
|
 |
userland - user level - ring 3
|
|
|
 |
privileged instructions
|
|
|
 |
descriptor table (virtual memory)
|
|
|
 |
interrupt table
|
|
|
 |
control registers
|
|
|
 |
memory accesses are restricted by the page table
|
|
|
 |
I/O operations are restricted
|
|
|
 |
return from interrupt / syscall - opcode
|
|
|
 |
supervisor mode - ring 0
|
|
|
 |
enter it through system call (syscall), interrupt
|
|
|
 |
system calls
|
|
|
 |
like function calls, except doesn't jump to a specified address
|
|
|
 |
instead, dispatch via entry point - system call number in register
|
|
|
 |
main memory
|
|
|
 |
virtual memory management
|
|
|
 |
virtual memory pages
|
|
|
 |
map to physical memory frames
|
|
|
 |
page table
|
|
|
 |
page table directories (1 or more levels)
|
|
|
 |
page table
|
|
|
 |
page table entries
|
|
|
 |
phy memory frame number
|
|
|
 |
invalid
|
|
|
 |
accessed
|
|
|
 |
page replacement
|
|
|
 |
page fault
|
|
|
 |
least recently used (LRU)
|
|
|
 |
tlb - translation lookaside buffer
|
|
|
 |
caches
|
|
|
 |
registers
|
|
|
 |
l1 - small but very fast, 4 kb
|
|
|
 |
l2 - 32-128k, pretty fast, 12 cycle?
|
|
|
 |
l3 - shared between all cpus, larger (1-12megs), 40 cycles
|
|
|
 |
main memory - 30 ns
|
|
|
 |
secondary storage
|
|
|
 |
5-8 ms
|
|
|
 |
memory allocators
|
|
|
 |
slab allocators
|
|
|
 |
in-kernel common data structures
|
|
|
 |
buddy allocator
|
|
|
 |
in-kernel
|
|
|
 |
contiguous sets of pages
|
|
|
 |
malloc-style allocators
|
|
|
 |
userland
|
|
|
 |
allocate memory using sbrk()
|
|
|
 |
freed chunks are added to a list of free elements
|
|
|
 |
secondary storage
|
|
|
 |
file system
|
|
|
 |
allocation mechanism
|
|
|
 |
FAT - file allocation table
|
|
|
 |
inode
|
|
|
 |
indirect node / index node
|
|
|
 |
direct pointers / direct blocks
|
|
|
 |
indirects -> table -> directs
|
|
|
 |
double indir
|
|
|
 |
triple indir
|
|
|
 |
btree-based filesystems
|
|
|
 |
on-disk tree datastructure
|
|
|
 |
btrfs
|
|
|
 |
free space management
|
|
|
 |
extents
|
|
|
 |
clusters of blocks
|
|
|
 |
security
|
|
|
 |
protection
|
|
|
 |
privilege management
|
|
|
 |
resources & users
|
|
|
 |
access control lists
|
|
|
 |
standard unix model
|
|
|
 |
authentication
|
|
|
 |
password
|
|
|
 |
biometrics
|
|
|
 |
encryption / signatures
|
|
|
 |
authenticating and hiding contents
|
|
|
 |
exploits and vulnerabilities
|
|
|
 |
buffer overflow attacks
|
|
|
 |
privilege escalation
|
|
|
 |
SQL injection / shell code injection
|
|
|