Python vs Java (Modified from http://hyperpolyglot.org/)
a side-by-side reference sheet
Indexes: arithmetic and logic | strings | regexes | dates and time | arrays and lists | sets
|
dictionaries | functions | execution control | files | directories | processes and
environment | libraries and modules | objects | reflection | web | tests | debugging and profiling | interop
python (1991) |
java (1995) |
|
2.7; 3.2 |
SE6; SE7 |
|
import os, re, sys |
none |
|
$ python -V |
$ javac -version |
|
$ python foo.py |
$ javac foo.java |
|
$ python |
$ java |
|
$ python -c
"print('hi')" |
$ |
|
newline or ; |
; |
|
offside rule |
{} |
|
assignments can be
chained but otherwise don't return values: |
int v = 1; |
|
x, y, z = 1, 2, 3 |
none |
|
x, y = y, x |
none |
|
compound assignment
operators: arithmetic, string, logical, bit |
#
do not return values: |
+= -= *= /= %= |
none |
none |
|
#
in function body: |
String v = null; int x = 1; int y = 2; |
|
nestable (read
only): |
top level: |
|
g1, g2 = 7, 8 |
int x=1; |
|
#
uppercase identifiers |
#
warning if capitalized |
|
# comment |
#
comment |
|
use triple quote
string literal: |
// |
|
None |
null |
|
v == None |
v == null |
|
raises NameError |
? |
|
not_defined = False |
? |
|
python |
java |
|
True False |
true false |
|
False None 0 0.0 '' [] {} |
false null |
|
and or not |
&& || ! |
|
x if
x > 0 else -x |
x > 0 ? x : -x |
|
comparison operators
are chainable: |
== != > < >= <= |
|
removed from Python
3: |
none |
|
7 + int('12') |
7 + Integer.parseInt("12") |
|
+ - * / // % ** |
+ - * / % |
|
13 // 5 |
int quotient= 13 / 5; |
|
float(13) / 5 |
13.0 / 5 or |
|
from math import sqrt, exp,
log, \ |
import java.lang.Math; |
|
import math |
import java.lang.Math; (long)3.77 |
|
min(1,2,3) |
none |
|
raises ZeroDivisionError |
throws ArithmeticException |
|
becomes arbitrary
length integer of type
long |
? |
|
raises OverflowError |
? |
|
#
raises ValueError: |
throws ArithmeticException |
|
from fractions import
Fraction |
||
z = 1 + 1.414j |
none |
|
import random |
import java.util.Random; Random rand=new Random(); rand.nextInt(100); rand.nextDouble(); |
|
import random |
import java.util.Random; Random rand=new Random(10); rand.setSeed(20); |
|
<< >> & | ^ ~ |
<< >> & | ^ ~ |
|
0b101010 bin() |
0b101010 |
|
int("60",
7) |
Integer.parseInt("60", 7); |
|
python |
java |
|
'don\'t say
"no"' |
"don't say
\"no\"" |
|
triple quote
literals only |
no |
|
single and double
quoted: |
double quoted: |
|
count = 3 |
int count = 3; |
|
none |
none |
|
'lorem %s %d %f' % ('ipsum', 13, 3.7) |
none |
|
none |
none |
|
s = 'Hello, ' |
s = "Hello,
"; |
|
hbar = '-' * 80 |
none |
|
'do re mi fa'.split() |
"do re mi
fa".split("
"); |
|
' '.join(['do', 're', 'mi', 'fa']) |
none |
|
'lorem'.upper() |
"lorem".toUpperCase(); |
|
' lorem '.strip() |
" lorem
".trim() |
|
'lorem'.ljust(10) |
none |
|
len('lorem') |
"lorem".length() |
|
'do re re'.index('re') |
"do re
re".indexOf("re") |
|
'lorem ipsum'[6:11] |
"lorem
ipsum".substring(6,11) |
|
'lorem ipsum'[6] |
"lorem
ipsum".charAt(6) |
|
chr(65) |
char d=(char)65; |
|
from string import lowercase
as ins |
||
python |
java |
|
re.compile('lorem|ipsum') |
||
char class abbrevs: |
||
if re.search('1999', s): |
||
re.search('lorem',
'Lorem', re.I) |
||
re.I re.M re.S re.X |
||
s = 'do re mi mi mi' |
||
m = re.search('\d{4}', s) |
boolean isMatch =
"hello".matches(".*ll.*"); |
|
rx = '(\d{4})-(\d{2})-(\d{2})' |
rx = /(\d{4})-(\d{2})-(\d{2})/ |
|
s = 'dolor sit amet' |
a = "dolor
sit amet".scan(/\w+/) |
|
none |
/(\w+) \1/.match("do do") |
|
none |
none |
|
python |
java |
|
datetime.datetime |
java.util.Date |
|
import datetime |
long millis =
System.currentTimeMillis(); |
|
from datetime import
datetime as dt |
long epoch =
dt.getTimeInMillis()/1000; |
|
import datetime |
epoch = Time.now.to_i |
|
t.strftime('%Y-%m-%d %H:%M:%S') |
DateFormat fmt = new
SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|
line= “106,2007-02-20 00:05:41,121.480600,31.319600, 92, 90,1” day_of_week =
int(time.strftime("%w",
time.strptime("{timestamp}".format(**line), "%Y-%m-%d
%H:%M:%S"))) hour_of_day =
int(time.strftime("%H", time.strptime("{timestamp}".format(**line),
"%Y-%m-%d %H:%M:%S"))) minute =
int(time.strftime("%M",
time.strptime("{timestamp}".format(**line), "%Y-%m-%d
%H:%M:%S"))) sec =
int(time.strftime("%S",
time.strptime("{timestamp}".format(**line), "%Y-%m-%d
%H:%M:%S"))) |
DateFormat fmt = new
SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); Calendar
cl=Calendar.getInstance(); cl.setTime(dt); System.out.println(cl.get(Calendar.HOUR_OF_DAY)); System.out.println(cl.get(Calendar.DAY_OF_WEEK)); System.out.println(cl.get(Calendar.MINUTE)); System.out.println(cl.get(Calendar.SECOND)); System.out.println(cl.get(Calendar.MILLISECOND)); |
|
2011-08-23 19:35:59.411135 |
2011-08-23 17:44:53
-0700 |
|
from datetime import
datetime |
String s =
"2011-05-03 17:00:00"; |
|
#
pip install python-dateutil |
||
datetime.timedelta object |
Float containing time difference in
seconds |
|
import datetime |
require 'date/delta' |
|
a datetime object
has no timezone information unless a tzinfo object is provided when it is
created |
if no timezone is
specified the local timezone is used |
|
import time |
t.zone |
|
t.microsecond |
t.usec |
|
import time |
sleep(0.5) |
|
import signal, time |
require 'timeout' |
|
python |
java |
|
a = [1, 2, 3, 4] |
int[] a = {1, 2, 3, 4}; |
|
arrays must be
allocated on heap int[] a = new int[10]; arraylist must be
allocated on heap List<T> al=new
ArrayList<T>(10); ArrayList<T> al=new
ArrayList<T>(anotherAL); |
||
none |
none |
|
len(a) |
a.length; al.size(); |
|
not a |
if(a.length==0) if(al.size()==0) or
if(al.isEmpty()) |
|
a[0] |
a[0] al.get(0) |
|
a[0] = 'lorem' |
a[0] = "lorem"; al.set(0, "new string"); |
|
a = [] |
int[] a ={1,2,3}; |
|
a = ['x', 'y', 'z', 'w'] |
none int idx = al.indexOf("obj"); |
|
select 3rd and 4th elements: |
none |
|
a[1:] |
none |
|
a = [6,7,8] |
none al.add(newObj);
al.remove(obj); al.remove(idx); |
|
a = [6,7,8] |
none al.add(idx, obj); |
|
a = [1,2,3] |
none al.addAll(anotherAL); al.addAll(idx, anotherAL); |
|
a = [None]
* 10 |
none ArrayList<T> newAL=new
ArrayList<T>(al); |
|
import copy |
shallow
copy ArrayList<T> newAL=new
ArrayList<T>(al); deep
copy ArrayList<T> newAL=new
ArrayList<T>(al); Collections.copy(newAL, al); |
|
parameter contains
address copy |
parameter contains
shallow copy |
|
for i in [1,2,3]: |
none for(T ele: al) System.out.println(ele.toString()); |
|
a = ['do', 're', 'mi', 'fa'] |
for(int
i=0;i<a.length;i++) System.out.println(a[i]); for(int
i=0;i<al.size();i++) System.out.println(al.get(i)); |
|
range replaces xrange in Python 3: |
for(int i=0;i<1000;i++){ code } |
|
a = range(1, 11) |
none |
|
a = [1,2,3] |
none Collections.reverse(al); |
|
a = ['b', 'A', 'a', 'B'] |
none Collections.sort(al,
comparator); |
|
a = [1,2,2,3] |
ArrayList deduped = new
ArrayList(new HashSet(duplication_al)); |
|
python |
java |
|
7 in
a |
al.contains(obj); |
|
{1,2} & {2,3,4} |
Set
intersec=new HashSet(set_1); intersec.retainAll(set_2); |
|
{1,2} | {2,3,4} |
Set
union=new HashSet(set_1); union.addAll(set_2); |
|
{1,2,3} - {2} |
Set
diff=new HashSet(set_1); diff.removeAll(set_2); |
|
from random import shuffle,
sample |
//s
is a set int
rand_idx=rand.nextInt(s.size()); int i=0; for(Object o: s){
if(i==rand_idx){ return o;
}
i+=1; } |
|
map(lambda x: x * x,
[1,2,3]) |
||
filter(lambda x: x > 1,
[1,2,3]) |
||
#
import needed in Python 3 only |
||
all(i%2 == 0 for i in [1,2,3,4]) |
||
from random import shuffle,
sample |
||
#
array of 3 pairs: |
||
python |
java |
|
d = { 't':1, 'f':0 } |
none, has to allocate
first java.util.TreeMap<String,
Integer> m = new java.util.TreeMap<String, Integer>(); |
|
len(d) |
m.size() |
|
d['t'] |
m.get("hello") |
|
d = {} |
|
|
'y' in d |
m.containsKey(key); |
|
d = {1: True, 0: False} |
m.remove(key); |
|
a = [[1,'a'], [2,'b'], [3,'c']] |
a = [[1,"a"],
[2,"b"], [3,"c"]] |
|
d1 = {'a':1, 'b':2} |
m1.putAll(m2); |
|
to_num = {'t':1, 'f':0} |
to_num = {"t"=>1,
"f"=>0} |
|
for k, v in d.iteritems(): |
for (
java.util.Map.Entry<String, Integer> e : m.entrySet() ) { |
|
d.keys() |
m.keySet(); |
|
from collections import
defaultdict |
counts = Hash.new(0) |
|
python |
java |
|
def add(a, b): |
public static int add(int a, int b){
return a+b; } |
|
add(1, 2) |
add(1, 2); |
|
raises TypeError |
throws
IllegalArgumentException |
|
import math |
implemented by
method overloading void my_log(int x){
my_log(x, 2); } void my_log(int x, int base); |
|
def foo(*a): |
public static String concat(String
first, String… rest) { |
|
def fequal(x, y,
**opts): |
none |
|
not possible |
not possible primitive types are
always passed by value |
|
def foo(x, y): |
objects and arrays
are always passed by reference |
|
return arg or None |
return arg or none |
|
def first_and_second(a): |
none |
|
body must be an
expression: |
none |
|
sqr(2) |
none |
|
func = add |
none |
|
#
state not private: |
none |
|
#
Python 3: |
none |
|
def make_counter(): |
none |
|
def logcall(f): |
||
python |
java |
|
if 0 == n: |
if (i>0) { |
|
none |
switch(i) { |
|
while i < 100: |
int i = 0; |
|
none |
int n = 1; |
|
break continue none |
break continue |
|
elif else for if
while |
switch case if
else for do while |
|
raises NameError unless
a value was assigned to it |
do while |
|
none |
none |
|
raise Exception('bad arg') |
throw new
Exception("failed"); |
|
try: |
try { |
|
last exception: sys.exc_info()[1] |
||
class Bam(Exception): |
||
try: |
none |
|
acquire_resource() |
try { |
|
class sleep10(threading.Thread): |
// alternative definition:
class PrimeRun extends Thread class PrimeRun implements Runnable
{ long minPrime; PrimeRun(long minPrime) { this.minPrime = minPrime; } public void run() { // compute primes larger than
minPrime . . . } } PrimeRun p = new PrimeRun(143); new Thread(p).start(); |
|
thr.join() |
new Thread(p).join(); new Thread(p).join(100); |
|
python |
java |
|
print('Hello, World!') |
System.out.print("Hello, World!"); |
|
line = sys.stdin.readline() |
import java.util.Scanner; Scanner sc=new Scanner(System.in); int a=sc.nextInt(); |
|
sys.stdin sys.stdout sys.stderr |
$stdin $stdout
$stderr |
|
f = open('/etc/hosts') |
import java.util.Scanner; Scanner sc=new Scanner(new
File("/etc/hosts")); |
|
f = open('/tmp/test', 'w') |
import java.io.BufferedWriter; BufferedWriter fout = new
BufferedWriter(new FileWriter(fo)); |
|
with open('/tmp/test') as f: |
import java.io.BufferedWriter; |
|
f.close() |
f.close() |
|
f.readline() |
// alternatively, use Scanner
class import java.io.BufferedReader; |
|
for line in f: |
while ((line = in.readLine()) !=
null) { |
|
line = line.rstrip('\r\n') |
String line = in.readLine(); line = line.replaceAll("[\r\n]+$", ""); |
|
a = f.readlines() |
String line = in.readLine(); line = line.replaceAll("[\r\n]+$", ""); |
|
f.write('lorem
ipsum') |
import java.io.BufferedWriter; |
|
f.flush() |
fout.flush(); |
|
os.path.exists('/etc/hosts') |
File f=new
File("/etc/hosts"); f.exists() f.isFile(); f.isAbsolute(); |
|
import shutil |
File f=new
File("/etc/hosts"); f.delete() f.renameTo(); // copy a file public static void copyFile(File
sourceFile, File destFile) throws IOException { |
|
os.chmod('/tmp/foo',
0755) |
f.setExecutable(true); f.setReadable(true); f.setWritable(true); |
|
import tempfile |
File.createTempFile(name, suffix); |
|
from StringIO import
StringIO |
||
python |
java |
|
os.path.join('/etc', 'hosts') |
||
os.path.dirname('/etc/hosts') |
Only works for file File f=new File("/etc/hosts"); f.getParent(); f.getName(); |
|
os.path.abspath('..') |
File f=new
File("/etc/hosts"); String abp=f.getAbsolutePath(); |
|
for filename in os.listdir('/etc'): # alternative for filename in glob.glob('/etc'+ '/*'): print(filename) |
||
dirname = '/tmp/foo/bar' |
File f=new
File("/etc/hosts"); f.mkdir(); |
|
import shutil |
||
os.rmdir('/tmp/foodir') |
f.delete(); |
|
import shutil |
||
os.path.isdir('/tmp') |
f.isDirectory(); |
|
python |
java |
|
len(sys.argv)-1 |
ARGV.size |
|
import argparse |
require 'getoptlong' |
|
os.getenv('HOME') |
ENV["HOME"] |
|
sys.exit(0) |
exit(0) |
|
import signal |
Signal.trap("INT", |
|
os.access('/bin/ls',
os.X_OK) |
File.executable?("/bin/ls") |
|
if os.system('ls -l /tmp'): |
unless system("ls -l /tmp") |
|
import subprocess |
path = gets |
|
import subprocess |
files = `ls -l /tmp` |
|
python |
java |
|
import foo |
require 'foo' # or |
|
reload(foo) |
load 'foo.rb' |
|
sys.path.append('/some/path') |
$: << "/some/path" |
|
PYTHONPATH |
RUBYLIB |
|
none |
-I |
|
if __name__ ==
'__main__': |
if $0 == __FILE__ |
|
put
declarations in foo.py |
class Foo or
module Foo |
|
create
directory foo in library path
containing file bar.py |
module Foo::Bar or |
|
foo.bar.baz() |
Foo::Bar.baz |
|
from foo import * |
include Foo |
|
from foo import
bar, baz |
none |
|
$ virtualenv -p
/usr/bin/python foo |
$ ruby-build 1.9.3-p0 \ |
|
$ pip freeze |
$ gem list |
|
in setup.py: |
in foo.gemspec: |
|
python |
java |
|
class Int: |
class Int |
|
i = Int() |
i = Int.new |
|
v = i.value |
v = i.value |
|
public;
attributes starting with underscore private by convention |
private
by default; use attr_reader, attr_writer,
attr_accessor to make public |
|
def plus(self,v): |
def plus(i) |
|
i.plus(7) |
i.plus(7) |
|
def __del__(self): |
val = i.value |
|
def
__getattr__(self, name): |
def method_missing(name, *a) |
|
class
Counter(Int): |
class Counter < Int |
|
Counter.instances |
Counter.instances |
|
class Fixnum |
||
python |
java |
|
id(o) |
o.object_id |
|
type([]) == list |
[].class == Array |
|
NoneType |
NilClass |
|
o.__class__ == Foo |
o.class == Foo |
|
o.__class__.__bases__ |
o.class.superclass |
|
hasattr(o,
'reverse') |
o.respond_to?("reverse") |
|
for i in
range(1,10): |
(1..9).each do |i| |
|
argument
of eval must be an
expression: |
loop do |
|
[m for m in dir(o) |
o.methods |
|
dir(o) |
o.instance_variables |
|
import pprint |
require 'pp' |
|
import inspect |
__LINE__ |
|
python |
java |
|
import httplib |
require 'net/http' |
|
# Python 3
location: urllib.parse |
require 'cgi' |
|
import base64 |
require 'base64' |
|
import json |
Ruby
1.8: sudo gem install json |
|
import
xml.etree.ElementTree as ET |
# gem install builder |
|
from xml.etree
import ElementTree |
require 'rexml/document' |
|
from xml.etree
import ElementTree |
require 'rexml/document' |
|
python |
java |
|
import unittest |
require 'test/unit' |
|
$ python
foo_test.py |
$ ruby foo_test.rb |
|
s = 'do re me' |
s = "do re me" |
|
s = 'lorem ipsum' |
s = "lorem ipsum" |
|
a = [] |
assert_raises(ZeroDivisionError)
do |
|
# in class
FooTest: |
# in class FooTest: |
|
# in class
FooTest: |
# in class FooTest: |
|
python |
java |
|
import py_compile |
$ ruby -c foo.rb |
|
$ python -t foo.py |
$ ruby -w foo.rb |
|
$ sudo pip install
pylint |
||
$ python -m pdb
foo.py |
$ sudo gem install ruby-debug |
|
h l n s b c w u d
p q |
h l n s b c w u down p q |
|
import timeit |
require 'benchmark' |
|
$ python -m
cProfile foo.py |
$ sudo gem install ruby-prof |
|
python |
java |
|
Jython
2.5 |
JRuby
1.4 |
|
$ jython |
$ jirb |
|
$ jython |
$ jruby |
|
none
in 2.5.1 |
$ jrubyc |
|
import java |
none |
|
rnd =
java.util.Random() |
rnd = java.util.Random.new |
|
rnd.nextFloat() |
rnd.next_float |
|
from java.util
import Random |
java_import java.util.Random |
|
import sys |
require 'path/to/mycode.jar' |
|
import java.io as
javaio |
module JavaIO |
|
import jarray |
[1,2,3].to_java(Java::int) |
|
yes |
yes |
|
no |
yes |
The versions used for testing code in the reference
sheet.
Code which examples in the sheet assume to have already
been executed.
python:
To keep the examples short we assume that os, re, and sys are always imported.
How to get the version.
python:
The following function will return the version number as
a string:
import platform
platform.python_version()
The customary name of the interpreter and how to invoke it.
The customary name of the repl.
python:
The python repl saves the result of the last statement in _.
How to pass the code to be executed to the interpreter as a command line
argument.
How the parser determines the end of a statement.
python:
Newline does not terminate a statement when:
Python single quote '' and double quote "" strings cannot contain
newlines except as the two character escaped form \n. Putting a newline in
these strings results in a syntax error. There is however a multi-line string
literal which starts and ends with three single quotes ''' or three double
quotes: """.
A newline that would normally terminate a statement can be escaped with a
backslash.
How blocks are delimited.
python:
Python blocks begin with a line that ends in a colon. The block ends with
the first line that is not indented further than the initial line. Python
raises an IndentationError if the statements in the block that are not in a
nested block are not all indented the same. Using tabs in Python source code is
discouraged, and many editors replace them automatically with spaces. If the
Python interpreter encounters a tab, it is treated as 8 spaces.
The python repl switches from a >>>
prompt to a … prompt inside a block. A blank line terminates the block.
java:
How to assign a value to a variable.
python:
If the variable on the left has not previously been defined in the current
scope, then it is created. This may hide a variable in a containing scope.
Assignment does not return a value and cannot be used in an expression.
Thus, assignment cannot be used in a conditional test, removing the possibility
of using assignment (=) in place of an equality test (==). Assignments can
nevertheless be chained to assign a value to multiple variables:
a = b = 3
java:
Assignment operators are right-associative and evaluate to the assigned
value, so they can be chained. A variable must be declared with a type before
(or when) it is first assigned; assigning to an undeclared name is a compile error.
How to assign values to variables in parallel.
python:
The r-value can be a list or tuple:
nums = [1,2,3]
a,b,c = nums
more_nums = (6,7,8)
d,e,f = more_nums
A nested sequence of expressions can be assigned to a nested sequence of l-values,
provided the nesting matches. This assignment will set a to 1, b to 2, and c to
3:
(a,[b,c]) = [1,(2,3)]
This assignment will raise a TypeError:
(a,(b,c)) = ((1,2),3)
In Python 3 the splat operator * can be used to
collect the remaining right side elements in a list:
x, y, *z = 1, 2 # assigns [] to z
x, y, *z = 1, 2, 3 # assigns [3] to z
x, y, *z = 1, 2, 3, 4 # assigns [3, 4] to z
java:
The r-value can be an array:
nums = [1,2,3]
a,b,c = nums
How to swap the values held by two variables.
Compound assignment operators mutate a variable, setting it to the value of
an operation which takes the value of the variable as an argument.
First row: arithmetic operator assignment:
addition, subtraction, multiplication, (float) division, integer division,
modulus, and exponentiation.
Second row:
string concatenation assignment and string replication assignment
Third row:
logical operator assignment: and, or, xor
Fourth row:
bit operator assignment: left shift, right shift, and, or, xor.
python:
Python compound assignment operators do not return a value and hence cannot
be used in expressions.
The C-style increment and decrement operators can be used to increment or
decrement values. They return values and thus can be used in expressions. The
prefix versions return the value in the variable after mutation, and the
postfix versions return the value before mutation.
Incrementing a value two or more times in an expression makes the order of
evaluation significant:
x = 1;
foo(++x, ++x); // foo(2, 3) or foo(3, 2)?
x = 1;
y = ++x/++x; // y = 2/3 or y = 3/2?
Python avoids the problem by not having an in-expression increment or
decrement.
Ruby mostly avoids the problem by providing a non-mutating increment and
decrement. However, here is a Ruby expression which is dependent on order of
evaluation:
x = 1
y = (x += 1)/(x += 1)
java:
The Integer class defines succ,
pred, and next, which is a synonym
for succ.
The String class defines succ,
succ!, next, and next!. succ! and next! mutate the string.
How to declare variables which are local to the scope defining region which
immediately contain them.
python:
A variable is created by assignment if one does not already exist. If the
variable is inside a function or method, then its scope is the body of the
function or method. Otherwise it is a global.
A list of regions which define a scope for the local variables they
contain.
Local variables defined inside the region are only in scope while code
within the region is executing. If the language does not have closures, then
code outside the region has no access to local variables defined inside the
region. If the language does have closures, then code inside the region can
make local variables accessible to code outside the region by returning a
reference.
A region which is top
level hides local variables in the scope which contains it from the
code it contains. A region can also be top level if the syntax requirements of
the language prohibit it from being placed inside another scope defining
region.
A region is nestable
if it can be placed inside another scope defining region, and if code in the
inner region can access local variables in the outer region.
python:
Only functions and methods define scope. Function definitions can be
nested. When this is done, inner scopes have read access to variables defined
in outer scopes. Attempting to write (i.e. assign) to a variable defined in an
outer scope will instead result in a variable getting created in the inner
scope. Python trivia question: what would happen if the following code were
executed?
def foo():
v = 1
def bar():
print(v)
v = 2
print(v)
bar()
foo()
x = 3
id = lambda { |x| x }
id.call(7)
puts x # 1.8 prints 7; 1.9 prints 3
How to declare and access a variable with global scope.
python:
A variable is global if it is defined at the top level of a file (i.e.
outside any function definition). Although the variable is global, it must be
imported individually or be prefixed with the module name prefix to be accessed
from another file. To be accessed from inside a function or method it must be
declared with the global
keyword.
ruby:
A variable is global if it starts with a dollar sign: $.
How to declare a constant.
How to create a comment that ends at the next newline.
How to comment out multiple lines.
python:
The triple single quote ''' and triple double quote """
syntax is a syntax for string literals.
The null literal.
How to test if a variable contains null.
The result of attempting to access an undefined variable.
Literals for the booleans.
These are the return values of the comparison operators.
Values which behave like the false boolean in a conditional context.
Examples of conditional contexts are the conditional clause of an if statement and the test of a while loop.
python:
Whether an object evaluates to True or False in a boolean context can be
customized by implementing a __nonzero__
(Python 2) or __bool__ (Python 3)
instance method for the class.
Logical and, or, and not.
How to write a conditional expression. A ternary operator is an operator
which takes three arguments. Since
condition ? true value : false value
is the only ternary operator in C, it is unambiguous to refer to it as the ternary operator.
python:
The Python conditional expression comes from Algol.
Equality, inequality, greater than, less than, greater than or equal, less
than or equal.
Also known as the relational operators.
python:
Comparison operators can be chained. The following expressions evaluate to
true:
1 < 2 < 3
1 == 1 != 2
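In general, if $A_i$ are expressions and $\mathrm{op}_i$ are comparison operators, then
$A_1 \;\mathrm{op}_1\; A_2 \;\mathrm{op}_2\; A_3 \;\dots\; A_n \;\mathrm{op}_n\; A_{n+1}$
is true if and only if each of the following is true:
$A_1 \;\mathrm{op}_1\; A_2$
$A_2 \;\mathrm{op}_2\; A_3$
$\dots$
$A_n \;\mathrm{op}_n\; A_{n+1}$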
Binary comparison operators which return -1, 0, or 1 depending upon whether
the left argument is less than, equal to, or greater than the right argument.
The <=> symbol is called the spaceship
operator.
How to convert string data to numeric data and vice versa.
python:
float and int raise an error if called on a string and any part of the string
is not numeric.
The operators for addition, subtraction, multiplication, float division,
integer division, modulus, and exponentiation.
How to get the integer quotient of two integers. How to get the integer
quotient and remainder.
How to perform floating point division, even if the operands might be
integers.
Some arithmetic functions. Trigonometric functions are in radians unless
otherwise noted. Logarithms are natural unless otherwise noted.
python:
Python also has math.log10.
To compute the log of x
for base b,
use:
math.log(x)/math.log(b)
How to truncate a float to the nearest integer towards zero; how to round a
float to the nearest integer; how to find the nearest integer above a float; how
to find the nearest integer below a float; how to take the absolute value.
How to get the min and max.
What happens when division by zero is performed.
What happens when the largest representable integer is exceeded.
What happens when the largest representable float is exceeded.
The result of taking the square root of negative two.
How to create rational numbers and get the numerator and denominator.
python:
Most of the functions in math
have analogues in cmath
which will work correctly on complex numbers.
How to generate a random integer between 0 and 99 inclusive, a float between
zero and one in a uniform distribution, or a float in a normal distribution
with mean zero and standard deviation one.
How to set the random seed; how to get the current random seed and later
restore it.
All the languages in the sheet set the seed automatically to a value that
is difficult to predict. The Ruby 1.9 MRI interpreter uses the current time and
process ID, for example. As a result there is usually no need to set the seed.
Setting the seed to a hardcoded value yields a random but repeatable
sequence of numbers. This can be used to ensure that unit tests which cover
code using random numbers don't intermittently fail.
The seed is global state. If multiple functions are generating random
numbers then saving and restoring the seed may be necessary to produce a
repeatable sequence.
The bit operators for left shift, right shift, and, inclusive or, exclusive
or, and negation.
Binary, octal, and hex integer literals
How to convert integers to strings of digits of a given base. How to convert
such strings into integers.
python
Python has the functions bin, oct, and hex which take an integer and return a string encoding the
integer in base 2, 8, and 16.
bin(42)
oct(42)
hex(42)
The syntax for string literals.
Whether newlines are permitted in string literals.
python:
Newlines are not permitted in single quote and double quote string literals.
A string can continue onto the following line if the last character on the line
is a backslash. In this case, neither the backslash nor the newline are taken
to be part of the string.
Triple quote literals, which are string literals terminated by three single
quotes or three double quotes, can contain newlines:
'''This is
two lines'''
"""This is also
two lines"""
Backslash escape sequences for inserting special characters into string
literals.
unrecognized backslash escape sequence |
||
double quote |
single quote |
|
Python |
preserve backslash |
preserve backslash |
python:
When string literals have an r or R prefix there are no backslash escape sequences and any backslashes thus
appear in the created string. The delimiter can be inserted into a string if it
is preceded by a backslash, but the backslash is also inserted. It is thus not
possible to create a string with an r or R prefix that ends in a backslash. The r and R prefixes can be used with single or double quotes:
r'C:\Documents and Settings\Admin'
r"C:\Windows\System32"
The \uhhhh
escapes are also available inside Python 2 Unicode literals. Unicode literals
have a u
prefix:
u'lambda: \u03bb'
How to interpolate variables into strings.
python:
str.format will take named or positional parameters. When used with named parameters str.format can mimic the variable interpolation feature of the other languages.
A selection of variables in scope can be passed explicitly:
count = 3
item = 'ball'
print('{count} {item}s'.format(
count=count,
item=item))
Python 3 has format_map which accepts a dict as an argument:
count = 3
item = 'ball'
print('{count} {item}s'.format_map(locals()))
How to specify custom delimiters for single and double quoted strings.
These can be used to avoid backslash escaping. If the left delimiter is (, [,
or { the right delimiter must be ), ], or }, respectively.
How to create a string using a printf style format.
python:
The % operator will interpolate arguments into printf-style format strings.
The str.format with positional parameters
provides an alternative format using curly braces {0}, {1}, … for replacement
fields.
The curly braces are escaped by doubling:
'to insert parameter {0} into a format, use {{{0}}}'.format(3)
If the replacement fields appear in sequential order and aren't repeated,
the numbers can be omitted:
'lorem {} {} {}'.format('ipsum', 13, 3.7)
Here documents are strings terminated by a custom identifier. They perform variable
substitution and honor the same backslash escapes as double quoted strings.
python:
Python lacks variable interpolation in strings. Triple quotes honor the
same backslash escape sequences as regular quotes, so triple quotes can
otherwise be used like here documents:
s = '''here document
there computer
'''
The string concatenation operator.
The string replication operator.
How to split a string containing a separator into an array of substrings;
how to split a string in two; how to split a string with the delimiters
preserved as separate elements; how to split a string into an array of single
character strings.
python:
str.split() takes simple strings as delimiters; use re.split() to split on a regular expression:
re.split('\s+', 'do re mi fa')
re.split('\s+', 'do re mi fa', 1)
How to concatenate the elements of an array into a string with a separator.
How to put a string into all caps or all lower case letters. How to
capitalize the first letter of a string.
How to remove whitespace from the ends of a string.
How to pad the edge of a string with spaces so that it is a prescribed
length.
How to get the length in characters of a string.
How to find the index of the leftmost occurrence of a substring in a
string; how to find the index of the rightmost occurrence.
python:
Methods for splitting a string into three parts using the first or last
occurrence of a substring:
'do re re mi'.partition('re') # returns ('do ', 're', ' re mi')
'do re re mi'.rpartition('re') # returns ('do re ', 're', ' mi')
How to extract a substring from a string by index.
How to extract a character from a string by its index.
Converting characters to ASCII codes and back.
The languages in this reference sheet do not have character literals, so
characters are represented by strings of length one.
How to apply a character mapping to a string.
Regular expressions or regexes are a way of specifying sets of strings. If
a string belongs to the set, the string and regex "match". Regexes
can also be used to parse strings.
The modern notation for regexes was introduced by Unix command line tools
in the 1970s. POSIX standardized the notation into two types: extended regexes
and the more archaic basic regexes. Perl regexes are extended regexes augmented
by new character class abbreviations and a few other features introduced by the
Perl interpreter in the 1990s. All the languages in this sheet use Perl
regexes.
Any string that doesn't contain regex metacharacters is a regex which
matches itself. The regex metacharacters are: [ ] . | ( ) * + ? { } ^ $ \
character classes: [ ] .
A character class is a set of characters in brackets: [ ]. When used in a
regex it matches any character it contains.
Character classes have their own set of metacharacters: ^ - \ ]
The ^ is only special when it is the first character in the character class.
Such a character class matches its complement; that is, any character not
inside the brackets. When not the first character the ^ refers to itself.
The hyphen is used to specify character ranges: e.g. 0-9 or A-Z. When the
hyphen is first or last inside the brackets it matches itself.
The backslash can be used to escape the above characters or the terminal
character class delimiter: ]. It can be used in character class abbreviations
or string backslash escapes.
The period . is a character class abbreviation which matches any character
except for newline. In all languages the period can be made to match all
characters. In PHP and Perl use the s modifier. In Python
use the re.S flag. In Ruby use the m modifier.
character class
abbreviations:
abbrev |
name |
character class |
\d |
digit |
[0-9] |
\D |
nondigit |
[^0-9] |
\h |
PHP, Perl: horizontal whitespace character |
PHP, Perl: [ \t] |
\H |
PHP, Perl: not a horizontal whitespace character |
PHP, Perl: [^ \t] |
\s |
whitespace character |
[ \t\r\n\f] |
\S |
non whitespace character |
[^ \t\r\n\f] |
\v |
vertical whitespace character |
[\r\n\f] |
\V |
not a vertical whitespace
character |
[^\r\n\f] |
\w |
word character |
[A-Za-z0-9_] |
\W |
non word character |
[^A-Za-z0-9_] |
alternation and grouping: | ( )
The vertical pipe | is used for alternation and parens () for grouping.
A vertical pipe takes as its arguments everything up to the next vertical pipe,
enclosing paren, or end of string.
Parentheses control the scope of alternation and the quantifiers described
below. They are also used for capturing groups, which are the substrings which
matched parenthesized parts of the regular expression. Each language numbers
the groups and provides a mechanism for extracting when a match is made. A
parenthesized subexpression can be removed from the groups with this syntax: (?:expr)
quantifiers: * + ? { }
As an argument quantifiers take the preceding regular character, character
class, or group. The argument can itself be quantified, so that ^a{4}*$ matches strings with the letter a in multiples of 4.
quantifier |
# of occurrences of argument matched |
* |
zero or more, greedy |
+ |
one or more, greedy |
? |
zero or one, greedy |
{m,n} |
m to n,
greedy |
{n} |
exactly n |
{m,} |
m or more, greedy |
{,n} |
zero to n, greedy |
*? |
zero or more, lazy |
+? |
one or more, lazy |
{m,n}? |
m to n,
lazy |
{m,}? |
m or more, lazy |
{,n}? |
zero to n, lazy |
When there is a choice, greedy quantifiers will match the maximum possible
number of occurrences of the argument. Lazy quantifiers match the minimum
possible number.
anchor |
matches |
^ |
beginning of a string. In Ruby or
when m
modifier is used also matches right side of a newline |
$ |
end of a string. In Ruby or when m modifier is used
also matches left side of a newline |
\A |
beginning of the string |
\b |
word boundary. In between a \w and
a \W character or in between a \w character and the edge of the string |
\B |
not a word boundary. In between
two \w characters or two \W characters |
\z |
end of the string |
\Z |
end of the string unless it is a newline,
in which case it matches the left side of the terminal newline |
escaping: \
To match a metacharacter, put a backslash in front of it. To match a
backslash use two backslashes.
The literal for a regular expression; the literal for a regular expression
with a custom delimiter.
python:
Python does not have a regex literal, but the re.compile function can be used to create regex objects.
Compiling regexes can always be avoided:
re.compile('\d{4}').search('1999')
re.search('\d{4}', '1999')
re.compile('foo').sub('bar', 'foo bar')
re.sub('foo', 'bar', 'foo bar')
re.compile('\w+').findall('do re me')
re.findall('\w+', 'do re me')
The supported character class
abbreviations and anchors.
Note that \h refers to horizontal whitespace (i.e. a space or tab) in
PHP and Perl and a hex digit in Ruby. Similarly \H refers to something that isn't horizontal whitespace in PHP and Perl and
isn't a hex digit in Ruby.
How to test whether a string matches a regular expression.
python:
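The re.match function is like the re.search function, except that it only succeeds if the regular expression matches
at the beginning of the string. To require that the entire string match, end the pattern with $ or use re.fullmatch (Python 3.4 and later).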
How to perform a case insensitive match test.
Modifiers that can be used to adjust the behavior of a regular expression.
The lists are not comprehensive. For all languages except Ruby there are
additional modifiers.
modifier |
behavior |
e |
PHP: when used with preg_replace, the replacement string,
after backreferences are substituted, is eval'ed as PHP code and the result
is used as the replacement. |
i, re.I |
all: ignores case. Upper case letters match lower case letters
and vice versa. |
m, re.M |
PHP, Perl, Python: makes the ^ and $ match the right and left edge of
newlines in addition to the beginning and end of the string. |
o |
Ruby: performs variable interpolation #{ } only once per execution
of the program. |
p |
Perl: sets ${^MATCH} ${^PREMATCH} and ${^POSTMATCH} |
s, re.S |
PHP, Perl, Python: makes the period . match newline characters. |
x, re.X |
all: ignores whitespace in the regex which permits it to be
used for formatting. |
Python modifiers are bit flags. To use more than one flag at the same time,
join them with bit or: |
How to replace all occurrences of a matching pattern in a string with the
provided substitution string.
python:
The 3rd argument to sub controls the number
of occurrences which are replaced.
s = 'foo bar bar'
re.compile('bar').sub('baz', s, 1)
If there is no 3rd argument, all occurrences are replaced.
How to get the substring that matched the regular expression, as well as
the part of the string before and after the matching substring.
How to get the substrings which matched the parenthesized parts of a
regular expression.
How to return all non-overlapping substrings which match a regular
expression as an array.
How to use backreferences in a regex; how to use backreferences in the
replacement string of substitution.
Examples of recursive regexes.
The examples match substrings containing balanced parens.
In ISO 8601 terminology, a date
specifies a day in the Gregorian calendar and a time does not contain date information; it
merely specifies a time of day. A data type which combines both date and time information
is probably more useful than one which contains just date information or just
time information; it is unfortunate that ISO 8601 doesn't provide a name for
this entity. The word timestamp
often gets used to denote a combined date and time. PHP and Python use the
compound noun datetime
for combined date and time values.
A useful property of ISO
8601 dates, times, and date/time combinations is that they are correctly ordered
by a lexical sort on their string representations. This is because they are
big-endian (the year is the leftmost element) and they use fixed-length fields
for each term in the string representation.
The C standard library provides two methods for representing dates. The
first is the UNIX epoch, which is the seconds since January 1, 1970 in UTC. If
such a time were stored in a 32-bit signed integer, the rollover would happen
on January 19, 2038 (at 03:14:08 UTC).
The other method of representing dates is the tm struct, a definition of which can be found on Unix systems in /usr/include/time.h:
struct tm {
int tm_sec; /* seconds after the minute [0-60] */
int tm_min; /* minutes after the hour [0-59] */
int tm_hour; /* hours since midnight [0-23] */
int tm_mday; /* day of the month [1-31] */
int tm_mon; /* months since January [0-11] */
int tm_year; /* years since 1900 */
int tm_wday; /* days since Sunday [0-6] */
int tm_yday; /* days since January 1 [0-365] */
int tm_isdst; /* Daylight Savings Time flag */
long tm_gmtoff; /* offset from CUT in seconds */
char *tm_zone; /* timezone abbreviation */
};
Python uses and exposes the tm struct of the
standard library. In the case of Perl, the first nine values of the struct (up
to the member tm_isdst) are put into an array. Python, meanwhile, has a module
called time which is a thin wrapper to the standard library functions which operate on
this struct. Here is how to get a tm struct in Python:
import time
utc = time.gmtime(time.time())
t = time.localtime(time.time())
The tm struct is a low level entity, and interacting with it directly
should be avoided. In the case of Python it is usually sufficient to use the datetime module instead. For Perl, one can use the Time::Piece module to wrap the tm struct in an object.
The data type used to hold a combined date and time.
How to get the combined date and time for the present moment in both local
time and UTC.
How to convert the native date/time type to the Unix epoch which is the
number of seconds since the start of January 1, 1970 UTC.
How to get the current time as a Unix epoch timestamp.
How to format a date/time as a string using the format notation of the strftime function from
the standard C library. This same format notation is used by the Unix date command.
Given a date object, how to get fields such as HOUR_OF_DAY, DAY_OF_WEEK,
SECOND, MINUTE, etc.
java:
Java’s util.Calendar has some counterintuitive use cases. For the DAY_OF_WEEK field,
this field “takes values SUNDAY, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY,
and SATURDAY." When your program prints 6, it's telling you that the day
of week is FRIDAY. This constant value has nothing to do with the
beginning of the week - it's just a constant that means FRIDAY. It does
coincidentally happen to be the same as the day of the week if the first day of
the week is SUNDAY (1) - but it doesn't change if the first day of the week is
redefined. Compare this to the API for WEEK_OF_MONTH and WEEK_OF_YEAR,
which do say that they depend on the first day of the
week. You can test to see if this works correctly for your purposes.
If you really need a number representing day of week with 1 meaning Monday and
7 meaning Sunday, you can get it with a workaround.
int dayOfWeek = myCalendar.get(Calendar.DAY_OF_WEEK) - 1;
if (dayOfWeek == 0)
dayOfWeek = 7;
Examples of how a date/time object appears when treated as a string such as
when it is printed to standard out.
The formats are in all likelihood locale dependent. The provided examples
come from a machine running Mac OS X in the Pacific time zone of the USA.
How to parse a date/time using the format notation of the strptime function from the standard C library.
How to parse a date without providing a format string.
The data type that results when subtraction is performed on two combined
date and time values.
How to add a time duration to a date/time.
A time duration can easily be added to a date/time value when the value is
a Unix epoch value.
ISO 8601 distinguishes between a time interval, which is defined by two
date/time endpoints, and a duration, which is the length of a time interval and
can be defined by a unit of time such as '10 minutes'. A time interval can also
be defined by date and time representing the start of the interval and a
duration.
ISO 8601 defines notation
for durations. This notation starts with a 'P' and uses a 'T' to separate
the day and larger units from the hour and smaller units. Observing the
location relative to the 'T' is important for interpreting the letter 'M',
which is used for both months and minutes.
Do date/time values include timezone information. When a date/time value
for the local time is created, how the local timezone is determined.
A date/time value can represent a local time but not have any timezone
information associated with it.
On Unix systems processes determine the local timezone by inspecting the
file /etc/localtime.
How to get time zone information: the name of the timezone, the offset in
hours from UTC, and whether the timezone is currently in daylight savings.
Timezones are often identified by three or
four letter abbreviations. As can be seen from the list, many of the
abbreviations do not uniquely identify a timezone. Furthermore many of the
timezones have been altered in the past. The Olson database (aka Tz
database) decomposes the world into zones in which the local clocks have all
been set to the same time since 1970 and it gives these zones unique names.
How to get the microseconds component of a combined date and time value. The
SI abbreviations for milliseconds and microseconds are ms and μs, respectively. The C standard library uses the letter u as an abbreviation for micro. Here is a struct
defined in /usr/include/sys/time.h:
struct timeval {
time_t tv_sec; /* seconds since Jan. 1, 1970 */
suseconds_t tv_usec; /* and microseconds */
};
How to put the process to sleep for a specified number of seconds. In
Python and Ruby the default version of sleep supports a fractional number of seconds.
How to cause a process to timeout if it takes too long.
Techniques relying on SIGALRM only work on Unix systems.
What the languages call their basic container types:
python |
java |
|
list, tuple, sequence |
array, List |
|
dict, mapping |
Map |
python:
Python has the mutable list
and the immutable tuple.
Both are sequences.
To be a sequence,
a class must implement __getitem__, __setitem__, __delitem__,
__len__, __contains__, __iter__,
__add__, __mul__, __radd__,
and __rmul__.
java:
Java provides powerful generics containers for list, set, dictionary, etc.
Array literal syntax.
The quote words operator, which is a literal for arrays of strings where
each string contains a single word.
How to get the number of elements in an array.
How to test whether an array is empty.
How to access a value in an array by index.
python:
A negative index refers to the length
+ index element.
>>> a = [1,2,3]
>>> a[-1]
3
How to update the value at an index.
What happens when the value at an out-of-bounds index is referenced.
Some techniques for
getting the index of an array element.
How to slice a subarray from an array by specifying a start index and an
end index; how to slice a subarray from an array by specifying an offset index
and a length index.
python:
Slices can leave the first or last index unspecified, in which case the
first or last index of the sequence is used:
>>> a=[1,2,3,4,5]
>>> a[:3]
[1, 2, 3]
Python has notation for taking every nth element:
>>> a=[1,2,3,4,5]
>>> a[::2]
[1, 3, 5]
The third argument in the colon-delimited slice argument can be negative,
which reverses the order of the result:
>>> a = [1,2,3,4]
>>> a[::-1]
[4, 3, 2, 1]
How to slice to the end of an array.
The examples take all but the first element of the array.
How to add and remove elements from the back or high index end of an array.
These operations can be used to use the array as a stack.
How to add and remove elements from the front or low index end of an array.
These operations can be used to use the array as a stack. They can be used
with the operations that manipulate the back of the array to use the array as a
queue.
How to create an array by concatenating two arrays; how to modify an array
by concatenating another array to the end of it.
How to create an array containing the same value replicated n times.
How to make an address copy, a shallow copy, and a deep copy of an array.
After an address copy is made, modifications to the copy also modify the
original array.
After a shallow copy is made, the addition, removal, or replacement of
elements in the copy does not modify the original array. However, if
elements in the copy are modified, those elements are also modified in the
original array.
A deep copy is a recursive copy. The original array is copied and a deep
copy is performed on all elements of the array. No change to the contents of
the copy will modify the contents of the original array.
python:
The slice operator can be used to make a shallow copy:
a2 = a[:]
list(v) always returns a list, but v[:] returns a value of
the same type as v. The slice operator can be used in this manner on strings
and tuples but there is little incentive to do so since both are immutable.
copy.copy can be used to make a shallow copy on types that don't support the slice
operator such as a dictionary. Like the slice operator copy.copy returns a value with the same type as the argument.
How arrays are passed as arguments.
How to iterate through the elements of an array.
How to iterate through the elements of an array while keeping track of the
index of each element.
Iterate over a range without instantiating it as a list.
How to convert a range to an array.
Python 3 ranges and Ruby ranges implement some of the functionality of
arrays without allocating space to hold all the elements.
python:
In Python 2 range() returns a list.
In Python 3 range() returns an object which implements the immutable
sequence API.
How to create a reversed copy of an array, and how to reverse an array in
place.
python:
reversed returns an iterator which can be used in a for/in construct:
print("counting down:")
for i in reversed([1,2,3]):
print(i)
reversed can be used to create a reversed list:
a = list(reversed([1,2,3]))
How to create a sorted copy of an array, and how to sort an array in place.
Also, how to set the comparison function when sorting.
How to remove extra occurrences of elements from an array.
python:
Python sets support the len, in, and for operators. It may be more efficient to work with the
result of the set constructor directly rather than convert it back to a list.
What the languages call their basic container types:
How to test for membership in an array.
How to compute an intersection.
python:
Python has literal notation for sets:
{1,2,3}
Use set and list to convert lists to sets
and vice versa:
a = list({1,2,3})
ensemble = set([1,2,3])
How to compute the relative complement of two arrays or sets; how to
compute the symmetric difference.
How to pick a random element from a set.
Create an array by applying a function to each element of a source array.
Create an array containing the elements of a source array which match a
predicate.
Return the result of applying a binary operator to all the elements of the
array.
python:
reduce is not needed to sum a list of numbers:
sum([1,2,3])
How to test whether a condition holds for all members of an array; how to
test whether a condition holds for at least one member of an array.
A universal test is always true for an empty array. An existential test is
always false for an empty array.
An existential test can readily be implemented with a filter. A universal
test can also be implemented with a filter, but it is more work: one must set
the condition of the filter to the negation of the predicate and test whether
the result is empty.
How to shuffle an array. How to extract a random sample from an array.
How to interleave arrays. In the case of two arrays the result is an array
of pairs or an associative list.
java:
Java does not have language level support for this function. To implement it in
Java, it can be done as follows:
1. Define a Pair class
2. Create a list of Pair Objects.
3. Loop through the two input lists and zip them to the list of pairs.
How to get the number of dictionary keys in a dictionary.
How to lookup a dictionary value using a dictionary key.
What happens when a lookup is performed on a key that is not in a
dictionary.
python:
Use dict.get() to avoid handling KeyError exceptions:
d = {}
d.get('lorem') # returns None
d.get('lorem', '') # returns ''
How to check for the presence of a key in a dictionary without raising an
exception. Distinguishes from the case where the key is present but mapped to
null or a value which evaluates to false.
How to remove a key/value pair from a dictionary.
How to create a dictionary from an array of pairs; how to create a
dictionary from an even length array.
How to merge the values of two dictionaries.
In the examples, if the dictionaries d1 and d2 share keys then the values from d2 will be used in the merged dictionary.
How to turn a dictionary into its inverse. If a key 'foo' is mapped to
value 'bar' by a dictionary, then its inverse will map the key 'bar' to the
value 'foo'. However, if multiple keys are mapped to the same value in the
original dictionary, then some of the keys will be discarded in the inverse.
How to iterate through the key/value pairs in a dictionary.
python:
In Python 2.7 dict.items() returns a list of pairs and dict.iteritems() returns an iterator on the list of pairs.
In Python 3 dict.items() returns a view object which can be iterated over, and dict.iteritems() has been removed.
How to convert the keys of a dictionary to an array; how to convert the
values of a dictionary to an array.
python:
In Python 3 dict.keys() and dict.values() return read-only views into the dict. The following code illustrates the
change in behavior:
d = {}
keys = d.keys()
d['foo'] = 'bar'
if 'foo' in keys:
print('running Python 3')
else:
print('running Python 2')
How to create a dictionary with a default value for missing keys; how to
compute and store the value on lookup.
Python has both functions and methods. Ruby only has methods: functions
defined at the top level are in fact methods on a special main object. Perl
subroutines can be invoked with a function syntax or a method syntax.
How to define a function.
How to invoke a function.
python:
When invoking methods and functions, parens are mandatory, even for
functions which take no arguments. Omitting the parens returns the function or
method as an object. Whitespace can occur between the function name and the
following left paren.
Starting with 3.0, print is treated as a function instead of a keyword.
Thus parens are mandatory around the print argument.
How incorrect number of arguments upon invocation are handled.
python:
TypeError is raised if the number of
arguments is incorrect.
How to declare a default value for an argument.
How to write a function which accepts a variable number of arguments.
python:
This function accepts one or more arguments. Invoking it without any
arguments raises a TypeError:
def poker(dealer, *players):
...
How to write a function which uses named parameters and how to invoke it.
python:
In a function definition, the splat operator * collects the remaining
arguments into a tuple. In a function invocation, the splat can be used to
expand an array into separate arguments.
In a function definition the double splat operator ** collects named
parameters into a dictionary. In a function invocation, the double splat
expands a dictionary into named parameters.
In Python 3 named parameters can be made mandatory:
def fequal(x, y, *, eps):
return abs(x-y) < eps
fequal(1.0, 1.001, eps=0.01) # True
fequal(1.0, 1.001) # raises TypeError
How to pass numbers or strings by reference.
The three common methods of parameter passing are pass by value, pass by reference, and pass by address. Pass by
value is the default in most languages.
When a parameter is passed by reference, the callee can change the value
in the variable that was provided as a parameter, and the caller will see the
new value when the callee returns. When the parameter is passed by value the
callee cannot do this.
When a language has mutable data types it can be unclear whether the
language is using pass by value or pass by reference.
How to pass an array or dictionary without making a copy of it.
How the return value of a function is determined.
How to return multiple values from a function.
How to define and invoke a lambda function.
python:
Python lambdas cannot contain statements, and thus are limited
to a single expression. Unlike named functions, the value of the
expression is returned, and a return is not necessary or permitted.
Lambdas are closures and can refer to local variables in scope, even if they
are returned from that scope.
If a closure function is needed that contains more than one statement, use
a nested function:
def make_nest(x):
b = 37
def nest(y):
c = x*y
c *= b
return c
return nest
n = make_nest(12*2)
print(n(23))
Python closures are read only.
A nested function can be returned and hence be invoked outside of its
containing function, but it is not visible by its name outside of its
containing function.
How to store a function in a variable.
python:
Python functions are stored in variables by default. As a result a function
and a variable with the same name cannot share the same scope. This is also the
reason parens are mandatory when invoking Python functions.
How to create a function with private state which persists between function
invocations.
python:
Here is a technique for creating private state which exploits the fact that
the expression for a default value is evaluated only once:
def counter(_state=[0]):
_state[0] += 1
return _state[0]
print(counter())
How to create a first class function with access to the local variables of
the local scope in which it was created.
python:
Python 2 has limited closures: access to local variables in the containing
scope is read only and the bodies of anonymous functions must consist of a
single expression.
Python 3 permits write access to local variables outside the immediate
scope when declared with nonlocal.
How to create a function which can yield a value back to its caller and
suspend execution.
python:
Python generators can be used in for/in
statements and list comprehensions.
The if statement.
The switch statement.
How to write a C-style for loop.
break exits a for
or while loop
immediately. continue
goes to the next iteration of the loop. redo
goes back to the beginning of the current iteration.
A list of control structure keywords. The loop control keywords from the
previous line are excluded.
The list summarizes the available control structures. It excludes the
keywords for exception handling, loading libraries, and returning from
functions.
How the do keyword is used.
Clauses added to the end of a statement to control execution.
How to raise exceptions.
How to catch exceptions.
The global variable name for the last exception raised.
How to define a new exception class.
How to catch exceptions of a specific type and assign the exception a name.
Clauses that are guaranteed to be executed even if an exception is thrown
or caught.
How to make a thread wait for another thread to finish.
python:
print appends a newline to the output. To suppress this
behavior, put a trailing comma after the last argument. If given multiple
arguments, print
joins them with spaces.
In Python 2 print
parses as a keyword and parentheses are not required:
print "Hello, World!"
How to read from standard input.
The names for standard input, standard output, and standard error.
How to open a file for writing. If the file exists its contents will be
overwritten.
How to open a file with the seek point at the end of the file. If the file
exists its contents will be preserved.
How to close a file.
How to read up to the next newline in a file.
How to iterate over a file line by line.
Remove a newline, carriage return, or carriage return newline pair from the
end of a line if there is one.
python:
Python strings are immutable. rstrip
returns a modified copy of the string. rstrip('\r\n')
is not identical to chomp
because it removes all contiguous carriage returns and newlines at the end of
the string.
How to read the contents of a file into memory.
How to write to a file handle.
How to flush a file handle that has been written to.
How to test whether a file exists; how to test whether a file is a regular
file (i.e. not a directory, special device, or named pipe).
How to copy a file; how to remove a file; how to rename a file.
java:
Apache Commons IO is the way to go, specifically FileUtils.copyFile().
How to set the permissions on the file.
For Perl, Python, and Ruby, the mode argument is in the same format as the
one used with the Unix chmod command. It uses
bitmasking to get the various permissions which is why it is normally an octal
literal.
The mode argument should not
be provided as a string such as "0755". Python and Ruby will raise an
exception if a string is provided. Perl will convert "0755" to 755
and not 0755 which is equal to 493 in decimal.
How to create and use a temporary file.
Temporary file libraries solve two problems: (1) finding an unused pathname,
and (2) putting the file in a location where the system will eventually remove
it should the application fail to clean up after itself.
How to create a file descriptor which writes to an in-memory buffer.
How to construct a pathname without hard coding the system file separator.
How to extract the directory portion of a pathname; how to extract the
non-directory portion of a pathname.
How to get the absolute pathname for a pathname. If the pathname is
relative it will be resolved against the current working directory.
In the examples provided, if /foo/bar is the current working directory and .. is the relative path, then the return value is /foo
How to iterate through the files in a directory.
python:
file() is the file handle constructor. file can be used as
a local variable name but doing so hides the constructor. It can still be
invoked by the synonym open(), however.
How to create a directory.
If needed, the examples will create more than one directory.
No error will result if a directory at the pathname already exists. An exception
will be raised if the pathname is occupied by a regular file, however.
How to perform a recursive copy. If the source is a directory, then the
directory and all its contents will be copied.
How to remove an empty directory. The operation will fail if the directory
is not empty.
How to remove a directory and all its contents.
How to determine if a pathname is a directory.
How to access arguments provided at the command line when the script was
run; how to get the name of the script.
How to process command line options.
Command line options are arguments which start with a special character
such as a hyphen '-'. Command line option libraries remove options arguments
from the ARGV array but leave other arguments for later processing.
How to get and set an environment variable. If an environment variable is
set the new value is inherited by child processes.
python:
It is possible to register code to be executed upon exit:
import atexit
atexit.register(print, "goodbye")
It is possible to terminate a script without executing registered exit code
by calling os._exit.
How to register a signal handling function.
How to test whether a file is executable.
How to execute an external command.
How to prevent shell injection.
How to invoke an external command and read its output into a variable.
The use of backticks for this operation goes back to the Bourne shell
(1977).
python:
A more concise solution is:
file = os.popen('ls -l /tmp').read()
os.popen was marked as deprecated in Python 2.6 but it is still available in Python
2.7 and Python 3.2.
How terminology is used in this sheet:
A few notes:
According to our terminology, Perl and Java packages are modules, not
packages.
PHP and C++ namespaces are another example of modules.
We prefer to reserve the term namespace
for divisions of the set of names imposed by the parser. For example, the
identifier foo in the Perl variables $foo and @foo belong to different namespaces. Another example of
namespaces in this sense is the Lisp-1 vs. Lisp-2 distinction: Scheme is a
Lisp-1 and has a single namespace, whereas Common Lisp is a Lisp-2 and has
multiple namespaces.
Some languages (e.g. Python, Java) impose a one-to-one mapping between
libraries and modules. All the definitions for a module must be in a single
file, and there are typically restrictions on how the file must be named and
where it is located on the filesystem. Other languages allow the definitions
for a module to be spread over multiple files or permit a file to contain
multiple modules. Ruby and C++ are such languages.
Execute the specified file. Normally this is used on a file which only
contains declarations at the top level.
How to reload a library. Altered definitions in the library will replace
previous versions of the definition.
How to augment the library path by calling a function or manipulating a
global variable.
How to augment the library path by setting an environment variable before
invoking the interpreter.
How to augment the library path by providing a command line option when
invoking the interpreter.
How to put code in a library which executes when the file is run as a
top-level script and not when the file is loaded as a library.
How to declare a section of code as belonging to a module.
How to declare a section of code as belonging to a submodule.
The punctuation used to separate the labels in the full name of a
submodule.
How to import all the definitions in a module.
How to import specific definitions from a module.
How to manage multiple versions of the interpreter on the same machine; how
to manage multiple versions of 3rd party libraries for the interpreter.
The examples show how to (1) create an installation, (2) enter the
environment, (3) display the current environment, and (4) exit the environment.
While in the environment executing the interpreter by its customary name
will invoke the version of the interpreter specified when the environment was
created. 3rd party libraries installed when in the environment will only be
available to processes running in the environment.
python:
virtualenv can be downloaded and installed by running this in the virtualenv source directory:
sudo python setup.py install
When virtualenv is run it creates a bin directory with copies of the python executable, pip, and easy_install. When the activate script is sourced the bin directory is prepended
to the PATH environment variable.
By default the activate script puts the name of the environment in the shell
prompt variable PS1. A different name can be provided with the --prompt flag when virtualenv is run. To remove the name completely it is necessary to edit the activate script.
How to show the installed 3rd party packages, and how to install a new 3rd
party package.
python:
Two ways to list the installed modules and the modules in the standard
library:
$ pydoc modules
$ python
>>> help('modules')
Most 3rd party Python code is packaged using distutils, which is in the Python standard library. The code is placed in a
directory with a setup.py file. The code is installed by running the Python
interpreter on setup.py:
The format of the file used to specify a package.
python:
Here is an example of how to create a Python package using distutils. Suppose that the file foo.py contains the
following code:
def add(x, y):
return x+y
In the same directory as foo.py create setup.py with the following contents:
#!/usr/bin/env python
from distutils.core import setup
setup(name='foo',
version='1.0',
py_modules=['foo'],
)
Create a tarball of the directory for distribution:
$ tar cf foo-1.0.tar foo
$ gzip foo-1.0.tar
To install a tar, perform the following:
$ tar xf foo-1.0.tar.gz
$ cd foo
$ sudo python setup.py install
If you want people to be able to install the package with pip, upload the tarball to the Python
Package Index.
perl:
The sheet shows how to create objects using the CPAN module Moose. To the
client of an object, Moose objects and traditional Perl objects are largely
indistinguishable. Moose provides convenience functions to aid in the
definition of a class, and as a result a Moose class definition and a
traditional Perl class definition look quite different.
The most common keywords used when defining a Moose class are has, extends, subtype.
The before, after, and around keywords are used to define method modifiers. The with keyword indicates that a Moose class implements a role.
The no Moose; statement at the end of a Moose
class definition removes class definition keywords, which would otherwise be visible
to the client as methods.
Here is how to define a class in the traditional Perl way:
package Int;
sub new {
my $class = shift;
my $v = $_[0] || 0;
my $self = {value => $v};
bless $self, $class;
$self;
}
sub value {
my $self = shift;
if ( @_ > 0 ) {
$self->{'value'} = shift;
}
$self->{'value'};
}
sub add {
my $self = shift;
$self->value + $_[0];
}
sub DESTROY {
my $self = shift;
my $v = $self->value;
print "bye, $v\n";
}
python:
As of Python 2.2, classes are of two types: new-style classes and old-style
classes. The class type is determined by the type of class(es) the class
inherits from. If no superclasses are specified, then the class is old-style.
As of Python 3.0, all classes are new-style.
New-style classes have these features which old-style classes don't:
How to create an object.
How to get and set an attribute.
python:
Defining explicit setters and getters in Python is considered poor style.
If it becomes necessary to add extra logic to attribute access, this can be achieved
without disrupting the clients of the class by creating a property:
def getValue(self):
print("getValue called")
return self.__dict__['value']
def setValue(self,v):
print("setValue called")
self.__dict__['value'] = v
value = property(fget=getValue, fset = setValue)
How instance variable access works.
How to define a method.
How to invoke a method.
How to define a destructor.
python:
A Python destructor is not guaranteed to be called when all references to
an object go out of scope, but apparently this is how the CPython
implementation works.
How to handle when a caller invokes an undefined method.
python:
__getattr__ is invoked when an attribute (instance variable or method) is missing. By
contrast, __getattribute__,
which is available for new-style classes (all classes in Python 3), is always invoked, and can be used to
intercept access to attributes that exist. __setattr__ and __delattr__
are invoked whenever an attribute is set or deleted, whether or not it already exists. The del statement is used to
delete an attribute.
How to use inheritance.
How to invoke a class method.
How to get an identifier for an object or a value.
How to get the class of an object.
python:
hasattr(o,'reverse') will return True if there is an instance variable named 'reverse'.
How to interpret a string as code and return its value.
python:
The argument of eval must be an expression
or a SyntaxError is raised. The Python version of the mini-REPL is thus considerably less
powerful than the versions for the other languages. It cannot define a function
or even create a variable via assignment.
python:
dir(o) returns methods and instance variables.
How to display the contents of a data structure for debugging purposes.
How to get the current line number and file name of the source code.
How to make an HTTP GET request and read the response into a string.
How to URL encode and URL unencode a string.
URL encoding, also called percent encoding, is described in RFC 3986. It replaces all
characters except for the letters, digits, and a few punctuation marks with a
percent sign followed by their two digit hex encoding. The characters which are
not escaped are:
A-Z a-z 0-9 - _ . ~
URL encoding can be used to encode UTF-8, in which case each byte of a
UTF-8 character is encoded separately.
When form data is sent from a browser to a server via an HTTP GET or an
HTTP POST, the data is percent encoded but spaces are replaced by plus signs + instead of %20. The MIME type for form data is application/x-www-form-urlencoded.
python:
In Python 3 the functions quote_plus, unquote_plus, quote, and unquote moved from urllib to urllib.parse.
urllib.quote replaces a space character with %20.
urllib.unquote does not replace + with a space
character.
How to encode binary data in ASCII using the Base64 encoding scheme.
How to encode data in a JSON string; how to decode such a string.
How to build an XML document.
An XML document can be constructed by concatenating strings, but the
techniques illustrated here guarantee the result to be well-formed XML.
How to parse XML
How to extract data from XML using XPath.
How to define a test class.
How to run all the tests in a test class; how to run a single test from the
test class.
How to test for equality.
How to test that a string matches a regex.
How to test whether an exception is raised.
How to define a setup method which gets called before every test.
How to define a cleanup method which gets called after every test.
How to check the syntax of code without executing it.
Flags to increase the warnings issued by the interpreter.
python:
The -t flag warns about inconsistent use of tabs in the source code. The -3
flag is a Python 2.X option which warns about syntax which is no longer valid
in Python 3.X.
A lint tool.
How to run a script under the debugger.
A selection of commands available when running the debugger. The gdb
commands are provided for comparison.
cmd |
perl -d |
python -m pdb |
rdebug |
gdb |
help |
h |
h |
h |
h |
list |
l [first, last] |
l [first, last] |
l [first, last] |
l [first, last] |
next statement |
n |
n |
n |
n |
step into function |
s |
s |
s |
s |
set breakpoint |
b |
b [file:]line |
b [file:]line |
b [file:]line |
list breakpoints |
L |
b |
info b |
i b |
delete breakpoint |
B num |
cl num |
del num |
d num |
continue |
c |
c |
c |
c |
show backtrace |
T |
w |
w |
bt |
move up stack |
u |
u |
u |
|
move down stack |
d |
down |
do |
|
print expression |
p expr |
p expr |
p expr |
p expr |
(re)run |
R |
restart [arg1[, arg2 …]] |
restart [arg1[, arg2 …]] |
r [arg1[, arg2 …]] |
quit debugger |
q |
q |
q |
q |
How to run a snippet of code repeatedly and get the user, system, and total
wall clock time.
How to run the interpreter on a script and get the number of calls and
total execution time for each function or method.
Both Python and Ruby have JVM implementations. It is possible to compile
both Python code and Ruby code to Java bytecode and run it on the JVM. It is
also possible to run a version of the Python interpreter or the Ruby
interpreter on the JVM which reads Python code or Ruby code, respectively.
Version of the scripting language JVM implementation used in this reference
sheet.
Command line name of the repl.
Command line name of the interpreter.
Command line name of the tool which compiles source to java byte code.
Code necessary to make java code accessible.
How to create a java object.
How to invoke a java method.
How to import names into the current namespace.
How to import a non-bundled Java library
How to import Java names which are the same as native names.
How to convert a native array to a Java array.
Can a Java class be subclassed?
Can a Java array be monkey patched?
Every program is a "script": a set of instructions for the
computer to follow. But early in the evolution of computers a need arose for
scripts composed not just of machine instructions but of other programs.
IBM introduced Job Control Language (JCL) with System 360 in 1964. Apparently
before JCL IBM machines were run by operators who fed programs through the
machine one at a time. JCL provided the ability to run a sequence of jobs as
specified on punch cards without manual intervention. The language was
rudimentary, not having loops or variable assignment, though it did have
parametrized procedures. In the body of a procedure a parameter was preceded by
an ampersand: &. The language had conditional logic for taking actions
depending upon the return code of a previously executed program. The return
code was an integer and zero was used to indicate success.
Also in 1964 Louis Pouzin wrote a program called RUNCOM for the CTSS operating system which could run scripts of CTSS commands.
Pouzin thought that shells or command line interpreters should be designed with
scriptability in mind and he wrote a paper to that effect.
Unix
The first Unix shell was the one Ken Thompson wrote in 1971. It was
scriptable in that it supported if and goto as external commands. It did not have assignment or variables.
In the late 1970s the Unix shell scripting landscape came into place. The
Bourne shell replaced the Thompson shell in 7th Edition Unix which shipped in
1979. The Bourne shell dispensed with the goto and instead provided an internally implemented if statement and while and for loops. The Bourne
shell had user defined variables which used a dollar sign sigil ($) for access
but not assignment.
The C-shell also made its appearance in 1979 with the 2nd Berkeley standard
distribution of Unix. It was so named because its control structures resembled
the control structures of C. The C-shell eventually acquired a bad reputation
as a programming environment. Its true contribution was the introduction of job
control and command history. Later shells such as the Korn Shell (1982) and the
Bourne Again Shell would attempt to incorporate these features in a manner
backward compatible with the Bourne shell.
Another landmark in Unix shell scripting was awk which appeared in 1977. awk is a specialized
language in that there is an implicit loop and the commands are by default
executed on every line of input. However, this was a common pattern in the text
file oriented environment of Unix.
more IBM developments
The PC made its appearance in 1981. It came with a command interpreter
called COMMAND.COM which could run batch files. PC-DOS for that matter was patterned
closely on CP/M, the reigning operating system of home computers at the time,
which itself borrowed from various DEC operating systems such as TOPS-10. I'm not
certain whether CP/M or even TOPS-10 for that matter had batch files. As a
programming environment COMMAND.COM was inferior
to the Unix shells. Modern Windows systems make this programming environment
available with CMD.EXE.
IBM released a scripting language called Rexx for its mainframe operating
systems in 1982. Rexx was superior as a programming environment to the Unix
shells of the time, and in fact Unix didn't have anything comparable until the
appearance of Perl and Tcl in the late 1980s. IBM also released versions of
Rexx for OS/2 and PC-DOS.
The Original
HTTP as defined in 1991
HTML Specification
Draft June 1993
WorldWideWeb Browser
Mosaic Web Browser
Tim Berners-Lee created the web in 1990. It ran on a NeXT cube. The browser
and the web server communicated via a protocol invented for the purpose called
HTTP. The documents were marked up in a type of SGML called HTML. The key
innovation was the hyperlink. If the user clicked on a hyperlink, the browser
would load the document pointed to by the link. A hyperlink could also take the
user to a different section of the current document.
The initial version of HTML included these tags:
html, head, title, body, h1, h2, h3, h4, h5, h6, pre,
blockquote, b, i, a, img, ul, ol, li, dl, dt, dd
The browser developed by Berners-Lee was called WorldWideWeb. It was
graphical, but it wasn't widely used because it only ran on NeXT. Nicola Pellow
wrote a text-only browser and ported it to a variety of platforms in 1991.
Mosaic was developed by Andreessen and others at NCSA and released in February
1993. Mosaic was the first browser which could display images in-line with
text. It was originally released for X Windows, and it was ported to Macintosh
a few months later. Ports for the Amiga and Windows were available in October
and December of 1993.
CGI and Forms
RFC 3875: CGI Version 1.1
2004
HTML 2.0
1995
NSAPI Programmer's Guide (pdf)
2000
Apache HTTP Server Project
History of mod_perl
FastCGI Specification
1996
The original web permitted a user to edit a document with a browser,
provided he or she had permission to do so. But otherwise the web was static.
The group at NCSA developed forms so users could submit data to a web server.
They developed the CGI protocol so the server could invoke a separate
executable and pass form data to it. The separate executable, referred to as a
CGI script in the RFC, could be implemented in almost any language. Perl was a
popular early choice. What the CGI script writes to standard out becomes the
HTTP response. Usually this would contain a dynamically generated HTML
document.
HTML 2.0 introduced the following tags to support forms:
form input select option textarea
The input
tag has a type
attribute which can be one of the following:
text password checkbox radio image hidden submit reset
If the browser submits the form data with a GET, the form data is included
in the URL after a question mark (?). The form data consists of key value
pairs. Each key is separated from its value by an equals (=), and the pairs are
separated from each other by ampersands (&). The CGI protocol introduces an
encoding scheme for escaping the preceding characters in the form data or any
other characters that are meaningful or prohibited in URLs. Typically, the web
server will set a QUERY_STRING environment variable to pass the GET form data
to the CGI script. If the browser submits the data with POST, the form data is encoded
in the same manner as for GET, but the data is placed in the HTTP request body.
The media type is set to application/x-www-form-urlencoded.
Andreessen and others at NCSA joined the newly founded company Netscape,
which released a browser in 1994. Netscape also released a web server with a
plug-in architecture. The architecture was an attempt to address the fact that
handling web requests with CGI scripts was slow: a separate process was created
for each request. With the Netscape web server, the equivalent of a CGI script
would be written in C and linked in to the server. The C API that the developer
used was called NSAPI. Microsoft developed a similar API called ISAPI for the
IIS web server.
The NCSA web server had no such plug-in architecture, but it remained the
most popular web server in 1995 even though development had come to a halt. The
Apache web server project started up that year; it used the NCSA httpd 1.3 code
as a starting point and it was the most popular web server within a year.
Apache introduced the Apache API, which permitted C style web development in
the manner of NSAPI and ISAPI. The Apache extension mod_perl, released in March 1996, was a
client of the Apache API. By means of mod_perl
an Apache web server could handle a CGI request in memory using an embedded
perl interpreter instead of forking off a separate perl process.
Ousterhout wrote an article for IEEE
Computer in 1998 which drew a distinction between system
programming languages and scripting languages. As examples of scripting
languages Ousterhout cited Perl, Python, Rexx, Tcl, Visual Basic, and the Unix
shells. To Ousterhout the biggest difference between the two classes of
language is that system programming languages are strongly typed whereas
scripting languages are typeless. Being typeless was in Ousterhout's mind a
necessary trait for a scripting language to serve as "glue language"
to connect the components of an application written in other languages.
Ousterhout also noted that system programming languages are usually compiled
whereas scripting languages are usually interpreted, and he predicted that the
relative use of scripting languages would rise.
HTML Templates
Web development with CGI scripts written in Perl was easier than writing
web server plug-ins in C. The task of writing Perl CGI scripts was made easier
by libraries such as cgi-lib.pl and CGI.pm. These libraries made the query
parameters available in a uniform fashion regardless of whether a GET or POST
request was being handled and also took care of assembling the headers in the
response. Still, CGI scripts tended to be difficult to maintain because of the
piecemeal manner in which the response document is assembled.
Rasmus Lerdorf adopted a template approach for maintaining his personal
home page. The document to be served up was mostly static HTML with an escaping
mechanism for inserting snippets of code. In version 2.0 the escapes were
<? code > and <?echo code >. Lerdorf released the code for the original
version, called PHP/FI and implemented in Perl, in 1995. The original version
was re-implemented in C and version 2.0 was released in 1997. For version 3.0,
released in 1998, the name was simplified to PHP. Versions 4.0 and 5.0 were
released in 2000 and 2004. PHP greatly increased in popularity with the release
of version 4.0. Forum software, blogging software, wikis, and other content
management systems (CMS) are often implemented in PHP.
Microsoft added a template engine called Active Server Pages (ASP) for IIS
in 1996. ASP uses <% code %> and <%= code %> for escapes; the code
inside the script could be any number of languages but was usually a dialect of
Visual Basic called VBScript. Java Server Pages (JSP), introduced by Sun in
1999, uses the same escapes to embed Java.
MVC Frameworks
The template approach to web development has limitations. Consider the case
where the web designer wants to present a certain page if the user is logged in,
and a completely unrelated page if the user is not logged in. If the request is
routed to an HTML template, then the template will likely have to contain a
branch and two mostly unrelated HTML templates. The page that is presented when
the user is not logged in might also be displayed under other circumstances,
and unless some code sharing mechanism is devised, there will be duplicate code
and the maintenance problem that entails.
The solution is for the request to initially be handled by a controller. Based
upon the circumstances of the request, the controller chooses the correct HTML
template, or view, to present to the user.
Websites frequently retrieve data from and persist data to a database. In a
simple PHP website, the SQL might be placed directly in the HTML template.
However, this results in a file which mixes three languages: SQL, HTML, and
PHP. It is cleaner to put all database access into a separate file or model,
and this also promotes code reuse.
The Model-View-Controller design pattern was conceived in 1978. It was used
in Smalltalk for GUI design. It was perhaps in Java that the MVC pattern was
introduced to web development.
Early versions of Java were more likely to be run in the browser as an
applet than in the server. Sun finalized the Servlet API in June 1997. Servlets
handled requests and returned responses, and thus were the equivalent of
controllers in the MVC pattern. Sun worked on a reference web server which used
servlets. This code was donated to the Apache foundation, which used it in the
Tomcat webserver, released in 1999. The same year Sun introduced JSP, which
corresponds to the view of the MVC pattern.
The Struts MVC framework was introduced in 2000. The Spring MVC framework
was introduced in 2002; some prefer it to Struts because it doesn't use
Enterprise JavaBeans. Hibernate, introduced in 2002, is an ORM and can serve as
the model of an MVC framework.
Ruby on Rails was released in 2004. Ruby has a couple of advantages over
Java when implementing an MVC framework. The models can inspect the database
and create accessor methods for each column in the underlying table on the fly.
Ruby is more concise than Java and has better string manipulation features, so
it is a better language to use in HTML templates. Other dynamic languages have
built MVC frameworks, e.g. Django for Python.
2.7: Language, Standard Library
Why
Python3 Summary of Backwardly Non-compatible Changes in Python 3
3.2: Language, Standard Library
PEP 8: Style Guide for
Python Code van Rossum
Python uses leading whitespace to indicate block structure. It is not
recommended to mix tabs and spaces in leading whitespace, but when this is
done, a tab is equal to 8 spaces. The command line options '-t' and '-tt' will
warn and raise an error respectively when tabs are used inconsistently for
indentation.
Regular expressions and functions for interacting with the operating system
are not available by default and must be imported to be used, i.e.
import re, sys, os
Identifiers in imported modules must be fully qualified unless imported
with from/import:
from sys import path
from re import *
There are two basic sequence types: the mutable list and the immutable
tuple. The literal syntax for lists uses square brackets and commas [1,2,3] and
the literal syntax for tuples uses parens and commas (1,2,3).
The dictionary data type literal syntax uses curly brackets, colons, and
commas { "hello":5, "goodbye":7 }. Python 3 adds a literal syntax for sets
which uses curly brackets and commas: {1,2,3}. This notation is also available
in Python 2.7. Dictionaries and sets are implemented using hash tables and as a
result dictionary keys and set elements must be hashable.
All values that can be stored in a variable and passed to functions as
arguments are objects in the sense that they have methods which can be invoked
using the method syntax.
Attributes are settable by default. This can be changed by defining a __setattr__ method for the class. The
attributes of an object are stored in the __dict__
attribute. Methods must declare the receiver as the first argument.
Classes, methods, functions, and modules are objects. If the body of a
class, method, or function definition starts with a string, it is
available at runtime via __doc__.
Code examples in the string which are preceded with '>>>' (the python repl prompt) can be executed by
doctest and compared with the output that follows.
Java was developed by James Gosling at Sun and made publicly available in
1996. It is a compiled, object-oriented language with syntax similar to C++.
It is perhaps best understood by how it differs from C++:
Compared to C++, the language is easier to use and easier to port. Applets
that ran in the browser were an early use of the language that helped
popularize it.
Version 1.1 (1997) added RMI and several types of nested classes including
anonymous classes. Version 1.2 (1998) added the ability to reflect on the
methods of a class or object at runtime. Version 1.4 (2002) added Perl style
regular expressions. Version 1.5 (2004) added generics, which are roughly
similar to C++ templates, and autoboxing, in which the compiler automatically
wraps a primitive type with an instance of a wrapper class when needed.
Over the years Java has developed an extensive standard library; as of
version 1.5 the standard library contains 3000 classes. Third parties are
encouraged to use their internet domain name to determine the location of their
code in the Java code namespace, a technique which makes it easy to integrate
code from non-standard sources.
Other languages have targeted the JVM: Jython since 1997, Scala and Groovy
since 2003, and Clojure since 2007. JVM languages include interpreters written
in Java and languages which can be compiled to bytecode.