In [1]:
#Here I import the relevant function libraries
#This can be done in many ways
#To import an entire library (e.g. scipy) so that functions accessed by typing "lib_name.func_name"
import matplotlib

#To import a subsection of a library
import scipy.optimize

#To import the whole library under a different name, so you can type "diff_name.func_name"
import numpy as np
import matplotlib.pyplot as plt

#To import just a particular function
from time import time
from math import sin
#This may be confusing but I'm grabbing a function usually denoted with "time.time"

In [2]:
#First of all we need to read the example data file into a list
#I will create an empty list and append the data as I read the file
#Note appending is a VERY inefficient opperation, however for less than a ~million repeats it's fine

#Make an empty list:
datalist = []

#Open the file. Here the 'r' means 'read'
f = open('ex_data.dat','r')

#Now read all the lines of the file into a list called lines
#For very large files this isn't always a good plan as you may not want to store the whole file in the memory
#Such files should be read line by line

#Now loop over ever line and record the value as a floating point number in datalist
#Notice that python's loop functionality is very flexible
for line in lines:
datalist.append(float(line))

In [3]:
#Now we want to we want to create a plot
#Create a figure called 'fig' and a set of axes called 'ax'
fig,ax = plt.subplots(figsize=(8,6))

#Create a dummy list of numbers to plot along the x-axis
#We will create a numpy array using the 'numpy.arange()' function
#The syntax for this is (start_value,non_inclusive_end_value,step_size)
#Here I also use the 'len()' function which returns the length of a list/array
x_vals = np.arange(0,len(datalist),1)

#Now plot the data on the axes
#Here I choose to plot the data as blue dots
ax.plot(x_vals,datalist,'b.')

#Set the limits, and label the axes
ax.set_xlim(0,100)
ax.set_ylim(-5,50)
ax.set_xlabel('x')
ax.set_ylabel('val')

#Force the plot to be displayed
plt.show()

#Save the plot
fig.savefig('ex_fig.jpg',dpi=300)

In [4]:
#Now make a function of a gaussian distribution
def gaussian(x,mean,sigma,A):

#Set the normalisation
norm = A/(sigma*np.sqrt(2.*np.pi))

return norm*np.exp( -((x-mean)**2.) / (2.*sigma**2.) )

In [5]:
#First we will fit this the bad, slow way

least_sq = 1000000000.

#Loop over each parameter and over each value of x and calculate where least_sq is minimised
#Make some value approximate guesses of where the min/max values of the parameters are
for mean in np.arange(50.,60.,1.):
for sigma in np.arange(5.,15.,1.):
for A in np.arange(500.,1500.,10.):
least_sq_temp = 0.
for i in range(0,len(x_vals),1):
#Add the current deviations to the temp value of least_sq
least_sq_temp += (datalist[i] - gaussian(x_vals[i],mean,sigma,A))**2.

#If least_sq_temp is a better fit than your current stored value then update the fit
if least_sq_temp < least_sq:
least_sq = least_sq_temp
A_fit = A
mean_fit = mean
sigma_fit = sigma

print 'A = ',A_fit
print 'Mean = ',mean_fit
print 'Sigma = ',sigma_fit

A =  1010.0
Mean =  56.0
Sigma =  9.0


In [6]:
#Reproduce the plot, but with the fit
fig,ax = plt.subplots(figsize=(8,6))

x_vals = np.arange(0,len(datalist),1)

#Here I select the plot the fit as a green, dotted, 2pt line
ax.plot(x_vals,gaussian(x_vals,mean_fit,sigma_fit,A_fit),lw=2,ls='dotted',color='green')
ax.plot(x_vals,datalist,'b.')

ax.set_xlim(0,100)
ax.set_ylim(-5,50)
ax.set_xlabel('x')
ax.set_ylabel('val')

plt.show()

#Save the plot
fig.savefig('ex_fit1_fig.jpg',dpi=300)

In [7]:
#Now do the fit the smart way, by using a built in curve fitting function
#The function returns the fit parameters and the covariances
#The syntax is (function_to_fit, x_values, data_values, p0 = list_of_initial_guesses)
fit,cov = scipy.optimize.curve_fit(gaussian, x_vals, datalist, p0 = [55.,10.,800.])

print 'Mean = ',fit[0]
print 'Sigma = ',fit[1]
print 'A = ',fit[2]

Mean =  56.0803931594
Sigma =  8.99317585744
A =  1008.72516834


In [8]:
#Reproduce the plot, but with the new fit
fig,ax = plt.subplots(figsize=(8,6))

x_vals = np.arange(0,len(datalist),1)

ax.plot(x_vals,gaussian(x_vals,fit[0],fit[1],fit[2]),lw=2,ls='dashed',color='red')
ax.plot(x_vals,datalist,'b.')

ax.set_xlim(0,100)
ax.set_ylim(-5,50)
ax.set_xlabel('x')
ax.set_ylabel('val')

plt.show()

#Save the plot
fig.savefig('ex_fit2_fig.jpg',dpi=300)

In [8]:


In [9]:
#This is the end of the example, but below are some useful things to note

In [10]:
#Array Arithmetic

#When dealing with large lists (the norm in astronomy) array arithmetic is nearly always namy times faster
#You should write as much of your code using numpy arrays as you possibly can
#The only real drawback of arrays is that you cannot append values to them, they have fixed length

#Here I will take the sine of a million element array using a loop and using array arithmetic
#Look at the difference in duration

#Generate an array of 10 million numbers between 0 and pi
val_arr = np.random.uniform(0.,np.pi,1E6)

#Recast these arrays as lists
val_list = list(val_arr)

start_time = time()
#Note I've used the shorthand version of range here (the default start is 0 and default step is 1)
for i in range(1000000):
#This will overwrite the current value with it's sine
val_list[i] = sin(val_list[i])
print "Loop time: ",(time()-start_time),"sec"

start_time = time()
val_arr = np.sin(val_arr)
print "Array arithmetic time: ",(time()-start_time),"sec"

Loop time:  1.03399991989 sec
Array arithmetic time:  0.0680000782013 sec


In [11]:
#Integer Arithmetic

#If you're not used to coding, I've got some bad news for you, computer are pretty dumb
#Python will identify what a number you give is by how you write it
#If the number doesn't have a decimal point it will assume it's an integer

print 3/2

print 3./2.

1
1.5


In [12]:
#Pass by Reference vs Pass by Value

#In most cases python defaults to passing by reference
#This means that when you say 'a = b' you are essentially telling the computer that they are the same thing
#Whatever you do to b is also done to a

a = [1,2,3,4,5]
b = a

print a,b

#Delete the value 3 from the list a
a.remove(3)

print a,b

[1, 2, 3, 4, 5] [1, 2, 3, 4, 5]
[1, 2, 4, 5] [1, 2, 4, 5]


In [13]:
#To overcome this you have to trick the computer into duplicating the list

a = [1,2,3,4,5]
#This will copy the array from 'start_point:end_point', the default is the start/end of the list
b = a[:]

a.remove(3)

print a,b

[1, 2, 4, 5] [1, 2, 3, 4, 5]


In [14]:
#There some other tricks you can play with this

a = [1,2,3,4,5]

#To return the list in reverse
#This is saying go through the whole list, but stepping one place backwards each time
print a[::-1]

#To return every other value
print a[::2]

[5, 4, 3, 2, 1]
[1, 3, 5]


In [15]:
#Another python is to use sets
#If you want to remove duplicates from a list you don't have to do a complicated algorithm

a = [1,1,2,3,3,3,3,4,5,5,5]

#To remove duplicates you can define the list as a set
#Sets are mathematical objects that cannot have duplicates
#Then you can redefine as a list
#Doing this all at once

print list(set(a))

[1, 2, 3, 4, 5]


In []:


In []:


In []: