Export SQL Server Stored Procedures using Python

The following short Python 2.7 script creates an export of all the stored procedures and functions within a SQL Server database. This will export each procedure as a CREATE PROCEDURE or CREATE FUNCTION statement. The code uses PyODBC to connect to the installation of SQL Server.


import pyodbc, time

# Database configuration
dbServer = "" # The database server to connect to
db = "" # The database to connect to
uid = "" # The user
pwd = "" # The password
connString = """
driver={SQL Server};
server=""" + dbServer + """;
database=""" + db + """;
user id=""" + uid + """;
password=""" + pwd + """;
"""
dateStr = time.strftime("%Y-%m-%d")
fileName = dbServer + "-" + db + "-" + dateStr + "-procedures.sql"
# i.e. TESTSERVER-TESTDB-2014-08-19-procedures.sql

# Connect to the database
conn = pyodbc.connect(connString)
cursor = conn.cursor()

##
# Export a stored procedure using its name and connected cursor
#
# @param string name: The name of the procedure to export
# @return string
##
def exportProcedure(name):
sql = "EXEC sp_helptext N'" + name + "';"
cursor.execute(sql)
rows1 = cursor.fetchall()
content = ""
for r in rows1:
content += r.Text.strip() + "\n"
return content

"""
sysobjects contains different types of objects
-- P is a procedure
-- If the type contains F then it is a function
-- V is a view
-- U is a table
category = 0 indicates it is user-created
"""

# Get a list of system objects sorted by views, tables, procedures, and functions created by the user
content = ""
sql = "SELECT * FROM sysobjects WHERE (type LIKE '%F%' OR type = 'V' OR type = 'U' OR type='P') AND category = 0 ORDER BY type DESC, name"
cursor.execute(sql)
rows = cursor.fetchall()

# Export each of the stored procedures and functions
for r in rows:
if r.type.strip() == "P" or "F" in r.type.strip():
content += exportProcedure(r.name) + "\n\n"

# Write out the content
f = open(fileName, "w")
f.write(content)
f.close()

conn.close()

Rijndael / AES (128 bit) in VB.net, PHP, Python

Being able to transport encrypted data is important in some of my projects at work. One-way hashes using MD5 usually suffice for most encryption purposes but Symmetric Encryption algorithms are important for encrypting and then decrypting data. For this, we use the Rijndael and AES algorithm in a few different languages. Here is what we do for VB.net, PHP, and Python. I also take no responsibility for misuse of this code.

Note: You will see a string replace which is necessary for passing encrypted data through a URL.

VB.net:

' Keys required for Symmetric encryption / decryption
Dim rijnKey As Byte() = {&H1, &H2, &H3, &H4, &H5, &H6, &H7, &H8, &H9, &H10, &H11, &H12, &H13, &H14, &H15, &H16}
Dim rijnIV As Byte() = {&H1, &H2, &H3, &H4, &H5, &H6, &H7, &H8, &H9, &H10, &H11, &H12, &H13, &H14, &H15, &H16}

Function Decrypt(S As String)
If S = "" Then
Return S
End If
' Turn the cipherText into a ByteArray from Base64
Dim cipherText As Byte()
Try
' Replace any + that will lead to the error
cipherText = Convert.FromBase64String(S.Replace("BIN00101011BIN", "+"))
Catch ex As Exception
' There is a problem with the string, perhaps it has bad base64 padding
Return S
End Try
'Creates the default implementation, which is RijndaelManaged.
Dim rijn As SymmetricAlgorithm = SymmetricAlgorithm.Create()
Try
' Create the streams used for decryption.
Using msDecrypt As New MemoryStream(cipherText)
Using csDecrypt As New CryptoStream(msDecrypt, rijn.CreateDecryptor(rijnKey, rijnIV), CryptoStreamMode.Read)
Using srDecrypt As New StreamReader(csDecrypt)
' Read the decrypted bytes from the decrypting stream and place them in a string.
S = srDecrypt.ReadToEnd()
End Using
End Using
End Using
Catch E As CryptographicException
Return S
End Try
Return S
End Function


Function Encrypt(S As String)
'Creates the default implementation, which is RijndaelManaged.
Dim rijn As SymmetricAlgorithm = SymmetricAlgorithm.Create()
Dim encrypted() As Byte
Using msEncrypt As New MemoryStream()
Dim csEncrypt As New CryptoStream(msEncrypt, rijn.CreateEncryptor(rijnKey, rijnIV), CryptoStreamMode.Write)
Using swEncrypt As New StreamWriter(csEncrypt)
'Write all data to the stream.
swEncrypt.Write(S)
End Using
encrypted = msEncrypt.toArray()
End Using

' You cannot convert the byte to a string or you will get strange characters so base64 encode the string
' Replace any + that will lead to the error
Return Convert.ToBase64String(encrypted).Replace("+", "BIN00101011BIN")
End Function

PHP:

$rijnKey = "\x1\x2\x3\x4\x5\x6\x7\x8\x9\x10\x11\x12\x13\x14\x15\x16";
$rijnIV = "\x1\x2\x3\x4\x5\x6\x7\x8\x9\x10\x11\x12\x13\x14\x15\x16";
function Decrypt($s){
global $rijnKey, $rijnIV;

if ($s == "") { return $s; }

// Turn the cipherText into a ByteArray from Base64
try {
$s = str_replace("BIN00101011BIN", "+", $s);
$s = base64_decode($s);
$s = mcrypt_decrypt(MCRYPT_RIJNDAEL_128, $rijnKey, $s, MCRYPT_MODE_CBC, $rijnIV);
} catch(Exception $e) {
// There is a problem with the string, perhaps it has bad base64 padding
// Do Nothing
}
return $s;
}

function Encrypt($s){
global $rijnKey, $rijnIV;

// Have to pad if it is too small
$block = mcrypt_get_block_size(MCRYPT_RIJNDAEL_128, 'cbc');
$pad = $block - (strlen($s) % $block);
$s .= str_repeat(chr($pad), $pad);

$s = mcrypt_encrypt(MCRYPT_RIJNDAEL_128, $rijnKey, $s, MCRYPT_MODE_CBC, $rijnIV);
$s = base64_encode($s);
$s = str_replace("+", "BIN00101011BIN", $s);
return $s;
}

Python 2.7:
You must first install the pyCrypto package which gives you access to AES functions. If you are using Windows, you can go to http://www.voidspace.org.uk/python/modules.shtml#index to download the pyCrypto binary.


from Crypto.Cipher import AES
import base64
import os

key = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16"
iv = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16"
text = "10"
replace_plus = "BIN00101011BIN"

# the block size for the cipher object; must be 16, 24, or 32 for AES
BLOCK_SIZE = 16

def repeat_to_length(string_to_expand, length):
return (string_to_expand * ((length/len(string_to_expand))+1))[:length]

pad = lambda s: s + (BLOCK_SIZE - len(s) % BLOCK_SIZE) \
* chr(BLOCK_SIZE - len(s) % BLOCK_SIZE)
unpad = lambda s : s[0:-ord(s[-1])]

# one-liners to encrypt/encode and decrypt/decode a string
# encrypt with AES, encode with base64
def EncodeAES(s):
c = AES.new(key, AES.MODE_CBC, iv)
s = pad(s)
s = c.encrypt(s)
s = base64.b64encode(s)
return s.replace("+", replace_plus)

def DecodeAES(enc):
c = AES.new(key, AES.MODE_CBC, iv)
enc = enc.replace(replace_plus, "+")
enc = base64.b64decode(enc)
enc = c.decrypt(enc)
return unpad(enc)

# encode a string
encoded = EncodeAES(text)
print 'Encrypted string:', encoded

# decode the encoded string
decoded = DecodeAES(encoded)
print 'Decrypted string:', decoded

Multidimensional Scaling

Multidimensional Scaling (MDS) is a linear embedding method used when we only know the pairwise distances between data points. For linear systems, MDS works well with as little as 10 points and the system is 2 dimensional. For this program, you choose the window size or embedding for the system and MDS identifies what the true embedding should be. The following program uses Logistic map data when r = 3.2 but you can change r to see how well MDS works as the map bifurcates and eventually becomes chaotic and nonlinear at 4. MDS breaks down at this point and the embedding defaults to just 1 over the embedding you choose.

# Requires Numpy and Scipy libraries

from numpy import *
import scipy.linalg
import random

r=3.2 # parameter on Logistic map
n = 10 # the number of timeseries points to use
d = 40 # your embedding choice

# Create a time series
def timeseries(number,throw, d):
  xList = []
  dataX0 = []
  t = 0
  while t < d:
    xList.append(.1)
    t += 1
  while t <= number+throw+d:
    x = r * xList[t-1] * (1 - xList[t-1]) # Logistic Map
    xList.append(x)
    if t > throw:
      y = xList[t-1]
      dataX0.append(x)
    t = t + 1
  return dataX0

# Construct an n x n centering matrix
# The form is P = I - (1/n) U where U is a matrix of all ones
def centering_matrix(n):
  P = eye(n) - 1/float(n) * ones((n,n))
  return P

def create_data(number, d):
  dataX0 = timeseries(number, 500, d)
  # Create a sliding window of size d
  data = []
  i = d
  while i < len(dataX0):
    data.append([])
    j = i - d
    while j < i:
      data[len(data)-1].append(dataX0[j])
      j = j + 1
    i = i + 1
  return data

def main():
  print "n =",n, ": number of timeseries points used"
  print "d =",d, ": embedding"

  P = centering_matrix(n)
  # Create the data with the number of dimensions
  data = create_data(n, d)
  X = pairwise_distances(data)
  A = -1/2.0 * P * X * P
  # Calculate the eigenvalues/vectors
  [vals, vectors] = scipy.linalg.eig(A)
  # Sort the values
  vals = sort(vals)
  embedding = 0
  for x in vals:
    if x > 10**(-10):
      embedding += 1
  print "mds embedding =",embedding

# Compute the pairwise distance between vector x and y
def metric(x1, y1):
  d = 2
  summ = []
  i = 0
  while i < len(x1):
    # in this case use euclidean distance
    summ.append((x1[i] - y1[i])**d)
    i = i + 1
  return sum(summ) ** (1 / float(d))

# Return a matrix of pairwise distances
def pairwise_distances(data):
  distances = []
  i = 0
  while i < len(data):
    distances.append([])
    x1 = data[i]
    j = 0
    while j < len(data):
      y1 = data[j]
      distances[i].append(metric(x1, y1)**2)
      j = j + 1
    i = i + 1
  return distances

main()

In-depth details of Particle Swarm Optimization

In the first part of this series on Particle Swarm Optimization (PSO), I posted an general overview of the algorithm and an example of how the algorithm searches for the minimum of a parabola. In this post, I explain an example of the algorithm constructed in Python. I will end with an example problem, called the Rastrigin function, a function that researchers use to test their optimization algorithms on.

The full Python code that I will be discussing is located here. It may be helpful to open this code since I will only be discussing specific portions of the code. If you want to run the code, please save the page as a .py (Python) file.

The file is set up in the following way:
– global variables and user-defined parameters
– function F(): the function being optimized, in the example file, I use the Rastrigin function
– function main(): the function that constructs the swarm and attempts to optimize F
– class Particle: the functions and local variables associated with each Particle

The function main, constructs the swarm and iterates for a user-defined number of trials (cmax). The best velocity, position, and error are saved and output at the end of the program. It is not necessary to use the number of trials to signal the end of optimization. One could also stop the optimization if the best error achieves a certain value or if all of the particles are below some error bound.

During optimization, particles’ behavior are determined by the following set of equations:

The first equation vij(t) determines the velocity of particle i in dimension j for time step t. Particles adjust their trajectory in the search space according to their local neighborhood’s best position x**, their own best position x*, and their previous velocity. The first term of the velocity characterizes how much the previous velocity affects the new velocity with w set to 1 in the code. In the code, a dampening parameter is included that can be used to slow particles as optimization progresses. The second term or “cognitive” term with c1r1 pull particles back to their best previous position. The third term or “social” term with c2r2 pushes particles together so they fly collaboratively through space. The second equation xij(t) determines the position of particle i in dimension j for time step t. New positions are determined by the previous position and the current velocity.

In the Python code, the particle class randomly initializes the positions and velocities (the InitPosition(self) and InitVelocity(self) functions) for each particle within some search space. After each time step in the optimization, the UpdateVelocity(self) and UpdatePosition(self) functions are called to construct new velocity and position vectors for each particle.

Within the main function, the particles’ positions are evaluated after each update as the swarm moves towards the optimal position in the space.

The following graphs are from an example trial of the algorithm run on the 1-D Rastrigin function:

PSO variations are often tested on the Rastrigin function because of how simple changes to the parameter d can extend the function to higher dimensions. Additionally, the number of deep local optima is quite large, as seen in the graph below. The global optimum is at x=0. For these results, the following parameters were used:

# The dimension of the function
num_dimensions = 1
# The number of particles in the swarm
num_particles = 5

# Bounds on the positions and velocities
v_max = 5
v_min = -5
p_min = -10
p_max = 10
# The number of updates to do
cmax = 1000
# The amount to dampen the velocity after each update
dampener = 1
dampen_rate = 1
Initial Swarm Positions (the green dots)
After 250 steps
After 250 steps
After 500 updates
After 500 updates
After 750 steps
After 750 steps, one particle ends up very far from the rest of the swarm, possibly because the particle ends up overshooting where the swarm is
After 1000 steps

As you can see, the swarm does not necessarily converge to the global optimum (x = 0) but they are all somewhat close. If you measured the best position, at least one particle has encountered the optimal position during optimization but the particle did not end up settling down because of how the parameters, the dampening factor, wc1, and c2 were chosen. One of the simplest ways to improve the algorithm is to allowing the velocity to decay (w < 1 or dampening factor) which should help all of the particles converge to an optimum.

Of course, this is just one example of how to set up the algorithm. Many researchers have experimented with portions of the algorithm trying to find the best way to optimize a function. As we will see later, changes to the particle’s equations and the distributions used to select r1 and r2 are just a few open problems in PSO.

This post is part of a series:

  1. An overview of Particle Swarm Optimization Algorithm
  2. In-depth details of the algorithm
  3. More applications of Particle Swarm Optimization

Estimating Lyapunov Spectra of ODEs using Python

Wolf et al. (1985) outlined an algorithm that estimates the Lyapunov spectra of systems whose equations are known using local Jacobian matrices and Gram-Schmidt orthonormalization. Python code is available for Wolf’s algorithm and discrete maps and their inverted counterparts.  I have adapted this code to estimate Lyapunov spectra for continuous-time systems like the Lorenz attractor and Rossler attractor. Additionally, Python code is available to generate time series for ordinary differential equations. Lyapunov spectrum code is also available on Clint Sprott’s website.

Lorenz Attractor

Source Code:

import math, operator, random
h = 0.01
cmax = 1000     #   number of iterations to perform
choice = input("Which system would you like?\n (1) Rossler \n (2) Lorenz\n")
while str(choice) != "1" and str(choice) != "2":
  print("Please select 1 or 2")
  choice = input("Which system would you like?\n (1) Rossler \n (2) Lorenz\n")
print "\n",
a = 0.2
b = 0.2
c = 5.7

def derivs(x, xnew, n):
  if choice == 1:
    return Rossler(x, xnew, n)
  else:
    return Lorenz(x, xnew, n)

def Rossler(x, xnew, n):
  # Nonlinear Rossler Equations
  xnew[1]  = -x[2]-x[3]
  xnew[2]  = x[1] + a * x[2]
  xnew[3]  = b + x[3] * (x[1] - c)
  # Linearized Rossler Equations
  xnew[4]  = -1*x[7]-x[10]
  xnew[5]  = -1*x[8]-x[11]
  xnew[6]  = -1*x[9]-x[12]
  xnew[7]  = x[4] + a*x[7]
  xnew[8]  = x[5] + a*x[8]
  xnew[9]  = x[6] + a*x[9]
  xnew[10] = x[3]*x[4] + x[1]*x[10] - c*x[10]
  xnew[11] = x[3]*x[5] + x[1]*x[11] - c*x[11]
  xnew[12] = x[3]*x[6] + x[1]*x[12] - c*x[12]
  return [x, xnew]

def Lorenz(x, xnew, n):
  # Nonlinear Lorenz Equations
  xnew[1]  = 10 * (x[2] - x[1])
  xnew[2]  = -1*x[1] * x[3] + 28 * x[1] - x[2]
  xnew[3]  = x[1] * x[2] - 8/3.0 * x[3]
  # Linearized Lorenz Equations
  xnew[4]  = -10 * x[4] + 10 * x[7]
  xnew[5]  = -10 * x[5] + 10 * x[8]
  xnew[6]  = -10 * x[6] + 10 * x[9]
  xnew[7]  = 28*x[4]-x[3]*x[4] - x[7] - x[1]*x[10]
  xnew[8]  = 28*x[5]-x[3]*x[5] - x[8] - x[1]*x[11]
  xnew[9]  = 28*x[6]-x[3]*x[6] - x[9] - x[1]*x[12]
  xnew[10] = x[2]*x[4] + x[1]*x[7] - 8/3.0 * x[10]
  xnew[11] = x[2]*x[5] + x[1]*x[8] - 8/3.0 * x[11]
  xnew[12] = x[2]*x[6] + x[1]*x[9] - 8/3.0 * x[12]
  return [x, xnew] 

def timeseries(cmax):
  X0 = []
  Y0 = []
  Z0 = []
  xList = []
  yList = []
  zList = []
  changeInTime = h
  # Initial conditions
  if choice == 1:
    # Rossler
    X0.append(0.01)
    Y0.append(0.01)
    Z0.append(0.01)
  else:
    # Lorenz
    X0.append(0)
    Y0.append(1)
    Z0.append(0)
  t = 0
  while len(xList) <= cmax:
    [x,y,z] = Rk4o(X0, Y0, Z0, h, len(X0))
    X0.append(x)
    Y0.append(y)
    Z0.append(z)
    if 200 < t:
      xList.append(x)
      yList.append(y)
      zList.append(z)
    changeInTime += h
    t = t + 1
  return [xList, yList, zList]

def f(x,y,z):
  if choice == 1:
    dxdt = -y-z
  else:
    dxdt = 10 * (y - x)
  return dxdt

def g(x,y,z):
  if choice == 1:
    dydt = x + a * y
  else:
    dydt = 28 * x - y - x*z
  return dydt

def e(x,y,z):
  if choice == 1:
    dzdt = b + z * (x - c)
  else:
    dzdt = x * y - 8/3.0 * z
  return dzdt

def Rk4o(xList, yList, zList, h, t):
  k1x = h*f(xList[t-1],yList[t-1], zList[t-1])
  k1y = h*g(xList[t-1],yList[t-1], zList[t-1])
  k1z = h*e(xList[t-1],yList[t-1], zList[t-1])

  k2x = h*f(xList[t-1] + k1x/2,yList[t-1] + k1y/2, zList[t-1] + k1y/2)
  k2y = h*g(xList[t-1] + k1x/2,yList[t-1] + k1y/2, zList[t-1] + k1y/2)
  k2z = h*e(xList[t-1] + k1x/2,yList[t-1] + k1y/2, zList[t-1] + k1y/2)

  k3x = h*f(xList[t-1] + k2x/2,yList[t-1] + k2y/2, zList[t-1] + k2y/2)
  k3y = h*g(xList[t-1] + k2x/2,yList[t-1] + k2y/2, zList[t-1] + k2y/2)
  k3z = h*e(xList[t-1] + k2x/2,yList[t-1] + k2y/2, zList[t-1] + k2y/2)

  k4x = h*f(xList[t-1] + k3x/2,yList[t-1] + k3y/2, zList[t-1] + k3y/2)
  k4y = h*g(xList[t-1] + k3x/2,yList[t-1] + k3y/2, zList[t-1] + k3y/2)
  k4z = h*e(xList[t-1] + k3x/2,yList[t-1] + k3y/2, zList[t-1] + k3y/2)

  x = xList[t-1] + k1x/6 + k2x/3 + k3x/3 + k4x/6
  y = yList[t-1] + k1y/6 + k2y/3 + k3y/3 + k4y/6
  z = zList[t-1] + k1z/6 + k2z/3 + k3z/3 + k4z/6
  return [x,y,z]

n = 3           #   number of variables in nonlinear system
nn=n*(n+1)      #   total number of variables (nonlinear + linear)
m = 0
x = []
xnew = []
v = []
ltot = []
znorm = []
gsc = []
A = []
B = []
C = []
D = []
i = 0
while i <= nn:
  x.append(0)
  xnew.append(0)
  v.append(0)
  A.append(0)
  B.append(0)
  C.append(0)
  D.append(0)
  i = i + 1

i = 0
while i <= n:
  ltot.append(0)
  znorm.append(0)
  gsc.append(0)
  i = i + 1

irate=10      #   integration steps per reorthonormalization
io= 100       #   number of iterations between normalization

#   initial conditions for nonlinear maps
#   must be within the basin of attraction

# Generate a random transient before starting the initial conditions
i = 1
while i <= n:
  v[i] = 0.001
  i = i + 1

transient = random.randint(n,100000)
# Generate the initial conditions for the system
[tempx,tempy,tempz] = timeseries(transient)

v[1] = tempx[len(tempx)-1]
v[2] = tempy[len(tempy)-1]
v[3] = tempz[len(tempz)-1]

i = n+1
while i <= nn:  #   initial conditions for linearized maps
  v[i]=0        #   Don't mess with these; they are problem independent!
  i = i + 1

i = 1
while i <= n:
  v[(n+1)*i]=1
  ltot[i]=0
  i = i + 1
#print "v = ",v
t=0
w = 0
while (w < cmax):
  j = 1
  while j <= irate:
    i = 1
    while i <= nn:
      x[i]=v[i]
      i = i + 1
    [x, xnew] = derivs(x, xnew, n)
    i = 1
    while i <= nn:
      A[i] = xnew[i]
      x[i] = v[i] + (h*A[i]) / 2.0
      i = i + 1
    [x, xnew] = derivs(x, xnew, n)
    i = 1
    while i <= nn:
      B[i] = xnew[i]
      x[i] = v[i] + (h*B[i]) / 2.0
      i = i + 1
    [x, xnew] = derivs(x, xnew, n)
    i = 1
    while i <= nn:
      C[i] = xnew[i]
      x[i] = v[i] + h*C[i]
      i = i + 1
    [x, xnew] = derivs(x, xnew, n)
    i = 1
    while i <= nn:
      D[i] = xnew[i]
      v[i] = v[i] + h*(A[i] + D[i] + 2*(B[i] + C[i]))/6.0
      i = i + 1
    t = t + h
    j = j + 1

  #construct new orthonormal basis by gram-schmidt:
  znorm[1]=0  #normalize first vector

  j = 1
  while j <= n:
    znorm[1]=znorm[1]+v[n*j+1]**2
    j = j + 1

  znorm[1] = math.sqrt(znorm[1])

  j = 1
  while j <= n:
    v[n*j+1]=v[n*j+1]/znorm[1]
    j = j + 1

  #generate new orthonormal set:
  j = 2
  while j <= n:
    k = 1
    while k <= j-1:
      gsc[k]=0
      l = 1
      while l <= n:
        gsc[k]=gsc[k]+v[n*l+j]*v[n*l+k]
        l = l + 1
      k = k + 1

    k = 1
    while k <= n: #construct a new vector
      l = 1
      while l <= j-1:
        v[n*k+j]=v[n*k+j]-gsc[l]*v[n*k+l]
        l = l + 1
      k = k + 1

    znorm[j]=0     #calculate the vector's norm

    k = 1
    while k <= n: #construct a new vector
      znorm[j]=znorm[j]+v[n*k+j]**2
      k = k + 1

    znorm[j]=math.sqrt(znorm[j])

    k = 1
    while k <= n: #normalize the new vector
      v[n*k+j] = v[n*k+j] / znorm[j]
      k = k + 1

    j = j + 1

  k = 1
  while k <= n: #update running vector magnitudes
    if znorm[k] > 0:
      ltot[k] = ltot[k] + math.log(znorm[k])
    k = k + 1

  m = m + 1
  if m % io == 0 or w == cmax-1:  #normalize exponent and print every io iterations
    lsum=0
    kmax=0
    k = 1
    while k <= n:
      le = ltot[k] / t
      lsum = lsum + le
      if lsum > 0:
        lsum0 = lsum
        kmax = k
      k = k + 1
  w = w + 1
if choice == 1:
  print "Rossler:"
else:
  print "Lorenz:"
print n, "LEs = "
lsum=0
kmax=0
k = 1
while k <= n:
  le = ltot[k] / t
  lsum = lsum + le
  if lsum > 0:
    lsum0 = lsum
    kmax = k
  print le
  k = k + 1

Modelling Chaotic Systems using Support Vector Regression and Python

Similar to Neural Networks, Support Vector Machines (SVM) are powerful modelling techniques in machine learning that can be applied to a variety of tasks. SVMs are primarily used to classify data while one of its variants, Support Vector Regression (SVR), can be used for time series analysis. In this post, we will perform SVR on chaotic time series using common Python libraries and a simple wrapper program.

The following libraries will be required to use the script I will demonstrate using Python 2.7:

  1. Numpy
  2. Scipy
  3. Scikits.Learn (a toolbox that hosts a variety of machine learning algorithms and requires Numpy and Scipy)
  4. MatPlotLib (used for graphing and visualization)
  5. Other libraries such as SetupTools may be required depending on your system

You can download these from their websites or if you are using Windows “Unofficial Windows Binaries for Python Extension Packages

With the packages installed, we can begin to build a program that fits an SVR model to chaotic data. First, we define a time series. In this case, I will use the Delayed Henon Map with a delay of 4, for more information please see my posts on that system.

The model will be embedded in a d dimension space.

def TimeSeries(cmax, transient, d):
  return DelayedHenon(cmax, transient, d)
def DelayedHenon(cmax, transient, d):
  temp = []
  rF = [] # Features
  rT = [] # Targets
  dX = []
  i = 0
  while i < d:
    dX.append(0.1)
    i = i + 1
  while i <= cmax + transient:
    x = 1 - 1.6 * dX[i-1] ** 2 + 0.1 * dX[i-4]
    if i > transient-d:
      temp.append(x)
    if i > transient:
      rT.append(x)
    dX.append(x)
    i = i + 1
  rF = SlidingWindow(temp, d)
  return {"F":rF, "T":rT}

I added a function that formats the data properly so it can be used by the SVR Fit function:

def SlidingWindow(x, d):
  i = d
  y = []
  while i < len(x):
    temp = []
    j = 1
    while j <= d:
      temp.append(x[i - d + j])
      j = j + 1
    y.append(temp)
    i = i + 1
  return y

We then define the model, note that SVR has a few user defined parameters which can be chosen to fit the data better.

  clf = svm.SVR(kernel='rbf', degree=d, tol=tolerance, gamma=gam, C=c, epsilon=eps)

In this case, you can choose the following parameters to produce a model with an error near 0.0035 for 1024 points taken from the time series. In practice, you can create a training algorithm to find these values.

  gam = 0.117835847192
  c = 1.02352954164
  eps = 0.00163811344978
  tolerance = 0.00539604526663
  d = 5
  cmax = 1024

Finally, we create functions to fit the time series and plot the results:

def Fit(cmax, d, tolerance, gam, c, eps):
  data = TimeSeries(cmax, random.randint(0,10000), d)
  test = TimeSeries(cmax, random.randint(0,10000), d)

  x = data["F"]
  y = data["T"]

  sout = test["T"]

  clf = svm.SVR(kernel='rbf', degree=d, tol=tolerance, gamma=gam, C=c, epsilon=eps)

  try:
    m = clf.fit(x, y)
  except:
    return [1000, clf, sout, sout]

  mout = clf.predict(test["F"])

  # Determine the error for the system
  err = 0
  i = 0
  while i < len(mout):
    err += (mout[i] - sout[i])**2
    i = i + 1
  err = math.sqrt(err / (len(mout)+.0))

  return [err, clf, mout, sout]

def Plotter(err, mout, sout, SaveGraph):
  plt.title("SVR: Err=" + str(err))
  plt.scatter(sout[0:len(sout)-2], sout[1:len(sout)-1], s=1, marker='o', edgecolors='none')
  plt.scatter(mout[0:len(mout)-2], mout[1:len(mout)-1], s=1, marker='o', edgecolors='none', facecolor='r')

  plt.show()
  if SaveGraph:
    try:
      plt.savefig("svr")
    except:
      # Do Nothing
      SaveGraph = SaveGraph
  plt.clf()

Here is the final program after putting all of the functions together (and moving some stuff around) with example output:

#!/usr/bin/env python
from scikits.learn import svm
import random
import math
import matplotlib.pyplot as plt

d = 5       # Degree of the SVR model
cmax = 1024 # Number of points to run

def TimeSeries(cmax, transient, d):
  return DelayedHenon(cmax, transient, d)

def main():
  global cmax, d, perturbation
  gam = 0.117835847192
  c = 1.02352954164
  eps = 0.00163811344978
  tolerance = 0.00539604526663
  # Create and Fit a model to data, we just need to create support vectors
  [err, svr, mout, sout] = Fit(cmax, d, tolerance, gam, c, eps)
  Plotter(err, mout, sout, False)
  print "error: ", err
def DelayedHenon(cmax, transient, d):
  temp = []
  rF = [] # Features
  rT = [] # Targets
  dX = []
  i = 0
  while i < d:
    dX.append(0.1)
    i = i + 1
  while i <= cmax + transient:
    x = 1 - 1.6 * dX[i-1] ** 2 + 0.1 * dX[i-4]
    if i > transient-d:
      temp.append(x)
    if i > transient:
      rT.append(x)
    dX.append(x)
    i = i + 1
  rF = SlidingWindow(temp, d)
  return {"F":rF, "T":rT}
def Fit(cmax, d, tolerance, gam, c, eps):
  data = TimeSeries(cmax, random.randint(0,10000), d)
  test = TimeSeries(cmax, random.randint(0,10000), d)

  x = data["F"]
  y = data["T"]

  sout = test["T"]

  clf = svm.SVR(kernel='rbf', degree=d, tol=tolerance, gamma=gam, C=c, epsilon=eps)

  try:
    m = clf.fit(x, y)
  except:
    return [1000, clf, sout, sout]

  mout = clf.predict(test["F"])

  # Determine the error for the system
  err = 0
  i = 0
  while i < len(mout):
    err += (mout[i] - sout[i])**2
    i = i + 1
  err = math.sqrt(err / (len(mout)+.0))

  return [err, clf, mout, sout]

def Plotter(err, mout, sout, SaveGraph):
  plt.title("Delayed Henon Map and SVR Model")
  p1 = plt.scatter(sout[0:len(sout)-2], sout[1:len(sout)-1], s=1, marker='o', edgecolors='none')
  p2 = plt.scatter(mout[0:len(mout)-2], mout[1:len(mout)-1], s=1, marker='o', edgecolors='none', facecolor='r')
  plt.legend([p2, p1], ["Model (Err="+ str(round(err,4))+")", "Map"], loc=8)

  plt.draw()
  plt.show()
  if SaveGraph:
    try:
      plt.savefig("svr")
    except:
      # Do Nothing
      SaveGraph = SaveGraph
  plt.clf()

def SlidingWindow(x, d):
  i = d
  y = []
  while i < len(x):
    temp = []
    j = 1
    while j <= d:
      temp.append(x[i - d + j])
      j = j + 1
    y.append(temp)
    i = i + 1
  return y

def WriteFile(err, BestTol, BestGam, BestC, BestEps, s):
  # Write the model, sensitivities to a file
  f = open("sensitivities", "w")
  f.write("Model Parameters:\n")
  f.write("d=" + str(d) + "\n")
  f.write("gam=" + str(BestGam) + "\n")
  f.write("c=" + str(BestC) + "\n")
  f.write("eps=" + str(BestEps) + "\n")
  f.write("tolerance=" + str(BestTol) + "\n")
  f.write("Model Data:\n")
  f.write("err=" + str(err) + "\n")
  f.write("s=" + str(s) + "\n")
  f.close()
main()

Example Output:

err:  0.00381722720161
Example Output

To fit data taken from other systems, simply change the Timeseries function.

Lyapunov spectra of inverted discrete dynamical systems

One can estimate the Lyapunov spectrum of dynamical systems and their inverted counterparts using local Jacobian matrices and Wolf’s algorithm. Basically, Jacobian matrices are calculated at each point in a trajectory and multiplied together to form a product matrix whose eigenvalues represent the Lyapunov exponents for the system studied. More specifically, these exponents measure the divergence of a ball of initial conditions as they move around an attractor, in this case, a strange attractor. As the Jacobians are multiplied together, Gram-Schmidt reorthonormalization is used to maintain the system of coordinates and unify the divergence because the ball of initial conditions quickly becomes an ellipisoid.

In 1985, Alan Wolf et al. published the paper that outlined a program that can be used to determine the spectrum of Lyapunov exponents for system’s whose equations are known. Wolf’s algorithm requires that the equations are linearized. After performing the necessary calculations, one can plug them into the program available here (in Python) and estimate the Lyapunov spectrum.

Here is an example of how to linearize the Henon map and more complex Tinkerbell map:

The Henon map:

The Henon’s Jacobian matrix:

Linearizing the Henon map:

Partial Code for Wolf’s algorithm:

def Henon(x, xnew, n):
	a=1.4
	b=0.3
	#   Nonlinear Henon map equations:
	xnew[1] = 1-a*x[1]*x[1]+b*x[2]
	xnew[2] = x[1]
	#   Linearized Henon map equations:
	xnew[3] = -2*a*x[1]*x[3]+b*x[5]
	xnew[4] = -2*a*x[1]*x[4]+b*x[6]
	xnew[5] = x[3]
	xnew[6] = x[4]
	return [x, xnew]

The Tinkerbell map:


The Tinkerbell’s Jacobian matrix:

Linearizing the Tinkerbell map:

Partial Code for Wolf’s algorithm:

def tinkerbell(x, xnew, n):
	a =  0.9
	b = -0.6
	c =  2.0
	d =  0.5
	# Nonlinear
	xnew[1] = x[1]**2 - x[2]**2 + a*x[1] + b*x[2]   # x
	xnew[2] = 2*x[1]*x[2] + c * x[1] + d * x[2]	 # y
	# Linearized
	xnew[3] = 2 * x[1] * x[3] + a * x[3] - 2 * x[2] * x[5] + b * x[5] # delta x
	xnew[4] = 2 * x[1] * x[4] + a * x[4] - 2 * x[2] * x[6] + b * x[6] # delta x
	xnew[5] = 2 * x[2] * x[3] + c * x[3] + 2 * x[1] * x[5] + d * x[5] # delta y
	xnew[6] = 2 * x[2] * x[4] + c * x[4] + 2 * x[1] * x[6] + d * x[6] # delta y
	return [x, xnew]

To estimate the Lyapunov spectrum of the inverted system. One can simply reverse the signs on the exponent values of the forward system or take the more roundabout way and estimate them using Wolf’s algorithm.

To estimate the exponents, it is necessay to obtain to invert the system’s equations. In some cases, this is not possible such as the case of the Logistic map where each point could come from one of two previous points. To estimate the inverted system’s Jacobians (and the inverted system’s equations), one can simply invert the Jacobian matrix of the forward equations.

For the Henon map:

For the Tinkerbell map (and those with a magnifying glass):

The last issue that needs to be solved is generating data for the system. Since the system is inverted, the system has most likely turned from an attractor to a repellor and thus any trajectory will wander off to infinity. Therefore, we use the forward system’s equations and use the linearizations for the inverted system to estimate the Lyapunov spectrum. You can use the following Python program and plug in the code above to see an example.

Generating Poincaré Sections and Return Maps

Using the iterative 4-th order Runge-Kutta method as described here, we can create low dimensional slices of the system’s attractor known as Poincare Sections, Return maps, or Recurrence maps.

We will use the Rossler attractor for this example,


with a, b, and c set to 0.2, 0.2, and 5.7, respectively.

A short time series from the Rosseler Attractor

Poincare sections are important for visualizing an attractor that is embedded in greater than 3 spatial dimensions. Additionally, certain dynamical and topological properties are invariant to these transformations such as Lyapunov exponents and bifurcation diagrams (See Sprott 2003). However, the Lyapunov exponent with a value of 0 is lost due to the removal of the direction of the flow.

To build a Poincaré section, imagine that you have selected a slice of phase space. Every time the orbit passes through that slice, we collect a point. We repeat this over and over to create an image of what the attractor looks like in that slice of space.

Return map for the Y and Z variables where the value of X is between 3.5 and 3.75

We can also find local maximums to create a return map. In the following graph, we look for points that are local maximums and plot them versus the previous local maximum.

The local maximums of the variable X versus previous local maximums of the variable X

The following Python code uses the 4-th order Runge-Kutta method to create this return map.

import math, operator, random

dataX0 = []
dataY0 = []
dataZ0 = []
yList = []
xList = []
zList = []

count = 1
t = 1

x = -9
y = 0
z = 0

xList.append(x)
yList.append(y)
zList.append(z)

h = .01

a = .2
b = .2
c = 5.7

def f(x,y,z):
	global a,b,c
	dxdt = -y-z
	return dxdt

def g(x,y,z):
	global a,b,c
	dydt = x + a * y
	return dydt

def e(x,y,z):
	global a,b,c
	dzdt = b + z * (x - c)
	return dzdt

def rk4o(x, y, z):
	global h
	k1x = h*f(x, y, z)
	k1y = h*g(x, y, z)
	k1z = h*e(x, y, z)

	k2x = h*f(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)
	k2y = h*g(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)
	k2z = h*e(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)

	k3x = h*f(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)
	k3y = h*g(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)
	k3z = h*e(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)

	k4x = h*f(x + k3x, y + k3y, z + k3z)
	k4y = h*g(x + k3x, y + k3y, z + k3z)
	k4z = h*e(x + k3x, y + k3y, z + k3z)

	x = x + k1x/6.0 + k2x/3.0 + k3x/3.0 + k4x/6.0
	y = y + k1y/6.0 + k2y/3.0 + k3y/3.0 + k4y/6.0
	z = z + k1z/6.0 + k2z/3.0 + k3z/3.0 + k4z/6.0

	return [x,y,z]

X0 = []
Y0 = []
Z0 = []
previousLocalMax = x
localMax = x
t = 1
changeInTime = h
avgChange = []
LastPoint = changeInTime
while changeInTime < 20000 and len(dataX0) < 1000:

	[x,y,z] = rk4o(xList[t-1], yList[t-1], zList[t-1])

	xList.append(x)
	yList.append(y)
	zList.append(z)
	if 201 < changeInTime:
                # Print y and z points when x is between 3.5 and 3.75 (as shown in the figure)
		if x < 3.75 and x > 3.5:
			print y , "," , z

		if x < xList[t-1] and xList[t-2] < xList[t-1]:
			previousLocalMax = localMax
			localMax = xList[t-1]
			dataX0.append(localMax)
			dataY0.append(previousLocalMax)
			# Calculate the change between this and last point
			avgChange.append(changeInTime - LastPoint)
			LastPoint = changeInTime

	t = t + 1
	changeInTime += h
# Print the average steps between points
print "Average Number of Steps between points:", sum(avgChange[1:]) / float(len(avgChange[1:]))
print "First Return Map Length: "  + str(len(dataX0))
f = open("rosseler.csv", "w")
i = 0
while i < len(X0) and i < 10000:
	f.write(str(X0[i]) + "," + str(Y0[i])+ "," + str(Z0[i])+"\n")
	i = i + 1
f.close()

f = open("fr-rossler-x", "w")
i = 0
while i < len(dataX0):
	f.write(str(dataX0[i])+"\r")
	i = i + 1
f.close()

Generating time series for Ordinary Differential Equations

Ordinary Differential Equations (ODEs) can be used to define systems in fields as varied as biology to engineering to mathematics. These equations express a relationship between an unknown function and its derivative. One example, the Rossler attractor,

A short time series from the Rossler Attractor

We can produce a time series from these equations by solving the equations using the iterative 4-th order Runge-Kutta method and plugging each of the solutions back into the equations. Along with the following script which allows you to implement the Runge-Kutta method on ODEs, I have included code to numerically estimate the largest Lyapunov exponent. The largest Lyapunov exponent is used to indicate chaos, or sensitive dependence to initial conditions, within a system. For the Rossler attractor, defined parameters (a=b=.2, c=5.7), and initial conditions (x=-9, y=z= 0), the largest Lyapunov exponent is about 0.0714. Numerous resources are available for more information about ordinary differential equations and other systems that you may want to explore with this script.

import math, random
# Step Size
h = .001
#Initial conditions for Rossler system
x = -9
y = 0
z = 0

# Parameters for the Rossler System
a = .2
b = .2
c = 5.7

# The perturbation used to calculate the Largest Lyapunov exponent
perturb = .000000001

# Functions that define the system
def f(x,y,z):
	global a,b,c
	dxdt = -y-z
	return dxdt

def g(x,y,z):
	global a,b,c
	dydt = x + a * y
	return dydt

def e(x,y,z):
	global a,b,c
	dzdt = b + z * (x - c)
	return dzdt

# randomly perturb the initial conditions to create variable time series
x = x + random.random() / 2.0
y = y + random.random() / 2.0
z = z + random.random() / 2.0

dataX0 = []
dataY0 = []
dataZ0 = []
yList = []
xList = []
zList = []
lamdaList = []
lyapunovList = []

t = 1

xList.append(x)
yList.append(y)
zList.append(z)

# Use the 4th order Runge-Kutta method
def rk4o(x, y, z):
	global h
	k1x = h*f(x, y, z)
	k1y = h*g(x, y, z)
	k1z = h*e(x, y, z)

	k2x = h*f(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)
	k2y = h*g(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)
	k2z = h*e(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)

	k3x = h*f(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)
	k3y = h*g(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)
	k3z = h*e(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)

	k4x = h*f(x + k3x, y + k3y, z + k3z)
	k4y = h*g(x + k3x, y + k3y, z + k3z)
	k4z = h*e(x + k3x, y + k3y, z + k3z)

	x = x + k1x/6.0 + k2x/3.0 + k3x/3.0 + k4x/6.0
	y = y + k1y/6.0 + k2y/3.0 + k3y/3.0 + k4y/6.0
	z = z + k1z/6.0 + k2z/3.0 + k3z/3.0 + k4z/6.0

	return [x,y,z]

t = 1
changeInTime = h
startLE = True
while changeInTime < 20000: # Perform 20000 / h iterations

	[x,y,z] = rk4o(xList[t-1], yList[t-1], zList[t-1])

	xList.append(x)
	yList.append(y)
	zList.append(z)
	if 200 < changeInTime: # Remove the transient after 200 / h iterations
		if startLE:
			cx = xList[t-1] + perturb
			cy = yList[t-1]
			cz = zList[t-1]
			startLE = False

		# Calculate the Largest Lyapunov Exponent
		[cx, cy, cz] = rk4o(cx, cy, cz)

		delx = cx - x
		dely = cy - y
		delz = cz - z

		delR1 = ((delx)**2+(dely)**2+(delz)**2)

		df = 1.0 / (perturb**2) * delR1
		rs = 1.0 / math.sqrt(df)

		cx = x + rs*delx
		cy = y + rs*dely
		cz = z + rs*delz

		lamda = math.log(df)
		lamdaList.append(lamda)
		#if t % 1000 == 0: # Print the Lyapunov Exponent as you go
		#	print t, " ", .5*sum(lamdaList) / (len(lamdaList)+.0) / h

	t = t + 1
	changeInTime += h

lyapunov = .5*sum(lamdaList) / (len(lamdaList)+.0) / h
print lyapunov
# Output the x-component to a file
f = open("rossler-x", "w")
i = 0
while i < len(dataX0):
	f.write(str(dataX0[i])+"\r")
	i = i + 1
f.close()

GET/POST to a URL in Python

A relatively simple and straightforward way to post information to a server using Python.

import urllib

# Tries to open the url with the params through the method specified
def fetch_url(url, params, method):
  params = urllib.urlencode(params)
  if method=="GET":
    f = urllib.urlopen(url+"?"+params)
  else:
    # Usually a POST
    f = urllib.urlopen(url, params)
  return (f.read(), f.code)

url = "http://google.com"
method = "POST"
params = {"Param1": "Value1"}

# Fetch the content and response code
[content, response_code] = fetch_url(url, params, method)

# Check if the server responded with a success code (200)
if (response_code == 200):
  print content
else:
  print response_code