Scientific Features in NumPy and SciPy

[ad_1]

import datetime

import tensorflow as tf

import matplotlib.pyplot as plt

import numpy as np

import numba

def tSNE(X, ndims=2, perplexity=30, seed=0, max_iter=500, stop_lying_iter=100, mom_switch_iter=400):
    """The t-SNE algorithm

    Args:
        X: the high-dimensional coordinates, array-like of shape (N, D).
            It is copied to a float array and never modified in place.
        ndims: number of dimensions in the output space
        perplexity: target perplexity handed to computeGaussianPerplexity
        seed: value passed to np.random.seed before the random initialization
        max_iter: number of gradient-descent iterations to run
        stop_lying_iter: iteration at which the exaggeration of P is undone
        mom_switch_iter: iteration at which momentum switches to its final value

    Returns:
        Points of X in low dimension, a numpy array of shape (N, ndims)
    """
    momentum = 0.5
    final_momentum = 0.8
    eta = 200.0           # learning rate
    exaggeration = 12.0   # factor by which P is inflated during the first iterations

    # Work on a private float copy: the normalization below is done in place
    # and must neither touch the caller's data nor fail on integer input.
    X = np.array(X, dtype=float)
    N, _D = X.shape
    np.random.seed(seed)

    # normalize input
    X -= X.mean(axis=0)    # zero mean
    X /= np.abs(X).max()   # scale so that the largest absolute value is 1

    # compute input similarity for exact t-SNE
    P = computeGaussianPerplexity(X, perplexity)

    # symmetrize and normalize input similarities
    P = P + P.T
    P /= P.sum()

    # lie about the P-values
    P *= exaggeration

    # initialize solution
    Y = np.random.randn(N, ndims) * 0.0001

    # perform main training loop
    gains = np.ones_like(Y)
    uY = np.zeros_like(Y)
    for i in range(max_iter):
        # compute gradient, update gains: a gain grows by 0.2 where the
        # gradient sign differs from the sign of the previous update uY
        # (uY is built from -dY), shrinks by a factor 0.8 otherwise, and is
        # floored at 0.1
        dY = computeExactGradient(P, Y)
        gains = np.where(np.sign(dY) != np.sign(uY), gains + 0.2, gains * 0.8).clip(0.1)

        # gradient update with momentum and gains
        uY = momentum * uY - eta * gains * dY
        Y = Y + uY

        # make the solution zero-mean
        Y -= Y.mean(axis=0)

        # Stop lying about the P-values after a while, and switch momentum
        if i == stop_lying_iter:
            P /= exaggeration
        if i == mom_switch_iter:
            momentum = final_momentum

        # print progress
        if (i % 50) == 0:
            C = evaluateError(P, Y)
            now = datetime.datetime.now()
            print(f"{now} - Iteration {i}: Error = {C}")
    return Y

@numba.jit(nopython=True)
def computeExactGradient(P, Y):
    """Gradient of t-SNE cost function

    Args:
        P: similarity matrix, numpy array of shape (N,N)
        Y: low-dimensional coordinates, numpy array of shape (N,D)

    Returns:
        dY, a numpy array of shape (N,D)
    """
    N, _D = Y.shape
    dY = np.zeros_like(Y)
    if N < 2:
        # no pairs of distinct points: nothing contributes to the gradient
        return dY

    # compute squared Euclidean distance matrix of Y, the Q matrix, and the
    # normalization sum
    DD = computeSquaredEuclideanDistance(Y)
    Q = 1 / (1 + DD)
    # q_ij is normalized over pairs of distinct points only, so the diagonal
    # (distance 0, kernel value 1) must not enter the normalization sum
    np.fill_diagonal(Q, 0.0)
    sum_Q = Q.sum()

    # compute gradient
    mult = (P - (Q / sum_Q)) * Q
    for n in range(N):
        for m in range(N):
            if n == m:
                continue
            dY[n] += (Y[n] - Y[m]) * mult[n, m]
    return dY

@numba.jit(nopython=True)
def evaluateError(P, Y):
    """Evaluate t-SNE cost function

    Args:
        P: similarity matrix, numpy array of shape (N,N)
        Y: low-dimensional coordinates, numpy array of shape (N,D)

    Returns:
        Total t-SNE error C
    """
    # small positive constant that keeps the logarithm and the division finite
    eps = np.finfo(np.float32).eps

    DD = computeSquaredEuclideanDistance(Y)

    # Compute Q-matrix and normalization sum; the diagonal (self-pairs) is
    # replaced by eps before normalizing
    Q = 1 / (1 + DD)
    np.fill_diagonal(Q, eps)
    Q /= Q.sum()

    # Sum t-SNE error: sum P log(P/Q)
    error = P * np.log((P + eps) / (Q + eps))
    return error.sum()

@numba.jit(nopython=True)
def computeGaussianPerplexity(X, perplexity):
    """Compute Gaussian Perplexity

    For every row a precision ``beta`` is searched by bisection (at most 200
    steps, tolerance 1e-5) so that the entropy of the row-normalized Gaussian
    kernel equals ``log(perplexity)``.

    Args:
        X: numpy array of shape (N,D)
        perplexity: double

    Returns:
        Similarity matrix P, numpy array of shape (N,N) whose rows each sum to 1
    """
    # Compute the squared Euclidean distance matrix
    N, _D = X.shape
    DD = computeSquaredEuclideanDistance(X)

    eps = np.finfo(np.float32).eps
    tol = 1e-5
    # The entropy H below is measured in nats (natural log / exp), so the
    # matching target for perplexity = exp(H) is the natural log as well.
    target_entropy = np.log(perplexity)

    # Compute the Gaussian kernel row by row
    P = np.zeros_like(DD)
    for n in range(N):
        beta = 1.0
        min_beta = -np.inf
        max_beta = np.inf

        # iterate until we get a good perplexity
        for _ in range(200):
            # compute Gaussian kernel row
            P[n] = np.exp(-beta * DD[n])
            P[n, n] = eps

            # compute entropy of current row
            # Gaussians to be row-normalized to make it a probability,
            # p_i = exp(-beta * DD[n,i]) / sum_P, then
            #   H = sum_i -p_i log(p_i)
            #     = sum_i -p_i (-beta * DD[n,i] - log(sum_P))
            #     = beta * sum_i DD[n,i] * exp(-beta * DD[n,i]) / sum_P + log(sum_P)
            sum_P = P[n].sum()
            H = beta * (DD[n] @ P[n]) / sum_P + np.log(sum_P)

            # Evaluate if entropy within tolerance level
            Hdiff = H - target_entropy
            if -tol < Hdiff < tol:
                break

            if Hdiff > 0:
                # entropy too high: raise beta (narrower kernel)
                min_beta = beta
                if np.isinf(max_beta):
                    beta *= 2
                else:
                    beta = (beta + max_beta) / 2
            else:
                # entropy too low: lower beta (wider kernel)
                max_beta = beta
                if np.isinf(min_beta):
                    beta /= 2
                else:
                    beta = (beta + min_beta) / 2

        # normalize this row
        P[n] /= P[n].sum()

    assert not np.isnan(P).any()
    return P

@numba.jit(nopython=True)
def computeSquaredEuclideanDistance(X):
    """Compute squared distance

    Args:
        X: numpy array of shape (N,D)

    Returns:
        numpy array of shape (N,N) of squared distances; symmetric, with a
        zero diagonal
    """
    N, _D = X.shape
    DD = np.zeros((N, N))
    # only the upper triangle is computed; each value is mirrored below it
    for i in range(N - 1):
        for j in range(i + 1, N):
            diff = X[i] - X[j]
            sq_dist = diff @ diff
            DD[i, j] = sq_dist
            DD[j, i] = sq_dist
    return DD

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# pick 1000 samples from the dataset; the indices are drawn from the length
# of X_train because they are used to index X_train and y_train below
rows = np.random.choice(X_train.shape[0], 1000, replace=False)
X_data = X_train[rows].reshape(1000, -1).astype("float")
X_label = y_train[rows]

# run t-SNE to transform into 2D and visualize in scatter plot
Y = tSNE(X_data, 2, 30, 0, 500, 100, 400)
plt.figure(figsize=(8,8))
plt.scatter(Y[:,0], Y[:,1], c=X_label)
plt.show()

[ad_2]

Leave a Reply | Cancel Reply